#!/usr/bin/env python3
"""Plot per-job metric timeseries and histograms from a job-codings CSV.

Command line:
    argv[1]  comma-separated list of job ids to plot
    argv[2]  comma-separated list of output prefixes; the prefix at the same
             position as a job id in argv[1] is used for that job

For every CSV row whose first field matches a requested job id, image files
are written next to the given prefix (see ``plot``).
"""
import csv
import sys

from pandas import DataFrame
from pandas import Grouper
import seaborn as sns
from matplotlib import pyplot
import matplotlib.cm as cm

jobs = sys.argv[1].split(",")
prefix = sys.argv[2].split(",")
fileformat = ".png"

print("Plotting the job: " + str(sys.argv[1]))
print("Plotting with prefix: " + str(sys.argv[2]))

# Color map
colorMap = {
    "md_file_create": cm.tab10(0),
    "md_file_delete": cm.tab10(1),
    "md_mod": cm.tab10(2),
    "md_other": cm.tab10(3),
    "md_read": cm.tab10(4),
    "read_bytes": cm.tab10(5),
    "read_calls": cm.tab10(6),
    "write_bytes": cm.tab10(7),
    "write_calls": cm.tab10(8),
}

markerMap = {
    "md_file_create": "^",
    "md_file_delete": "v",
    "md_other": ".",
    "md_mod": "<",
    "md_read": ">",
    "read_bytes": "h",
    "read_calls": "H",
    "write_bytes": "D",
    "write_calls": "d",
}

linestyleMap = {
    "md_file_create": ":",
    "md_file_delete": ":",
    "md_mod": ":",
    "md_other": ":",
    "md_read": ":",
    "read_bytes": "--",
    "read_calls": "--",
    "write_bytes": "-.",
    "write_calls": "-.",
}

# Shorter display names used for plot labels / DataFrame columns.
_LABEL_OVERRIDES = {
    "md_file_delete": "file_delete",
    "md_file_create": "file_create",
}


def _plot_metrics(metrics, labels, colors, style, fsize_fixed, fsize, xlim=None):
    """Draw the metric timeseries on a new figure.

    With fewer than four metrics everything goes into one shared axes with a
    legend; otherwise each metric gets its own subplot labelled on the y axis.
    ``xlim`` is forwarded to ``DataFrame.plot`` (``None`` keeps the default).
    """
    if len(labels) < 4:
        ax = metrics.plot(legend=True, xlim=xlim, sharex=True, grid=True,
                          sharey=True, markersize=10, figsize=fsize_fixed,
                          color=colors, style=style)
        ax.set_ylabel("Value")
    else:
        axes = metrics.plot(subplots=True, xlim=xlim, legend=False,
                            sharex=True, grid=True, sharey=True,
                            markersize=10, figsize=fsize, color=colors,
                            style=style)
        for axis, label in zip(axes, labels):
            axis.set_ylabel(label)
    pyplot.xlabel("Segment number")


# Plot the timeseries
def plot(prefix, header, row):
    """Plot all non-zero metrics of one CSV row and save them as images.

    Args:
        prefix: String prepended to every output file name.
        header: Column names; must contain "jobid". Every other column name
            must be a key of colorMap, markerMap and linestyleMap unless its
            series is all zero.
        row: Field values aligned with ``header``. Every field except the
            job id is a ":"-separated list of numbers, one per segment.

    Writes ``<prefix>timeseries<jobid>.png`` and ``<prefix>hist<jobid>.png``;
    when more than 50 segments were plotted, additionally writes
    ``<prefix>timeseries<jobid>-30.png`` with the x axis limited to (0, 30).
    Prints a message and returns without plotting if every metric sums to 0.

    Raises:
        KeyError: if "jobid" is missing from ``header`` or a plotted metric
            has no entry in the style maps.
        ValueError: if a field contains a non-numeric segment value.
    """
    fields = dict(zip(header, row))
    jobid = fields.pop("jobid")

    # Long-format records: [metric name, segment index, value].
    result = []
    for metric, encoded in fields.items():
        values = [float(v) for v in encoded.split(":")]
        if sum(values) == 0:
            # Metrics that sum to zero are not plotted.
            continue
        result.extend(
            [metric, segment, value] for segment, value in enumerate(values)
        )

    if not result:
        print("Empty job! Cannot plot!")
        return

    data = DataFrame(result, columns=["metrics", "segment", "value"])

    metrics = DataFrame()
    labels = []
    colors = []
    style = []
    # Group by the scalar key "metrics": grouping by the one-element list
    # ["metrics"] makes pandas >= 2.0 yield 1-tuple group names, which are
    # not valid keys for the style maps below.
    for name, group in data.groupby("metrics"):
        style.append(linestyleMap[name] + markerMap[name])
        colors.append(colorMap[name])
        label = _LABEL_OVERRIDES.get(name, name)
        metrics[label] = group["value"].tolist()
        labels.append(label)

    fsize = (8, 1 + 1.1 * len(labels))
    fsize_fixed = (8, 2)
    fsize_hist = (8, 4)

    # Drop figures left over from a previous call.
    pyplot.close('all')

    _plot_metrics(metrics, labels, colors, style, fsize_fixed, fsize)
    pyplot.savefig(prefix + "timeseries" + jobid + fileformat,
                   bbox_inches='tight', dpi=150)

    metrics.hist(sharex=True, grid=True, sharey=True, figsize=fsize_hist,
                 bins=10)
    pyplot.savefig(prefix + "hist" + jobid + fileformat,
                   bbox_inches='tight', dpi=150)

    # Plot first 30 segments (only worthwhile for long series). The row count
    # of `metrics` is the number of plotted segments.
    if len(metrics) <= 50:
        return

    _plot_metrics(metrics, labels, colors, style, fsize_fixed, fsize,
                  xlim=(0, 30))
    pyplot.savefig(prefix + "timeseries" + jobid + "-30" + fileformat,
                   bbox_inches='tight', dpi=150)
### end plotting function


#with open('job-io-datasets/datasets/job_codings.csv') as csv_file: # EB: old codings
# EB: v3 codings moved to this repo
with open('./datasets/job_codings_v4.csv', newline='') as csv_file:
    csv_reader = csv.reader(csv_file, delimiter=',')
    # The first record is the header; an empty file simply yields no rows.
    header = next(csv_reader, None)
    for row in csv_reader:
        job = row[0].strip()
        if job not in jobs:
            continue
        # Position of the job on the command line selects its prefix.
        index = jobs.index(job)
        plot(prefix[index] + "-ks-" + str(index), header, row)