#!/usr/bin/env python3
"""Plot the per-metric timeseries of selected jobs as PDF files.

Usage: <script> JOBID[,JOBID...] PREFIX[,PREFIX...]

The first argument is a comma-separated list of job ids, the second a
comma-separated list of output path prefixes; the i-th prefix is used for
the i-th job id.  Jobs are looked up in
``job-io-datasets/datasets/job_codings.csv``.
"""

import csv
import sys

from pandas import DataFrame
from pandas import Grouper
from matplotlib import pyplot
import matplotlib.cm as cm

# Color map
colorMap = {
    "md_file_create": cm.tab10(0),
    "md_file_delete": cm.tab10(1),
    "md_mod": cm.tab10(2),
    "md_other": cm.tab10(3),
    "md_read": cm.tab10(4),
    "read_bytes": cm.tab10(5),
    "read_calls": cm.tab10(6),
    "write_bytes": cm.tab10(7),
    "write_calls": cm.tab10(8),
}

# Marker symbol per metric
markerMap = {
    "md_file_create": "^",
    "md_file_delete": "v",
    "md_other": ".",
    "md_mod": "<",
    "md_read": ">",
    "read_bytes": "h",
    "read_calls": "H",
    "write_bytes": "D",
    "write_calls": "d",
}

# Line style per metric
linestyleMap = {
    "md_file_create": ":",
    "md_file_delete": ":",
    "md_mod": ":",
    "md_other": ":",
    "md_read": ":",
    "read_bytes": "--",
    "read_calls": "--",
    "write_bytes": "-.",
    "write_calls": "-.",
}

# Columns of the CSV that are not ":"-separated timeseries.
_NON_TIMESERIES_COLUMNS = ("coding_abs", "coding_abs_aggzeros")

# Input file read by main().
_CODINGS_CSV = 'job-io-datasets/datasets/job_codings.csv'


def _render(metrics, labels, colors, style, path, xlim=None):
    """Draw *metrics* and save the figure to *path*.

    Fewer than four metrics share a single fixed-size axes with a legend;
    otherwise every metric gets its own subplot labelled with its name.
    *xlim* (if given) restricts the x axis.
    """
    if len(labels) < 4:
        ax = metrics.plot(legend=True, xlim=xlim, sharex=True, grid=True,
                          sharey=True, markersize=10, figsize=(8, 2),
                          color=colors, style=style)
        ax.set_ylabel("Value")
    else:
        axes = metrics.plot(subplots=True, xlim=xlim, legend=False,
                            sharex=True, grid=True, sharey=True,
                            markersize=10,
                            figsize=(8, 1 + 1.5 * len(labels)),
                            color=colors, style=style)
        for axis, label in zip(axes, labels):
            axis.set_ylabel(label)

    pyplot.xlabel("Segment number")
    pyplot.savefig(path, bbox_inches='tight')


# Plot the timeseries
def plot(prefix, header, row):
    """Plot all non-zero metric timeseries of one job.

    Args:
        prefix: Path prefix of the output files.
        header: Column names of the CSV file.
        row: One CSV record; must contain a ``jobid`` column.  Every column
            other than ``jobid``, ``coding_abs`` and ``coding_abs_aggzeros``
            is parsed as ":"-separated floats.

    Writes ``<prefix>timeseries<jobid>.pdf`` and, when the job has more than
    50 segments, ``<prefix>timeseries<jobid>-30.pdf`` limited to the x range
    0..30.  Prints a message and writes nothing when every metric sums to 0.

    Raises:
        KeyError: if ``jobid`` is missing or a metric has no entry in the
            style maps.
        ValueError: if a timeseries value is not a number or the metrics
            differ in length.
    """
    fields = dict(zip(header, row))
    jobid = fields.pop("jobid")
    for column in _NON_TIMESERIES_COLUMNS:
        fields.pop(column, None)

    # Sorted by name so the column order matches what a groupby on the
    # metric name would produce.
    series = {}
    for name in sorted(fields):
        values = [float(v) for v in fields[name].split(":")]
        if sum(values) == 0:
            continue
        # Keep every sample, including the last one.
        series[name] = values

    if not series:
        print("Empty job! Cannot plot!")
        return

    metrics = DataFrame(series)
    labels = list(series)
    colors = [colorMap[name] for name in labels]
    style = [linestyleMap[name] + markerMap[name] for name in labels]

    target = prefix + "timeseries" + jobid
    pyplot.close('all')
    _render(metrics, labels, colors, style, target + ".pdf")

    # Plot first 30 segments (only worthwhile for long jobs).
    if len(metrics) <= 50:
        return
    _render(metrics, labels, colors, style, target + "-30.pdf", xlim=(0, 30))

### end plotting function


def main():
    """Read job ids and prefixes from argv and plot each job found."""
    if len(sys.argv) < 3:
        sys.exit("usage: " + sys.argv[0]
                 + " JOBID[,JOBID...] PREFIX[,PREFIX...]")

    jobs = sys.argv[1].split(",")
    prefix = sys.argv[2].split(",")
    print("Plotting the job: " + str(jobs))

    # newline="" is what the csv module expects for files it reads.
    with open(_CODINGS_CSV, newline="") as csv_file:
        csv_reader = csv.reader(csv_file, delimiter=',')
        header = next(csv_reader, None)
        if header is None:
            return
        for row in csv_reader:
            if not row:
                continue
            job_id = row[0].strip()
            if job_id not in jobs:
                continue
            # Pair the job with the prefix at the same argv position rather
            # than with the order in which jobs appear in the CSV file.
            # Raises IndexError when fewer prefixes than job ids were given.
            plot(prefix[jobs.index(job_id)], header, row)


if __name__ == "__main__":
    main()