diff --git a/datasets/job_codings_v4.csv b/datasets/job_codings_v4.csv new file mode 100644 index 0000000..2cc84ef Binary files /dev/null and b/datasets/job_codings_v4.csv differ diff --git a/scripts/plot-single-job.py b/scripts/plot-single-job.py index a49b538..2a9c085 100755 --- a/scripts/plot-single-job.py +++ b/scripts/plot-single-job.py @@ -10,7 +10,7 @@ import matplotlib.cm as cm jobs = sys.argv[1].split(",") prefix = sys.argv[2].split(",") -fileformat = ".pdf" +fileformat = ".png" print("Plotting the job: " + str(sys.argv[1])) print("Plotting with prefix: " + str(sys.argv[2])) @@ -83,7 +83,7 @@ def plot(prefix, header, row): timeseries = [ float(x) for x in timeseries] if sum(timeseries) == 0: continue - timeseries = [ [k, x, s] for (s,x) in zip(timeseries, range(1, len(timeseries))) ] + timeseries = [ [k, x, s] for (s,x) in zip(timeseries, range(0, len(timeseries))) ] result.extend(timeseries) if len(result) == 0: diff --git a/scripts/plot-single-ks-jobs.py b/scripts/plot-single-ks-jobs.py new file mode 100755 index 0000000..4d3fb2d --- /dev/null +++ b/scripts/plot-single-ks-jobs.py @@ -0,0 +1,147 @@ +#!/usr/bin/env python3 + +import csv +import sys +from pandas import DataFrame +from pandas import Grouper +import seaborn as sns +from matplotlib import pyplot +import matplotlib.cm as cm + +jobs = sys.argv[1].split(",") +prefix = sys.argv[2].split(",") + +fileformat = ".png" + +print("Plotting the job: " + str(sys.argv[1])) +print("Plotting with prefix: " + str(sys.argv[2])) + + +# Color map +colorMap = { "md_file_create": cm.tab10(0), +"md_file_delete": cm.tab10(1), +"md_mod": cm.tab10(2), +"md_other": cm.tab10(3), +"md_read": cm.tab10(4), +"read_bytes": cm.tab10(5), +"read_calls": cm.tab10(6), +"write_bytes": cm.tab10(7), +"write_calls": cm.tab10(8) +} + +markerMap = { "md_file_create": "^", +"md_file_delete": "v", +"md_other": ".", +"md_mod": "<", +"md_read": ">", +"read_bytes": "h", +"read_calls": "H", +"write_bytes": "D", +"write_calls": "d" +} + +linestyleMap = { "md_file_create": ":", +"md_file_delete": ":", +"md_mod": ":", +"md_other": ":", +"md_read": ":", +"read_bytes": "--", +"read_calls": "--", +"write_bytes": "-.", +"write_calls": "-." +} + +# Plot the timeseries +def plot(prefix, header, row): + x = { h : d for (h, d) in zip(header, row)} + jobid = x["jobid"] + del x["jobid"] + result = [] + for k in x: + timeseries = x[k].split(":") + timeseries = [ float(x) for x in timeseries] + if sum(timeseries) == 0: + continue + timeseries = [ [k, x, s] for (s,x) in zip(timeseries, range(0, len(timeseries))) ] + result.extend(timeseries) + + if len(result) == 0: + print("Empty job! Cannot plot!") + return + + data = DataFrame(result, columns=["metrics", "segment", "value"]) + groups = data.groupby(["metrics"]) + metrics = DataFrame() + labels = [] + colors = [] + style = [] + for name, group in groups: + style.append(linestyleMap[name] + markerMap[name]) + colors.append(colorMap[name]) + if name == "md_file_delete": + name = "file_delete" + if name == "md_file_create": + name = "file_create" + metrics[name] = [x[2] for x in group.values] + labels.append(name) + + fsize = (8, 1 + 1.1 * len(labels)) + fsizeFixed = (8, 2) + fsizeHist = (8, 4) + + pyplot.close('all') + + if len(labels) < 4 : + ax = metrics.plot(legend=True, sharex=True, grid = True, sharey=True, markersize=10, figsize=fsizeFixed, color=colors, style=style) + ax.set_ylabel("Value") + else: + ax = metrics.plot(subplots=True, legend=False, sharex=True, grid = True, sharey=True, markersize=10, figsize=fsize, color=colors, style=style) + for (i, l) in zip(range(0, len(labels)), labels): + ax[i].set_ylabel(l) + + pyplot.xlabel("Segment number") + pyplot.savefig(prefix + "timeseries" + jobid + fileformat, bbox_inches='tight', dpi=150) + + # Create a facetted grid + #g = sns.FacetGrid(tips, col="time", margin_titles=True) + #bins = np.linspace(0, 60, 13) + #g.map(plt.hist, "total_bill", color="steelblue", bins=bins) + + ax = metrics.hist(sharex=True, grid = True, sharey=True, figsize=fsizeHist, bins=10) + pyplot.savefig(prefix + "hist" + jobid + fileformat, bbox_inches='tight', dpi=150) + + + # Plot first 30 segments + if len(timeseries) <= 50: + return + + if len(labels) < 4 : + ax = metrics.plot(legend=True, xlim=(0,30), sharex=True, grid = True, sharey=True, markersize=10, figsize=fsizeFixed, color=colors, style=style) + ax.set_ylabel("Value") + else: + ax = metrics.plot(subplots=True, xlim=(0,30), legend=False, sharex=True, grid = True, sharey=True, markersize=10, figsize=fsize, color=colors, style=style) + for (i, l) in zip(range(0, len(labels)), labels): + ax[i].set_ylabel(l) + + pyplot.xlabel("Segment number") + pyplot.savefig(prefix + "timeseries" + jobid + "-30" + fileformat, bbox_inches='tight', dpi=150) + +### end plotting function + + + +#with open('job-io-datasets/datasets/job_codings.csv') as csv_file: # EB: old codings +with open('./datasets/job_codings_v4.csv') as csv_file: # EB: v3 codings moved to this repo + csv_reader = csv.reader(csv_file, delimiter=',') + line_count = 0 + for row in csv_reader: + if line_count == 0: + header = row + line_count += 1 + continue + job = row[0].strip() + if not job in jobs: + continue + else: + index = jobs.index(job) + plot(prefix[index] + "-ks-" + str(index), header, row)