5 changed files with 170 additions and 8 deletions
@ -0,0 +1,154 @@ |
|||
#!/usr/bin/env python3 |
|||
|
|||
import csv |
|||
import sys |
|||
import pandas as pd |
|||
from pandas import DataFrame |
|||
from pandas import Grouper |
|||
import seaborn as sns |
|||
from matplotlib import pyplot |
|||
import matplotlib.cm as cm |
|||
|
|||
# Command line: argv[1] is a comma-separated list of job ids, argv[2] a
# comma-separated list of output prefixes. The driver loop at the bottom of
# this script pairs them by position (prefix[i] belongs to jobs[i]).
if len(sys.argv) < 3:
    # Without this guard a missing argument surfaces as a bare IndexError.
    sys.exit("usage: " + sys.argv[0] + " <jobid>[,<jobid>...] <prefix>[,<prefix>...]")

jobs = sys.argv[1].split(",")
prefix = sys.argv[2].split(",")

# Extension appended to every figure file name written by plot().
fileformat = ".png"

print("Plotting the job: " + str(sys.argv[1]))
print("Plotting with prefix: " + str(sys.argv[2]))
|||
|
|||
|
|||
# Color map: each metric gets the tab10 palette entry at its position in the
# tuple below (md_file_create -> 0 ... write_calls -> 8).
colorMap = {
    metric: cm.tab10(slot)
    for slot, metric in enumerate((
        "md_file_create",
        "md_file_delete",
        "md_mod",
        "md_other",
        "md_read",
        "read_bytes",
        "read_calls",
        "write_bytes",
        "write_calls",
    ))
}
|||
|
|||
# Marker symbol per metric; plot() appends it to the metric's line style to
# form the style string handed to the plotting call.
markerMap = dict(
    md_file_create="^",
    md_file_delete="v",
    md_other=".",
    md_mod="<",
    md_read=">",
    read_bytes="h",
    read_calls="H",
    write_bytes="D",
    write_calls="d",
)
|||
|
|||
# Line style per metric: the md_* metrics share ":", the read_* metrics "--",
# and the write_* metrics "-.".
linestyleMap = {
    **dict.fromkeys(
        ("md_file_create", "md_file_delete", "md_mod", "md_other", "md_read"),
        ":",
    ),
    **dict.fromkeys(("read_bytes", "read_calls"), "--"),
    **dict.fromkeys(("write_bytes", "write_calls"), "-."),
}
|||
|
|||
def _plot_timeseries(metrics, labels, colors, style, fsize, fsize_fixed, xlim=None):
    """Draw the metric columns as line plots and label the axes.

    With fewer than four metrics everything goes into one axes with a legend
    and the fixed figure size; otherwise each metric gets its own subplot
    labelled with the metric name. ``xlim`` is only forwarded to the plotting
    call when it is not None. The caller is responsible for saving the figure.
    """
    extra = {} if xlim is None else {"xlim": xlim}
    if len(labels) < 4:
        ax = metrics.plot(legend=True, sharex=True, grid=True, sharey=True,
                          markersize=10, figsize=fsize_fixed, color=colors,
                          style=style, **extra)
        ax.set_ylabel("Value")
    else:
        axes = metrics.plot(subplots=True, legend=False, sharex=True, grid=True,
                            sharey=True, markersize=10, figsize=fsize,
                            color=colors, style=style, **extra)
        for axis, label in zip(axes, labels):
            axis.set_ylabel(label)
    pyplot.xlabel("Segment number")


# Plot the timeseries
def plot(prefix, header, row):
    """Write time-series and histogram figures for one job.

    ``header`` and ``row`` are zipped into a column -> value mapping. The
    ``jobid`` column names the job; every other column is parsed as a
    ":"-separated list of floats, one value per segment. Columns whose values
    sum to zero are skipped; if nothing is left a message is printed and no
    figure is written.

    Files written (``fileformat`` is the module-level extension):
      * ``<prefix>timeseries<jobid><fileformat>``
      * ``<prefix>hist<jobid><fileformat>``
      * ``<prefix>timeseries<jobid>-30<fileformat>`` (x-axis limited to 0..30),
        only when the last parsed column has more than 50 segments.

    Raises KeyError if ``header`` has no ``jobid`` column or a plotted column
    has no entry in the module-level style maps, and ValueError if a value
    cannot be parsed as float. Returns None.
    """
    fields = dict(zip(header, row))
    jobid = fields.pop("jobid")

    result = []
    segment_count = 0
    for metric, encoded in fields.items():
        values = [float(v) for v in encoded.split(":")]
        # Remember the length of the most recently parsed column (zero-sum
        # columns included); it gates the zoomed plot at the end.
        segment_count = len(values)
        if sum(values) == 0:
            continue
        result.extend([metric, segment, value] for segment, value in enumerate(values))

    if not result:
        print("Empty job! Cannot plot!")
        return

    data = DataFrame(result, columns=["metrics", "segment", "value"])
    # Group by the column name itself, not a one-element list: recent pandas
    # versions hand back a 1-tuple as the group key in the list case, which
    # would miss the string-keyed style maps below.
    groups = data.groupby("metrics")
    display_names = {"md_file_delete": "file_delete", "md_file_create": "file_create"}

    metrics = DataFrame()
    labels = []
    colors = []
    style = []
    for name, group in groups:
        # Style lookups use the raw metric name; the shortened name is only
        # used for the column / axis label.
        style.append(linestyleMap[name] + markerMap[name])
        colors.append(colorMap[name])
        name = display_names.get(name, name)
        try:
            # A fresh Series (new 0..n-1 index) so columns line up by segment.
            metrics[name] = pd.Series(group["value"].tolist())
        except Exception:
            # Best effort: report the job and give up on it, but let
            # KeyboardInterrupt / SystemExit propagate.
            print("Error processing %s with" % jobid)
            print(group.values)
            return

        labels.append(name)

    fsize = (8, 1 + 1.1 * len(labels))
    fsizeFixed = (8, 2)
    fsizeHist = (8, 4)

    pyplot.close('all')

    _plot_timeseries(metrics, labels, colors, style, fsize, fsizeFixed)
    pyplot.savefig(prefix + "timeseries" + jobid + fileformat, bbox_inches='tight', dpi=150)

    # Create a facetted grid
    #g = sns.FacetGrid(tips, col="time", margin_titles=True)
    #bins = np.linspace(0, 60, 13)
    #g.map(plt.hist, "total_bill", color="steelblue", bins=bins)

    metrics.hist(sharex=True, grid=True, sharey=True, figsize=fsizeHist, bins=10)
    pyplot.savefig(prefix + "hist" + jobid + fileformat, bbox_inches='tight', dpi=150)

    # Plot first 30 segments (only for jobs with more than 50 segments)
    if segment_count <= 50:
        return

    _plot_timeseries(metrics, labels, colors, style, fsize, fsizeFixed, xlim=(0, 30))
    pyplot.savefig(prefix + "timeseries" + jobid + "-30" + fileformat, bbox_inches='tight', dpi=150)

### end plotting function
|||
|
|||
|
|||
|
|||
# Driver: scan the job codings CSV and plot every row whose first column is
# one of the job ids requested on the command line.
#with open('job-io-datasets/datasets/job_codings.csv') as csv_file: # EB: old codings
# newline='' is what the csv module asks for when handing it a file object.
with open('./datasets/job_codings_v4.csv', newline='') as csv_file: # EB: v3 codings moved to this repo
    csv_reader = csv.reader(csv_file, delimiter=',')
    # The first row holds the column names; None means the file is empty and
    # the loop below simply does nothing.
    header = next(csv_reader, None)
    for row in csv_reader:
        if not row:
            # csv.reader yields [] for a blank line; row[0] would raise.
            continue
        job = row[0].strip()
        if job not in jobs:
            continue
        # Position of the job on the command line selects its prefix and is
        # also embedded in the output file name.
        index = jobs.index(job)
        plot(prefix[index] + "-ks-" + str(index), header, row)
Loading…
Reference in new issue