mistral-io-datasets/scripts/plot-job-timelines-ks.py

155 lines
4.3 KiB
Python
Executable File

#!/usr/bin/env python3
import csv
import sys
import pandas as pd
from pandas import DataFrame
from pandas import Grouper
import seaborn as sns
from matplotlib import pyplot
import matplotlib.cm as cm
# Command line: <jobids> <prefixes>, both comma-separated lists.
# The prefix at position i is used for the output files of the job at
# position i (see the CSV loop at the end of the script).
if len(sys.argv) < 3:
    # Fail with a usage hint instead of a bare IndexError traceback.
    sys.exit("Usage: " + sys.argv[0] + " <jobid>[,<jobid>...] <prefix>[,<prefix>...]")
jobs = sys.argv[1].split(",")
prefix = sys.argv[2].split(",")
# File name extension appended to every figure that is saved.
fileformat = ".pdf"
print("Plotting the job: " + str(sys.argv[1]))
print("Plotting with prefix: " + str(sys.argv[2]))
# Color map: every metric name is bound to a fixed entry of matplotlib's
# tab10 palette; the position in the tuple below is the palette index.
colorMap = {
    metric: cm.tab10(slot)
    for slot, metric in enumerate((
        "md_file_create",
        "md_file_delete",
        "md_mod",
        "md_other",
        "md_read",
        "read_bytes",
        "read_calls",
        "write_bytes",
        "write_calls",
    ))
}
# Marker map: matplotlib marker symbol used for each metric.
markerMap = dict(zip(
    (
        "md_file_create",
        "md_file_delete",
        "md_other",
        "md_mod",
        "md_read",
        "read_bytes",
        "read_calls",
        "write_bytes",
        "write_calls",
    ),
    ("^", "v", ".", "<", ">", "h", "H", "D", "d"),
))
# Line style map: metadata metrics are dotted, read metrics dashed and
# write metrics dash-dotted.
linestyleMap = {
    metric: dash
    for dash, family in (
        (":", ("md_file_create", "md_file_delete", "md_mod", "md_other", "md_read")),
        ("--", ("read_bytes", "read_calls")),
        ("-.", ("write_bytes", "write_calls")),
    )
    for metric in family
}
def _save_timeseries(metrics, labels, colors, style, filename, **plot_args):
    """Draw the columns of ``metrics`` as line plots and save them to ``filename``.

    With fewer than four metrics everything goes into one axes with a legend;
    otherwise each metric gets its own subplot labelled with the metric name.
    Extra keyword arguments (e.g. ``xlim``) are forwarded to ``DataFrame.plot``.
    """
    common = dict(sharex=True, grid=True, sharey=True, markersize=10,
                  color=colors, style=style, **plot_args)
    if len(labels) < 4:
        ax = metrics.plot(legend=True, figsize=(8, 2), **common)
        ax.set_ylabel("Value")
    else:
        # Figure height grows with the number of stacked subplots.
        axes = metrics.plot(subplots=True, legend=False,
                            figsize=(8, 1 + 1.1 * len(labels)), **common)
        for i, label in enumerate(labels):
            axes[i].set_ylabel(label)
    pyplot.xlabel("Segment number")
    pyplot.savefig(filename, bbox_inches='tight', dpi=150)


def plot(prefix, header, row):
    """Plot the time series and value histograms of a single job.

    Args:
        prefix: String prepended to the name of every file written.
        header: Column names of the record; must contain "jobid".
        row: One record aligned with ``header``. Every column other than
            "jobid" is parsed as a ":"-separated list of numbers, one value
            per segment.

    Metrics whose values sum to zero are left out. If no metric remains, a
    message is printed and nothing is plotted.

    Files written (``fileformat`` is the module-level extension):
        <prefix>timeseries<jobid><fileformat>
        <prefix>hist<jobid><fileformat>
        <prefix>timeseries<jobid>-30<fileformat>  (only for more than 50
            segments; same plot with the x axis limited to 0..30)

    Raises:
        KeyError: if ``header`` has no "jobid" column or a non-zero metric is
            missing from colorMap / markerMap / linestyleMap.
        ValueError: if a metric column contains a non-numeric value.
    """
    fields = dict(zip(header, row))
    jobid = fields.pop("jobid")

    # Parse every metric column; keep only those with a non-zero sum.
    series_by_metric = {}
    for metric, encoded in fields.items():
        values = [float(v) for v in encoded.split(":")]
        if sum(values) == 0:
            continue
        series_by_metric[metric] = values

    if not series_by_metric:
        print("Empty job! Cannot plot!")
        return

    # Shorter display names for two of the metrics.
    display_names = {"md_file_delete": "file_delete",
                     "md_file_create": "file_create"}

    metrics = DataFrame()
    labels = []
    colors = []
    style = []
    # Walk the metrics in sorted order. This is the order the former
    # groupby(["metrics"]) produced, without depending on the group keys:
    # since pandas 2.0 a one-element list grouper yields 1-tuples as keys,
    # which broke the style/colour lookups below.
    for name in sorted(series_by_metric):
        style.append(linestyleMap[name] + markerMap[name])
        colors.append(colorMap[name])
        label = display_names.get(name, name)
        values = series_by_metric[name]
        try:
            metrics[label] = pd.Series(values)
        except Exception:
            # Best effort: report the offending job and skip its plots.
            print("Error processing %s with" % jobid)
            print(values)
            return
        labels.append(label)

    pyplot.close('all')

    # Full time series.
    _save_timeseries(metrics, labels, colors, style,
                     prefix + "timeseries" + jobid + fileformat)

    # Histogram of the values of each metric.
    metrics.hist(grid=True, sharey=True, figsize=(8, 6.5), bins=15, range=(0, 15))
    pyplot.xlim(0, 15)
    pyplot.savefig(prefix + "hist" + jobid + fileformat, bbox_inches='tight', dpi=150)

    # Zoomed view of the x range 0..30, only produced for jobs with more
    # than 50 segments.
    if len(metrics.index) <= 50:
        return
    _save_timeseries(metrics, labels, colors, style,
                     prefix + "timeseries" + jobid + "-30" + fileformat,
                     xlim=(0, 30))
# Read the job codings and plot every job requested on the command line.
# EB: the old codings were read from job-io-datasets/datasets/job_codings.csv
# EB: codings moved to this repo
# NOTE(review): the original comment said "v3" but the file name says v4 - confirm.
# newline='' is what the csv module asks for, so that quoted fields containing
# line breaks are parsed correctly.
with open('./datasets/job_codings_v4.csv', newline='') as csv_file:
    csv_reader = csv.reader(csv_file, delimiter=',')
    # The first record carries the column names; None if the file is empty.
    header = next(csv_reader, None)
    for row in csv_reader:
        if not row:
            # Blank lines come back as empty lists: there is no job id to read.
            continue
        job = row[0].strip()
        try:
            # The first position wins if a job id was passed more than once.
            index = jobs.index(job)
        except ValueError:
            # Not one of the requested jobs.
            continue
        # prefix[index] raises IndexError when fewer prefixes than jobs were given.
        plot(prefix[index] + "-ks-" + str(index), header, row)