Merge branch 'master' of http://git.hps.vi4io.org/eugen.betke/mistral-io-datasets into master

2020-09-03 13:46:11 +02:00 · 2020-09-03 13:46:11 +02:00 · 65f8cdb98d
commit 65f8cdb98d
parent ef0412b41b b6cdcff1a0
3 changed files with 149 additions and 2 deletions
--- a/datasets/job_codings_v4.csv
+++ b/datasets/job_codings_v4.csv
--- a/scripts/plot-single-job.py
+++ b/scripts/plot-single-job.py
@ -10,7 +10,7 @@ import matplotlib.cm as cm
 jobs = sys.argv[1].split(",")
 prefix = sys.argv[2].split(",")
-fileformat = ".pdf"
+fileformat = ".png"
 print("Plotting the job: " + str(sys.argv[1]))
 print("Plotting with prefix: " + str(sys.argv[2]))
@ -83,7 +83,7 @@ def plot(prefix, header, row):
    timeseries = [ float(x) for x in timeseries]
    if sum(timeseries) == 0:
      continue
-    timeseries = [ [k, x, s] for (s,x) in zip(timeseries, range(1, len(timeseries))) ]
+    timeseries = [ [k, x, s] for (s,x) in zip(timeseries, range(0, len(timeseries))) ]
    result.extend(timeseries)
  if len(result) == 0:
--- a/scripts/plot-single-ks-jobs.py
+++ b/scripts/plot-single-ks-jobs.py
@ -0,0 +1,147 @@
 #!/usr/bin/env python3
 import csv
 import sys
 from pandas import DataFrame
 from pandas import Grouper
 import seaborn as sns
 from matplotlib import pyplot
 import matplotlib.cm as cm
 jobs = sys.argv[1].split(",")
 prefix = sys.argv[2].split(",")
 fileformat = ".png"
 print("Plotting the job: " + str(sys.argv[1]))
 print("Plotting with prefix: " + str(sys.argv[2]))
 # Color map
 colorMap = { "md_file_create": cm.tab10(0),
 "md_file_delete": cm.tab10(1),
 "md_mod": cm.tab10(2),
 "md_other": cm.tab10(3),
 "md_read": cm.tab10(4),
 "read_bytes": cm.tab10(5),
 "read_calls": cm.tab10(6),
 "write_bytes": cm.tab10(7),
 "write_calls": cm.tab10(8)
 }
 markerMap = { "md_file_create": "^",
 "md_file_delete": "v",
 "md_other": ".",
 "md_mod": "<",
 "md_read": ">",
 "read_bytes": "h",
 "read_calls": "H",
 "write_bytes": "D",
 "write_calls": "d"
 }
 linestyleMap = { "md_file_create": ":",
 "md_file_delete": ":",
 "md_mod": ":",
 "md_other": ":",
 "md_read": ":",
 "read_bytes": "--",
 "read_calls": "--",
 "write_bytes": "-.",
 "write_calls": "-."
 }
 # Plot the timeseries
 def plot(prefix, header, row):
  x = { h : d for (h, d) in zip(header, row)}
  jobid = x["jobid"]
  del x["jobid"]
  result = []
  for k in x:
    timeseries = x[k].split(":")
    timeseries = [ float(x) for x in timeseries]
    if sum(timeseries) == 0:
      continue
    timeseries = [ [k, x, s] for (s,x) in zip(timeseries, range(0, len(timeseries))) ]
    result.extend(timeseries)
  if len(result) == 0:
    print("Empty job! Cannot plot!")
    return
  data = DataFrame(result, columns=["metrics", "segment", "value"])
  groups = data.groupby(["metrics"])
  metrics = DataFrame()
  labels = []
  colors = []
  style = []
  for name, group in groups:
    style.append(linestyleMap[name] + markerMap[name])
    colors.append(colorMap[name])
    if name == "md_file_delete":
      name = "file_delete"
    if name == "md_file_create":
      name = "file_create"
    metrics[name] = [x[2] for x in group.values]
    labels.append(name)
  fsize = (8, 1 + 1.1 * len(labels))
  fsizeFixed = (8, 2)
  fsizeHist = (8, 4)
  pyplot.close('all')
  if len(labels) < 4 :
    ax = metrics.plot(legend=True, sharex=True, grid = True,  sharey=True, markersize=10, figsize=fsizeFixed, color=colors, style=style)
    ax.set_ylabel("Value")
  else:
    ax = metrics.plot(subplots=True, legend=False, sharex=True, grid = True,  sharey=True, markersize=10, figsize=fsize, color=colors, style=style)
    for (i, l) in zip(range(0, len(labels)), labels):
      ax[i].set_ylabel(l)
  pyplot.xlabel("Segment number")
  pyplot.savefig(prefix + "timeseries" + jobid + fileformat, bbox_inches='tight', dpi=150)
  # Create a facetted grid
  #g = sns.FacetGrid(tips, col="time", margin_titles=True)
  #bins = np.linspace(0, 60, 13)
  #g.map(plt.hist, "total_bill", color="steelblue", bins=bins)
  ax = metrics.hist(sharex=True, grid = True, sharey=True, figsize=fsizeHist, bins=10)
  pyplot.savefig(prefix + "hist" + jobid + fileformat, bbox_inches='tight', dpi=150)
  # Plot first 30 segments
  if len(timeseries) <= 50:
    return
  if len(labels) < 4 :
    ax = metrics.plot(legend=True, xlim=(0,30), sharex=True, grid = True,  sharey=True, markersize=10, figsize=fsizeFixed, color=colors, style=style)
    ax.set_ylabel("Value")
  else:
    ax = metrics.plot(subplots=True, xlim=(0,30), legend=False, sharex=True, grid = True,  sharey=True, markersize=10, figsize=fsize, color=colors, style=style)
    for (i, l) in zip(range(0, len(labels)), labels):
      ax[i].set_ylabel(l)
  pyplot.xlabel("Segment number")
  pyplot.savefig(prefix + "timeseries" + jobid + "-30" + fileformat, bbox_inches='tight', dpi=150)
 ### end plotting function
 #with open('job-io-datasets/datasets/job_codings.csv') as csv_file: # EB: old codings
 with open('./datasets/job_codings_v4.csv') as csv_file: # EB: v3 codings moved to this repo
    csv_reader = csv.reader(csv_file, delimiter=',')
    line_count = 0
    for row in csv_reader:
      if line_count == 0:
        header = row
        line_count += 1
        continue
      job = row[0].strip()
      if not job in jobs:
        continue
      else:
        index = jobs.index(job)
        plot(prefix[index] + "-ks-" + str(index), header, row)