Merge branch 'master' of http://git.hps.vi4io.org/eugen.betke/mistral-io-datasets

2020-08-18 17:41:24 +02:00 · 2020-08-18 17:41:24 +02:00 · 2c1cbfc089
commit 2c1cbfc089
parent f894acc193 c56403a1e4
2 changed files with 85 additions and 0 deletions
--- a/scripts/plot-single-job.py
+++ b/scripts/plot-single-job.py
@ -0,0 +1,77 @@
+#!/usr/bin/env python3
+
+import csv
+import sys
+from pandas import DataFrame
+from pandas import Grouper
+from matplotlib import pyplot
+
+# Command line: <jobid> <prefix>
+#   jobid  - id of the job to plot (matched against the first CSV column)
+#   prefix - string prepended to the names of the generated PNG files
+if len(sys.argv) < 3:
+  # Fail with a readable message instead of an IndexError traceback.
+  sys.exit("Usage: " + sys.argv[0] + " <jobid> <prefix>")
+
+jobs = [sys.argv[1]]
+prefix = sys.argv[2]
+
+print("Plotting the job: " + str(jobs))
+
+# Plot the timeseries
+def plot(prefix, header, row):
+  """Plot the non-zero metric time series of one job and save them as PNG.
+
+  Every column except "jobid", "coding_abs" and "coding_abs_aggzeros" is
+  parsed as a ":"-separated list of floats, one value per segment. Metrics
+  whose values sum to zero are skipped. The remaining metrics are drawn as
+  stacked subplots, ordered by metric name, and written to
+  "<prefix>timeseries<jobid>.png". When the job has more than 50 segments,
+  a second figure limited to the first 30 segments is written to
+  "<prefix>timeseries<jobid>-30.png".
+
+  Args:
+    prefix: String prepended to the output file names.
+    header: Column names of the CSV file; must contain "jobid".
+    row: Values of one CSV record, in the same order as header.
+  """
+  zoom_segments = 30    # x-range of the zoomed figure
+  zoom_threshold = 50   # only jobs longer than this get a zoomed figure
+
+  fields = dict(zip(header, row))
+  jobid = fields.pop("jobid")
+  # These two columns are not treated as time series; drop them if present.
+  fields.pop("coding_abs", None)
+  fields.pop("coding_abs_aggzeros", None)
+
+  series = {}
+  for name, encoded in fields.items():
+    values = [float(v) for v in encoded.split(":")]
+    if sum(values) == 0:
+      continue
+    series[name] = values
+
+  if not series:
+    print("Empty job! Cannot plot!")
+    return
+
+  # One column per metric, sorted by name. All metrics are expected to have
+  # the same number of segments; DataFrame raises ValueError otherwise.
+  metrics = DataFrame({name: series[name] for name in sorted(series)})
+  # Number the segments from 1 so the x axis shows real segment numbers and
+  # the last segment of every series is kept.
+  metrics.index = range(1, len(metrics) + 1)
+
+  _save_subplots(metrics, prefix + "timeseries" + jobid + ".png",
+                 figsize=(8, 2 + 2 * len(metrics.columns)))
+
+  # Short jobs are already readable in the full plot.
+  if len(metrics) <= zoom_threshold:
+    return
+
+  # Plot the first segments only
+  _save_subplots(metrics, prefix + "timeseries" + jobid + "-30.png",
+                 xlim=(0, zoom_segments))
+
+
+def _save_subplots(metrics, path, **plot_args):
+  """Draw one subplot per column of metrics and save the figure to path."""
+  axes = metrics.plot(subplots=True, legend=False, sharex=True, grid=True,
+                      sharey=True, colormap='jet', marker='.', markersize=10,
+                      **plot_args)
+  for axis, label in zip(axes, metrics.columns):
+    axis.set_ylabel(label)
+
+  pyplot.xlabel("Segment number")
+  pyplot.savefig(path)
+  # Release the figure so repeated calls do not accumulate open figures.
+  pyplot.close()
+
+### end plotting function
+
+
+
+with open('job-io-datasets/datasets/job_codings.csv') as csv_file:
+    # The first CSV record holds the column names; each later record is a job.
+    records = csv.reader(csv_file, delimiter=',')
+    header = next(records, None)
+    for record in records:
+      # Only plot the jobs requested on the command line.
+      if record[0].strip() in jobs:
+        plot(prefix, header, record)
--- a/scripts/plot.R
+++ b/scripts/plot.R
@ -50,6 +50,14 @@ plotJobs = function(jobs){
    #print(tbl)
    md = metadata[metadata$jobid %in% jobs,]
    print(summary(md))
+
+    # print the job timeline
+    r = e[ordered, ]
+    for (row in 1:length(jobs)) {
+      prefix = sprintf("%s-%f-%.0f-", level, r[row, "similarity"], row)
+      job = r[row, "jobid"]
+      system(sprintf("scripts/plot-single-job.py %s %s", job, prefix))
+    }
  }

 # Store the job ids in a table, each column is one algorithm