#!/usr/bin/env python3
"""Plot the per-metric timeseries of one job from the job codings CSV.

Usage: <script> JOBID PREFIX

Rows of ``job-io-datasets/datasets/job_codings.csv`` whose first column
matches JOBID are plotted.  Images are written to
``PREFIX + "timeseries" + jobid + ".png"``; jobs with more than 50 segments
additionally get a ``-30.png`` image limited to the x-range 0..30.
"""

import csv
import sys

from pandas import DataFrame
from pandas import Grouper
from matplotlib import pyplot

jobs = [sys.argv[1]]
prefix = sys.argv[2]

print("Plotting the job: " + str(jobs))


def _save_subplots(metrics, labels, path, **plot_args):
    """Draw one subplot per column of *metrics* and save the figure to *path*.

    Args:
        metrics: DataFrame with one column per metric.
        labels: y-axis labels, in the same order as the columns of *metrics*.
        path: output file name handed to ``pyplot.savefig``.
        **plot_args: extra keyword arguments for ``DataFrame.plot``
            (e.g. ``figsize`` or ``xlim``).
    """
    axes = metrics.plot(subplots=True, legend=False, sharex=True, grid=True,
                        sharey=True, colormap='jet', marker='.',
                        markersize=10, **plot_args)
    for axis, label in zip(axes, labels):
        axis.set_ylabel(label)
    pyplot.xlabel("Segment number")
    pyplot.savefig(path)
    # pyplot keeps every figure alive until it is closed explicitly.
    pyplot.close()


# Plot the timeseries
def plot(prefix, header, row):
    """Plot all non-zero metric timeseries of a single CSV row.

    Args:
        prefix: string prepended to the output file names.
        header: column names of the CSV file.
        row: the values of one CSV row, aligned with *header*.

    Every column except ``jobid``, ``coding_abs`` and ``coding_abs_aggzeros``
    is parsed as a ``:``-separated list of floats.  Columns whose values sum
    to zero are skipped.  If nothing remains, a message is printed and no
    file is written.

    Raises:
        KeyError: if one of the three named columns is missing.
        ValueError: if a field contains a non-numeric entry, or the
            remaining metrics have different lengths.
    """
    fields = dict(zip(header, row))
    jobid = fields.pop("jobid")
    del fields["coding_abs"]
    del fields["coding_abs_aggzeros"]

    result = []
    for metric, encoded in fields.items():
        values = [float(v) for v in encoded.split(":")]
        if sum(values) == 0:
            continue
        # enumerate() numbers *every* value from 1; zipping against
        # range(1, len(values)) would silently drop the last segment.
        result.extend(
            [metric, segment, value]
            for segment, value in enumerate(values, start=1)
        )

    if not result:
        print("Empty job! Cannot plot!")
        return

    data = DataFrame(result, columns=["metrics", "segment", "value"])

    # Pivot to one column per metric.  Group by the scalar key: grouping by
    # the list ["metrics"] yields 1-tuple group names on pandas >= 2.0, which
    # would end up as column names and axis labels.
    metrics = DataFrame()
    labels = []
    for name, group in data.groupby("metrics"):
        metrics[name] = group["value"].tolist()
        labels.append(name)

    _save_subplots(metrics, labels,
                   prefix + "timeseries" + jobid + ".png",
                   figsize=(8, 2 + 2 * len(labels)))

    # Plot first 30 segments
    # Only worthwhile for long jobs; the row count of the plotted frame is
    # the number of segments.
    if len(metrics.index) <= 50:
        return

    _save_subplots(metrics, labels,
                   prefix + "timeseries" + jobid + "-30.png",
                   xlim=(0, 30))

### end plotting function


# newline='' lets the csv module handle line endings inside quoted fields.
with open('job-io-datasets/datasets/job_codings.csv', newline='') as csv_file:
    csv_reader = csv.reader(csv_file, delimiter=',')
    # The first line holds the column names.
    header = next(csv_reader, None)
    for row in csv_reader:
        # Skip blank lines and rows that belong to other jobs.
        if not row or row[0].strip() not in jobs:
            continue
        plot(prefix, header, row)