Fix Color map for job vis.

2020-08-19 19:01:48 +01:00 · 2020-08-19 19:01:48 +01:00 · b71a0a26ef
commit b71a0a26ef
parent 8a303528ab
8 changed files with 72 additions and 10 deletions
--- a/fig/job-timeseries4296426.pdf
+++ b/fig/job-timeseries4296426.pdf
--- a/fig/job-timeseries5024292.pdf
+++ b/fig/job-timeseries5024292.pdf
--- a/fig/job-timeseries7488914-30.pdf
+++ b/fig/job-timeseries7488914-30.pdf
--- a/fig/job-timeseries7488914.pdf
+++ b/fig/job-timeseries7488914.pdf
--- a/paper/main.tex
+++ b/paper/main.tex
@ -44,7 +44,8 @@

 \usepackage{graphicx}
 \graphicspath{
-	{./pictures/}
+	{./pictures/}
+  {../fig/}
 }

 \usepackage[backend=bibtex, style=numeric]{biblatex}
@ -127,8 +128,9 @@ Check time series algorithms:

 \begin{itemize}
 	\item bin
-	\item hex\_native/hex\_lev
-	\item pm\_quant
+	\item hex\_native
+  \item hex\_lev
+	\item hex\_quant
 \end{itemize}

 \section{Evaluation}
@ -136,8 +138,9 @@ Check time series algorithms:

 Two study examples (two reference jobs):
 \begin{itemize}
-	\item jobA: shorter length, e.g. 5-10, that has a little bit IO in at least two metadata metrics (more better).
-	\item jobB: a very IO intensive longer job, e.g., length $>$ 20, with IO read or write and maybe one other metrics.
+	\item job-short: shorter length, e.g., 5-10, with a small amount of IO in at least two metadata metrics (more is better).
+  \item job-mixed:
+	\item job-long: a very IO-intensive, longer job, e.g., length $>$ 20, with IO reads or writes and possibly one other metric.
 \end{itemize}

 For each reference job: create CSV file which contains all jobs with:
@ -151,6 +154,35 @@ Insert job profiles for closest 10 jobs.

 Potentially, analyze how the rankings of different similarities look like.

+\Cref{fig:refJobs}
+
+\begin{figure}
+\begin{subfigure}{0.8\textwidth}
+\includegraphics[width=\textwidth]{job-timeseries4296426}
+\caption{Job-S} \label{fig:job-S}
+\end{subfigure}
+
+\caption{Reference jobs: timeline of mean IO activity}
+\label{fig:refJobs}
+\end{figure}
+
+
+\begin{figure}\ContinuedFloat
+
+\begin{subfigure}{0.8\textwidth}
+\includegraphics[width=\textwidth]{job-timeseries5024292}
+\caption{Job-M} \label{fig:job-M}
+\end{subfigure}
+
+\begin{subfigure}{0.8\textwidth}
+\includegraphics[width=\textwidth]{job-timeseries7488914-30.pdf}
+\caption{Job-L (first 30 segments of 400; remaining segments are similar)}
+\label{fig:job-L}
+\end{subfigure}
+\caption{Reference jobs: timeline of mean IO activity; timelines not shown are 0}
+\end{figure}
+
+
 \section{Summary and Conclusion}
 \label{sec:summary}

--- a/scripts/create-paper-vis.sh
+++ b/scripts/create-paper-vis.sh
@ -0,0 +1,13 @@
+#!/bin/bash
+
+# This script calls all other scripts to re-create the figures for the paper
+
+mkdir fig
+for job in 5024292 4296426 7488914 ; do
+./scripts/plot-single-job.py $job "fig/job-"
+done
+
+for file in fig/*.pdf ; do
+  pdfcrop $file output.pdf
+  mv output.pdf $file
+done
--- a/scripts/plot-single-job.py
+++ b/scripts/plot-single-job.py
@ -5,12 +5,25 @@ import sys
 from pandas import DataFrame
 from pandas import Grouper
 from matplotlib import pyplot
+import matplotlib.cm as cm

 jobs = [sys.argv[1]]
 prefix = sys.argv[2]

 print("Plotting the job: " + str(jobs))

+# Color map
+colorMap = { "md_file_create": cm.tab10(0),
+"md_file_delete": cm.tab10(1),
+"md_mod": cm.tab10(2),
+"md_other": cm.tab10(3),
+"md_read": cm.tab10(4),
+"read_bytes": cm.tab10(5),
+"read_calls": cm.tab10(6),
+"write_bytes": cm.tab10(7),
+"write_calls": cm.tab10(8)
+}
+
 # Plot the timeseries
 def plot(prefix, header, row):
  x = { h : d for (h, d) in zip(header, row)}
@ -36,27 +49,31 @@ def plot(prefix, header, row):
  groups = data.groupby(["metrics"])
  metrics = DataFrame()
  labels = []
+  colors = []
  for name, group in groups:
    metrics[name] = [x[2] for x in group.values]
    labels.append(name)
+    colors.append(colorMap[name])

-  ax = metrics.plot(subplots=True, legend=False, sharex=True, grid = True,  sharey=True, colormap='jet', marker='.', markersize=10, figsize=(8, 2 + 2 * len(labels)))
+  fsize = (8, 1 + 1.5 * len(labels))
+
+  ax = metrics.plot(subplots=True, legend=False, sharex=True, grid = True,  sharey=True, marker='.', markersize=10, figsize=fsize, color=colors)
  for (i, l) in zip(range(0, len(labels)), labels):
    ax[i].set_ylabel(l)

  pyplot.xlabel("Segment number")
-  pyplot.savefig(prefix + "timeseries" + jobid + ".png")
+  pyplot.savefig(prefix + "timeseries" + jobid + ".pdf")

  # Plot first 30 segments
  if len(timeseries) <= 50:
    return

-  ax = metrics.plot(subplots=True, legend=False, sharex=True, grid = True,  sharey=True, colormap='jet', marker='.', markersize=10, xlim=(0,30))
+  ax = metrics.plot(subplots=True, legend=False, sharex=True, grid = True,  sharey=True, marker='.', color=colors, markersize=10, xlim=(0,30), figsize=fsize)
  for (i, l) in zip(range(0, len(labels)), labels):
    ax[i].set_ylabel(l)

  pyplot.xlabel("Segment number")
-  pyplot.savefig(prefix + "timeseries" + jobid + "-30.png")
+  pyplot.savefig(prefix + "timeseries" + jobid + "-30.pdf")

 ### end plotting function

--- a/scripts/plot.R
+++ b/scripts/plot.R
@ -34,7 +34,7 @@ print(summary(e))
 ggsave("ecdf-0.5.png")

 # histogram for the jobs
-ggplot(data, aes(similarity), group=alg_name) + geom_histogram(color="black", binwidth=0.025) + aes(fill = alg_name) + facet_grid(alg_name ~ ., switch = 'y') + scale_y_continuous(limits=c(0, 100), oob=squish)  +   scale_color_brewer(palette = "Set2") + ylab("Count (cropped at 100)")
+ggplot(data, aes(similarity), group=alg_name) + geom_histogram(color="black", binwidth=0.025) + aes(fill = alg_name) + facet_grid(alg_name ~ ., switch = 'y') + scale_y_continuous(limits=c(0, 100), oob=squish)  +   scale_color_brewer(palette = "Set2") + ylab("Count (cropped at 100)") + theme(legend.position = "none")
 ggsave("hist-sim.png")

 # load job information, i.e., the time series per job