Fix Color map for job vis.

This commit is contained in:
Julian M. Kunkel 2020-08-19 19:01:48 +01:00
parent 8a303528ab
commit b71a0a26ef
8 changed files with 72 additions and 10 deletions

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

View File

@ -44,7 +44,8 @@
\usepackage{graphicx} \usepackage{graphicx}
\graphicspath{ \graphicspath{
{./pictures/} {./pictures/},
{../fig/}
} }
\usepackage[backend=bibtex, style=numeric]{biblatex} \usepackage[backend=bibtex, style=numeric]{biblatex}
@ -127,8 +128,9 @@ Check time series algorithms:
\begin{itemize} \begin{itemize}
\item bin \item bin
\item hex\_native/hex\_lev \item hex\_native
\item pm\_quant \item hex\_lev
\item hex\_quant
\end{itemize} \end{itemize}
\section{Evaluation} \section{Evaluation}
@ -136,8 +138,9 @@ Check time series algorithms:
Two study examples (two reference jobs): Two study examples (two reference jobs):
\begin{itemize} \begin{itemize}
\item jobA: shorter length, e.g. 5-10, that has a little bit IO in at least two metadata metrics (more better). \item job-short: shorter length, e.g. 5-10, that has a little bit IO in at least two metadata metrics (more better).
\item jobB: a very IO intensive longer job, e.g., length $>$ 20, with IO read or write and maybe one other metrics. \item job-mixed:
\item job-long: a very IO-intensive longer job, e.g., length $>$ 20, with IO read or write and maybe one other metric.
\end{itemize} \end{itemize}
For each reference job: create CSV file which contains all jobs with: For each reference job: create CSV file which contains all jobs with:
@ -151,6 +154,35 @@ Insert job profiles for closest 10 jobs.
Potentially, analyze how the rankings of different similarities look like. Potentially, analyze how the rankings of different similarities look like.
\Cref{fig:refJobs}
\begin{figure}
\begin{subfigure}{0.8\textwidth}
\includegraphics[width=\textwidth]{job-timeseries4296426}
\caption{Job-S} \label{fig:job-S}
\end{subfigure}
\caption{Reference jobs: timeline of mean IO activity}
\label{fig:refJobs}
\end{figure}
\begin{figure}\ContinuedFloat
\begin{subfigure}{0.8\textwidth}
\includegraphics[width=\textwidth]{job-timeseries5024292}
\caption{Job-M} \label{fig:job-M}
\end{subfigure}
\begin{subfigure}{0.8\textwidth}
\includegraphics[width=\textwidth]{job-timeseries7488914-30.pdf}
\caption{Job-L (first 30 segments of 400; remaining segments are similar)}
\label{fig:job-L}
\end{subfigure}
\caption{Reference jobs: timeline of mean IO activity; timelines that are not shown are 0}
\end{figure}
\section{Summary and Conclusion} \section{Summary and Conclusion}
\label{sec:summary} \label{sec:summary}

13
scripts/create-paper-vis.sh Executable file
View File

@ -0,0 +1,13 @@
#!/bin/bash
# This script calls all other scripts to re-create the figures for the paper.
# Run it from the directory that contains scripts/, because the plot script
# is invoked through the relative path ./scripts/.

# -p: do not report an error when fig/ is left over from an earlier run
mkdir -p fig

for job in 5024292 4296426 7488914 ; do
  ./scripts/plot-single-job.py "$job" "fig/job-"
done

# Run pdfcrop on every generated PDF and replace the file with the result
for file in fig/*.pdf ; do
  # An unmatched glob stays literal ("fig/*.pdf"); skip it instead of
  # handing a non-existent file to pdfcrop
  [ -e "$file" ] || continue
  # Only overwrite the figure when pdfcrop succeeded, so a failure cannot
  # replace it with a stale output.pdf from a previous iteration
  pdfcrop "$file" output.pdf && mv output.pdf "$file"
done

View File

@ -5,12 +5,25 @@ import sys
from pandas import DataFrame from pandas import DataFrame
from pandas import Grouper from pandas import Grouper
from matplotlib import pyplot from matplotlib import pyplot
import matplotlib.cm as cm
jobs = [sys.argv[1]] jobs = [sys.argv[1]]
prefix = sys.argv[2] prefix = sys.argv[2]
print("Plotting the job: " + str(jobs)) print("Plotting the job: " + str(jobs))
# Color map: each metric name is bound to one fixed slot (0..8) of
# matplotlib's tab10 palette; plot() looks the color up by metric name.
colorMap = {
    metric: cm.tab10(slot)
    for slot, metric in enumerate((
        "md_file_create",
        "md_file_delete",
        "md_mod",
        "md_other",
        "md_read",
        "read_bytes",
        "read_calls",
        "write_bytes",
        "write_calls",
    ))
}
# Plot the timeseries # Plot the timeseries
def plot(prefix, header, row): def plot(prefix, header, row):
x = { h : d for (h, d) in zip(header, row)} x = { h : d for (h, d) in zip(header, row)}
@ -36,27 +49,31 @@ def plot(prefix, header, row):
groups = data.groupby(["metrics"]) groups = data.groupby(["metrics"])
metrics = DataFrame() metrics = DataFrame()
labels = [] labels = []
colors = []
for name, group in groups: for name, group in groups:
metrics[name] = [x[2] for x in group.values] metrics[name] = [x[2] for x in group.values]
labels.append(name) labels.append(name)
colors.append(colorMap[name])
ax = metrics.plot(subplots=True, legend=False, sharex=True, grid = True, sharey=True, colormap='jet', marker='.', markersize=10, figsize=(8, 2 + 2 * len(labels))) fsize = (8, 1 + 1.5 * len(labels))
ax = metrics.plot(subplots=True, legend=False, sharex=True, grid = True, sharey=True, marker='.', markersize=10, figsize=fsize, color=colors)
for (i, l) in zip(range(0, len(labels)), labels): for (i, l) in zip(range(0, len(labels)), labels):
ax[i].set_ylabel(l) ax[i].set_ylabel(l)
pyplot.xlabel("Segment number") pyplot.xlabel("Segment number")
pyplot.savefig(prefix + "timeseries" + jobid + ".png") pyplot.savefig(prefix + "timeseries" + jobid + ".pdf")
# Plot first 30 segments # Plot first 30 segments
if len(timeseries) <= 50: if len(timeseries) <= 50:
return return
ax = metrics.plot(subplots=True, legend=False, sharex=True, grid = True, sharey=True, colormap='jet', marker='.', markersize=10, xlim=(0,30)) ax = metrics.plot(subplots=True, legend=False, sharex=True, grid = True, sharey=True, marker='.', color=colors, markersize=10, xlim=(0,30), figsize=fsize)
for (i, l) in zip(range(0, len(labels)), labels): for (i, l) in zip(range(0, len(labels)), labels):
ax[i].set_ylabel(l) ax[i].set_ylabel(l)
pyplot.xlabel("Segment number") pyplot.xlabel("Segment number")
pyplot.savefig(prefix + "timeseries" + jobid + "-30.png") pyplot.savefig(prefix + "timeseries" + jobid + "-30.pdf")
### end plotting function ### end plotting function

View File

@ -34,7 +34,7 @@ print(summary(e))
ggsave("ecdf-0.5.png") ggsave("ecdf-0.5.png")
# histogram for the jobs # histogram for the jobs
ggplot(data, aes(similarity), group=alg_name) + geom_histogram(color="black", binwidth=0.025) + aes(fill = alg_name) + facet_grid(alg_name ~ ., switch = 'y') + scale_y_continuous(limits=c(0, 100), oob=squish) + scale_color_brewer(palette = "Set2") + ylab("Count (cropped at 100)") ggplot(data, aes(similarity), group=alg_name) + geom_histogram(color="black", binwidth=0.025) + aes(fill = alg_name) + facet_grid(alg_name ~ ., switch = 'y') + scale_y_continuous(limits=c(0, 100), oob=squish) + scale_color_brewer(palette = "Set2") + ylab("Count (cropped at 100)") + theme(legend.position = "none")
ggsave("hist-sim.png") ggsave("hist-sim.png")
# load job information, i.e., the time series per job # load job information, i.e., the time series per job