diff --git a/fig/job-timeseries4296426.pdf b/fig/job-timeseries4296426.pdf
new file mode 100644
index 0000000..1fa9c71
Binary files /dev/null and b/fig/job-timeseries4296426.pdf differ
diff --git a/fig/job-timeseries5024292.pdf b/fig/job-timeseries5024292.pdf
new file mode 100644
index 0000000..1c1a0bf
Binary files /dev/null and b/fig/job-timeseries5024292.pdf differ
diff --git a/fig/job-timeseries7488914-30.pdf b/fig/job-timeseries7488914-30.pdf
new file mode 100644
index 0000000..629d16d
Binary files /dev/null and b/fig/job-timeseries7488914-30.pdf differ
diff --git a/fig/job-timeseries7488914.pdf b/fig/job-timeseries7488914.pdf
new file mode 100644
index 0000000..b370766
Binary files /dev/null and b/fig/job-timeseries7488914.pdf differ
diff --git a/paper/main.tex b/paper/main.tex
index 1bfd25f..c8a0838 100644
--- a/paper/main.tex
+++ b/paper/main.tex
@@ -44,7 +44,8 @@
 
 \usepackage{graphicx}
 \graphicspath{
-	{./pictures/}
+	{./pictures/}
+  {../fig/}
 }
 
 \usepackage[backend=bibtex, style=numeric]{biblatex}
@@ -127,30 +128,62 @@ Check time series algorithms:
 
 \begin{itemize}
 	\item bin
-	\item hex\_native/hex\_lev
-	\item pm\_quant
+	\item hex\_native
+  \item hex\_lev
+	\item hex\_quant
 \end{itemize}
 
 \section{Evaluation}
 \label{sec:evaluation}
 
-Two study examples (two reference jobs):
+In the following, we assume a job is given and we aim to identify similar jobs.
+We chose several reference jobs with different compute and IO characteristics visualized in \Cref{fig:refJobs}:
 \begin{itemize}
-	\item jobA: shorter length, e.g. 5-10, that has a little bit IO in at least two metadata metrics (more better).
-	\item jobB: a very IO intensive longer job, e.g., length $>$ 20, with IO read or write and maybe one other metrics.
+	\item Job-S: performs postprocessing on a single node. This is a typical process in climate science where data products are reformatted and annotated with metadata to a standard representation (so-called CMORization). The postprocessing is IO intensive.
+  \item Job-M: a typical MPI parallel 8-hour compute job on 128 nodes which writes time series data after some spin up.   %CHE.ws12
+	\item Job-L: a 66-hour 20-node job.
+  The initialization data is read at the beginning.
+  Then, only a single master node constantly writes a small volume of data; in fact, the generated data is too small to be categorized as IO relevant.
 \end{itemize}
 
-For each reference job: create CSV file which contains all jobs with:
-\begin{itemize}
-	\item JOB ID, for each algorithm: the coding and the computed ranking $\rightarrow$ thus one long row.
-\end{itemize}
-Alternatively, could be one CSV for each algorithm that contains JOB ID, coding + rank
+For each reference job and algorithm, we created a CSV file with the computed similarity for all other jobs.
+
+
+TODO: Should we say something about the runtime of the algorithms? Having data on this would be useful.
 
 Create histograms + cumulative job distribution for all algorithms.
 Insert job profiles for closest 10 jobs.
 
 Potentially, analyze how the rankings of different similarities look like.
 
+
+\begin{figure}
+\begin{subfigure}{0.8\textwidth}
+\includegraphics[width=\textwidth]{job-timeseries4296426}
+\caption{Job-S} \label{fig:job-S}
+\end{subfigure}
+
+\caption{Reference jobs: timeline of mean IO activity}
+\label{fig:refJobs}
+\end{figure}
+
+
+\begin{figure}\ContinuedFloat
+
+\begin{subfigure}{0.8\textwidth}
+\includegraphics[width=\textwidth]{job-timeseries5024292}
+\caption{Job-M} \label{fig:job-M}
+\end{subfigure}
+
+\begin{subfigure}{0.8\textwidth}
+\includegraphics[width=\textwidth]{job-timeseries7488914-30}
+\caption{Job-L (first 30 segments of 400; remaining segments are similar)}
+\label{fig:job-L}
+\end{subfigure}
+\caption{Reference jobs: timeline of mean IO activity; non-shown timelines are 0}
+\end{figure}
+
+
 \section{Summary and Conclusion}
 \label{sec:summary}
 
diff --git a/scripts/create-paper-vis.sh b/scripts/create-paper-vis.sh
new file mode 100755
index 0000000..db9e1c7
--- /dev/null
+++ b/scripts/create-paper-vis.sh
@@ -0,0 +1,14 @@
+#!/bin/bash
+
+# This script calls all other scripts to re-create the figures for the paper
+
+mkdir fig
+for job in 5024292 4296426 7488914 ; do
+./scripts/plot-single-job.py $job "fig/job-"
+done
+
+# Remove whitespace around jobs
+# for file in fig/*.pdf ; do
+#   pdfcrop $file output.pdf
+#   mv output.pdf $file
+# done
diff --git a/scripts/plot-single-job.py b/scripts/plot-single-job.py
index 426f59e..60ce6c0 100755
--- a/scripts/plot-single-job.py
+++ b/scripts/plot-single-job.py
@@ -5,12 +5,47 @@ import sys
 from pandas import DataFrame
 from pandas import Grouper
 from matplotlib import pyplot
+import matplotlib.cm as cm
 
-jobs = [sys.argv[1]]
-prefix = sys.argv[2]
+jobs = sys.argv[1].split(",")
+prefix = sys.argv[2].split(",")
 
 print("Plotting the job: " + str(jobs))
 
+# Color map
+colorMap = { "md_file_create": cm.tab10(0),
+"md_file_delete": cm.tab10(1),
+"md_mod": cm.tab10(2),
+"md_other": cm.tab10(3),
+"md_read": cm.tab10(4),
+"read_bytes": cm.tab10(5),
+"read_calls": cm.tab10(6),
+"write_bytes": cm.tab10(7),
+"write_calls": cm.tab10(8)
+}
+
+markerMap = { "md_file_create": "^",
+"md_file_delete": "v",
+"md_other": ".",
+"md_mod": "<",
+"md_read": ">",
+"read_bytes": "h",
+"read_calls": "H",
+"write_bytes": "D",
+"write_calls": "d"
+}
+
+linestyleMap = { "md_file_create": ":",
+"md_file_delete": ":",
+"md_mod": ":",
+"md_other": ":",
+"md_read": ":",
+"read_bytes": "--",
+"read_calls": "--",
+"write_bytes": "-.",
+"write_calls": "-."
+}
+
 # Plot the timeseries
 def plot(prefix, header, row):
   x = { h : d for (h, d) in zip(header, row)}
@@ -36,27 +71,45 @@ def plot(prefix, header, row):
   groups = data.groupby(["metrics"])
   metrics = DataFrame()
   labels = []
+  colors = []
+  style = []
   for name, group in groups:
     metrics[name] = [x[2] for x in group.values]
     labels.append(name)
+    style.append(linestyleMap[name] + markerMap[name])
+    colors.append(colorMap[name])
 
-  ax = metrics.plot(subplots=True, legend=False, sharex=True, grid = True,  sharey=True, colormap='jet', marker='.', markersize=10, figsize=(8, 2 + 2 * len(labels)))
-  for (i, l) in zip(range(0, len(labels)), labels):
-    ax[i].set_ylabel(l)
+  fsize = (8, 1 + 1.5 * len(labels))
+  fsizeFixed = (8, 2)
+  
+  pyplot.close('all')
+
+  if len(labels) < 4 :
+    ax = metrics.plot(legend=True, sharex=True, grid = True,  sharey=True, markersize=10, figsize=fsizeFixed, color=colors, style=style)
+    ax.set_ylabel("Value")
+  else:
+    ax = metrics.plot(subplots=True, legend=False, sharex=True, grid = True,  sharey=True, markersize=10, figsize=fsize, color=colors, style=style)
+    for (i, l) in zip(range(0, len(labels)), labels):
+      ax[i].set_ylabel(l)
 
   pyplot.xlabel("Segment number")
-  pyplot.savefig(prefix + "timeseries" + jobid + ".png")
+  pyplot.savefig(prefix + "timeseries" + jobid + ".pdf", bbox_inches='tight')
 
   # Plot first 30 segments
   if len(timeseries) <= 50:
     return
 
-  ax = metrics.plot(subplots=True, legend=False, sharex=True, grid = True,  sharey=True, colormap='jet', marker='.', markersize=10, xlim=(0,30))
-  for (i, l) in zip(range(0, len(labels)), labels):
-    ax[i].set_ylabel(l)
+
+  if len(labels) < 4 :
+    ax = metrics.plot(legend=True, xlim=(0,30), sharex=True, grid = True,  sharey=True, markersize=10, figsize=fsizeFixed, color=colors, style=style)
+    ax.set_ylabel("Value")
+  else:
+    ax = metrics.plot(subplots=True, xlim=(0,30), legend=False, sharex=True, grid = True,  sharey=True, markersize=10, figsize=fsize, color=colors, style=style)
+    for (i, l) in zip(range(0, len(labels)), labels):
+      ax[i].set_ylabel(l)
 
   pyplot.xlabel("Segment number")
-  pyplot.savefig(prefix + "timeseries" + jobid + "-30.png")
+  pyplot.savefig(prefix + "timeseries" + jobid + "-30.pdf", bbox_inches='tight')
 
 ### end plotting function
 
@@ -65,6 +118,7 @@ def plot(prefix, header, row):
 with open('job-io-datasets/datasets/job_codings.csv') as csv_file:
     csv_reader = csv.reader(csv_file, delimiter=',')
     line_count = 0
+    job = 0
     for row in csv_reader:
       if line_count == 0:
         header = row
@@ -74,4 +128,5 @@ with open('job-io-datasets/datasets/job_codings.csv') as csv_file:
       if not row[0].strip() in jobs:
         continue
       else:
-        plot(prefix, header, row)
+        plot(prefix[job], header, row)
+        job += 1
diff --git a/scripts/plot.R b/scripts/plot.R
index fc79b76..086d029 100755
--- a/scripts/plot.R
+++ b/scripts/plot.R
@@ -19,10 +19,8 @@ data = read.csv(file)
 # Columns are: jobid alg_id alg_name similarity
 
 data$alg_id = as.factor(data$alg_id)
-print(nrow(data))
-
-# FILTER, TODO
-data = data %>% filter(similarity <= 1.0)
+cat("Job count:")
+cat(nrow(data))
 
 # empirical cummulative density function (ECDF)
 ggplot(data, aes(similarity, color=alg_name, group=alg_name)) + stat_ecdf(geom = "step") + xlab("SIM") + ylab("Fraction of jobs") + theme(legend.position="bottom") + scale_color_brewer(palette = "Set2")
@@ -34,7 +32,7 @@ print(summary(e))
 ggsave("ecdf-0.5.png")
 
 # histogram for the jobs
-ggplot(data, aes(similarity), group=alg_name) + geom_histogram(color="black", binwidth=0.025) + aes(fill = alg_name) + facet_grid(alg_name ~ ., switch = 'y') + scale_y_continuous(limits=c(0, 100), oob=squish)  +   scale_color_brewer(palette = "Set2") + ylab("Count (cropped at 100)")
+ggplot(data, aes(similarity), group=alg_name) + geom_histogram(color="black", binwidth=0.025) + aes(fill = alg_name) + facet_grid(alg_name ~ ., switch = 'y') + scale_y_continuous(limits=c(0, 100), oob=squish)  +   scale_color_brewer(palette = "Set2") + ylab("Count (cropped at 100)") + theme(legend.position = "none")
 ggsave("hist-sim.png")
 
 # load job information, i.e., the time series per job
@@ -51,13 +49,10 @@ plotJobs = function(jobs){
     md = metadata[metadata$jobid %in% jobs,]
     print(summary(md))
 
-    # print the job timeline
+    # print the job timelines
     r = e[ordered, ]
-    for (row in 1:length(jobs)) {
-      prefix = sprintf("%s-%f-%.0f-", level, r[row, "similarity"], row)
-      job = r[row, "jobid"]
-      system(sprintf("scripts/plot-single-job.py %s %s", job, prefix))
-    }
+    prefix = do.call("sprintf", list("%s-%.0f-", level, r$similarity))
+    system(sprintf("scripts/plot-single-job.py %s %s", paste(r$jobid, collapse=","), paste(prefix, collapse=",")))
   }
 
 # Store the job ids in a table, each column is one algorithm