Bugfix plotting of jobs. More details in paper.

This commit is contained in:
Julian M. Kunkel 2020-08-20 16:16:46 +01:00
parent c2e3353420
commit 8364582785
4 changed files with 239 additions and 15 deletions

View File

@ -239,12 +239,219 @@ Potentially, analyze how the rankings of different similarities look like.
\end{subfigure} \end{subfigure}
\centering \centering
\caption{Histogram for the number of jobs (bin width: 2.5\%, numbers are the actual job counts)} \caption{Histogram for the number of jobs (bin width: 2.5\%, numbers are the actual job counts)}
\label{fig:ecdf} \label{fig:hist}
\end{figure}
\subsection{Quantitative Analysis of Selected Jobs}
\begin{table}
\caption{User and Group Information}
\end{table}
\begin{figure}
\begin{subfigure}{0.5\textwidth}
\centering
\includegraphics[width=\textwidth]{job_similarities_4296426-out/jobs-nodes}
\caption{Job-S} \label{fig:nodes-job-S}
\end{subfigure}
\begin{subfigure}{0.5\textwidth}
\centering
\includegraphics[width=\textwidth]{job_similarities_5024292-out/jobs-nodes}
\caption{Job-M} \label{fig:nodes-job-M}
\end{subfigure}
\begin{subfigure}{0.5\textwidth}
\centering
\includegraphics[width=\textwidth]{job_similarities_7488914-out/jobs-nodes}
\caption{Job-L} \label{fig:nodes-job-L}
\end{subfigure}
\centering
\caption{Distribution of node counts}
\label{fig:nodes-job}
\end{figure}
\begin{figure}
\begin{subfigure}{0.5\textwidth}
\centering
\includegraphics[width=\textwidth]{job_similarities_4296426-out/jobs-elapsed}
\caption{Job-S} \label{fig:runtime-job-S}
\end{subfigure}
\begin{subfigure}{0.5\textwidth}
\centering
\includegraphics[width=\textwidth]{job_similarities_5024292-out/jobs-elapsed}
\caption{Job-M} \label{fig:runtime-job-M}
\end{subfigure}
\begin{subfigure}{0.5\textwidth}
\centering
\includegraphics[width=\textwidth]{job_similarities_7488914-out/jobs-elapsed}
\caption{Job-L} \label{fig:runtime-job-L}
\end{subfigure}
\centering
\caption{Distribution of elapsed runtime}
\label{fig:runtime-job}
\end{figure}
Different algorithms ...
\begin{figure}
\begin{subfigure}{0.5\textwidth}
\centering
\includegraphics[width=\textwidth]{job_similarities_4296426-out/intersection-heatmap}
\caption{Job-S} \label{fig:heatmap-job-S}
\end{subfigure}
\begin{subfigure}{0.5\textwidth}
\centering
\includegraphics[width=\textwidth]{job_similarities_5024292-out/intersection-heatmap}
\caption{Job-M} \label{fig:heatmap-job-M}
\end{subfigure}
\begin{subfigure}{0.5\textwidth}
\centering
\includegraphics[width=\textwidth]{job_similarities_7488914-out/intersection-heatmap}
\caption{Job-L} \label{fig:heatmap-job-L}
\end{subfigure}
\centering
\caption{Intersection of the top 100 jobs for the different algorithms}
\label{fig:heatmap-job}
\end{figure}
\section{Assessing Timelines for Similar Jobs}
\subsection{Job-S}
\begin{figure}
\begin{subfigure}{0.3\textwidth}
\centering
\includegraphics[width=\textwidth]{job_similarities_4296426-out/hex_lev-0.9615-timeseries4297102}
\caption{Rank 2, SIM=0.9615}
\end{subfigure}
\begin{subfigure}{0.3\textwidth}
\centering
\includegraphics[width=\textwidth]{job_similarities_4296426-out/hex_lev-0.9017-timeseries4570701}
\caption{Rank 15, SIM=0.9017}
\end{subfigure}
\begin{subfigure}{0.3\textwidth}
\centering
\includegraphics[width=\textwidth]{job_similarities_4296426-out/hex_lev-0.7901-timeseries4693267}
\caption{Rank\,100, SIM=0.7901}
\end{subfigure}
\caption{Job-S with Hex-Lev, selection of similar jobs}
\label{fig:job-S-hex-lev}
\end{figure}
\begin{figure}
\begin{subfigure}{0.3\textwidth}
\centering
\includegraphics[width=\textwidth]{job_similarities_4296426-out/hex_native-0.9808-timeseries4567314}
\caption{Rank 2, SIM=0.9808}
\end{subfigure}
\begin{subfigure}{0.3\textwidth}
\centering
\includegraphics[width=\textwidth]{job_similarities_4296426-out/hex_native-0.9375-timeseries4709700}
\caption{Rank 15, SIM=0.9375}
\end{subfigure}
\begin{subfigure}{0.3\textwidth}
\centering
\includegraphics[width=\textwidth]{job_similarities_4296426-out/hex_native-0.9001-timeseries4527630}
\caption{Rank\,100, SIM=0.9001}
\end{subfigure}
\caption{Job-S with Hex-Native, selection of similar jobs}
\label{fig:job-S-hex-native}
\end{figure} \end{figure}
\section{Summary and Conclusion} \begin{figure}
\begin{subfigure}{0.3\textwidth}
\centering
\includegraphics[width=\textwidth]{job_similarities_4296426-out/hex_phases-0.9153-timeseries4567314}
\caption{Rank 2, SIM=0.9153 (same job as Hex-Native, Rank 2)}
\end{subfigure}
\begin{subfigure}{0.3\textwidth}
\centering
\includegraphics[width=\textwidth]{job_similarities_4296426-out/hex_native-0.9268-timeseries4557849}
\caption{Rank 15, $SIM=$}
\end{subfigure}
\begin{subfigure}{0.3\textwidth}
\centering
\includegraphics[width=\textwidth]{job_similarities_4296426-out/hex_phases-0.7382-timeseries4693267}
\caption{Rank\,100, SIM=0.7382}
\end{subfigure}
\caption{Job-S with Hex-Phases, selection of similar jobs}
\label{fig:job-S-hex-phases}
\end{figure}
% \ContinuedFloat
Bin-Aggzero works quite well here, too.
\subsection{Job-M}
\begin{figure}
\begin{subfigure}{0.3\textwidth}
\centering
\includegraphics[width=\textwidth]{job_similarities_5024292-out/bin_aggzeros-0.7755-timeseries7907734}
\caption{Rank 2, SIM=0.7755}
\end{subfigure}
\begin{subfigure}{0.3\textwidth}
\centering
\includegraphics[width=\textwidth]{job_similarities_5024292-out/bin_aggzeros-0.7347-timeseries4244400}
\caption{SIM=0.7347}
\end{subfigure}
\begin{subfigure}{0.3\textwidth}
\centering
\includegraphics[width=\textwidth]{job_similarities_5024292-out/bin_aggzeros-0.5306-timeseries8038026}
\caption{SIM=0.5306}
\end{subfigure}
\caption{Job-M with Bin-Aggzero, selection of similar jobs}
\label{fig:job-M-bin-aggzero}
\end{figure}
\begin{figure}
\begin{subfigure}{0.3\textwidth}
\centering
\includegraphics[width=\textwidth]{job_similarities_5024292-out/bin_aggzeros-0.7755-timeseries7907734}
\caption{Rank 2, $SIM=$}
\end{subfigure}
\begin{subfigure}{0.3\textwidth}
\centering
\includegraphics[width=\textwidth]{job_similarities_5024292-out/bin_aggzeros-0.7347-timeseries4244400}
\caption{$SIM=$}
\end{subfigure}
\begin{subfigure}{0.3\textwidth}
\centering
\includegraphics[width=\textwidth]{job_similarities_5024292-out/bin_aggzeros-0.5306-timeseries8038026}
\caption{$SIM=$ }
\end{subfigure}
\caption{Job-M with Bin-Aggzero, selection of similar jobs}
\label{fig:job-M-bin-aggzero-2} % TODO: this figure repeats the previous one; replace with the intended algorithm
\end{figure}
\subsection{Job-L}
\section{Conclusion}
\label{sec:summary} \label{sec:summary}
%\printbibliography %\printbibliography

View File

@ -1,7 +1,23 @@
#!/bin/bash #!/bin/bash
# call me from the parent directory # call me from the parent directory
echo "This script performs the complete analysis steps"
# Decompress the datasets and symlink every datasets/*.csv file into the
# current directory. Must be called from the parent directory (the one that
# contains datasets/). Returns 1 if the datasets directory cannot be entered.
function prepare(){
  local target
  # Abort instead of running decompress.sh in the wrong directory.
  pushd datasets || return 1
  ./decompress.sh
  popd || return 1
  for I in datasets/*.csv ; do
    # An unmatched glob stays literal; skip it rather than creating a dangling link.
    [ -e "$I" ] || continue
    target=${I##*/}
    # Keep files/links that already exist so the preparation can be re-run.
    if [ -e "$target" ] || [ -L "$target" ]; then
      continue
    fi
    ln -s "$I" "$target"
  done
}
# prepare
for I in job_similarities_*.csv ; do for I in job_similarities_*.csv ; do
rm *.png *.pdf
./scripts/plot.R $I > description.txt ./scripts/plot.R $I > description.txt
OUT=${I%%.csv}-out OUT=${I%%.csv}-out
mkdir $OUT mkdir $OUT

View File

@ -12,7 +12,9 @@ prefix = sys.argv[2].split(",")
fileformat = ".png" fileformat = ".png"
print("Plotting the job: " + str(jobs)) print("Plotting the job: " + str(sys.argv[1]))
print("Plotting with prefix: " + str(sys.argv[2]))
# Color map # Color map
colorMap = { "md_file_create": cm.tab10(0), colorMap = { "md_file_create": cm.tab10(0),
@ -120,15 +122,14 @@ def plot(prefix, header, row):
with open('job-io-datasets/datasets/job_codings.csv') as csv_file: with open('job-io-datasets/datasets/job_codings.csv') as csv_file:
csv_reader = csv.reader(csv_file, delimiter=',') csv_reader = csv.reader(csv_file, delimiter=',')
line_count = 0 line_count = 0
job = 0
for row in csv_reader: for row in csv_reader:
if line_count == 0: if line_count == 0:
header = row header = row
line_count += 1 line_count += 1
continue continue
job = row[0].strip()
if not row[0].strip() in jobs: if not job in jobs:
continue continue
else: else:
plot(prefix[job], header, row) index = jobs.index(job)
job += 1 plot(prefix[index] + "-" + str(index), header, row)

View File

@ -4,7 +4,7 @@ library(ggplot2)
library(dplyr) library(dplyr)
require(scales) require(scales)
plotjobs = FALSE plotjobs = TRUE
# Color scheme # Color scheme
plotcolors <- c("#CC0000", "#FFA500", "#FFFF00", "#008000", "#9999ff", "#000066") plotcolors <- c("#CC0000", "#FFA500", "#FFFF00", "#008000", "#9999ff", "#000066")
@ -53,7 +53,7 @@ plotJobs = function(jobs){
r = e[ordered, ] r = e[ordered, ]
if (plotjobs) { if (plotjobs) {
prefix = do.call("sprintf", list("%s-%.0f-", level, r$similarity)) prefix = do.call("sprintf", list("%s-%.4f-", level, r$similarity))
system(sprintf("scripts/plot-single-job.py %s %s", paste(r$jobid, collapse=","), paste(prefix, collapse=","))) system(sprintf("scripts/plot-single-job.py %s %s", paste(r$jobid, collapse=","), paste(prefix, collapse=",")))
} }
} }
@ -96,7 +96,7 @@ for (l1 in levels(data$alg_name)){
print(res.intersect) print(res.intersect)
# Plot heatmap about intersection # Plot heatmap about intersection
ggplot(tbl.intersect, aes(first, second, fill=intersect)) + geom_tile() + geom_text(aes(label = round(intersect, 1))) + scale_fill_gradientn(colours = rev(plotcolors)) ggplot(tbl.intersect, aes(first, second, fill=intersect)) + geom_tile() + geom_text(aes(label = round(intersect, 1))) + scale_fill_gradientn(colours = rev(plotcolors)) + xlab("") + ylab("")
ggsave("intersection-heatmap.png", width=6, height=5) ggsave("intersection-heatmap.png", width=6, height=5)
# Collect the metadata of all jobs in a new table # Collect the metadata of all jobs in a new table
@ -105,11 +105,11 @@ for (alg_name in levels(data$alg_name)){
res.jobs = rbind(res.jobs, cbind(alg_name, metadata[metadata$jobid %in% result[, alg_name],])) res.jobs = rbind(res.jobs, cbind(alg_name, metadata[metadata$jobid %in% result[, alg_name],]))
} }
ggplot(res.jobs, aes(alg_name, total_nodes, fill=alg_name)) + geom_boxplot() + scale_y_continuous(trans = log2_trans(), breaks = trans_breaks("log2", function(x) 2^x), labels = trans_format("log2", math_format(2^.x))) ggplot(res.jobs, aes(alg_name, total_nodes, fill=alg_name)) + geom_boxplot() + scale_y_continuous(trans = log2_trans(), breaks = trans_breaks("log2", function(x) 2^x), labels = trans_format("log2", math_format(2^.x))) + theme(legend.position = "none")
ggsave("jobs-nodes.png") ggsave("jobs-nodes.png", width=6, height=4)
ggplot(res.jobs, aes(alg_name, elapsed, fill=alg_name)) + geom_boxplot() + scale_y_continuous(trans = log2_trans(), breaks = trans_breaks("log10", function(x) 10^x), labels = trans_format("log10", math_format(10^.x))) + ylab("Runtime in s") + xlab("Algorithm") ggplot(res.jobs, aes(alg_name, elapsed, fill=alg_name)) + geom_boxplot() + scale_y_continuous(trans = log2_trans(), breaks = trans_breaks("log10", function(x) 10^x), labels = trans_format("log10", math_format(10^.x))) + ylab("Runtime in s") + xlab("Algorithm") + theme(legend.position = "none")
ggsave("jobs-elapsed.png") ggsave("jobs-elapsed.png", width=6, height=4)