diff --git a/paper/main.tex b/paper/main.tex index aeec18d..c2069ab 100644 --- a/paper/main.tex +++ b/paper/main.tex @@ -239,12 +239,219 @@ Potentially, analyze how the rankings of different similarities look like. \end{subfigure} \centering \caption{Histogram for the number of jobs (bin width: 2.5\%, numbers are the actual job counts)} -\label{fig:ecdf} +\label{fig:hist} +\end{figure} + +\subsection{Quantitative Analysis of Selected Jobs} + +\begin{table} +\caption{User and Group Information} +\end{table} + +\begin{figure} +\begin{subfigure}{0.5\textwidth} +\centering +\includegraphics[width=\textwidth]{job_similarities_4296426-out/jobs-nodes} +\caption{Job-S} \label{fig:nodes-job-S} +\end{subfigure} + +\begin{subfigure}{0.5\textwidth} +\centering +\includegraphics[width=\textwidth]{job_similarities_5024292-out/jobs-nodes} +\caption{Job-M} \label{fig:nodes-job-M} +\end{subfigure} + +\begin{subfigure}{0.5\textwidth} +\centering +\includegraphics[width=\textwidth]{job_similarities_7488914-out/jobs-nodes} +\caption{Job-L} \label{fig:nodes-job-L} +\end{subfigure} +\centering +\caption{Distribution of node counts} +\label{fig:nodes-job} +\end{figure} + + +\begin{figure} +\begin{subfigure}{0.5\textwidth} +\centering +\includegraphics[width=\textwidth]{job_similarities_4296426-out/jobs-elapsed} +\caption{Job-S} \label{fig:runtime-job-S} +\end{subfigure} + +\begin{subfigure}{0.5\textwidth} +\centering +\includegraphics[width=\textwidth]{job_similarities_5024292-out/jobs-elapsed} +\caption{Job-M} \label{fig:runtime-job-M} +\end{subfigure} + +\begin{subfigure}{0.5\textwidth} +\centering +\includegraphics[width=\textwidth]{job_similarities_7488914-out/jobs-elapsed} +\caption{Job-L} \label{fig:runtime-job-L} +\end{subfigure} +\centering +\caption{Distribution of elapsed runtime} +\label{fig:runtime-job} +\end{figure} + +Different algorithms ... 
+ + +\begin{figure} +\begin{subfigure}{0.5\textwidth} +\centering +\includegraphics[width=\textwidth]{job_similarities_4296426-out/intersection-heatmap} +\caption{Job-S} \label{fig:heatmap-job-S} +\end{subfigure} + +\begin{subfigure}{0.5\textwidth} +\centering +\includegraphics[width=\textwidth]{job_similarities_5024292-out/intersection-heatmap} +\caption{Job-M} \label{fig:heatmap-job-M} +\end{subfigure} + +\begin{subfigure}{0.5\textwidth} +\centering +\includegraphics[width=\textwidth]{job_similarities_7488914-out/intersection-heatmap} +\caption{Job-L} \label{fig:heatmap-job-L} +\end{subfigure} +\centering +\caption{Intersection of the top 100 jobs for the different algorithms} +\label{fig:heatmap-job} +\end{figure} + +\section{Assessing Timelines for Similar Jobs} + +\subsection{Job-S} + + +\begin{figure} +\begin{subfigure}{0.3\textwidth} +\centering +\includegraphics[width=\textwidth]{job_similarities_4296426-out/hex_lev-0.9615-timeseries4297102} +\caption{Rank 2, SIM=0.9615} +\end{subfigure} +\begin{subfigure}{0.3\textwidth} +\centering +\includegraphics[width=\textwidth]{job_similarities_4296426-out/hex_lev-0.9017-timeseries4570701} +\caption{Rank 15, SIM=0.9017} +\end{subfigure} +\begin{subfigure}{0.3\textwidth} +\centering +\includegraphics[width=\textwidth]{job_similarities_4296426-out/hex_lev-0.7901-timeseries4693267} +\caption{Rank\,100, SIM=0.7901} +\end{subfigure} + +\caption{Job-S with Hex-Lev, selection of similar jobs} +\label{fig:job-S-hex-lev} +\end{figure} + + +\begin{figure} +\begin{subfigure}{0.3\textwidth} +\centering +\includegraphics[width=\textwidth]{job_similarities_4296426-out/hex_native-0.9808-timeseries4567314} +\caption{Rank 2, SIM=0.9808} +\end{subfigure} +\begin{subfigure}{0.3\textwidth} +\centering +\includegraphics[width=\textwidth]{job_similarities_4296426-out/hex_native-0.9375-timeseries4709700} +\caption{Rank 15, SIM=0.9375} +\end{subfigure} +\begin{subfigure}{0.3\textwidth} +\centering
+\includegraphics[width=\textwidth]{job_similarities_4296426-out/hex_native-0.9001-timeseries4527630} +\caption{Rank\,100, SIM=0.9001} +\end{subfigure} + +\caption{Job-S with Hex-Native, selection of similar jobs} +\label{fig:job-S-hex-native} \end{figure} -\section{Summary and Conclusion} +\begin{figure} +\begin{subfigure}{0.3\textwidth} +\centering +\includegraphics[width=\textwidth]{job_similarities_4296426-out/hex_phases-0.9153-timeseries4567314} +\caption{Rank 2, SIM=0.9153 (same job as hex native Top1)} +\end{subfigure} +\begin{subfigure}{0.3\textwidth} +\centering +\includegraphics[width=\textwidth]{job_similarities_4296426-out/hex_native-0.9268-timeseries4557849} +\caption{Rank 15, $SIM=$} +\end{subfigure} +\begin{subfigure}{0.3\textwidth} +\centering +\includegraphics[width=\textwidth]{job_similarities_4296426-out/hex_phases-0.7382-timeseries4693267} +\caption{Rank\,100, SIM=0.7382} +\end{subfigure} + +\caption{Job-S with Hex-Phases, selection of similar jobs} +\label{fig:job-S-hex-phases} +\end{figure} + +% \ContinuedFloat + +Bin aggzeros works quite well here too.
+ + +\subsection{Job-M} + + + +\begin{figure} +\begin{subfigure}{0.3\textwidth} +\centering +\includegraphics[width=\textwidth]{job_similarities_5024292-out/bin_aggzeros-0.7755-timeseries7907734} +\caption{Rank 2, SIM=0.7755} +\end{subfigure} +\begin{subfigure}{0.3\textwidth} +\centering +\includegraphics[width=\textwidth]{job_similarities_5024292-out/bin_aggzeros-0.7347-timeseries4244400} +\caption{SIM=0.7347} +\end{subfigure} +\begin{subfigure}{0.3\textwidth} +\centering +\includegraphics[width=\textwidth]{job_similarities_5024292-out/bin_aggzeros-0.5306-timeseries8038026} +\caption{SIM=0.5306} +\end{subfigure} + +\caption{Job-M with Bin-Aggzero, selection of similar jobs} +\label{fig:job-M-bin-aggzero} +\end{figure} + + + +\begin{figure} +\begin{subfigure}{0.3\textwidth} +\centering +\includegraphics[width=\textwidth]{job_similarities_5024292-out/bin_aggzeros-0.7755-timeseries7907734} +\caption{Rank 2, SIM=0.7755} +\end{subfigure} +\begin{subfigure}{0.3\textwidth} +\centering +\includegraphics[width=\textwidth]{job_similarities_5024292-out/bin_aggzeros-0.7347-timeseries4244400} +\caption{SIM=0.7347} +\end{subfigure} +\begin{subfigure}{0.3\textwidth} +\centering +\includegraphics[width=\textwidth]{job_similarities_5024292-out/bin_aggzeros-0.5306-timeseries8038026} +\caption{SIM=0.5306} +\end{subfigure} + +\caption{Job-M with Bin-Aggzero, selection of similar jobs} +\label{fig:job-M-bin-aggzero-2} +\end{figure} + + +\subsection{Job-L} + + + + +\section{Conclusion} \label{sec:summary} %\printbibliography diff --git a/scripts/analyse-all.sh b/scripts/analyse-all.sh index 676d02f..1ff9a7e 100755 --- a/scripts/analyse-all.sh +++ b/scripts/analyse-all.sh @@ -1,7 +1,23 @@ #!/bin/bash # call me from the parent directory + +echo "This script performs the complete analysis steps" + +function prepare(){ + pushd datasets + ./decompress.sh + popd + + for I in datasets/*.csv ; do + ln -s $I + done +} + +# prepare + for I in job_similarities_*.csv ; do + rm *.png *.pdf ./scripts/plot.R $I >
description.txt OUT=${I%%.csv}-out mkdir $OUT diff --git a/scripts/plot-single-job.py b/scripts/plot-single-job.py index 552400a..8849d7c 100755 --- a/scripts/plot-single-job.py +++ b/scripts/plot-single-job.py @@ -12,7 +12,9 @@ prefix = sys.argv[2].split(",") fileformat = ".png" -print("Plotting the job: " + str(jobs)) +print("Plotting the job: " + str(sys.argv[1])) +print("Plotting with prefix: " + str(sys.argv[2])) + # Color map colorMap = { "md_file_create": cm.tab10(0), @@ -120,15 +122,14 @@ def plot(prefix, header, row): with open('job-io-datasets/datasets/job_codings.csv') as csv_file: csv_reader = csv.reader(csv_file, delimiter=',') line_count = 0 - job = 0 for row in csv_reader: if line_count == 0: header = row line_count += 1 continue - - if not row[0].strip() in jobs: + job = row[0].strip() + if not job in jobs: continue else: - plot(prefix[job], header, row) - job += 1 + index = jobs.index(job) + plot(prefix[index] + "-" + str(index), header, row) diff --git a/scripts/plot.R b/scripts/plot.R index 6d63b30..97451c2 100755 --- a/scripts/plot.R +++ b/scripts/plot.R @@ -4,7 +4,7 @@ library(ggplot2) library(dplyr) require(scales) -plotjobs = FALSE +plotjobs = TRUE # Color scheme plotcolors <- c("#CC0000", "#FFA500", "#FFFF00", "#008000", "#9999ff", "#000066") @@ -53,7 +53,7 @@ plotJobs = function(jobs){ r = e[ordered, ] if (plotjobs) { - prefix = do.call("sprintf", list("%s-%.0f-", level, r$similarity)) + prefix = do.call("sprintf", list("%s-%.4f-", level, r$similarity)) system(sprintf("scripts/plot-single-job.py %s %s", paste(r$jobid, collapse=","), paste(prefix, collapse=","))) } } @@ -96,7 +96,7 @@ for (l1 in levels(data$alg_name)){ print(res.intersect) # Plot heatmap about intersection -ggplot(tbl.intersect, aes(first, second, fill=intersect)) + geom_tile() + geom_text(aes(label = round(intersect, 1))) + scale_fill_gradientn(colours = rev(plotcolors)) +ggplot(tbl.intersect, aes(first, second, fill=intersect)) + geom_tile() + geom_text(aes(label = 
round(intersect, 1))) + scale_fill_gradientn(colours = rev(plotcolors)) + xlab("") + ylab("") ggsave("intersection-heatmap.png", width=6, height=5) # Collect the metadata of all jobs in a new table @@ -105,11 +105,11 @@ for (alg_name in levels(data$alg_name)){ res.jobs = rbind(res.jobs, cbind(alg_name, metadata[metadata$jobid %in% result[, alg_name],])) } -ggplot(res.jobs, aes(alg_name, total_nodes, fill=alg_name)) + geom_boxplot() + scale_y_continuous(trans = log2_trans(), breaks = trans_breaks("log2", function(x) 2^x), labels = trans_format("log2", math_format(2^.x))) -ggsave("jobs-nodes.png") +ggplot(res.jobs, aes(alg_name, total_nodes, fill=alg_name)) + geom_boxplot() + scale_y_continuous(trans = log2_trans(), breaks = trans_breaks("log2", function(x) 2^x), labels = trans_format("log2", math_format(2^.x))) + theme(legend.position = "none") +ggsave("jobs-nodes.png", width=6, height=4) -ggplot(res.jobs, aes(alg_name, elapsed, fill=alg_name)) + geom_boxplot() + scale_y_continuous(trans = log2_trans(), breaks = trans_breaks("log10", function(x) 10^x), labels = trans_format("log10", math_format(10^.x))) + ylab("Runtime in s") + xlab("Algorithm") -ggsave("jobs-elapsed.png") +ggplot(res.jobs, aes(alg_name, elapsed, fill=alg_name)) + geom_boxplot() + scale_y_continuous(trans = log2_trans(), breaks = trans_breaks("log10", function(x) 10^x), labels = trans_format("log10", math_format(10^.x))) + ylab("Runtime in s") + xlab("Algorithm") + theme(legend.position = "none") +ggsave("jobs-elapsed.png", width=6, height=4)