diff --git a/paper/main.tex b/paper/main.tex index d819344..f11ff93 100644 --- a/paper/main.tex +++ b/paper/main.tex @@ -338,9 +338,9 @@ Thus, a user is likely from the same group and the number of groups is similar t \paragraph{Node distribution.} All algorithms reduce over the node dimensions, therefore, we naturally expect a big inclusion across node range -- as long as the average I/O behavior of the jobs are similar. -\Cref{fig:nodes-job} shows a boxplot for the node counts in the Top\,100. +\Cref{fig:nodes-job} shows a boxplot for the node counts in the Top\,100 -- the red line marks the reference job. For Job-M and Job-L, we can observe that indeed the range of similar nodes is between 1 and 128. -For Job-S, all 100 most similar jobs use one node. +For Job-S, all 100 top-ranked jobs use one node. As post-processing jobs use typically one node and the number of postprocessing jobs is a high proportion, it appears natural that all Top\,100 are from this class of jobs which is confirmed by investigating the job metadata. The boxplots have different shapes which is an indication, that the different algorithms identify a different set of jobs -- we will analyze this later further. @@ -448,10 +448,62 @@ From this analysis, we conclude that one representative from binary quantization \label{fig:heatmap-job} \end{figure} +%%%%%%%%%%% %%%%%%%%%%% %%%%%%%%%%% %%%%%%%%%%% %%%%%%%%%%% %%%%%%%%%%% %%%%%%%%%%% %%%%%%%%%%% + \section{Assessing Timelines for Similar Jobs} +To verify the suitability of the similarity metrics, for each algorithm, we investigated the timelines of all Top\,100 jobs. +We subjectively found that the approach works very well and identifies suitable similar jobs. +To demonstrate this, we include a selection of job timelines -- typically Rank\,2, Rank\,15, and Rank\,100 -- and selected interesting job profiles. + + \subsection{Job-S} +This job represents post-processing (CMORization) which is a typical step. 
+It is executed for different simulations and variables across timesteps. +The job name of Job-S suggests that it is applied to the control variable. +In the metadata, we found 22,580 jobs with “cmor” in the name, of which 367 jobs mention “control”. + +The bin algorithms identify one job whose name does not include “cmor”. +All other algorithms identify only “cmor” jobs and 26--38 of these jobs are applied to “control” (see \Cref{tbl:control-jobs}). +A selection of job timelines is given in \Cref{fig:job-S-hex-lev}; all of these jobs are jobs on control variables. +The single non-cmor job and a high-ranked non-control cmor job are shown in \Cref{fig:job-S-bin-agg}. +While we cannot visually see many differences between these two jobs compared to the cmor job processing the control variables, the algorithms indicate that jobs processing the control variables must be more similar as they appear much more frequently in the Top\,100 jobs than in all jobs labeled with “cmor”. + +For Job-S, we found that all algorithms work similarly well and, therefore, omit further timelines. 
+ +\begin{table} +\centering +\begin{tabular}{r|r} + Algorithm & Jobs \\ \hline + bin\_aggzeros & 38 \\ + bin\_all & 38 \\ + hex\_lev & 33 \\ + hex\_native & 26 \\ + hex\_phases & 33 +\end{tabular} + \caption{Job-S: number of jobs with “control” in their name in the Top\,100} + \label{tbl:control-jobs} +\end{table} + +\begin{figure} +\centering +\begin{subfigure}{0.3\textwidth} +\centering +\includegraphics[width=\textwidth]{job_similarities_4296426-out/bin_aggzeros-0.6923--76timeseries4235560} +\caption{Non-cmor job: Rank\,76, SIM=0.69} +\end{subfigure} +\begin{subfigure}{0.3\textwidth} +\centering +\includegraphics[width=\textwidth]{job_similarities_4296426-out/bin_aggzeros-0.8077--4timeseries4483904} +\caption{Non-control job: Rank\,4, SIM=0.81} +\end{subfigure} + +\caption{Job-S: jobs with different job names when using bin\_aggzeros} +\label{fig:job-S-bin-agg} +\end{figure} + + \begin{figure} \begin{subfigure}{0.3\textwidth} \centering @@ -473,63 +525,52 @@ From this analysis, we conclude that one representative from binary quantization \label{fig:job-S-hex-lev} \end{figure} -\begin{figure} -\begin{subfigure}{0.3\textwidth} -\centering -\includegraphics[width=\textwidth]{job_similarities_4296426-out/hex_native-0.9808--1timeseries4296288} -\caption{Rank 2, SIM=} -\end{subfigure} -\begin{subfigure}{0.3\textwidth} -\centering -\includegraphics[width=\textwidth]{job_similarities_4296426-out/hex_native-0.9375--15timeseries4564296} -\caption{Rank 15, SIM=} -\end{subfigure} -\begin{subfigure}{0.3\textwidth} -\centering -\includegraphics[width=\textwidth]{job_similarities_4296426-out/hex_native-0.8915--99timeseries4296785} -\caption{Rank\,100, SIM=} -\end{subfigure} - -\caption{Job-S with Hex-Native, selection of similar jobs} -\label{fig:job-S-hex-native} -\end{figure} - +% \begin{figure} +% \begin{subfigure}{0.3\textwidth} +% \centering +% \includegraphics[width=\textwidth]{job_similarities_4296426-out/hex_native-0.9808--1timeseries4296288} +% \caption{Rank 2, SIM=} 
+% \end{subfigure} +% \begin{subfigure}{0.3\textwidth} +% \centering +% \includegraphics[width=\textwidth]{job_similarities_4296426-out/hex_native-0.9375--15timeseries4564296} +% \caption{Rank 15, SIM=} +% \end{subfigure} +% \begin{subfigure}{0.3\textwidth} +% \centering +% \includegraphics[width=\textwidth]{job_similarities_4296426-out/hex_native-0.8915--99timeseries4296785} +% \caption{Rank\,100, SIM=} +% \end{subfigure} +% \caption{Job-S with Hex-Native, selection of similar jobs} +% \label{fig:job-S-hex-native} +% \end{figure} +% % \ContinuedFloat -Hex phases very similar to hex native. -Komischer JOB zu inspizieren: \verb|job_similarities_4296426-out/hex_phases-0.7429--93timeseries4237860| - - -Bin aggzeros works quite well here too. The jobs are a bit more diverse. - - - -\begin{figure} -\begin{subfigure}{0.3\textwidth} -\centering -\includegraphics[width=\textwidth]{job_similarities_4296426-out/bin_aggzeros-0.8462--1timeseries4296280} -\caption{Rank 2, SIM=} -\end{subfigure} -\begin{subfigure}{0.3\textwidth} -\centering -\includegraphics[width=\textwidth]{job_similarities_4296426-out/bin_aggzeros-0.7778--14timeseries4555405} -\caption{Rank 15, SIM=} -\end{subfigure} -\begin{subfigure}{0.3\textwidth} -\centering -\includegraphics[width=\textwidth]{job_similarities_4296426-out/bin_aggzeros-0.6923--99timeseries4687419} -\caption{Rank\,100, SIM=} -\end{subfigure} - -\caption{Job-S with bin\_aggzero, selection of similar jobs} -\label{fig:job-S-bin-aggzeros} -\end{figure} +% +% \begin{figure} +% \begin{subfigure}{0.3\textwidth} +% \centering +% \includegraphics[width=\textwidth]{job_similarities_4296426-out/bin_aggzeros-0.8462--1timeseries4296280} +% \caption{Rank 2, SIM=} +% \end{subfigure} +% \begin{subfigure}{0.3\textwidth} +% \centering +% \includegraphics[width=\textwidth]{job_similarities_4296426-out/bin_aggzeros-0.7778--14timeseries4555405} +% \caption{Rank 15, SIM=} +% \end{subfigure} +% \begin{subfigure}{0.3\textwidth} +% \centering +% 
\includegraphics[width=\textwidth]{job_similarities_4296426-out/bin_aggzeros-0.6923--99timeseries4687419} +% \caption{Rank\,100, SIM=} +% \end{subfigure} +% \caption{Job-S with bin\_aggzero, selection of similar jobs} +% \label{fig:job-S-bin-aggzeros} +% \end{figure} \subsection{Job-M} -Bin aggzero liefert Mist zurück. -\eb{Wegen Bug?} diff --git a/scripts/analyse-all.sh b/scripts/analyse-all.sh index 80d32a7..bcb6676 100755 --- a/scripts/analyse-all.sh +++ b/scripts/analyse-all.sh @@ -30,7 +30,7 @@ for I in job_similarities_*.csv ; do rm $OUT/* mv description.txt $OUT fi - mv *.png *.pdf $OUT + mv *.png *.pdf jobs-*.txt $OUT done # analyze peformance data diff --git a/scripts/plot.R b/scripts/plot.R index 8f8ecb1..6cddf1b 100755 --- a/scripts/plot.R +++ b/scripts/plot.R @@ -53,8 +53,10 @@ plotJobs = function(jobs){ if (plotjobs) { prefix = do.call("sprintf", list("%s-%.4f-", level, r$similarity)) - system(sprintf("scripts/plot-single-job.py %s %s", paste(r$jobid, collapse=","), paste(prefix, collapse=","))) + system(sprintf("./scripts/plot-single-job.py %s %s", paste(r$jobid, collapse=","), paste(prefix, collapse=","))) } + + system(sprintf("./scripts/extract-conf-data.sh %s > jobs-%s.txt", paste(r$jobid, collapse=" "), level)) } # Store the job ids in a table, each column is one algorithm