Merge branch 'master' of http://git.hps.vi4io.org/eugen.betke/mistral-io-datasets
This commit is contained in:
		
						commit
						7f19de642e
					
				
							
								
								
									
										1
									
								
								paper/bibliography.bib
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										1
									
								
								paper/bibliography.bib
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1 @@ | ||||
| 
 | ||||
							
								
								
									
										1208
									
								
								paper/llncs.cls
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										1208
									
								
								paper/llncs.cls
									
									
									
									
									
										Normal file
									
								
							
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							
							
								
								
									
										11
									
								
								paper/main-blx.bib
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										11
									
								
								paper/main-blx.bib
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,11 @@ | ||||
| @Comment{$ biblatex control file $} | ||||
| @Comment{$ biblatex bcf format version 3.7 $} | ||||
| % Do not modify this file! | ||||
| % | ||||
| % This is an auxiliary file used by the 'biblatex' package. | ||||
| % This file may safely be deleted. It will be recreated as | ||||
| % required. | ||||
| 
 | ||||
| @Control{biblatex-control, | ||||
|   options = {3.7:0:0:1:0:1:1:0:0:0:0:1:3:1:3:1:0:0:3:1:79:+:+:nty}, | ||||
| } | ||||
							
								
								
									
										158
									
								
								paper/main.tex
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										158
									
								
								paper/main.tex
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,158 @@ | ||||
| \let\accentvec\vec | ||||
| \documentclass[]{llncs} | ||||
| 
 | ||||
| \usepackage{todonotes} | ||||
| \newcommand{\eb}[1]{\todo[inline]{(EB): #1}} | ||||
| \newcommand{\jk}[1]{\todo[inline]{JK: #1}} | ||||
| 
 | ||||
| \usepackage{silence} | ||||
| \WarningFilter{biblatex}{Using} | ||||
| \WarningFilter{latex}{Float too large} | ||||
| \WarningFilter{caption}{Unsupported} | ||||
| \WarningFilter{caption}{Unknown document} | ||||
| 
 | ||||
| \let\spvec\vec | ||||
| \let\vec\accentvec | ||||
| \usepackage{amsmath} | ||||
| \let\vec\spvec | ||||
| 
 | ||||
| \usepackage{array} | ||||
| \usepackage{xcolor} | ||||
| \usepackage{color} | ||||
| \usepackage{colortbl} | ||||
| \usepackage{subcaption} | ||||
| \usepackage{hyperref} | ||||
| \usepackage{listings} | ||||
| \usepackage{lstautogobble} | ||||
| \usepackage[listings,skins,breakable,raster,most]{tcolorbox} | ||||
| \usepackage{caption} | ||||
| 
 | ||||
| 
 | ||||
| \lstset{ | ||||
| 	numberbychapter=false, | ||||
| 	belowskip=-10pt, | ||||
| 	aboveskip=-10pt, | ||||
| } | ||||
| 
 | ||||
| \lstdefinestyle{lstcodebox} { | ||||
| 	basicstyle=\scriptsize\ttfamily, | ||||
| 	autogobble=true, | ||||
| 	tabsize=2, | ||||
| 	captionpos=b, | ||||
| 	float, | ||||
| } | ||||
| 
 | ||||
| \usepackage{graphicx} | ||||
| \graphicspath{ | ||||
| 	{./pictures/} | ||||
| } | ||||
| 
 | ||||
| \usepackage[backend=bibtex, style=numeric]{biblatex} | ||||
| \addbibresource{bibliography.bib} | ||||
| 
 | ||||
| 
 | ||||
| \usepackage{enumitem} | ||||
| \setitemize{noitemsep,topsep=0pt,parsep=0pt,partopsep=0pt} | ||||
| 
 | ||||
| \definecolor{darkgreen}{rgb}{0,0.5,0} | ||||
| \definecolor{darkyellow}{rgb}{0.7,0.7,0} | ||||
| 
 | ||||
| 
 | ||||
| \usepackage{cleveref} | ||||
| \crefname{codecount}{Code}{Codes} | ||||
| 
 | ||||
| \title{Using Machine Learning to Identify Similar Jobs Based on their IO Behavior} | ||||
| \author{Julian Kunkel\inst{1} \and Eugen Betke\inst{2}} | ||||
| 
 | ||||
| \institute{ | ||||
| University of Reading -- | ||||
| \email{j.m.kunkel@reading.ac.uk}% | ||||
| \and | ||||
| DKRZ -- | ||||
| \email{betke@dkrz.de}% | ||||
| } | ||||
| \begin{document} | ||||
| \maketitle | ||||
| 
 | ||||
| \begin{abstract} | ||||
| 
 | ||||
| Support staff. | ||||
| Problem, a particular job found that isn't performing well. | ||||
| Now how can we find similar jobs? | ||||
| 
 | ||||
| Problem with definition of similarity. | ||||
| 
 | ||||
| In this paper, a methodology and algorithms to identify similar jobs based on profiles and time series are illustrated. | ||||
| Similar to a study. | ||||
| 
 | ||||
| Research questions: is this effective to find similar jobs? | ||||
| 
 | ||||
| The contribution of this paper... | ||||
| \end{abstract} | ||||
| 
 | ||||
| \section{Introduction} | ||||
| 
 | ||||
| %This paper is structured as follows. | ||||
| %We start with the related work in \Cref{sec:relwork}. | ||||
| %Then, in TODO we introduce the DKRZ monitoring systems and explain how I/O metrics are captured by the collectors. | ||||
| %In \Cref{sec:methodology} we describe the data reduction and the machine learning approaches and do an experiment in \Cref{sec:data,sec:evaluation}. | ||||
| %Finally, we finalize our paper with a summary in \Cref{sec:summary}. | ||||
| 
 | ||||
| \section{Related Work} | ||||
| \label{sec:relwork} | ||||
| 
 | ||||
| \section{Methodology} | ||||
| \label{sec:methodology} | ||||
| 
 | ||||
| Given: the reference job ID. | ||||
| Create a feature set from the 4D time series data (number of nodes, per file system, 9 metrics, time). | ||||
| 
 | ||||
| Adapt the algorithms: | ||||
| \begin{itemize} | ||||
| 	\item iterate for all jobs | ||||
| 		\begin{itemize} | ||||
| 			\item compute distance to reference job | ||||
| 		\end{itemize} | ||||
| 	\item sort the jobs based on the distance to the reference job | ||||
| 	\item create cumulative job distribution based on distance for visualization, allow users to output jobs with a given distance | ||||
| \end{itemize} | ||||
| 
 | ||||
| A user might be interested in exploring, say, the closest 10 or 50 jobs. | ||||
| 
 | ||||
| Algorithms: | ||||
| Profile algorithm: job-profiles (job-duration, job-metrics, combine both) | ||||
| $\rightarrow$ just compute geom-mean distance between profiles | ||||
| 
 | ||||
| Check time series algorithms: | ||||
| 
 | ||||
| \begin{itemize} | ||||
| 	\item bin | ||||
| 	\item hex\_native/hex\_lev | ||||
| 	\item pm\_quant | ||||
| \end{itemize} | ||||
| 
 | ||||
| \section{Evaluation} | ||||
| \label{sec:evaluation} | ||||
| 
 | ||||
| Two study examples (two reference jobs): | ||||
| \begin{itemize} | ||||
| 	\item jobA: shorter length, e.g., 5--10, that has a little bit of IO in at least two metadata metrics (the more the better). | ||||
| 	\item jobB: a very IO intensive longer job, e.g., length $>$ 20, with IO read or write and maybe one other metric. | ||||
| \end{itemize} | ||||
| 
 | ||||
| For each reference job: create CSV file which contains all jobs with: | ||||
| \begin{itemize} | ||||
| 	\item JOB ID, for each algorithm: the coding and the computed ranking $\rightarrow$ thus one long row. | ||||
| \end{itemize} | ||||
| Alternatively, there could be one CSV for each algorithm that contains JOB ID, coding + rank. | ||||
| 
 | ||||
| Create histograms + cumulative job distribution for all algorithms. | ||||
| Insert job profiles for closest 10 jobs. | ||||
| 
 | ||||
| Potentially, analyze what the rankings of different similarities look like. | ||||
| 
 | ||||
| \section{Summary and Conclusion} | ||||
| \label{sec:summary} | ||||
| 
 | ||||
| %\printbibliography | ||||
| \end{document} | ||||
| @ -1,6 +1,9 @@ | ||||
| #!/bin/bash | ||||
| 
 | ||||
| # call me from parent directory | ||||
| 
 | ||||
| for I in job_similarities_*.csv ; do | ||||
|   ./plot.R $I > description.txt | ||||
|   ./scripts/plot.R $I > description.txt | ||||
|   mkdir $I.out | ||||
|   rm $I.out/* | ||||
|   mv *.png *.pdf description.txt $I.out | ||||
|  | ||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user