From 7dc7328e34ababc39c3083e3bcf235b60e3f6014 Mon Sep 17 00:00:00 2001
From: "Julian M. Kunkel" <juliankunkel@googlemail.com>
Date: Fri, 21 Aug 2020 19:12:33 +0100
Subject: [PATCH] Nai

---
 fig/job-timeseries4296426.pdf | Bin 16479 -> 16457 bytes
 paper/main.tex                | 142 +++++++++++++++++++++-------------
 scripts/analyse-all.sh        |   9 ++-
 scripts/plot-single-job.py    |  16 ++--
 scripts/plot.R                |  19 ++---
 5 files changed, 115 insertions(+), 71 deletions(-)
diff --git a/fig/job-timeseries4296426.pdf b/fig/job-timeseries4296426.pdf
index 1fa9c7105acf71dca5edf41c58d95241fc9a0c49..a43a026385a81859f765cf96f67fbec9138219f1 100644
GIT binary patch
delta 4892
zcmZu!2UJtb+O@(3X@VLNFn}T|LK4yigQ6nc0wOg^?^2|N;?Wx+LIjjZ6$AyPNCyQZ
zkxLC#yb%IYB1EM377&mh%e(je<ejz7TJ!DMGjqPZzS%S9^PQ~?cedv1Z52Xd5K0Ir
z6pDtSQE(*1Y^yKOQ@2-xv!+n0yYKnvnz5<7mGtR-K09o7?+8&0x0P^`h>706%qg?E
zSoAK^Vj#Z>t|ud2DcoW?XVmUmIC{{Bl3ETHejCt{IeB53Q!)weRr|W`rqUZEv;I!y
z=<<l4`=<q^(LJ=)rGvn{*7vN(VH%}uaD|%WI~D4xZq@2>pghO}gK1A+>3uL&G$S)8
z;~z#V>zfLtLs#3o9>qfM(+6*lIE7~ndk&coTMS)wOA<cwX}<-hKX)a7W5Ah(!@U?@
zz@sxdvftX)d@$+i3o;5x#IOb}BmC{#oGCD?CupJe)nrQ^`D~W&%ydb0Y%gSJ@bN&9
z)6GVzjZ-I3C{qZdsSJ%yJ3Uy%5Y<k*F?k@VS&O>~W78!ytW#U-?m5XnDmijPXS`SJ
zhIg)yD8YH#n;#wePrf<wQwcnsA_40=PCI9!yBw1SZ+)xjwm<!ctF@c+qw5Xoi{1V0
zW0PN#z0<F9zLgV0X&rWc&rax^JYeswTo$mlIL!+z&?grndz0}~Pn01;vrC00jr?x<
z;vpfAFAtbDdgaliD1)wf4?>wooxj8(PT7VpUS_0%Oi!j*fQ!P@?+r;7?t&SArr{!j
zYed-PVAkiLwZZCMG<fRZh}8bVX8~%toz?4KA3TH(an49goaMJWYT*x>TtBxgb38e!
z051gG&l_~xeS|THTe|U@a_*bzUfj+KwVJk9{O_L$@6CRl%yMAo7w_iSIU&1n!le8U
z8R;5#U#h+~$+(<pcqN#9{A$KrG*j1o<5iAkW;3Fh4q8XsUTQlt>DtboH`+}pR`Z3(
z1Yp<ew~Pcz>R2HX#ArM49&Q+|pBF>%`T`Uz_8Yo5s|HxMR(CxmT^Fi$L?yaKZ*SAE
z*5eFz(pF!M;?@cNnK5_kulw}euyE6xKDHxuR{m4<kRV6`Uw=;IY*@o^3Mlf`3s6uf
znVGNobl-`E6~U}rGX1=Tq)(Z?Dj`zT`E|IK7s<b`_vpt_{BxOvU3hKLXogEI?|Z=D
zLH6DT-YHo!gnc=Xusf-$LEQf7+pvCE`%0%#&53h9mGh&C+2kw^k2NVe;q83|zp-I-
z4J&B%67~|f;JJg;dPrw^B-lxO^bix1^X0Qh0B7c`cWwGnLyH#s_38o+SPil7JfYAK
zJ+~alo}hs5De=%BT}$f_lq-+&!~^Fu>pRjoZ^XqEJR9XE?8OzRSnK@<lr<h*OX{%Q
zeLzm=Nzj$$lS5LoNsqnc4lDYf@@19yPEExma=Oqr^au-m2eHSV)E?s@UScJ&Vv8rs
zx)yH_`&+dt#;Jl}eo9vJ=Il~!S>9GTp_e+6<{)gBl81THL*k4m?~t4jlMG1GVllb=
z^_4-P#5GagbvYrP2Z?sp12-ybDv6;WF&`MGu0i6(knM3QUZ3@vm@oU60z~bE$y8Zw
zT;Ut}nGZUibJTu9bz=x9dQ2=f&(!}iapWfA<y-JU(Aq<)H^s4VB*yras|&*7Yk-`+
zyHC;LuJmIyg!tg32o_~T6v&OA+@nugov+;`n8j+S>#~(8%s%!#F{kMYhlhA!ss<<P
zCiOk0vs`{lt(ZzE6}i>SYU=40#KrN$v&<bXp|pT)mrji!cCu8ryWFP%+ftB!XAMwa
z&zS_VC*acecmh)Eo!`AmlhwU!<RF%6vC!N7viTd%9_9hpG3jqPRt<c3s2oOBqB7@C
znknLLFrwcnB<wBbyPQB458^B>j(ur$10UlE2T~<kXvYQINTF;fdv`=hwqqWpOQ}Ix
z_sGP~2VFX)4iL)^_`&>)-H<MY3(M1Woio}y9UybhX*Fj8j^T3UYRHSqkgg`G1H@y4
z2Mrec70f*cXwBqBQF=VZ?{ECz`G9!hqA1B=|MJM8K1I6q+Mzzj6-(FFEP;z_np$*s
zPqCr_wH23)&X3mjax%N+QM2o>1QnI3Et#YI1am{pn0`g&#(>8^<fd4TnS-O=I2mo<
zk{()XHxkQ--5+IHwO9OURYTYA9s>|cHN#)NdlX1`QLjH5bL))~f4`Z)L8klYp;>0X
z+Yen+;!^Zw-SqzYDSp<0(SHO_p|U^j@tYIKms$MIcZaE-d^SSA)qWwgr1g%-+c>EU
z_(zsw<CoAcD?^gZDE$)Yx?uz`E#3lmv=nP8vVS(%<Jx&+pJ8iVX1R18Q1Skr+|{56
zlO{qI^=bTcE4ZR!Wm+@yx^F+?cHq7%`9y@@p)f^MA01o=i9I>eld#JSV)xSf&0X{T
z(6`WjcGbPg;P~nU4Qgq=z{^Nf5=+J1a<Dcyl(@?v&|LdI2xNyhk0W?kviqu<EqUaB
z1sjBT5a>vRm^?7Uu4*Rlf<u&a4Md(+Eq(ht7%deEX0@o6j(Zz-zZ$K4A&e^ZV~Cay
zC~PRdG`gWYVfu&i(SnfTM3wi$<1_=Gk*hioCy{~hr&l`~zdSdJn!G7$$V#m-b+vvX
z{p7Hi@gMk>cea9w_7YeY5tUG}FTD6UT`o?1QrjP3Um=kUC#=mfi$ppPx6^NBDINb}
z_CU(Wd-PE5VtDFj5&!iKKRJ_Bla=bRDxp55c00o}gJQgfhyczGuhE6u_a&!Z3V!``
zO_=n3N1bp^Fy83hI3w_6vfs)$?=$j<$G~3rL2b@3CCq41z}YUa`?1MoIjP4*MQ+!(
zy7^cG0)n2pk2)rd8^w=h1wm)&7WQ<FI(9f3X5-e}9HG_|R5X2Nrv20PSZiQ?g7J_v
zm5^0rBj9Ts@n`>Y)Af|Nh;muFSI=H73cHBCFtLQYQ6){^6U;;mNYj~`Wp1hMaO@Ew
z&j*rU<1k7F^=BeNiIQTxSd9ozUCCJ90cn6<^AD!~QQ7mMHCy^8WL?G|`1p$+-Fkt@
zE*$!?38Z7cI3})YD0=9O-YR=0i@Y{&A(pP-lh@<uU(0Hh!1W3W>QnV{M`a1-qFS@v
zdK!%ys==ntmZ`!LCflja<3%_Aa{Lqa_rl2wCi}GR^L0E}lAQDcS{Vk{f)N{gL4sTr
zT(1%0`JQR36-cc7Vj2;2j&*WXvB3A@7XIV^p8F0NomvAqe4U~JoE<688UK9;c-En>
zMMEXG%d>FX)g3-qg;8JDY8*GT*SlhpUYu21q8AT}d|ZEX@zjaF4eR;oJU~{hMu_6;
z;<%huvD`K*K5Od_$IBC?oM!rJt^+H{k-il;hQuAa@fY90ZKcEaR4N2#iLcYA5)Le{
ztjW7ssgL2uJ=J<_aV#Vc-W*DRSxsBj4A<6JS{%Yt;|McWgSF&PA_z|Ude84?5D$VJ
z5yCf1+Ig^R$r}M+_lw8I5JVyPW~v`-j$PZ_e?sH-EL)IdZ9z_#5>`-Q<}CG)ACe3_
z_J~#3oM@GJie6<2Dpr-ZB#mbInH!!rVkt5jej5*7%4N!>-fmIRwNEv`p_^_K$Tkb%
zyOvgRPr}XIpS>-F_v)05*;=89Nt0}1EAZ*<Q9n1BpK(TJWa*s}A>i!zn%I`T+r32<
zLO1;Vr=@#6)3?(4m~cn1fOBz6+oiw+b=P*)?W&S1Vwziu2Lm$eT=M_p_rK28YrMVo
z6mxB*rS>89&luY)@n)~0l7mY4{cbfv5or%(^R)BcUvV*eVqJU^FkdzwmI=pGtHr;n
zmp){nKU0AUY1*}lt*;nQ8q1*oYf_U$8p$=C>Wg9Uf)FN<8^!plT7N#`f-)&M+^+zi
zl=he5ve(4}#*jHHT1};Ye6_zi^=HsbUJ&ISvlaS?nX%fi`2}SNvuUHpBk@!yXv`L$
z9ZM*{1B31d4ygrQ^>3OnZf6w^)lyPDTC+GYUmA?DFE!Nobl8QhJW>JJIlVVV3hP>n
z(}c6#Aw0aSj4R%^RFx7AiBBBiwdGYRFzW6c{>y1KXIIzz*zZ*YrI|M+GIh<q{9?J-
zX>89(%!~9~dxuL4`luNALOquAJu$m!cH>Gz2d+w#YbJDIz6loig4K+KJ1-Qr)y=rw
zr>!g%gjM3A(oh>dGu}&&Y~Pdgd^jWL_m!r2aOIg_s6X!6MQYG2@k5{I9)cb~;w3wp
zwEY=nlRY}+YH&o%gn6a~CkOeD>vyzwWZ}oMDLR$-Wj_b(;hY8B`Dj4#l+?EpVYNW2
z*$S02I*|G;Y(9K2U_E?zB>WD2BG)lji}rrfivn`ES(#RL|IDr3se_`V^Whi=C65p0
z?3A+l%eO+St<)tEvA>{!jItU_QF);RoqwT?+U9MM`wb=L)n8}{Cgu(nmX;}H0|qrS
zhY#4wDw)*Hg$xN+_?{)F-Mp3bwo3}s9wFyVK0JF+j_D*JkvyULJ?0+ydZ;0^W&5Z3
zPiBmdeC<z%10&e0g104hMhp}NghzhO=6PM#F*M%RC_s27@Iip1A@Dh9rkC8Wc8>mJ
zl~$xN6n7G?8e^stMf<Xn6>~7)+dL3jYj3VD`a#$1%8GmE1<hO=PXnK|o1bds3F-$v
z1eg{6K<jfKsbfYC?{sOzYcmQ=XB#q2d=_r=LU8$eX78BxMzKiMxVq*P_M7=h-+A^e
z6OTW4IJ+hIfCX0m81dG2)y~W8?wgkKY8b(2nC$)77ggP~?!%e%<9wt~0_lKMg@wOn
zn``V@tX5Iv+~){m_qnJ)?)IN)-|su|H;e6$wY8-Q5QgM(5&K-H+NZa;=o?k$G*ta1
zXR^n4LHT@rkfFYAqhn59U`ENo6DOMbk{V;L9W%nERG=2jl&+jxIsobE$?`W@o?4$y
zpC^5)ruAnK?E|VW^F}W6@B<eYdBTJjH3Vw;*q;4XOX03sBcFU3k7CICPWMOcWE7q_
zq!_F4FowKWv*`tYiu<!|jwUZQ!abTza{Osux9wzj61E-H?b_(&OZ!^2gW+ki%@M0|
zHzM}%8%e8nVb+F!Ijb<i9YH``n#V~ASGc0%Fiyheu8x+SNKO?0v1A+JWOT8s1K~7>
z7-3H1anHtY?<2XW1xnq;17KU&Y%FPg+tv%3`V<+o*glvvRNBqTi64$ZqWNua+WcrM
zBPo?2c?v>8q{v`5>Nqb9hDJaU7|KJHOgm+`^v3Tt6e@jFn*V1528W{l2Zlv*@nBd4
z7p4s3<|)Hblw?(X0M6Y7N20i<K*F$G7z)G9!yuJ6^AIp7*I0xy8vT<8{8x?$1P01A
z7=`&o{BIr>%C!g#4dqG%hWsBG^!LKQ=L0t6g(;(vm`w{q5!@<35opxrkAWg#h|PjS
zkq9I=hDLIYMM04OHwZ^>&IF1=Vm3<X$Ma7`Kv7sI7lwwzHY)=~BN1H5p;4IM81V0e
zXbhKJPz(&dx$K}AIJXKI6nDlL40LmSK(SEx|Hpplk3}H3GQlF@n`;4zMZ-7C3WGwC
wo7I9rm9bm}2Ssr2DJT*HKe|a)7!=LDfsil^oFaVs0v{aBFDG~Yq6Yu}0oACU(f|Me

delta 4872
zcmZuz2{@GN`?h3>?8}6#Ns;(w%r4n#DBD5SP{<nE?BtV^oGc+S<cP8*gd~cgj4h<>
zBs(z%BaEd%zftG<|Ig3-zU!LrdanDq-+901zV7#V-Vfo-qv6b@M$Eh@BuoX3Mq|)$
zI0gexy1*O&9;y_HT;D$KnWlVxf>!ow?6g+6@;qmY9Or=x;%&$tr1EirWr}W}A>>nm
zYi}tj?S(dsMgHR<OZx|vEp4=3b6!o!siiP}q6vvIGcHZk?EXn%$J7{uqtd5;ECd3b
zu}y7b-an|UJ98Q0x}4C|9PS-z6mgtJY*T%ZHrfUHl<;vJZn{>T&h7uoGUQ4*bxhAM
z57ZjIfUupexNnpBG2x`eI{A-+2J6YMmtrM8j(t5yc>Vsg<Mz^b*A3dz%F;6TLy3aB
zbvedI^<o`2LeXz@VpX`rGL{OgN}})=N499qm8gq#VThV-?+OE|g4B-xhUW(Le1?8l
zW?{Mpn0fM{!D8ingm~izt0=3a=_t|`vu$~AP=Q}XTvk9p-TSp2-alF`B=!eavWk0A
zC`>1uD-yE;)U$RIfLU+8M<?y@yY1JR;jCG&fiDN|aJHqMB5w=u8(z_zdo*q3AxjGl
zq`C%fx?%L8HJdwGTkg575|5yqX--iBNM7*jh~$pU4!UO3W1~CW-u_#+oxf#k)__aa
z`rBAT2)=CkQQOq9;tt~#B_6dauda1$02reVhwNLYJv+J6#AQCtX4gvKu}X?h?{yxy
zdf!|w(df|Vt(EO~X`k94+V^imCMZ5%{+GH_?)hibf0md9JI(0IK76Hueprr&iq&j`
zlijKg1N_Oam3S|z#tZ&1%R%VjrdO8AuXo&v-Wnex9#gtuxcH*Q?8~Ng67TV&B+HK#
zk9xwh&Rnxuzwshvm47DK)Jm?M`hb^Pl+<KNw&*xdwjfpY&-Ln9k{kUZd?lY#J;{aC
zyfU*Al*{vqVCc!D7cLbJ&SxqdChH3vKrS2_f1L)$OEgb55B+U7As?l|cOgV7Ok-*=
zHQ&XBKSVWIdPFJ;4~h`+b?xb`Jr7rj_t`v3G5NY&k4&E(9ju>;D35@s@RVJO{6zHf
z&VJ}!TmSGxNls<qqmAuU3%AxuCG&6gD%s{=SiQXapxxJu#YapUPAuw6kRkrbpa-cU
z*c9wkY(STEJ(Y~)J>4vC;#*_wYVs}F;5NClTX*g}Au(fcJbL=ox2mMdj+>LmeWh$f
zhW-k^x_-fnvdDDV!b^UR!0TQ{sAf&DC`){{j?HRZTjbTU%=O?34%p{jN~oh-BRQmS
zHu55lYD0++za+kqs(kf(8IauxD!F^OB&Ual+T)L`>{qN`j;&V?P(N_s&$`cx3KHr`
zfy!vv*nnjnxTxoOOLx_r^Dpi;QG8B)EqeMrcV|?`Ic@W>tbu}Gx}uKt%@FH)1wM6%
zo}$qy{}!UmVZJ20z|_lQj(v`Z!-OS(92sS!deRU}k4pbaa0pDL))R|Bh?Ih4V5;9=
z!#NNs*MG6@8g_B>mD5>%V~)AauWVL*5{o9*Jl1@8>bdq^^$N3=9evXTpXu_-SkisL
z+;Q{$&G)$XF|C;BmEu0+-1sok>$&;F^@oCK0T%+3){nhLlHDim3)>Hx&Axt6eA`&I
z0hcCfK3)MH9y#)+zPcPUP6(UtiKLys>U?qWr7One=#QzQ^7HFlx1uT@J)(<Ep^H5X
zwun88d$zt-*SI#a9rZQx3qA4kcj8H4bi{}TO~Q*Z0zPYxe}DGyJnZqOU~q}GJM!zm
zGCRptlM}zBh%H_fB{}pvcicA>Cqu}J;OwxJZuc1UWzGKTzT7kR-`a5RCH%w|Y%{`D
zwQuyC8!XKlo>ZksHLb7P{iM#_7g!$u<COVX{<{XR1N+4_qO<;X8(|X*!cWy#%d=8m
z@{+|!kpbSE*{BXftEJlkHmWrS*d>+0X1_bD_0>g?5f{Re3h7;Sk?xiPJt9YfyOg%z
zar-W)&jW`*JoPtAG7p%m><2G2IdA`<*o6&zDL?oArn(@eS)I%B%2>IHHe0$>y;t1e
zW9v^pOSrJdsuJ_&<^%0q^qjsZa7}AO7k|WlIW}-mO<bsprMXXQ=AE2*ky)!dQ8@NS
zWQ5yI*?sv^>Sg1`y%OLczQ4#CNoo#U?z0gOjq4za8+$iY5GrXruh#>Z<b`_l4Pn8O
zpD%@g99ovTaSdMi{mj2oIaVi)WEbY{Gp!GWvG9g}*>ZoHqNAGqqi9`E3$Smwf8uIs
z=~=NSF~*9Sf@iB6>#h84YCpE-tooM~MT{)K;5;ga+tLBhOUHJ|2rlO`qL^F9c{lFk
zQRzEsuu=Xd<*l}SX`fT464ooPqYd#LcGs{s|Fr0ouL|)0;qvxPPmZA*cT`d~PBm^V
z`r5(E4UP}Ed6N#H+p)@5a#(qfKJ7i#QkvzM%zRgyT~uMRDmlTynr`^|B8aGhq&3~|
zV>u8}S3)UBKNgWTs>+5~cH=H??(*L(;fQPOlWRiUD4wQx3gnR88rEyUVtTBL-7&eg
zQIODdoF;rekSg2Whu*kH-2!vR&hWaQQ-WCdhItoO#9^7<B$m%v2iY~As~=X-a?w)R
zbwBIn3|4RcK4qUQ){JZ;u1jYh+6u@dfp*MOgMY_t4(93^`&XG$ack4q<0%zF*&0D)
zWlSw@#?c}lIpH}vw9P(5Q)hx5@RWCzqIa9!rXth(1n0!nLk|Cyx}owoFCoTHFP!GB
z$;_eU9hTg>PZn=_tB-bACROECa|(*dMqR^mI{W2&_Bytfe!sL2>n=f{^tO`?8$lwW
z6V3#)8DXEggTm?yWDD2Zv$qJn9hkZSDK_KQ*$G@(Xq}#jA=L4-tSv5@K~@NP6Aj{<
zif#AQVj)BQF&<w<wDPW7G1<(x<;JD^bg72MDvd!0v(E5PBbsr!!OI2FY|`ZpOliLI
zm%AJ@<Rm%Md}S_o-A({a^>h>@Q#k11Sq^&i3-5s{CGp-d#PY*PS}i$5Vcadbsk}~W
zg00=!Nv{Q)+^L|G_Y_x+yqx+rW@@r@kdS%%?0jy2O>kf}A|sRgv5z(f1T|rip%7mP
zKbt(%kZai0*yk$gZC4Z_MKDAYs)l)eqoLTyqxd|qLO~f<q0`tnusYCn*bV(P*HePF
zoGwCC`pY!K&M$@kVm75T#a!Zg9A*C2pQLRXABGJf_@<)#Cr{owMRN+-qu<civ4ct-
zsr#AW{QVG{=R(HzdU_bkcGQ=zv&Cvq72+oSL}&u`v-A_eblrF$JI<Ii+bha<vELax
z`UE^;_Pom4SetSxGCXfwn8jvf$}uBM*zAbErP;*|@e>c<wcC!Lmh&(u>1+kIJ_h{U
z^j#TZm1H_Sv6Yh4-ViC@P6Bc+zN#hkpU=&tyr?<C`Gwsn&0TIpLxo+-?lOy0^6kG&
zaq@JD4Z64m%N-I@sSyo1tb*mdZNenBdp4k|etWRCQy23+o2cpAs}T)F5P4UE43x}v
z?`(6aAwAXidzoKq<Fiq@>%?nCLf>m99i5x8{F%0iEH00tg^~vlwm~X^DjNhg$<iU5
zk)O`V7Xos(uLya<4t>gd?diVH{)wKqnZE5o)(p9*WjRejH<3C>%#`?C$2Ty~Ek#Ow
zIGf&z2GbULPvjp%-rb$D!}KZpUg+19rMGknlf23ADZA@?evRK#cJ~IRwU+a1%2ti8
z30-6A`G%8>hz%3#bt7o9M0kwZzpQDj)`>&5PO%HjBbOZdUAiT$%v3dyZNt2*#4*)Z
z)!_bWrL-GOsi~?H4lQrZ?vG4|z;gK(WBo?MXyZh2vcnwBsE=owNqMwPy#Pyd<n|Z(
z77x9fwYlb=AmD#irPKiSPK8|Bm*{ZGiX=>>4rjYF(fj<s5y?w<KWis_)zY@$+xC6j
z^AC7S+pwqoySV3HI$Xk2S$>mPK_wEKzs)}^N3QI!kSO1_z@jdn!IgRM&MG&NfSK>|
zguXRVgRkNx^X3T`O)_zJ^z}Zr{<`?oMo6uCXFtQjrL3MLj#sj22E{vFDZh{0G78P;
zgOl4Qj4_`TQ>IF#j0`&;E#arQc0_M(D(32ST&^;qBG+<ZSHKh(heXQXekq#{Pbi#E
z2Tu2dgp4|cP<9SSIZ2pO7QmPV0xkYvwD)O3<<AqZ*B3R-jv1MLu66_eTo|VDJ$wW8
z7JX{pv|>Zy3%m3-{LqCFrqOwh+*f}DT`6a)?WYdx6SS>r4CV^1Z2k-nj8(q+vcy-e
zqWQzKH~~nlf`J0qto|gD=hV4i_{K`JwC1pvJ&CZ&B(v4@H!gpHvUvM3AA8NE09M~z
zBfmG*`PE#r5wTeYH<;}oF#Fm>QDatF-WEYVyAZ-vNK9{wh;OxnndxHBzI4#S`!d=0
z$vPy_BkKQ?4jw(Ln$`9XiPv0|3hwuH{M_p<8dKPUz0lKr=+U+MlIu;ow;9Tb*>^p-
zuLZ>6>mrMs*mNNA_bnzg-Xhv>V#{3r%r~@*Q`jG}N?|b~A1W%XGm6r-vOYU&UX9dv
zjw)O6D17cHsTZ_-{=~Gt1omh$Vl(AnV#*?9Q`)RT%<qTkaj{zmlO;jjbL5&=cQc>c
zh77k&szWlVY)8j*M#&>IBhBzXZ`SusI98`ekz|V}*bO~`HA3r1980ASHY-B32ZST%
zeP5$ogg>Y|jXS;>mC2jwjFtvM$bR3q^Q7g@agKhQ*v3IMf;rv`3id7np(P}aYh?mY
zKQul!jy{*$fO6?BfI`9IRN0%ZBU@Kdlz5aTC?2-c|Gm1mA7}4(ubpr4oCN+VCVhJG
zh~NR@N{f2{S;P$n)}t4)bA8oSgK}SnmfP*z=$Tl)l^Bu{c{irKWpV9jrg<%XlvlpE
z7hgN1%%)JVY$W1Ql$rT%pehtBAYU-P6j)UWk@6RDZe>~ASppGT#0e2MU~Y_O_r>Ox
zRZsh<)!3i@<iI_y)}>-MRCM4vS|)5x%w?lXt!3khTk^R2r(;C7$EJ5nPG%pF4q_Lp
z`yLAmouCuy)moQs(+RYWW5k&Z?WPiSIw1fjR=W^;0~4hU3;ilGMLK|MdsgUBkQqQ9
zCDx#3#-PWPp@2_BcPXxzCOlhnN3l@XGw0TW6QL@z$7h2~Y=q8PnMfR>|CY*o*E7S$
zlB65ETzM`^?r_IPI`bzD^GmXzFy&kP0ZJSI6beT}QAxq-1xj#)46ysi_{ebm3ZQBL
znh}H2>5eCb6p;*Um>P-^L!guHoiqj!j43q~nt_ghLm4m>z{o?R;Cp#+s2amsI1GdN
z#RLCU6&wM;7#5>|-^73OFffKqpePtaAyCBsz<_@?{^$Qdx}Z=P3JvVp7=SaX0Kie`
zy&nT0pvb*~0|+>Z5ksLE)*=BE$Ox)o_ErKQk$~FX4@6>M3>ZpHZLcx_3V~uM4uyjL
zgMt5NDgcFMkPARV5qpgWpw$>vKqDC|Mx$YS{Q+PA#D8PE`eWcohDtC9#9l7|7!+c!
ztWW?z?Ntj3sKFQv2f!Im5r6~`$M(nyrI*aG2?7Qnk~TH<_9HM{M~<8^!EyZu+(?Y>

diff --git a/paper/main.tex b/paper/main.tex
index e8aac59..a5d0feb 100644
--- a/paper/main.tex
+++ b/paper/main.tex
@@ -137,17 +137,71 @@ Check time series algorithms:
 \section{Evaluation}
 \label{sec:evaluation}
 
+For each reference job and algorithm, we created a CSV file with the computed similarity for all other jobs.
+Next, we analyzed the performance of the algorithm.
+Then, we investigated the quantitative behavior and the correlation between the chosen similarity and the number of found jobs, and, finally, the quality of the 100 most similar jobs.
+
+\subsection{Reference Jobs}
+
 In the following, we assume a job is given and we aim to identify similar jobs.
-We chose several reference jobs with different compute and IO characteristics visualized in \Cref{fig:refJobs}:
+We chose several reference jobs with different compute and IO characteristics:
 \begin{itemize}
-	\item Job-S: performs postprocessing on a single node. This is a typical process in climate science where data products are reformatted and annotated with metadata to a standard representation (so called CMORization). The post-processing is IO intensive.
+	\item Job-S: performs post-processing on a single node. This is a typical process in climate science where data products are reformatted and annotated with metadata to a standard representation (so called CMORization). The post-processing is IO intensive.
   \item Job-M: a typical MPI parallel 8-hour compute job on 128 nodes which writes time series data after some spin up.   %CHE.ws12
 	\item Job-L: a 66-hour 20-node job.
   The initialization data is read at the beginning.
   Then only a single master node writes constantly a small volume of data; in fact, the generated data is too small to be categorized as IO relevant.
 \end{itemize}
 
-For each reference job and algorithm, we created a CSV files with the computed similarity for all other jobs.
+The segmented timelines of the jobs are visualized in \Cref{fig:refJobs}.
+This coding is also used for the HEX class of algorithms (BIN algorithms merge all timelines together as described in \jk{TODO}).
+The figures show the values of active metrics ($\neq 0$) only; if few are active then they are shown in one timeline, otherwise they are rendered individually to provide a better overview.
+For example, we can see in \Cref{fig:job-S}, that several metrics increase in Segment\,6.
+
+\begin{figure}
+\begin{subfigure}{0.8\textwidth}
+\centering
+\includegraphics[width=\textwidth]{job-timeseries4296426}
+\caption{Job-S} \label{fig:job-S}
+\end{subfigure}
+\centering
+
+
+\begin{subfigure}{0.8\textwidth}
+\centering
+\includegraphics[width=\textwidth]{job-timeseries5024292}
+\caption{Job-M} \label{fig:job-M}
+\end{subfigure}
+\centering
+
+
+\caption{Reference jobs: segmented timelines of mean IO activity}
+\label{fig:refJobs}
+\end{figure}
+
+
+\begin{figure}\ContinuedFloat
+
+\begin{subfigure}{0.8\textwidth}
+\centering
+\includegraphics[width=\textwidth]{job-timeseries7488914-30}
+\caption{Job-L (first 30 segments of 400; remaining segments are similar)}
+\label{fig:job-L}
+\end{subfigure}
+\centering
+\caption{Reference jobs: segmented timelines of mean IO activity}
+\end{figure}
+
+
+
+\subsection{Performance}
+
+\jk{Describe System at DKRZ from old paper}
+
+The runtime for computing the similarity of relevant IO jobs (580,000 and 440,000 for BIN and HEX algorithms, respectively) is shown in \Cref{fig:performance}.
+
+\jk{TO FIX, This is for clustering algorithm, not for computing SIM, which is what we do here.}
+
 
 \begin{figure}
 \centering
@@ -168,93 +222,73 @@ For each reference job and algorithm, we created a CSV files with the computed s
 \end{figure}
 
 
-Create histograms + cumulative job distribution for all algorithms.
-Insert job profiles for closest 10 jobs.
-
-Potentially, analyze how the rankings of different similarities look like.
-
-
-\begin{figure}
-\begin{subfigure}{0.8\textwidth}
-\centering
-\includegraphics[width=\textwidth]{job-timeseries4296426}
-\caption{Job-S} \label{fig:job-S}
-\end{subfigure}
-\centering
-
-\caption{Reference jobs: timeline of mean IO activity}
-\label{fig:refJobs}
-\end{figure}
-
-
-\begin{figure}\ContinuedFloat
-
-\begin{subfigure}{0.8\textwidth}
-\centering
-\includegraphics[width=\textwidth]{job-timeseries5024292}
-\caption{Job-M} \label{fig:job-M}
-\end{subfigure}
-\centering
-
-\begin{subfigure}{0.8\textwidth}
-\centering
-\includegraphics[width=\textwidth]{job-timeseries7488914-30.pdf}
-\caption{Job-L (first 30 segments of 400; remaining segments are similar)}
-\label{fig:job-L}
-\end{subfigure}
-\centering
-\caption{Reference jobs: timeline of mean IO activity; non-shown timelines are 0}
-\end{figure}
+\subsection{Quantitative Analysis}
 
+In the quantitative analysis, we explore for the different algorithms how the similarity of our pool of jobs behaves with respect to our three reference jobs (Job-S, Job-M, and Job-L).
+The cumulative distribution of similarity to the reference jobs is shown in \Cref{fig:ecdf}.
+For example, in \Cref{fig:ecdf-job-S}, we see that about 70\% have a similarity of less than 10\% to Job-S for HEX\_native.
+BIN\_aggzeros shows some steep increases, e.g., more than 75\% of jobs have the same low similarity below 2\%.
+The different algorithms lead to different curves for our reference jobs, e.g., for Job-S, HEX\_phases bundles more jobs with low similarity compared to the other jobs; in Job-L, it is the slowest.
+% This indicates that the algorithms
 
+The support team in a data center may have time to investigate the most similar jobs.
+Time for the analysis is typically bounded; for instance, the team may analyze the 100 most similar jobs.
+In \Cref{fig:hist}, the histograms with the actual number of jobs for a given similarity are shown.
+As we focus on a feasible number of jobs, the diagram should be read from right (100\% similarity) to left and for a bin we show at most 100 jobs (total number is still given).
+It turns out that both BIN algorithms produce nearly identical histograms and we omit one of them.
+In the figures, we can see again a different behavior of the algorithms depending on the reference job.
+Especially for Job-S, we can see clusters with jobs of higher similarity while for Job-M, the growth in the relevant section is more steady.
+For Job-L, we find hardly any similar jobs, except when using the HEX\_phases algorithm.
 
 \begin{figure}
 
 \begin{subfigure}{0.8\textwidth}
 \centering
-\includegraphics[width=\textwidth]{job_similarities_4296426-out/ecdf.png}
+\includegraphics[width=\textwidth]{job_similarities_4296426-out/ecdf}
 \caption{Job-S} \label{fig:ecdf-job-S}
 \end{subfigure}
 \centering
 
 \begin{subfigure}{0.8\textwidth}
 \centering
-\includegraphics[width=\textwidth]{job_similarities_5024292-out/ecdf.png}
+\includegraphics[width=\textwidth]{job_similarities_5024292-out/ecdf}
 \caption{Job-M} \label{fig:ecdf-job-M}
 \end{subfigure}
 \centering
 
 \begin{subfigure}{0.8\textwidth}
 \centering
-\includegraphics[width=\textwidth]{job_similarities_7488914-out/ecdf.png}
+\includegraphics[width=\textwidth]{job_similarities_7488914-out/ecdf}
 \caption{Job-L} \label{fig:ecdf-job-L}
 \end{subfigure}
 \centering
-\caption{Empirical cumulative density function}
+\caption{Quantitative job similarity -- empirical cumulative distribution function}
 \label{fig:ecdf}
 \end{figure}
 
 
 \begin{figure}
-
-\begin{subfigure}{0.5\textwidth}
 \centering
-\includegraphics[width=\textwidth]{job_similarities_4296426-out/hist-sim}
+
+\begin{subfigure}{0.75\textwidth}
+\centering
+\includegraphics[width=\textwidth,trim={0 0 0 2.2cm},clip]{job_similarities_4296426-out/hist-sim}
 \caption{Job-S} \label{fig:hist-job-S}
 \end{subfigure}
-\begin{subfigure}{0.5\textwidth}
+
+\begin{subfigure}{0.75\textwidth}
 \centering
-\includegraphics[width=\textwidth]{job_similarities_5024292-out/hist-sim}
+\includegraphics[width=\textwidth,trim={0 0 0 2.2cm},clip]{job_similarities_5024292-out/hist-sim}
 \caption{Job-M} \label{fig:hist-job-M}
 \end{subfigure}
 
-\begin{subfigure}{0.5\textwidth}
+\begin{subfigure}{0.75\textwidth}
 \centering
-\includegraphics[width=\textwidth]{job_similarities_7488914-out/hist-sim}
+\includegraphics[width=\textwidth,trim={0 0 0 2.2cm},clip]{job_similarities_7488914-out/hist-sim}
 \caption{Job-L} \label{fig:hist-job-L}
 \end{subfigure}
 \centering
-\caption{Histogram for the number of jobs (bin width: 2.5\%, numbers are the actual job counts)}
+\caption{Histogram for the number of jobs (bin width: 2.5\%, numbers are the actual job counts). BIN\_aggzeros is nearly identical to BIN\_all.}
 \label{fig:hist}
 \end{figure}
 
@@ -415,7 +449,7 @@ One consideration is to identify jobs that meet a rank threshold for all differe
 % \ContinuedFloat
 
 Hex phases very similar to hex native.
-Komischer JOB zu inspizieren: \verb|job_similarities_4296426-out/hex_phases-0.7429--93timeseries4237860.png|
+Komischer JOB zu inspizieren: \verb|job_similarities_4296426-out/hex_phases-0.7429--93timeseries4237860|
 
 
 Bin aggzeros works quite well here too. The jobs are a bit more diverse.
@@ -602,7 +636,7 @@ Bin aggzero liefert Mist zurück.
 \end{subfigure}
 
 \caption{Job-L with hex\_lev, selection of similar jobs}
-\label{fig:job-L-hex-phases}
+\label{fig:job-L-hex-lev}
 \end{figure}
 
 
diff --git a/scripts/analyse-all.sh b/scripts/analyse-all.sh
index 1ff9a7e..d7b968c 100755
--- a/scripts/analyse-all.sh
+++ b/scripts/analyse-all.sh
@@ -4,6 +4,8 @@
 
 echo "This script performs the complete analysis steps"
 
+CLEAN=0 # Set to 0 to make some update
+
 function prepare(){
   pushd datasets
   ./decompress.sh
@@ -21,6 +23,9 @@ for I in job_similarities_*.csv ; do
   ./scripts/plot.R $I > description.txt
   OUT=${I%%.csv}-out
   mkdir $OUT
-  rm $OUT/*
-  mv *.png *.pdf description.txt $OUT
+  if [[ $CLEAN != "0" ]] ; then
+    rm $OUT/*
+    mv description.txt $OUT
+  fi
+  mv *.png *.pdf $OUT
 done
diff --git a/scripts/plot-single-job.py b/scripts/plot-single-job.py
index 8849d7c..e9f6392 100755
--- a/scripts/plot-single-job.py
+++ b/scripts/plot-single-job.py
@@ -10,7 +10,7 @@ import matplotlib.cm as cm
 jobs = sys.argv[1].split(",")
 prefix = sys.argv[2].split(",")
 
-fileformat = ".png"
+fileformat = ".pdf"
 
 print("Plotting the job: " + str(sys.argv[1]))
 print("Plotting with prefix: " + str(sys.argv[2]))
@@ -78,12 +78,16 @@ def plot(prefix, header, row):
   colors = []
   style = []
   for name, group in groups:
-    metrics[name] = [x[2] for x in group.values]
-    labels.append(name)
     style.append(linestyleMap[name] + markerMap[name])
     colors.append(colorMap[name])
+    if name == "md_file_delete":
+      name = "file_delete"
+    if name == "md_file_create":
+      name = "file_create"
+    metrics[name] = [x[2] for x in group.values]
+    labels.append(name)
 
-  fsize = (8, 1 + 1.5 * len(labels))
+  fsize = (8, 1 + 1.1 * len(labels))
   fsizeFixed = (8, 2)
 
   pyplot.close('all')
@@ -97,7 +101,7 @@ def plot(prefix, header, row):
       ax[i].set_ylabel(l)
 
   pyplot.xlabel("Segment number")
-  pyplot.savefig(prefix + "timeseries" + jobid + fileformat, bbox_inches='tight')
+  pyplot.savefig(prefix + "timeseries" + jobid + fileformat, bbox_inches='tight', dpi=150)
 
   # Plot first 30 segments
   if len(timeseries) <= 50:
@@ -113,7 +117,7 @@ def plot(prefix, header, row):
       ax[i].set_ylabel(l)
 
   pyplot.xlabel("Segment number")
-  pyplot.savefig(prefix + "timeseries" + jobid + "-30" + fileformat, bbox_inches='tight')
+  pyplot.savefig(prefix + "timeseries" + jobid + "-30" + fileformat, bbox_inches='tight', dpi=150)
 
 ### end plotting function
 
diff --git a/scripts/plot.R b/scripts/plot.R
index 642c61b..c8ff172 100755
--- a/scripts/plot.R
+++ b/scripts/plot.R
@@ -4,7 +4,7 @@ library(ggplot2)
 library(dplyr)
 require(scales)
 
-plotjobs = TRUE
+plotjobs = FALSE
 
 # Color scheme
 plotcolors <- c("#CC0000", "#FFA500", "#FFFF00", "#008000", "#9999ff", "#000066")
@@ -22,19 +22,20 @@ cat("Job count:")
 cat(nrow(data))
 
 # empirical cumulative density function (ECDF)
-ggplot(data, aes(similarity, color=alg_name, group=alg_name)) + stat_ecdf(geom = "step") + xlab("SIM") + ylab("Fraction of jobs") + theme(legend.position=c(0.9, 0.4)) + scale_color_brewer(palette = "Set2")
-ggsave("ecdf.png", width=8, height=3)
+data$sim = data$similarity*100
+ggplot(data, aes(sim, color=alg_name, group=alg_name)) + stat_ecdf(geom = "step") + xlab("Similarity in %") + ylab("Fraction of jobs") + theme(legend.position=c(0.9, 0.4)) + scale_color_brewer(palette = "Set2") + scale_x_log10()
+ggsave("ecdf.png", width=8, height=2.5)
 
-ggplot(data, aes(similarity, color=alg_name, group=alg_name)) + stat_ecdf(geom = "step") + xlab("SIM") + ylab("Fraction of jobs") + theme(legend.position=c(0.9, 0.4))  + scale_color_brewer(palette = "Set2") + xlim(0.5, 1.0)
-ggsave("ecdf-0.5.png", width=8, height=3)
+# histogram for the jobs
+ggplot(data, aes(sim), group=alg_name) + geom_histogram(color="black", binwidth=2.5) + aes(fill = alg_name) + facet_grid(alg_name ~ ., switch = 'y') + xlab("Similarity in %") + scale_y_continuous(limits=c(0, 100), oob=squish)  +   scale_color_brewer(palette = "Set2") + ylab("Count (cropped at 100)") + theme(legend.position = "none") + stat_bin(binwidth=2.5, geom="text", adj=1.0, angle = 90, colour="black", size=3, aes(label=..count.., y=0*(..count..)+95))
+ggsave("hist-sim.png", width=6, height=4.5)
+
+#ggplot(data, aes(similarity, color=alg_name, group=alg_name)) + stat_ecdf(geom = "step") + xlab("SIM") + ylab("Fraction of jobs") + theme(legend.position=c(0.9, 0.4))  + scale_color_brewer(palette = "Set2") + xlim(0.5, 1.0)
+#ggsave("ecdf-0.5.png", width=8, height=3)
 
 e = data %>% filter(similarity >= 0.5)
 print(summary(e))
 
-# histogram for the jobs
-ggplot(data, aes(similarity), group=alg_name) + geom_histogram(color="black", binwidth=0.025) + aes(fill = alg_name) + facet_grid(alg_name ~ ., switch = 'y') + scale_y_continuous(limits=c(0, 100), oob=squish)  +   scale_color_brewer(palette = "Set2") + ylab("Count (cropped at 100)") + theme(legend.position = "none") + stat_bin(binwidth=0.025, geom="text", angle = 90, colour="black", size=3, aes(label=..count.., y=0*(..count..)+20))
-ggsave("hist-sim.png")
-
 # load job information, i.e., the time series per job
 jobData = read.csv("job-io-datasets/datasets/job_codings.csv")
 metadata = read.csv("job-io-datasets/datasets/job_metadata.csv")