From 58709e01e654f0185f929bd34aa45d2a509646fa Mon Sep 17 00:00:00 2001
From: "Julian M. Kunkel" <juliankunkel@googlemail.com>
Date: Thu, 20 Aug 2020 12:11:35 +0100
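Subject: [PATCH] Better plotting.

- analyse-all.sh: collect the results in <name>-out instead of
  <name>.csv.out.
- plot-single-job.py: write the time-series plots as PNG instead of PDF.
- plot.R: save the ECDF plots and the intersection heatmap with fixed
  sizes, place the ECDF legend inside the plot area, restrict the second
  ECDF plot with xlim(0.5, 1.0), and label the histogram bins with their
  counts. The call to plot-single-job.py is commented out.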
---
 scripts/analyse-all.sh     | 10 +++++-----
 scripts/plot-single-job.py |  8 +++++---
 scripts/plot.R             | 17 +++++++++--------
 3 files changed, 19 insertions(+), 16 deletions(-)

diff --git a/scripts/analyse-all.sh b/scripts/analyse-all.sh
index 7ee3a58..676d02f 100755
--- a/scripts/analyse-all.sh
+++ b/scripts/analyse-all.sh
@@ -1,10 +1,10 @@
 #!/bin/bash
 
-# call me from parent directory
-
+# call me from the parent directory
 for I in job_similarities_*.csv ; do
   ./scripts/plot.R $I > description.txt
-  mkdir $I.out
-  rm $I.out/*
-  mv *.png *.pdf description.txt $I.out
+  OUT="${I%%.csv}-out"   # job_similarities_X.csv -> job_similarities_X-out
+  mkdir -p "$OUT"        # -p: do not fail when re-run on an existing directory
+  rm -f "$OUT"/*         # -f: do not fail when the directory is still empty
+  mv *.png *.pdf description.txt "$OUT"
 done
diff --git a/scripts/plot-single-job.py b/scripts/plot-single-job.py
index 60ce6c0..552400a 100755
--- a/scripts/plot-single-job.py
+++ b/scripts/plot-single-job.py
@@ -10,6 +10,8 @@ import matplotlib.cm as cm
 jobs = sys.argv[1].split(",")
 prefix = sys.argv[2].split(",")
 
+fileformat = ".png"  # file-name extension appended to every pyplot.savefig() output name
+
 print("Plotting the job: " + str(jobs))
 
 # Color map
@@ -81,7 +83,7 @@ def plot(prefix, header, row):
 
   fsize = (8, 1 + 1.5 * len(labels))
   fsizeFixed = (8, 2)
-  
+
   pyplot.close('all')
 
   if len(labels) < 4 :
@@ -93,7 +95,7 @@ def plot(prefix, header, row):
       ax[i].set_ylabel(l)
 
   pyplot.xlabel("Segment number")
-  pyplot.savefig(prefix + "timeseries" + jobid + ".pdf", bbox_inches='tight')
+  pyplot.savefig(prefix + "timeseries" + jobid + fileformat, bbox_inches='tight')
 
   # Plot first 30 segments
   if len(timeseries) <= 50:
@@ -109,7 +111,7 @@ def plot(prefix, header, row):
       ax[i].set_ylabel(l)
 
   pyplot.xlabel("Segment number")
-  pyplot.savefig(prefix + "timeseries" + jobid + "-30.pdf", bbox_inches='tight')
+  pyplot.savefig(prefix + "timeseries" + jobid + "-30" + fileformat, bbox_inches='tight')
 
 ### end plotting function
 
diff --git a/scripts/plot.R b/scripts/plot.R
index 086d029..6f59252 100755
--- a/scripts/plot.R
+++ b/scripts/plot.R
@@ -23,16 +23,17 @@ cat("Job count:")
 cat(nrow(data))
 
 # empirical cummulative density function (ECDF)
-ggplot(data, aes(similarity, color=alg_name, group=alg_name)) + stat_ecdf(geom = "step") + xlab("SIM") + ylab("Fraction of jobs") + theme(legend.position="bottom") + scale_color_brewer(palette = "Set2")
-ggsave("ecdf.png")
+ggplot(data, aes(similarity, color=alg_name, group=alg_name)) + stat_ecdf(geom = "step") + xlab("SIM") + ylab("Fraction of jobs") + theme(legend.position=c(0.9, 0.4)) + scale_color_brewer(palette = "Set2")
+ggsave("ecdf.png", width=8, height=3)
+
+ggplot(data, aes(similarity, color=alg_name, group=alg_name)) + stat_ecdf(geom = "step") + xlab("SIM") + ylab("Fraction of jobs") + theme(legend.position=c(0.9, 0.4))  + scale_color_brewer(palette = "Set2") + xlim(0.5, 1.0)
+ggsave("ecdf-0.5.png", width=8, height=3)
 
 e = data %>% filter(similarity >= 0.5)
-ggplot(e, aes(similarity, color=alg_name, group=alg_name)) + stat_ecdf(geom = "step") + xlab("SIM") + ylab("Fraction of jobs") + theme(legend.position="bottom")  + scale_color_brewer(palette = "Set2")
 print(summary(e))
-ggsave("ecdf-0.5.png")
 
 # histogram for the jobs
-ggplot(data, aes(similarity), group=alg_name) + geom_histogram(color="black", binwidth=0.025) + aes(fill = alg_name) + facet_grid(alg_name ~ ., switch = 'y') + scale_y_continuous(limits=c(0, 100), oob=squish)  +   scale_color_brewer(palette = "Set2") + ylab("Count (cropped at 100)") + theme(legend.position = "none")
+ggplot(data, aes(similarity), group=alg_name) + geom_histogram(color="black", binwidth=0.025) + aes(fill = alg_name) + facet_grid(alg_name ~ ., switch = 'y') + scale_y_continuous(limits=c(0, 100), oob=squish)  +   scale_color_brewer(palette = "Set2") + ylab("Count (cropped at 100)") + theme(legend.position = "none") + stat_bin(binwidth=0.025, geom="text", angle = 90, colour="black", size=3, aes(label=..count.., y=0*(..count..)+20))
 ggsave("hist-sim.png")
 
 # load job information, i.e., the time series per job
@@ -51,8 +52,8 @@ plotJobs = function(jobs){
 
     # print the job timelines
     r = e[ordered, ]
-    prefix = do.call("sprintf", list("%s-%.0f-", level, r$similarity))
-    system(sprintf("scripts/plot-single-job.py %s %s", paste(r$jobid, collapse=","), paste(prefix, collapse=",")))
+    #prefix = do.call("sprintf", list("%s-%.0f-", level, r$similarity))
+    #system(sprintf("scripts/plot-single-job.py %s %s", paste(r$jobid, collapse=","), paste(prefix, collapse=",")))
   }
 
 # Store the job ids in a table, each column is one algorithm
@@ -94,7 +95,7 @@ print(res.intersect)
 
 # Plot heatmap about intersection
 ggplot(tbl.intersect, aes(first, second, fill=intersect)) + geom_tile() + geom_text(aes(label = round(intersect, 1))) + scale_fill_gradientn(colours = rev(plotcolors))
-ggsave("intersection-heatmap.png")
+ggsave("intersection-heatmap.png", width=6, height=5)
 
 # Collect the metadata of all jobs in a new table
 res.jobs = tibble()