diff --git a/scripts/analyse-all.sh b/scripts/analyse-all.sh index 7ee3a58..676d02f 100755 --- a/scripts/analyse-all.sh +++ b/scripts/analyse-all.sh @@ -1,10 +1,10 @@ #!/bin/bash -# call me from parent directory - +# call me from the parent directory for I in job_similarities_*.csv ; do ./scripts/plot.R $I > description.txt - mkdir $I.out - rm $I.out/* - mv *.png *.pdf description.txt $I.out + OUT=${I%%.csv}-out + mkdir $OUT + rm $OUT/* + mv *.png *.pdf description.txt $OUT done diff --git a/scripts/plot-single-job.py b/scripts/plot-single-job.py index 60ce6c0..552400a 100755 --- a/scripts/plot-single-job.py +++ b/scripts/plot-single-job.py @@ -10,6 +10,8 @@ import matplotlib.cm as cm jobs = sys.argv[1].split(",") prefix = sys.argv[2].split(",") +fileformat = ".png" + print("Plotting the job: " + str(jobs)) # Color map @@ -81,7 +83,7 @@ def plot(prefix, header, row): fsize = (8, 1 + 1.5 * len(labels)) fsizeFixed = (8, 2) - + pyplot.close('all') if len(labels) < 4 : @@ -93,7 +95,7 @@ def plot(prefix, header, row): ax[i].set_ylabel(l) pyplot.xlabel("Segment number") - pyplot.savefig(prefix + "timeseries" + jobid + ".pdf", bbox_inches='tight') + pyplot.savefig(prefix + "timeseries" + jobid + fileformat, bbox_inches='tight') # Plot first 30 segments if len(timeseries) <= 50: @@ -109,7 +111,7 @@ def plot(prefix, header, row): ax[i].set_ylabel(l) pyplot.xlabel("Segment number") - pyplot.savefig(prefix + "timeseries" + jobid + "-30.pdf", bbox_inches='tight') + pyplot.savefig(prefix + "timeseries" + jobid + "-30" + fileformat, bbox_inches='tight') ### end plotting function diff --git a/scripts/plot.R b/scripts/plot.R index 086d029..6f59252 100755 --- a/scripts/plot.R +++ b/scripts/plot.R @@ -23,16 +23,17 @@ cat("Job count:") cat(nrow(data)) # empirical cummulative density function (ECDF) -ggplot(data, aes(similarity, color=alg_name, group=alg_name)) + stat_ecdf(geom = "step") + xlab("SIM") + ylab("Fraction of jobs") + theme(legend.position="bottom") + scale_color_brewer(palette = "Set2") -ggsave("ecdf.png") +ggplot(data, aes(similarity, color=alg_name, group=alg_name)) + stat_ecdf(geom = "step") + xlab("SIM") + ylab("Fraction of jobs") + theme(legend.position=c(0.9, 0.4)) + scale_color_brewer(palette = "Set2") +ggsave("ecdf.png", width=8, height=3) + +ggplot(data, aes(similarity, color=alg_name, group=alg_name)) + stat_ecdf(geom = "step") + xlab("SIM") + ylab("Fraction of jobs") + theme(legend.position=c(0.9, 0.4)) + scale_color_brewer(palette = "Set2") + xlim(0.5, 1.0) +ggsave("ecdf-0.5.png", width=8, height=3) e = data %>% filter(similarity >= 0.5) -ggplot(e, aes(similarity, color=alg_name, group=alg_name)) + stat_ecdf(geom = "step") + xlab("SIM") + ylab("Fraction of jobs") + theme(legend.position="bottom") + scale_color_brewer(palette = "Set2") print(summary(e)) -ggsave("ecdf-0.5.png") # histogram for the jobs -ggplot(data, aes(similarity), group=alg_name) + geom_histogram(color="black", binwidth=0.025) + aes(fill = alg_name) + facet_grid(alg_name ~ ., switch = 'y') + scale_y_continuous(limits=c(0, 100), oob=squish) + scale_color_brewer(palette = "Set2") + ylab("Count (cropped at 100)") + theme(legend.position = "none") +ggplot(data, aes(similarity), group=alg_name) + geom_histogram(color="black", binwidth=0.025) + aes(fill = alg_name) + facet_grid(alg_name ~ ., switch = 'y') + scale_y_continuous(limits=c(0, 100), oob=squish) + scale_color_brewer(palette = "Set2") + ylab("Count (cropped at 100)") + theme(legend.position = "none") + stat_bin(binwidth=0.025, geom="text", angle = 90, colour="black", size=3, aes(label=..count.., y=0*(..count..)+20)) ggsave("hist-sim.png") # load job information, i.e., the time series per job @@ -51,8 +52,8 @@ plotJobs = function(jobs){ # print the job timelines r = e[ordered, ] - prefix = do.call("sprintf", list("%s-%.0f-", level, r$similarity)) - system(sprintf("scripts/plot-single-job.py %s %s", paste(r$jobid, collapse=","), paste(prefix, collapse=","))) + #prefix = do.call("sprintf", list("%s-%.0f-", level, r$similarity)) + #system(sprintf("scripts/plot-single-job.py %s %s", paste(r$jobid, collapse=","), paste(prefix, collapse=","))) } # Store the job ids in a table, each column is one algorithm @@ -94,7 +95,7 @@ print(res.intersect) # Plot heatmap about intersection ggplot(tbl.intersect, aes(first, second, fill=intersect)) + geom_tile() + geom_text(aes(label = round(intersect, 1))) + scale_fill_gradientn(colours = rev(plotcolors)) -ggsave("intersection-heatmap.png") +ggsave("intersection-heatmap.png", width=6, height=5) # Collect the metadata of all jobs in a new table res.jobs = tibble()