mistral-io-datasets/scripts/plot-performance-clustering.R

22 lines
1.1 KiB
R
Executable File

#!/usr/bin/env Rscript
library(ggplot2)
library(dplyr)
require(scales)
# Load the recorded clustering progress. The statements below read the
# columns min_sim, jobs_done, jobs_total, elapsed and alg_name from it.
data <- read.csv(file = "datasets/clustering_progress.csv")
# Development of the runtime while more jobs are added, restricted to three
# minimum-similarity settings (one facet row per setting).
# NOTE(review): min_sim is presumably a fraction in [0, 1]; it is multiplied
# by 100 below to build a percent label -- confirm against the CSV writer.
e <- data %>%
  filter(min_sim %in% c(0.1, 0.5, 0.99)) %>%
  # Facet label such as "SIM = 10 %". paste0() is used so that no extra blank
  # is inserted in front of the percent sign by a default separator.
  mutate(percent = paste0("SIM = ", round(min_sim * 100), " %"))

runtime_progress_plot <- ggplot(
  e, aes(x = jobs_done, y = elapsed, color = alg_name)
) +
  geom_point() +
  facet_grid(percent ~ .) +
  ylab("Cumulative runtime in s") +
  xlab("Jobs processed") +
  scale_y_log10() +
  theme(legend.position = "bottom")

# Hand the plot to ggsave() explicitly instead of relying on last_plot();
# assigning it also keeps Rscript from auto-printing it to the default device.
# The file name keeps its existing spelling in case other documents refer to
# the figure by that path.
ggsave(
  "fig/runtime-cummulative.png",
  plot = runtime_progress_plot, width = 6, height = 4.5
)
# Bar chart of the runtime per algorithm and minimum similarity, using only
# the rows in which at most 9998 jobs are left to process.
# NOTE(review): the 9998 offset presumably picks the final progress record of
# each run -- confirm against how clustering_progress.csv is written.
e <- data %>%
  filter(jobs_done >= (jobs_total - 9998)) %>%
  # Factor of whole-number percentages, used as the discrete x axis.
  mutate(percent = as.factor(round(min_sim * 100, 0)))

runtime_overview_plot <- ggplot(
  e, aes(y = elapsed, x = percent, fill = alg_name)
) +
  geom_col() +
  facet_grid(. ~ alg_name, switch = "y") +
  scale_y_log10() +
  theme(legend.position = "none") +
  ylab("Runtime in s") +
  xlab("Minimum similarity in %") +
  # The label position is computed inside aes() so that the value 20 is
  # transformed by the log10 y scale like the bar heights; 0 * elapsed + 20
  # yields 20 for every row (and NA where elapsed is NA). The rotation is a
  # fixed setting, so it is passed as a parameter rather than mapped.
  geom_text(
    aes(label = round(elapsed, 0), y = 0 * (elapsed) + 20),
    angle = 90
  )

# Hand the plot to ggsave() explicitly instead of relying on last_plot();
# assigning it also keeps Rscript from auto-printing it to the default device.
ggsave(
  "fig/runtime-overview.png",
  plot = runtime_overview_plot, width = 7, height = 2
)