# Select the input CSV: the first command-line argument when one was given,
# otherwise a default file for manual execution.
# Do not use exists("file") for this decision: it is always TRUE (base R
# provides a function named `file`), so the fallback would never trigger and
# a missing argument would leave `file` as NA.
# NOTE(review): `args` is expected to be defined earlier in the script,
# presumably via commandArgs(trailingOnly = TRUE) -- confirm. If it is not
# defined, `args` resolves to the base function and the default file is used.
default_file <- "job_similarities_5024292.csv" # for manual execution
has_cli_file <- is.character(args) && length(args) >= 1 && !is.na(args[1])
file <- if (has_cli_file) args[1] else default_file
print(file)
# First run of digits in the file name (e.g. "5024292" for the default file).
jobID <- str_extract(file, regex("[0-9]+"))
data <- read.csv(file)
# (diff hunk residue; lines of the script are elided here) @ -28,12 +32,12 @@ cat(nrow(data))
# Empirical cumulative distribution function (ECDF) of the similarity,
# drawn as one step curve per algorithm (alg_name) and saved to ecdf.png.
# Similarity is multiplied by 100 to match the "Similarity in %" axis label.
data$sim <- data$similarity * 100
# Only one ECDF plot is built: ggsave() writes a single plot, so a second
# variant created before it would never reach ecdf.png.
ecdf_plot <- ggplot(data, aes(sim, color = alg_name, group = alg_name)) +
  stat_ecdf(geom = "step") +
  xlab("Similarity in %") +
  ylab("Fraction of jobs") +
  theme(legend.position = c(0.9, 0.5), legend.title = element_blank()) +
  scale_color_brewer(palette = "Set2") # + scale_x_log10() for a log x axis
# Bare expression: displayed under the same conditions as any top-level plot.
ecdf_plot
# Pass the plot explicitly instead of relying on last_plot().
ggsave("ecdf.png", plot = ecdf_plot, width = 8, height = 2.5)
# Histogram of the similarity for the jobs, one facet row per algorithm.
# The y axis is limited to 0..100 and taller bars are squished to the limit
# (oob = squish), so the exact per-bin count is printed as rotated text.
hist_binwidth <- 2.5 # shared by the bars and the count labels so bins line up
ggplot(data, aes(sim, fill = alg_name, group = alg_name)) +
  geom_histogram(color = "black", binwidth = hist_binwidth) +
  facet_grid(alg_name ~ ., switch = "y") +
  xlab("Similarity in %") +
  ylab("Count (cropped at 100)") +
  scale_y_continuous(limits = c(0, 100), oob = squish) +
  # The mapped aesthetic is fill (outline colour is fixed to black), so the
  # Set2 palette has to be applied through the fill scale to take effect.
  scale_fill_brewer(palette = "Set2") +
  theme(legend.position = "none") +
  # Count labels: y is written in terms of ..count.. so that it is evaluated
  # after binning; the result is a constant 95, just below the upper limit.
  # hjust = 1 with angle = 90 makes the rotated text end at that position.
  stat_bin(
    binwidth = hist_binwidth, geom = "text", hjust = 1, angle = 90,
    colour = "black", size = 3,
    aes(label = ..count.., y = 0 * (..count..) + 95)
  )