# ECDF of job similarity, one step curve per algorithm, legend below the panel.
# NOTE(review): the next ggsave() call in this script also writes "ecdf.png",
# so the image saved here is replaced by that later plot.
ggplot(data, aes(x = similarity, colour = alg_name, group = alg_name)) +
  stat_ecdf(geom = "step") +
  labs(x = "SIM", y = "Fraction of jobs") +
  scale_color_brewer(palette = "Set2") +
  theme(legend.position = "bottom")
ggsave(filename = "ecdf.png")
# ECDF of job similarity per algorithm, with the legend drawn inside the
# plotting area (legend.position given as a numeric pair) and saved as a
# wide 8 x 3 figure (ggsave's default units).
ggplot(data, aes(x = similarity, colour = alg_name, group = alg_name)) +
  stat_ecdf(geom = "step") +
  labs(x = "SIM", y = "Fraction of jobs") +
  scale_color_brewer(palette = "Set2") +
  theme(legend.position = c(0.9, 0.4))
ggsave(filename = "ecdf.png", width = 8, height = 3)
# ECDF limited to the similarity range [0.5, 1.0], legend inside the panel.
# xlim() sets scale limits, so rows outside the range are dropped before
# stat_ecdf() runs: the fractions are relative to the retained jobs only.
# NOTE(review): a later ggsave() in this script writes "ecdf-0.5.png" again,
# replacing the file produced here.
ggplot(data, aes(x = similarity, colour = alg_name, group = alg_name)) +
  stat_ecdf(geom = "step") +
  labs(x = "SIM", y = "Fraction of jobs") +
  xlim(0.5, 1.0) +
  scale_color_brewer(palette = "Set2") +
  theme(legend.position = c(0.9, 0.4))
ggsave(filename = "ecdf-0.5.png", width = 8, height = 3)
# ECDF over the jobs whose similarity is at least 0.5 (explicit subset `e`),
# legend below the panel; a summary of the subset is printed to the console
# before the figure is written to disk.
e <- filter(data, similarity >= 0.5)
ggplot(e, aes(x = similarity, colour = alg_name, group = alg_name)) +
  stat_ecdf(geom = "step") +
  labs(x = "SIM", y = "Fraction of jobs") +
  scale_color_brewer(palette = "Set2") +
  theme(legend.position = "bottom")
print(summary(e))
ggsave(filename = "ecdf-0.5.png")
# Histogram of job similarity, one facet row per algorithm.
#
# Bars are filled by algorithm using the Set2 palette. The palette has to be
# attached to the *fill* scale: the bar outline colour is the constant
# "black", so a colour scale would never be used. The y axis is limited to
# [0, 100]; taller bars are squished onto the upper limit instead of being
# dropped (oob = squish), hence the "cropped at 100" axis title.
sim_hist <- ggplot(data, aes(x = similarity, fill = alg_name)) +
  geom_histogram(colour = "black", binwidth = 0.025) +
  facet_grid(alg_name ~ ., switch = "y") +
  scale_y_continuous(limits = c(0, 100), oob = squish) +
  scale_fill_brewer(palette = "Set2") +
  ylab("Count (cropped at 100)") +
  theme(legend.position = "none")

# Same histogram with the true bin count written vertically over each bar, so
# the values of the cropped bars can still be read off.
# The label height has to be a computed aesthetic of the same length as the
# bins (stat_bin() accepts only one of x/y as input), which is why it is
# spelled `0 * count + 20` rather than a plain constant.
# `..count..` is kept for compatibility with older ggplot2 releases;
# after_stat(count) is the newer spelling.
sim_hist_labelled <- sim_hist +
  stat_bin(
    binwidth = 0.025,
    geom = "text",
    angle = 90,
    colour = "black",
    size = 3,
    aes(label = ..count.., y = 0 * (..count..) + 20)
  )

# Draw both variants (auto-printed at top level, as before) ...
sim_hist
sim_hist_labelled

# ... and save the labelled one. The plot is passed explicitly so the output
# does not depend on whichever plot happens to be last_plot().
ggsave("hist-sim.png", plot = sim_hist_labelled)
# load job information, i.e., the time series per job