extern crate kolmogorov_smirnov as ks; extern crate csv; extern crate serde; extern crate chrono; use std::fs::File; use std::io::prelude::*; //use std::fs::File; use serde::Deserialize; use serde::Serialize; use std::collections::HashMap; use std::env; use std::io::LineWriter; pub type Score = u32; pub type JobCoding = Vec; pub type Similarity = f32; pub type Jobid = u32; pub type QCodings = HashMap; #[derive(Debug, Deserialize)] pub struct Record { jobid: u32, //q16_coding: String, ks_coding: String, } #[derive(Debug, Serialize)] pub struct SimilarityRow { pub jobid: u32, pub alg_id: u32, pub alg_name: String, pub similarity: f32 } #[derive(Debug, Serialize)] pub struct ProgressRow { jobid: u32, alg_id: u32, alg_name: String, delta: i64, } pub fn convert_to_coding(coding: String) -> Vec { let split = coding.split(":"); let vec: Vec = split .filter(|s| !s.is_empty()) //.map(|s| s.parse::().unwrap()) .map(|s| s.parse().unwrap()) .collect(); vec } //fn ks_similarity(xs: &Vec, ys: &Vec) -> Result { // let confidence = 0.95; // ks::test(xs, ys, confidence)? // let reject_probability = match result { // Ok(v) => v.reject_probability, // Err(_) => 1.0, // }; // //println!("is_rejected: {:?}\nstatistic: {:?}\nreject_probability: {:?}\ncritical_value: {:?}\nconfidence: {:?}", result.is_rejected, result.statistic, result.reject_probability, result.critical_value, result.confidence); // (1.0 - reject_probability) as Similarity //} fn run(dataset_fn: String, jobid: Jobid, similarities_fn: String, progress_fn: String, log_fn: String) { let mut q_codings: QCodings = HashMap::new(); let file = File::open(&dataset_fn).expect("Unable to open dataset."); let mut rdr = csv::Reader::from_reader(file); //for result in rdr.deserialize().take(10000) { for result in rdr.deserialize() { let record: Record = result.expect("bla bla"); //let q_coding = convert_to_coding(record.q16_coding); let q_coding = convert_to_coding(record.ks_coding); // Insert Non-Zero jobs only if q_coding.iter().sum::() > (0 as Score) { q_codings.insert(record.jobid, q_coding); } } let probe = q_codings[&jobid].clone(); let similarities_file = File::create(&similarities_fn).expect("Unable to open"); let mut wtr_similarities = csv::Writer::from_writer(&similarities_file); let alg_name = "ks"; let alg_id = 6; let progress_file = File::create(&progress_fn).expect("Unable to open"); let mut wtr_progress = csv::Writer::from_writer(&progress_file); let mut start = chrono::Utc::now(); let mut counter = 1; let mut avail_codings: Vec<(u32, &JobCoding)>; avail_codings = q_codings.iter().map(|(k, v)| (*k, v)).collect(); let mut similarities: Vec<(Jobid, Similarity)> = Vec::new(); let log_file = File::create(&log_fn).expect("Unable to open"); let mut log_file = LineWriter::new(log_file); while let Some((jobid, q_coding)) = avail_codings.pop() { if (counter % 10_000) == 0 { let stop = chrono::Utc::now(); let progress_row = ProgressRow { jobid: jobid, alg_id: alg_id, alg_name: String::from(alg_name), delta: ((stop - start).num_nanoseconds().unwrap()) }; wtr_progress.serialize(progress_row).unwrap(); start = stop; } //println!("Processing {:?}", jobid); //let similarity = ks_similarity(q_coding, &probe); let confidence = 0.95; let similarity = match ks::test(q_coding, &probe, confidence) { Ok(sim) => (1.0 - sim.reject_probability) as Similarity, Err(e) => { let message = format!("jobid failed {:?}, because \" {:?}\"\n", jobid, e); log_file.write_all(message.as_bytes()).unwrap(); 1.0 } }; similarities.push((jobid, similarity)); counter += 1; } for (jobid, similarity) in similarities.iter() { let similarity_row = SimilarityRow { jobid: *jobid, alg_id: alg_id, alg_name: String::from(alg_name), similarity: *similarity, }; wtr_similarities.serialize(similarity_row).unwrap(); } log_file.flush().unwrap(); } fn main() { let args: Vec = env::args().collect(); let dataset_fn = args[1].clone(); let jobid = args[2].parse::().unwrap(); let sim_fn = args[3].clone(); let progress_fn = args[4].clone(); let log_fn = args[5].clone(); println!("{:?}", args); run(dataset_fn, jobid, sim_fn, progress_fn, log_fn); } mod tests { //use super::*; #[test] fn test_ks_test() { let xs = vec!(0.0 , 1.0 , 2.0 , 3.0 , 4.0 , 5.0 , 6.0 , 7.0 , 8.0 , 9.0 , 10.0 , 11.0 , 12.0); let ys = vec!(12.0 , 11.0 , 10.0 , 9.0 , 8.0 , 7.0 , 6.0 , 5.0 , 4.0 , 3.0 , 2.0 , 1.0 , 0.0); ks_test(xs, ys); let c1 = vec![141.0,143.0,142.0,238.0,132.0,486.0,38.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,128.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0]; let c2 = vec![239.0,239.0,255.0,255.0,239.0,239.0,511.0,511.0,511.0,511.0,511.0,511.0,511.0,511.0,511.0,511.0,511.0,511.0,511.0,511.0,511.0,511.0,511.0,502.0,511.0,503.0]; ks_test(c1, c2); let c1 = vec![2.0,2.0,2.0,9.0,3.0,0.0,0.0,0.0]; let c2 = vec![2.0,2.0,2.0,2.0,8.0,3.0,0.0,10.0]; ks_test(c1, c2); } }