diff --git a/tools/kstest/run.sh b/tools/kstest/run.sh index 0073b31..4adb3af 100755 --- a/tools/kstest/run.sh +++ b/tools/kstest/run.sh @@ -15,8 +15,8 @@ jobids=( ${jobids[@]} 5024292 ) set -x for jobid in ${jobids[@]}; do - sim_fn="$output_dir/ks_similarities_$jobid.csv" - progress_fn="$output_dir/ks_progress_$jobid.csv" - log_fn="$output_dir/ks_fail_$jobid.log" + sim_fn="$output_dir/ks2_similarities_$jobid.csv" + progress_fn="$output_dir/ks2_progress_$jobid.csv" + log_fn="$output_dir/ks2_fail_$jobid.log" time cargo run --release -- $dataset_fn $jobid $sim_fn $progress_fn $log_fn done diff --git a/tools/kstest/src/main.rs b/tools/kstest/src/main.rs index 9eef32b..acfc7f7 100644 --- a/tools/kstest/src/main.rs +++ b/tools/kstest/src/main.rs @@ -24,15 +24,15 @@ pub type QCodings = HashMap; #[derive(Debug, Deserialize)] pub struct Record { jobid: u32, - ks_md_file_create: String, - ks_md_file_delete: String, - ks_md_mod: String, - ks_md_other: String, - ks_md_read: String, - ks_read_bytes: String, - ks_read_calls: String, - ks_write_bytes: String, - ks_write_calls: String, + md_file_create: String, + md_file_delete: String, + md_mod: String, + md_other: String, + md_read: String, + read_bytes: String, + read_calls: String, + write_bytes: String, + write_calls: String, } #[derive(Debug, Serialize)] @@ -76,15 +76,15 @@ fn run(dataset_fn: String, jobid: Jobid, similarities_fn: String, progress_fn: S for result in rdr.deserialize() { let record: Record = result.expect("bla bla"); let q_coding = vec![ - convert_to_coding(record.ks_md_file_create), - convert_to_coding(record.ks_md_file_delete), - convert_to_coding(record.ks_md_mod), - convert_to_coding(record.ks_md_other), - convert_to_coding(record.ks_md_read), - convert_to_coding(record.ks_read_bytes), - convert_to_coding(record.ks_read_calls), - convert_to_coding(record.ks_write_bytes), - convert_to_coding(record.ks_write_calls), + convert_to_coding(record.md_file_create), + convert_to_coding(record.md_file_delete), + 
convert_to_coding(record.md_mod), + convert_to_coding(record.md_other), + convert_to_coding(record.md_read), + convert_to_coding(record.read_bytes), + convert_to_coding(record.read_calls), + convert_to_coding(record.write_bytes), + convert_to_coding(record.write_calls), ]; // Filter Zero-Jobs @@ -102,54 +102,57 @@ fn run(dataset_fn: String, jobid: Jobid, similarities_fn: String, progress_fn: S let mut wtr_progress = csv::Writer::from_writer(&progress_file); let mut counter = 1; - let mut avail_codings: Vec<(u32, &JobCoding)>; - avail_codings = q_codings.iter().map(|(k, v)| (*k, v)).collect(); let mut similarities: Vec<(Jobid, Similarity, bool)> = Vec::new(); let log_file = File::create(&log_fn).expect("Unable to open"); let mut log_file = LineWriter::new(log_file); - let probe = q_codings[&jobid].clone(); - let mut start_chunk = chrono::Utc::now(); - let start = start_chunk; - while let Some((jobid, q_coding)) = avail_codings.pop() { - if (counter % 10_000) == 0 { - let stop_chunk = chrono::Utc::now(); - let progress_row = ProgressRow { - iteration: 0, - alg_id: alg_id, - alg_name: String::from(alg_name), - jobs_done: counter, - jobs_total: q_codings.len(), - elapsed: (((stop_chunk - start).num_milliseconds() as f64) / 1000.0), - delta: (((stop_chunk - start_chunk).num_milliseconds() as f64) / 1000.0), - }; - wtr_progress.serialize(progress_row).unwrap(); - start_chunk = stop_chunk; + for i in 0..10 { + let mut avail_codings: Vec<(u32, &JobCoding)>; + avail_codings = q_codings.iter().map(|(k, v)| (*k, v)).collect(); + + let probe = q_codings[&jobid].clone(); + let mut start_chunk = chrono::Utc::now(); + let start = start_chunk; + while let Some((jobid, q_coding)) = avail_codings.pop() { + if (counter % 10_000) == 0 { + let stop_chunk = chrono::Utc::now(); + let progress_row = ProgressRow { + iteration: i, + alg_id: alg_id, + alg_name: String::from(alg_name), + jobs_done: counter, + jobs_total: q_codings.len(), + elapsed: (((stop_chunk - start).num_milliseconds() as 
f64) / 1000.0), + delta: (((stop_chunk - start_chunk).num_milliseconds() as f64) / 1000.0), + }; + wtr_progress.serialize(progress_row).unwrap(); + start_chunk = stop_chunk; + } + + let mut metric_similarities = vec![]; + let mut err = false; + + let confidence = 0.95; + for metric_codings in q_coding.iter().zip(&probe) { + let metric_similarity = match ks::test(metric_codings.0, metric_codings.1, confidence) { + Ok(sim) => { + (1.0 - sim.reject_probability) as Similarity + } + Err(e) => { + err = true; + let message = format!("jobid failed {:?}, because {:?}\n", jobid, e); + log_file.write_all(message.as_bytes()).unwrap(); + 0.0 + } + }; + metric_similarities.push(metric_similarity); + } + let similarity = metric_similarities.iter().sum::<f32>() / (metric_similarities.len() as f32); + + similarities.push((jobid, similarity, err)); + counter += 1; } - - let mut metric_similarities = vec![]; - let mut err = false; - - let confidence = 0.95; - for metric_codings in q_coding.iter().zip(&probe) { - let metric_similarity = match ks::test(metric_codings.0, metric_codings.1, confidence) { - Ok(sim) => { - (1.0 - sim.reject_probability) as Similarity - } - Err(e) => { - err = true; - let message = format!("jobid failed {:?}, because {:?}\n", jobid, e); - log_file.write_all(message.as_bytes()).unwrap(); - 0.0 - } - }; - metric_similarities.push(metric_similarity); - } - let similarity = metric_similarities.iter().sum::<f32>() / (metric_similarities.len() as f32); - - similarities.push((jobid, similarity, err)); - counter += 1; }