Kolmogorov-Smirnov changed to metric-wise computation
This commit is contained in:
		
							parent
							
								
									d9fbbdb87f
								
							
						
					
					
						commit
						cd307c98da
					
				| @ -5,6 +5,7 @@ filenames=( ${filenames[@]} "clustering_progress.csv" ) | ||||
| filenames=( ${filenames[@]} $(ls job_metadata*.csv) ) | ||||
| filenames=( ${filenames[@]} $( ls job_similarities_*.csv ) ) | ||||
| filenames=( ${filenames[@]} $( ls sim_computation_times_*.csv ) ) | ||||
| filenames=( ${filenames[@]} $( ls ks_*.csv ) ) | ||||
| 
 | ||||
| echo "${filenames[*]}" | ||||
| 
 | ||||
|  | ||||
| @ -1,10 +1,5 @@ | ||||
| #!/bin/bash | ||||
| 
 | ||||
| 
 | ||||
| #7488914 19865984 | ||||
| #4296426 18672376 | ||||
| #5024292 17944118 | ||||
| 
 | ||||
| #dataset_fn="../../datasets/job_codings_v4_confidential.csv" | ||||
| #jobids=( ) | ||||
| #jobids=( ${jobids[@]} 19865984 ) | ||||
| @ -20,8 +15,8 @@ jobids=( ${jobids[@]} 5024292 ) | ||||
| 
 | ||||
| set -x | ||||
| for jobid in ${jobids[@]}; do | ||||
|     sim_fn="./ks_similarities_$jobid.csv" | ||||
|     progress_fn="./ks_progress_$jobid.csv" | ||||
|     log_fn="./ks_fail_$jobid.log" | ||||
|     cargo run --release -- $dataset_fn $jobid $sim_fn $progress_fn $log_fn  & | ||||
|     sim_fn="$output_dir/ks_similarities_$jobid.csv" | ||||
|     progress_fn="$output_dir/ks_progress_$jobid.csv" | ||||
|     log_fn="$output_dir/ks_fail_$jobid.log" | ||||
|     time cargo run --release -- $dataset_fn $jobid $sim_fn $progress_fn $log_fn | ||||
| done | ||||
|  | ||||
| @ -14,7 +14,8 @@ use std::env; | ||||
| use std::io::LineWriter; | ||||
| 
 | ||||
| pub type Score = u32; | ||||
| pub type JobCoding = Vec<Score>; | ||||
| pub type MetricCoding = Vec<Score>; | ||||
| pub type JobCoding = Vec<MetricCoding>; | ||||
| pub type Similarity = f32; | ||||
| 
 | ||||
| pub type Jobid = u32; | ||||
| @ -23,8 +24,15 @@ pub type QCodings = HashMap<Jobid, JobCoding>; | ||||
| #[derive(Debug, Deserialize)] | ||||
| pub struct Record { | ||||
|     jobid: u32, | ||||
|     //q16_coding: String,
 | ||||
|     ks_coding: String, | ||||
|     ks_md_file_create: String, | ||||
|     ks_md_file_delete: String, | ||||
|     ks_md_mod: String, | ||||
|     ks_md_other: String, | ||||
|     ks_md_read: String, | ||||
|     ks_read_bytes: String, | ||||
|     ks_read_calls: String, | ||||
|     ks_write_bytes: String, | ||||
|     ks_write_calls: String, | ||||
| } | ||||
| 
 | ||||
| #[derive(Debug, Serialize)] | ||||
| @ -75,15 +83,24 @@ fn run(dataset_fn: String, jobid: Jobid, similarities_fn: String, progress_fn: S | ||||
|     //for result in rdr.deserialize().take(10000) {
 | ||||
|     for result in rdr.deserialize() { | ||||
|         let record: Record = result.expect("bla bla"); | ||||
|         //let q_coding = convert_to_coding(record.q16_coding);
 | ||||
|         let q_coding = convert_to_coding(record.ks_coding); | ||||
|         // Insert Non-Zero jobs only
 | ||||
|         if q_coding.iter().sum::<Score>() > (0 as Score) { | ||||
|         let q_coding = vec![ | ||||
| 			convert_to_coding(record.ks_md_file_create), | ||||
| 			convert_to_coding(record.ks_md_file_delete), | ||||
| 			convert_to_coding(record.ks_md_mod), | ||||
| 			convert_to_coding(record.ks_md_other), | ||||
| 			convert_to_coding(record.ks_md_read), | ||||
| 			convert_to_coding(record.ks_read_bytes), | ||||
| 			convert_to_coding(record.ks_read_calls), | ||||
| 			convert_to_coding(record.ks_write_bytes), | ||||
| 			convert_to_coding(record.ks_write_calls), | ||||
|         ]; | ||||
| 
 | ||||
|         // Skip all-zero jobs: keep a job only if its scores summed over all metrics are greater than zero
 | ||||
|         if q_coding.iter().map(|x| x.iter().sum::<Score>()).sum::<Score>() > (0 as Score) { | ||||
|            q_codings.insert(record.jobid, q_coding); | ||||
|         } | ||||
|     } | ||||
| 
 | ||||
|     let probe = q_codings[&jobid].clone(); | ||||
|     let similarities_file = File::create(&similarities_fn).expect("Unable to open"); | ||||
|     let mut wtr_similarities = csv::Writer::from_writer(&similarities_file); | ||||
|     let alg_name = "ks"; | ||||
| @ -91,11 +108,9 @@ fn run(dataset_fn: String, jobid: Jobid, similarities_fn: String, progress_fn: S | ||||
| 
 | ||||
|     let progress_file = File::create(&progress_fn).expect("Unable to open"); | ||||
|     let mut wtr_progress = csv::Writer::from_writer(&progress_file); | ||||
|     let mut start = chrono::Utc::now(); | ||||
|     let mut counter = 1; | ||||
| 
 | ||||
|     let mut avail_codings: Vec<(u32, &JobCoding)>; | ||||
| 
 | ||||
|     avail_codings = q_codings.iter().map(|(k, v)| (*k, v)).collect(); | ||||
|     let mut similarities: Vec<(Jobid, Similarity)> = Vec::new(); | ||||
| 
 | ||||
| @ -103,6 +118,8 @@ fn run(dataset_fn: String, jobid: Jobid, similarities_fn: String, progress_fn: S | ||||
|     let mut log_file = LineWriter::new(log_file); | ||||
| 
 | ||||
| 
 | ||||
|     let probe = q_codings[&jobid].clone(); | ||||
|     let mut start = chrono::Utc::now(); | ||||
|     while let Some((jobid, q_coding)) = avail_codings.pop() { | ||||
|         if (counter % 10_000) == 0 { | ||||
|             let stop = chrono::Utc::now(); | ||||
| @ -118,16 +135,35 @@ fn run(dataset_fn: String, jobid: Jobid, similarities_fn: String, progress_fn: S | ||||
| 
 | ||||
|         //println!("Processing {:?}", jobid);
 | ||||
|         //let similarity = ks_similarity(q_coding, &probe);
 | ||||
|         
 | ||||
|        
 | ||||
|         let mut metric_similarities = vec![]; | ||||
| 
 | ||||
|         let confidence = 0.95; | ||||
|         let similarity = match ks::test(q_coding, &probe, confidence) { | ||||
|             Ok(sim) => (1.0 - sim.reject_probability) as Similarity, | ||||
|             Err(e) => { | ||||
|                 let message = format!("jobid failed {:?}, because \" {:?}\"\n", jobid, e); | ||||
|                 log_file.write_all(message.as_bytes()).unwrap(); | ||||
|                 1.0 | ||||
|             } | ||||
|         }; | ||||
|         for metric_codings in q_coding.iter().zip(&probe) { | ||||
|             let metric_similarity = match ks::test(metric_codings.0, metric_codings.1, confidence) { | ||||
|                 Ok(sim) => { | ||||
|                     (1.0 - sim.reject_probability) as Similarity | ||||
|                 } | ||||
|                 Err(e) => { | ||||
|                     let message = format!("jobid failed {:?}, because \" {:?}\"\n", jobid, e); | ||||
|                     log_file.write_all(message.as_bytes()).unwrap(); | ||||
|                     1.0 | ||||
|                 } | ||||
|             }; | ||||
|             metric_similarities.push(metric_similarity); | ||||
|         } | ||||
|         let similarity = metric_similarities.iter().sum::<f32>() / (metric_similarities.len() as f32); | ||||
| 
 | ||||
|         //let similarity = match ks::test(q_coding, &probe, confidence) {
 | ||||
|         //    Ok(sim) => {
 | ||||
|         //        (1.0 - sim.reject_probability) as Similarity,
 | ||||
|         //    }
 | ||||
|         //    Err(e) => {
 | ||||
|         //        let message = format!("jobid failed {:?}, because \" {:?}\"\n", jobid, e);
 | ||||
|         //        log_file.write_all(message.as_bytes()).unwrap();
 | ||||
|         //        1.0
 | ||||
|         //    }
 | ||||
|         //};
 | ||||
| 
 | ||||
|         similarities.push((jobid, similarity)); | ||||
|         counter += 1; | ||||
|  | ||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user