|
|
@ -14,7 +14,8 @@ use std::env; |
|
|
|
use std::io::LineWriter; |
|
|
|
|
|
|
|
/// Single value of a coding; `run` sums codings as `Score` to detect all-zero jobs.
pub type Score = u32; |
|
|
|
// NOTE(review): `JobCoding` is declared twice in this view (here as `Vec<Score>` and
// below as `Vec<MetricCoding>`). This looks like a diff rendering in which this flat
// form is the pre-change line -- confirm that only one declaration exists in the file.
pub type JobCoding = Vec<Score>; |
|
|
|
/// Coding of a single metric: a sequence of `Score` values.
pub type MetricCoding = Vec<Score>; |
|
|
|
/// Coding of a whole job: a list of `MetricCoding`s (in `run`, one entry per
/// `ks_*` metric column of a record).
pub type JobCoding = Vec<MetricCoding>; |
|
|
|
/// Similarity value recorded per job in `run`, computed there from
/// `1.0 - reject_probability` of a `ks::test` result.
pub type Similarity = f32; |
|
|
|
|
|
|
|
/// Job identifier; key type of the `QCodings` map.
pub type Jobid = u32; |
|
|
@ -23,8 +24,15 @@ pub type QCodings = HashMap<Jobid, JobCoding>; |
|
|
|
/// One deserialized input row.
///
/// `run` obtains each `Record` from `rdr.deserialize()` and converts its string
/// columns into codings with `convert_to_coding`.
// NOTE(review): this view looks like a diff rendering with removed and added lines
// interleaved; `ks_coding` (and the commented-out `q16_coding`) may belong to the
// pre-change version of the struct -- confirm against the real file.
#[derive(Debug, Deserialize)] |
|
|
|
pub struct Record { |
|
|
|
/// Job identifier; used as the key when the record's coding is inserted into `q_codings`.
jobid: u32, |
|
|
|
//q16_coding: String,
|
|
|
|
/// Coding string passed to `convert_to_coding` in `run`; the summed scores of the
/// result gate the "non-zero jobs only" check.
ks_coding: String, |
|
|
|
// The remaining `ks_*` fields are per-metric coding strings. `run` converts each
// one with `convert_to_coding` and collects the results, in this field order,
// into the vector stored for the job.
ks_md_file_create: String, |
|
|
|
ks_md_file_delete: String, |
|
|
|
ks_md_mod: String, |
|
|
|
ks_md_other: String, |
|
|
|
ks_md_read: String, |
|
|
|
ks_read_bytes: String, |
|
|
|
ks_read_calls: String, |
|
|
|
ks_write_bytes: String, |
|
|
|
ks_write_calls: String, |
|
|
|
} |
|
|
|
|
|
|
|
#[derive(Debug, Serialize)] |
|
|
@ -75,15 +83,24 @@ fn run(dataset_fn: String, jobid: Jobid, similarities_fn: String, progress_fn: S |
|
|
|
//for result in rdr.deserialize().take(10000) {
|
|
|
|
for result in rdr.deserialize() { |
|
|
|
let record: Record = result.expect("bla bla"); |
|
|
|
//let q_coding = convert_to_coding(record.q16_coding);
|
|
|
|
let q_coding = convert_to_coding(record.ks_coding); |
|
|
|
// Insert Non-Zero jobs only
|
|
|
|
if q_coding.iter().sum::<Score>() > (0 as Score) { |
|
|
|
let q_coding = vec![ |
|
|
|
convert_to_coding(record.ks_md_file_create), |
|
|
|
convert_to_coding(record.ks_md_file_delete), |
|
|
|
convert_to_coding(record.ks_md_mod), |
|
|
|
convert_to_coding(record.ks_md_other), |
|
|
|
convert_to_coding(record.ks_md_read), |
|
|
|
convert_to_coding(record.ks_read_bytes), |
|
|
|
convert_to_coding(record.ks_read_calls), |
|
|
|
convert_to_coding(record.ks_write_bytes), |
|
|
|
convert_to_coding(record.ks_write_calls), |
|
|
|
]; |
|
|
|
|
|
|
|
// Filter Zero-Jobs
|
|
|
|
if q_coding.iter().map(|x| x.iter().sum::<Score>()).sum::<Score>() > (0 as Score) { |
|
|
|
q_codings.insert(record.jobid, q_coding); |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
let probe = q_codings[&jobid].clone(); |
|
|
|
let similarities_file = File::create(&similarities_fn).expect("Unable to open"); |
|
|
|
let mut wtr_similarities = csv::Writer::from_writer(&similarities_file); |
|
|
|
let alg_name = "ks"; |
|
|
@ -91,11 +108,9 @@ fn run(dataset_fn: String, jobid: Jobid, similarities_fn: String, progress_fn: S |
|
|
|
|
|
|
|
let progress_file = File::create(&progress_fn).expect("Unable to open"); |
|
|
|
let mut wtr_progress = csv::Writer::from_writer(&progress_file); |
|
|
|
let mut start = chrono::Utc::now(); |
|
|
|
let mut counter = 1; |
|
|
|
|
|
|
|
let mut avail_codings: Vec<(u32, &JobCoding)>; |
|
|
|
|
|
|
|
avail_codings = q_codings.iter().map(|(k, v)| (*k, v)).collect(); |
|
|
|
let mut similarities: Vec<(Jobid, Similarity)> = Vec::new(); |
|
|
|
|
|
|
@ -103,6 +118,8 @@ fn run(dataset_fn: String, jobid: Jobid, similarities_fn: String, progress_fn: S |
|
|
|
let mut log_file = LineWriter::new(log_file); |
|
|
|
|
|
|
|
|
|
|
|
let probe = q_codings[&jobid].clone(); |
|
|
|
let mut start = chrono::Utc::now(); |
|
|
|
while let Some((jobid, q_coding)) = avail_codings.pop() { |
|
|
|
if (counter % 10_000) == 0 { |
|
|
|
let stop = chrono::Utc::now(); |
|
|
@ -118,16 +135,35 @@ fn run(dataset_fn: String, jobid: Jobid, similarities_fn: String, progress_fn: S |
|
|
|
|
|
|
|
//println!("Processing {:?}", jobid);
|
|
|
|
//let similarity = ks_similarity(q_coding, &probe);
|
|
|
|
|
|
|
|
|
|
|
|
let mut metric_similarities = vec![]; |
|
|
|
|
|
|
|
let confidence = 0.95; |
|
|
|
let similarity = match ks::test(q_coding, &probe, confidence) { |
|
|
|
Ok(sim) => (1.0 - sim.reject_probability) as Similarity, |
|
|
|
Err(e) => { |
|
|
|
let message = format!("jobid failed {:?}, because \" {:?}\"\n", jobid, e); |
|
|
|
log_file.write_all(message.as_bytes()).unwrap(); |
|
|
|
1.0 |
|
|
|
} |
|
|
|
}; |
|
|
|
for metric_codings in q_coding.iter().zip(&probe) { |
|
|
|
let metric_similarity = match ks::test(metric_codings.0, metric_codings.1, confidence) { |
|
|
|
Ok(sim) => { |
|
|
|
(1.0 - sim.reject_probability) as Similarity |
|
|
|
} |
|
|
|
Err(e) => { |
|
|
|
let message = format!("jobid failed {:?}, because \" {:?}\"\n", jobid, e); |
|
|
|
log_file.write_all(message.as_bytes()).unwrap(); |
|
|
|
1.0 |
|
|
|
} |
|
|
|
}; |
|
|
|
metric_similarities.push(metric_similarity); |
|
|
|
} |
|
|
|
let similarity = metric_similarities.iter().sum::<f32>() / (metric_similarities.len() as f32); |
|
|
|
|
|
|
|
//let similarity = match ks::test(q_coding, &probe, confidence) {
|
|
|
|
// Ok(sim) => {
|
|
|
|
// (1.0 - sim.reject_probability) as Similarity,
|
|
|
|
// }
|
|
|
|
// Err(e) => {
|
|
|
|
// let message = format!("jobid failed {:?}, because \" {:?}\"\n", jobid, e);
|
|
|
|
// log_file.write_all(message.as_bytes()).unwrap();
|
|
|
|
// 1.0
|
|
|
|
// }
|
|
|
|
//};
|
|
|
|
|
|
|
|
similarities.push((jobid, similarity)); |
|
|
|
counter += 1; |
|
|
|