2020-09-01 10:49:54 +00:00
|
|
|
extern crate kolmogorov_smirnov as ks;
|
|
|
|
extern crate csv;
|
|
|
|
extern crate serde;
|
|
|
|
extern crate chrono;
|
|
|
|
|
|
|
|
use std::fs::File;
|
|
|
|
use std::io::prelude::*;
|
|
|
|
|
|
|
|
//use std::fs::File;
|
|
|
|
use serde::Deserialize;
|
|
|
|
use serde::Serialize;
|
|
|
|
use std::collections::HashMap;
|
|
|
|
use std::env;
|
|
|
|
use std::io::LineWriter;
|
|
|
|
|
|
|
|
/// A single score value; the element type of the vectors produced by `convert_to_coding`.
pub type Score = u32;
|
2020-09-02 18:02:56 +00:00
|
|
|
/// The score sequence parsed from one `ks_*` column of a dataset record.
pub type MetricCoding = Vec<Score>;
|
|
|
|
/// All metric codings of one job, in the order `run` builds them from a `Record`.
pub type JobCoding = Vec<MetricCoding>;
|
2020-09-01 10:49:54 +00:00
|
|
|
/// Similarity value; `run` computes it as `1.0 - reject_probability` of a KS test.
pub type Similarity = f32;
|
|
|
|
|
|
|
|
/// Numeric job identifier; used as the key of `QCodings`.
pub type Jobid = u32;
|
|
|
|
/// Maps each job id to the codings of that job.
pub type QCodings = HashMap<Jobid, JobCoding>;
|
|
|
|
|
|
|
|
#[derive(Debug, Deserialize)]
|
|
|
|
pub struct Record {
|
|
|
|
jobid: u32,
|
2020-09-02 18:02:56 +00:00
|
|
|
ks_md_file_create: String,
|
|
|
|
ks_md_file_delete: String,
|
|
|
|
ks_md_mod: String,
|
|
|
|
ks_md_other: String,
|
|
|
|
ks_md_read: String,
|
|
|
|
ks_read_bytes: String,
|
|
|
|
ks_read_calls: String,
|
|
|
|
ks_write_bytes: String,
|
|
|
|
ks_write_calls: String,
|
2020-09-01 10:49:54 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
#[derive(Debug, Serialize)]
|
|
|
|
pub struct SimilarityRow {
|
|
|
|
pub jobid: u32,
|
|
|
|
pub alg_id: u32,
|
|
|
|
pub alg_name: String,
|
|
|
|
pub similarity: f32
|
|
|
|
}
|
|
|
|
|
|
|
|
#[derive(Debug, Serialize)]
|
|
|
|
pub struct ProgressRow {
|
|
|
|
jobid: u32,
|
|
|
|
alg_id: u32,
|
|
|
|
alg_name: String,
|
|
|
|
delta: i64,
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Parses a colon-separated coding string into its list of scores.
///
/// Empty segments (leading, trailing or doubled `:`) are skipped, so an empty string
/// yields an empty vector.
///
/// # Panics
///
/// Panics if a non-empty segment cannot be parsed as a `Score`.
pub fn convert_to_coding(coding: String) -> Vec<Score> {
    let mut scores: Vec<Score> = Vec::new();
    for token in coding.split(":") {
        if token.is_empty() {
            continue;
        }
        scores.push(token.parse().unwrap());
    }
    scores
}
|
|
|
|
|
|
|
|
|
|
|
|
//fn ks_similarity(xs: &Vec<u32>, ys: &Vec<u32>) -> Result<Similarity, String> {
|
|
|
|
// let confidence = 0.95;
|
|
|
|
// ks::test(xs, ys, confidence)?
|
|
|
|
|
|
|
|
// let reject_probability = match result {
|
|
|
|
// Ok(v) => v.reject_probability,
|
|
|
|
// Err(_) => 1.0,
|
|
|
|
// };
|
|
|
|
// //println!("is_rejected: {:?}\nstatistic: {:?}\nreject_probability: {:?}\ncritical_value: {:?}\nconfidence: {:?}", result.is_rejected, result.statistic, result.reject_probability, result.critical_value, result.confidence);
|
|
|
|
// (1.0 - reject_probability) as Similarity
|
|
|
|
//}
|
|
|
|
|
|
|
|
|
|
|
|
fn run(dataset_fn: String, jobid: Jobid, similarities_fn: String, progress_fn: String, log_fn: String) {
|
|
|
|
let mut q_codings: QCodings = HashMap::new();
|
|
|
|
let file = File::open(&dataset_fn).expect("Unable to open dataset.");
|
|
|
|
let mut rdr = csv::Reader::from_reader(file);
|
|
|
|
|
|
|
|
//for result in rdr.deserialize().take(10000) {
|
|
|
|
for result in rdr.deserialize() {
|
|
|
|
let record: Record = result.expect("bla bla");
|
2020-09-02 18:02:56 +00:00
|
|
|
let q_coding = vec![
|
|
|
|
convert_to_coding(record.ks_md_file_create),
|
|
|
|
convert_to_coding(record.ks_md_file_delete),
|
|
|
|
convert_to_coding(record.ks_md_mod),
|
|
|
|
convert_to_coding(record.ks_md_other),
|
|
|
|
convert_to_coding(record.ks_md_read),
|
|
|
|
convert_to_coding(record.ks_read_bytes),
|
|
|
|
convert_to_coding(record.ks_read_calls),
|
|
|
|
convert_to_coding(record.ks_write_bytes),
|
|
|
|
convert_to_coding(record.ks_write_calls),
|
|
|
|
];
|
|
|
|
|
|
|
|
// Filter Zero-Jobs
|
|
|
|
if q_coding.iter().map(|x| x.iter().sum::<Score>()).sum::<Score>() > (0 as Score) {
|
2020-09-01 10:49:54 +00:00
|
|
|
q_codings.insert(record.jobid, q_coding);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
let similarities_file = File::create(&similarities_fn).expect("Unable to open");
|
|
|
|
let mut wtr_similarities = csv::Writer::from_writer(&similarities_file);
|
|
|
|
let alg_name = "ks";
|
|
|
|
let alg_id = 6;
|
|
|
|
|
|
|
|
let progress_file = File::create(&progress_fn).expect("Unable to open");
|
|
|
|
let mut wtr_progress = csv::Writer::from_writer(&progress_file);
|
|
|
|
let mut counter = 1;
|
|
|
|
|
|
|
|
let mut avail_codings: Vec<(u32, &JobCoding)>;
|
|
|
|
avail_codings = q_codings.iter().map(|(k, v)| (*k, v)).collect();
|
|
|
|
let mut similarities: Vec<(Jobid, Similarity)> = Vec::new();
|
|
|
|
|
|
|
|
let log_file = File::create(&log_fn).expect("Unable to open");
|
|
|
|
let mut log_file = LineWriter::new(log_file);
|
|
|
|
|
|
|
|
|
2020-09-02 18:02:56 +00:00
|
|
|
let probe = q_codings[&jobid].clone();
|
|
|
|
let mut start = chrono::Utc::now();
|
2020-09-01 10:49:54 +00:00
|
|
|
while let Some((jobid, q_coding)) = avail_codings.pop() {
|
|
|
|
if (counter % 10_000) == 0 {
|
|
|
|
let stop = chrono::Utc::now();
|
|
|
|
let progress_row = ProgressRow {
|
|
|
|
jobid: jobid,
|
|
|
|
alg_id: alg_id,
|
|
|
|
alg_name: String::from(alg_name),
|
|
|
|
delta: ((stop - start).num_nanoseconds().unwrap())
|
|
|
|
};
|
|
|
|
wtr_progress.serialize(progress_row).unwrap();
|
|
|
|
start = stop;
|
|
|
|
}
|
|
|
|
|
|
|
|
//println!("Processing {:?}", jobid);
|
|
|
|
//let similarity = ks_similarity(q_coding, &probe);
|
2020-09-02 18:02:56 +00:00
|
|
|
|
|
|
|
let mut metric_similarities = vec![];
|
|
|
|
|
2020-09-01 10:49:54 +00:00
|
|
|
let confidence = 0.95;
|
2020-09-02 18:02:56 +00:00
|
|
|
for metric_codings in q_coding.iter().zip(&probe) {
|
|
|
|
let metric_similarity = match ks::test(metric_codings.0, metric_codings.1, confidence) {
|
|
|
|
Ok(sim) => {
|
|
|
|
(1.0 - sim.reject_probability) as Similarity
|
|
|
|
}
|
|
|
|
Err(e) => {
|
|
|
|
let message = format!("jobid failed {:?}, because \" {:?}\"\n", jobid, e);
|
|
|
|
log_file.write_all(message.as_bytes()).unwrap();
|
|
|
|
1.0
|
|
|
|
}
|
|
|
|
};
|
|
|
|
metric_similarities.push(metric_similarity);
|
|
|
|
}
|
|
|
|
let similarity = metric_similarities.iter().sum::<f32>() / (metric_similarities.len() as f32);
|
|
|
|
|
|
|
|
//let similarity = match ks::test(q_coding, &probe, confidence) {
|
|
|
|
// Ok(sim) => {
|
|
|
|
// (1.0 - sim.reject_probability) as Similarity,
|
|
|
|
// }
|
|
|
|
// Err(e) => {
|
|
|
|
// let message = format!("jobid failed {:?}, because \" {:?}\"\n", jobid, e);
|
|
|
|
// log_file.write_all(message.as_bytes()).unwrap();
|
|
|
|
// 1.0
|
|
|
|
// }
|
|
|
|
//};
|
2020-09-01 10:49:54 +00:00
|
|
|
|
|
|
|
similarities.push((jobid, similarity));
|
|
|
|
counter += 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
for (jobid, similarity) in similarities.iter() {
|
|
|
|
let similarity_row = SimilarityRow {
|
|
|
|
jobid: *jobid,
|
|
|
|
alg_id: alg_id,
|
|
|
|
alg_name: String::from(alg_name),
|
|
|
|
similarity: *similarity,
|
|
|
|
};
|
|
|
|
wtr_similarities.serialize(similarity_row).unwrap();
|
|
|
|
}
|
|
|
|
log_file.flush().unwrap();
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
fn main() {
|
|
|
|
let args: Vec<String> = env::args().collect();
|
|
|
|
let dataset_fn = args[1].clone();
|
|
|
|
let jobid = args[2].parse::<u32>().unwrap();
|
|
|
|
let sim_fn = args[3].clone();
|
|
|
|
let progress_fn = args[4].clone();
|
|
|
|
let log_fn = args[5].clone();
|
|
|
|
println!("{:?}", args);
|
|
|
|
|
|
|
|
run(dataset_fn, jobid, sim_fn, progress_fn, log_fn);
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
#[cfg(test)]
mod tests {
    use super::*;

    /// Runs the KS test on two samples the same way `run` does for one metric and returns
    /// the resulting similarity (`1.0 - reject_probability`, or `1.0` when the test fails).
    ///
    /// The samples are written as whole-number floats and converted to `Score`, because
    /// the program itself feeds integer scores to `ks::test`.
    fn ks_test(xs: Vec<f64>, ys: Vec<f64>) -> Similarity {
        let xs: Vec<Score> = xs.iter().map(|v| *v as Score).collect();
        let ys: Vec<Score> = ys.iter().map(|v| *v as Score).collect();
        let confidence = 0.95;
        match ks::test(&xs, &ys, confidence) {
            Ok(result) => (1.0 - result.reject_probability) as Similarity,
            Err(_) => 1.0,
        }
    }

    /// Checks that a similarity lies in the closed interval [0, 1].
    // NOTE(review): assumes `reject_probability` is a probability in [0, 1] — confirm
    // against the `kolmogorov_smirnov` version in use.
    fn assert_is_probability(similarity: Similarity) {
        assert!(similarity >= 0.0 && similarity <= 1.0, "similarity out of range: {}", similarity);
    }

    #[test]
    fn test_ks_test() {
        // Same values in reversed order.
        let xs = vec!(0.0 , 1.0 , 2.0 , 3.0 , 4.0 , 5.0 , 6.0 , 7.0 , 8.0 , 9.0 , 10.0 , 11.0 , 12.0);
        let ys = vec!(12.0 , 11.0 , 10.0 , 9.0 , 8.0 , 7.0 , 6.0 , 5.0 , 4.0 , 3.0 , 2.0 , 1.0 , 0.0);
        assert_is_probability(ks_test(xs, ys));

        // Samples of different length with clearly different value ranges.
        let c1 = vec![141.0,143.0,142.0,238.0,132.0,486.0,38.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,128.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0];
        let c2 = vec![239.0,239.0,255.0,255.0,239.0,239.0,511.0,511.0,511.0,511.0,511.0,511.0,511.0,511.0,511.0,511.0,511.0,511.0,511.0,511.0,511.0,511.0,511.0,502.0,511.0,503.0];
        assert_is_probability(ks_test(c1, c2));

        // Two short samples of equal length.
        let c1 = vec![2.0,2.0,2.0,9.0,3.0,0.0,0.0,0.0];
        let c2 = vec![2.0,2.0,2.0,2.0,8.0,3.0,0.0,10.0];
        assert_is_probability(ks_test(c1, c2));
    }
}
|