// 2020-09-01 10:49:54 +00:00
extern crate kolmogorov_smirnov as ks;
extern crate csv;
extern crate serde;
extern crate chrono;
use std::fs::File;
use std::io::prelude::*;
//use std::fs::File;
use serde::Deserialize;
use serde::Serialize;
use std::collections::HashMap;
use std::env;
use std::io::LineWriter;
/// A single entry of a coding, as parsed from colon-separated text by `convert_to_coding`.
pub type Score = u32;
/// The sequence of scores recorded for one metric of a job.
pub type MetricCoding = Vec<Score>;
/// All metric codings of one job, one `MetricCoding` per metric.
pub type JobCoding = Vec<MetricCoding>;
/// Similarity value between two jobs; `run` derives it from `1.0 - reject_probability`.
pub type Similarity = f32;
/// Identifier of a job in the dataset.
pub type Jobid = u32;
/// Codings of every loaded job, keyed by job id.
pub type QCodings = HashMap<Jobid, JobCoding>;
#[derive(Debug, Deserialize)]
pub struct Record {
jobid: u32,
2020-09-02 18:02:56 +00:00
ks_md_file_create: String,
ks_md_file_delete: String,
ks_md_mod: String,
ks_md_other: String,
ks_md_read: String,
ks_read_bytes: String,
ks_read_calls: String,
ks_write_bytes: String,
ks_write_calls: String,
// 2020-09-01 10:49:54 +00:00
#[derive(Debug, Serialize)]
pub struct SimilarityRow {
pub jobid: u32,
pub alg_id: u32,
pub alg_name: String,
pub similarity: f32
#[derive(Debug, Serialize)]
pub struct ProgressRow {
jobid: u32,
alg_id: u32,
alg_name: String,
delta: i64,
pub fn convert_to_coding(coding: String) -> Vec<Score> {
let split = coding.split(":");
let vec: Vec<Score> = split
.filter(|s| !s.is_empty())
//.map(|s| s.parse::<F>().unwrap())
.map(|s| s.parse().unwrap())
//fn ks_similarity(xs: &Vec<u32>, ys: &Vec<u32>) -> Result<Similarity, String> {
// let confidence = 0.95;
// ks::test(xs, ys, confidence)?
// let reject_probability = match result {
// Ok(v) => v.reject_probability,
// Err(_) => 1.0,
// };
// //println!("is_rejected: {:?}\nstatistic: {:?}\nreject_probability: {:?}\ncritical_value: {:?}\nconfidence: {:?}", result.is_rejected, result.statistic, result.reject_probability, result.critical_value, result.confidence);
// (1.0 - reject_probability) as Similarity
/// Loads job codings from the CSV file `dataset_fn` and compares each stored job with the
/// probe job `jobid` using `ks::test` at a fixed confidence of 0.95.
///
/// * `dataset_fn` - path of the CSV dataset; each row is deserialized into a `Record`.
/// * `jobid` - id of the probe job; it is looked up with `q_codings[&jobid]`, which panics
///   if the id is not a key of the map.
/// * `similarities_fn`, `progress_fn`, `log_fn` - output paths, each created with
///   `File::create` (an existing file is truncated).
///
/// Panics (via `expect`/`unwrap`) when a file cannot be opened or created or when a row
/// cannot be deserialized.
///
/// NOTE(review): this listing looks truncated. Closing delimiters, the elements of the
/// `vec![` literal and the statements that would write rows and log messages are not
/// visible here, and the bare timestamp lines are not Rust. Confirm against the full file.
fn run(dataset_fn: String, jobid: Jobid, similarities_fn: String, progress_fn: String, log_fn: String) {
// Codings of all jobs read from the dataset, keyed by job id.
let mut q_codings: QCodings = HashMap::new();
let file = File::open(&dataset_fn).expect("Unable to open dataset.");
let mut rdr = csv::Reader::from_reader(file);
//for result in rdr.deserialize().take(10000) {
for result in rdr.deserialize() {
// A row that cannot be deserialized into `Record` aborts the run.
let record: Record = result.expect("bla bla");
2020-09-02 18:02:56 +00:00
// NOTE(review): the elements of this literal are not visible in this listing.
let q_coding = vec![
// Filter Zero-Jobs: the condition holds only if the sum of all scores over all metric
// codings is positive.
if q_coding.iter().map(|x| x.iter().sum::<Score>()).sum::<Score>() > (0 as Score) {
2020-09-01 10:49:54 +00:00
q_codings.insert(record.jobid, q_coding);
let similarities_file = File::create(&similarities_fn).expect("Unable to open");
let mut wtr_similarities = csv::Writer::from_writer(&similarities_file);
// Algorithm name and id copied into every ProgressRow and SimilarityRow built below.
let alg_name = "ks";
let alg_id = 6;
let progress_file = File::create(&progress_fn).expect("Unable to open");
let mut wtr_progress = csv::Writer::from_writer(&progress_file);
// Iteration counter for the progress check below; starts at 1.
let mut counter = 1;
// Work list of (job id, borrowed coding) pairs; consumed from the back with `pop`.
// It is built from HashMap iteration, so the processing order is unspecified.
let mut avail_codings: Vec<(u32, &JobCoding)>;
avail_codings = q_codings.iter().map(|(k, v)| (*k, v)).collect();
let mut similarities: Vec<(Jobid, Similarity)> = Vec::new();
let log_file = File::create(&log_fn).expect("Unable to open");
// Shadows the plain file handle with a line-buffered writer.
let mut log_file = LineWriter::new(log_file);
2020-09-02 18:02:56 +00:00
// Owned copy of the probe job's coding; indexing panics if `jobid` is not in the map.
let probe = q_codings[&jobid].clone();
// Start of the current progress interval.
let mut start = chrono::Utc::now();
2020-09-01 10:49:54 +00:00
// Inside the loop `jobid` shadows the function parameter and names the job being compared.
while let Some((jobid, q_coding)) = avail_codings.pop() {
// When `counter` is a multiple of 10_000, build a progress row holding the nanoseconds
// elapsed since `start`, then restart the interval.
if (counter % 10_000) == 0 {
let stop = chrono::Utc::now();
let progress_row = ProgressRow {
jobid: jobid,
alg_id: alg_id,
alg_name: String::from(alg_name),
// `unwrap` panics if the elapsed nanoseconds do not fit into an i64.
delta: ((stop - start).num_nanoseconds().unwrap())
start = stop;
//println!("Processing {:?}", jobid);
//let similarity = ks_similarity(q_coding, &probe);
2020-09-02 18:02:56 +00:00
let mut metric_similarities = vec![];
2020-09-01 10:49:54 +00:00
let confidence = 0.95;
2020-09-02 18:02:56 +00:00
// Pair the candidate's metric codings with the probe's, position by position; `zip` stops
// at the shorter of the two.
for metric_codings in q_coding.iter().zip(&probe) {
let metric_similarity = match ks::test(metric_codings.0, metric_codings.1, confidence) {
Ok(sim) => {
// Per-metric similarity is one minus the test's reject probability.
(1.0 - sim.reject_probability) as Similarity
Err(e) => {
// NOTE(review): the use of `message` and the value of this arm are not visible here.
let message = format!("jobid failed {:?}, because \" {:?}\"\n", jobid, e);
// Mean of the per-metric similarities; the result is NaN when `metric_similarities` is empty.
let similarity = metric_similarities.iter().sum::<f32>() / (metric_similarities.len() as f32);
//let similarity = match ks::test(q_coding, &probe, confidence) {
// Ok(sim) => {
// (1.0 - sim.reject_probability) as Similarity,
// }
// Err(e) => {
// let message = format!("jobid failed {:?}, because \" {:?}\"\n", jobid, e);
// log_file.write_all(message.as_bytes()).unwrap();
// 1.0
// }
2020-09-01 10:49:54 +00:00
similarities.push((jobid, similarity));
counter += 1;
// Turn every collected (job id, similarity) pair into an output row.
for (jobid, similarity) in similarities.iter() {
let similarity_row = SimilarityRow {
jobid: *jobid,
alg_id: alg_id,
alg_name: String::from(alg_name),
similarity: *similarity,
fn main() {
let args: Vec<String> = env::args().collect();
let dataset_fn = args[1].clone();
let jobid = args[2].parse::<u32>().unwrap();
let sim_fn = args[3].clone();
let progress_fn = args[4].clone();
let log_fn = args[5].clone();
println!("{:?}", args);
run(dataset_fn, jobid, sim_fn, progress_fn, log_fn);
mod tests {
//use super::*;
fn test_ks_test() {
let xs = vec!(0.0 , 1.0 , 2.0 , 3.0 , 4.0 , 5.0 , 6.0 , 7.0 , 8.0 , 9.0 , 10.0 , 11.0 , 12.0);
let ys = vec!(12.0 , 11.0 , 10.0 , 9.0 , 8.0 , 7.0 , 6.0 , 5.0 , 4.0 , 3.0 , 2.0 , 1.0 , 0.0);
ks_test(xs, ys);
let c1 = vec![141.0,143.0,142.0,238.0,132.0,486.0,38.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,128.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0];
let c2 = vec![239.0,239.0,255.0,255.0,239.0,239.0,511.0,511.0,511.0,511.0,511.0,511.0,511.0,511.0,511.0,511.0,511.0,511.0,511.0,511.0,511.0,511.0,511.0,502.0,511.0,503.0];
ks_test(c1, c2);
let c1 = vec![2.0,2.0,2.0,9.0,3.0,0.0,0.0,0.0];
let c2 = vec![2.0,2.0,2.0,2.0,8.0,3.0,0.0,10.0];
ks_test(c1, c2);