This commit is contained in:
eugen.betke 2020-10-13 12:52:30 +02:00
parent 92272913e3
commit e9e9143250
2 changed files with 66 additions and 63 deletions

View File

@ -15,8 +15,8 @@ jobids=( ${jobids[@]} 5024292 )
set -x set -x
for jobid in ${jobids[@]}; do for jobid in ${jobids[@]}; do
sim_fn="$output_dir/ks_similarities_$jobid.csv" sim_fn="$output_dir/ks2_similarities_$jobid.csv"
progress_fn="$output_dir/ks_progress_$jobid.csv" progress_fn="$output_dir/ks2_progress_$jobid.csv"
log_fn="$output_dir/ks_fail_$jobid.log" log_fn="$output_dir/ks2_fail_$jobid.log"
time cargo run --release -- $dataset_fn $jobid $sim_fn $progress_fn $log_fn time cargo run --release -- $dataset_fn $jobid $sim_fn $progress_fn $log_fn
done done

View File

@ -24,15 +24,15 @@ pub type QCodings = HashMap<Jobid, JobCoding>;
#[derive(Debug, Deserialize)] #[derive(Debug, Deserialize)]
pub struct Record { pub struct Record {
jobid: u32, jobid: u32,
ks_md_file_create: String, md_file_create: String,
ks_md_file_delete: String, md_file_delete: String,
ks_md_mod: String, md_mod: String,
ks_md_other: String, md_other: String,
ks_md_read: String, md_read: String,
ks_read_bytes: String, read_bytes: String,
ks_read_calls: String, read_calls: String,
ks_write_bytes: String, write_bytes: String,
ks_write_calls: String, write_calls: String,
} }
#[derive(Debug, Serialize)] #[derive(Debug, Serialize)]
@ -76,15 +76,15 @@ fn run(dataset_fn: String, jobid: Jobid, similarities_fn: String, progress_fn: S
for result in rdr.deserialize() { for result in rdr.deserialize() {
let record: Record = result.expect("bla bla"); let record: Record = result.expect("bla bla");
let q_coding = vec![ let q_coding = vec![
convert_to_coding(record.ks_md_file_create), convert_to_coding(record.md_file_create),
convert_to_coding(record.ks_md_file_delete), convert_to_coding(record.md_file_delete),
convert_to_coding(record.ks_md_mod), convert_to_coding(record.md_mod),
convert_to_coding(record.ks_md_other), convert_to_coding(record.md_other),
convert_to_coding(record.ks_md_read), convert_to_coding(record.md_read),
convert_to_coding(record.ks_read_bytes), convert_to_coding(record.read_bytes),
convert_to_coding(record.ks_read_calls), convert_to_coding(record.read_calls),
convert_to_coding(record.ks_write_bytes), convert_to_coding(record.write_bytes),
convert_to_coding(record.ks_write_calls), convert_to_coding(record.write_calls),
]; ];
// Filter Zero-Jobs // Filter Zero-Jobs
@ -102,54 +102,57 @@ fn run(dataset_fn: String, jobid: Jobid, similarities_fn: String, progress_fn: S
let mut wtr_progress = csv::Writer::from_writer(&progress_file); let mut wtr_progress = csv::Writer::from_writer(&progress_file);
let mut counter = 1; let mut counter = 1;
let mut avail_codings: Vec<(u32, &JobCoding)>;
avail_codings = q_codings.iter().map(|(k, v)| (*k, v)).collect();
let mut similarities: Vec<(Jobid, Similarity, bool)> = Vec::new(); let mut similarities: Vec<(Jobid, Similarity, bool)> = Vec::new();
let log_file = File::create(&log_fn).expect("Unable to open"); let log_file = File::create(&log_fn).expect("Unable to open");
let mut log_file = LineWriter::new(log_file); let mut log_file = LineWriter::new(log_file);
let probe = q_codings[&jobid].clone(); for i in 0..10 {
let mut start_chunk = chrono::Utc::now(); let mut avail_codings: Vec<(u32, &JobCoding)>;
let start = start_chunk; avail_codings = q_codings.iter().map(|(k, v)| (*k, v)).collect();
while let Some((jobid, q_coding)) = avail_codings.pop() {
if (counter % 10_000) == 0 { let probe = q_codings[&jobid].clone();
let stop_chunk = chrono::Utc::now(); let mut start_chunk = chrono::Utc::now();
let progress_row = ProgressRow { let start = start_chunk;
iteration: 0, while let Some((jobid, q_coding)) = avail_codings.pop() {
alg_id: alg_id, if (counter % 10_000) == 0 {
alg_name: String::from(alg_name), let stop_chunk = chrono::Utc::now();
jobs_done: counter, let progress_row = ProgressRow {
jobs_total: q_codings.len(), iteration: i,
elapsed: (((stop_chunk - start).num_milliseconds() as f64) / 1000.0), alg_id: alg_id,
delta: (((stop_chunk - start_chunk).num_milliseconds() as f64) / 1000.0), alg_name: String::from(alg_name),
}; jobs_done: counter,
wtr_progress.serialize(progress_row).unwrap(); jobs_total: q_codings.len(),
start_chunk = stop_chunk; elapsed: (((stop_chunk - start).num_milliseconds() as f64) / 1000.0),
delta: (((stop_chunk - start_chunk).num_milliseconds() as f64) / 1000.0),
};
wtr_progress.serialize(progress_row).unwrap();
start_chunk = stop_chunk;
}
let mut metric_similarities = vec![];
let mut err = false;
let confidence = 0.95;
for metric_codings in q_coding.iter().zip(&probe) {
let metric_similarity = match ks::test(metric_codings.0, metric_codings.1, confidence) {
Ok(sim) => {
(1.0 - sim.reject_probability) as Similarity
}
Err(e) => {
err = true;
let message = format!("jobid failed {:?}, because {:?}\n", jobid, e);
log_file.write_all(message.as_bytes()).unwrap();
0.0
}
};
metric_similarities.push(metric_similarity);
}
let similarity = metric_similarities.iter().sum::<f32>() / (metric_similarities.len() as f32);
similarities.push((jobid, similarity, err));
counter += 1;
} }
let mut metric_similarities = vec![];
let mut err = false;
let confidence = 0.95;
for metric_codings in q_coding.iter().zip(&probe) {
let metric_similarity = match ks::test(metric_codings.0, metric_codings.1, confidence) {
Ok(sim) => {
(1.0 - sim.reject_probability) as Similarity
}
Err(e) => {
err = true;
let message = format!("jobid failed {:?}, because {:?}\n", jobid, e);
log_file.write_all(message.as_bytes()).unwrap();
0.0
}
};
metric_similarities.push(metric_similarity);
}
let similarity = metric_similarities.iter().sum::<f32>() / (metric_similarities.len() as f32);
similarities.push((jobid, similarity, err));
counter += 1;
} }