master
eugen.betke 2020-10-13 12:52:30 +02:00
parent 92272913e3
commit e9e9143250
2 changed files with 66 additions and 63 deletions

View File

@ -15,8 +15,8 @@ jobids=( ${jobids[@]} 5024292 )
set -x
# Invokes the cargo binary once for every job id in `jobids`, handing it
# per-job output paths (similarities CSV, progress CSV, failure log).
for jobid in ${jobids[@]}; do
# NOTE(review): this is a diff hunk rendered without +/- markers. The three
# `ks_` assignments appear to be the removed lines and the `ks2_` ones their
# replacements; read as plain shell, the later assignments simply win.
sim_fn="$output_dir/ks_similarities_$jobid.csv"
progress_fn="$output_dir/ks_progress_$jobid.csv"
log_fn="$output_dir/ks_fail_$jobid.log"
sim_fn="$output_dir/ks2_similarities_$jobid.csv"
progress_fn="$output_dir/ks2_progress_$jobid.csv"
log_fn="$output_dir/ks2_fail_$jobid.log"
# Timed release build/run; positional arguments are: dataset file, job id,
# then the three output paths assigned above.
time cargo run --release -- $dataset_fn $jobid $sim_fn $progress_fn $log_fn
done

View File

@ -24,15 +24,15 @@ pub type QCodings = HashMap<Jobid, JobCoding>;
// One input row: a job id plus nine per-metric values kept as raw strings.
// Instances are produced by `rdr.deserialize()` in `run`, and every string
// field is handed to `convert_to_coding` there.
// NOTE(review): presumably the field names must match the dataset's column
// headers for deserialization to succeed — confirm against the input CSV.
#[derive(Debug, Deserialize)]
pub struct Record {
jobid: u32,
// NOTE(review): diff hunk rendered without +/- markers. The nine `ks_`-prefixed
// fields below appear to be the removed (pre-change) side of the hunk.
ks_md_file_create: String,
ks_md_file_delete: String,
ks_md_mod: String,
ks_md_other: String,
ks_md_read: String,
ks_read_bytes: String,
ks_read_calls: String,
ks_write_bytes: String,
ks_write_calls: String,
// The nine unprefixed fields appear to be the added (post-change) side: the
// same metrics, in the same order, with the `ks_` prefix dropped.
md_file_create: String,
md_file_delete: String,
md_mod: String,
md_other: String,
md_read: String,
read_bytes: String,
read_calls: String,
write_bytes: String,
write_calls: String,
}
#[derive(Debug, Serialize)]
@ -76,15 +76,15 @@ fn run(dataset_fn: String, jobid: Jobid, similarities_fn: String, progress_fn: S
for result in rdr.deserialize() {
let record: Record = result.expect("bla bla");
let q_coding = vec![
convert_to_coding(record.ks_md_file_create),
convert_to_coding(record.ks_md_file_delete),
convert_to_coding(record.ks_md_mod),
convert_to_coding(record.ks_md_other),
convert_to_coding(record.ks_md_read),
convert_to_coding(record.ks_read_bytes),
convert_to_coding(record.ks_read_calls),
convert_to_coding(record.ks_write_bytes),
convert_to_coding(record.ks_write_calls),
convert_to_coding(record.md_file_create),
convert_to_coding(record.md_file_delete),
convert_to_coding(record.md_mod),
convert_to_coding(record.md_other),
convert_to_coding(record.md_read),
convert_to_coding(record.read_bytes),
convert_to_coding(record.read_calls),
convert_to_coding(record.write_bytes),
convert_to_coding(record.write_calls),
];
// Filter Zero-Jobs
@ -102,54 +102,57 @@ fn run(dataset_fn: String, jobid: Jobid, similarities_fn: String, progress_fn: S
let mut wtr_progress = csv::Writer::from_writer(&progress_file);
let mut counter = 1;
let mut avail_codings: Vec<(u32, &JobCoding)>;
avail_codings = q_codings.iter().map(|(k, v)| (*k, v)).collect();
let mut similarities: Vec<(Jobid, Similarity, bool)> = Vec::new();
let log_file = File::create(&log_fn).expect("Unable to open");
let mut log_file = LineWriter::new(log_file);
let probe = q_codings[&jobid].clone();
let mut start_chunk = chrono::Utc::now();
let start = start_chunk;
while let Some((jobid, q_coding)) = avail_codings.pop() {
if (counter % 10_000) == 0 {
let stop_chunk = chrono::Utc::now();
let progress_row = ProgressRow {
iteration: 0,
alg_id: alg_id,
alg_name: String::from(alg_name),
jobs_done: counter,
jobs_total: q_codings.len(),
elapsed: (((stop_chunk - start).num_milliseconds() as f64) / 1000.0),
delta: (((stop_chunk - start_chunk).num_milliseconds() as f64) / 1000.0),
};
wtr_progress.serialize(progress_row).unwrap();
start_chunk = stop_chunk;
for i in 0..10 {
let mut avail_codings: Vec<(u32, &JobCoding)>;
avail_codings = q_codings.iter().map(|(k, v)| (*k, v)).collect();
let probe = q_codings[&jobid].clone();
let mut start_chunk = chrono::Utc::now();
let start = start_chunk;
while let Some((jobid, q_coding)) = avail_codings.pop() {
if (counter % 10_000) == 0 {
let stop_chunk = chrono::Utc::now();
let progress_row = ProgressRow {
iteration: i,
alg_id: alg_id,
alg_name: String::from(alg_name),
jobs_done: counter,
jobs_total: q_codings.len(),
elapsed: (((stop_chunk - start).num_milliseconds() as f64) / 1000.0),
delta: (((stop_chunk - start_chunk).num_milliseconds() as f64) / 1000.0),
};
wtr_progress.serialize(progress_row).unwrap();
start_chunk = stop_chunk;
}
let mut metric_similarities = vec![];
let mut err = false;
let confidence = 0.95;
for metric_codings in q_coding.iter().zip(&probe) {
let metric_similarity = match ks::test(metric_codings.0, metric_codings.1, confidence) {
Ok(sim) => {
(1.0 - sim.reject_probability) as Similarity
}
Err(e) => {
err = true;
let message = format!("jobid failed {:?}, because {:?}\n", jobid, e);
log_file.write_all(message.as_bytes()).unwrap();
0.0
}
};
metric_similarities.push(metric_similarity);
}
let similarity = metric_similarities.iter().sum::<f32>() / (metric_similarities.len() as f32);
similarities.push((jobid, similarity, err));
counter += 1;
}
let mut metric_similarities = vec![];
let mut err = false;
let confidence = 0.95;
for metric_codings in q_coding.iter().zip(&probe) {
let metric_similarity = match ks::test(metric_codings.0, metric_codings.1, confidence) {
Ok(sim) => {
(1.0 - sim.reject_probability) as Similarity
}
Err(e) => {
err = true;
let message = format!("jobid failed {:?}, because {:?}\n", jobid, e);
log_file.write_all(message.as_bytes()).unwrap();
0.0
}
};
metric_similarities.push(metric_similarity);
}
let similarity = metric_similarities.iter().sum::<f32>() / (metric_similarities.len() as f32);
similarities.push((jobid, similarity, err));
counter += 1;
}