6 changed files with 227 additions and 0 deletions
@ -0,0 +1,8 @@ |
|||
#!/bin/bash
# Decompress every *.tar.xz archive found in the current directory.

# With nullglob an unmatched pattern expands to nothing, so the loop below is
# simply skipped instead of handing the literal string "*.tar.xz" to tar.
shopt -s nullglob

# Glob straight into an array rather than parsing `ls` output: archive names
# containing whitespace stay intact as single elements.
filenames=( *.tar.xz )

for filename in "${filenames[@]}"; do
    echo "Decompressing ${filename}"
    tar -xJf "${filename}"
done
@ -0,0 +1,11 @@ |
|||
# Required for job visualization |
|||
# job_visualization_r.ipynb |
|||
|
|||
import pandas as pd |
|||
|
|||
def read_pickle_file(file):
    """Load a pickled job record and return its data table with a flat index.

    The pickle is unpacked as a 4-item sequence ``(start, stop, data,
    metadata)``; only ``data`` is used. Any other length raises the usual
    unpacking ``ValueError``.

    Args:
        file: Anything accepted by ``pandas.read_pickle``.

    Returns:
        The result of ``data.reset_index()``.

    NOTE(review): unpickling executes arbitrary code from the file; this is
    presumably only ever pointed at trusted, locally produced job data.
    """
    _start, _stop, frame, _metadata = pd.read_pickle(file)
    return frame.reset_index()
|||
|
|||
|
@ -0,0 +1,208 @@ |
|||
#!/usr/bin/env Rscript |
|||
|
|||
library('ggplot2') |
|||
library('ggthemes') |
|||
library('tidyverse') |
|||
library('repr') |
|||
library('jcolors') |
|||
library("reticulate") |
|||
|
|||
#setwd(source_dir) |
|||
# Pin reticulate to a specific Python interpreter. `TRUE` is spelled out
# because `T` is an ordinary variable that can be reassigned.
use_python("/mnt/lustre01/work/ku0598/k202107/software/install/python/3.8.0/bin/python3", required = TRUE)

# Source the Python helper so its functions become callable from R.
# NOTE(review): the path contains '#' between "r_visual_jobs" and
# "pickle_reader.py" where a '/' might be expected; confirm it is intended.
source_python("/work/ku0598/k202107/git/mistral-job-evaluation/scripts/jupyter/r_visual_jobs#pickle_reader.py")
|||
|
|||
# Run-wide settings: input data location, output locations and the XOR key
# used to turn the crypted job id back into the real one.
global <- list(
  source_dir = "/work/ku0598/k202107/git/mistral-job-evaluation/data/eval_20200117",
  eval_dir = "../evaluation"
)
global[["fig_dir"]] <- sprintf("%s/pictures/jobs", global[["eval_dir"]])
global[["key"]] <- 22897682

# Settings for the single job being visualized.
config <- list(crypted_jobid = 4296426) # has 16 levels
config[["jobid"]] <- bitwXor(config[["crypted_jobid"]], global[["key"]])
config[["cat_fn"]] <- sprintf(
  "%s/600/cats/%s.json", global[["source_dir"]], config[["jobid"]]
)
config[["raw_fn"]] <- sprintf(
  "%s/600/jobdata/%s.pkl", global[["source_dir"]], config[["jobid"]]
)
|||
|
|||
|
|||
# Plot settings consumed by the binning code and the plotting loops below.
graph_config <- list(
  # Entities that can be colorized (one set of figures per entry):
  #   "name"   : file systems
  #   "host"   : compute nodes
  #   "metric" : I/O metrics
  cols = c("metric", "host", "name"),
  # cols = c("host", "name"),

  # Enabled views; available: "default", "jscore", "nscore", "mscore"
  views = c("jscore", "default", "nscore", "mscore"),
  # views = c("default"),

  # Put an x-axis label at every n-th segment boundary
  x_breakpoint_interval = 5,

  # Segment size in minutes
  seg_size = 10
)
|||
|
|||
|
|||
# Shorten the values of the `metric` column.
#
# Applies four literal (non-regex) substitutions, in this order:
#   "host.lustre." -> "",  "stats." -> "",
#   ".bytes" -> "_bytes",  ".calls" -> "_calls"
#
# Args:
#   data: data frame with a `metric` column.
# Returns:
#   `data` with the rewritten `metric` column; other columns are untouched.
rename_metrics <- function(data) {
  rules <- list(
    c("host.lustre.", ""),
    c("stats.", ""),
    c(".bytes", "_bytes"),
    c(".calls", "_calls")
  )
  shorten <- function(values) {
    for (rule in rules) {
      values <- gsub(rule[[1]], rule[[2]], values, fixed = TRUE)
    }
    values
  }
  data["metric"] <- lapply(data["metric"], shorten)
  data
}
|||
|
|||
|
|||
# Draw one stacked bar chart of scores per time segment and save it as a PNG
# under "<fig_dir>/<jobid>/<view>_<col>.png".
#
# Args:
#   gconf: list; `fig_dir` is read as the root directory for figures.
#   cconf: list; `jobid` is read and formatted with "%d" as the sub-directory.
#   data: data frame handed to ggplot(). `bin` is mapped to x, `score` to y and
#     the column named by `col` to the fill color. `metric` and `host` are used
#     for faceting in the 'default', 'nscore' and 'mscore' views.
#   view: one of 'jscore', 'default', 'nscore', 'mscore'. Any other value
#     writes no file.
#   col: one of 'host', 'metric', 'name'; anything else is an error.
#   x_breakpoints: passed to scale_x_discrete(breaks = ...).
#
# Returns:
#   Invisibly, the path of the written file, or NULL if `view` is unknown.
visualize_categories <- function(gconf, cconf, data, view, col, x_breakpoints) {
  out_dir <- sprintf('%s/%d', gconf[['fig_dir']], cconf[['jobid']])
  # Called once per (col, view) pair, so the directory usually exists already;
  # suppress the "already exists" warning.
  dir.create(out_dir, recursive = TRUE, showWarnings = FALSE)

  # Legend title; fail with a clear message instead of an undefined `gtitle`.
  gtitle <- switch(col,
    host = 'Node',
    metric = 'Metric',
    name = 'File system',
    stop(sprintf("unsupported col '%s'", col), call. = FALSE)
  )

  # Alternative palette (with black), currently unused:
  # cbp2 = c("#000000", "#E69F00", "#56B4E9", "#009E73", "#F0E442",
  #          "#0072B2", "#D55E00", "#CC79A7", "#999999")

  # General plot shared by all views.
  # NOTE(review): aes_string() and element_rect(size = ) are deprecated in
  # recent ggplot2 releases; kept to stay compatible with the installed one.
  p <- ggplot(data, aes_string(x = 'bin', y = 'score', fill = col)) +
    geom_bar(stat = 'identity') +
    scale_x_discrete(breaks = x_breakpoints) +
    guides(fill = guide_legend(title = gtitle, nrow = 15)) +
    ylab('JScore') +
    xlab('Runtime in minutes') +
    theme_linedraw() +
    theme(
      legend.spacing.y = unit(0, 'cm'),
      legend.text = element_text(size = 8, margin = margin(t = 1)),
      strip.text.x = element_text(size = 8, color = "black"),
      strip.text.y = element_text(size = 8, color = "black"),
      legend.key = element_rect(size = 1),
      legend.key.size = unit(0.5, 'lines'),
      strip.background = element_rect(color = "black", fill = "#FFFFFF", linetype = "solid")
    )

  # Only the metric coloring gets a dedicated palette. The original code also
  # had a branch for more than 13 hosts whose only content was a disabled
  # `legend.position = 'none'`, i.e. it changed nothing.
  if (col == 'metric') {
    p <- p + scale_fill_jcolors("pal12")
  }

  # Each view adds its own faceting/labels and output size. The plot is passed
  # to ggsave() explicitly rather than relying on last_plot().
  fn <- NULL
  if (view == 'jscore') {
    fn <- sprintf('%s/jscore_%s.png', out_dir, col)
    ggsave(fn, plot = p, width = 10, height = 2.5)
  } else if (view == 'default') {
    p <- p +
      facet_grid(metric ~ .) +
      ylab('Score') +
      theme(
        legend.position = 'none',
        strip.text.y = element_text(angle = 0)
      )
    fn <- sprintf('%s/default_%s.png', out_dir, col)
    ggsave(fn, plot = p, width = 7, height = 7)
  } else if (view == 'nscore') {
    p <- p +
      facet_grid(host ~ .) +
      ylab('NScore')
    if (col == 'name') {
      p <- p + theme(legend.position = 'bottom')
    }
    fn <- sprintf('%s/nscore_%s.png', out_dir, col)
    ggsave(fn, plot = p, width = 4, height = 4)
  } else if (view == 'mscore') {
    p <- p +
      facet_grid(host ~ metric) +
      ylab('MScore') +
      theme(
        legend.position = 'none',
        axis.text.x = element_text(angle = 90, hjust = 1)
      )
    fn <- sprintf('%s/mscore_%s.png', out_dir, col)
    ggsave(fn, plot = p, width = 8, height = 4)
  }

  invisible(fn)
}
|||
|
|||
|
|||
|
|||
|
|||
# Split the job runtime into segments of graph_config[['seg_size']] minutes
# and sum the category values per (file system, metric, host, segment).
# NOTE(review): the input file is named *.json but is parsed with read.csv();
# confirm the on-disk format.
cat_data <- rename_metrics(read.csv(config[['cat_fn']])) # categorized data
cat_data[['rmin']] <- cat_data[['runtime']] / 60 # runtime in minutes
duration <- max(ceiling(cat_data[['rmin']]))

# Round the last break up to a whole segment so rows in the final, partial
# segment are not turned into NA by cut(); always keep at least one segment
# because cut() needs two breaks.
seg_size <- graph_config[['seg_size']]
n_segments <- max(1, ceiling(duration / seg_size))
bins <- seq(0, n_segments * seg_size, by = seg_size)

# Each segment is labelled with its upper bound. include.lowest = TRUE puts
# rows with a runtime of exactly 0 into the first segment instead of NA.
d2 <- cat_data %>%
  mutate(bin = cut(rmin, breaks = bins, labels = bins[-1], include.lowest = TRUE))
d3 <- d2 %>%
  group_by(name, metric, host, bin) %>%
  summarise(score = sum(cat)) %>%
  ungroup()

# Label only every n-th segment boundary on the x axis.
x_breakpoints <- bins[seq(1, length(bins), graph_config[['x_breakpoint_interval']])]

# One figure per (colorized entity, view) combination.
for (col in graph_config[['cols']]) {
  for (view in graph_config[['views']]) {
    visualize_categories(global, config, d3, view, col, x_breakpoints)
  }
}
|||
|
|||
|
|||
## TODO |
|||
#visualize_rawdata <- function(data) { |
|||
#} |
|||
|
|||
#pickle_data <- rename_metrics(read_pickle_file(config[['raw_fn']])) # raw data |
|||
#print(head(pickle_data)) |
|||
#offset = min(pickle_data$timestamp) |
|||
#dat = pickle_data[complete.cases(pickle_data),] |
|||
#dat$runtime = dat$timestamp - offset |
|||
#dat['rmin'] = dat['runtime'] / 60 # runtime in minutes |
|||
|
|||
#visualize_rawdata(dat) |
|||
|
Loading…
Reference in new issue