Visualization script and new structure
This commit is contained in:
parent
9ce020ae5e
commit
a7fab7d233
|
@ -0,0 +1,8 @@
|
|||
#!/bin/bash

# Decompress every .tar.xz archive found in the current working directory.
#
# The archives are enumerated with a glob instead of parsing the output of
# `ls`, so a file name containing whitespace or glob characters is passed to
# tar as a single word.

# With nullglob an unmatched pattern expands to nothing, so the loop body is
# skipped when there is no archive instead of running on the literal pattern.
shopt -s nullglob

for filename in *.tar.xz; do
    echo "Decompressing ${filename}"
    # The "./" prefix keeps a name starting with "-" from being read as an option.
    tar -xJf "./${filename}"
done
|
|
@ -0,0 +1,11 @@
|
|||
# Required for job visualization
|
||||
# job_visualization_r.ipynb
|
||||
|
||||
import pandas as pd
|
||||
|
||||
def read_pickle_file(file):
    """Load a pickled job record and return its data table with a flat index.

    The unpickled object must unpack into exactly four items; only the third
    one is used, the others are discarded.

    NOTE(review): unpickling can execute arbitrary code, so this presumably is
    only meant for trusted, locally produced files -- confirm.

    :param file: path (or file-like object) accepted by ``pandas.read_pickle``
    :return: the third item of the record after ``reset_index()``
    """
    _start, _stop, frame, _metadata = pd.read_pickle(file)
    return frame.reset_index()
|
||||
|
||||
|
|
@ -0,0 +1,208 @@
|
|||
#!/usr/bin/env Rscript
|
||||
|
||||
library('ggplot2')
|
||||
library('ggthemes')
|
||||
library('tidyverse')
|
||||
library('repr')
|
||||
library('jcolors')
|
||||
library("reticulate")
|
||||
|
||||
#setwd(source_dir)
|
||||
# Bind reticulate to the project's Python interpreter. `required = TRUE` is
# spelled out because `T` is an ordinary variable that can be reassigned.
use_python("/mnt/lustre01/work/ku0598/k202107/software/install/python/3.8.0/bin/python3", required = TRUE)
# Run the Python helper script so that the objects it defines become callable from R.
source_python("/work/ku0598/k202107/git/mistral-job-evaluation/scripts/jupyter/r_visual_jobs#pickle_reader.py")
|
||||
|
||||
# Settings shared by the whole evaluation: input/output locations and the
# key that is XOR-ed with the crypted job id.
global <- list(
  source_dir = "/work/ku0598/k202107/git/mistral-job-evaluation/data/eval_20200117",
  eval_dir = "../evaluation"
)
global[["fig_dir"]] <- sprintf("%s/pictures/jobs", global[["eval_dir"]])
global[["key"]] <- 22897682

# Settings of the job that is visualized: its id and the files derived from it.
config <- list(crypted_jobid = 4296426) # has 16 levels
config[["jobid"]] <- bitwXor(config[["crypted_jobid"]], global[["key"]])
config[["cat_fn"]] <- sprintf(
  "%s/600/cats/%s.json", global[["source_dir"]], config[["jobid"]]
)
config[["raw_fn"]] <- sprintf(
  "%s/600/jobdata/%s.pkl", global[["source_dir"]], config[["jobid"]]
)
|
||||
|
||||
|
||||
# Plot settings.
graph_config <- list(
  # Columns that are mapped to the fill colour, one plot set per column:
  #   "name"   : file systems
  #   "host"   : compute nodes
  #   "metric" : I/O metrics
  cols = c("metric", "host", "name"),
  # cols = c("host", "name"),

  # Enabled views; available are 'default', 'jscore', 'nscore', 'mscore'.
  views = c("jscore", "default", "nscore", "mscore"),
  # views = c("default"),

  # An x-axis label is set at every n-th segment boundary.
  x_breakpoint_interval = 5,

  # Segment size in minutes.
  seg_size = 10
)
|
||||
|
||||
|
||||
# Shorten the values of the `metric` column.
#
# The literal (non-regex) substrings below are replaced one after another, in
# the listed order: the "host.lustre." and "stats." parts are removed and the
# ".bytes" / ".calls" parts get an underscore instead of the dot.
#
# data: data frame with a `metric` column.
# Returns `data` with the rewritten `metric` column.
rename_metrics <- function(data) {
  substitutions <- c(
    "host.lustre." = "",
    "stats." = "",
    ".bytes" = "_bytes",
    ".calls" = "_calls"
  )
  for (needle in names(substitutions)) {
    data["metric"] <- lapply(
      data["metric"], gsub,
      pattern = needle, replacement = substitutions[[needle]], fixed = TRUE
    )
  }
  return(data)
}
|
||||
|
||||
|
||||
#' Render one score plot of a job and save it as a PNG file.
#'
#' Draws a stacked bar chart of `score` per `bin`, filled by the column `col`,
#' adds the facets and labels that belong to `view`, and writes the image to
#' `<fig_dir>/<jobid>/<view>_<col>.png`.
#'
#' @param gconf List providing `fig_dir`, the root directory of the figures.
#' @param cconf List providing `jobid`, used as name of the job sub-directory.
#' @param data Data frame with the columns `bin` and `score` and the columns
#'   used for colouring and facetting (`metric`, `host`, `name`).
#' @param view One of 'jscore', 'default', 'nscore', 'mscore'.
#' @param col Column mapped to the fill colour: 'host', 'metric' or 'name'.
#' @param x_breakpoints Values passed to `scale_x_discrete(breaks = ...)`.
#' @return The path of the written file, invisibly. For an unknown `view`
#'   nothing is written, a warning is raised and `NULL` is returned invisibly.
visualize_categories <- function(gconf, cconf, data, view, col, x_breakpoints) {
  out_dir <- sprintf('%s/%d', gconf[['fig_dir']], cconf[['jobid']])
  # The function runs once per view/column pair, so the directory usually
  # exists already; that is not worth a warning.
  dir.create(out_dir, recursive = TRUE, showWarnings = FALSE)

  # Legend title. An unknown column is rejected right away instead of failing
  # later with "object 'gtitle' not found".
  gtitle <- switch(col,
    host = 'Node',
    metric = 'Metric',
    name = 'File system',
    stop(sprintf("Unsupported colour column '%s'", col), call. = FALSE)
  )

  # The palette with black:
  #cbp2 = c("#000000", "#E69F00", "#56B4E9", "#009E73", "#F0E442", "#0072B2", "#D55E00", "#CC79A7", "#999999")
  # General plot
  p <- (
    ggplot(data, aes_string(x = 'bin', y = 'score', fill = col))
    + geom_bar(stat = 'identity')
    + scale_x_discrete(breaks = x_breakpoints)
    + guides(fill = guide_legend(title = gtitle, nrow = 15))
    + ylab('JScore')
    + xlab('Runtime in minutes')
    + theme_linedraw()
    + theme(
      legend.spacing.y = unit(0, 'cm'),
      legend.text = element_text(size = 8, margin = margin(t = 1)),
      strip.text.x = element_text(size = 8, color = "black"),
      strip.text.y = element_text(size = 8, color = "black"),
      legend.key = element_rect(size = 1),
      legend.key.size = unit(0.5, 'lines'),
      strip.background = element_rect(color = "black", fill = "#FFFFFF", linetype = "solid")
    )
  )

  # Only the metric colouring uses a dedicated palette. Hiding the legend for
  # jobs with more than 13 hosts was disabled in the original and stays off.
  if (col == 'metric') {
    p <- p + scale_fill_jcolors("pal12")
  }

  # View specific facets, labels and image size (in inches).
  if (view == 'jscore') {
    width <- 10
    height <- 2.5
  } else if (view == 'default') {
    p <- (
      p
      + facet_grid(metric ~ .)
      + ylab('Score')
      + theme(
        legend.position = 'none',
        strip.text.y = element_text(angle = 0)
      )
    )
    width <- 7
    height <- 7
  } else if (view == 'nscore') {
    p <- p + facet_grid(host ~ .) + ylab('NScore')
    if (col == 'name') {
      p <- p + theme(legend.position = 'bottom')
    }
    width <- 4
    height <- 4
  } else if (view == 'mscore') {
    p <- (
      p
      + facet_grid(host ~ metric)
      + ylab('MScore')
      + theme(
        legend.position = 'none',
        axis.text.x = element_text(angle = 90, hjust = 1)
      )
    )
    width <- 8
    height <- 4
  } else {
    warning(sprintf("Unknown view '%s', no plot written", view), call. = FALSE)
    return(invisible(NULL))
  }

  fn <- sprintf('%s/%s_%s.png', out_dir, view, col)
  # Pass the plot explicitly rather than depending on ggplot2's last_plot() state.
  ggsave(fn, plot = p, width = width, height = height)
  invisible(fn)
}
|
||||
|
||||
|
||||
|
||||
|
||||
# Split the runtime of the job into segments of `seg_size` minutes and sum up
# the categories per file system, metric, host and segment.
seg_size <- graph_config[['seg_size']]

cat_data <- rename_metrics(read.csv(config[['cat_fn']])) # categorized data
cat_data[['rmin']] <- cat_data[['runtime']] / 60 # runtime in minutes
duration <- max(ceiling(cat_data[['rmin']]))

# Extend the segment grid to the first boundary at or beyond `duration`.
# Stopping the sequence at `duration` itself would leave the samples after the
# last full segment without a bin (NA), and a job shorter than one segment
# would end up with a single break, which `cut` rejects.
n_segments <- max(1, ceiling(duration / seg_size))
bins <- seq(0, n_segments * seg_size, by = seg_size)

# Each segment is labelled with its upper boundary. `include.lowest` puts a
# sample taken at minute 0 into the first segment instead of dropping it.
d2 <- cat_data %>%
  mutate(bin = cut(rmin, breaks = bins, labels = bins[-1], include.lowest = TRUE))

d3 <- d2 %>%
  group_by(name, metric, host, bin) %>%
  summarise(score = sum(cat)) %>%
  ungroup()

# Label only every n-th segment boundary on the x axis.
x_breakpoints <- bins[seq(1, length(bins), graph_config[['x_breakpoint_interval']])]

# One plot per colour column and view.
for (col in graph_config[['cols']]) {
  for (view in graph_config[['views']]) {
    visualize_categories(global, config, d3, view, col, x_breakpoints)
  }
}
|
||||
|
||||
|
||||
## TODO
|
||||
#visualize_rawdata <- function(data) {
|
||||
#}
|
||||
|
||||
#pickle_data <- rename_metrics(read_pickle_file(config[['raw_fn']])) # raw data
|
||||
#print(head(pickle_data))
|
||||
#offset = min(pickle_data$timestamp)
|
||||
#dat = pickle_data[complete.cases(pickle_data),]
|
||||
#dat$runtime = dat$timestamp - offset
|
||||
#dat['rmin'] = dat['runtime'] / 60 # runtime in minutes
|
||||
|
||||
#visualize_rawdata(dat)
|
||||
|
Loading…
Reference in New Issue