Visualization scripts

This commit is contained in:
eugen.betke 2020-08-19 18:38:50 +02:00
parent bf46c043ab
commit 85f5d42274
2 changed files with 162 additions and 65 deletions

View File

@ -13,14 +13,13 @@ args <- commandArgs(trailingOnly = TRUE)
use_python("/mnt/lustre01/work/ku0598/k202107/software/install/python/3.8.0/bin/python3", required=T) use_python("/mnt/lustre01/work/ku0598/k202107/software/install/python/3.8.0/bin/python3", required=T)
source_python("/work/ku0598/k202107/git/mistral-job-evaluation/scripts/jupyter/r_visual_jobs#pickle_reader.py") source_python("/work/ku0598/k202107/git/mistral-job-evaluation/scripts/jupyter/r_visual_jobs#pickle_reader.py")
global = list() global = list()
global[['source_dir']] = '/work/ku0598/k202107/git/mistral-job-evaluation/data/eval_20200117' global[['source_dir']] = '/work/ku0598/k202107/git/mistral-job-evaluation/data/eval_20200117'
global[['eval_dir']] = '../evaluation' global[['eval_dir']] = '../evaluation'
global[['fig_dir']] = sprintf('%s/figures/job_visualization', global[['eval_dir']]) global[['fig_dir']] = sprintf('%s/figures/job_visualization', global[['eval_dir']])
global[['key']] = 22897682 global[['key']] = 22897682
config = list() config = list()
#config[['crypted_jobid']] = 4296426
config[['crypted_jobid']] = strtoi(args[1]) config[['crypted_jobid']] = strtoi(args[1])
config[['jobid']] = bitwXor(config[['crypted_jobid']], global[['key']]) config[['jobid']] = bitwXor(config[['crypted_jobid']], global[['key']])
config[['cat_fn']] = sprintf("%s/600/cats/%s.json", global[['source_dir']], config[['jobid']]) config[['cat_fn']] = sprintf("%s/600/cats/%s.json", global[['source_dir']], config[['jobid']])
@ -28,24 +27,33 @@ config[['raw_fn']] = sprintf('%s/600/jobdata/%s.pkl', global[['source_dir']], co
graph_config = list() graph_config = list()
# Colorized entities # View
# "name" : file systems graph_config[['cols']] = c('metric', 'host', 'name') # Colorized entities: "name" : file systems; "host" : compute nodes, "metric" : I/O metrics
# "host" : compute nodes graph_config[['views']] = c('jscore', 'default', 'nscore', 'mscore') # Enable views: 'default', 'jscore', 'nscore', 'mscore'
# "metric" : I/O metrics #graph_config[['views']] = c('nscore')
graph_config[['cols']] = c('metric', 'host', 'name') graph_config[['n_x_breakpoints']] = 5 # Number of breakpoints on x-axis
#graph_config[['cols']] = c('host', 'name') graph_config[['seg_size']] = 10 # Segments size in minutes
# Enable views # Size
#'default', 'jscore', 'nscore', 'mscore' graph_config[['plot_size']] = list(
graph_config[['views']] = c('jscore', 'default', 'nscore', 'mscore') 'default' = list('height'=1, 'width'=10),
#graph_config[['views']] = c('default') 'jscore' = list('height'=3, 'width'=10),
'nscore' = list('height'=1, 'width'=14),
'mscore' = list('height'=1, 'width'=1))
# Set at nth position a label # Dimensions Limits
graph_config[['x_breakpoint_interval']] = 5 graph_config[['max_dimensions']] = list(
'default' = list('seg'=1000, 'host'=13, 'name'=2, 'metric'=9),
# Segments size in minutes 'jscore' = list('seg'=1000, 'host'=13, 'name'=2, 'metric'=9),
graph_config[['seg_size']] = 10 'nscore' = list('seg'=1000, 'host'=50, 'name'=2, 'metric'=9),
'mscore' = list('seg'=1000, 'host'=50, 'name'=2, 'metric'=9))
# Legend Limits
graph_config[['max_legend_size']] = list(
'default' = list('seg'=1000, 'host'=15, 'name'=2, 'metric'=9),
'jscore' = list('seg'=1000, 'host'=15, 'name'=2, 'metric'=9),
'nscore' = list('seg'=1000, 'host'=15, 'name'=2, 'metric'=9),
'mscore' = list('seg'=1000, 'host'=15, 'name'=2, 'metric'=9))
rename_metrics <- function(data) { rename_metrics <- function(data) {
data['metric'] <- lapply(data['metric'], gsub, pattern = "host.lustre.", replacement = "", fixed = TRUE) data['metric'] <- lapply(data['metric'], gsub, pattern = "host.lustre.", replacement = "", fixed = TRUE)
@ -56,10 +64,8 @@ rename_metrics <- function(data) {
} }
visualize_categories <- function(gconf, cconf, data, view, col, x_breakpoints) {
out_dir = sprintf('%s/%d_%d', gconf[['fig_dir']], cconf[['jobid']], cconf[['crypted_jobid']]) visualize_categories <- function(fn, gconf, cconf, vconf, data, view, col, x_breakpoints, dims) {
dir.create(out_dir, recursive=TRUE)
# Set legend title # Set legend title
if (col == 'host') { if (col == 'host') {
gtitle = 'Node' gtitle = 'Node'
@ -70,14 +76,16 @@ visualize_categories <- function(gconf, cconf, data, view, col, x_breakpoints) {
else if (col == 'name') { else if (col == 'name') {
gtitle = 'File system' gtitle = 'File system'
} }
title = sprintf('JOBID: %d / %d (M:H:F:S)=(%d:%d:%d:%d)', cconf$jobid, cconf$crypted_jobid, dims$metric, dims$host, dims$name, dims$seg)
# The palette with black: # The palette with black:
#cbp2 = c("#000000", "#E69F00", "#56B4E9", "#009E73", "#F0E442", "#0072B2", "#D55E00", "#CC79A7", "#999999") #cbp2 = c("#000000", "#E69F00", "#56B4E9", "#009E73", "#F0E442", "#0072B2", "#D55E00", "#CC79A7", "#999999")
# General plot # General plot
p <- ( p <- (
ggplot(data, aes_string(x='bin', y='score', fill=col)) ggplot(data, aes_string(x='seg', y='score', fill=col))
#+ geom_bar(stat='summary', fun.y = "mean") #+ geom_bar(stat='summary', fun.y = "mean")
+ ggtitle(title)
+ geom_bar(stat='identity') + geom_bar(stat='identity')
+ scale_x_discrete(breaks=x_breakpoints) + scale_x_discrete(breaks=x_breakpoints)
#+ scale_fill_manual(values= cbp2) #+ scale_fill_manual(values= cbp2)
@ -85,7 +93,7 @@ visualize_categories <- function(gconf, cconf, data, view, col, x_breakpoints) {
+ guides( + guides(
fill = guide_legend(title=gtitle, nrow=15) fill = guide_legend(title=gtitle, nrow=15)
) )
+ ylab('JScore') #+ theme(aspect.ratio = 1)
+ xlab('Runtime in minutes') + xlab('Runtime in minutes')
+ theme_linedraw() + theme_linedraw()
#+ theme_classic() #+ theme_classic()
@ -94,7 +102,7 @@ visualize_categories <- function(gconf, cconf, data, view, col, x_breakpoints) {
#legend.text=element_text(size=6), #legend.text=element_text(size=6),
legend.spacing.y = unit(0, 'cm'), legend.spacing.y = unit(0, 'cm'),
#legend.spacing.x = unit(0, 'cm'), #legend.spacing.x = unit(0, 'cm'),
legend.text = element_text(size=8, margin = margin(t = 1)), legend.text = element_text(size = 8, margin = margin(t = 1)),
strip.text.x = element_text(size = 8, color = "black"), strip.text.x = element_text(size = 8, color = "black"),
strip.text.y = element_text(size = 8, color = "black"), strip.text.y = element_text(size = 8, color = "black"),
legend.key = element_rect(size = 1), legend.key = element_rect(size = 1),
@ -104,14 +112,10 @@ visualize_categories <- function(gconf, cconf, data, view, col, x_breakpoints) {
# panel.grid.minor=element_line(size=0.25, color=alpha('black', 0.25)) # panel.grid.minor=element_line(size=0.25, color=alpha('black', 0.25))
) )
) )
# Dimensions modifier
if (col == 'host') { if (col == 'host') {
if (nrow(unique(data['host'])) > 13) { # do nothing
p <- (p +
theme(
legend.position='none'
)
)
}
} }
else if (col == 'metric') { else if (col == 'metric') {
p <- (p p <- (p
@ -119,26 +123,40 @@ visualize_categories <- function(gconf, cconf, data, view, col, x_breakpoints) {
) )
} }
else if (col == 'name') { else if (col == 'name') {
# do nothing
} }
else if (col == 'seg') {
if (view == 'jscore') { # do nothing
fn = sprintf('%s/jscore_%s.png', out_dir, col)
ggsave(fn, width=10, height=2.5)
} }
# View modifiers
if (view == 'default') { if (view == 'default') {
p <- ( p <- (p
p + facet_grid(metric ~ .)
+ facet_grid(metric ~ .) + ylab('Score')
+ ylab('') + theme(
+ ylab('Score') strip.text.y = element_text(angle=0)
+ theme( )
legend.position='none',
#strip.text.x = element_text(angle=0),
strip.text.y = element_text(angle=0),
)
) )
fn = sprintf('%s/default_%s.png', out_dir, col) # Disable legend if dimensions are too large
ggsave(fn, width=7, height=7) if (dims[[col]] > vconf$max_legend_size[[view]][[col]]) {
p <- p + theme (legend.position='none')
}
ggsave(fn, width=vconf$plot_size[[view]][['width']], height=vconf$plot_size[[view]][['height']] * dims[['metric']])
}
else if (view == 'jscore') {
p <- (p
+ ylab('JScore')
+ theme (
strip.text.y = element_text(angle=0),
)
)
# Disable legend if dimensions are too large
if (dims[[col]] > vconf$max_legend_size[[view]][[col]]) {
p <- p + theme (legend.position='none')
}
ggsave(fn, width=vconf$plot_size[[view]][['width']], height=vconf$plot_size[[view]][['height']])
} }
else if (view == 'nscore') { else if (view == 'nscore') {
p <- ( p <- (
@ -146,31 +164,71 @@ visualize_categories <- function(gconf, cconf, data, view, col, x_breakpoints) {
+ facet_grid(host ~ .) + facet_grid(host ~ .)
+ ylab('NScore') + ylab('NScore')
+ theme( + theme(
legend.position='none' strip.text.y = element_text(angle=0),
aspect.ratio = vconf$plot_size$nscore$height / vconf$plot_size$nscore$width,
#legend.position='bottom'
) )
) )
if (col == 'name') { # Disable legend if dimensions are too large
p <- p + theme(legend.position = 'bottom') if (dims[[col]] > vconf$max_legend_size[[view]][[col]]) {
p <- p + theme (legend.position='none')
} }
fn = sprintf('%s/nscore_%s.png', out_dir, col) extra_space = 2
ggsave(fn, width=4, height=4) ggsave(fn, width=vconf$plot_size[[view]][['width']], height=vconf$plot_size[[view]][['height']] * (dims[['host']] + extra_space))
} }
else if (view == 'mscore') { else if (view == 'mscore') {
p <- ( p <- (
p p
+ facet_grid(host ~ metric) + facet_grid(host ~ metric)
#+ coord_fixed(ratio=dims[['host']]/dims[['metric']])
#+ coord_fixed(ratio=dims[['metric']]/dims[['host']])
#+ coord_fixed(ratio=1)
+ ylab('MScore') + ylab('MScore')
+ theme( + theme(
legend.position='none', axis.text.x = element_text(angle=90, hjust=1),
axis.text.x = element_text(angle=90, hjust=1) aspect.ratio = 1,
) )
) )
fn = sprintf('%s/mscore_%s.png', out_dir, col) # Disable legend if dimensions are too large
ggsave(fn, width=8, height=4) if (dims[[col]] > vconf$max_legend_size[[view]][[col]]) {
p <- p + theme (legend.position='none')
}
extra_space = 2
ggsave(fn, width=vconf$plot_size[[view]][['width']] * dims[['metric']], height=vconf$plot_size[[view]][['height']] * (dims[['host']] + extra_space))
} }
} }
#' Check whether a job's dimensions exceed the configured limits for a view.
#'
#' The number of segments ("seg") is limited for every view. In addition,
#' "default" limits the number of metrics, "nscore" limits the number of
#' hosts, and "mscore" limits both hosts and metrics. "jscore" has no
#' additional limit.
#'
#' @param view Name of the view: "default", "jscore", "nscore" or "mscore".
#' @param dims Named list with the observed sizes; the entries "seg", "host"
#'   and "metric" are read as required by the view.
#' @param graph_config Configuration list; the limits are read from
#'   graph_config[["max_dimensions"]][[view]].
#' @return TRUE if at least one limited dimension is larger than its maximum,
#'   FALSE otherwise.
#' @details Stops with an error if `view` is unknown or has no configured
#'   limits.
exceeds_limits <- function(view, dims, graph_config) {
  # Validate the view before touching the configuration. Previously an
  # unconfigured view failed in the first comparison with "argument is of
  # length zero", and the unknown-view branch called the non-existent exit().
  limited <- switch(view,
    default = c("seg", "metric"),
    jscore = "seg",
    nscore = c("seg", "host"),
    mscore = c("seg", "host", "metric"),
    stop("Unknown view: ", view, call. = FALSE)
  )

  # Exact-name lookup; `$` would silently partial-match list names.
  max_dims <- graph_config[["max_dimensions"]][[view]]
  if (is.null(max_dims)) {
    stop("No max_dimensions configured for view: ", view, call. = FALSE)
  }

  # Same order as before: segments first, then the view-specific dimensions.
  for (dimension in limited) {
    if (dims[[dimension]] > max_dims[[dimension]]) {
      return(TRUE)
    }
  }
  FALSE
}
# Create 10 minutes segments # Create 10 minutes segments
@ -179,19 +237,48 @@ cat_data['rmin'] = cat_data['runtime'] / 60 # runtime in minutes
duration = max(ceiling(cat_data['rmin'])) duration = max(ceiling(cat_data['rmin']))
bins = seq(0, duration, graph_config[['seg_size']] ) bins = seq(0, duration, graph_config[['seg_size']] )
d2 <- cat_data %>% d2 <- cat_data %>%
group_by(cat) %>% group_by(cat) %>%
mutate(bin = cut(rmin, breaks = bins, labels = bins[-1])) mutate(seg = cut(rmin, breaks = bins, labels = bins[-1]))
d3 <- d2 %>% d3 <- d2 %>%
group_by(name, metric, host, bin) %>% group_by(name, metric, host, seg) %>%
summarise(score = sum(cat)) summarise(score = sum(cat))
x_breakpoints <- bins[seq(1, length(bins), graph_config[['x_breakpoint_interval']])] dimensions = list()
dimensions[['metric']] <- length(unique(d3$metric))
dimensions[['name']] <- length(unique(d3$name))
dimensions[['host']] <- length(unique(d3$host))
dimensions[['seg']] <- length(unique(d3$seg))
x_breakpoints <- bins[seq(1, length(bins), dimensions[['seg']]/graph_config[['n_x_breakpoints']]+1)]
#x_breakpoints[length(x_breakpoints)+1] <- (dimensions[['seg']]-0)*10
out_dir = sprintf('%s/%d_%d', global[['fig_dir']], config[['jobid']], config[['crypted_jobid']])
dir.create(out_dir, recursive=TRUE)
for (col in graph_config[['cols']]) { for (col in graph_config[['cols']]) {
for (view in graph_config[['views']]) { for (view in graph_config[['views']]) {
visualize_categories (global, config, d3, view, col, x_breakpoints) fn = sprintf('%s/%s_%s.png', out_dir, view, col)
fn_skip = sprintf("%s.skip", fn)
if (exceeds_limits(view, dimensions, graph_config)) {
if (file.exists(fn)) {
file.remove(fn)
}
f_skip<-file(fn_skip)
writeLines(c("dimensions too large"), f_skip)
close(f_skip)
print(sprintf('Skipping %s', fn))
}
else {
if (file.exists(fn_skip)) {
file.remove(fn_skip)
}
print(sprintf('Processing %s', fn))
visualize_categories(fn, global, config, graph_config, d3, view, col, x_breakpoints, dimensions)
}
} }
} }

View File

@ -1,9 +1,19 @@
#!/bin/bash #!/bin/bash
crypted_jobids=( ) crypted_jobids=( )
crypted_jobids=( $crypted_jobids 5024292 ) # ? nodes, high intensity #crypted_jobids=( $crypted_jobids 5024292 ) # ? nodes, high intensity
crypted_jobids=( $crypted_jobids 7488914 ) # ? nodes, high problem_score, boring #crypted_jobids=( $crypted_jobids 7488914 ) # ? nodes, high problem_score, boring
crypted_jobids=( $crypted_jobids 4296426 ) # 1 node, uses 2 file systems #crypted_jobids=( $crypted_jobids 4296426 ) # 1 node, uses 2 file systems
crypted_jobids=( ${crypted_jobids[@]} 5024292 )
crypted_jobids=( ${crypted_jobids[@]} 5208298 )
crypted_jobids=( ${crypted_jobids[@]} 5240733 )
crypted_jobids=( ${crypted_jobids[@]} 4583672 )
crypted_jobids=( ${crypted_jobids[@]} 4482783 )
crypted_jobids=( ${crypted_jobids[@]} 4496524 )
crypted_jobids=( ${crypted_jobids[@]} 4296426 )
crypted_jobids=( ${crypted_jobids[@]} 7488914 )
crypted_jobids=( ${crypted_jobids[@]} 7266845 )
set -x set -x
for cjobid in ${crypted_jobids[@]}; do for cjobid in ${crypted_jobids[@]}; do