diff --git a/scripts/visualize.R b/scripts/visualize.R index 2e4573d..c668d7a 100755 --- a/scripts/visualize.R +++ b/scripts/visualize.R @@ -13,14 +13,13 @@ args <- commandArgs(trailingOnly = TRUE) use_python("/mnt/lustre01/work/ku0598/k202107/software/install/python/3.8.0/bin/python3", required=T) source_python("/work/ku0598/k202107/git/mistral-job-evaluation/scripts/jupyter/r_visual_jobs#pickle_reader.py") -global = list() +global = list() global[['source_dir']] = '/work/ku0598/k202107/git/mistral-job-evaluation/data/eval_20200117' global[['eval_dir']] = '../evaluation' global[['fig_dir']] = sprintf('%s/figures/job_visualization', global[['eval_dir']]) global[['key']] = 22897682 config = list() -#config[['crypted_jobid']] = 4296426 config[['crypted_jobid']] = strtoi(args[1]) config[['jobid']] = bitwXor(config[['crypted_jobid']], global[['key']]) config[['cat_fn']] = sprintf("%s/600/cats/%s.json", global[['source_dir']], config[['jobid']]) @@ -28,24 +27,33 @@ config[['raw_fn']] = sprintf('%s/600/jobdata/%s.pkl', global[['source_dir']], co graph_config = list() -# Colorized entities -# "name" : file systems -# "host" : compute nodes -# "metric" : I/O metrics -graph_config[['cols']] = c('metric', 'host', 'name') -#graph_config[['cols']] = c('host', 'name') +# View +graph_config[['cols']] = c('metric', 'host', 'name') # Colorized entities: "name" : file systems; "host" : compute nodes, "metric" : I/O metrics +graph_config[['views']] = c('jscore', 'default', 'nscore', 'mscore') # Enable views: 'default', 'jscore', 'nscore', 'mscore' +#graph_config[['views']] = c('nscore') +graph_config[['n_x_breakpoints']] = 5 # Number of breakpoints on x-axis +graph_config[['seg_size']] = 10 # Segments size in minutes -# Enable views -#'default', 'jscore', 'nscore', 'mscore' -graph_config[['views']] = c('jscore', 'default', 'nscore', 'mscore') -#graph_config[['views']] = c('default') +# Size +graph_config[['plot_size']] = list( + 'default' = list('height'=1, 'width'=10), + 'jscore' = list('height'=3, 'width'=10), + 'nscore' = list('height'=1, 'width'=14), + 'mscore' = list('height'=1, 'width'=1)) -# Set at nth position a label -graph_config[['x_breakpoint_interval']] = 5 - -# Segments size in minutes -graph_config[['seg_size']] = 10 +# Dimensions Limits +graph_config[['max_dimensions']] = list( + 'default' = list('seg'=1000, 'host'=13, 'name'=2, 'metric'=9), + 'jscore' = list('seg'=1000, 'host'=13, 'name'=2, 'metric'=9), + 'nscore' = list('seg'=1000, 'host'=50, 'name'=2, 'metric'=9), + 'mscore' = list('seg'=1000, 'host'=50, 'name'=2, 'metric'=9)) +# Legend Limits +graph_config[['max_legend_size']] = list( + 'default' = list('seg'=1000, 'host'=15, 'name'=2, 'metric'=9), + 'jscore' = list('seg'=1000, 'host'=15, 'name'=2, 'metric'=9), + 'nscore' = list('seg'=1000, 'host'=15, 'name'=2, 'metric'=9), + 'mscore' = list('seg'=1000, 'host'=15, 'name'=2, 'metric'=9)) rename_metrics <- function(data) { data['metric'] <- lapply(data['metric'], gsub, pattern = "host.lustre.", replacement = "", fixed = TRUE) @@ -56,10 +64,8 @@ rename_metrics <- function(data) { } -visualize_categories <- function(gconf, cconf, data, view, col, x_breakpoints) { - out_dir = sprintf('%s/%d_%d', gconf[['fig_dir']], cconf[['jobid']], cconf[['crypted_jobid']]) - dir.create(out_dir, recursive=TRUE) - + +visualize_categories <- function(fn, gconf, cconf, vconf, data, view, col, x_breakpoints, dims) { # Set legend title if (col == 'host') { gtitle = 'Node' @@ -70,14 +76,16 @@ visualize_categories <- function(gconf, cconf, data, view, col, x_breakpoints) { else if (col == 'name') { gtitle = 'File system' } - + + title = sprintf('JOBID: %d / %d (M:H:F:S)=(%d:%d:%d:%d)', cconf$jobid, cconf$crypted_jobid, dims$metric, dims$host, dims$name, dims$seg) # The palette with black: #cbp2 = c("#000000", "#E69F00", "#56B4E9", "#009E73", "#F0E442", "#0072B2", "#D55E00", "#CC79A7", "#999999") # General plot p <- ( - ggplot(data, aes_string(x='bin', y='score', fill=col)) + ggplot(data, aes_string(x='seg', y='score', fill=col)) #+ geom_bar(stat='summary', fun.y = "mean") + + ggtitle(title) + geom_bar(stat='identity') + scale_x_discrete(breaks=x_breakpoints) #+ scale_fill_manual(values= cbp2) @@ -85,7 +93,7 @@ visualize_categories <- function(gconf, cconf, data, view, col, x_breakpoints) { + guides( fill = guide_legend(title=gtitle, nrow=15) ) - + ylab('JScore') + #+ theme(aspect.ratio = 1) + xlab('Runtime in minutes') + theme_linedraw() #+ theme_classic() @@ -94,7 +102,7 @@ visualize_categories <- function(gconf, cconf, data, view, col, x_breakpoints) { #legend.text=element_text(size=6), legend.spacing.y = unit(0, 'cm'), #legend.spacing.x = unit(0, 'cm'), - legend.text = element_text(size=8, margin = margin(t = 1)), + legend.text = element_text(size = 8, margin = margin(t = 1)), strip.text.x = element_text(size = 8, color = "black"), strip.text.y = element_text(size = 8, color = "black"), legend.key = element_rect(size = 1), @@ -104,14 +112,10 @@ visualize_categories <- function(gconf, cconf, data, view, col, x_breakpoints) { # panel.grid.minor=element_line(size=0.25, color=alpha('black', 0.25)) ) ) + + # Dimensions modifier if (col == 'host') { - if (nrow(unique(data['host'])) > 13) { - p <- (p + - theme( - legend.position='none' - ) - ) - } + # do nothing } else if (col == 'metric') { p <- (p @@ -119,26 +123,40 @@ visualize_categories <- function(gconf, cconf, data, view, col, x_breakpoints) { ) } else if (col == 'name') { + # do nothing } - - if (view == 'jscore') { - fn = sprintf('%s/jscore_%s.png', out_dir, col) - ggsave(fn, width=10, height=2.5) + else if (col == 'seg') { + # do nothing } + + + # View modifiers if (view == 'default') { - p <- ( - p - + facet_grid(metric ~ .) - + ylab('') - + ylab('Score') - + theme( - legend.position='none', - #strip.text.x = element_text(angle=0), - strip.text.y = element_text(angle=0), - ) + p <- (p + + facet_grid(metric ~ .) + + ylab('Score') + + theme( + strip.text.y = element_text(angle=0) + ) ) - fn = sprintf('%s/default_%s.png', out_dir, col) - ggsave(fn, width=7, height=7) + # Disable legend if dimensions are too large + if (dims[[col]] > vconf$max_legend_size[[view]][[col]]) { + p <- p + theme (legend.position='none') + } + ggsave(fn, width=vconf$plot_size[[view]][['width']], height=vconf$plot_size[[view]][['height']] * dims[['metric']]) + } + else if (view == 'jscore') { + p <- (p + + ylab('JScore') + + theme ( + strip.text.y = element_text(angle=0), + ) + ) + # Disable legend if dimensions are too large + if (dims[[col]] > vconf$max_legend_size[[view]][[col]]) { + p <- p + theme (legend.position='none') + } + ggsave(fn, width=vconf$plot_size[[view]][['width']], height=vconf$plot_size[[view]][['height']]) } else if (view == 'nscore') { p <- ( @@ -146,31 +164,71 @@ visualize_categories <- function(gconf, cconf, data, view, col, x_breakpoints) { + facet_grid(host ~ .) + ylab('NScore') + theme( - legend.position='none' + strip.text.y = element_text(angle=0), + aspect.ratio = vconf$plot_size$nscore$height / vconf$plot_size$nscore$width, + #legend.position='bottom' ) ) - if (col == 'name') { - p <- p + theme(legend.position = 'bottom') + # Disable legend if dimensions are too large + if (dims[[col]] > vconf$max_legend_size[[view]][[col]]) { + p <- p + theme (legend.position='none') } - fn = sprintf('%s/nscore_%s.png', out_dir, col) - ggsave(fn, width=4, height=4) + extra_space = 2 + ggsave(fn, width=vconf$plot_size[[view]][['width']], height=vconf$plot_size[[view]][['height']] * (dims[['host']] + extra_space)) } else if (view == 'mscore') { p <- ( p + facet_grid(host ~ metric) + #+ coord_fixed(ratio=dims[['host']]/dims[['metric']]) + #+ coord_fixed(ratio=dims[['metric']]/dims[['host']]) + #+ coord_fixed(ratio=1) + ylab('MScore') + theme( - legend.position='none', - axis.text.x = element_text(angle=90, hjust=1) + axis.text.x = element_text(angle=90, hjust=1), + aspect.ratio = 1, ) ) - fn = sprintf('%s/mscore_%s.png', out_dir, col) - ggsave(fn, width=8, height=4) + # Disable legend if dimensions are too large + if (dims[[col]] > vconf$max_legend_size[[view]][[col]]) { + p <- p + theme (legend.position='none') + } + extra_space = 2 + ggsave(fn, width=vconf$plot_size[[view]][['width']] * dims[['metric']], height=vconf$plot_size[[view]][['height']] * (dims[['host']] + extra_space)) } } +# Check if dimensions exceed limits +exceeds_limits <- function(view, dims, graph_config) { + max_dims <- graph_config$max_dimensions[[view]] + if ((dims[['seg']] > max_dims[['seg']])) { + return(T) + } + if (view == 'default') { + if ((dims[['metric']] > max_dims[['metric']])) { + return(T) + } + } + else if (view == 'jscore') { + } + else if (view == 'nscore') { + if ((dims[['host']] > max_dims[['host']])) { + return(T) + } + } + else if (view == 'mscore') { + if ((dims[['host']] > max_dims[['host']]) || dims[['metric']] > max_dims[['metric']]) { + return(T) + } + } + else { + print("Unknown view") + exit(1) + } + return(F) +} + # Create 10 minutes segments @@ -179,19 +237,48 @@ cat_data['rmin'] = cat_data['runtime'] / 60 # runtime in minutes duration = max(ceiling(cat_data['rmin'])) bins = seq(0, duration, graph_config[['seg_size']] ) - d2 <- cat_data %>% group_by(cat) %>% - mutate(bin = cut(rmin, breaks = bins, labels = bins[-1])) + mutate(seg = cut(rmin, breaks = bins, labels = bins[-1])) d3 <- d2 %>% - group_by(name, metric, host, bin) %>% + group_by(name, metric, host, seg) %>% summarise(score = sum(cat)) -x_breakpoints <- bins[seq(1, length(bins), graph_config[['x_breakpoint_interval']])] +dimensions = list() +dimensions[['metric']] <- length(unique(d3$metric)) +dimensions[['name']] <- length(unique(d3$name)) +dimensions[['host']] <- length(unique(d3$host)) +dimensions[['seg']] <- length(unique(d3$seg)) + +x_breakpoints <- bins[seq(1, length(bins), dimensions[['seg']]/graph_config[['n_x_breakpoints']]+1)] +#x_breakpoints[length(x_breakpoints)+1] <- (dimensions[['seg']]-0)*10 + +out_dir = sprintf('%s/%d_%d', global[['fig_dir']], config[['jobid']], config[['crypted_jobid']]) +dir.create(out_dir, recursive=TRUE) + + for (col in graph_config[['cols']]) { for (view in graph_config[['views']]) { - visualize_categories (global, config, d3, view, col, x_breakpoints) + fn = sprintf('%s/%s_%s.png', out_dir, view, col) + fn_skip = sprintf("%s.skip", fn) + + if (exceeds_limits(view, dimensions, graph_config)) { + if (file.exists(fn)) { + file.remove(fn) + } + f_skip<-file(fn_skip) + writeLines(c("dimensions too large"), f_skip) + close(f_skip) + print(sprintf('Skipping %s', fn)) + } + else { + if (file.exists(fn_skip)) { + file.remove(fn_skip) + } + print(sprintf('Processing %s', fn)) + visualize_categories(fn, global, config, graph_config, d3, view, col, x_breakpoints, dimensions) + } } } diff --git a/scripts/visualize_all.sh b/scripts/visualize_all.sh index 27e3586..c78e40e 100755 --- a/scripts/visualize_all.sh +++ b/scripts/visualize_all.sh @@ -1,9 +1,19 @@ #!/bin/bash crypted_jobids=( ) -crypted_jobids=( $crypted_jobids 5024292 ) # ? nodes, high intensity -crypted_jobids=( $crypted_jobids 7488914 ) # ? nodes, high problem_socre, boring -crypted_jobids=( $crypted_jobids 4296426 ) # 1 node, uses 2 file systems +#crypted_jobids=( $crypted_jobids 5024292 ) # ? nodes, high intensity +#crypted_jobids=( $crypted_jobids 7488914 ) # ? nodes, high problem_socre, boring +#crypted_jobids=( $crypted_jobids 4296426 ) # 1 node, uses 2 file systems + +crypted_jobids=( ${crypted_jobids[@]} 5024292 ) +crypted_jobids=( ${crypted_jobids[@]} 5208298 ) +crypted_jobids=( ${crypted_jobids[@]} 5240733 ) +crypted_jobids=( ${crypted_jobids[@]} 4583672 ) +crypted_jobids=( ${crypted_jobids[@]} 4482783 ) +crypted_jobids=( ${crypted_jobids[@]} 4496524 ) +crypted_jobids=( ${crypted_jobids[@]} 4296426 ) +crypted_jobids=( ${crypted_jobids[@]} 7488914 ) +crypted_jobids=( ${crypted_jobids[@]} 7266845 ) set -x for cjobid in ${crypted_jobids[@]}; do