#!/usr/bin/env Rscript
# Render the categorized I/O scores of one job as stacked bar charts.
#
# Usage: Rscript <this script> <crypted_jobid>
#
# The real job id is recovered by XOR-ing the given id with global$key. The
# categorized data of that job is read, split into fixed-size runtime
# segments, summed per (name, metric, host, seg), and one PNG is written for
# every combination of graph_config$views and graph_config$cols.

library("ggplot2")
library("ggthemes")
library("tidyverse")
library("repr")
library("jcolors")
library("reticulate")

args <- commandArgs(trailingOnly = TRUE)

#setwd(source_dir)
# The Python reader is only needed by the raw-data TODO at the end of the file.
use_python("/mnt/lustre01/work/ku0598/k202107/software/install/python/3.8.0/bin/python3", required = TRUE)
source_python("/work/ku0598/k202107/git/mistral-job-evaluation/scripts/jupyter/r_visual_jobs#pickle_reader.py")

# ---- Configuration ----------------------------------------------------------

global <- list()
global[["source_dir"]] <- "/work/ku0598/k202107/git/mistral-job-evaluation/data/eval_20200117"
global[["eval_dir"]] <- "../evaluation"
global[["fig_dir"]] <- sprintf("%s/figures/job_visualization", global[["eval_dir"]])
global[["key"]] <- 22897682

# Fail early with a usable message instead of propagating an NA job id into
# the file names below.
if (length(args) < 1 || is.na(strtoi(args[1]))) {
  stop("Usage: Rscript <script> <crypted_jobid> (an integer)", call. = FALSE)
}

config <- list()
config[["crypted_jobid"]] <- strtoi(args[1])
config[["jobid"]] <- bitwXor(config[["crypted_jobid"]], global[["key"]])
config[["cat_fn"]] <- sprintf("%s/600/cats/%s.json", global[["source_dir"]], config[["jobid"]])
config[["raw_fn"]] <- sprintf("%s/600/jobdata/%s.pkl", global[["source_dir"]], config[["jobid"]])

graph_config <- list()

# View
# Colorized entities: "name": file systems; "host": compute nodes;
# "metric": I/O metrics
graph_config[["cols"]] <- c("metric", "host", "name")
# Enabled views: 'default', 'jscore', 'nscore', 'mscore'
graph_config[["views"]] <- c("jscore", "default", "nscore", "mscore")
#graph_config[['views']] = c('nscore')
graph_config[["n_x_breakpoints"]] <- 5 # Number of breakpoints on x-axis
graph_config[["seg_size"]] <- 10 # Segment size in minutes

# Size (base height/width handed to ggsave; some views scale them by the
# number of metrics or hosts, see visualize_categories)
graph_config[["plot_size"]] <- list(
  "default" = list("height" = 1, "width" = 10),
  "jscore" = list("height" = 3, "width" = 10),
  "nscore" = list("height" = 1, "width" = 14),
  "mscore" = list("height" = 1, "width" = 1)
)

# Dimension limits (a view is skipped when a checked dimension exceeds them)
graph_config[["max_dimensions"]] <- list(
  "default" = list("seg" = 1000, "host" = 13, "name" = 2, "metric" = 9),
  "jscore" = list("seg" = 1000, "host" = 13, "name" = 2, "metric" = 9),
  "nscore" = list("seg" = 1000, "host" = 129, "name" = 2, "metric" = 9),
  "mscore" = list("seg" = 1000, "host" = 129, "name" = 2, "metric" = 9)
)

# Legend limits (the legend is hidden when the colored dimension exceeds them)
graph_config[["max_legend_size"]] <- list(
  "default" = list("seg" = 1000, "host" = 15, "name" = 2, "metric" = 9),
  "jscore" = list("seg" = 1000, "host" = 15, "name" = 2, "metric" = 9),
  "nscore" = list("seg" = 1000, "host" = 15, "name" = 2, "metric" = 9),
  "mscore" = list("seg" = 1000, "host" = 15, "name" = 2, "metric" = 9)
)

# ---- Functions --------------------------------------------------------------

# Shorten the values of the `metric` column.
#
# Removes the literal substrings "host.lustre." and "stats." and rewrites
# ".bytes" / ".calls" to "_bytes" / "_calls". Matching is literal
# (fixed = TRUE), not regex based, and the substitutions run in that order.
#
# data: data frame with a `metric` column.
# Returns `data` with the rewritten `metric` column (as character).
rename_metrics <- function(data) {
  replacements <- c(
    "host.lustre." = "",
    "stats." = "",
    ".bytes" = "_bytes",
    ".calls" = "_calls"
  )
  metric <- data[["metric"]]
  for (pattern in names(replacements)) {
    metric <- gsub(pattern, replacements[[pattern]], metric, fixed = TRUE)
  }
  data[["metric"]] <- metric
  data
}

# Draw one stacked bar chart of score per segment and save it to `fn`.
#
# fn:            output file name handed to ggsave().
# gconf:         global configuration; currently unused, kept so existing
#                callers keep working.
# cconf:         job configuration; `jobid` and `crypted_jobid` go into the
#                plot title.
# vconf:         graph configuration; `plot_size` and `max_legend_size` are
#                read per view.
# data:          data frame with columns seg, score and the column named by
#                `col` (plus metric/host for the faceted views).
# view:          one of "default", "jscore", "nscore", "mscore".
# col:           column used as fill color: "host", "metric" or "name".
# x_breakpoints: breaks shown on the x axis.
# dims:          list with the number of distinct metric/host/name/seg values.
#
# Returns `fn` invisibly. Stops on an unsupported `col` or `view`.
visualize_categories <- function(fn, gconf, cconf, vconf, data, view, col, x_breakpoints, dims) {
  # Legend title for the colored dimension
  legend_title <- switch(col,
    host = "Node",
    metric = "Metric",
    name = "File system",
    stop("Unsupported color column: ", col, call. = FALSE)
  )

  title <- sprintf(
    "JOBID: %d / %d (M:H:F:S)=(%d:%d:%d:%d)",
    cconf$jobid, cconf$crypted_jobid,
    dims$metric, dims$host, dims$name, dims$seg
  )

  # General plot shared by all views
  p <- ggplot(data, aes_string(x = "seg", y = "score", fill = col)) +
    ggtitle(title) +
    geom_bar(stat = "identity") +
    scale_x_discrete(breaks = x_breakpoints) +
    guides(fill = guide_legend(title = legend_title, nrow = 15)) +
    xlab("Runtime in minutes") +
    theme_linedraw() +
    theme(
      legend.spacing.y = unit(0, "cm"),
      legend.text = element_text(size = 8, margin = margin(t = 1)),
      strip.text.x = element_text(size = 8, color = "black"),
      strip.text.y = element_text(size = 8, color = "black"),
      legend.key = element_rect(size = 1),
      legend.key.size = unit(0.5, "lines"),
      strip.background = element_rect(color = "black", fill = "#FFFFFF", linetype = "solid")
    )

  # Dimension modifier: only metrics get a dedicated palette
  if (col == "metric") {
    p <- p + scale_fill_jcolors("pal12")
  }

  # View modifiers: facets, y label and the final image size
  size <- vconf$plot_size[[view]]
  extra_space <- 2 # extra rows of height added to the host-faceted views
  if (view == "default") {
    p <- p +
      facet_grid(metric ~ .) +
      ylab("Score") +
      theme(strip.text.y = element_text(angle = 0))
    width <- size[["width"]]
    height <- size[["height"]] * dims[["metric"]]
  } else if (view == "jscore") {
    p <- p +
      ylab("JScore") +
      theme(strip.text.y = element_text(angle = 0))
    width <- size[["width"]]
    height <- size[["height"]]
  } else if (view == "nscore") {
    p <- p +
      facet_grid(host ~ .) +
      ylab("NScore") +
      theme(
        strip.text.y = element_text(angle = 0),
        aspect.ratio = vconf$plot_size$nscore$height / vconf$plot_size$nscore$width
      )
    width <- size[["width"]]
    height <- size[["height"]] * (dims[["host"]] + extra_space)
  } else if (view == "mscore") {
    p <- p +
      facet_grid(host ~ metric) +
      ylab("MScore") +
      theme(
        axis.text.x = element_text(angle = 90, hjust = 1),
        aspect.ratio = 1
      )
    width <- size[["width"]] * dims[["metric"]]
    height <- size[["height"]] * (dims[["host"]] + extra_space)
  } else {
    stop("Unknown view: ", view, call. = FALSE)
  }

  # Disable legend if dimensions are too large
  if (dims[[col]] > vconf$max_legend_size[[view]][[col]]) {
    p <- p + theme(legend.position = "none")
  }

  # Pass the plot explicitly instead of relying on last_plot(). The host
  # faceted views can be taller than ggsave's default 50 inch limit (up to
  # 129 hosts are allowed), so the size check has to be switched off.
  ggsave(fn, plot = p, width = width, height = height, limitsize = FALSE)
  invisible(fn)
}

# Check if dimensions exceed the limits configured for a view.
#
# view:         one of "default", "jscore", "nscore", "mscore".
# dims:         list with the number of distinct metric/host/name/seg values.
# graph_config: graph configuration; `max_dimensions[[view]]` is read.
#
# Returns TRUE when any dimension checked for `view` is above its limit,
# FALSE otherwise. Stops on an unknown or unconfigured view.
exceeds_limits <- function(view, dims, graph_config) {
  # Dimensions that matter per view; 'seg' is checked for every view.
  checked <- switch(view,
    default = c("seg", "metric"),
    jscore = "seg",
    nscore = c("seg", "host"),
    mscore = c("seg", "host", "metric"),
    stop("Unknown view: ", view, call. = FALSE)
  )

  max_dims <- graph_config$max_dimensions[[view]]
  if (is.null(max_dims)) {
    stop("No max_dimensions configured for view: ", view, call. = FALSE)
  }

  for (dimension in checked) {
    if (dims[[dimension]] > max_dims[[dimension]]) {
      return(TRUE)
    }
  }
  FALSE
}

# ---- Main -------------------------------------------------------------------

# Categorized data. NOTE(review): the file carries a .json extension but is
# parsed with read.csv - confirm the on-disk format.
cat_data <- rename_metrics(read.csv(config[["cat_fn"]]))
if (nrow(cat_data) == 0) {
  stop("No categorized data in ", config[["cat_fn"]], call. = FALSE)
}

# Runtime in minutes (presumably `runtime` is in seconds - verify)
cat_data[["rmin"]] <- cat_data[["runtime"]] / 60
duration <- max(ceiling(cat_data[["rmin"]]))

# Create segments of seg_size minutes. The last break is rounded up to a full
# segment so that samples in a trailing partial segment are not dropped into
# NA, and at least one segment always exists.
seg_size <- graph_config[["seg_size"]]
n_segments <- max(1, ceiling(duration / seg_size))
bins <- seq(0, n_segments * seg_size, by = seg_size)

# Each segment is labelled with its upper bound. include.lowest keeps samples
# at runtime 0 in the first segment.
d2 <- cat_data %>%
  mutate(seg = cut(rmin, breaks = bins, labels = bins[-1], include.lowest = TRUE))

d3 <- d2 %>%
  group_by(name, metric, host, seg) %>%
  summarise(score = sum(cat)) %>%
  ungroup()

dimensions <- list()
dimensions[["metric"]] <- length(unique(d3$metric))
dimensions[["name"]] <- length(unique(d3$name))
dimensions[["host"]] <- length(unique(d3$host))
dimensions[["seg"]] <- length(unique(d3$seg))

# The step is usually fractional; floor() spells out the truncation that
# indexing with a non-integer performs anyway.
break_step <- dimensions[["seg"]] / graph_config[["n_x_breakpoints"]] + 1
x_breakpoints <- bins[floor(seq(1, length(bins), by = break_step))]
#x_breakpoints[length(x_breakpoints)+1] <- (dimensions[['seg']]-0)*10

out_dir <- sprintf("%s/%d_%d", global[["fig_dir"]], config[["jobid"]], config[["crypted_jobid"]])
dir.create(out_dir, recursive = TRUE, showWarnings = FALSE)

for (col in graph_config[["cols"]]) {
  for (view in graph_config[["views"]]) {
    fn <- sprintf("%s/%s_%s.png", out_dir, view, col)
    fn_skip <- sprintf("%s.skip", fn)

    if (exceeds_limits(view, dimensions, graph_config)) {
      # Replace a stale image with a marker file explaining why it is missing
      if (file.exists(fn)) {
        file.remove(fn)
      }
      writeLines("dimensions too large", fn_skip)
      print(sprintf("Skipping %s", fn))
    } else {
      if (file.exists(fn_skip)) {
        file.remove(fn_skip)
      }
      print(sprintf("Processing %s", fn))
      visualize_categories(fn, global, config, graph_config, d3, view, col, x_breakpoints, dimensions)
    }
  }
}

## TODO
#visualize_rawdata <- function(data) {
#}
#pickle_data <- rename_metrics(read_pickle_file(config[['raw_fn']])) # raw data
#print(head(pickle_data))
#offset = min(pickle_data$timestamp)
#dat = pickle_data[complete.cases(pickle_data),]
#dat$runtime = dat$timestamp - offset
#dat['rmin'] = dat['runtime'] / 60 # runtime in minutes
#visualize_rawdata(dat)