299 lines
		
	
	
		
			10 KiB
		
	
	
	
		
			R
		
	
	
		
			Executable File
		
	
	
	
	
			
		
		
	
	
			299 lines
		
	
	
		
			10 KiB
		
	
	
	
		
			R
		
	
	
		
			Executable File
		
	
	
	
	
| #!/usr/bin/env Rscript
 | |
| 
 | |
| library('ggplot2')
 | |
| library('ggthemes')
 | |
| library('tidyverse')
 | |
| library('repr')
 | |
| library('jcolors')
 | |
| library("reticulate")
 | |
| 
 | |
# ---------------------------------------------------------------------------
# Command line handling and static configuration.
#
# The script expects exactly one positional argument: the "crypted" job id.
# The real job id is recovered by XOR-ing it with global[['key']].
# ---------------------------------------------------------------------------
args <- commandArgs(trailingOnly = TRUE)

# Fail early with a readable message instead of silently building
# ".../NA.json" paths further down when the argument is missing.
if (length(args) < 1) {
    stop("Usage: Rscript <script> <crypted_jobid>", call. = FALSE)
}

#setwd(source_dir)
# Python is bound via reticulate; source_python() loads the helpers defined
# in the pickle reader script into the R session.
use_python("/mnt/lustre01/work/ku0598/k202107/software/install/python/3.8.0/bin/python3", required = TRUE)
source_python("/work/ku0598/k202107/git/mistral-job-evaluation/scripts/jupyter/r_visual_jobs#pickle_reader.py")

# Site-wide settings: input data location, output directories and XOR key.
global <- list()
global[['source_dir']] <- '/work/ku0598/k202107/git/mistral-job-evaluation/data/eval_20200117'
global[['eval_dir']] <- '../evaluation'
global[['fig_dir']] <- sprintf('%s/figures/job_visualization', global[['eval_dir']])
global[['key']] <- 22897682

# Per-job settings derived from the command line argument.
config <- list()
config[['crypted_jobid']] <- strtoi(args[1])
# strtoi() yields NA for non-numeric or out-of-range input.
if (is.na(config[['crypted_jobid']])) {
    stop(sprintf("Invalid crypted job id: '%s'", args[1]), call. = FALSE)
}
config[['jobid']] <- bitwXor(config[['crypted_jobid']], global[['key']])
config[['cat_fn']] <- sprintf("%s/600/cats/%s.json", global[['source_dir']], config[['jobid']])
config[['raw_fn']] <- sprintf('%s/600/jobdata/%s.pkl', global[['source_dir']], config[['jobid']])
 | |
| 
 | |
| 
 | |
# Plot configuration, keyed by view name where applicable.
graph_config <- list(
    # View
    # Colorized entities: "name" : file systems; "host" : compute nodes, "metric" : I/O metrics
    cols = c('metric', 'host', 'name'),
    # Enable views: 'default', 'jscore', 'nscore', 'mscore'
    # (e.g. use c('nscore') to render a single view only)
    views = c('jscore', 'default', 'nscore', 'mscore'),
    # Number of breakpoints on x-axis
    n_x_breakpoints = 5,
    # Segments size in minutes
    seg_size = 10,

    # Size
    plot_size = list(
        default = list(height = 1, width = 10),
        jscore  = list(height = 3, width = 10),
        nscore  = list(height = 1, width = 14),
        mscore  = list(height = 1, width = 1)
    ),

    # Dimensions Limits
    max_dimensions = list(
        default = list(seg = 1000, host = 13, name = 2, metric = 9),
        jscore  = list(seg = 1000, host = 13, name = 2, metric = 9),
        nscore  = list(seg = 1000, host = 50, name = 2, metric = 9),
        mscore  = list(seg = 1000, host = 50, name = 2, metric = 9)
    ),

    # Legend Limits
    max_legend_size = list(
        default = list(seg = 1000, host = 15, name = 2, metric = 9),
        jscore  = list(seg = 1000, host = 15, name = 2, metric = 9),
        nscore  = list(seg = 1000, host = 15, name = 2, metric = 9),
        mscore  = list(seg = 1000, host = 15, name = 2, metric = 9)
    )
)
 | |
| 
 | |
# Shorten the metric names in the 'metric' column of `data`.
#
# The literal (non-regex) substitutions are applied in order:
#   "host.lustre." -> "", "stats." -> "", ".bytes" -> "_bytes", ".calls" -> "_calls"
#
# Returns `data` with the rewritten 'metric' column.
rename_metrics <- function(data) {
    rewrites <- list(
        c("host.lustre.", ""),
        c("stats.", ""),
        c(".bytes", "_bytes"),
        c(".calls", "_calls")
    )
    for (rule in rewrites) {
        data['metric'] <- lapply(
            data['metric'],
            function(values) gsub(rule[[1]], rule[[2]], values, fixed = TRUE)
        )
    }
    data
}
 | |
| 
 | |
| 
 | |
| 
 | |
# Render one stacked bar chart of scores over runtime segments and save it.
#
# Arguments:
#   fn            Output file name passed to ggsave().
#   gconf         Global configuration (currently unused; kept for callers).
#   cconf         Job configuration; `jobid` and `crypted_jobid` go into the title.
#   vconf         Graph configuration; `plot_size` and `max_legend_size` are read.
#   data          Data frame with columns 'seg', 'score' and the facet/fill
#                 columns used by the selected view ('metric', 'host', `col`).
#   view          One of 'default', 'jscore', 'nscore', 'mscore'.
#   col           Column used as fill color ('host', 'metric', 'name', ...).
#   x_breakpoints Breaks shown on the discrete x-axis.
#   dims          Named list with the number of distinct values per dimension
#                 ('metric', 'host', 'name', 'seg').
#
# Returns the value of ggsave() (invisibly); for an unknown view a warning is
# raised and nothing is written.
visualize_categories <- function(fn, gconf, cconf, vconf, data, view, col, x_breakpoints, dims) {
    # Legend title; falls back to the raw column name for other colorings
    # instead of leaving the title undefined.
    legend_title <- switch(col,
        host = 'Node',
        metric = 'Metric',
        name = 'File system',
        col
    )

    title <- sprintf(
        'JOBID: %d / %d (M:H:F:S)=(%d:%d:%d:%d)',
        cconf$jobid, cconf$crypted_jobid,
        dims$metric, dims$host, dims$name, dims$seg
    )

    # General plot shared by all views
    p <- ggplot(data, aes_string(x = 'seg', y = 'score', fill = col)) +
        ggtitle(title) +
        geom_bar(stat = 'identity') +
        scale_x_discrete(breaks = x_breakpoints) +
        guides(fill = guide_legend(title = legend_title, nrow = 15)) +
        xlab('Runtime in minutes') +
        theme_linedraw() +
        theme(
            legend.spacing.y = unit(0, 'cm'),
            legend.text = element_text(size = 8, margin = margin(t = 1)),
            strip.text.x = element_text(size = 8, color = "black"),
            strip.text.y = element_text(size = 8, color = "black"),
            legend.key = element_rect(size = 1),
            legend.key.size = unit(0.5, 'lines'),
            strip.background = element_rect(color = "black", fill = "#FFFFFF", linetype = "solid")
        )

    # Dimensions modifier: only the metric coloring uses a dedicated palette
    if (col == 'metric') {
        p <- p + scale_fill_jcolors("pal12")
    }

    # View modifiers: each view adds its facets/labels and determines the
    # size of the saved image.
    plot_size <- vconf$plot_size[[view]]
    extra_space <- 2 # additional rows of height for the host-faceted views
    if (view == 'default') {
        p <- p +
            facet_grid(metric ~ .) +
            ylab('Score') +
            theme(strip.text.y = element_text(angle = 0))
        width <- plot_size[['width']]
        height <- plot_size[['height']] * dims[['metric']]
    } else if (view == 'jscore') {
        p <- p +
            ylab('JScore') +
            theme(strip.text.y = element_text(angle = 0))
        width <- plot_size[['width']]
        height <- plot_size[['height']]
    } else if (view == 'nscore') {
        p <- p +
            facet_grid(host ~ .) +
            ylab('NScore') +
            theme(
                strip.text.y = element_text(angle = 0),
                aspect.ratio = plot_size[['height']] / plot_size[['width']]
            )
        width <- plot_size[['width']]
        height <- plot_size[['height']] * (dims[['host']] + extra_space)
    } else if (view == 'mscore') {
        p <- p +
            facet_grid(host ~ metric) +
            ylab('MScore') +
            theme(
                axis.text.x = element_text(angle = 90, hjust = 1),
                aspect.ratio = 1
            )
        width <- plot_size[['width']] * dims[['metric']]
        height <- plot_size[['height']] * (dims[['host']] + extra_space)
    } else {
        warning(sprintf("Unknown view '%s'; nothing saved to %s", view, fn), call. = FALSE)
        return(invisible(NULL))
    }

    # Disable legend if dimensions are too large
    if (dims[[col]] > vconf$max_legend_size[[view]][[col]]) {
        p <- p + theme(legend.position = 'none')
    }

    # Pass the plot explicitly rather than relying on last_plot(), and lift
    # ggsave()'s 50 inch guard: host-faceted views scale their height with
    # the number of hosts and can legitimately exceed it.
    ggsave(fn, plot = p, width = width, height = height, limitsize = FALSE)
}
 | |
| 
 | |
| 
 | |
# Check if dimensions exceed limits
#
# Arguments:
#   view          One of 'default', 'jscore', 'nscore', 'mscore'.
#   dims          Named list with the counts per dimension ('seg', 'host',
#                 'metric', ...).
#   graph_config  Configuration list; `max_dimensions[[view]]` holds the limits.
#
# Returns TRUE when the number of segments, or any dimension the view facets
# on, is larger than the configured maximum; FALSE otherwise.
# Stops with an error for an unknown view or a view without configured limits.
exceeds_limits <- function(view, dims, graph_config) {
    # Dimensions (besides 'seg') that are relevant for each view
    checked_dims <- switch(view,
        default = 'metric',
        jscore = character(0),
        nscore = 'host',
        mscore = c('host', 'metric'),
        stop(sprintf("Unknown view: '%s'", view), call. = FALSE)
    )

    max_dims <- graph_config$max_dimensions[[view]]
    if (is.null(max_dims)) {
        stop(sprintf("No dimension limits configured for view '%s'", view), call. = FALSE)
    }

    too_large <- function(dim) {
        dims[[dim]] > max_dims[[dim]]
    }

    # The segment limit applies to every view
    if (too_large('seg')) {
        return(TRUE)
    }
    any(vapply(checked_dims, too_large, logical(1)))
}
 | |
| 
 | |
| 
 | |
| 
 | |
# ---------------------------------------------------------------------------
# Main: load the categorized data, bucket it into runtime segments and render
# every enabled (coloring, view) combination.
# ---------------------------------------------------------------------------

# Create segments of graph_config[['seg_size']] minutes
cat_data <- rename_metrics(read.csv(config[['cat_fn']])) # categorized data
cat_data[['rmin']] <- cat_data[['runtime']] / 60 # runtime in minutes

seg_size <- graph_config[['seg_size']]
duration <- max(ceiling(cat_data[['rmin']]), na.rm = TRUE)
# Round the upper bound up to a whole segment so that the last samples fall
# into a bin, and always create at least one segment for very short jobs.
n_segments <- max(1, ceiling(duration / seg_size))
bins <- seq(0, n_segments * seg_size, by = seg_size)

# Assign each sample to its segment; each segment is labelled with its upper
# bound. include.lowest keeps samples with a runtime of exactly 0 in the
# first segment instead of turning them into NA.
d2 <- cat_data %>%
    mutate(seg = cut(rmin, breaks = bins, labels = bins[-1], include.lowest = TRUE))

# Sum the categories per file system, metric, host and segment
d3 <- d2 %>%
    group_by(name, metric, host, seg) %>%
    summarise(score = sum(cat)) %>%
    ungroup()

# Number of distinct values per dimension
dimensions <- list()
dimensions[['metric']] <- length(unique(d3$metric))
dimensions[['name']] <- length(unique(d3$name))
dimensions[['host']] <- length(unique(d3$host))
dimensions[['seg']] <- length(unique(d3$seg))

# Thin out the x-axis labels. The step may be fractional; R truncates the
# resulting non-integer indices when subsetting `bins`.
x_breakpoints <- bins[seq(1, length(bins), dimensions[['seg']]/graph_config[['n_x_breakpoints']]+1)]
#x_breakpoints[length(x_breakpoints)+1] <- (dimensions[['seg']]-0)*10

out_dir <- sprintf('%s/%d_%d', global[['fig_dir']], config[['jobid']], config[['crypted_jobid']])
# The directory may already exist from a previous run; that is not a problem.
dir.create(out_dir, recursive = TRUE, showWarnings = FALSE)

for (col in graph_config[['cols']]) {
    for (view in graph_config[['views']]) {
        fn <- sprintf('%s/%s_%s.png', out_dir, view, col)
        # Marker file written instead of the image when a plot is skipped
        fn_skip <- sprintf("%s.skip", fn)

        if (exceeds_limits(view, dimensions, graph_config)) {
            # Remove a stale image from an earlier run and leave a marker
            if (file.exists(fn)) {
                file.remove(fn)
            }
            writeLines("dimensions too large", fn_skip)
            print(sprintf('Skipping %s', fn))
        }
        else {
            # Remove a stale marker from an earlier run
            if (file.exists(fn_skip)) {
                file.remove(fn_skip)
            }
            print(sprintf('Processing %s', fn))
            visualize_categories(fn, global, config, graph_config, d3, view, col, x_breakpoints, dimensions)
        }
    }
}
 | |
| 
 | |
| 
 | |
| ## TODO
 | |
| #visualize_rawdata <- function(data) {
 | |
| #}
 | |
| 
 | |
| #pickle_data <- rename_metrics(read_pickle_file(config[['raw_fn']])) # raw data
 | |
| #print(head(pickle_data))
 | |
| #offset = min(pickle_data$timestamp)
 | |
| #dat = pickle_data[complete.cases(pickle_data),]
 | |
| #dat$runtime = dat$timestamp - offset
 | |
| #dat['rmin'] = dat['runtime'] / 60 # runtime in minutes
 | |
| 
 | |
| #visualize_rawdata(dat)
 | |
| 
 |