Visualization script and new structure
This commit is contained in:
		
							parent
							
								
									9ce020ae5e
								
							
						
					
					
						commit
						a7fab7d233
					
				
							
								
								
									
										8
									
								
								datasets/decompress.sh
									
									
									
									
									
										Executable file
									
								
							
							
						
						
									
										8
									
								
								datasets/decompress.sh
									
									
									
									
									
										Executable file
									
								
							| @ -0,0 +1,8 @@ | |||||||
#!/bin/bash
# Decompress every .tar.xz archive found in the current working directory.

# Let the glob expand to nothing (instead of the literal pattern) when the
# directory contains no archive, so the loop body is simply skipped.
shopt -s nullglob

# Iterate over the glob directly rather than parsing `ls` output: names that
# contain whitespace or glob characters are then passed to tar unmodified.
for filename in *.tar.xz; do
    echo "Decompressing ${filename}"
    tar -xJf "${filename}"
done
							
								
								
									
										11
									
								
								scripts/r_visual_jobs#pickle_reader.py
									
									
									
									
									
										Executable file
									
								
							
							
						
						
									
										11
									
								
								scripts/r_visual_jobs#pickle_reader.py
									
									
									
									
									
										Executable file
									
								
							| @ -0,0 +1,11 @@ | |||||||
|  | # Required for job visualization | ||||||
|  | # job_visualization_r.ipynb | ||||||
|  | 
 | ||||||
|  | import pandas as pd | ||||||
|  | 
 | ||||||
def read_pickle_file(file):
    """Load a job pickle and return its data table with the index flattened.

    The unpickled object must unpack into exactly four items
    (start, stop, data, metadata); only ``data`` is used.

    :param file: path (or file-like object) handed to ``pandas.read_pickle``
    :return: ``data.reset_index()``, i.e. the index moved into regular columns
    """
    _start, _stop, frame, _metadata = pd.read_pickle(file)
    return frame.reset_index()
|  | 
 | ||||||
|  | 
 | ||||||
							
								
								
									
										208
									
								
								scripts/visualize.R
									
									
									
									
									
										Executable file
									
								
							
							
						
						
									
										208
									
								
								scripts/visualize.R
									
									
									
									
									
										Executable file
									
								
							| @ -0,0 +1,208 @@ | |||||||
|  | #!/usr/bin/env Rscript | ||||||
|  | 
 | ||||||
|  | library('ggplot2') | ||||||
|  | library('ggthemes') | ||||||
|  | library('tidyverse') | ||||||
|  | library('repr') | ||||||
|  | library('jcolors') | ||||||
|  | library("reticulate") | ||||||
|  | 
 | ||||||
|  | #setwd(source_dir) | ||||||
# Bind reticulate to the project's Python 3.8 interpreter. TRUE is spelled out
# because T is an ordinary variable that can be reassigned.
use_python("/mnt/lustre01/work/ku0598/k202107/software/install/python/3.8.0/bin/python3", required = TRUE)
# Source the Python helper so that its functions become callable from R.
# NOTE(review): the helper appears to live at scripts/r_visual_jobs#pickle_reader.py,
# while the path below points into scripts/jupyter/ - confirm which one is intended.
source_python("/work/ku0598/k202107/git/mistral-job-evaluation/scripts/jupyter/r_visual_jobs#pickle_reader.py")
|  | 
 | ||||||
# Site-wide settings: input data location, output directories and the key
# that is XOR-ed with the crypted job id below.
global <- list(
    source_dir = '/work/ku0598/k202107/git/mistral-job-evaluation/data/eval_20200117',
    eval_dir = '../evaluation'
)
global[['fig_dir']] <- sprintf('%s/pictures/jobs', global[['eval_dir']])
global[['key']] <- 22897682

# Job to visualize and the input files derived from its id.
config <- list(crypted_jobid = 4296426) # has 16 levels
config[['jobid']] <- bitwXor(config[['crypted_jobid']], global[['key']])
config[['cat_fn']] <- sprintf("%s/600/cats/%s.json", global[['source_dir']], config[['jobid']])
config[['raw_fn']] <- sprintf('%s/600/jobdata/%s.pkl', global[['source_dir']], config[['jobid']])

# Plot settings.
graph_config <- list(
    # Columns used for colouring, one figure set per column:
    #   "name"   : file systems
    #   "host"   : compute nodes
    #   "metric" : I/O metrics
    # (e.g. c('host', 'name') to skip the per-metric colouring)
    cols = c('metric', 'host', 'name'),
    # Enabled views; any subset of 'default', 'jscore', 'nscore', 'mscore'
    views = c('jscore', 'default', 'nscore', 'mscore'),
    # Put an x-axis label on every n-th segment boundary
    x_breakpoint_interval = 5,
    # Segment size in minutes
    seg_size = 10
)
|  | 
 | ||||||
|  | 
 | ||||||
# Shorten the entries of the 'metric' column for display.
#
# The literal (non-regex) substitutions are applied one after another, in the
# order listed: the "host.lustre." and "stats." parts are removed and the
# ".bytes" / ".calls" parts are rewritten with an underscore.
#
# data: data frame with a 'metric' column
# Returns `data` with the 'metric' column replaced by the shortened names.
rename_metrics <- function(data) {
    substitutions <- list(
        c("host.lustre.", ""),
        c("stats.", ""),
        c(".bytes", "_bytes"),
        c(".calls", "_calls")
    )
    shortened <- data[['metric']]
    for (rule in substitutions) {
        shortened <- gsub(rule[1], rule[2], shortened, fixed = TRUE)
    }
    data[['metric']] <- shortened
    data
}
|  | 
 | ||||||
|  | 
 | ||||||
# Render one view of the binned job data as a stacked bar chart and save it
# as <fig_dir>/<jobid>/<view>_<col>.png.
#
# Arguments:
#   gconf         list providing 'fig_dir', the root directory for figures
#   cconf         list providing the integer 'jobid' used as sub-directory name
#   data          data frame with the columns 'bin' (x axis) and 'score'
#                 (y axis), the column named by `col`, and the columns
#                 'metric' / 'host' needed for the faceted views
#   view          'jscore' (no facets), 'default' (one row per metric),
#                 'nscore' (one row per host) or 'mscore' (host x metric
#                 grid); any other value saves nothing
#   col           column mapped to the fill colour: 'host', 'metric' or 'name'
#   x_breakpoints values of 'bin' that receive an x-axis label
#
# Returns (invisibly) the value of ggsave(), or NULL when nothing was saved.
visualize_categories <- function(gconf, cconf, data, view, col, x_breakpoints) {
    out_dir <- sprintf('%s/%d', gconf[['fig_dir']], cconf[['jobid']])
    # The function is called once per view/column pair, so the directory
    # normally exists already; do not warn about that.
    dir.create(out_dir, recursive = TRUE, showWarnings = FALSE)

    # Legend title; fail early with a clear message for an unknown column.
    gtitle <- switch(col,
        host = 'Node',
        metric = 'Metric',
        name = 'File system',
        stop(sprintf("unsupported col '%s' (expected 'host', 'metric' or 'name')", col),
             call. = FALSE)
    )

    # Plot shared by all views
    p <- (
        ggplot(data, aes_string(x = 'bin', y = 'score', fill = col))
        + geom_bar(stat = 'identity')
        + scale_x_discrete(breaks = x_breakpoints)
        + guides(
            fill = guide_legend(title = gtitle, nrow = 15)
        )
        + ylab('JScore')
        + xlab('Runtime in minutes')
        + theme_linedraw()
        + theme(
            legend.spacing.y = unit(0, 'cm'),
            legend.text = element_text(size = 8, margin = margin(t = 1)),
            strip.text.x = element_text(size = 8, color = "black"),
            strip.text.y = element_text(size = 8, color = "black"),
            legend.key = element_rect(size = 1),
            legend.key.size = unit(0.5, 'lines'),
            strip.background = element_rect(color = "black", fill = "#FFFFFF", linetype = "solid")
        )
    )
    if (col == 'metric') {
        p <- p + scale_fill_jcolors("pal12")
    }

    # View-specific facets, labels and figure size (width, height)
    fig_size <- NULL
    if (view == 'jscore') {
        fig_size <- c(10, 2.5)
    } else if (view == 'default') {
        p <- (
            p
            + facet_grid(metric ~ .)
            + ylab('Score')
            + theme(
                legend.position = 'none',
                strip.text.y = element_text(angle = 0)
            )
        )
        fig_size <- c(7, 7)
    } else if (view == 'nscore') {
        p <- (
            p
            + facet_grid(host ~ .)
            + ylab('NScore')
        )
        if (col == 'name') {
            p <- p + theme(legend.position = 'bottom')
        }
        fig_size <- c(4, 4)
    } else if (view == 'mscore') {
        p <- (
            p
            + facet_grid(host ~ metric)
            + ylab('MScore')
            + theme(
                legend.position = 'none',
                axis.text.x = element_text(angle = 90, hjust = 1)
            )
        )
        fig_size <- c(8, 4)
    }

    if (is.null(fig_size)) {
        return(invisible(NULL))
    }
    fn <- sprintf('%s/%s_%s.png', out_dir, view, col)
    # Pass the plot explicitly instead of relying on ggplot2's last_plot().
    ggsave(fn, plot = p, width = fig_size[1], height = fig_size[2])
}
|  | 
 | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
# Split the job runtime into segments of graph_config[['seg_size']] minutes,
# sum the category values per file system / metric / host / segment and
# render every configured colour column and view.
# NOTE(review): cat_fn ends in ".json" but is parsed with read.csv() - confirm
# that the file really is CSV.
cat_data <- rename_metrics(read.csv(config[['cat_fn']])) # categorized data
stopifnot(nrow(cat_data) > 0)
cat_data[['rmin']] <- cat_data[['runtime']] / 60 # runtime in minutes
duration <- max(ceiling(cat_data[['rmin']]))
# Round the upper edge up to a whole number of segments (at least one):
# seq(0, duration, seg_size) would stop short of `duration` whenever it is not
# a multiple of the segment size, and the trailing samples would get bin NA.
bins <- seq(
    0,
    max(graph_config[['seg_size']],
        ceiling(duration / graph_config[['seg_size']]) * graph_config[['seg_size']]),
    graph_config[['seg_size']]
)

# Each segment is labelled with its upper edge; include.lowest = TRUE keeps
# samples taken at runtime 0 in the first segment instead of turning them
# into NA.
d2 <- cat_data %>%
    mutate(bin = cut(rmin, breaks = bins, labels = bins[-1], include.lowest = TRUE))
d3 <- d2 %>%
    group_by(name, metric, host, bin) %>%
    summarise(score = sum(cat)) %>%
    ungroup()

x_breakpoints <- bins[seq(1, length(bins), graph_config[['x_breakpoint_interval']])]

for (col in graph_config[['cols']]) {
    for (view in graph_config[['views']]) {
        visualize_categories(global, config, d3, view, col, x_breakpoints)
    }
}
|  | 
 | ||||||
|  | 
 | ||||||
|  | ## TODO | ||||||
|  | #visualize_rawdata <- function(data) { | ||||||
|  | #} | ||||||
|  | 
 | ||||||
|  | #pickle_data <- rename_metrics(read_pickle_file(config[['raw_fn']])) # raw data | ||||||
|  | #print(head(pickle_data)) | ||||||
|  | #offset = min(pickle_data$timestamp) | ||||||
|  | #dat = pickle_data[complete.cases(pickle_data),] | ||||||
|  | #dat$runtime = dat$timestamp - offset | ||||||
|  | #dat['rmin'] = dat['runtime'] / 60 # runtime in minutes | ||||||
|  | 
 | ||||||
|  | #visualize_rawdata(dat) | ||||||
|  | 
 | ||||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user