Merge branch 'master' of http://git.hps.vi4io.org/eugen.betke/mistral-io-datasets into master
This commit is contained in:
		
						commit
						65f8cdb98d
					
				
							
								
								
									
										
											BIN
										
									
								
								datasets/job_codings_v4.csv
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										
											BIN
										
									
								
								datasets/job_codings_v4.csv
									
									
									
									
									
										Normal file
									
								
							
										
											Binary file not shown.
										
									
								
							| Can't render this file because it is too large. | 
| @ -10,7 +10,7 @@ import matplotlib.cm as cm | ||||
| jobs = sys.argv[1].split(",") | ||||
| prefix = sys.argv[2].split(",") | ||||
| 
 | ||||
| fileformat = ".pdf" | ||||
| fileformat = ".png" | ||||
| 
 | ||||
| print("Plotting the job: " + str(sys.argv[1])) | ||||
| print("Plotting with prefix: " + str(sys.argv[2])) | ||||
| @ -83,7 +83,7 @@ def plot(prefix, header, row): | ||||
|     timeseries = [ float(x) for x in timeseries] | ||||
|     if sum(timeseries) == 0: | ||||
|       continue | ||||
|     timeseries = [ [k, x, s] for (s,x) in zip(timeseries, range(1, len(timeseries))) ] | ||||
|     timeseries = [ [k, x, s] for (s,x) in zip(timeseries, range(0, len(timeseries))) ] | ||||
|     result.extend(timeseries) | ||||
| 
 | ||||
|   if len(result) == 0: | ||||
|  | ||||
							
								
								
									
										147
									
								
								scripts/plot-single-ks-jobs.py
									
									
									
									
									
										Executable file
									
								
							
							
						
						
									
										147
									
								
								scripts/plot-single-ks-jobs.py
									
									
									
									
									
										Executable file
									
								
							| @ -0,0 +1,147 @@ | ||||
| #!/usr/bin/env python3 | ||||
| 
 | ||||
| import csv | ||||
| import sys | ||||
| from pandas import DataFrame | ||||
| from pandas import Grouper | ||||
| import seaborn as sns | ||||
| from matplotlib import pyplot | ||||
| import matplotlib.cm as cm | ||||
| 
 | ||||
# Command line: <jobids> <prefixes>, both comma-separated lists.
# The i-th prefix is used for the output files of the i-th job id.
if len(sys.argv) < 3:
    # Fail with a readable message instead of a bare IndexError traceback.
    sys.exit("Usage: " + sys.argv[0] + " <jobid>[,<jobid>...] <prefix>[,<prefix>...]")

jobs = sys.argv[1].split(",")
prefix = sys.argv[2].split(",")

# File extension (including the dot) appended to every saved figure.
fileformat = ".png"

print("Plotting the job: " + str(sys.argv[1]))
print("Plotting with prefix: " + str(sys.argv[2]))
| 
 | ||||
| 
 | ||||
# Line colour per metric: the n-th name below gets the n-th tab10 colour.
colorMap = {
    metric: cm.tab10(slot)
    for slot, metric in enumerate((
        "md_file_create",
        "md_file_delete",
        "md_mod",
        "md_other",
        "md_read",
        "read_bytes",
        "read_calls",
        "write_bytes",
        "write_calls",
    ))
}

# Matplotlib marker symbol per metric.
markerMap = dict(
    md_file_create="^",
    md_file_delete="v",
    md_other=".",
    md_mod="<",
    md_read=">",
    read_bytes="h",
    read_calls="H",
    write_bytes="D",
    write_calls="d",
)

# Matplotlib line style per metric: dotted for the md_* metrics,
# dashed for reads, dash-dotted for writes.
linestyleMap = {
    **dict.fromkeys(
        ("md_file_create", "md_file_delete", "md_mod", "md_other", "md_read"),
        ":",
    ),
    **dict.fromkeys(("read_bytes", "read_calls"), "--"),
    **dict.fromkeys(("write_bytes", "write_calls"), "-."),
}
| 
 | ||||
| # Plot the timeseries | ||||
def _plot_timeseries(metrics, labels, colors, style, fsize, fsizeFixed, xlim=None):
  """Draw the line plot of `metrics` on a new figure and label its axes.

  With fewer than four metrics, all of them share one fixed-size axes with a
  legend. Otherwise each metric gets its own subplot, labelled with the
  metric name. `xlim` optionally restricts the visible x range.
  """
  if len(labels) < 4:
    ax = metrics.plot(legend=True, xlim=xlim, sharex=True, grid=True, sharey=True, markersize=10, figsize=fsizeFixed, color=colors, style=style)
    ax.set_ylabel("Value")
  else:
    axes = metrics.plot(subplots=True, xlim=xlim, legend=False, sharex=True, grid=True, sharey=True, markersize=10, figsize=fsize, color=colors, style=style)
    for axis, label in zip(axes, labels):
      axis.set_ylabel(label)
  pyplot.xlabel("Segment number")


# Plot the timeseries
def plot(prefix, header, row):
  """Plot the metric time series of one job and save the figures.

  Args:
    prefix: String prepended to every output file name.
    header: Column names of the CSV file; must contain "jobid".
    row: Values of one CSV line, in the order given by `header`. Every
      column other than "jobid" holds a colon-separated list of numbers,
      one per segment.

  Writes `<prefix>timeseries<jobid><fileformat>` and
  `<prefix>hist<jobid><fileformat>`. Jobs with more than 50 segments also
  get `<prefix>timeseries<jobid>-30<fileformat>`, limited to x range 0-30.
  Metrics whose values sum to zero are skipped. If nothing is left, a
  message is printed and no file is written.
  """
  fields = dict(zip(header, row))
  jobid = fields.pop("jobid")

  # Long format: one [metric, segment index, value] record per data point.
  result = []
  for metric, encoded in fields.items():
    values = [float(v) for v in encoded.split(":")]
    if sum(values) == 0:
      continue
    result.extend([metric, segment, value] for segment, value in enumerate(values))

  if len(result) == 0:
    print("Empty job! Cannot plot!")
    return

  data = DataFrame(result, columns=["metrics", "segment", "value"])
  metrics = DataFrame()
  labels = []
  colors = []
  style = []
  # Group by the scalar column name: grouping by the list ["metrics"] makes
  # pandas >= 2.0 yield 1-tuples as group keys, which breaks the map lookups.
  for name, group in data.groupby("metrics"):
    style.append(linestyleMap[name] + markerMap[name])
    colors.append(colorMap[name])
    # Shorter display names for the two file metadata metrics.
    if name == "md_file_delete":
      name = "file_delete"
    if name == "md_file_create":
      name = "file_create"
    metrics[name] = group["value"].tolist()
    labels.append(name)

  fsize = (8, 1 + 1.1 * len(labels))
  fsizeFixed = (8, 2)
  fsizeHist = (8, 4)

  # Drop figures left over from a previous call.
  pyplot.close('all')

  _plot_timeseries(metrics, labels, colors, style, fsize, fsizeFixed)
  pyplot.savefig(prefix + "timeseries" + jobid + fileformat, bbox_inches='tight', dpi=150)

  metrics.hist(sharex=True, grid=True, sharey=True, figsize=fsizeHist, bins=10)
  pyplot.savefig(prefix + "hist" + jobid + fileformat, bbox_inches='tight', dpi=150)

  # Only jobs with more than 50 segments get an extra zoomed view of the
  # first 30. The count comes from the plotted frame, not from a variable
  # left over from the parsing loop.
  if len(metrics.index) <= 50:
    return

  _plot_timeseries(metrics, labels, colors, style, fsize, fsizeFixed, xlim=(0, 30))
  pyplot.savefig(prefix + "timeseries" + jobid + "-30" + fileformat, bbox_inches='tight', dpi=150)
 | ||||
| ### end plotting function | ||||
| 
 | ||||
| 
 | ||||
| 
 | ||||
# Scan the codings CSV and plot every row whose first column is one of the
# requested job ids. The position of the job id in `jobs` picks the prefix.
with open('./datasets/job_codings_v4.csv') as csv_file:
    csv_reader = csv.reader(csv_file, delimiter=',')
    # The first line holds the column names; the rest are data rows.
    header = next(csv_reader, None)
    for row in csv_reader:
        job = row[0].strip()
        if job in jobs:
            index = jobs.index(job)
            plot(prefix[index] + "-ks-" + str(index), header, row)
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user