From 0bb2d52f582880a4af9d0b0611b692e74e454d78 Mon Sep 17 00:00:00 2001
From: Eugen Betke
Date: Wed, 24 Oct 2018 16:54:18 +0200
Subject: [PATCH] Analysis scripts

---
 eval_analysis.R     | 76 ++++++++++++++++++++++++++------------------
 eval_runtime.R      | 76 ++++++++++++++++++++++++++++++++++++++++++++
 mkdb.py             | 77 +++++++++++++++++++--------------------------
 output_converter.sh | 58 ++++++++++++++++++++++++++++++++++
 4 files changed, 212 insertions(+), 75 deletions(-)
 create mode 100755 eval_runtime.R
 create mode 100755 output_converter.sh

diff --git a/eval_analysis.R b/eval_analysis.R
index afe7416dc..c1cd0b36d 100755
--- a/eval_analysis.R
+++ b/eval_analysis.R
@@ -53,6 +53,9 @@ dims_list = data.frame(h, w, event)
 
 # df is a data frame
 
+for (scale in c("linear", "logarithmic")) {
+
+
 fss = unique(dbdata$fs)
 for (fs in fss) {
 
@@ -69,55 +72,66 @@ print(api)
 
 for (app in apps) {
 data3 = data2[app == data2$app, ]
-types = unique(data3$type)
+iotypes = unique(data3$iotype)
 
 print(app)
 
-for (type in types) {
-data = data3[type == data3$type, ]
+for (iotype in iotypes) {
+data = data3[iotype == data3$iotype, ]
 
-print(type)
+print(iotype)
 
-    ggplot(data=data, aes(x=nn, y=write, colour=as.factor(blocksize/1024), group=blocksize), ymin=0) +
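+    # Performance over the node counts, one panel per PPN and accesstype/striping
+    # combination, coloured by block size: points are the individual repetitions,
+    # lines the per-blocksize means.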
+    p = ggplot(data=data, aes(x=nn, y=perf, colour=as.factor(blocksize/1024), group=blocksize), ymin=0) +
     #ggtitle("Write") +
-    facet_grid(ppn ~ ., labeller = labeller(nn = as_labeller(nn_lab), ppn = as_labeller(ppn_lab))) +
+    facet_grid(ppn ~ accesstype + striping, labeller = labeller(nn = as_labeller(nn_lab), ppn = as_labeller(ppn_lab))) +
     xlab("Nodes") +
     ylab("Performance in MiB/s") +
     theme(axis.text.x=element_text(angle=90, hjust=0.95, vjust=0.5)) +
     theme(legend.position="bottom") +
-    #scale_y_log10() +
-    scale_x_continuous(breaks = c(unique(data$nn))) +
+    #scale_x_continuous(breaks = c(unique(data$nn))) +
+    scale_x_log10(breaks = c(unique(data$nn))) +
     scale_color_manual(name="Blocksize in KiB: ", values=c('#999999','#E69F00', '#56B4E9', '#000000'), breaks=sort(unique(data$blocksize)/1024)) +
     #stat_summary(fun.y="median", geom="line", aes(group=factor(blocksize))) +
     stat_summary(fun.y="mean", geom="line", aes(group=factor(blocksize))) +
     #geom_boxplot()
     geom_point()
 
-    filename_eps = sprintf("%s/performance_%s_%s_%s_%s_%s.eps", folder_out, app, fs, api, type, "write")
-    filename_png = sprintf("%s/performance_%s_%s_%s_%s_%s.png", folder_out, app, fs, api, type, "write")
+
+    if ( "logarithmic" == scale ) {
+      p = p + scale_y_log10()
+    }
+
+    filename_eps = sprintf("%s/performance_%s_%s_%s_%s_%s_%s.eps", folder_out, app, fs, api, iotype, "write", scale)
+    filename_png = sprintf("%s/performance_%s_%s_%s_%s_%s_%s.png", folder_out, app, fs, api, iotype, "write", scale)
-    ggsave(filename_png, width = 6, height = 10)
-    ggsave(filename_eps, width = 6, height = 10)
+    ggsave(filename_png, plot = p, width = 6, height = 10)
+    ggsave(filename_eps, plot = p, width = 6, height = 10)
-    system(sprintf("epstopdf %s", filename_eps))
+    #system(sprintf("epstopdf %s", filename_eps))
     system(sprintf("rm %s", filename_eps))
 
-    ggplot(data=data, aes(x=nn, y=read, colour=as.factor(blocksize/1024), group=blocksize), ymin=0) +
-    #ggtitle("Read") +
-    facet_grid(ppn ~ ., labeller = labeller(nn = as_labeller(nn_lab), ppn = as_labeller(ppn_lab))) +
-    xlab("Nodes") +
-    ylab("Performance in MiB/s") +
-    theme(axis.text.x=element_text(angle=90, hjust=0.95, vjust=0.5)) +
-    theme(legend.position="bottom") +
-    #scale_y_log10() +
-    scale_x_continuous(breaks = c(unique(data$nn))) +
-    scale_color_manual(name="Blocksize in KiB: ", values=c('#999999','#E69F00', '#56B4E9', '#000000'), breaks=sort(unique(data$blocksize)/1024)) +
-    #stat_summary(fun.y="median", geom="line", aes(group=factor(blocksize))) +
-    stat_summary(fun.y="mean", geom="line", aes(group=factor(blocksize))) +
-    #geom_boxplot()
-    geom_point()
-    filename_eps = sprintf("%s/performance_%s_%s_%s_%s_%s.eps", folder_out, app, fs, api, type, "read")
-    filename_png = sprintf("%s/performance_%s_%s_%s_%s_%s.png", folder_out, app, fs, api, type, "read")
-    ggsave(filename_png, width = 6, height = 10)
-    ggsave(filename_eps, width = 6, height = 10)
-    system(sprintf("epstopdf %s", filename_eps))
-    system(sprintf("rm %s", filename_eps))
+    #p = ggplot(data=data, aes(x=nn, y=read, colour=as.factor(blocksize/1024), group=blocksize), ymin=0) +
+    #  #ggtitle("Read") +
+    #  facet_grid(ppn ~ ., labeller = labeller(nn = as_labeller(nn_lab), ppn = as_labeller(ppn_lab))) +
+    #  xlab("Nodes") +
+    #  ylab("Performance in MiB/s") +
+    #  theme(axis.text.x=element_text(angle=90, hjust=0.95, vjust=0.5)) +
+    #  theme(legend.position="bottom") +
+    #  #scale_x_continuous(breaks = c(unique(data$nn))) +
+    #  scale_x_log10(breaks = c(unique(data$nn))) +
+    #  scale_color_manual(name="Blocksize in KiB: ", values=c('#999999','#E69F00', '#56B4E9', '#000000'), breaks=sort(unique(data$blocksize)/1024)) +
+    #  #stat_summary(fun.y="median", geom="line", aes(group=factor(blocksize))) +
+    #  stat_summary(fun.y="mean", geom="line", aes(group=factor(blocksize))) +
+    #  #geom_boxplot()
+    #  geom_point()
+
+    #if ( "logarithmic" == scale ) {
+    #  p = p + scale_y_log10()
+    #}
+
+    #filename_eps = sprintf("%s/performance_%s_%s_%s_%s_%s_%s.eps", folder_out, app, fs, api, type, "read", scale)
+    #filename_png = sprintf("%s/performance_%s_%s_%s_%s_%s_%s.png", folder_out, app, fs, api, type, "read", scale)
+    #ggsave(filename_png, width = 3, height = 10)
+    #ggsave(filename_eps, width = 3, height = 10)
+    ##system(sprintf("epstopdf %s", filename_eps))
+    #system(sprintf("rm %s", filename_eps))
 }}}}
+}
diff --git a/eval_runtime.R b/eval_runtime.R
new file mode 100755
index 000000000..5919cf851
--- /dev/null
+++ b/eval_runtime.R
@@ -0,0 +1,76 @@
+#!/usr/bin/env Rscript
+
+library(sqldf)
+library(plyr)
+library(plot3D)
+library(ggplot2)
+
+
+args = commandArgs(trailingOnly=TRUE)
+print(args)
+if (2 != length(args)) {
+  print("Requires 2 parameters")
+  q()
+}
+
+file_db = args[1]
+folder_out = args[2]
+print(file_db)
+
+make_facet_label <- function(variable, value){
+  return(paste0(value, " KiB"))
+}
+
+
+#connection = dbConnect(SQLite(), dbname='results.ddnime.db')
+print(file_db)
+connection = dbConnect(SQLite(), dbname=file_db)
+
+#dbdata = dbGetQuery(connection,'select mnt, siox, avg(duration) as ad, app, procs, blocksize from p group by mnt, siox, procs, blocksize, app')
+#dbdata = dbGetQuery(connection,'select * from p where tag=="mpio-individual"')
+#dbdata = dbGetQuery(connection,'select *, (x*y*z) as blocksize from p where count=8')
+#dbdata = dbGetQuery(connection,'select * from p where count<5')
+dbdata = dbGetQuery(connection,'select * from p')
+dbdata[,"blocksize"] = dbdata$tsize
+
+
+summary(dbdata)
+
+nn_lab <- sprintf(fmt="NN=%d", unique(dbdata$nn))
+names(nn_lab) <- unique(dbdata$nn)
+ppn_lab <- sprintf(fmt="PPN=%d", unique(dbdata$ppn))
+names(ppn_lab) <- unique(dbdata$ppn)
+breaks <- c(unique(dbdata$blocksize))
+
+
+
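+  # Distribution of the measured write (wio) and read (rio) times over all runs,
+  # drawn as two overlaid histograms with a bin width of 1.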
MiB/s") + + #theme(axis.text.x=element_text(angle=90, hjust=0.95, vjust=0.5)) + + #theme(legend.position="bottom") + + scale_x_continuous(breaks = c(60, 120)) + + #scale_x_log10(breaks = c(unique(data$nn))) + + scale_color_manual(name="Blocksize in KiB: ", values=c('#999999','#E69F00', '#56B4E9', '#000000'), breaks=sort(unique(dbdata$blocksize)/1024)) + + #stat_summary(fun.y="median", geom="line", aes(group=factor(blocksize))) + + #stat_summary(fun.y="mean", geom="line", aes(group=factor(blocksize))) + + #geom_boxplot() + geom_histogram(binwidth=1, aes(wio, fill="red")) + + geom_histogram(binwidth=1, aes(rio, fill="blue")) + #geom_density(aes(wio, color="blue")) + + #geom_density(aes(rio, color="red")) + #geom_freqpoly(binwidth=4) + + + filename_eps = sprintf("%s/runtime.eps", folder_out) + filename_png = sprintf("%s/runtime.png", folder_out) + + ggsave(filename_png, width = 10, height = 3) + ggsave(filename_eps, width = 10, height = 3) + #system(sprintf("epstopdf %s", filename_eps)) + system(sprintf("rm %s", filename_eps)) + diff --git a/mkdb.py b/mkdb.py index 4b07d3420..f943f28cc 100755 --- a/mkdb.py +++ b/mkdb.py @@ -25,7 +25,9 @@ def parse(filename, conn): for line in f: #COUNT:1#NN:1#PPN:4#API:POSIX#T:10485760.txt - m = re.match("COUNT:([0-9]+)#NN:([0-9]+)#PPN:([0-9]+)#API:([\w]+)#T:([0-9]+).txt", os.path.basename(filename)) + #merged_output/COUNT:1#NN:8#PPN:8#API:POSIX#T:16384#APP:ior-default#FS:lustre#IOTYPE:random#ACCESSTYPE:write#STRIPING:yes.txt + + m = re.match("COUNT:([0-9]+)#NN:([0-9]+)#PPN:([0-9]+)#API:([\w]+)#T:([0-9]+)#APP:([-\w]+)#FS:([\w]+)#IOTYPE:([\w]+)#ACCESSTYPE:([\w]+)#STRIPING:([\w]+).txt", os.path.basename(filename)) if (m): metadata["count"] = int(m.group(1)) @@ -33,9 +35,11 @@ def parse(filename, conn): metadata["ppn"] = int(m.group(3)) metadata["api"] = m.group(4) metadata["tsize"] = m.group(5) - metadata["fs"] = "lustre" - metadata["app"] = "ior-default" - metadata["type"] = "random" + metadata["app"] = m.group(6) + metadata["fs"] = m.group(7) + metadata["iotype"] = m.group(8) + metadata["accesstype"] = m.group(9) + metadata["striping"] = m.group(10) else: print('couldn\'t parse', os.path.basename(filename)) @@ -51,34 +55,22 @@ def parse(filename, conn): if (m): metadata["fsize_ctl"] = m.group(1) - m = re.match("read[\s]+([0-9.]+)[\s]+([0-9.]+)[\s]+([0-9.]+)[\s]+([0-9.]+)[\s]+([0-9.]+)[\s]+([0-9.]+)[\s]+([0-9.]+)[\s]+([0-9.]+)[\s]*$", line) + m = re.match("(read|write)[\s]+([0-9.]+)[\s]+([0-9.]+)[\s]+([0-9.]+)[\s]+([0-9.]+)[\s]+([0-9.]+)[\s]+([0-9.]+)[\s]+([0-9.]+)[\s]+([0-9.]+)[\s]*$", line) if (m): - if m.group(8) not in data: - data[m.group(8)] = dict() - data[m.group(8)]["read"] = float(m.group(1)) - data[m.group(8)]["ropen"] = float(m.group(4)) - data[m.group(8)]["rio"] = float(m.group(5)) - data[m.group(8)]["rclose"] = float(m.group(6)) - data[m.group(8)]["rtotal"] = float(m.group(7)) - data[m.group(8)]["riter"] = float(m.group(8)) - data[m.group(8)].update(metadata) + if m.group(9) not in data: + data[m.group(9)] = dict() + data[m.group(9)]["perf"] = float(m.group(2)) + data[m.group(9)]["open"] = float(m.group(5)) + data[m.group(9)]["io"] = float(m.group(6)) + data[m.group(9)]["close"] = float(m.group(7)) + data[m.group(9)]["total"] = float(m.group(8)) + data[m.group(9)]["iter"] = float(m.group(9)) + data[m.group(9)].update(metadata) - m = re.match("write[\s]+([0-9.]+)[\s]+([0-9.]+)[\s]+([0-9.]+)[\s]+([0-9.]+)[\s]+([0-9.]+)[\s]+([0-9.]+)[\s]+([0-9.]+)[\s]+([0-9.]+)[\s]*$", line) - if (m): - if m.group(8) not in data: - data[m.group(8)] = dict() - 
+        m = re.match("(read|write)[\s]+([0-9.]+)[\s]+([0-9.]+)[\s]+([0-9.]+)[\s]+([0-9.]+)[\s]+([0-9.]+)[\s]+([0-9.]+)[\s]+([0-9.]+)[\s]+([0-9.]+)[\s]*$", line)
         if (m):
-            if m.group(8) not in data:
-                data[m.group(8)] = dict()
-            data[m.group(8)]["read"] = float(m.group(1))
-            data[m.group(8)]["ropen"] = float(m.group(4))
-            data[m.group(8)]["rio"] = float(m.group(5))
-            data[m.group(8)]["rclose"] = float(m.group(6))
-            data[m.group(8)]["rtotal"] = float(m.group(7))
-            data[m.group(8)]["riter"] = float(m.group(8))
-            data[m.group(8)].update(metadata)
+            if m.group(9) not in data:
+                data[m.group(9)] = dict()
+            data[m.group(9)]["perf"] = float(m.group(2))
+            data[m.group(9)]["open"] = float(m.group(5))
+            data[m.group(9)]["io"] = float(m.group(6))
+            data[m.group(9)]["close"] = float(m.group(7))
+            data[m.group(9)]["total"] = float(m.group(8))
+            data[m.group(9)]["iter"] = float(m.group(9))
+            data[m.group(9)].update(metadata)
 
-        m = re.match("write[\s]+([0-9.]+)[\s]+([0-9.]+)[\s]+([0-9.]+)[\s]+([0-9.]+)[\s]+([0-9.]+)[\s]+([0-9.]+)[\s]+([0-9.]+)[\s]+([0-9.]+)[\s]*$", line)
-        if (m):
-            if m.group(8) not in data:
-                data[m.group(8)] = dict()
-            data[m.group(8)] = {}
-            data[m.group(8)]["write"] = float(m.group(1))
-            data[m.group(8)]["wopen"] = float(m.group(4))
-            data[m.group(8)]["wio"] = float(m.group(5))
-            data[m.group(8)]["wclose"] = float(m.group(6))
-            data[m.group(8)]["wtotal"] = float(m.group(7))
-            data[m.group(8)]["witer"] = float(m.group(8))
-            data[m.group(8)].update(metadata)
 
     for iteration,entry in data.items():
-        if len(entry) == 22:
+        if len(entry) == 18:
             print("Success")
             columns = ", ".join(entry.keys())
             placeholders = ':' + ', :'.join(entry.keys())
@@ -102,32 +94,29 @@ try:
     tbl = 'CREATE TABLE p (\
         filename text, \
         count int, \
-        app text, \
         nn int, \
         ppn int, \
        api text, \
-        fs text, \
-        type text, \
        tsize float, \
+        app text, \
+        fs text, \
+        iotype text, \
+        accesstype text, \
+        striping text, \
        fsize float, \
        fsize_ctl txt, \
-        ropen float, \
-        rio float, \
-        rclose float, \
-        rtotal float, \
-        read float, \
-        riter float, \
-        wopen float, \
-        wio float, \
-        wclose float, \
-        wtotal float, \
-        write float, \
-        witer float, \
-        primary key(filename, witer, riter) \
+        open float, \
+        io float, \
+        close float, \
+        total float, \
+        perf float, \
+        iter float, \
+        primary key(filename, iter) \
        )'
     conn.execute(tbl)
-except:
+except Exception as e:
     print("could not create db")
+    print(e)
diff --git a/output_converter.sh b/output_converter.sh
new file mode 100755
index 000000000..3946cfb06
--- /dev/null
+++ b/output_converter.sh
@@ -0,0 +1,58 @@
+#!/bin/bash
+
+find "./output_v2" -type f -name "*.txt" -print0 |
+  while IFS= read -r -d $'\0' fn; do
+    headln="$(grep Finished $fn -n | head -n 1 | cut -d":" -f 1)"
+    totalln=$(wc -l $fn | cut -f 1 -d" ")
+    tailln=$(($totalln - $headln))
+
+    echo $fn $headln $tailln $totalln
+    bn=$(basename $fn)
+
+    extension="${bn##*.}"
+    filename="${bn%.*}"
+
+    count=$(awk -F# '{print $1}' <<< $filename)
+    nn=$(awk -F# '{print $2}' <<< $filename)
+    ppn=$(awk -F# '{print $3}' <<< $filename)
+    api=$(awk -F# '{print $4}' <<< $filename)
+    t=$(awk -F# '{print $5}' <<< $filename)
+
+    FNEXT="$count#$nn#$ppn#$api#$t#APP:ior-default#FS:lustre#IOTYPE:random#ACCESSTYPE:read#STRIPING:yes"
+
+    cp $fn "merged_output/$FNEXT.$extension"
+  done
+
+
+find "./output" -type f -name "*.txt" -print0 |
+  while IFS= read -r -d $'\0' fn; do
+    headln="$(grep Finished $fn -n | head -n 1 | cut -d":" -f 1)"
+    totalln=$(wc -l $fn | cut -f 1 -d" ")
+    tailln=$(($totalln - $headln))
+
+    echo $fn $headln $tailln $totalln
+    bn=$(basename $fn)
+
+    extension="${bn##*.}"
+    filename="${bn%.*}"
+
+    STRIPING="yes"
+    if [[ "" != $(echo $bn| grep MPIIO) ]]; then
+      STRIPING="no"
+    fi
+
+    FNEXT="APP:ior-default#FS:lustre#IOTYPE:random"
+
+    head -n $headln $fn > "merged_output/$filename#$FNEXT#ACCESSTYPE:write#STRIPING:yes.$extension"
+
+    outfnread="merged_output/$filename#$FNEXT#ACCESSTYPE:read#STRIPING:$STRIPING.$extension"
+
+    if [ -e $outfnread ]; then
+      echo "stopping $outfnread, already exists"
+      exit
+    fi
+
+    tail -n $tailln $fn > $outfnread
+  done
+
+