Analysis scripts

This commit is contained in:
Eugen Betke 2018-10-24 16:54:18 +02:00
parent 67ae1ba065
commit 0bb2d52f58
4 changed files with 212 additions and 75 deletions

View File

@ -53,6 +53,9 @@ dims_list = data.frame(h, w, event) # df is a data frame
for (scale in c("linear", "logarithmic")) {
fss = unique(dbdata$fs)
for (fs in fss) {
@@ -69,55 +72,66 @@ print(api)
for (app in apps) {
data3 = data2[app == data2$app, ]
types = unique(data3$type)
iotypes = unique(data3$iotype)
print(app)
for (type in types) {
data = data3[type == data3$type, ]
for (iotype in iotypes) {
data = data3[iotype == data3$iotype, ]
print(type)
print(iotype)
ggplot(data=data, aes(x=nn, y=write, colour=as.factor(blocksize/1024), group=blocksize), ymin=0) +
p = ggplot(data=data, aes(x=nn, y=perf, colour=as.factor(blocksize/1024), group=blocksize), ymin=0) +
#ggtitle("Write") +
facet_grid(ppn ~ ., labeller = labeller(nn = as_labeller(nn_lab), ppn = as_labeller(ppn_lab))) +
facet_grid(ppn ~ accesstype + striping, labeller = labeller(nn = as_labeller(nn_lab), ppn = as_labeller(ppn_lab))) +
xlab("Nodes") +
ylab("Performance in MiB/s") +
theme(axis.text.x=element_text(angle=90, hjust=0.95, vjust=0.5)) +
theme(legend.position="bottom") +
#scale_y_log10() +
scale_x_continuous(breaks = c(unique(data$nn))) +
#scale_x_continuous(breaks = c(unique(data$nn))) +
scale_x_log10(breaks = c(unique(data$nn))) +
scale_color_manual(name="Blocksize in KiB: ", values=c('#999999','#E69F00', '#56B4E9', '#000000'), breaks=sort(unique(data$blocksize)/1024)) +
#stat_summary(fun.y="median", geom="line", aes(group=factor(blocksize))) +
stat_summary(fun.y="mean", geom="line", aes(group=factor(blocksize))) +
#geom_boxplot()
geom_point()
filename_eps = sprintf("%s/performance_%s_%s_%s_%s_%s.eps", folder_out, app, fs, api, type, "write")
filename_png = sprintf("%s/performance_%s_%s_%s_%s_%s.png", folder_out, app, fs, api, type, "write")
if ( "logarithmic" == scale ) {
p = p + scale_y_log10()
}
filename_eps = sprintf("%s/performance_%s_%s_%s_%s_%s_%s.eps", folder_out, app, fs, api, iotype, "write", scale)
filename_png = sprintf("%s/performance_%s_%s_%s_%s_%s_%s.png", folder_out, app, fs, api, iotype, "write", scale)
ggsave(filename_png, width = 6, height = 10)
ggsave(filename_eps, width = 6, height = 10)
system(sprintf("epstopdf %s", filename_eps))
#system(sprintf("epstopdf %s", filename_eps))
system(sprintf("rm %s", filename_eps))
ggplot(data=data, aes(x=nn, y=read, colour=as.factor(blocksize/1024), group=blocksize), ymin=0) +
#ggtitle("Read") +
facet_grid(ppn ~ ., labeller = labeller(nn = as_labeller(nn_lab), ppn = as_labeller(ppn_lab))) +
xlab("Nodes") +
ylab("Performance in MiB/s") +
theme(axis.text.x=element_text(angle=90, hjust=0.95, vjust=0.5)) +
theme(legend.position="bottom") +
#scale_y_log10() +
scale_x_continuous(breaks = c(unique(data$nn))) +
scale_color_manual(name="Blocksize in KiB: ", values=c('#999999','#E69F00', '#56B4E9', '#000000'), breaks=sort(unique(data$blocksize)/1024)) +
#stat_summary(fun.y="median", geom="line", aes(group=factor(blocksize))) +
stat_summary(fun.y="mean", geom="line", aes(group=factor(blocksize))) +
#geom_boxplot()
geom_point()
filename_eps = sprintf("%s/performance_%s_%s_%s_%s_%s.eps", folder_out, app, fs, api, type, "read")
filename_png = sprintf("%s/performance_%s_%s_%s_%s_%s.png", folder_out, app, fs, api, type, "read")
ggsave(filename_png, width = 6, height = 10)
ggsave(filename_eps, width = 6, height = 10)
system(sprintf("epstopdf %s", filename_eps))
system(sprintf("rm %s", filename_eps))
#p = ggplot(data=data, aes(x=nn, y=read, colour=as.factor(blocksize/1024), group=blocksize), ymin=0) +
# #ggtitle("Read") +
# facet_grid(ppn ~ ., labeller = labeller(nn = as_labeller(nn_lab), ppn = as_labeller(ppn_lab))) +
# xlab("Nodes") +
# ylab("Performance in MiB/s") +
# theme(axis.text.x=element_text(angle=90, hjust=0.95, vjust=0.5)) +
# theme(legend.position="bottom") +
# #scale_x_continuous(breaks = c(unique(data$nn))) +
# scale_x_log10(breaks = c(unique(data$nn))) +
# scale_color_manual(name="Blocksize in KiB: ", values=c('#999999','#E69F00', '#56B4E9', '#000000'), breaks=sort(unique(data$blocksize)/1024)) +
# #stat_summary(fun.y="median", geom="line", aes(group=factor(blocksize))) +
# stat_summary(fun.y="mean", geom="line", aes(group=factor(blocksize))) +
# #geom_boxplot()
# geom_point()
#if ( "logarithmic" == scale ) {
# p = p + scale_y_log10()
#}
#filename_eps = sprintf("%s/performance_%s_%s_%s_%s_%s_%s.eps", folder_out, app, fs, api, type, "read", scale)
#filename_png = sprintf("%s/performance_%s_%s_%s_%s_%s_%s.png", folder_out, app, fs, api, type, "read", scale)
#ggsave(filename_png, width = 3, height = 10)
#ggsave(filename_eps, width = 3, height = 10)
##system(sprintf("epstopdf %s", filename_eps))
#system(sprintf("rm %s", filename_eps))
}}}}
}

76
eval_runtime.R Executable file
View File

@@ -0,0 +1,76 @@
#!/usr/bin/env Rscript
library(sqldf)
library(plyr)
library(plot3D)
library(ggplot2)
args = commandArgs(trailingOnly=TRUE)
print(args)
if (2 != length(args)) {
print("Requires 2 parameters)")
q()
}
file_db = args[1]
folder_out = args[2]
print(file_db)
make_facet_label <- function(variable, value){
return(paste0(value, " KiB"))
}
#connection = dbConnect(SQLite(), dbname='results.ddnime.db')
print(file_db)
connection = dbConnect(SQLite(), dbname=file_db)
#dbdata = dbGetQuery(connection,'select mnt, siox, avg(duration) as ad, app, procs, blocksize from p group by mnt, siox, procs, blocksize, app')
#dbdata = dbGetQuery(connection,'select * from p where tag=="mpio-individual"')
#dbdata = dbGetQuery(connection,'select *, (x*y*z) as blocksize from p where count=8')
#dbdata = dbGetQuery(connection,'select * from p where count<5')
dbdata = dbGetQuery(connection,'select * from p')
dbdata[,"blocksize"] = dbdata$tsize
summary(dbdata)
nn_lab <- sprintf(fmt="NN=%d", unique(dbdata$nn))
names(nn_lab) <- unique(dbdata$nn)
ppn_lab <- sprintf(fmt="PPN=%d", unique(dbdata$ppn))
names(ppn_lab) <- unique(dbdata$ppn)
breaks <- c(unique(dbdata$blocksize))
#p = ggplot(data=dbdata, aes(x=nn, y=rio, colour=as.factor(blocksize/1024), group=blocksize), ymin=0) +
p = ggplot(data=dbdata) +
#ggtitle("Read") +
#facet_grid(ppn ~ ., labeller = labeller(nn = as_labeller(nn_lab), ppn = as_labeller(ppn_lab))) +
#xlab("Nodes") +
#ylab("Performance in MiB/s") +
#theme(axis.text.x=element_text(angle=90, hjust=0.95, vjust=0.5)) +
#theme(legend.position="bottom") +
scale_x_continuous(breaks = c(60, 120)) +
#scale_x_log10(breaks = c(unique(data$nn))) +
scale_color_manual(name="Blocksize in KiB: ", values=c('#999999','#E69F00', '#56B4E9', '#000000'), breaks=sort(unique(dbdata$blocksize)/1024)) +
#stat_summary(fun.y="median", geom="line", aes(group=factor(blocksize))) +
#stat_summary(fun.y="mean", geom="line", aes(group=factor(blocksize))) +
#geom_boxplot()
geom_histogram(binwidth=1, aes(wio, fill="red")) +
geom_histogram(binwidth=1, aes(rio, fill="blue"))
#geom_density(aes(wio, color="blue")) +
#geom_density(aes(rio, color="red"))
#geom_freqpoly(binwidth=4)
filename_eps = sprintf("%s/runtime.eps", folder_out)
filename_png = sprintf("%s/runtime.png", folder_out)
ggsave(filename_png, width = 10, height = 3)
ggsave(filename_eps, width = 10, height = 3)
#system(sprintf("epstopdf %s", filename_eps))
system(sprintf("rm %s", filename_eps))

77
mkdb.py
View File

@@ -25,7 +25,9 @@ def parse(filename, conn):
for line in f:
#COUNT:1#NN:1#PPN:4#API:POSIX#T:10485760.txt
m = re.match("COUNT:([0-9]+)#NN:([0-9]+)#PPN:([0-9]+)#API:([\w]+)#T:([0-9]+).txt", os.path.basename(filename))
#merged_output/COUNT:1#NN:8#PPN:8#API:POSIX#T:16384#APP:ior-default#FS:lustre#IOTYPE:random#ACCESSTYPE:write#STRIPING:yes.txt
m = re.match("COUNT:([0-9]+)#NN:([0-9]+)#PPN:([0-9]+)#API:([\w]+)#T:([0-9]+)#APP:([-\w]+)#FS:([\w]+)#IOTYPE:([\w]+)#ACCESSTYPE:([\w]+)#STRIPING:([\w]+).txt", os.path.basename(filename))
if (m):
metadata["count"] = int(m.group(1))
@@ -33,9 +35,11 @@ def parse(filename, conn):
metadata["ppn"] = int(m.group(3))
metadata["api"] = m.group(4)
metadata["tsize"] = m.group(5)
metadata["fs"] = "lustre"
metadata["app"] = "ior-default"
metadata["type"] = "random"
metadata["app"] = m.group(6)
metadata["fs"] = m.group(7)
metadata["iotype"] = m.group(8)
metadata["accesstype"] = m.group(9)
metadata["striping"] = m.group(10)
else:
print('couldn\'t parse', os.path.basename(filename))
@@ -51,34 +55,22 @@ def parse(filename, conn):
if (m):
metadata["fsize_ctl"] = m.group(1)
m = re.match("read[\s]+([0-9.]+)[\s]+([0-9.]+)[\s]+([0-9.]+)[\s]+([0-9.]+)[\s]+([0-9.]+)[\s]+([0-9.]+)[\s]+([0-9.]+)[\s]+([0-9.]+)[\s]*$", line)
m = re.match("(read|write)[\s]+([0-9.]+)[\s]+([0-9.]+)[\s]+([0-9.]+)[\s]+([0-9.]+)[\s]+([0-9.]+)[\s]+([0-9.]+)[\s]+([0-9.]+)[\s]+([0-9.]+)[\s]*$", line)
if (m):
if m.group(8) not in data:
data[m.group(8)] = dict()
data[m.group(8)]["read"] = float(m.group(1))
data[m.group(8)]["ropen"] = float(m.group(4))
data[m.group(8)]["rio"] = float(m.group(5))
data[m.group(8)]["rclose"] = float(m.group(6))
data[m.group(8)]["rtotal"] = float(m.group(7))
data[m.group(8)]["riter"] = float(m.group(8))
data[m.group(8)].update(metadata)
if m.group(9) not in data:
data[m.group(9)] = dict()
data[m.group(9)]["perf"] = float(m.group(2))
data[m.group(9)]["open"] = float(m.group(5))
data[m.group(9)]["io"] = float(m.group(6))
data[m.group(9)]["close"] = float(m.group(7))
data[m.group(9)]["total"] = float(m.group(8))
data[m.group(9)]["iter"] = float(m.group(9))
data[m.group(9)].update(metadata)
m = re.match("write[\s]+([0-9.]+)[\s]+([0-9.]+)[\s]+([0-9.]+)[\s]+([0-9.]+)[\s]+([0-9.]+)[\s]+([0-9.]+)[\s]+([0-9.]+)[\s]+([0-9.]+)[\s]*$", line)
if (m):
if m.group(8) not in data:
data[m.group(8)] = dict()
data[m.group(8)] = {}
data[m.group(8)]["write"] = float(m.group(1))
data[m.group(8)]["wopen"] = float(m.group(4))
data[m.group(8)]["wio"] = float(m.group(5))
data[m.group(8)]["wclose"] = float(m.group(6))
data[m.group(8)]["wtotal"] = float(m.group(7))
data[m.group(8)]["witer"] = float(m.group(8))
data[m.group(8)].update(metadata)
for iteration,entry in data.items():
if len(entry) == 22:
if len(entry) == 18:
print("Success")
columns = ", ".join(entry.keys())
placeholders = ':' + ', :'.join(entry.keys())
@@ -102,32 +94,29 @@ try:
tbl = 'CREATE TABLE p (\
filename text, \
count int, \
app text, \
nn int, \
ppn int, \
api text, \
fs text, \
type text, \
tsize float, \
app text, \
fs text, \
iotype text, \
accesstype text, \
striping text, \
fsize float, \
fsize_ctl txt, \
ropen float, \
rio float, \
rclose float, \
rtotal float, \
read float, \
riter float, \
wopen float, \
wio float, \
wclose float, \
wtotal float, \
write float, \
witer float, \
primary key(filename, witer, riter) \
open float, \
io float, \
close float, \
total float, \
perf float, \
iter float, \
primary key(filename, iter) \
)'
conn.execute(tbl)
except:
except Exception as e:
print("could not create db")
print(e)

58
output_converter.sh Executable file
View File

@@ -0,0 +1,58 @@
#!/bin/bash
# Convert result files from ./output_v2 into the merged_output naming scheme.
# v2 filenames already carry COUNT/NN/PPN/API/T fields; the remaining metadata
# fields (APP/FS/IOTYPE/ACCESSTYPE/STRIPING) are appended with fixed values.
# All expansions are quoted so paths with spaces survive (find uses -print0,
# so such paths are clearly expected).
find "./output_v2" -type f -name "*.txt" -print0 |
while IFS= read -r -d $'\0' fn; do
	# Line number of the first "Finished" marker plus before/after line
	# counts — only echoed here as progress/debug output.
	headln="$(grep -n Finished "$fn" | head -n 1 | cut -d":" -f 1)"
	totalln=$(wc -l < "$fn")
	tailln=$((totalln - headln))
	echo "$fn" "$headln" "$tailln" "$totalln"
	bn=$(basename "$fn")
	extension="${bn##*.}"
	filename="${bn%.*}"
	# Split the '#'-separated metadata fields out of the file name.
	count=$(awk -F# '{print $1}' <<< "$filename")
	nn=$(awk -F# '{print $2}' <<< "$filename")
	ppn=$(awk -F# '{print $3}' <<< "$filename")
	api=$(awk -F# '{print $4}' <<< "$filename")
	t=$(awk -F# '{print $5}' <<< "$filename")
	FNEXT="$count#$nn#$ppn#$api#$t#APP:ior-default#FS:lustre#IOTYPE:random#ACCESSTYPE:read#STRIPING:yes"
	cp "$fn" "merged_output/$FNEXT.$extension"
done
# Split each ./output result file at the first "Finished" marker: the head
# (write phase) and the tail (read phase) become separate merged_output files.
# Expansions are quoted for the same space-safety reason as above.
find "./output" -type f -name "*.txt" -print0 |
while IFS= read -r -d $'\0' fn; do
	headln="$(grep -n Finished "$fn" | head -n 1 | cut -d":" -f 1)"
	totalln=$(wc -l < "$fn")
	tailln=$((totalln - headln))
	echo "$fn" "$headln" "$tailln" "$totalln"
	bn=$(basename "$fn")
	extension="${bn##*.}"
	filename="${bn%.*}"
	# MPIIO runs were executed without Lustre striping.
	STRIPING="yes"
	if grep -q MPIIO <<< "$bn"; then
		STRIPING="no"
	fi
	FNEXT="APP:ior-default#FS:lustre#IOTYPE:random"
	# NOTE(review): the write file is always tagged STRIPING:yes while the
	# read file uses $STRIPING — looks intentional for this data set, but
	# should be confirmed.
	head -n "$headln" "$fn" > "merged_output/$filename#$FNEXT#ACCESSTYPE:write#STRIPING:yes.$extension"
	outfnread="merged_output/$filename#$FNEXT#ACCESSTYPE:read#STRIPING:$STRIPING.$extension"
	if [ -e "$outfnread" ]; then
		echo "stopping $outfnread, already exists"
		exit 1  # error path now exits non-zero; NOTE: inside the pipeline
		        # subshell this aborts only the loop, not the whole script
	fi
	tail -n "$tailln" "$fn" > "$outfnread"
done