From 1ab905866b6911d9c207eacc64ba217eb54cc23c Mon Sep 17 00:00:00 2001
From: "eugen.betke"
Date: Thu, 15 Nov 2018 04:01:32 +0100
Subject: [PATCH] Full rewrite of mkdb script

---
 benchmark/mkdb.py | 228 ++++++++++++++++++++++++++++------------------
 1 file changed, 137 insertions(+), 91 deletions(-)

diff --git a/benchmark/mkdb.py b/benchmark/mkdb.py
index f943f28cc..66b96075f 100755
--- a/benchmark/mkdb.py
+++ b/benchmark/mkdb.py
@@ -8,6 +8,9 @@ import sqlite3
 import traceback
 import glob
 import pprint
+import numpy as np
+from scipy import stats
+import json

 __version = "0.8"
 __license__ = "GPL"
@@ -15,114 +18,157 @@ __author__ = "Eugen"
 __date__ = "2018"


-def parse(filename, conn):
-    exptype = 0
+def splitFn(fn: str):
+    # The base filename encodes the run configuration as '-'-separated
+    # KEY:VALUE tokens; decode them into a dict with lower-cased keys.
+    base = os.path.basename(fn).replace("isc17-", "isc17").split('.')[0]
+    tokens = base.split('-')
+    try:
+        info = dict(token.split(':') for token in tokens)
+    except ValueError:
+        print("Invalid tokens: ", tokens)
+        quit()
+    res = dict((k.lower(), int(v.lower()) if v.lower().isdigit() else v.lower()) for k, v in info.items())
+    res['filename'] = fn
+    return res

-    data = {}
-    metadata = {}
-    with open(filename, "r") as f:
-        metadata["filename"] = filename
+
+def parseIorOutput(fn):
+    # IOR writes a JSON summary next to the text log; keep the first
+    # result record of the first test.
+    selffn = fn + ".json"
+    res = dict()
+    with open(selffn) as f:
+        data = json.load(f)
+    res.update(data['tests'][0]['Results'][0][0])
+    return res
+
+
+def parseSysCounters(fn: str):
+    # Combine the per-node counter snapshots taken at start and stop:
+    # per-node deltas, harmonic-mean duration, aggregate MiB/s rates.
+    counter_start_files = glob.glob(fn + "_network/*start.txt")
+    res = dict()
+
+    table = dict()
+    table['duration'] = list()
+    table['PortXmitData'] = list()
+    table['PortRcvData'] = list()
+
+    for sysfile_start in counter_start_files:
+        start = _parseSysCounters(sysfile_start)
+        stop = _parseSysCounters(sysfile_start.replace('start', 'stop'))
+        table['duration'].append(stop['timestamp'] - start['timestamp'])
+        table['PortXmitData'].append(stop['PortXmitData'] - start['PortXmitData'])
+        table['PortRcvData'].append(stop['PortRcvData'] - start['PortRcvData'])
+
+    res['duration'] = stats.hmean(table['duration'])
+    res['PortXmitData'] = np.sum(table['PortXmitData']) / res['duration'] / 1024 / 1024
+    res['PortRcvData'] = np.sum(table['PortRcvData']) / res['duration'] / 1024 / 1024
+    return res
+
+
+def _parseSysCounters(fn: str):
+    res = {"filename": fn}
+    res.update(splitFn(fn))
+    colnames = list()
+    with open(fn, "r") as f:
         for line in f:
-
-            #COUNT:1#NN:1#PPN:4#API:POSIX#T:10485760.txt
-            #merged_output/COUNT:1#NN:8#PPN:8#API:POSIX#T:16384#APP:ior-default#FS:lustre#IOTYPE:random#ACCESSTYPE:write#STRIPING:yes.txt
-
-            m = re.match("COUNT:([0-9]+)#NN:([0-9]+)#PPN:([0-9]+)#API:([\w]+)#T:([0-9]+)#APP:([-\w]+)#FS:([\w]+)#IOTYPE:([\w]+)#ACCESSTYPE:([\w]+)#STRIPING:([\w]+).txt", os.path.basename(filename))
-
+            m = re.match(r"(TIMESTAMP)\s+([0-9]+)", line)
             if (m):
-                metadata["count"] = int(m.group(1))
-                metadata["nn"] = int(m.group(2))
-                metadata["ppn"] = int(m.group(3))
-                metadata["api"] = m.group(4)
-                metadata["tsize"] = m.group(5)
-                metadata["app"] = m.group(6)
-                metadata["fs"] = m.group(7)
-                metadata["iotype"] = m.group(8)
-                metadata["accesstype"] = m.group(9)
-                metadata["striping"] = m.group(10)
+                res[m.group(1).lower()] = int(m.group(2))

-            else:
-                print('couldn\'t parse', os.path.basename(filename))
-                print(data)
-                quit()
-
-            m = re.match("Command line used: .* -s[\s]+([0-9.]+)[\s]+-t[\s]+([0-9.]+)[\s]+-b[\s]+([0-9.]+)[\s]+-o.*", line)
+            m = re.match(r"(PortXmitData):\.+([0-9]+)", line)
             if (m):
-                metadata["fsize"] = float(m.group(1)) * float(m.group(3)) * data["ppn"] * data["nn"]
+                res[m.group(1)] = int(m.group(2))

-            m = re.match("[\s]+aggregate filesize = (.*)", line)
+            m = re.match(r"(PortRcvData):\.+([0-9]+)", line)
             if (m):
-                metadata["fsize_ctl"] = m.group(1)
+                res[m.group(1)] = int(m.group(2))

-            m = re.match("(read|write)[\s]+([0-9.]+)[\s]+([0-9.]+)[\s]+([0-9.]+)[\s]+([0-9.]+)[\s]+([0-9.]+)[\s]+([0-9.]+)[\s]+([0-9.]+)[\s]+([0-9.]+)[\s]*$", line)
+            m = re.match(r"(max_cached_mb):\s+([0-9]+)", line)
             if (m):
-                if m.group(9) not in data:
-                    data[m.group(9)] = dict()
-                data[m.group(9)]["perf"] = float(m.group(2))
-                data[m.group(9)]["open"] = float(m.group(5))
-                data[m.group(9)]["io"] = float(m.group(6))
-                data[m.group(9)]["close"] = float(m.group(7))
-                data[m.group(9)]["total"] = float(m.group(8))
-                data[m.group(9)]["iter"] = float(m.group(9))
-                data[m.group(9)].update(metadata)
+                res[m.group(1)] = int(m.group(2))
+
+            m = re.match(r"(used_mb):\s+([0-9]+)", line)
+            if (m):
+                res[m.group(1)] = int(m.group(2))
+
+            # header of the per-interface counter table: tag the first group
+            # of 8 columns as send_*, the second group as recv_*
+            m = re.match(r"\s+(face).*", line)
+            if (m):
+                colnames = line.replace('|', " ").split()
+                for i in range(1, 9):
+                    colnames[i] = "send_" + colnames[i]
+                for i in range(9, 17):
+                    colnames[i] = "recv_" + colnames[i]
+
+            # per-interface row: record the byte counters (columns 1 and 9)
+            m = re.match(r"\s+(ib[0-9]+|eno[0-9]+).*", line)
+            if (m):
+                tokens = line.replace('|', " ").split()
+                for i in [1, 9]:
+                    res[m.group(1) + "_" + colnames[i]] = int(tokens[i])
+    return res

-    for iteration,entry in data.items():
-        if len(entry) == 18:
-            print("Success")
-            columns = ", ".join(entry.keys())
-            placeholders = ':' + ', :'.join(entry.keys())
-            try:
-                conn.execute("INSERT INTO p (%s) VALUES (%s)" %(columns, placeholders), entry)
-            except sqlite3.IntegrityError as e:
-                print("Already imported")
-        else:
-            print("Error in file %s with tuples %s size %d"% (filename, entry, len(entry)))
-            exptype += 1;

+class DBWriter:
+    def __init__(self, fn: str):
+        self.conn = sqlite3.connect(fn)
+        # map Python value types to SQLite column types
+        self.map = {float: 'float', int: 'int', str: 'text', np.float64: 'float'}

+    def __del__(self):
+        self.conn.commit()
+        self.conn.close()

-#parse("./results/iozone/NP:2/C:0/T:100/output_app.txt", conn, style)
-assert(3 == len(sys.argv))
-folder = sys.argv[1]
-dbname = sys.argv[2]
+    def create(self, data: dict):
+        # derive the table schema from the first parsed record
+        self.len = len(data)
+        cols = ["%s %s" % (k, self.map[type(v)]) for k, v in data.items()]
+        colnames = ','.join(cols)
+        print(colnames)
+        query = 'CREATE TABLE p (%s, primary key(filename))' % colnames
+        try:
+            self.conn.execute(query)
+        except Exception as e:
+            print("could not create db")
+            print(e)

-conn = sqlite3.connect(dbname)
-try:
-    tbl = 'CREATE TABLE p (\
-            filename text, \
-            count int, \
-            nn int, \
-            ppn int, \
-            api text, \
-            tsize float, \
-            app text, \
-            fs text, \
-            iotype text, \
-            accesstype text, \
-            striping text, \
-            fsize float, \
-            fsize_ctl txt, \
-            open float, \
-            io float, \
-            close float, \
-            total float, \
-            perf float, \
-            iter float, \
-            primary key(filename, iter) \
-            )'
-    conn.execute(tbl)
-except Exception as e:
-    print("could not create db")
-    print(e)
+    def insert(self, entry: dict):
+        if len(entry) == self.len:
+            print("Success")
+            columns = ", ".join(entry.keys())
+            placeholders = ':' + ', :'.join(entry.keys())
+            try:
+                self.conn.execute("INSERT INTO p (%s) VALUES (%s)" % (columns, placeholders), entry)
+            except sqlite3.IntegrityError:
+                print("Already imported")
+        else:
+            print("Error in file %s with tuples %s size %d" % (entry.get('filename'), entry, len(entry)))

-for filename in glob.glob(folder + "/*"):
-    #print("Parsing " + filename)
-    parse(filename, conn)
+def main():
+    assert(3 == len(sys.argv))
+    folder = sys.argv[1]
+    dbname = sys.argv[2]
+    db = DBWriter(dbname)

-conn.commit()
-conn.close()
+    tabexists = False
+
+    for filename in glob.glob(folder + "/*.txt"):
+        print("Parsing " + filename)
+        data = dict()
+        data.update(splitFn(filename))
+        data.update(parseSysCounters(filename))
+        data.update(parseIorOutput(filename))
+        if not tabexists:
+            print(data)
+            db.create(data)
+            tabexists = True
+        db.insert(data)
+
+        print(data)
+
+
+if __name__ == "__main__":
+    main()
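
The invocation is unchanged, python mkdb.py <folder> <dbname>, but the rewritten
script now expects three artifacts per run: the <run>.txt IOR log whose name
encodes the configuration as KEY:VALUE tokens, an IOR JSON summary at
<run>.txt.json, and start/stop counter snapshots under <run>.txt_network/.
A minimal sketch of reading the generated table back; the nn and PortXmitData
columns are assumptions here, present only if the filenames carry an NN:<nodes>
token and the network snapshots exist:

    import sqlite3

    conn = sqlite3.connect("results.db")
    # mean transmit rate (MiB/s) per node count, from the auto-generated table 'p'
    for nn, xmit in conn.execute("SELECT nn, AVG(PortXmitData) FROM p GROUP BY nn"):
        print(nn, xmit)
    conn.close()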