Full rewrite of mkdb script
This commit is contained in:
parent
d7b1d08328
commit
1ab905866b
|
@ -8,6 +8,9 @@ import sqlite3
|
|||
import traceback
|
||||
import glob
|
||||
import pprint
|
||||
import numpy as np
|
||||
from scipy import stats
|
||||
import json
|
||||
|
||||
# Module metadata.
# NOTE(review): the original had "__version" (missing trailing underscores),
# which breaks the conventional __version__ dunder; fixed here.
__version__ = "0.8"
__license__ = "GPL"
__author__ = "Eugen"
__date__ = "2018"
def parse(filename, conn):
|
||||
exptype = 0
|
||||
def splitFn(fn:str):
|
||||
base = os.path.basename(fn).replace("isc17-", "isc17").split('.')[0]
|
||||
tokens = base.split('-')
|
||||
try:
|
||||
info = dict(token.split(':') for token in tokens )
|
||||
except:
|
||||
print("Invalid tokens: ", tokens)
|
||||
quit()
|
||||
res = dict((k.lower(), int(v.lower()) if v.lower().isdigit() else v.lower()) for k, v in info.items())
|
||||
res['filename'] = fn
|
||||
return res
|
||||
|
||||
data = {}
|
||||
metadata = {}
|
||||
with open(filename, "r") as f:
|
||||
metadata["filename"] = filename
|
||||
|
||||
|
||||
def parseIorOutput(fn):
    """Read the ior JSON summary that accompanies result file *fn*.

    The summary lives next to the result file as ``<fn>.json``; the
    returned dict is the first result record of the first test, i.e.
    ``data['tests'][0]['Results'][0][0]``.
    """
    res = dict()
    with open(fn + ".json") as f:
        data = json.load(f)
        res.update(data['tests'][0]['Results'][0][0])
    return res
def parseSysCounters(fn: str):
    """Aggregate network counter deltas collected around one benchmark run.

    For every ``*start.txt`` snapshot under ``<fn>_network/`` the matching
    ``*stop.txt`` snapshot is read and per-node deltas are computed.
    Returns a dict with the harmonic-mean duration and the summed
    PortXmitData / PortRcvData counters divided by that duration and
    scaled by /1024/1024.

    Returns an empty dict when no counter snapshots exist (the original
    crashed on ``stats.hmean`` of an empty list).
    """
    res = dict()
    counter_start_files = glob.glob(fn + "_network/*start.txt")
    if not counter_start_files:
        return res

    table = {'duration': [], 'PortXmitData': [], 'PortRcvData': []}
    for sysfile_start in counter_start_files:
        start = _parseSysCounters(sysfile_start)
        stop = _parseSysCounters(sysfile_start.replace('start', 'stop'))
        table['duration'].append(stop['timestamp'] - start['timestamp'])
        table['PortXmitData'].append(stop['PortXmitData'] - start['PortXmitData'])
        table['PortRcvData'].append(stop['PortRcvData'] - start['PortRcvData'])

    # hmean requires strictly positive values -- assumes start/stop
    # snapshots always have distinct timestamps; TODO confirm.
    res['duration'] = stats.hmean(table['duration'])
    res['PortXmitData'] = np.sum(table['PortXmitData']) / res['duration'] / 1024 / 1024
    res['PortRcvData'] = np.sum(table['PortRcvData']) / res['duration'] / 1024 / 1024
    return res
def _parseSysCounters(fn: str):
    """Parse one node-level counter snapshot file into a flat dict.

    Extracts the ``TIMESTAMP`` line, perfquery-style ``PortXmitData`` /
    ``PortRcvData`` counters, lustre ``max_cached_mb`` / ``used_mb`` values
    and per-interface send/recv columns of an ``ifconfig``-style table.
    Filename metadata from splitFn() is merged in as well.
    """
    res = {"filename": fn}
    res.update(splitFn(fn))
    colnames = list()
    with open(fn, "r") as f:
        for line in f:
            # Simple "NAME <int>" / "NAME:...<int>" counters.  Regexes are
            # raw strings now (the originals triggered invalid-escape
            # warnings on modern Python).
            m = re.match(r"(TIMESTAMP)\s+([0-9]+)", line)
            if m:
                res[m.group(1).lower()] = int(m.group(2))

            m = re.match(r"(PortXmitData):\.+([0-9]+)", line)
            if m:
                res[m.group(1)] = int(m.group(2))

            m = re.match(r"(PortRcvData):\.+([0-9]+)", line)
            if m:
                res[m.group(1)] = int(m.group(2))

            m = re.match(r"(max_cached_mb):\s+([0-9]+)", line)
            if m:
                res[m.group(1)] = int(m.group(2))

            m = re.match(r"(used_mb):\s+([0-9]+)", line)
            if m:
                res[m.group(1)] = int(m.group(2))

            # Header row of the interface table: prefix columns 1..8 with
            # "send_" and columns 9..16 with "recv_".
            m = re.match(r"\s+(face).*", line)
            if m:
                colnames = line.replace('|', " ").split()
                for i in range(1, 9):
                    colnames[i] = "send_" + colnames[i]
                for i in range(9, 17):
                    colnames[i] = "recv_" + colnames[i]

            # Data rows (ib0, eno1, ...): keep only the first send column
            # and the first recv column.  NOTE(review): assumes the "face"
            # header row precedes any interface row -- colnames would be
            # empty otherwise; confirm against real snapshot files.
            m = re.match(r"\s+(ib[0-9]+|eno[0-9]+).*", line)
            if m:
                tokens = line.replace('|', " ").split()
                for i in (1, 9):
                    res[m.group(1) + "_" + colnames[i]] = int(tokens[i])
    return res
class DBWriter:
    """Thin sqlite3 wrapper around a single table ``p`` whose schema is
    derived from the first parsed result dict."""

    def __init__(self, fn: str):
        self.conn = sqlite3.connect(fn)
        # Python value type -> sqlite column type, used by create().
        self.map = {float: 'float', int: 'int', str: 'text', np.float64: 'float'}

    def __del__(self):
        # NOTE(review): relying on __del__ for commit/close is fragile
        # (interpreter shutdown ordering); an explicit close() method would
        # be safer -- kept for backward compatibility.
        self.conn.commit()
        self.conn.close()

    def create(self, data: dict):
        """Create table ``p`` with one column per key of *data*.

        Column types come from ``self.map``; ``filename`` is the primary
        key.  Also remembers ``len(data)`` so insert() can reject
        incomplete rows.
        """
        self.len = len(data)
        # Column names are interpolated into the DDL.  They come from
        # parsed result filenames, not untrusted input -- keep it that way.
        cols = ["%s %s" % (k, self.map[type(v)]) for k, v in data.items()]
        colnames = ','.join(cols)
        print(colnames)
        query = 'CREATE TABLE p (%s, primary key(filename))' % colnames
        try:
            self.conn.execute(query)
        except Exception as e:
            print("could not create db")
            print(e)

    def insert(self, entry: dict):
        """Insert *entry* into ``p`` if it has the expected number of
        fields; duplicate primary keys are reported and skipped."""
        if len(entry) == self.len:
            print("Success")
            columns = ", ".join(entry.keys())
            placeholders = ':' + ', :'.join(entry.keys())
            try:
                self.conn.execute("INSERT INTO p (%s) VALUES (%s)"
                                  % (columns, placeholders), entry)
            except sqlite3.IntegrityError:
                print("Already imported")
        else:
            # Fixed: the original referenced an undefined global
            # ``filename`` here, raising NameError instead of reporting
            # the bad row.
            print("Error in file %s with tuples %s size %d"
                  % (entry.get('filename', '<unknown>'), entry, len(entry)))
def main():
    """mkdb entry point: ``mkdb <results-folder> <dbname>``.

    For every ``*.txt`` result file in the folder, merge filename
    metadata, system counter aggregates and the ior JSON output into one
    record; create the table from the first record, then insert all.
    """
    # Explicit check instead of assert: assert is stripped under -O.
    if len(sys.argv) != 3:
        raise SystemExit("usage: mkdb <results-folder> <dbname>")
    folder = sys.argv[1]
    dbname = sys.argv[2]
    db = DBWriter(dbname)

    tabexists = False
    for filename in glob.glob(folder + "/*.txt"):
        print("Parsing " + filename)
        data = dict()
        data.update(splitFn(filename))
        data.update(parseSysCounters(filename))
        data.update(parseIorOutput(filename))
        if not tabexists:
            # First record defines the schema.
            print(data)
            db.create(data)
            tabexists = True
        db.insert(data)
        print(data)


if __name__ == "__main__":
    main()
Loading…
Reference in New Issue