ddn-ime-evaluation/benchmark/mkdb.py

191 lines
5.7 KiB
Python
Executable File

#!/usr/bin/env python3
import sys
import os
import re
import sqlite3
import traceback
import glob
import pprint
import numpy as np
from scipy import stats
import json
# Module metadata.
__version__ = "0.8"
# The original spelling (without the trailing underscores) is kept as an
# alias so that any existing reference to it keeps working.
__version = __version__
__license__ = "GPL"
__author__ = "Eugen"
__date__ = "2018"
def splitFn(fn:str):
    """Decode the ``key:value`` tokens embedded in a file name.

    The base name of *fn* (directory dropped, everything from the first
    ``.`` on removed) is split on ``-``; every resulting token must have the
    form ``key:value``.  Keys and values are lower-cased and values that
    consist only of digits are converted to ``int``.

    Args:
        fn: Path of the file whose name carries the tokens.

    Returns:
        dict mapping each lower-cased key to its value, plus a ``filename``
        entry holding *fn* unchanged.

    Raises:
        SystemExit: with status 1 when a token is not exactly one
            ``key:value`` pair.
    """
    # "isc17-" contains the token separator; the dash is removed before the
    # split so the text following it stays part of the same token
    # (presumably a host-name prefix -- confirm against the file naming).
    base = os.path.basename(fn).replace("isc17-", "isc17").split('.')[0]
    tokens = base.split('-')
    try:
        # dict() raises ValueError when a token does not split into exactly
        # two parts.
        info = dict(token.split(':') for token in tokens)
    except ValueError:
        print("Invalid tokens: ", tokens)
        # Exit with a failure status; quit() is an interactive helper that is
        # not always available and reports success.
        sys.exit(1)
    res = {}
    for key, value in info.items():
        value = value.lower()
        res[key.lower()] = int(value) if value.isdigit() else value
    res['filename'] = fn
    return res
def parseIorOutput(fn):
    """Return the ``Results`` entry of the first test in an IOR JSON report.

    The report is read from the file ``fn + ".json"``.
    """
    with open(fn + ".json") as report:
        parsed = json.load(report)
    return parsed['tests'][0]['Results']
def parseSysCounters(fn:str):
    """Aggregate the network counter snapshots recorded for one run.

    Every file matching ``*start.txt`` in the directory ``fn + "_network"``
    is paired with the file of the same name ending in ``stop.txt``.  Both
    are parsed with ``_parseSysCounters`` and the stop-minus-start
    differences of ``timestamp``, ``PortXmitData`` and ``PortRcvData`` are
    collected.

    Args:
        fn: Path of the benchmark output file the counters belong to.

    Returns:
        dict with three entries:

        * ``duration``: harmonic mean (``scipy.stats.hmean``) of the
          per-file timestamp differences.
        * ``PortXmitData`` / ``PortRcvData``: sum of the per-file counter
          differences, divided by ``duration`` and by 1024 * 1024.
    """
    start_suffix = "start.txt"
    durations = list()
    xmit = list()
    rcv = list()
    for sysfile_start in glob.glob(fn + "_network/*start.txt"):
        # Swap only the trailing "start.txt".  A plain str.replace() would
        # also rewrite any other "start" in the path (e.g. in a directory
        # name) and point at a file that does not exist.
        sysfile_stop = sysfile_start[:-len(start_suffix)] + "stop.txt"
        start = _parseSysCounters(sysfile_start)
        stop = _parseSysCounters(sysfile_stop)
        durations.append(stop['timestamp'] - start['timestamp'])
        xmit.append(stop['PortXmitData'] - start['PortXmitData'])
        rcv.append(stop['PortRcvData'] - start['PortRcvData'])
    # NOTE(review): when no counter files are found the lists are empty and
    # the result depends on how scipy treats an empty hmean -- confirm the
    # desired behaviour for runs without counters.
    res = dict()
    res['duration'] = stats.hmean(durations)
    res['PortXmitData'] = np.sum(xmit) / res['duration'] / 1024 / 1024
    res['PortRcvData'] = np.sum(rcv) / res['duration'] / 1024 / 1024
    return res
def _parseSysCounters(fn:str):
    """Parse a single counter snapshot file into a flat dict.

    The result starts with the tokens decoded from the file name by
    ``splitFn`` (including ``filename``).  Every line of the file is then
    matched against a fixed set of patterns and matching values are added:

    * ``TIMESTAMP <n>`` is stored as ``timestamp`` and, for backward
      compatibility, also as ``TIMESTAMP``.
    * ``PortXmitData:...<n>``, ``PortRcvData:...<n>``, ``max_cached_mb: <n>``
      and ``used_mb: <n>`` are stored under their own name.
    * A line starting with whitespace followed by ``face`` supplies the
      column names of the interface table; columns 1-8 get the prefix
      ``send_`` and columns 9-16 the prefix ``recv_``.
    * For every ``ib<N>`` / ``eno<N>`` row, columns 1 and 9 are stored as
      ``<interface>_<column name>``.  Columns missing from the header or
      from the row are skipped.

    Args:
        fn: Path of the snapshot file.

    Returns:
        dict with the file-name tokens plus the parsed values; all values
        parsed from the file content are ``int``.
    """
    # Raw strings: "\s" and "\." are invalid escape sequences in ordinary
    # string literals.  The patterns are compiled once, outside the line loop.
    timestamp_re = re.compile(r"(TIMESTAMP)\s+([0-9]+)")
    value_res = [
        re.compile(r"(PortXmitData):\.+([0-9]+)"),
        re.compile(r"(PortRcvData):\.+([0-9]+)"),
        re.compile(r"(max_cached_mb):\s+([0-9]+)"),
        re.compile(r"(used_mb):\s+([0-9]+)"),
    ]
    header_re = re.compile(r"\s+(face).*")
    iface_re = re.compile(r"\s+(ib[0-9]+|eno[0-9]+).*")

    res = {"filename": fn}
    res.update(splitFn(fn))
    colnames = list()
    with open(fn, "r") as f:
        for line in f:
            m = timestamp_re.match(line)
            if m:
                value = int(m.group(2))
                res[m.group(1).lower()] = value
                # The value has always been stored under the upper-case name
                # as well; keep that key so existing consumers do not break.
                res[m.group(1)] = value
            # NOTE: a per-CPU parser (cpuN user/nice/system/idle/iowait/irq/
            # softirq) was once stubbed out here; it is not implemented.
            for pattern in value_res:
                m = pattern.match(line)
                if m:
                    res[m.group(1)] = int(m.group(2))
            if header_re.match(line):
                # NOTE(review): if these snapshots come from Linux
                # /proc/net/dev, the first column group is Receive and the
                # second Transmit, i.e. the two prefixes would be swapped.
                # Confirm before relying on the send_/recv_ names.
                colnames = [
                    "send_" + name if 1 <= i < 9
                    else "recv_" + name if 9 <= i < 17
                    else name
                    for i, name in enumerate(line.replace('|', " ").split())
                ]
            m = iface_re.match(line)
            if m:
                tokens = line.replace('|', " ").split()
                for i in (1, 9):
                    # A row seen before the header, or a truncated line, has
                    # no such column; skip it instead of raising IndexError.
                    if i < len(colnames) and i < len(tokens):
                        res[m.group(1) + "_" + colnames[i]] = int(tokens[i])
    return res
class DBWriter:
    """Store flat result dicts as rows of table ``p`` in an SQLite database.

    ``create()`` derives the table layout from a sample row; ``insert()``
    adds rows with the same number of fields.  Pending changes are committed
    and the connection is closed when the object is destroyed.
    """

    def __init__(self, fn:str):
        """Open (or create) the SQLite database file *fn*."""
        self.conn = sqlite3.connect(fn)
        # Value type -> SQL column type used by create().
        self.map = {float:'float', int:'int', str:'text', np.float64:'float'}

    def __del__(self):
        # __init__ may have failed before the connection existed; do not
        # raise a second, unrelated AttributeError in that case.
        conn = getattr(self, 'conn', None)
        if conn is not None:
            conn.commit()
            conn.close()

    def create(self, data:dict):
        """Create table ``p`` with one column per key of *data*.

        The column type is looked up from the type of each value; the
        primary key is ``(filename, iteration)``.  A failure to create the
        table (for example because it already exists) is reported on stdout
        and otherwise ignored so that further rows can still be inserted.

        Args:
            data: Sample row; its keys become the column names.

        Raises:
            KeyError: if a value has a type that is not in ``self.map``.
        """
        # Remember the row width so insert() can reject rows of another size.
        self.len = len(data)
        # NOTE: column names are interpolated into the statement (SQL cannot
        # bind identifiers), so keys must be plain identifiers.
        cols = ["%s %s" % (k, self.map[type(v)]) for k, v in data.items()]
        colnames = ','.join(cols)
        print(colnames)
        query = 'CREATE TABLE p (%s, primary key(filename,iteration))' % colnames
        try:
            self.conn.execute(query)
        except sqlite3.Error as e:
            print("could not create db")
            print(e)

    def insert(self, entry:dict):
        """Insert *entry* as one row of table ``p``.

        Rows whose number of fields differs from the row passed to
        ``create()`` are rejected with a message.  A row whose primary key is
        already present is reported as "Already imported".

        Args:
            entry: Mapping of column name to value.
        """
        if len(entry) != self.len:
            print("Error with tuples %s size %d. Expected %d"% (entry, len(entry), self.len))
            return
        columns = ", ".join(entry.keys())
        placeholders = ':' + ', :'.join(entry.keys())
        try:
            self.conn.execute("INSERT INTO p (%s) VALUES (%s)" % (columns, placeholders), entry)
        except sqlite3.IntegrityError:
            print("Already imported")
        else:
            # Report success only once the row has really been stored.
            print("Success")
def main():
    """Import all IOR runs found in a folder into an SQLite database.

    Command line: ``<script> <folder> <dbname>``.

    For every ``*.txt`` file in the folder the matching IOR JSON report is
    read and one row per entry of the report's result list is written.  A row
    combines the tokens from the file name, the aggregated network counters,
    the first element of the result entry and the entry's index
    (``iteration``).  The table is created from the first row.

    Raises:
        SystemExit: with status 1 when the argument count is wrong.
    """
    # Explicit check instead of assert: asserts are stripped under
    # ``python -O`` and give the user no hint about the expected arguments.
    if len(sys.argv) != 3:
        print("Usage: %s <folder> <dbname>" % sys.argv[0], file=sys.stderr)
        sys.exit(1)
    folder = sys.argv[1]
    dbname = sys.argv[2]
    db = DBWriter(dbname)
    tabexists = False
    for filename in glob.glob(folder + "/*.txt"):
        print("Parsing " + filename)
        ior_result = parseIorOutput(filename)
        if not ior_result:
            # Nothing to import; do not parse counters for this file.
            continue
        # File-name tokens and counters depend only on the file, so they are
        # computed once per file instead of once per iteration.
        common = dict()
        common.update(splitFn(filename))
        common.update(parseSysCounters(filename))
        for i, result in enumerate(ior_result):
            # Fresh dict per row so no key can leak in from the previous
            # iteration.
            data = dict(common)
            data.update(result[0])
            data['iteration'] = i
            if not tabexists:
                print(data)
                db.create(data)
                tabexists = True
            db.insert(data)
            print(data)
    # Commit explicitly instead of relying only on DBWriter's finalizer.
    db.conn.commit()
# Script entry point: run the import only when executed directly, not when
# the module is imported.
if __name__ == "__main__":
    main()