#!/usr/bin/env python3
"""Import benchmark results and network counter deltas into a SQLite table.

For every ``*.txt`` file in a folder this script reads:

* ``<file>.json`` -- benchmark output; the list found at
  ``tests[0].Results`` is used, one entry per iteration,
* ``<file>_network/*start.txt`` and the matching ``*stop.txt`` files --
  counter snapshots taken before and after the run,

and writes one row per iteration into table ``p`` of the given database.

Usage: <script> <folder> <database>
"""

import sys
import os
import re
import sqlite3
import traceback
import glob
import pprint
import json

import numpy as np
from scipy import stats

# NOTE(review): probably meant to be ``__version__``; the name is kept as-is
# in case something refers to it.
__version = "0.8"
__license__ = "GPL"
__author__ = "Eugen"
__date__ = "2018"

# Patterns used by _parseSysCounters. They are raw strings (the escapes
# ``\s`` and ``\.`` are regex escapes, not string escapes) and compiled once
# instead of once per input line.
_TIMESTAMP_RE = re.compile(r"(TIMESTAMP)\s+([0-9]+)")
_SCALAR_COUNTER_RES = tuple(
    re.compile(pattern)
    for pattern in (
        r"(PortXmitData):\.+([0-9]+)",
        r"(PortRcvData):\.+([0-9]+)",
        r"(max_cached_mb):\s+([0-9]+)",
        r"(used_mb):\s+([0-9]+)",
    )
)
_HEADER_RE = re.compile(r"\s+(face).*")
_IFACE_RE = re.compile(r"\s+(ib[0-9]+|eno[0-9]+).*")


def splitFn(fn: str):
    """Decode the ``key:value`` tokens embedded in a file name.

    The base name (directory removed, everything from the first ``.`` on
    dropped, and ``"isc17-"`` rewritten to ``"isc17"``) is split on ``-``;
    every token must have the form ``key:value``.

    Args:
        fn: Path of the file whose name should be decoded.

    Returns:
        A dict with lower-cased keys. Values consisting only of digits are
        converted to ``int``, all others are lower-cased strings. The
        original path is stored under ``'filename'``.

    Raises:
        SystemExit: If a token is not of the form ``key:value``; the
            offending tokens are printed first.
    """
    base = os.path.basename(fn).replace("isc17-", "isc17").split('.')[0]
    tokens = base.split('-')
    try:
        info = dict(token.split(':') for token in tokens)
    except ValueError:
        # dict() raises ValueError when a token does not split into exactly
        # two parts (no ':' or more than one ':').
        print("Invalid tokens: ", tokens)
        sys.exit(1)

    res = {}
    for key, value in info.items():
        value = value.lower()
        res[key.lower()] = int(value) if value.isdigit() else value
    res['filename'] = fn
    return res


def parseIorOutput(fn):
    """Load ``fn + ".json"`` and return its ``tests[0].Results`` entry.

    Args:
        fn: Path of the benchmark text file; the JSON file is expected next
            to it with ``.json`` appended.

    Returns:
        Whatever is stored at ``data['tests'][0]['Results']``. ``main``
        treats it as a list with one item per iteration, where item ``[0]``
        is a dict of columns.
    """
    with open(fn + ".json") as f:
        data = json.load(f)
    return data['tests'][0]['Results']


def parseSysCounters(fn: str):
    """Aggregate the start/stop counter snapshots belonging to one run.

    Every ``<fn>_network/*start.txt`` file is paired with the file whose
    base name has ``start`` replaced by ``stop``. Only the base name is
    rewritten, so a directory that happens to contain "start" (for example
    ``restart/``) does not break the pairing.

    Args:
        fn: Path of the benchmark text file.

    Returns:
        A dict with three floats:
        ``duration``     -- harmonic mean of the per-file timestamp deltas,
        ``PortXmitData`` -- summed counter delta / duration / 1024 / 1024,
        ``PortRcvData``  -- summed counter delta / duration / 1024 / 1024.
        If no snapshot files exist, the values are whatever
        ``scipy.stats.hmean`` yields for an empty list (NaN in current
        SciPy releases).
    """
    durations = []
    xmit_deltas = []
    rcv_deltas = []
    for sysfile_start in glob.glob(fn + "_network/*start.txt"):
        folder, name = os.path.split(sysfile_start)
        sysfile_stop = os.path.join(folder, name.replace('start', 'stop'))
        start = _parseSysCounters(sysfile_start)
        stop = _parseSysCounters(sysfile_stop)
        durations.append(stop['timestamp'] - start['timestamp'])
        xmit_deltas.append(stop['PortXmitData'] - start['PortXmitData'])
        rcv_deltas.append(stop['PortRcvData'] - start['PortRcvData'])

    duration = stats.hmean(durations)
    # Plain floats keep the values usable as SQLite parameters and as keys
    # into DBWriter's type map regardless of the NumPy scalar type returned.
    return {
        'duration': float(duration),
        'PortXmitData': float(np.sum(xmit_deltas) / duration / 1024 / 1024),
        'PortRcvData': float(np.sum(rcv_deltas) / duration / 1024 / 1024),
    }


def _parseSysCounters(fn: str):
    """Parse one counter snapshot file into a flat dict.

    Args:
        fn: Path of the snapshot file. Its name must follow the
            ``key:value`` token scheme understood by ``splitFn``.

    Returns:
        A dict holding the ``splitFn`` fields of ``fn`` plus every counter
        recognised in the file:
        ``timestamp`` (and ``TIMESTAMP``), ``PortXmitData``,
        ``PortRcvData``, ``max_cached_mb``, ``used_mb`` and, for each
        ``ibN``/``enoN`` interface row, the values of columns 1 and 9 keyed
        as ``<iface>_<column name>``.
    """
    res = {"filename": fn}
    res.update(splitFn(fn))
    colnames = []
    with open(fn, "r") as f:
        for line in f:
            m = _TIMESTAMP_RE.match(line)
            if m:
                value = int(m.group(2))
                res[m.group(1).lower()] = value
                # The original code stored the value a second time under
                # the unmodified key (leftover of a disabled per-CPU
                # parser); kept so the returned dict is unchanged.
                res[m.group(1)] = value

            for pattern in _SCALAR_COUNTER_RES:
                m = pattern.match(line)
                if m:
                    res[m.group(1)] = int(m.group(2))

            if _HEADER_RE.match(line):
                # Header row of the interface table: remember the column
                # names so interface rows can be labelled.
                # NOTE(review): if this is Linux /proc/net/dev, columns 1-8
                # are Receive and 9-16 are Transmit, i.e. the prefixes
                # below would be swapped -- confirm before relying on them.
                colnames = line.replace('|', " ").split()
                for i in range(1, 9):
                    colnames[i] = "send_" + colnames[i]
                for i in range(9, 17):
                    colnames[i] = "recv_" + colnames[i]

            m = _IFACE_RE.match(line)
            if m:
                tokens = line.replace('|', " ").split()
                # Only the first column of each direction is recorded.
                for i in (1, 9):
                    res[m.group(1) + "_" + colnames[i]] = int(tokens[i])
    return res


class DBWriter:
    """Minimal writer for the SQLite result table ``p``.

    The table layout is derived from the first row passed to ``create``.
    The instance can be used as a context manager; leaving the ``with``
    block commits and closes the connection.
    """

    def __init__(self, fn: str):
        """Open (or create) the SQLite database at ``fn``."""
        self.conn = sqlite3.connect(fn)
        # Python value type -> SQL column type used by create().
        self.map = {float: 'float', int: 'int', str: 'text', np.float64: 'float'}
        # Number of columns of table p; set by create().
        self.len = 0

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, tb):
        self.close()
        return False

    def __del__(self):
        # Fallback for callers that never call close().
        self.close()

    def close(self):
        """Commit pending inserts and close the connection (idempotent)."""
        conn = getattr(self, 'conn', None)
        if conn is not None:
            conn.commit()
            conn.close()
            self.conn = None

    def create(self, data: dict):
        """Create table ``p`` with one column per key of ``data``.

        Column types are looked up from the value types via ``self.map``;
        ``(filename, iteration)`` is the primary key. A failure to create
        the table (for example because it already exists) is reported on
        stdout and otherwise ignored.

        Raises:
            KeyError: If a value has a type that is not in ``self.map``.
        """
        self.len = len(data)
        cols = ["%s %s" % (k, self.map[type(v)]) for k, v in data.items()]
        colnames = ','.join(cols)
        print(colnames)
        query = 'CREATE TABLE p (%s, primary key(filename,iteration))' % colnames
        try:
            self.conn.execute(query)
        except Exception as e:
            print("could not create db")
            print(e)

    def insert(self, entry: dict):
        """Insert one row; ``entry`` must have as many keys as the table.

        Rows whose primary key already exists are reported as
        "Already imported" and skipped; rows with an unexpected number of
        fields are reported and skipped.
        """
        if len(entry) != self.len:
            print("Error with tuples %s size %d. Expected %d" % (entry, len(entry), self.len))
            return

        columns = ", ".join(entry.keys())
        placeholders = ':' + ', :'.join(entry.keys())
        try:
            self.conn.execute("INSERT INTO p (%s) VALUES (%s)" % (columns, placeholders), entry)
        except sqlite3.IntegrityError:
            print("Already imported")
        else:
            # Report success only once the row really has been inserted.
            print("Success")


def main():
    """Import every ``*.txt`` result below ``argv[1]`` into database ``argv[2]``."""
    if len(sys.argv) != 3:
        sys.exit("Usage: %s <folder> <database>" % sys.argv[0])
    folder = sys.argv[1]
    dbname = sys.argv[2]

    with DBWriter(dbname) as db:
        tabexists = False
        for filename in glob.glob(folder + "/*.txt"):
            print("Parsing " + filename)
            ior_result = parseIorOutput(filename)
            if not ior_result:
                continue

            # File-level fields are identical for every iteration, so they
            # are parsed once per file rather than once per iteration.
            common = dict(splitFn(filename))
            common.update(parseSysCounters(filename))

            for i, result in enumerate(ior_result):
                data = dict(common)
                data.update(result[0])
                data['iteration'] = i
                if not tabexists:
                    print(data)
                    db.create(data)
                    tabexists = True
                db.insert(data)
                print(data)


if __name__ == "__main__":
    main()