ddn-ime-evaluation/mkdb.py

140 lines
4.5 KiB
Python
Raw Normal View History

#!/usr/bin/env python3
import sys
import os
import re
import sqlite3
import traceback
import glob
import pprint
# Module metadata. ``__version__`` is the conventional spelling; the original
# ``__version`` name is kept as an alias so existing references keep working.
__version__ = "0.8"
__version = __version__
__license__ = "GPL"
__author__ = "Eugen"
__date__ = "2018"
# Run parameters are encoded in the file name, e.g.
# COUNT:1#NN:1#PPN:4#API:POSIX#T:10485760.txt
_FILENAME_RE = re.compile(
    r"COUNT:([0-9]+)#NN:([0-9]+)#PPN:([0-9]+)#API:([\w]+)#T:([0-9]+).txt")
_COMMAND_RE = re.compile(
    r"Command line used: .* -s[\s]+([0-9.]+)[\s]+-t[\s]+([0-9.]+)"
    r"[\s]+-b[\s]+([0-9.]+)[\s]+-o.*")
_FILESIZE_RE = re.compile(r"[\s]+aggregate filesize = (.*)")
# "read"/"write" followed by eight whitespace-separated numeric columns.
_RESULT_RE = re.compile(r"(read|write)" + r"[\s]+([0-9.]+)" * 8 + r"[\s]*$")

# Keys a row must carry before it is inserted.  "fsize" is deliberately not
# listed: it is only available when the command line could be parsed.
_REQUIRED_KEYS = frozenset((
    "filename", "count", "nn", "ppn", "api", "tsize", "fs", "app", "type",
    "fsize_ctl",
    "read", "ropen", "rio", "rclose", "rtotal", "riter",
    "write", "wopen", "wio", "wclose", "wtotal", "witer",
))


def _parse_filename(filename):
    """Return the run parameters encoded in *filename*, or None if it does not match."""
    m = _FILENAME_RE.match(os.path.basename(filename))
    if m is None:
        return None
    return {
        "filename": filename,
        "count": int(m.group(1)),
        "nn": int(m.group(2)),
        "ppn": int(m.group(3)),
        "api": m.group(4),
        "tsize": m.group(5),
        "fs": "lustre",
        "app": "ior-default",
        "type": "random",
    }


def _record_result(data, metadata, m):
    """Store one matched read/write result line in *data*, keyed by iteration."""
    operation = m.group(1)       # "read" or "write"
    prefix = operation[0]        # "r" or "w", used to build the column names
    iteration = m.group(9)
    # setdefault keeps values already stored for this iteration, so the
    # read and write lines of one iteration end up in the same row.
    entry = data.setdefault(iteration, {})
    entry[operation] = float(m.group(2))
    entry[prefix + "open"] = float(m.group(5))
    entry[prefix + "io"] = float(m.group(6))
    entry[prefix + "close"] = float(m.group(7))
    entry[prefix + "total"] = float(m.group(8))
    entry[prefix + "iter"] = float(iteration)
    # Snapshot of the metadata known at the time this line is seen.
    entry.update(metadata)


def parse(filename, conn):
    """Parse one benchmark output file and insert its results into table ``p``.

    The run parameters (count, nn, ppn, api, tsize) are taken from the file
    name; the file body is scanned for the command line, the aggregate file
    size and the per-iteration ``read``/``write`` result lines.  Each
    iteration that has both a read and a write line plus all metadata is
    inserted as one row.  Incomplete iterations are reported on stdout and
    skipped; rows violating the primary key are reported as already imported.

    Args:
        filename: Path of the output file; its base name must follow the
            ``COUNT:..#NN:..#PPN:..#API:..#T:...txt`` scheme.
        conn: Open ``sqlite3`` connection whose database contains table ``p``.
            The caller is responsible for committing.

    Raises:
        SystemExit: If the base name of *filename* cannot be parsed.
    """
    data = {}
    metadata = _parse_filename(filename)
    if metadata is None:
        print("couldn't parse", os.path.basename(filename))
        print(data)
        sys.exit()

    with open(filename, "r") as f:
        for line in f:
            m = _COMMAND_RE.match(line)
            if m:
                # Total size = value of -s * value of -b * processes per node
                # * nodes.  ppn/nn live in metadata (not in data, which is
                # keyed by iteration).
                metadata["fsize"] = (float(m.group(1)) * float(m.group(3))
                                     * metadata["ppn"] * metadata["nn"])

            m = _FILESIZE_RE.match(line)
            if m:
                metadata["fsize_ctl"] = m.group(1)

            m = _RESULT_RE.match(line)
            if m:
                _record_result(data, metadata, m)

    for entry in data.values():
        if _REQUIRED_KEYS.issubset(entry):
            print("Success")
            # Column names come from the fixed key set above, never from the
            # file content; values are bound through named placeholders.
            columns = ", ".join(entry.keys())
            placeholders = ':' + ', :'.join(entry.keys())
            try:
                conn.execute("INSERT INTO p (%s) VALUES (%s)" % (columns, placeholders), entry)
            except sqlite3.IntegrityError:
                print("Already imported")
        else:
            print("Error in file %s with tuples %s size %d" % (filename, entry, len(entry)))
2018-10-22 16:30:44 +00:00
#parse("./results/iozone/NP:2/C:0/T:100/output_app.txt", conn, style)
# Script entry: import every result file found in <folder> into the SQLite
# database <dbname>.
# Explicit check instead of assert, which is stripped when running with -O.
if len(sys.argv) != 3:
    sys.exit("usage: %s <results-folder> <sqlite-db>" % sys.argv[0])

folder = sys.argv[1]
dbname = sys.argv[2]
conn = sqlite3.connect(dbname)

try:
    tbl = """CREATE TABLE p (
        filename text,
        count int,
        app text,
        nn int,
        ppn int,
        api text,
        fs text,
        type text,
        tsize float,
        fsize float,
        fsize_ctl text,
        ropen float,
        rio float,
        rclose float,
        rtotal float,
        read float,
        riter float,
        wopen float,
        wio float,
        wclose float,
        wtotal float,
        write float,
        witer float,
        primary key(filename, witer, riter)
        )"""
    conn.execute(tbl)
except sqlite3.OperationalError:
    # Raised e.g. when the table already exists from an earlier import;
    # in that case the existing table is reused.
    print("could not create db")

try:
    # Sorted so that files are imported in a deterministic order.
    for filename in sorted(glob.glob(folder + "/*")):
        #print("Parsing " + filename)
        parse(filename, conn)
    conn.commit()
finally:
    # Always release the database, even when parsing aborts.
    conn.close()