Renamed
This commit is contained in:
parent
65f8cdb98d
commit
ea893d76f0
|
@ -23,14 +23,14 @@ prepare
|
|||
|
||||
for I in job_similarities_*.csv ; do
|
||||
rm *.png *.pdf
|
||||
./scripts/plot.R $I > description.txt
|
||||
echo "processing $I"
|
||||
./scripts/plot.R $I > description.txt 2>&1
|
||||
OUT=${I%%.csv}-out
|
||||
mkdir $OUT
|
||||
if [[ $CLEAN != "0" ]] ; then
|
||||
rm $OUT/*
|
||||
mv description.txt $OUT
|
||||
fi
|
||||
mv *.png *.pdf jobs-*.txt $OUT
|
||||
mv description.txt *.png *.pdf jobs-*.txt $OUT
|
||||
done
|
||||
|
||||
# analyze performance data
|
||||
|
|
|
@ -0,0 +1,154 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
import csv
|
||||
import sys
|
||||
import pandas as pd
|
||||
from pandas import DataFrame
|
||||
from pandas import Grouper
|
||||
import seaborn as sns
|
||||
from matplotlib import pyplot
|
||||
import matplotlib.cm as cm
|
||||
|
||||
# Command line: <comma-separated job ids> <comma-separated filename prefixes>.
# The n-th prefix belongs to the n-th job id.
job_argument = sys.argv[1]
prefix_argument = sys.argv[2]

jobs = job_argument.split(",")
prefix = prefix_argument.split(",")

# File extension (and thereby image format) used for every saved figure.
fileformat = ".png"

print("Plotting the job: " + job_argument)
print("Plotting with prefix: " + prefix_argument)
|
||||
|
||||
|
||||
# Per-metric plot styling. All three maps are keyed by metric name.

# Line colour: one slot of matplotlib's "tab10" palette per metric.
colorMap = dict(
    md_file_create=cm.tab10(0),
    md_file_delete=cm.tab10(1),
    md_mod=cm.tab10(2),
    md_other=cm.tab10(3),
    md_read=cm.tab10(4),
    read_bytes=cm.tab10(5),
    read_calls=cm.tab10(6),
    write_bytes=cm.tab10(7),
    write_calls=cm.tab10(8),
)

# Marker symbol drawn at every data point.
markerMap = dict(
    md_file_create="^",
    md_file_delete="v",
    md_other=".",
    md_mod="<",
    md_read=">",
    read_bytes="h",
    read_calls="H",
    write_bytes="D",
    write_calls="d",
)

# Line style: dotted for the md_* metrics, dashed for reads, dash-dot for writes.
linestyleMap = {
    **dict.fromkeys(
        ["md_file_create", "md_file_delete", "md_mod", "md_other", "md_read"],
        ":",
    ),
    **dict.fromkeys(["read_bytes", "read_calls"], "--"),
    **dict.fromkeys(["write_bytes", "write_calls"], "-."),
}
|
||||
|
||||
# Plot the timeseries
|
||||
def _draw_timeseries(metrics, labels, colors, style, xlim=None):
    """Draw the metric time series of one job onto a new figure.

    With fewer than four metrics all lines share one axes with a legend;
    otherwise every metric gets its own subplot, labelled with its name.
    ``xlim`` is only forwarded to ``DataFrame.plot`` when it is given.
    """
    options = dict(sharex=True, grid=True, sharey=True, markersize=10,
                   color=colors, style=style)
    if xlim is not None:
        options["xlim"] = xlim

    if len(labels) < 4:
        ax = metrics.plot(legend=True, figsize=(8, 2), **options)
        ax.set_ylabel("Value")
    else:
        axes = metrics.plot(subplots=True, legend=False,
                            figsize=(8, 1 + 1.1 * len(labels)), **options)
        for axis, label in zip(axes, labels):
            axis.set_ylabel(label)

    pyplot.xlabel("Segment number")


# Plot the timeseries
def plot(prefix, header, row):
    """Plot the metric time series of a single job and save them as images.

    Parameters
    ----------
    prefix : str
        Leading part of every output filename.
    header : list of str
        Column names of the record; must contain ``"jobid"``.
    row : list of str
        One record matching ``header``. Every column except ``jobid`` is a
        ``:``-separated list of numbers, one value per segment.

    Saves ``<prefix>timeseries<jobid>`` and ``<prefix>hist<jobid>`` with the
    module-level ``fileformat`` extension. For jobs with more than 50
    segments it also saves ``<prefix>timeseries<jobid>-30``, zoomed to
    segments 0..30. Metrics whose values sum to zero are left out. Prints a
    message and returns early when there is nothing to plot.
    """
    fields = dict(zip(header, row))
    jobid = fields.pop("jobid")

    result = []
    segment_count = 0
    for metric, encoded in fields.items():
        values = [float(v) for v in encoded.split(":")]
        # Length of the last metric column seen (plotted or not); this is
        # the quantity the zoom threshold further down is compared against.
        segment_count = len(values)
        if sum(values) == 0:
            continue
        result.extend([metric, segment, value]
                      for segment, value in enumerate(values))

    if len(result) == 0:
        print("Empty job! Cannot plot!")
        return

    data = DataFrame(result, columns=["metrics", "segment", "value"])
    metrics = DataFrame()
    labels = []
    colors = []
    style = []
    # Group by the bare column name: grouping by the one-element list
    # ["metrics"] makes pandas >= 2.0 hand out 1-tuples as group keys, and
    # those would miss every lookup in the style maps.
    for name, group in data.groupby("metrics"):
        style.append(linestyleMap[name] + markerMap[name])
        colors.append(colorMap[name])
        # Shorter display names for the two file metadata metrics.
        if name == "md_file_delete":
            name = "file_delete"
        if name == "md_file_create":
            name = "file_create"
        try:
            metrics[name] = pd.Series(group["value"].tolist())
        except Exception:
            print("Error processing %s with" % jobid)
            print(group.values)
            return

        labels.append(name)

    # Drop figures left over from a previous job.
    pyplot.close('all')

    # Full time series
    _draw_timeseries(metrics, labels, colors, style)
    pyplot.savefig(prefix + "timeseries" + jobid + fileformat, bbox_inches='tight', dpi=150)

    # Histogram of the values of every metric
    metrics.hist(sharex=True, grid=True, sharey=True, figsize=(8, 4), bins=10)
    pyplot.savefig(prefix + "hist" + jobid + fileformat, bbox_inches='tight', dpi=150)

    # Zoom onto the first 30 segments, only worthwhile for long jobs
    if segment_count <= 50:
        return

    _draw_timeseries(metrics, labels, colors, style, xlim=(0, 30))
    pyplot.savefig(prefix + "timeseries" + jobid + "-30" + fileformat, bbox_inches='tight', dpi=150)
|
||||
|
||||
### end plotting function
|
||||
|
||||
|
||||
|
||||
#with open('job-io-datasets/datasets/job_codings.csv') as csv_file: # EB: old codings
|
||||
# Walk over the job codings and plot every job requested on the command line.
# newline='' is what the csv module asks for when it is handed a file object.
with open('./datasets/job_codings_v4.csv', newline='') as csv_file:  # EB: codings file lives in this repo
    csv_reader = csv.reader(csv_file, delimiter=',')
    # The first record carries the column names.
    header = next(csv_reader, None)
    for row in csv_reader:
        # csv.reader yields [] for blank lines; row[0] would raise IndexError.
        if not row:
            continue
        job = row[0].strip()
        if job not in jobs:
            continue
        # The position of the job in the argument list selects its prefix
        # and becomes part of the output filename.
        index = jobs.index(job)
        plot(prefix[index] + "-ks-" + str(index), header, row)
|
|
@ -7,7 +7,7 @@ library(stringi)
|
|||
library(stringr)
|
||||
|
||||
# Set to TRUE to print individual job images
|
||||
plotjobs = FALSE
|
||||
plotjobs = TRUE
|
||||
|
||||
# Color scheme
|
||||
plotcolors <- c("#CC0000", "#FFA500", "#FFFF00", "#008000", "#9999ff", "#000099")
|
||||
|
@ -28,7 +28,7 @@ cat(nrow(data))
|
|||
|
||||
# empirical cumulative density function (ECDF)
|
||||
data$sim = data$similarity*100
|
||||
ggplot(data, aes(sim, color=alg_name, group=alg_name)) + stat_ecdf(geom = "step") + xlab("Similarity in %") + ylab("Fraction of jobs") + theme(legend.position=c(0.9, 0.4)) + scale_color_brewer(palette = "Set2") + scale_x_log10()
|
||||
ggplot(data, aes(sim, color=alg_name, group=alg_name)) + stat_ecdf(geom = "step") + xlab("Similarity in %") + ylab("Fraction of jobs") + theme(legend.position=c(0.05, 0.5), legend.title = element_blank()) + scale_color_brewer(palette = "Set2") + scale_x_log10()
|
||||
ggsave("ecdf.png", width=8, height=2.5)
|
||||
|
||||
# histogram for the jobs
|
||||
|
@ -47,13 +47,21 @@ metadata = read.csv("./datasets/job_metadata.csv") # EB: is ebenfalls im Repo
|
|||
metadata$user_id = as.factor(metadata$user_id)
|
||||
metadata$group_id = as.factor(metadata$group_id)
|
||||
|
||||
plotJobs = function(jobs){
|
||||
plotJobs = function(algorithm, jobs){
|
||||
# print the job timelines
|
||||
r = e[ordered, ]
|
||||
|
||||
if (plotjobs) {
|
||||
if(algorithm == "ks"){
|
||||
script = "./scripts/plot-job-timelines-ks.py"
|
||||
}else{
|
||||
script = "./scripts/plot-job-timelines.py"
|
||||
return(0) ### FIXME
|
||||
}
|
||||
prefix = do.call("sprintf", list("%s-%.4f-", level, r$similarity))
|
||||
system(sprintf("./scripts/plot-single-job.py %s %s", paste(r$jobid, collapse=","), paste(prefix, collapse=",")))
|
||||
call = sprintf("%s %s %s", script, paste(r$jobid, collapse=","), paste(prefix, collapse=","))
|
||||
print(call)
|
||||
system(call)
|
||||
}
|
||||
|
||||
system(sprintf("./scripts/extract-conf-data.sh %s > jobs-%s.txt", paste(r$jobid, collapse=" "), level))
|
||||
|
@ -88,7 +96,7 @@ for (level in levels(data$alg_name)){
|
|||
userprofile$userrank = 1:nrow(userprofile)
|
||||
result.userid = rbind(result.userid, cbind(level, userprofile))
|
||||
|
||||
plotJobs(jobs)
|
||||
plotJobs(level, jobs)
|
||||
}
|
||||
|
||||
colnames(result.userid) = c("alg_name", "user_id", "count", "userrank")
|
||||
|
|
Loading…
Reference in New Issue