User info added.

This commit is contained in:
Julian M. Kunkel 2020-08-20 20:39:42 +01:00
parent a0e9ed9fb1
commit aa86690f43
2 changed files with 56 additions and 12 deletions

View File

@ -244,9 +244,37 @@ Potentially, analyze how the rankings of different similarities look like.
\subsection{Quantitative Analysis of Selected Jobs} \subsection{Quantitative Analysis of Selected Jobs}
\begin{table}
\caption{User and Group Information} The user count and the group count are the same, meaning that a user is likely from the same group and the number of unique groups is identical to the number of unique users; for Job-L, the user and group counts differ a bit, and for Job-M a bit more.
\end{table} There are up to about twice as many users as groups.
To understand how the Top\,100 jobs are distributed across users, the data is grouped by userid and counted.
\Cref{fig:userids} shows the stacked user information, where the lowest segment of each stack is the user with the most jobs and the topmost segment is the user with the fewest jobs.
For Job-S, we can see that about 70--80\% of jobs stem from one user. For the hex\_lev and hex\_native algorithms, the other jobs stem from a second user, while bin includes jobs from additional users (5 in total).
For Job-M, jobs from more users are included (13); about 25\% of jobs stem from the same user. Here, hex\_lev and hex\_native include more users (30 and 33, respectively) than the other three algorithms.
For Job-L, the two hex algorithms include a slightly more diverse user community (12 and 13 users) than the bin algorithms (9), but hex\_phases covers 35 users.
\begin{figure}
\begin{subfigure}{0.31\textwidth}
\centering
\includegraphics[width=\textwidth]{job_similarities_4296426-out/user-ids}
\caption{Job-S} \label{fig:users-job-S}
\end{subfigure}
\begin{subfigure}{0.31\textwidth}
\centering
\includegraphics[width=\textwidth]{job_similarities_5024292-out/user-ids}
\caption{Job-M} \label{fig:users-job-M}
\end{subfigure}
\begin{subfigure}{0.31\textwidth}
\centering
\includegraphics[width=\textwidth]{job_similarities_7488914-out/user-ids}
\caption{Job-L} \label{fig:users-job-L}
\end{subfigure}
\caption{User information for each job}
\label{fig:userids}
\end{figure}
\begin{figure} \begin{figure}
\begin{subfigure}{0.31\textwidth} \begin{subfigure}{0.31\textwidth}

View File

@ -4,7 +4,7 @@ library(ggplot2)
library(dplyr) library(dplyr)
require(scales) require(scales)
plotjobs = TRUE plotjobs = FALSE
# Color scheme # Color scheme
plotcolors <- c("#CC0000", "#FFA500", "#FFFF00", "#008000", "#9999ff", "#000066") plotcolors <- c("#CC0000", "#FFA500", "#FFFF00", "#008000", "#9999ff", "#000066")
@ -42,13 +42,6 @@ metadata$user_id = as.factor(metadata$user_id)
metadata$group_id = as.factor(metadata$group_id) metadata$group_id = as.factor(metadata$group_id)
plotJobs = function(jobs){ plotJobs = function(jobs){
# plot details about the jobs of a given algorithm
tbl = jobData[jobData$jobid %in% jobs,]
print(summary(tbl))
#print(tbl)
md = metadata[metadata$jobid %in% jobs,]
print(summary(md))
# print the job timelines # print the job timelines
r = e[ordered, ] r = e[ordered, ]
@ -61,8 +54,9 @@ plotJobs = function(jobs){
# Store the job ids in a table, each column is one algorithm # Store the job ids in a table, each column is one algorithm
dim = length(levels(data$alg_name)) dim = length(levels(data$alg_name))
count = 100 count = 100
result = matrix(1:(dim*count), nrow=count, ncol=dim) result = matrix(1:(dim*count), nrow=count, ncol=dim) # will contain the job ids for the count best jobs
colnames(result) = levels(data$alg_name) colnames(result) = levels(data$alg_name)
result.userid = tibble() # will contain the userid for the count best jobs
# Extract the 100 most similar jobs into the table # Extract the 100 most similar jobs into the table
for (level in levels(data$alg_name)){ for (level in levels(data$alg_name)){
@ -74,9 +68,31 @@ for (level in levels(data$alg_name)){
# Extract the data for the jobs # Extract the data for the jobs
jobs = e[ordered,"jobid"] jobs = e[ordered,"jobid"]
result[, level] = jobs result[, level] = jobs
# extract details about the jobs of a given algorithm
tbl = jobData[jobData$jobid %in% jobs,]
print(summary(tbl))
md = metadata[metadata$jobid %in% jobs,]
print(summary(md))
md$value = 1
userprofile = md %>% group_by(user_id) %>% summarise(count = sum(value))
userprofile = userprofile[order(userprofile$count, decreasing=TRUE),]
userprofile$userrank = 1:nrow(userprofile)
result.userid = rbind(result.userid, cbind(level, userprofile))
plotJobs(jobs) plotJobs(jobs)
} }
colnames(result.userid) = c("alg_name", "user_id", "count", "userrank")
print(result.userid)
# Create stacked user table
ggplot(result.userid, aes(fill=userrank, y=count, x=alg_name)) + geom_bar(position="stack", stat="identity") + theme(legend.position = "none") + scale_fill_gradientn(colours=rainbow(5)) + ylab("Stacked user count") + xlab("Algorithm") # + scale_fill_gradient(low="blue", high="red", space ="Lab" ) + scale_fill_continuous(type = "viridis")
ggsave("user-ids.png", width=6, height=4)
# Compute intersection in a new table # Compute intersection in a new table
res.intersect = matrix(1:(dim*dim), nrow=dim, ncol=dim) res.intersect = matrix(1:(dim*dim), nrow=dim, ncol=dim)
colnames(res.intersect) = levels(data$alg_name) colnames(res.intersect) = levels(data$alg_name)