master Julian M. Kunkel 3 years ago
parent
commit
aa86690f43
2 changed files with 56 additions and 12 deletions
1. 34
paper/main.tex
2. 34
scripts/plot.R

#### 34 paper/main.tex View File

 @ -244,9 +244,37 @@ Potentially, analyze what the rankings of different similarities look like.   \subsection{Quantitative Analysis of Selected Jobs}   \begin{table} \caption{User and Group Information} \end{table}   The user count and the group count are the same, meaning that a user is likely from the same group and the number of (unique) groups is identical to the number of (unique) users; for Job-L, the user and group counts differ a bit, and for Job-M a bit more, with up to about twice as many users as groups.   To understand how the Top\,100 jobs are distributed across users, the data is grouped by userid and counted. \Cref{fig:userids} shows the stacked user information, where the lowest stack is the user with the most jobs and the topmost user in the stack has the smallest number of jobs. For Job-S, we can see that about 70--80\% of jobs stem from one user; for the hex\_lev and hex\_native algorithms, the other jobs stem from a second user, while bin includes jobs from additional users (5 in total). For Job-M, jobs from more users are included (13); about 25\% of jobs stem from the same user. Here, hex\_lev and hex\_native include more users (30 and 33, respectively) than the other three algorithms. For Job-L, the two hex algorithms include a slightly more diverse user community (12 and 13 users) than the bin algorithms (9), but hex\_phases covers 35 users.   
\begin{figure} \begin{subfigure}{0.31\textwidth} \centering \includegraphics[width=\textwidth]{job_similarities_4296426-out/user-ids} \caption{Job-S} \label{fig:users-job-S} \end{subfigure} \begin{subfigure}{0.31\textwidth} \centering \includegraphics[width=\textwidth]{job_similarities_5024292-out/user-ids} \caption{Job-M} \label{fig:users-job-M} \end{subfigure} \begin{subfigure}{0.31\textwidth} \centering \includegraphics[width=\textwidth]{job_similarities_7488914-out/user-ids} \caption{Job-L} \label{fig:users-job-L} \end{subfigure}     \caption{User information for each job} \label{fig:userids} \end{figure}   \begin{figure} \begin{subfigure}{0.31\textwidth}

#### 34 scripts/plot.R View File

 @ -4,7 +4,7 @@ library(ggplot2) library(dplyr) require(scales)   plotjobs = TRUE plotjobs = FALSE   # Color scheme plotcolors <- c("#CC0000", "#FFA500", "#FFFF00", "#008000", "#9999ff", "#000066") @ -42,13 +42,6 @@ metadata$user_id = as.factor(metadata$user_id) metadata$group_id = as.factor(metadata$group_id)   plotJobs = function(jobs){  # plot details about the jobs of a given algorithm  tbl = jobData[jobData$jobid %in% jobs,]  print(summary(tbl))  #print(tbl)  md = metadata[metadata$jobid %in% jobs,]  print(summary(md))    # print the job timelines  r = e[ordered, ]   @ -61,8 +54,9 @@ plotJobs = function(jobs){ # Store the job ids in a table, each column is one algorithm dim = length(levels(data$alg_name)) count = 100 result = matrix(1:(dim*count), nrow=count, ncol=dim) result = matrix(1:(dim*count), nrow=count, ncol=dim) # will contain the job ids for the count best jobs colnames(result) = levels(data$alg_name) result.userid = tibble() # will contain the userid for the count best jobs   # Extract the 100 most similar jobs into the table for (level in levels(data$alg_name)){ @ -74,9 +68,31 @@ for (level in levels(data$alg_name)){  # Extract the data for the jobs  jobs = e[ordered,"jobid"]  result[, level] = jobs    # extract details about the jobs of a given algorithm  tbl = jobData[jobData$jobid %in% jobs,]  print(summary(tbl))  md = metadata[metadata$jobid %in% jobs,]  print(summary(md))  md$value = 1  userprofile = md %>% group_by(user_id) %>% summarise(count = sum(value))  userprofile = userprofile[order(userprofile$count, decreasing=TRUE),]  userprofile$userrank = 1:nrow(userprofile)  result.userid = rbind(result.userid, cbind(level, userprofile))    plotJobs(jobs) }   colnames(result.userid) = c("alg_name", "user_id", "count", "userrank")   print(result.userid)   # Create stacked user table ggplot(result.userid, aes(fill=userrank, y=count, x=alg_name)) + geom_bar(position="stack", stat="identity") + theme(legend.position = "none") + 
scale_fill_gradientn(colours=rainbow(5)) + ylab("Stacked user count") + xlab("Algorithm") # + scale_fill_gradient(low="blue", high="red", space ="Lab" ) + scale_fill_continuous(type = "viridis")   ggsave("user-ids.png", width=6, height=4)     # Compute intersection in a new table res.intersect = matrix(1:(dim*dim), nrow=dim, ncol=dim) colnames(res.intersect) = levels(data$alg_name)