Merge branch 'master' of http://git.hps.vi4io.org/eugen.betke/mistral-io-datasets
This commit is contained in:
		
						commit
						98186bc0d8
					
				| @ -244,9 +244,37 @@ Potentially, analyze how the rankings of different similarities look like. | ||||
| 
 | ||||
| \subsection{Quantitative Analysis of Selected Jobs} | ||||
| 
 | ||||
| \begin{table} | ||||
| \caption{User and Group Information} | ||||
| \end{table} | ||||
| 
 | ||||
| The user count and group count are the same, meaning that a user is likely from the same group and the number of (unique) groups is identical to the number of (unique) users; for Job-L, the user and group counts differ a bit, and for Job-M a bit more. | ||||
| At most, there are about twice as many users as groups. | ||||
| 
 | ||||
| To understand how the Top\,100 jobs are distributed across users, the data is grouped by userid and counted. | ||||
| \Cref{fig:userids} shows the stacked user information, where the lowest stack is the user with the most jobs and the topmost user in the stack has the smallest number of jobs. | ||||
| For Job-S, we can see that about 70--80\% of jobs stem from one user; for the hex\_lev and hex\_native algorithms, the other jobs stem from a second user, while bin includes jobs from additional users (5 in total). | ||||
| For Job-M, jobs from more users are included (13); about 25\% of jobs stem from the same user. Here, hex\_lev and hex\_native include more users (30 and 33, respectively) than the other three algorithms. | ||||
| For Job-L, the two hex algorithms include a slightly more diverse user community (12 and 13 users) than the bin algorithms (9), but hex\_phases covers 35 users. | ||||
| 
 | ||||
| \begin{figure} | ||||
| \begin{subfigure}{0.31\textwidth} | ||||
| \centering | ||||
| \includegraphics[width=\textwidth]{job_similarities_4296426-out/user-ids} | ||||
| \caption{Job-S} \label{fig:users-job-S} | ||||
| \end{subfigure} | ||||
| \begin{subfigure}{0.31\textwidth} | ||||
| \centering | ||||
| \includegraphics[width=\textwidth]{job_similarities_5024292-out/user-ids} | ||||
| \caption{Job-M} \label{fig:users-job-M} | ||||
| \end{subfigure} | ||||
| \begin{subfigure}{0.31\textwidth} | ||||
| \centering | ||||
| \includegraphics[width=\textwidth]{job_similarities_7488914-out/user-ids} | ||||
| \caption{Job-L} \label{fig:users-job-L} | ||||
| \end{subfigure} | ||||
| 
 | ||||
| 
 | ||||
| \caption{User information for each job} | ||||
| \label{fig:userids} | ||||
| \end{figure} | ||||
| 
 | ||||
| \begin{figure} | ||||
| \begin{subfigure}{0.31\textwidth} | ||||
|  | ||||
| @ -4,7 +4,7 @@ library(ggplot2) | ||||
| library(dplyr) | ||||
| require(scales) | ||||
| 
 | ||||
| plotjobs = TRUE | ||||
| plotjobs = FALSE | ||||
| 
 | ||||
| # Color scheme | ||||
| plotcolors <- c("#CC0000", "#FFA500", "#FFFF00", "#008000", "#9999ff", "#000066") | ||||
| @ -42,13 +42,6 @@ metadata$user_id = as.factor(metadata$user_id) | ||||
| metadata$group_id = as.factor(metadata$group_id) | ||||
| 
 | ||||
| plotJobs = function(jobs){ | ||||
|     # plot details about the jobs of a given algorithm | ||||
|     tbl = jobData[jobData$jobid %in% jobs,] | ||||
|     print(summary(tbl)) | ||||
|     #print(tbl) | ||||
|     md = metadata[metadata$jobid %in% jobs,] | ||||
|     print(summary(md)) | ||||
| 
 | ||||
|     # print the job timelines | ||||
|     r = e[ordered, ] | ||||
| 
 | ||||
| @ -61,8 +54,9 @@ plotJobs = function(jobs){ | ||||
| # Store the job ids in a table, each column is one algorithm | ||||
| dim = length(levels(data$alg_name)) | ||||
| count = 100 | ||||
| result = matrix(1:(dim*count), nrow=count, ncol=dim) | ||||
| result = matrix(1:(dim*count), nrow=count, ncol=dim) # will contain the job ids for the count best jobs | ||||
| colnames(result) = levels(data$alg_name) | ||||
| result.userid = tibble() # will contain the userid for the count best jobs | ||||
| 
 | ||||
| # Extract the 100 most similar jobs into the table | ||||
| for (level in levels(data$alg_name)){ | ||||
| @ -74,9 +68,31 @@ for (level in levels(data$alg_name)){ | ||||
|     # Extract the data for the jobs | ||||
|     jobs = e[ordered,"jobid"] | ||||
|     result[, level] = jobs | ||||
| 
 | ||||
|     # extract details about the jobs of a given algorithm | ||||
|     tbl = jobData[jobData$jobid %in% jobs,] | ||||
|     print(summary(tbl)) | ||||
|     md = metadata[metadata$jobid %in% jobs,] | ||||
|     print(summary(md)) | ||||
|     md$value = 1 | ||||
|     userprofile = md %>% group_by(user_id) %>% summarise(count = sum(value)) | ||||
|     userprofile = userprofile[order(userprofile$count, decreasing=TRUE),] | ||||
|     userprofile$userrank = 1:nrow(userprofile) | ||||
|     result.userid = rbind(result.userid, cbind(level, userprofile)) | ||||
| 
 | ||||
|     plotJobs(jobs) | ||||
| } | ||||
| 
 | ||||
| colnames(result.userid) = c("alg_name", "user_id", "count", "userrank") | ||||
| 
 | ||||
| print(result.userid) | ||||
| 
 | ||||
| # Create stacked user table | ||||
| ggplot(result.userid, aes(fill=userrank, y=count, x=alg_name)) + geom_bar(position="stack", stat="identity") + theme(legend.position = "none") + scale_fill_gradientn(colours=rainbow(5)) + ylab("Stacked user count") + xlab("Algorithm") # + scale_fill_gradient(low="blue", high="red", space ="Lab" ) + scale_fill_continuous(type = "viridis") | ||||
| 
 | ||||
| ggsave("user-ids.png", width=6, height=4) | ||||
| 
 | ||||
| 
 | ||||
| # Compute intersection in a new table | ||||
| res.intersect = matrix(1:(dim*dim), nrow=dim, ncol=dim) | ||||
| colnames(res.intersect) = levels(data$alg_name) | ||||
|  | ||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user