Compare commits

...

4 Commits

5 changed files with 193 additions and 39 deletions

BIN
.DS_Store vendored 100644

Binary file not shown.

1
.gitignore vendored
View File

@ -1,4 +1,5 @@
# ---> TeX
*.pdf
## Core latex/pdflatex auxiliary files:
*.aux
*.lof

View File

@ -481,7 +481,8 @@ breaklinks=true,
\begin{titlepage}
\begin{center}
\textsc{\LARGE Research Proposal}\\[0.5cm] % Thesis type
\textsc{\LARGE Research Proposal}\\[0.2cm] % Thesis type
\textsc{\degreename}\\[0.5cm]
{\scshape submitted to the \par}

View File

@ -1,6 +1,107 @@
@book{lamport1994latex,
title={{LATEX: a document preparation system: user's guide and reference manual}},
author={Lamport, Leslie},
year={1994},
publisher={Addison-wesley}
% Encoding: UTF-8
% Lamport's LaTeX user guide and reference manual (book).
@Book{lamport1994,
  author    = {Lamport, Leslie},
  title     = {{\LaTeX}: A Document Preparation System: User's Guide and Reference Manual},
  publisher = {Addison-Wesley},
  year      = {1994},
}
% Workshop paper: the DOI prefix (IPDPSW.2017) identifies proceedings, not a journal,
% so the entry is typed @InProceedings and carries a booktitle.
@InProceedings{Jenkins2017,
  author    = {Jenkins, John and Shipman, Galen and Mohd-Yusof, Jamaludin and Barros, Kipton and Carns, Philip and Ross, Robert},
  title     = {A Case Study in Computational Caching Microservices for {HPC}},
  booktitle = {2017 {IEEE} International Parallel and Distributed Processing Symposium Workshops ({IPDPSW})},
  year      = {2017},
  doi       = {10.1109/IPDPSW.2017.40},
  place     = {United States},
}
% Web article (thenewstack.io); there is no journal, so it is typed @Online.
% The citation key is kept unchanged because the proposal text cites it.
@Online{Joab2018,
  author = {Jackson, Joab},
  title  = {Containers for High Performance Computing},
  year   = {2018},
  url    = {https://thenewstack.io/roadmap-containers-for-high-performance-computing/},
}
% IJACSA journal article. Author names are given as "Surname, Given", matching the
% surnames used for the same authors in the Benchara2016 entry.
% NOTE(review): number = 8 is inferred from the DOI suffix (080831) and month 08 -- confirm.
@Article{Fatema2017,
  author  = {Benchara, Fatéma Zahra and Youssfi, Mohamed and Bouattane, Omar and Serrar, Ouafae and Ouajji, Hassan},
  title   = {Toward a New Massively Distributed Virtual Machine based Cloud Micro-Services Team Model for {HPC}: {SPMD} Applications},
  journal = {International Journal of Advanced Computer Science and Applications},
  year    = {2017},
  volume  = {8},
  number  = {8},
  pages   = {238--249},
  month   = aug,
  doi     = {10.14569/IJACSA.2017.080831},
}
% CISIS 2016 conference paper on scheduling scientific workflows in HPC environments.
@InProceedings{Cheptsov2016,
  author    = {Cheptsov, A.},
  title     = {Dynamic Approach to Scheduling Reconfigurable Scientific Workflows in Heterogeneous {HPC} Environments},
  booktitle = {2016 10th International Conference on Complex, Intelligent, and Software Intensive Systems ({CISIS})},
  year      = {2016},
  pages     = {7--14},
  month     = jul,
  doi       = {10.1109/CISIS.2016.146},
  keywords  = {adaptive scheduling;dynamic scheduling;parallel processing;resource allocation;core scheduling algorithms;Maui;Torque;in-depth monitoring technologies;adaptive scheduling;heuristic-based approach;infrastructure-specific policies;application-specific policies;optimization policies;scheduled application characteristics;heterogeneous resource allocation;high performance computing infrastructures;heterogeneous HPC environments;reconfigurable scientific workflows;dynamic scheduling;Dynamic scheduling;Resource management;Processor scheduling;Energy consumption;Job shop scheduling;Optimization;scheduling; performance; workflow; monitoring; heuristics; DreamCloud},
}
% ICMCS 2016 conference paper on a cloud micro-services middleware for HPC.
@InProceedings{Benchara2016,
  author    = {Benchara, F. Z. and Youssfi, M. and Bouattane, O. and Ouajji, H.},
  title     = {A new efficient distributed computing middleware based on cloud micro-services for {HPC}},
  booktitle = {2016 5th International Conference on Multimedia Computing and Systems ({ICMCS})},
  year      = {2016},
  pages     = {354--359},
  month     = sep,
  doi       = {10.1109/ICMCS.2016.7905644},
  issn      = {2472-7652},
  keywords  = {Big Data;cloud computing;middleware;parallel processing;resource allocation;distributed computing middleware;HPC;high-performance computing-based cloud microservices;massively-parallel-distributed computational system;Big Data;cooperative microservice team work model;distributed microservices;microservice virtual processing units;MsVPU;integrated load balancing service;AMQP communication protocol;distributed computational scheme;integrated middleware;Computational modeling;Cloud computing;Load management;Load modeling;Nickel;Parallel and Distributed Computing;Middleware;Micro-services;Cloud Computing;Load balancing;High Performance Computing},
}
% iRODS 4.0 microservice workbook (book). The edition is stored as an integer so the
% bibliography style can render the ordinal itself.
@Book{Rajasekar2015,
  author    = {Rajasekar, Arcot and Russell, Terrell and Coposky, Jason and de Torcy, Antoine and Xu, Hao and Wan, Michael and Moore, Reagan W. and Schroeder, Wayne and Chen, Sheau-Yen and Conway, Mike and Ward, Jewel H.},
  title     = {The Integrated Rule-Oriented Data System ({iRODS} 4.0) Microservice Workbook},
  publisher = {CreateSpace Independent Publishing Platform},
  year      = {2015},
  address   = {USA},
  edition   = {1},
  isbn      = {1511732776, 9781511732772},
}
% Climate Informatics 2016 workshop paper introducing OceanTEA.
% The citation key is kept unchanged for compatibility with existing citations.
@InProceedings{Arne2016,
  author    = {Johanson, Arne and Fl{\"o}gel, Sascha and Dullo, Christian and Hasselbring, Wilhelm},
  title     = {{OceanTEA}: Exploring Ocean-Derived Climate Data Using Microservices},
  booktitle = {Proceedings of the Sixth International Workshop on Climate Informatics (CI 2016)},
  year      = {2016},
  series    = {NCAR Technical Note NCAR/TN},
  pages     = {25--28},
  month     = sep,
  abstract  = {Ocean observation systems gather an increasing amount of climate-relevant time series data. To interactively explore and analyze such high-dimensional datasets, we developed the software OceanTEA. Our open-source tool leverages modern web technology to support interactive data visualization, spatial analysis of current patterns, and temporal pattern discovery via machine learning methods. The microservice architecture of OceanTEA ensures a maintainable implementation that seamlessly scales from desktop computers to cloud computing infrastructure.},
  keywords  = {Climate Data, Microservices},
  url       = {http://eprints.uni-kiel.de/34758/},
}
% Journal article on software engineering for computational science.
% NOTE(review): pages 1--1 looks like an early-access placeholder -- confirm the final
% volume, number and page range against the publisher record.
@Article{Johanson2018,
  author   = {Johanson, A. and Hasselbring, W.},
  title    = {Software Engineering for Computational Science: Past, Present, Future},
  journal  = {Computing in Science \& Engineering},
  year     = {2018},
  pages    = {1--1},
  issn     = {1521-9615},
  doi      = {10.1109/MCSE.2018.108162940},
  keywords = {Software;Scientific computing;Software engineering;Computational modeling;Computers;Productivity;Object recognition;scientific software development;domain-specific languages;software performance engineering;software testing;requirements engineering},
}
% AGU Fall Meeting abstract; record exported from the SAO/NASA Astrophysics Data System.
@Article{Orcutt2015,
  author   = {{Orcutt}, J.~A. and {Rajasekar}, A. and {Moore}, R.~W. and {Vernon}, F.},
  title    = {{Workflow-Oriented Cyberinfrastructure for Sensor Data Analytics}},
  journal  = {AGU Fall Meeting Abstracts},
  year     = {2015},
  month    = dec,
  pages    = {IN31C-1778},
  eid      = {IN31C-1778},
  keywords = {1908 Cyberinfrastructure, INFORMATICS, 1910 Data assimilation, integration and fusion, INFORMATICS, 1920 Emerging informatics technologies, INFORMATICS, 1998 Workflow, INFORMATICS},
  adsurl   = {http://adsabs.harvard.edu/abs/2015AGUFMIN31C1778O},
  adsnote  = {Provided by the SAO/NASA Astrophysics Data System},
}
@Comment{jabref-meta: databaseType:bibtex;}

View File

@ -1,80 +1,131 @@
\documentclass{ResearchProposal} % The class file specifying the document structure
\thesistitle[Optional Short Title]{MyThesis}
\author{MyFirstname \textsc{MyLastname}}
\thesistitle{Potential of Microservices in HPC}
\author{Kyle \textsc{Spindler}}
\supervisor{Dr. Julian \textsc{Kunkel}} % Your prospect supervisor's name (if known already), leave empty if you are looking for one
\university{University of Reading} % The university you apply for
\department{Department of Computer Science} % The department's name
\group{\href{http://hps.vi4io.org}{High-Performance Storage}} % The research area/group
\keywords{X, Y, Z} % Use a few describing the thesis better
\degree{for a PhD at a distance in part-time}
\group{\href{http://hps.vi4io.org}{High-Performance, Storage Software Architectures}} % The research area/group
\keywords{Microservice, Serverless, HPC, Storage} % Use a few describing the thesis better
\RequirePackage{todonotes}
\newcommand{\jk}[1]{\todo[inline]{JK: #1}}
\addbibresource{example.bib}
\startMain
\textit{Please also check \url{http://www.reading.ac.uk/computer-science/dcs-PhD-programmes.aspx}.}
\section{Motivation}
\textit{What makes the research topic of interest and importance}
Software architecture involves considering multiple characteristics such as separation of concerns, quality attributes (maintainability, scalability, loose coupling, high cohesion, etc.) and architectural styles. Some architectural styles are better suited to performance, while others, like microservices, are better at maintainability and loose coupling.
Microservices is a very popular architecture that is used in many domains because of the benefits it offers.
\medskip
Scientific codes suffer from a lack of good software engineering practices. HPC and storage applications are typically tightly coupled to utilise the available resources efficiently. While it is claimed that this provides the best performance, the benefits and drawbacks of alternative software architectures for HPC software have not been thoroughly investigated. Microservices, for example, provide a scalable architecture and ease the software development process by providing separation of concerns through techniques from Domain-Driven Design. When deciding on a software architecture, not only performance and scalability matter, but also the flexibility and maintainability of the software.
\medskip
In this regard, the HPC community struggles to recruit sufficient developers to keep up with the development of software which can often be seen in important utility tools. For example, existing tools for pre/post-processing of HPC workflows and the analysis of HPC data are typically not the main focus of scientists and developers; hence, they are implemented in a way that shows limited scalability, i.e., are executed sequentially in bash scripts.
\section{Research question}
\textit{The main research question(s) that you want to address.}
\textit{Provide one sentence that gives an overview of the topic, how would you describe your thesis to a computer scientist?}
Understand the impact that modern-day software architectures (microservices, event-driven) have on HPC, and particularly on the climate/weather domain.
Example: The goal of this thesis is to understand and optimize the performance behavior for large-scale data accesses in the domain of climate and weather.
The goal of this thesis is to see if HPC applications and storage systems can be redeveloped using modern day software architecture such as microservices with minimal or no overhead while gaining the benefits from the loosely coupled architecture.
\textit{Now split the research goal into questions}
\smallskip
This covers the research questions:
\begin{enumerate}
\item What workflows are limited by I/O?
\item Which I/O operations are typically performed?
\item Which optimizations are beneficial for the workflows on HPC systems?
\item What parts of the HPC and Storage Solution could benefit from microservices or other software architectures?
\item How can the HPC and Storage Solution be made more maintainable, scalable, loosely coupled, cohesive and independent?
\item Which areas within the HPC and Storage Solution could improve its efficiency through the use of software architecture?
\end{enumerate}
\section{Related work}
\textit{How your thinking builds on any previous work.}
\medskip
Relevant related work can be classified into:
\begin{enumerate}
\item The usage of microservices in different disciplines and particularly HPC
\item Software engineering and software architectures in HPC
\item Performance analysis of microservices
\end{enumerate}
Relevant work can be classified into: a) LaTeX studies, b) performance analysis in HPC, ....
\paragraph{LaTeX studies.} It has been shown that blabla \citep{lamport1994latex}.
\paragraph{1. The usage of microservices in different disciplines and particularly HPC.}
Although microservices are found in HPC applications, the majority of existing research has applied microservices in storage \citep{Orcutt2015}, pre/post-processing, middleware, scheduling, workflow and caching services that wrap around the main HPC processing. Storage services like iRODS also harness microservices; however, these systems are not as performance-critical as a typical HPC application, which might be one of the reasons why iRODS is not used in HPC environments.
\paragraph{Performance analysis.}
\paragraph{2. Software engineering and software architectures in HPC.} Software engineering focuses on producing maintainable code using various designs, patterns and principles, which influences the software architecture \citep{Johanson2018}. Weighing trade-offs is an important part of the decision-making process when selecting an architecture: an architecture that is close in performance to a traditional HPC application but offers increased maintainability, as opposed to an application with high performance and no maintainability, might be worth that particular trade-off \citep{Jenkins2017}. Besides performance, other attributes such as security may be included in the trade-off \citep{Joab2018}.
\paragraph{3. Performance analysis of microservices.}
The loosely coupled architecture has an influence on the performance aspect of this analysis, as shown, for example, in \citep{Fatema2017}.
Some preliminary analyses of RESTful services exist, but this is only one potential framework for microservices.
% \medskip
%
% \paragraph{Other areas within an HPC and storage environment where microservices has been used:}
%
% \paragraph{Caching Service.} Microservices were used as a distributed interpolation-based memoization cache \citep{Jenkins2017}.
%
% \paragraph{Containers for High Performance Computing.} Thoughts on how containers may or may not be used in HPC. Containers are commonly used in microservice architectures. \citep{Joab2018}.
%
% \paragraph{Distributed Virtual Machine Cloud Microservice for HPC:SPMD Applications} How Virtual Machines were used in a cloud based microservice architecture in an HPC environment:SPMD. \citep{Fatema2017}.
%
% \paragraph{Scheduling Scientific Workflows in HPC} How to dynamically approach to scheduling reconfigurable scientific workflows in heterogeneous HPC environments. \citep{Cheptsov2016}.
%
% \paragraph{Middleware cloud based microservice in HPC} Shows how middleware has made use of microservices within a HPC environment. \citep{Benchara2016}.
%
% \paragraph{iRODS integrated Microservice Rulebook} Shows how iRODS uses Microservices for storage. \citep{Rajasekar2015}.
%
% \paragraph{Microservices in Ocean Climate Data } Shows how Ocean-Derived Climate Data uses Microservices. \citep{Arne2016}
%
% \paragraph{Software Engineering in Computational Science} How Software Engineering practices can be used in Computational Science environments. \citep{Johanson2018}
%
% \paragraph{Workflow-Oriented Cyberinfrastructure for Sensor Data Analytics} How the use of iRODS was used with streaming sensors. \citep{Orcutt2015}
%
% \paragraph {http://eprints.uni-kiel.de/42726/1/2018-04-19GeomarDataScience.pdf}
\section{Research methodology}
\textit{What research methodology or techniques you may need to use}
Firstly, I must understand the limitations and performance characteristics of alternative software architectures. This is performed in two steps:
\begin{enumerate}
\item Benchmarking of microservice communication protocols in contrast to HPC communication paths.
\item Modelling of systems with different hardware / software architectures
\end{enumerate}
\section{Required infrastructure}
\textit{What facilities you are likely to require to conduct your research.}
Next, HPC use cases and scenarios must be derived and characterised with their pros/cons and performance characteristics.
Finally, to prove the expectations, a prototype of selected applications must be made and compared to native applications. This may involve adjusting an existing package or creating a new application that behaves similarly but has a limited feature set (a so-called mini-app).
\medskip
This research requires a supercomputer with more than 100 nodes to run experiments on.
An orthogonal aspect is to conduct surveys with scientists / scientific developers to understand the reason for the architectural choices made and identify strategies to adjust the existing practice.
\section{Required infrastructure}
Access to HPC systems to benchmark existing software and the developed prototypes. Access to the scientific network to foster discussions and explore with scientists in co-development the benefits of the prototypes.
These requirements will be addressed by my supervisor -- there is no special requirement for the university to provide any of this infrastructure.
\section{Workplan}
\textit{How the research can be completed in the time available. Provide a rough sketch over the runtime of your PhD}
The following sketches the workplan for the different years of the PhD.
The following sketches the tentative workplan for the different years of the PhD.
\paragraph{First year:} setup of work environment, researching related work, writing the chapters introduction and related work of the thesis.
\paragraph{First year:} Setup of work environment, researching related work, start investigating system under study, send out surveys, analyse system under study -- performing the performance analysis of the microservices architecture and frameworks to realize them, writing the chapters introduction and related work of the thesis.
\paragraph{Second year:}
\paragraph{Second year:} Continue researching related work, add more details to the thesis based on previous findings, derive performance models for microservices, start implementing a prototype to prove the model.
\paragraph{Third year:}
\paragraph{Third year:} Continue implementing the prototype, add quality assurance and testing to prototype, continue researching, improve the model, add design details to thesis.
\paragraph{Fourth year:} Conclude tests of benchmarks and mini-applications, adjust prototype, add benchmark findings into thesis, provide critical thinking.
\paragraph{Fifth year:} Finalise thesis, prepare thesis for final stages and delivery.
\proposalAppendix
Add here any appendix, if needed
%Add here any appendix, if needed
\printbibliography[heading=bibintoc]