Conference paper: Harney et al., IEEE Big Data 2016 (on-demand Spark service on HPC facilities).
Cleaned portal export: literal-brace escapes removed from author/editor names, names normalised
to "Last, First", acronym in the title brace-protected, and the series field dropped because it
only repeated booktitle verbatim.
@inproceedings{f877200458cd46a7a863038fc30f87ed,
  author    = {Harney, John and Lim, Seung Hwan and Sukumar, Sreenivas and Stansberry, Dale and Xenopoulos, Peter},
  title     = {On-demand data analytics in {HPC} environments at leadership computing facilities: Challenges and experiences},
  booktitle = {Proceedings - 2016 IEEE International Conference on Big Data, Big Data 2016},
  editor    = {Ak, Ronay and Karypis, George and Xia, Yinglong and Hu, Xiaohua Tony and Yu, Philip S. and Joshi, James and Ungar, Lyle and Liu, Ling and Sato, Aki-Hiro and Suzumura, Toyotaro and Rachuri, Sudarsan and Govindaraju, Rama and Xu, Weijia},
  pages     = {2087--2096},
  publisher = {Institute of Electrical and Electronics Engineers Inc.},
  year      = {2016},
  doi       = {10.1109/BigData.2016.7840835},
  language  = {English},
  keywords  = {HPC, data analytics, distributed computing},
  abstract  = {The construction of data analysis infrastructures that handle continuously accumulating data is quickly becoming an essential requirement for many organizations such as the U.S. Department of Energy (DOE). While DOE supports some of the largest computing facilities in the world, new analysis infrastructures like Apache Spark are difficult to implement. In this paper, we propose an on-demand Spark service that mitigates these difficulties, allowing facility users to flexibly create Spark instances quickly and easily. We define a systematic approach for creating these Spark instances and validate that optimal performance benefits are maintained. Using a series of benchmarks for algorithms that are commonly used in scientific workflows, we compared the behavior of Spark tasks using facility resources with that of an open research cloud that has a dedicated Spark infrastructure deployed. Finally, we leveraged a scientific use case from the Center of Nanophase Materials at the Oak Ridge National Laboratory to demonstrate the utility of using Spark in the computing facility.},
  note      = {Publisher Copyright: {\textcopyright} 2016 IEEE.; 4th IEEE International Conference on Big Data, Big Data 2016 ; Conference date: 05-12-2016 Through 08-12-2016},
}