% Conference paper (XSEDE 2015) describing pbs-spark-submit, an integration of
% Apache Spark with the TORQUE/PBS batch environment. Proper nouns and acronyms
% are brace-protected so sentence-casing bibliography styles keep their capitals.
@inproceedings{ca86a52462364bfbbd2d020ec859abe1,
  author    = {Baer, Troy and Peltz, Paul and Yin, Junqi and Begoli, Edmon},
  title     = {Integrating {Apache} {Spark} into {PBS}-Based {HPC} Environments},
  booktitle = {Proceedings of the {XSEDE} 2015 Conference},
  series    = {{ACM} International Conference Proceeding Series},
  publisher = {Association for Computing Machinery},
  year      = {2015},
  month     = jul,
  day       = {26},
  doi       = {10.1145/2792745.2792779},
  keywords  = {Apache Spark, Batch processing, Data analytics, NICS, PBS, TORQUE},
  abstract  = {This paper describes an effort at the University of Tennessee's National Institute for Computational Sciences (NICS) to integrate Apache Spark into the widely used TORQUE HPC batch environment. The similarities and differences between the execution of a Spark program and that of an MPI program on a cluster are used to motivate how to implement Spark/TORQUE integration. An implementation of this integration, pbs-spark-submit, is described, including demonstrations of functionality on two HPC clusters and a large shared-memory system.},
  note      = {Publisher Copyright: {\textcopyright} 2015 ACM. 4th Annual Conference on Extreme Science and Engineering Discovery Environment, XSEDE 2015; Conference date: 26-07-2015 through 30-07-2015},
  language  = {English},
}