% Conference paper, IEEE Big Data 2021 (cross-facility HPC workflows).
% Names are stored in "Last, First" form so BibTeX splits surname and given names unambiguously.
% No series field: the proceedings title belongs in booktitle only.
@inproceedings{c0cea72bd22649d283f60e6045a3c009,
  author    = {Antypas, K. B. and Bard, D. J. and Blaschke, J. P. and Canon, R. Shane and Enders, Bjoern and Shankar, Mallikarjun Arjun and Somnath, Suhas and Stansberry, Dale and Uram, Thomas D. and Wilkinson, Sean R.},
  title     = {Enabling discovery data science through cross-facility workflows},
  booktitle = {Proceedings - 2021 {IEEE} International Conference on Big Data, Big Data 2021},
  editor    = {Chen, Yixin and Ludwig, Heiko and Tu, Yicheng and Fayyad, Usama and Zhu, Xingquan and Hu, Xiaohua Tony and Byna, Suren and Liu, Xiong and Zhang, Jianping and Pan, Shirui and Papalexakis, Vagelis and Wang, Jianwu and Cuzzocrea, Alfredo and Ordonez, Carlos},
  publisher = {Institute of Electrical and Electronics Engineers Inc.},
  year      = {2021},
  pages     = {3671--3680},
  doi       = {10.1109/BigData52589.2021.9671421},
  language  = {English},
  keywords  = {big data science, containers, cross-facility workflows, data analysis, infrastructure, orchestration platforms, workflow portability},
  note      = {Publisher Copyright: {\textcopyright} 2021 IEEE.; 2021 IEEE International Conference on Big Data, Big Data 2021 ; Conference date: 15-12-2021 Through 18-12-2021},
  abstract  = {Experimental and observational instruments for scientific research (such as light sources, genome sequencers, accelerators, telescopes and electron microscopes) increasingly require High Performance Computing (HPC) scale capabilities for data analysis and workflow processing. Next-generation instruments are being deployed with higher resolutions and faster data capture rates, creating a big data crunch that cannot be handled by modest institutional computing resources. Often these big data analysis pipelines also require near real-time computing and have higher resilience requirements than the simulation and modeling workloads more traditionally seen at HPC centers. While some facilities have enabled workflows to run at a single HPC facility, there is a growing need to integrate capabilities across HPC facilities to enable cross-facility workflows, either to provide resilience to an experiment, increase analysis throughput capabilities, or to better match a workflow to a particular architecture. In this paper we describe the barriers to executing complex data analysis workflows across HPC facilities and propose an architectural design pattern for enabling scientific discovery using cross-facility workflows that includes orchestration services, application programming interfaces (APIs), data access and co-scheduling.},
}