% Workshop paper from PDSW'11 (co-located with SC'11). The citation key is kept
% unchanged so existing citations still resolve. The acronym I/O is
% brace-protected in the title so sentence-casing styles do not lowercase it.
% The former `series` field only repeated `booktitle` and was dropped to avoid
% the proceedings name being printed twice.
@inproceedings{801d8cf7fcda4390ae94e8ea716f3a33,
  author    = {Zheng, Fang and Abbasi, Hasan and Cao, Jianting and Dayal, Jai and Schwan, Karsten and Wolf, Matthew and Klasky, Scott and Podhorszki, Norbert},
  title     = {In-situ {I/O} processing: A case for location flexibility},
  booktitle = {PDSW'11 - Proceedings of the 6th Parallel Data Storage Workshop, Co-located with SC'11},
  pages     = {37--42},
  year      = {2011},
  month     = nov,
  doi       = {10.1145/2159352.2159362},
  isbn      = {9781450311038},
  language  = {English},
  keywords  = {I/O, analytics, in-situ processing, placement, staging},
  abstract  = {Increasingly severe I/O bottlenecks on High-End Computing machines are prompting scientists to process output data during simulation time, ``in-situ'', and before placing data on disks. This paper argues for flexibility in the implementation of such in-situ data analytics, using measurements and a performance model that demonstrate the potential advantages and limitations of performing analytics at different levels of the I/O hierarchy, including on a machine's compute nodes vs. on separate ``staging'' nodes dedicated to analysis tasks. Model and measurement results are guided by realistic large-scale applications running on leadership class machines, and I/O and analytics actions are described as computational dataflow graphs --- termed I/O graphs --- that combine data movement with `in transit' operations on data as it is being moved across the I/O hierarchy. Results demonstrate the importance of flexibility in analytics placement and characterize the attributes of analytics operations that lead to different placement decisions.},
  note      = {6th Parallel Data Storage Workshop (PDSW'11), co-located with SC'11, 13 November 2011},
}