% Conference paper introducing Hades, a context-aware active storage framework (CCGrid 2024).
% Editing notes for this entry:
%   - Author names use the "Last, First" form; the braces in {Xian He} keep the
%     two-word given name together as a single token.
%   - Braces in the title and booktitle protect proper names and acronyms from
%     style-driven recasing.
%   - "annote" carries repository-export metadata (copyright line, event name and
%     dates); standard styles do not print it.
%   - No "series" field: the exported value merely repeated the booktitle, which
%     makes standard styles print the proceedings title twice.
@inproceedings{ac372936b34a49cf9fa3e58d2129755e,
  author    = {Cernuda, Jaime and Logan, Luke and Gainaru, Ana and Klasky, Scott and Lofstead, Jay and Kougkas, Anthony and Sun, {Xian He}},
  title     = {{Hades}: A Context-Aware Active Storage Framework for Accelerating Large-Scale Data Analysis},
  booktitle = {Proceedings - 2024 {IEEE} 24th International Symposium on Cluster, Cloud and Internet Computing, {CCGrid} 2024},
  year      = {2024},
  pages     = {577--586},
  publisher = {Institute of Electrical and Electronics Engineers Inc.},
  doi       = {10.1109/CCGrid59990.2024.00070},
  language  = {English},
  keywords  = {Active Storage, Context Awareness, Data Operator, Hierarchical Storage, In-transit Computing, Metadata Management},
  abstract  = {Modern simulation workflows generate and analyze massive amounts of data using I/O libraries like Adios2 and NetCDF. Although extensive work has optimized the I/O processes during the simulation phase, executing analytical queries - which often require iterative traversals of large files for insights - is cumbersome and usually constrained by low I/O performance. Instead of waiting for the analysis phase to process queries, quantities can be derived asynchronously during data production and cached, speeding up future queries. In this work, we introduce a context-aware I/O layer named 'Hades.' It is designed to efficiently derive insights from selected quantities without compromising overall workflow performance. Hades actively and asynchronously computes and stores these quantities while the data is in transit. Hades leverages a hierarchical buffering system with data access-aware prefetching to ensure quick and timely access to relevant data. It offers a flexible query interface empowering users to easily define derived quantities and provide control over data placement decisions. Hades is implemented using an Adios2 plugin engine and the Hermes buffering platform, enabling transparent use by any Adios-powered application or workflow. Experimental results demonstrate performance improvements by up to 3-4x for tested real-world scientific producer-consumer workflows.},
  annote    = {Publisher copyright: {\textcopyright} 2024 IEEE. Event: 24th IEEE/ACM International Symposium on Cluster, Cloud and Internet Computing, CCGrid 2024. Conference date: 06-05-2024 through 09-05-2024.},
}