% Conference paper: Godoy et al., IEEE Big Data 2020 (pp. 2674--2680).
% Cleaned from a repository export:
%   - the citation key is kept as exported so existing \cite commands still resolve;
%   - acronyms in title/booktitle are brace-protected against style recasing;
%   - given names are left unbraced so styles can abbreviate each one separately;
%   - the duplicate "series" (a copy of booktitle) was dropped to avoid printing
%     the proceedings title twice;
%   - conference name/dates and the copyright line were moved out of "note" into
%     eventtitle / eventdate / copyright, which classic BibTeX styles ignore and
%     biblatex understands (eventtitle, eventdate) or skips (copyright).
@inproceedings{e6a4b2457ce74309af049b0e18d9f719,
  author     = {Godoy, William F. and Peterson, Peter F. and Hahn, Steven E. and Billings, Jay J.},
  title      = {Efficient Data Management in Neutron Scattering Data Reduction Workflows at {ORNL}},
  booktitle  = {Proceedings - 2020 {IEEE} International Conference on Big Data, Big Data 2020},
  editor     = {Wu, Xintao and Jermaine, Chris and Xiong, Li and Hu, Xiaohua Tony and Kotevska, Olivera and Lu, Siyuan and Xu, Weijia and Aluru, Srinivas and Zhai, Chengxiang and Al-Masri, Eyhab and Chen, Zhiyuan and Saltz, Jeff},
  publisher  = {Institute of Electrical and Electronics Engineers Inc.},
  pages      = {2674--2680},
  year       = {2020},
  month      = dec,
  day        = {10},
  doi        = {10.1109/BigData50022.2020.9377836},
  eventtitle = {8th {IEEE} International Conference on Big Data, Big Data 2020},
  eventdate  = {2020-12-10/2020-12-13},
  copyright  = {{\textcopyright} 2020 IEEE},
  language   = {English},
  keywords   = {HDF5, Mantid, NeXus, data management, experimental data, indexing, metadata, neutron scattering, reduction workflows},
  abstract   = {Oak Ridge National Laboratory (ORNL) experimental neutron science facilities produce 1.2 TB a day of raw event-based data that is stored using the standard metadata-rich NeXus schema built on top of the HDF5 file format. Performance of several data reduction workflows is largely determined by the amount of time spent on the loading and processing algorithms in Mantid, an open-source data analysis framework used across several neutron sciences facilities around the world. The present work introduces new data management algorithms to address identified input output (I/O) bottlenecks on Mantid. First, we introduce an in-memory binary-tree metadata index that resemble NeXus data access patterns to provide a scalable search and extraction mechanism. Second, data encapsulation in Mantid algorithms is optimally redesigned to reduce the total compute and memory runtime footprint associated with metadata I/O reconstruction tasks. Results from this work show speed ups in wall-clock time on ORNL data reduction workflows, ranging from 11% to 30% depending on the complexity of the targeted instrument-specific data. Nevertheless, we highlight the need for more research to address reduction challenges as experimental data volumes increase.},
}