% Conference paper from ICDSE 2021, published 2022 in the Lecture Notes in
% Electrical Engineering series. Acronyms in the title are brace-protected so
% sentence-casing styles keep their capitalisation.
% NOTE(review): the given name of Thakur is left braced because it is unclear
% whether "Malviya" is a middle name or part of the surname -- confirm.
@inproceedings{d4a9a4bb16cd43dca9055fe96c1a233f,
  author    = {Godoy, William F. and Thakur, {Addi Malviya} and Hahn, Steven E.},
  title     = {Domain-Specific Type-Safe {APIs} for Hierarchical Scientific Data with Modern {C++}},
  booktitle = {Responsible Data Science -- Select Proceedings of {ICDSE} 2021},
  editor    = {Mathew, Jimson and {Santhosh Kumar}, G. and Padmanabhan, Deepak and Jose, Joemon M.},
  series    = {Lecture Notes in Electrical Engineering},
  publisher = {Springer Science and Business Media Deutschland GmbH},
  year      = {2022},
  pages     = {191--204},
  doi       = {10.1007/978-981-19-4453-6_14},
  isbn      = {9789811944529},
  language  = {English},
  keywords  = {C++, FAIR scientific data, HDF5, Template metaprogramming, Type-safe API},
  abstract  = {General-purpose library application programming interfaces (APIs) for self-describing hierarchical scientific data storage, such as the HDF5 and NetCDF libraries, are traditionally of runtime nature. Runtime errors for entry existence and data types are typically caught later in the development process of higher-level application-specific APIs. In this paper, we propose exploiting modern C++ metaprogramming features to add compile-time type-safety to improve the interaction with a well-defined metadata-rich scientific schema in domain-specific hierarchical datasets. We tackle two aspects of common use: (i) direct data access, (ii) flexible ``in-memory'' index models for efficient search and data processing. The proposed APIs use C++17{\textquoteright}s template type auto deduction features, C++11{\textquoteright}s enum class for type-safety and C-style preprocessor macros for generative templated code. We showcase the pros and cons of our initial work on the standard NeXus schema used for annotating and storing experimental neutron scattering data at several facilities around the world on top of HDF5. Extendable compile-time type-safe APIs are a desirable feature that could be indexed by any modern integrated development environment (IDE). Hence, such APIs can help ease the learning curve for domain scientists using a less error-prone software interaction to enhance the findability of their data without resorting to a domain-specific language (DSL).},
  note      = {Publisher Copyright: {\textcopyright} 2022, This is a U.S. government work and not under copyright protection in the U.S.; foreign copyright protection may apply.; 7th International Conference on Data Science and Engineering, ICDSE 2021 ; Conference date: 17-12-2021 Through 18-12-2021},
}