% Conference paper in the proceedings of the 2021 IEEE International
% Conference on Big Data (pages 3705--3708).
%
% Maintenance notes:
%  * No "series" field: the exported value was identical to "booktitle", and
%    with no volume/number the standard styles would print it a second time.
%  * "exportnote" is a non-standard field that bibliography styles ignore. It
%    keeps the exporter's original note text (copyright line and conference
%    dates) verbatim without printing it in every rendered reference.
%  * Names use the unambiguous "Last, First" form. The braced given names
%    {Hong Jun} and {Xiaohua Tony} are kept exactly as exported.
@inproceedings{33a5ffb732ab4b97996381684ece8bf4,
  author     = {Logan, Jeremy and Agasthya, Greeshma and Hanson, Heidi and Wolf, Matthew and Lee, Heechan and Dewji, Shaheen and Yoon, {Hong Jun} and Kapadia, Anuj},
  title      = {Creating a Tools Ecosystem for Cross-Discipline Environmental Data Reuse},
  booktitle  = {Proceedings - 2021 {IEEE} International Conference on Big Data, Big Data 2021},
  editor     = {Chen, Yixin and Ludwig, Heiko and Tu, Yicheng and Fayyad, Usama and Zhu, Xingquan and Hu, {Xiaohua Tony} and Byna, Suren and Liu, Xiong and Zhang, Jianping and Pan, Shirui and Papalexakis, Vagelis and Wang, Jianwu and Cuzzocrea, Alfredo and Ordonez, Carlos},
  publisher  = {Institute of Electrical and Electronics Engineers Inc.},
  year       = {2021},
  pages      = {3705--3708},
  doi        = {10.1109/BigData52589.2021.9671683},
  language   = {English},
  keywords   = {data wrangling, domain-specific modeling, spatial time-series data},
  abstract   = {Reusing data is difficult even within well-defined science communities and only gets worse when combining data from multiple communities and disciplines. Through the lens of current work on constructing an environmental epidemiological data set from multiple disciplinary sources, we demonstrate the need for a new tool ecosystem to support heterogeneous Big Data science. Extending existing community standards for schemas and/or data formats through human auditing and wrangling of the data is not feasible at scale. This work therefore suggests new approaches for the multi-disciplinary communities to build a shared tool ecosystem for big data. We discuss both the larger context of data wrangling of epidemiological data sets for novel artificial intelligence algorithms and the specific lessons from working with these multi-disciplinary data sets. Adopting a more model-driven, automatable approach promises not only better efficiency but also removes key sources of human-generated errors and promotes reuse and reproducibility of science data.},
  exportnote = {Publisher Copyright: {\textcopyright} 2021 IEEE.; 2021 IEEE International Conference on Big Data, Big Data 2021 ; Conference date: 15-12-2021 Through 18-12-2021},
}