% Conference paper (IEEE CLUSTER 2011) introducing Elastic Data Organization (EDO).
% Acronyms are brace-protected so that sentence-casing styles keep their capitals.
@inproceedings{e9c98614691546acb58ec2aa2374c7f5,
  author    = {Tian, Yuan and Klasky, Scott and Abbasi, Hasan and Lofstead, Jay and Grout, Ray and Podhorszki, Norbert and Liu, Qing and Wang, Yandong and Yu, Weikuan},
  title     = {{EDO}: Improving read performance for scientific applications through elastic data organization},
  booktitle = {Proceedings - 2011 {IEEE} International Conference on Cluster Computing, {CLUSTER} 2011},
  series    = {Proceedings - {IEEE} International Conference on Cluster Computing, {ICCC}},
  pages     = {93--102},
  year      = {2011},
  doi       = {10.1109/CLUSTER.2011.18},
  isbn      = {9780769545165},
  language  = {English},
  keywords  = {ADIOS, Data Organization, Parallel I/O, Planar Read Patterns, Space Filling Curve},
  note      = {2011 IEEE International Conference on Cluster Computing, CLUSTER 2011 ; Conference date: 26-09-2011 Through 30-09-2011},
  abstract  = {Large scale scientific applications are often bottlenecked due to the writing of checkpoint-restart data. Much work has been focused on improving their write performance. With the mounting needs of scientific discovery from these datasets, it is also important to provide good read performance for many common access patterns, which requires effective data organization. To address this issue, we introduce Elastic Data Organization (EDO), which can transparently enable different data organization strategies for scientific applications. Through its flexible data ordering algorithms, EDO harmonizes different access patterns with the underlying file system. Two levels of data ordering are introduced in EDO. One works at the level of data groups (a.k.a process groups). It uses Hilbert Space Filling Curves (SFC) to balance the distribution of data groups across storage targets. Another governs the ordering of data elements within a data group. It divides a data group into sub chunks and strikes a good balance between the size of sub chunks and the number of seek operations. Our experimental results demonstrate that EDO is able to achieve balanced data distribution across all dimensions and improve the read performance of multidimensional datasets in scientific applications.},
}