% PAKDD 2025 conference paper, published in the LNCS series (bibliography-database export).
% Names are stored in "Last, First" form and the DOI is stored bare (no LaTeX escaping).
% NOTE(review): booktitle looks like a truncated volume title and no LNCS volume number
% is recorded -- confirm both against the publisher's record before relying on them.
@inproceedings{ab98a41403bb4c949d5e706cc55b0ea0,
  author    = {Li, Xiao and Lee, Jaemoon and Rangarajan, Anand and Ranka, Sanjay},
  title     = {Foundation Model for Lossy Compression of Spatiotemporal Scientific Data},
  booktitle = {Data Science},
  editor    = {Wu, Xintao and Spiliopoulou, Myra and Wang, Can and Kumar, Vipin and Cao, Longbing and Zhou, Xiangmin and Pang, Guansong and Gama, Joao},
  series    = {Lecture Notes in Computer Science},
  publisher = {Springer Science and Business Media Deutschland GmbH},
  pages     = {368--380},
  year      = {2025},
  doi       = {10.1007/978-981-96-8295-9_27},
  isbn      = {9789819682942},
  language  = {English},
  keywords  = {Data Compression, Foundation Models, Spatiotemporal Scientific Data},
  note      = {Publisher Copyright: {\textcopyright} The Author(s), under exclusive license to Springer Nature Singapore Pte Ltd. 2025.; 29th Pacific-Asia Conference on Knowledge Discovery and Data Mining, PAKDD 2025; Conference date: 2025-06-10 through 2025-06-13},
  abstract  = {We present a foundation model (FM) for lossy scientific data compression, combining a variational autoencoder (VAE) with a hyper-prior structure and a super-resolution (SR) module. The VAE framework uses hyper-priors to model latent space dependencies, enhancing compression efficiency. The SR module refines low-resolution representations into high-resolution outputs, improving reconstruction quality. By alternating between 2D and 3D convolutions, the model efficiently captures spatiotemporal correlations in scientific data while maintaining low computational cost. Experimental results demonstrate that the FM generalizes well to unseen domains and varying data shapes, achieving up to 4$\times$ higher compression ratios than state-of-the-art methods after domain-specific fine-tuning. The SR module improves compression ratio by 30\% compared to simple upsampling techniques. This approach significantly reduces storage and transmission costs for large-scale scientific simulations while preserving data integrity and fidelity.},
}