% Conference paper (DSN 2009) on system log pre-processing for failure prediction.
% The citation key looks exporter-generated; it is kept unchanged so that
% existing \cite commands continue to resolve.
% The percent sign in the abstract is escaped because a bare one would start a
% TeX comment if the abstract is ever typeset or copied into a .bbl file.
@inproceedings{5a6461ce96cb45cab45997e3922efae2,
  author    = {Zheng, Ziming and Lan, Zhiling and Park, Byung H. and Geist, Al},
  title     = {System log pre-processing to improve failure prediction},
  booktitle = {Proceedings of the 2009 {IEEE/IFIP} International Conference on Dependable Systems and Networks, {DSN} 2009},
  series    = {Proceedings of the International Conference on Dependable Systems and Networks},
  pages     = {572--577},
  year      = {2009},
  doi       = {10.1109/DSN.2009.5270289},
  isbn      = {9781424444212},
  language  = {English},
  keywords  = {Cray XT4, Event categorization, Event filtering, IBM Blue Gene/L, Log preprocessing},
  abstract  = {Log preprocessing, a process applied on the raw log before applying a predictive method, is of paramount importance to failure prediction and diagnosis. While existing filtering methods have demonstrated good compression rate, they fail to preserve important failure patterns that are crucial for failure analysis. To address the problem, in this paper we present a log preprocessing method. It consists of three integrated steps: (1) event categorization to uniformly classify system events and identify fatal events; (2) event filtering to remove temporal and spatial redundant records, while also preserving necessary failure patterns for failure analysis; (3) causality-related filtering to combine correlated events for filtering through apriori association rule mining. We demonstrate the effectiveness of our preprocessing method by using real failure logs collected from the Cray XT4 at ORNL and the Blue Gene/L system at SDSC. Experiments show that our method can preserve more failure patterns for failure analysis, thereby improving failure prediction by up to 174\%.},
  note      = {2009 IEEE/IFIP International Conference on Dependable Systems and Networks, DSN 2009; Conference date: 29-06-2009 Through 02-07-2009},
}