% Journal article: Stoyanov and Webster, SIAM Journal on Scientific Computing 37(5), 2015.
% The citation key is kept unchanged so that existing citations continue to resolve.
@article{b5b16b5d10454129a2bec7bcf2673828,
  author    = {Stoyanov, Miroslav and Webster, Clayton},
  title     = {Numerical analysis of fixed point algorithms in the presence of hardware faults},
  journal   = {SIAM Journal on Scientific Computing},
  year      = {2015},
  volume    = {37},
  number    = {5},
  pages     = {C532--C553},
  doi       = {10.1137/140991406},
  issn      = {1064-8275},
  publisher = {Society for Industrial and Applied Mathematics},
  language  = {English},
  keywords  = {Fault tolerance, Fixed point method, Resilience},
  abstract  = {The exponential growth of computational power of the extreme scale machines over the past few decades has led to a corresponding decrease in reliability and a sharp increase of the frequency of hardware faults. Our research focuses on the mathematical challenges presented by the silent hardware faults; i.e., faults that can perturb the result of computations in an inconspicuous way. Using the approach of selective reliability, we present an analytic fault mode that can be used to study the resilience properties of a numerical algorithm. We apply our approach to the classical fixed point iteration and demonstrate that in the presence of hardware faults, the classical method fails to converge in expectation. We present a modified resilient algorithm that detects and rejects faults resulting in error with large magnitude, while small faults are negated by the natural self-correcting properties of the algorithm. We show that our method is convergent (in first and second statistical moments) even in the presence of silent hardware faults.},
  note      = {Publisher Copyright: {\textcopyright} 2015 Society for Industrial and Applied Mathematics.},
}