% Conference paper in the SC 2011 proceedings (ACM). The citation key is kept
% as exported so existing citations continue to resolve.
@inproceedings{ef16a4694906446999e4caf335c01f6f,
  author    = {Babich, R. and Clark, M. A. and Jo{\'o}, B. and Shi, G. and Brower, R. C. and Gottlieb, S.},
  title     = {Scaling lattice {QCD} beyond 100 {GPUs}},
  booktitle = {Proceedings of 2011 {SC} - International Conference for High Performance Computing, Networking, Storage and Analysis},
  publisher = {Association for Computing Machinery},
  year      = {2011},
  month     = nov,
  day       = {12},
  doi       = {10.1145/2063384.2063478},
  isbn      = {9781450307710},
  language  = {English},
  keywords  = {Domain decomposition, GPU, Krylov solvers, Lattice QCD},
  abstract  = {Over the past five years, graphics processing units (GPUs) have had a transformational effect on numerical lattice quantum chromodynamics (LQCD) calculations in nuclear and particle physics. While GPUs have been applied with great success to the post-Monte Carlo ``analysis'' phase which accounts for a substantial fraction of the workload in a typical LQCD calculation, the initial Monte Carlo ``gauge field generation'' phase requires capability-level supercomputing, corresponding to O(100) GPUs or more. Such strong scaling has not been previously achieved. In this contribution, we demonstrate that using a multi-dimensional parallelization strategy and a domain-decomposed preconditioner allows us to scale into this regime. We present results for two popular discretizations of the Dirac operator, Wilson-clover and improved staggered, employing up to 256 GPUs on the Edge cluster at Lawrence Livermore National Laboratory.},
  note      = {2011 International Conference for High Performance Computing, Networking, Storage and Analysis, SC 2011; conference date: 2011-11-12 through 2011-11-18},
}