@comment{
  SC 2019 conference paper evaluating the Summit and Sierra (CORAL) interconnects.
  NOTE(review): publisher is set to ACM to agree with the 10.1145 DOI prefix and
  the ACM copyright line in the note field; the exported record said
  IEEE Computer Society. Confirm against the proceedings front matter.
}
@inproceedings{ec1d58902e0945f3b1afbec707be2899,
  author    = {Zimmer, Christopher and Atchley, Scott and Pankajakshan, Ramesh and Smith, Brian E. and Karlin, Ian and Leininger, Matthew L. and Bertsch, Adam and Ryujin, Brian S. and Burmark, Jason and Walker-Loud, Andr{\'e} and Clark, M. A. and Pearce, Olga},
  title     = {An evaluation of the {CORAL} interconnects},
  booktitle = {Proceedings of {SC} 2019},
  series    = {International Conference for High Performance Computing, Networking, Storage and Analysis, SC},
  publisher = {Association for Computing Machinery},
  year      = {2019},
  month     = nov,
  day       = {17},
  doi       = {10.1145/3295500.3356166},
  language  = {English},
  keywords  = {Bandwidth, Congestion, EDR, High performance computing, InfiniBand, Interconnect, Latency, Offload, Switch collectives, Tag matching},
  abstract  = {The US Department of Energy deployed the Summit and Sierra supercomputers with the latest state-of-the-art network interconnect technology in 2018 and both systems entered production in 2019. In this paper, we provide an in-depth assessment of the systems' network interconnects that are based on Enhanced Data Rate (EDR) 100 Gb/s Mellanox InfiniBand. Both systems use second-generation EDR Host Channel Adapters (HCAs) and switches with several new features such as Adaptive Routing (AR), switch-based collectives, and HCA-based tag matching. Although based on the same components, Summit's network is ``non-blocking'' (i.e., a fully provisioned Clos network) and Sierra's network has a 2:1 taper between the racks and aggregation switches. We evaluate the two systems' interconnects using traditional communication benchmarks as well as production applications. We find that the new Adaptive Routing dramatically improves performance but the other new features still need improvement.},
  note      = {Publisher Copyright: {\textcopyright} 2019 ACM.; 2019 International Conference for High Performance Computing, Networking, Storage and Analysis, SC 2019 ; Conference date: 17-11-2019 Through 22-11-2019},
}