% Poster abstract published in the SC'11 companion proceedings (pages 9--10).
% The proceedings title is stored once, in booktitle; no separate series applies.
@inproceedings{16db03241a09401fb7c5f8c49bab9ab0,
  author    = {Tock, Yoav and Mandler, Benjamin and Moreira, Jos{\'e} and Jones, Terry},
  title     = {Poster: Scalable infrastructure to support supercomputer resiliency-aware applications and load balancing},
  booktitle = {SC'11 - Proceedings of the 2011 High Performance Computing Networking, Storage and Analysis Companion, Co-located with SC'11},
  pages     = {9--10},
  year      = {2011},
  doi       = {10.1145/2148600.2148606},
  isbn      = {9781450310307},
  language  = {English},
  keywords  = {Clustering, Membership, Middleware, Peer-to-peer, Pub/sub systems, Scalability},
  abstract  = {High performance computing systems display increasing complexity and component counts. This trend exposes weaknesses in the underlying clustering infrastructure needed for continuous availability, maximizing utilization, and efficient administration of such systems. To mitigate the problem, we present a highly scalable clustering infrastructure, based on peer-to-peer technologies, for supporting resiliency-aware applications as well as efficient monitoring and load balancing. Supported services include Membership, Publish-subscribe messaging, Convergecast, Attribute replication and a DHT. We present a preliminary evaluation taken from an IBM BlueGene/P, demonstrating scalability up to $\sim$256K nodes.},
  note      = {2011 High Performance Computing Networking, Storage and Analysis, SC'11, Co-located with SC'11 ; Conference date: 12-11-2011 Through 18-11-2011},
}