@inproceedings{0154c3fd21ab4389bb8cd3883d90b3fa,
title = "{iBench}: A Distributed Inference Simulation and Benchmark Suite",
abstract = "We present a novel distributed inference benchmarking system, called 'iBench', that provides relevant performance metrics for high-performance edge computing systems using trained deep learning models. The proposed benchmark is unique in that it includes data transfer performance through a distributed system, such as a supercomputer, using clients and servers to provide a system-level benchmark. iBench is flexible and robust enough to allow for the benchmarking of custom-built inference servers. This was demonstrated through the development of a custom Flask-based inference server to serve MLPerf's official ResNet50v1.5 model. In this paper, we compare iBench against MLPerf inference performance on an 8-V100 GPU node. iBench is shown to provide two primary advantages over MLPerf: (1) the ability to measure distributed inference performance, and (2) a more realistic measure of benchmark performance for inference servers on HPC by taking into account additional factors to inference time, such as HTTP request-response time, payload pre-processing and packing time, and ingest time.",
keywords = "GPU, ResNet50, TensorRT, benchmark, distributed, inference",
author = "Brewer, Wesley and Behm, Greg and Scheinine, Alan and Parsons, Ben and Emeneker, Wesley and Trevino, Robert P.",
note = "Publisher Copyright: {\textcopyright} 2020 IEEE.; 2020 IEEE High Performance Extreme Computing Conference, HPEC 2020 ; Conference date: 21-09-2020 Through 25-09-2020",
year = "2020",
month = sep,
day = "22",
doi = "10.1109/HPEC43674.2020.9286169",
language = "English",
series = "2020 {IEEE} High Performance Extreme Computing Conference, {HPEC} 2020",
publisher = "Institute of Electrical and Electronics Engineers Inc.",
booktitle = "2020 {IEEE} High Performance Extreme Computing Conference, {HPEC} 2020",
}