% Conference paper from the PDP 2018 proceedings (IEEE), pages 273--277.
% - Personal names use the "Last, First" form so parsing never depends on word case.
% - OpenCL, Base64 and FPGA are brace-protected in the title so that styles which
%   apply sentence casing keep their capitalisation.
% - No series field: the exported value only repeated booktitle verbatim, which
%   makes styles that print series show the proceedings title twice.
% - abstract, keywords, day and language are ignored by the classic standard
%   styles; they are retained as metadata.
@inproceedings{775dbfb883af4c3195166702600df4b5,
  author    = {Jin, Zheming and Johnson, Iris and Finkel, Hal},
  title     = {Evaluating and Optimizing {OpenCL} {Base64} Data Unpacking Kernel with {FPGA}},
  booktitle = {Proceedings - 26th {Euromicro} International Conference on Parallel, Distributed, and Network-Based Processing, {PDP} 2018},
  editor    = {Kotenko, Igor and Merelli, Ivan and Lio, Pietro},
  publisher = {Institute of Electrical and Electronics Engineers Inc.},
  pages     = {273--277},
  year      = {2018},
  month     = jun,
  day       = {6},
  doi       = {10.1109/PDP2018.2018.00046},
  language  = {English},
  keywords  = {Base64 Encoding, FPGA, OpenCL},
  note      = {Publisher Copyright: {\textcopyright} 2018 IEEE.; 26th Euromicro International Conference on Parallel, Distributed, and Network-Based Processing, PDP 2018 ; Conference date: 21-03-2018 Through 23-03-2018},
  abstract  = {Development of applications using OpenCL targeting FPGAs is an emerging approach on heterogeneous computing systems. This paper uses the data unpacking algorithm in Base64 encoding as a case study to present programming and optimization techniques, and experimental results of the OpenCL-based implementations on an FPGA. We explain the algorithm and evaluate the performance of the kernel implementations with Intel's FPGA OpenCL SDK. The experimental results show kernel vectorization and duplication are two optimization techniques that can improve the kernel performance. The performance of kernel duplication is also closely related to the local work size. Our experiment shows 16-lane vectorization increases the bandwidth by a factor of 2 to 10 for large input data sizes. Moreover, the performance of kernel duplication using 16 compute units is 40% to 1.5% less than that of kernel vectorization depending on the input size. Tuning the local work size can improve the kernel performance by a factor of 3 to 23. For this kernel, using local memory is not an effective technique to improve the kernel performance because input data is not reused. A combination of vectorization and duplication achieves the highest performance of 12.3 GiB/s. Compared to an Intel Xeon E5 CPU and an Nvidia Tesla K80 GPU, the performance of the kernel on the Arria 10 FPGA is 6.7X faster than the CPU and 3X slower than the GPU. The performance per watt on the FPGA is 20.5X higher than the CPU and 1.19X lower than the GPU.},
}