% Conference paper (ACM BCB 2022) by Jin and Vetter on porting a CUDA
% epistasis-detection code to SYCL.
% Acronyms are brace-protected so sentence-casing styles keep their capitals.
% No `series` field: the exported value only repeated `booktitle`, which made
% styles print the proceedings title twice.
@inproceedings{953d71049f3849d0aefc0269335a14cd,
  author    = {Jin, Zheming and Vetter, Jeffrey S.},
  title     = {Performance portability study of epistasis detection using {SYCL} on {NVIDIA} {GPU}},
  booktitle = {Proceedings of the 13th {ACM} International Conference on Bioinformatics, Computational Biology and Health Informatics, {BCB} 2022},
  publisher = {Association for Computing Machinery, Inc},
  year      = {2022},
  month     = aug,
  day       = {7},
  doi       = {10.1145/3535508.3545591},
  language  = {English},
  keywords  = {Epistasis, GPU, Portability, Programming model},
  abstract  = {We describe the experience of converting a CUDA implementation of a high-order epistasis detection algorithm to SYCL. The goals are for our work to be useful to application and compiler developers with a detailed description of migration paths between CUDA and SYCL. Evaluating the CUDA and SYCL applications on an NVIDIA V100 GPU, we find that the optimization of loop unrolling needs to be applied manually to the SYCL kernel for obtaining comparable performance. The performance of the SYCL group reduce function, an alternative to the CUDA warp-based reduction, depends on the problem and work group sizes. The 64-bit popcount operation implemented with tree of adders is slightly faster than the built-in popcount operation. When the number of OpenMP threads is four, the highest performance of the SYCL and CUDA applications are comparable.},
  note      = {Publisher Copyright: {\textcopyright} 2022 ACM.; 13th ACM International Conference on Bioinformatics, Computational Biology and Health Informatics, BCB 2022 ; Conference date: 07-08-2022 Through 08-08-2022},
}