% Workshop paper in an LNCS proceedings volume (ISC High Performance 2016 workshops).
% The citation key is kept exactly as exported so existing citations keep resolving.
% Braces in the title protect proper nouns/acronyms from style-driven lowercasing.
% NOTE(review): booktitle was truncated in the export; the trailing "Papers" was restored
% from the usual LNCS wording -- confirm against the volume title page.
@inproceedings{169d63b5a6e14ac5b105a52a675246ca,
title = "Task-Based {Cholesky} Decomposition on {Knights Corner} Using {OpenMP}",
author = "Dorris, Joseph and Kurzak, Jakub and Luszczek, Piotr and YarKhan, Asim and Dongarra, Jack",
editor = "Mohr, Bernd and Kunkel, Julian M. and Taufer, Michela",
booktitle = "High Performance Computing - ISC High Performance 2016 International Workshops ExaComm, E-MuCoCoS, HPC-IODC, IXPUG, IWOPH, P$^3$MA, VHPC, WOPSSS, Revised Selected Papers",
series = "Lecture Notes in Computer Science (including subseries Lecture Notes in Artificial Intelligence and Lecture Notes in Bioinformatics)",
publisher = "Springer Verlag",
pages = "544--562",
year = "2016",
doi = "10.1007/978-3-319-46079-6_37",
isbn = "9783319460789",
language = "English",
keywords = "Cholesky decomposition, Linear algebra, OpenMP, PLASMA, Task-based programming, Tile algorithms, Xeon Phi",
abstract = "The growing popularity of the Intel Xeon Phi coprocessors and the continued development of this new many-core architecture have created the need for an open-source, scalable, and cross-platform task-based dense linear algebra package that can efficiently use this type of hardware. In this paper, we examined the design modifications necessary when porting PLASMA, a task-based dense linear algebra library, to run effectively on Intel{\textquoteright}s Knights Corner Xeon Phi coprocessor. First, we modified PLASMA{\textquoteright}s tiled Cholesky decomposition to use OpenMP for its scheduling mechanism to enable Xeon Phi compatibility. We then compared the performance of our modified code to that of the original dynamic scheduler running on an Intel Xeon Sandy Bridge CPU. Finally, we looked at the performance of the new OpenMP tiled Cholesky decomposition on a Knights Corner coprocessor. We found that desirable performance for this architecture was attainable with the right code optimizations; these changes were necessary to account for differences in the runtimes and in the hardware itself.",
note = "Publisher Copyright: {\textcopyright} Springer International Publishing AG 2016.; International Workshops on High Performance Computing, ISC High Performance 2016 and Workshop on 2nd International Workshop on Communication Architectures at Extreme Scale, ExaComm 2016, Workshop on Exascale Multi/Many Core Computing Systems, E-MuCoCoS 2016, HPC I/O in the Data Center, HPC-IODC 2016, Application Performance on Intel Xeon Phi -- Being Prepared for KNL and Beyond, IXPUG 2016, International Workshop on OpenPOWER for HPC, IWOPH 2016, International Workshop on Performance Portable Programming Models for Accelerators, P$^3$MA 2016, Workshop on Virtualization in High-Performance Cloud Computing, VHPC 2016, Workshop on Performance and Scalability of Storage Systems, WOPSSS 2016 ; Conference date: 19-06-2016 Through 23-06-2016",
}