@inproceedings{b57b4371e1d2474e9889f85c77dc3ab3,
title = "Accelerating collaborative filtering using concepts from high performance computing",
abstract = "In this paper we accelerate the Alternating Least Squares (ALS) algorithm used for generating product recommendations on the basis of implicit feedback datasets. We approach the algorithm with concepts proven to be successful in High Performance Computing. This includes the formulation of the algorithm as a mix of cache-optimized algorithm-specific kernels and standard BLAS routines, acceleration via graphics processing units (GPUs), use of parallel batched kernels, and autotuning to identify performance winners. For benchmark datasets, the multi-threaded CPU implementation we propose achieves more than a 10 times speedup over the implementations available in the GraphLab and Spark MLlib software packages. For the GPU implementation, the parameters of an algorithm-specific kernel were optimized using a comprehensive autotuning sweep. This results in an additional 2 times speedup over our CPU implementation.",
keywords = "Alternating Least Squares, Autotuning, Batched Cholesky, Collaborative Filtering, GPUs",
author = "Mark Gates and Hartwig Anzt and Jakub Kurzak and Jack Dongarra",
note = "Publisher Copyright: {\textcopyright} 2015 IEEE.; 3rd IEEE International Conference on Big Data, IEEE Big Data 2015 ; Conference date: 29-10-2015 Through 01-11-2015",
year = "2015",
month = dec,
day = "22",
doi = "10.1109/BigData.2015.7363811",
language = "English",
series = "Proceedings - 2015 IEEE International Conference on Big Data, IEEE Big Data 2015",
publisher = "Institute of Electrical and Electronics Engineers Inc.",
pages = "667--676",
editor = "Feng Luo and Kemafor Ogan and Zaki, {Mohammed J.} and Laura Haas and Ooi, {Beng Chin} and Vipin Kumar and Sudarsan Rachuri and Saumyadipta Pyne and Howard Ho and Xiaohua Hu and Shipeng Yu and Hsiao, {Morris Hui-I} and Jian Li",
booktitle = "Proceedings - 2015 IEEE International Conference on Big Data, IEEE Big Data 2015",
}