% Conference paper (PLDI 2019). Case-sensitive words in title/booktitle/series are
% brace-protected so sentence-casing styles keep them; names use "Last, First" form.
@inproceedings{aedac79e9c3f419eb341451211d6722f,
  author    = {Guan, Hui and Shen, Xipeng and Lim, Seung Hwan},
  title     = {{Wootz}: A compiler-based framework for fast {CNN} pruning via composability},
  booktitle = {{PLDI} 2019 - Proceedings of the 40th {ACM} {SIGPLAN} Conference on Programming Language Design and Implementation},
  series    = {Proceedings of the {ACM} {SIGPLAN} Conference on Programming Language Design and Implementation ({PLDI})},
  editor    = {McKinley, Kathryn S. and Fisher, Kathleen},
  publisher = {Association for Computing Machinery},
  year      = {2019},
  month     = jun,
  day       = {8},
  pages     = {717--730},
  doi       = {10.1145/3314221.3314652},
  language  = {English},
  keywords  = {CNN, Compiler, Composability, Network pruning},
  note      = {Publisher Copyright: {\textcopyright} 2019 Association for Computing Machinery. ACM; 40th ACM SIGPLAN Conference on Programming Language Design and Implementation, PLDI 2019 ; Conference date: 22-06-2019 Through 26-06-2019},
  abstract  = {Convolutional Neural Networks (CNN) are widely used for Deep Learning tasks. CNN pruning is an important method to adapt a large CNN model trained on general datasets to fit a more specialized task or a smaller device. The key challenge is on deciding which filters to remove in order to maximize the quality of the pruned networks while satisfying the constraints. It is time-consuming due to the enormous configuration space and the slowness of CNN training. The problem has drawn many efforts from the machine learning field, which try to reduce the set of network configurations to explore. This work tackles the problem distinctively from a programming systems perspective, trying to speed up the evaluations of the remaining configurations through computation reuse via a compiler-based framework. We empirically uncover the existence of composability in the training of a collection of pruned CNN models, and point out the opportunities for computation reuse. We then propose composability-based CNN pruning, and design a compression-based algorithm to efficiently identify the set of CNN layers to pre-train for maximizing their reuse benefits in CNN pruning. We further develop a compiler-based framework named Wootz, which, for an arbitrary CNN, automatically generates code that builds a Teacher-Student scheme to materialize composability-based pruning. Experiments show that network pruning enabled by Wootz shortens the state-of-art pruning process by up to 186X while producing significantly improved pruning results.},
}