% Zhang et al., PPoPP '17: conference paper on GPU microarchitecture and
% bare-metal performance tuning (SGEMM / convolution case studies per keywords).
% Acronyms are brace-protected so sentence-casing styles keep their capitals.
% NOTE(review): "location" holds the conference venue and "address" the
% publisher city (ACM export convention); under biblatex both map to the
% publisher location -- confirm against the target style.
@inproceedings{Zhang:2017,
  author    = {Zhang, Xiuxia and Tan, Guangming and Xue, Shuangbai and Li, Jiajia and Zhou, Keren and Chen, Mingyu},
  title     = {Understanding the {GPU} Microarchitecture to Achieve Bare-Metal Performance Tuning},
  booktitle = {Proceedings of the 22nd {ACM} {SIGPLAN} Symposium on Principles and Practice of Parallel Programming},
  series    = {PPoPP '17},
  year      = {2017},
  isbn      = {978-1-4503-4493-7},
  location  = {Austin, Texas, USA},
  pages     = {31--43},
  numpages  = {13},
  url       = {https://doi.org/10.1145/3018743.3018755},
  doi       = {10.1145/3018743.3018755},
  acmid     = {3018755},
  publisher = {ACM},
  address   = {New York, NY, USA},
  keywords  = {assembler, convolution, gpu, performance, reverse-engineering gpu isa encoding, sgemm},
}