% Conference paper from the ICPPW 2017 workshop proceedings.
% The funding and copyright text from the export is stored in `annote`, which
% the standard styles do not print; LaTeX specials in it are escaped so it is
% still safe if an annotated style does print it. `eventdate` holds the
% conference date from the export in ISO form.
@inproceedings{66f300c2aae94bf596aab1023009eb57,
  author    = {Wang, Xi and Leidel, John D. and Chen, Yong},
  title     = {{OpenMP} {Memkind}: An Extension for Heterogeneous Physical Memories},
  booktitle = {Proceedings - 46th International Conference on Parallel Processing Workshops, ICPPW 2017},
  series    = {Proceedings of the International Conference on Parallel Processing Workshops},
  publisher = {Institute of Electrical and Electronics Engineers Inc.},
  pages     = {220--227},
  year      = {2017},
  month     = sep,
  day       = {5},
  eventdate = {2017-08-14},
  doi       = {10.1109/ICPPW.2017.40},
  language  = {English},
  keywords  = {Heterogeneous Memory, LLVM, Memkind, OpenMP},
  abstract  = {Recently, CPU and graphics processors have been increasing the degree of on-chip parallelism in order to combat the decrease in traditional Moore's Law scaling. As a result, these new processors are increasing their appetite for faster memory devices with higher bandwidth. Component manufacturers have resorted to disparate or hierarchical fast memory device architectures such as shared local memory (SLM), scratch pad memory (SPM), and high bandwidth memory (HBM) to provide sufficient bandwidth. Following this trend, the physical memory locality gradually becomes a performance feature that users would like to explicitly manage. Inspired by this idea, this research is conducted to create a heterogeneous memory interface based on a new declarative data storage directive, or 'memkind', for the OpenMP parallel programming specification to explicitly manage physical memory locality. Our approach is implemented as an OpenMP directive in order to avoid allocating data inside parallel regions, thus avoiding performance degradation due to sequential operating system routines. We demonstrate our approach as an extension to the LLVM OpenMP implementation, that enables the portability of our approach to be rapidly ported to any LLVM-supported architecture target. Our contributions in this work are a detailed design analysis of the memkind directive as well as a detailed implementation in the LLVM compiler infrastructure. We demonstrate the efficacy of our approach using a synthetic benchmark application that records the execution performance and memory allocation efficiency.},
  annote    = {Funding Information: We are thankful to the anonymous reviewers for their valuable feedback. This research is supported in part by the National Science Foundation under grant CNS-1162488, CNS-1338078, IIP-1362134, and CCF-1409946. Further, we sincerely acknowledge the resources and support from Micron Technology, Inc. The authors would also like to thank Stony Brook Research Computing and Cyberinfrastructure, and the Institute for Advanced Computational Science at Stony Brook University for access to the high-performance LIred and SeaWulf computing systems, the latter of which was made possible by a \$1.4M National Science Foundation grant (\#1531492). Publisher Copyright: {\textcopyright} 2017 IEEE.},
}