@Article{cmc.2019.06097,
  AUTHOR   = {Suzhen Wang and Shanshan Geng and Zhanfeng Zhang and Anshan Ye and Keming Chen and Zhaosheng Xu and Huimin Luo and Gangshan Wu and Lina Xu and Ning Cao},
  TITLE    = {A Dynamic Memory Allocation Optimization Mechanism Based on Spark},
  JOURNAL  = {Computers, Materials \& Continua},
  VOLUME   = {61},
  YEAR     = {2019},
  NUMBER   = {2},
  PAGES    = {739--757},
  URL      = {http://www.techscience.com/cmc/v61n2/33502},
  ISSN     = {1546-2226},
  ABSTRACT = {Spark is a memory-based distributed data processing framework, and memory allocation is a central question in Spark research. A good memory allocation scheme can effectively improve task execution efficiency and memory resource utilization in Spark. Addressing the memory allocation problem in Spark 2.x, this paper optimizes the memory allocation strategy by analyzing the Spark memory model, existing cache replacement algorithms, and existing memory allocation methods, on the basis of minimizing the storage area and allocating the execution area according to demand. The approach mainly includes two parts: cache replacement optimization and memory allocation optimization. First, in the storage area, the cache replacement algorithm is optimized according to the characteristics of RDD partitions, combined with PCA dimensionality reduction. Four features of each RDD partition are selected, and each time an RDD cache block is replaced, only the two most important features are retained through PCA dimensionality reduction, thereby ensuring the generalization of the cache replacement strategy. Second, the memory allocation strategy of the execution area is optimized according to the memory requirements of tasks and the memory space of the storage area. A series of experiments in Spark on YARN mode verifies the effectiveness of the optimization algorithm and its improvement of cluster performance.},
  DOI      = {10.32604/cmc.2019.06097}
}