
% Survey article in Computers, Materials & Continua, vol. 83, no. 3 (2025).
% Authors are listed in "Last, First" form and joined with "and" so each person is parsed as a separate name.
% NOTE(review): family/given split assumes the publisher's given-name-first order (Chuan Li, Xuanlin Wen); confirm.
@Article{cmc.2025.063047,
AUTHOR = {Li, Chuan and Wen, Xuanlin},
TITLE = {A Survey of {Spark} Scheduling Strategy Optimization Techniques and Development Trends},
JOURNAL = {Computers, Materials \& Continua},
VOLUME = {83},
YEAR = {2025},
NUMBER = {3},
PAGES = {3843--3875},
URL = {http://www.techscience.com/cmc/v83n3/61030},
ISSN = {1546-2226},
ABSTRACT = {Spark performs excellently in large-scale data-parallel computing and iterative processing. However, with the increase in data size and program complexity, the default scheduling strategy has difficulty meeting the demands of resource utilization and performance optimization. Scheduling strategy optimization, as a key direction for improving Spark's execution efficiency, has attracted widespread attention. This paper first introduces the basic theories of Spark, compares several default scheduling strategies, and discusses common scheduling performance evaluation indicators and factors affecting scheduling efficiency. Subsequently, existing scheduling optimization schemes are summarized based on three scheduling modes: load characteristics, cluster characteristics, and matching of both, and representative algorithms are analyzed in terms of performance indicators and applicable scenarios, comparing the advantages and disadvantages of different scheduling modes. The article also explores in detail the integration of Spark scheduling strategies with specific application scenarios and the challenges in production environments. Finally, the limitations of the existing schemes are analyzed, and prospects are envisioned.},
DOI = {10.32604/cmc.2025.063047}
}



