
% Journal article in Computers, Materials and Continua.
% The exported record carried no volume, issue or page data (the URL path
% suggests an online-first listing), so those fields are omitted rather than
% left empty or filled with the exporter's "pages" placeholder.
% YEAR is inferred from the DOI / citation key (cmc.2026...) -- TODO confirm
% against the final publication and add VOLUME, NUMBER and PAGES once assigned.
@Article{cmc.2026.077782,
AUTHOR = {Ramzan, Hafiz Arslan and Islam, Kamrul and Hussain, Md Ahbab and Monim, Raiyan Muntasir and Asad, Sabit Md and Ramzan, Sadia},
TITLE = {Data-Driven Test Case Prioritization ({DD-TCP}): A Machine Learning Framework for Intelligent Software Quality Assurance},
JOURNAL = {Computers, Materials \& Continua},
YEAR = {2026},
URL = {http://www.techscience.com/cmc/online/detail/26554},
ISSN = {1546-2226},
ABSTRACT = {Regression testing of large-scale, data-intensive software systems demands efficient test-case prioritization strategies to detect faults early while minimizing computational cost. Conventional prioritization methods, such as coverage-based and risk-based approaches, lack adaptability to evolving project dynamics and fail to leverage the rich test-execution data accumulated over continuous integration cycles. This study presents a Data-Driven Test-Case Prioritization (DD-TCP) Framework that incorporates statistical and machine-learning techniques to model the relationship between test-case features and historical fault detection outcomes. The framework extracts multidimensional attributes including code-change frequency, dependency metrics, execution duration, and past failure density, which are normalized and embedded into a predictive ranking model based on gradient-boosted decision trees. Test cases are then dynamically reordered using a probabilistic gain function that maximizes early fault detection probability. Comprehensive simulations on representative open-source project datasets and synthetically generated large-scale test suites reveal that the proposed Data-Driven Test-Case Prioritization (DD-TCP) framework consistently achieves superior performance, yielding a 32.4\% improvement in Average Percentage of Faults Detected (APFD) and a 27.1\% reduction in execution overhead relative to baseline methods. The results demonstrate the feasibility of data-centric intelligence for scalable regression testing and provide an analytical foundation for integrating machine learning into next-generation Software Quality Assurance pipelines.},
DOI = {10.32604/cmc.2026.077782}
}



