
% Journal article record for CMES vol. 146, no. 3 (2026).
% Authors are separated with " and " so each name is parsed individually.
% The source export carried a placeholder page range (0--0); the pages field
% is omitted until real page numbers or an article number are known -- the
% DOI identifies the article in the meantime.
@article{cmes.2026.077334,
  author   = {Asif, Daniyal and Kerdid, Nabil and Arif, Muhammad Shoaib and Bibi, Mairaj},
  title    = {Explainable Ensemble Learning Approach for Ovarian Cancer Diagnosis Using Clinical Data},
  journal  = {Computer Modeling in Engineering \& Sciences},
  volume   = {146},
  number   = {3},
  year     = {2026},
  issn     = {1526-1506},
  doi      = {10.32604/cmes.2026.077334},
  url      = {http://www.techscience.com/CMES/v146n3/66802},
  abstract = {Ovarian cancer (OC) is one of the leading causes of death related to gynecological cancer, with the main difficulty of its early diagnosis and a heterogeneous nature of tumor biomarkers. Machine learning (ML) has the potential to process complex datasets and support decision-making in OC diagnosis. Nevertheless, traditional ML models tend to be biased, overfitting, noisy, and less generalized. Moreover, their black-box nature reduces interpretability and limits their practical clinical applicability. In this study, we introduce an explainable ensemble learning (EL) model, TreeX-Stack, based on a stacking architecture that employs tree-based learners such as Decision Tree (DT), Random Forest (RF), Gradient Boosting (GB), and Extreme Gradient Boosting (XGBoost) as base learners, and Logistic Regression (LR) as the meta-learner to enhance ovarian cancer (OC) diagnosis. Local Interpretable Model-Agnostic Explanations (LIME) are used to explain individual predictions, making the model outputs more clinically interpretable and applicable. The model is trained on the dataset that includes demographic information, blood test, general chemistry, and tumor markers. Extensive preprocessing includes handling missing data using iterative imputation with Bayesian Ridge and addressing multicollinearity by removing features with correlation coefficients above 0.7. Relevant features are then selected using the Boruta feature selection method. To obtain robust and unbiased performance estimates during hyperparameter tuning, nested cross-validation (CV) with grid search is employed, and all experiments are repeated five times to ensure statistical reliability. TreeX-Stack demonstrates excellent diagnostic performance, achieving an accuracy of 0.9027, a precision of 0.8673, a recall of 0.9391, and an F1-score of 0.9012. Feature-importance analyses using LIME and permutation importance highlight Human Epididymis Protein 4 (HE4) as the most significant biomarker for OC. The combination of high predictive performance and interpretability makes TreeX-Stack a reliable tool for clinical decision support in OC diagnosis.},
}



