
% Journal article record (publisher export, cleaned up).
% - Authors are separated with " and " and written as "Last, First" so each person is parsed individually.
% - The page field is omitted: the exported record carried only a bare "--" placeholder; add the real range or article number once known.
@article{cmes.2026.074800,
  author   = {Leiva, Victor and Martin-Barreiro, Carlos and Giampaoli, Viviana},
  title    = {Computational Modeling for Mortality Prediction in Medical Sciences Based on a Proto-Digital Twin Framework},
  journal  = {Computer Modeling in Engineering \& Sciences},
  volume   = {146},
  number   = {2},
  year     = {2026},
  issn     = {1526-1506},
  doi      = {10.32604/cmes.2026.074800},
  url      = {http://www.techscience.com/CMES/v146n2/66304},
  abstract = {Mortality prediction in respiratory health is challenging, especially when using large-scale clinical datasets composed primarily of categorical variables. Traditional digital twin (DT) frameworks often rely on longitudinal or sensor-based data, which are not always available in public health contexts. In this article, we propose a novel proto-DT framework for mortality prediction in respiratory health using a large-scale categorical biomedical dataset. This dataset contains 415,711 severe acute respiratory infection cases from the Brazilian Unified Health System, including both COVID-19 and non-COVID-19 patients. Four classification models—extreme gradient boosting (XGBoost), logistic regression, random forest, and a deep neural network (DNN)—are trained using cost-sensitive learning to address class imbalance. The models are evaluated using accuracy, precision, recall, F1-score, and area under the curve (AUC) related to the receiver operating characteristic (ROC). The framework supports simulated interventions by modifying selected inputs and recalculating predicted mortality. Additionally, we incorporate multiple correspondence analysis and K-means clustering to explore model sensitivity. A Python library has been developed to ensure reproducibility. All models achieve AUC-ROC values near or above 0.85. XGBoost yields the highest accuracy (0.84), while the DNN achieves the highest recall (0.81). Scenario-based simulations reveal how key clinical factors, such as intensive care unit admission and oxygen support, affect predicted outcomes. The proposed proto-DT framework demonstrates the feasibility of mortality prediction and intervention simulation using categorical data alone. This framework provides a foundation for data-driven explainable DTs in public health, even in the absence of time-series data.},
}



