
% Journal article: Structural and Congenital Heart Disease, vol. 21, no. 2 (2026).
% Cleaned from the publisher export:
%   - the two authors are now joined by " and " (the export used a comma, which
%     BibTeX parses as a single person);
%   - NOTE(review): the name split assumes the last token of each name is the
%     family part ("S" and "Naidu") -- confirm against the publisher page;
%   - the export carried PAGES = 0--0, a placeholder, so the field is omitted;
%     add the real page range or article number once it is assigned;
%   - HTML bold tags in the abstract were dropped, percent signs escaped, and
%     quotes and the en-dash written in LaTeX form; the wording is unchanged.
@Article{schd.2026.079590,
  AUTHOR   = {S, Shruthi and Naidu, D. Hanumanth Rao},
  TITLE    = {Predicting Congenital Heart Disease Using Maternal Risk Factors: A Machine Learning Study from an {Indian} Tertiary Cardiac Care Centre},
  JOURNAL  = {Structural and Congenital Heart Disease},
  VOLUME   = {21},
  NUMBER   = {2},
  YEAR     = {2026},
  DOI      = {10.32604/schd.2026.079590},
  URL      = {http://www.techscience.com/schd/v21n2/67588},
  ISSN     = {3071-1738},
  ABSTRACT = {Background: Congenital Heart Disease (CHD) is an abnormality of the heart arising before birth. CHD diagnosis poses a critical challenge, particularly in resource-constrained settings where access to doctors and skilled radiologists is limited. The maternal risk factors contributing to CHD include modifiable and non-modifiable causes. Very few studies mention about these maternal risk factors for the Indian population to build predictive machine learning models for disease forecasting. The aim is to explore the feasibility of predicting CHD occurrence using maternal risk factor data and machine learning models in an Indian context. Methods: This research utilizes Indian-origin retrospective case and control data from a hospital, harnessing maternal risk factors for CHD prediction. The study analyzed 1040 preprocessed records with 56 features, which were organized into thematic categories. A diverse set of machine learning algorithms were employed for CHD prediction, including Weighted Support Vector Machine, Weighted Random Forests, Naive Bayes, Artificial Neural Network, Logistic Regression, Decision Tree, Extreme Gradient Boosting, Adaptive Boosting, and Gradient Boost Machine. The dataset was evaluated using stratified train (80\%)-test (20\%) split, repeated 5-fold cross-validation, and bootstrap-based evaluation. Results: The study reveals that tree-based models demonstrated comparatively stronger performance for the identification of CHD, with sensitivity values exceeding 93\%, a maximum specificity of 87.5\%, and Area Under the Precision--Recall Curve (AUPRC) values greater than 94\% for all models. XGBoost achieved the best performance among the evaluated models, with a balanced accuracy of 81.7\% and F1 score of 93.4\%. Data insights generated by the Shapley Additive Explanations (SHAP) explainable AI framework provide further insight into the features contributing to CHD. Conclusions: The maternal risk factor ``supplement score'' emerged as a significant variable, indicating that the maternal nutritional status plays a critical role in influencing CHD risk. The findings indicate that non-invasive CHD screening based on maternal risk-factor data can be effectively performed using tree-based machine-learning models, with ensemble methods such as gradient boosting and random forest.},
}



