% Journal article: Computers, Materials & Continua 84(1), 2025, pp. 811--826.
% Authors are separated by the "and" keyword (a comma-separated list is not
% parsed as several names); percent signs in the abstract are escaped so the
% field is safe if a style prints it.
@article{cmc.2025.063686,
  author   = {Fatima, Tehreem and Xia, Kewen and Yang, Wenbiao and Ain, Qurat Ul and Perera, Poornima Lankani},
  title    = {Diabetes Prediction Using {ADASYN}-Based Data Augmentation and {CNN-BiGRU} Deep Learning Model},
  journal  = {Computers, Materials \& Continua},
  volume   = {84},
  number   = {1},
  pages    = {811--826},
  year     = {2025},
  issn     = {1546-2226},
  doi      = {10.32604/cmc.2025.063686},
  url      = {http://www.techscience.com/cmc/v84n1/61746},
  abstract = {The rising prevalence of diabetes in modern society underscores the urgent need for precise and efficient diagnostic tools to support early intervention and treatment. However, the inherent limitations of existing datasets, including significant class imbalances and inadequate sample diversity, pose challenges to the accurate prediction and classification of diabetes. Addressing these issues, this study proposes an innovative diabetes prediction framework that integrates a hybrid Convolutional Neural Network-Bidirectional Gated Recurrent Unit (CNN-BiGRU) model for classification with Adaptive Synthetic Sampling (ADASYN) for data augmentation. ADASYN was employed to generate synthetic yet representative data samples, effectively mitigating class imbalance and enhancing the diversity and representativeness of the dataset. This augmentation process is critical for ensuring the robustness and generalizability of the predictive model, particularly in scenarios where minority class samples are underrepresented. The CNN-BiGRU architecture was designed to leverage the complementary strengths of CNN in extracting spatial features and BiGRU in capturing sequential dependencies, making it well-suited for the complex patterns inherent in medical data. The proposed framework demonstrated exceptional performance, achieving a training accuracy of 98.74\% and a test accuracy of 97.78\% on the augmented dataset. These results validate the efficacy of the integrated approach in addressing the challenges of class imbalance and dataset heterogeneity, while significantly enhancing the diagnostic precision for diabetes prediction. This study provides a scalable and reliable methodology with promising implications for advancing diagnostic accuracy in medical applications, particularly in resource-constrained and data-limited environments.},
}