% Journal article record. Author names are joined with the keyword "and" (the only
% name separator BibTeX recognises) and written in "Last, First" form. Words in the
% title that must keep their capitalisation are brace-protected. Percent signs in the
% abstract are escaped so that styles which print the abstract do not turn the rest
% of the line into a TeX comment.
@Article{cmc.2025.063646,
AUTHOR = {Zaidi, Atif Raza and Abbas, Tahir and Daud, Ali and Alghushairy, Omar and Dawood, Hussain and Sarwar, Nadeem},
TITLE = {Enhancing {Android} Malware Detection with {XGBoost} and Convolutional Neural Networks},
JOURNAL = {Computers, Materials \& Continua},
VOLUME = {84},
YEAR = {2025},
NUMBER = {2},
PAGES = {3281--3304},
URL = {http://www.techscience.com/cmc/v84n2/62873},
ISSN = {1546-2226},
ABSTRACT = {Safeguarding against malware requires precise machine-learning algorithms to classify harmful apps. The Drebin dataset of 15,036 samples and 215 features yielded significant and reliable results for two hybrid models, CNN + XGBoost and KNN + XGBoost. To address the class imbalance issue, SMOTE (Synthetic Minority Over-sampling Technique) was used to preprocess the dataset, creating synthetic samples of the minority class (malware) to balance the training set. XGBoost was then used to choose the most essential features for separating malware from benign programs. The models were trained and tested using 6-fold cross-validation, measuring accuracy, precision, recall, F1 score, and ROC AUC. The results are highly dependable, showing that CNN + XGBoost consistently outperforms KNN + XGBoost with an average accuracy of 98.76\% compared to 97.89\%. The CNN-based malware classification model, with its higher precision, recall, and F1 scores, is a secure choice. CNN + XGBoost, with its fewer all-fold misclassifications in confusion matrices, further solidifies this security. The calibration curve research, confirming the accuracy and cybersecurity applicability of the models' probability projections, adds to the sense of reliability. This study unequivocally demonstrates that CNN + XGBoost is a reliable and effective malware detection system, underlining the importance of feature selection and hybrid models.},
DOI = {10.32604/cmc.2025.063646}
}