% Journal article entry. NOTE(review): pages = 0--0 looks like a publisher placeholder -- confirm the real page range or article number before citing.
@article{cmes.2026.084282,
  author   = {Khan, Salman and Alzamel, Mai},
  title    = {A Scalable Deep Learning Framework for Real-Time Cyber Threat Detection in Big Data Security Analytics},
  journal  = {Computer Modeling in Engineering \& Sciences},
  volume   = {147},
  number   = {3},
  pages    = {0--0},
  year     = {2026},
  issn     = {1526-1506},
  doi      = {10.32604/cmes.2026.084282},
  url      = {http://www.techscience.com/CMES/v147n3/67937},
  abstract = {Traditional threat detection has proven ineffective in large-scale, moving data in the era of ever-more complex adversarial techniques and interconnected systems. The challenge becomes even more complex when high-volume, unstructured data continuously streams from social media platforms, requiring them to process the data efficiently and intelligently to provide timely security insights. Considering the big data security, the present study presents a scalable deep-learning-based system for real-time cyber threat detection, which has been developed and validated especially for distributed big data processing environments. A hybrid embedding approach that combines Word2Vec and Iterated Dilated Convolutional Neural Networks (ID-CNNs) is created to extract complementary semantic and sequential linguistic patterns from tweet data. Next, the most informative features are selected using SHapley Additive exPlanations (SHAP) feature selection technique by discarding redundant and noisy signals, followed by a Deep Neural Network (DNN) classifier to classify threats in real-time. The framework is deployed and tested on two well-known big data platforms (Apache Hadoop, Apache Spark) with different-sized data sets and node configurations to measure detection accuracy as well as computational scalability. From the classification point of view, when it comes to accuracy, the proposed model gives a 99.18\% accuracy rate and MCC of 0.984 which is better than all the baseline models. When it comes to scalability, Apache Spark has proven to be superior to Apache Hadoop in all configurations, with up to 5.10 times speedup on the biggest dataset, still maintaining classification accuracy, and dropping execution time from 53 s on a single node to 12 s with six nodes on 8M threads. The results validate that the proposed framework provides a powerful, highly accurate, and computationally efficient solution for real-time cyber threat intelligence in large-scale big data security analytics applications.},
}