% Journal article in Computer Modeling in Engineering and Sciences, vol. 143, no. 2 (2025).
% Author names are joined with " and " (the only name separator BibTeX recognises),
% and LaTeX-special characters in the abstract are escaped so the field can be typeset.
@article{cmes.2025.063092,
  author   = {Fati, Suliman Mohamed and Mahdi, Mohammed A. and Hazber, Mohamed A. G. and Ahamad, Shahanawaj and Saad, Sawsan A. and Ragab, Mohammed Gamal and Al-Shalabi, Mohammed},
  title    = {Enhancing Multi-Class Cyberbullying Classification with Hybrid Feature Extraction and Transformer-Based Models},
  journal  = {Computer Modeling in Engineering \& Sciences},
  volume   = {143},
  number   = {2},
  pages    = {2109--2131},
  year     = {2025},
  issn     = {1526-1506},
  doi      = {10.32604/cmes.2025.063092},
  url      = {http://www.techscience.com/CMES/v143n2/61429},
  abstract = {Cyberbullying on social media poses significant psychological risks, yet most detection systems oversimplify the task by focusing on binary classification, ignoring nuanced categories like passive-aggressive remarks or indirect slurs. To address this gap, we propose a hybrid framework combining Term Frequency-Inverse Document Frequency (TF-IDF), word-to-vector (Word2Vec), and Bidirectional Encoder Representations from Transformers (BERT) based models for multi-class cyberbullying detection. Our approach integrates TF-IDF for lexical specificity and Word2Vec for semantic relationships, fused with BERT's contextual embeddings to capture syntactic and semantic complexities. We evaluate the framework on a publicly available dataset of 47,000 annotated social media posts across five cyberbullying categories: age, ethnicity, gender, religion, and indirect aggression. Among BERT variants tested, BERT Base Un-Cased achieved the highest performance with 93\% accuracy (standard deviation $\pm$1\% across 5-fold cross-validation) and an average AUC of 0.96, outperforming standalone TF-IDF (78\%) and Word2Vec (82\%) models. Notably, it achieved near-perfect AUC scores (0.99) for age and ethnicity-based bullying. A comparative analysis with state-of-the-art benchmarks, including Generative Pre-trained Transformer 2 (GPT-2) and Text-to-Text Transfer Transformer (T5) models highlights BERT's superiority in handling ambiguous language. This work advances cyberbullying detection by demonstrating how hybrid feature extraction and transformer models improve multi-class classification, offering a scalable solution for moderating nuanced harmful content.},
}