% Journal article on Transformer-IDS (cross-dataset intrusion detection), Journal of Cyber Security 7(1), 2025.
% Authors are separated with " and "; acronyms and dataset names in the title are brace-protected against style recasing.
@article{jcs.2025.071627,
  author   = {Xin, Chaonan and Xu, Keqing},
  title    = {Cross-Dataset {Transformer-IDS} with Calibration and {AUC} Optimization (Evaluated on {NSL-KDD}, {UNSW-NB15}, {CIC-IDS2017})},
  journal  = {Journal of Cyber Security},
  volume   = {7},
  number   = {1},
  pages    = {483--503},
  year     = {2025},
  issn     = {2579-0064},
  doi      = {10.32604/jcs.2025.071627},
  url      = {http://www.techscience.com/JCS/v7n1/64671},
  abstract = {Intrusion Detection Systems (IDS) have achieved high accuracy on benchmark datasets, yet models often fail to generalize across different network environments. In this paper, we propose Transformer-IDS, a transformer-based network intrusion detection model designed for cross-dataset generalization. The model incorporates a classification token, multi-head self-attention, and embedding layers to learn versatile features, and it introduces a calibration module and an AUC-oriented optimization objective to improve reliability and ranking performance. We evaluate Transformer-IDS on three prominent datasets (NSL-KDD, UNSW-NB15, CIC-IDS2017) in both within-dataset and cross-dataset scenarios. Results demonstrate that while conventional deep IDS models (e.g., CNN-LSTM hybrids) reach $\sim$99\% accuracy when training and testing on the same dataset, their performance drops sharply to near-chance in cross-dataset tests. In contrast, the proposed Transformer-IDS achieves substantially better cross-dataset detection, improving Area Under the ROC Curve (AUC) by over 10\%--20\% and F1-score by 10+ points vs. baseline models. Calibration of output probabilities further enhances trustworthiness, aligning predicted confidence with actual attack probabilities. These findings highlight that a transformer with calibration and AUC optimization can serve as a robust IDS for varied network contexts, reducing the generalization gap and providing more reliable intrusion alerts.},
}