
% Journal article (Computers, Materials & Continua, vol. 87, no. 2, 2026).
% PAGES is intentionally omitted: the publisher export carried only an empty
% range ("--"). Add the page range or article number once it is known.
@Article{cmc.2026.074274,
AUTHOR = {Al Malwi, Wajdan and Asiri, Fatima and Alturki, Nazik and Alnazzawi, Noha and Kasimatis, Dimitrios and Pitropakis, Nikolaos},
TITLE = {Safety-Aware Reinforcement Learning for Self-Healing Intrusion Detection in {5G}-Enabled {IoT} Networks},
JOURNAL = {Computers, Materials \& Continua},
VOLUME = {87},
YEAR = {2026},
NUMBER = {2},
URL = {http://www.techscience.com/cmc/v87n2/66584},
ISSN = {1546-2226},
ABSTRACT = {The expansion of 5G-enabled Internet of Things (IoT) networks, while enabling transformative applications, significantly increases the attack surface and necessitates security solutions that extend beyond traditional intrusion detection. Existing intrusion detection systems (IDSs) mainly operate in an open-loop manner, excelling at classification but lacking the ability for autonomous, safety-aware remediation. This gap is particularly critical in 5G environments, where manual intervention is too slow and naive automation can lead to severe service disruptions. To address this issue, we propose a novel Self-Healing Intrusion Detection System (SH-IDS) framework that develops a closed-loop cyber defense mechanism. The main technical contribution is the integration of a deep neural network-based threat detector, which offers uncertainty-quantified predictions, with a safety-aware reinforcement learning (RL) engine formulated as a Constrained Markov Decision Process (CMDP). The CMDP explicitly models operational safety as cost constraints, and a new runtime safety shield actively adjusts any unsafe action proposed by the RL agent to the nearest safe alternative, ensuring operational integrity. Additionally, we introduce a composite utility function for the comprehensive evaluation of the system. Empirical analysis on the 5G-NIDD dataset demonstrates the superior performance of our framework: the detector achieves 98.26\% accuracy, while the safe RL agent learns effective mitigation policies. Importantly, the safety shield blocked up to 70 unsafe actions under strict constraints, and analysis of the learned Q-tables confirms that the agent internalizes safety, avoiding overly disruptive actions, such as isolating nodes for minor threats. The system also maintains high efficiency with a compact model size of 121.7 KB and sub-millisecond latency, confirming its practical deployability for real-time 5G-IoT security.},
DOI = {10.32604/cmc.2026.074274}
}



