@comment{Journal article record. Author names are joined with the literal word "and" in "Last, First" form, as BibTeX name parsing requires. The pages field is omitted because no page numbers were supplied with this record.}
@article{cmes.2026.083503,
  author   = {Almehmadi, Aminah and Noor, Ayman and Noor, Aziza I. and Almukhalfi, Hanan and Noor, Talal H.},
  title    = {Multi-Class Severity-Aware Fire and Smoke Detection Using {YOLOv12} for Sustainable Intelligent Real-Time Monitoring},
  journal  = {Computer Modeling in Engineering \& Sciences},
  volume   = {147},
  number   = {3},
  year     = {2026},
  issn     = {1526-1506},
  doi      = {10.32604/cmes.2026.083503},
  url      = {http://www.techscience.com/CMES/v147n3/67931},
  abstract = {Fire emergencies have long posed a serious threat to people’s lives, real estate assets, and environmental sustainability in civilized societies, especially when combustible events are detected at late stages of development. Recent advancements in computer vision–based fire detection have enabled automated real-time monitoring; however, most solutions either detect the existence of fire/smoke or employ binary decision-making, which limits visual monitoring systems from being risk-aware. This work introduces a severity-aware fire/smoke detection model that supports intelligent monitoring systems in detecting visual hazards. The goal is to identify varying levels of fire intensity and smoke density and to detect humans in real time. We design a system capable of monitoring environments using components such as sensing devices, network communication buses, cloud data centers, and computer vision–based detectors. The fire/smoke detection model comprises modern deep learning–based object detectors, with the YOLOv12 model serving as the detection backbone. Moreover, our work proposes a two-stage training method that first learns coarse representations of fire, smoke, and humans, and then adapts the detector for fine-grained, severity-aware classification, thereby enhancing severity discrimination and reducing inter-class confusion. We gathered our dataset to comprise approximately 6500 annotated images, split between coarse-grained and severity-aware detection models. The dataset consists of seven classes indicating human presence, three classes indicating varying levels of fire intensity, and three classes indicating varying levels of smoke density. We conducted experiments comparing three baseline object detection architectures (i.e., YOLOv12s, RT-DETR-L, and SSDLite320-MobileNetV3) using identical training/testing configurations.
YOLOv12 has outperformed other baseline object detection architectures, achieving 0.929 mAP@50, 0.884 precision, 0.876 recall, 0.880 F1-score, and 2.48 ms per-image latency, providing the best balance between detection performance and real-time processing capability. Our results indicate that severity-aware detection can improve the early-stage detection of intelligent monitoring systems.},
}