
% Journal article from a publisher export. The pages field is omitted because
% the export carried only an empty range; cite via volume, number and DOI.
@article{cmc.2025.074682,
  author   = {Chen, Wenhe and Wang, Yue and Shen, Shuonan and Hua, Leer and Zheng, Caixia and Pu, Qi and Ma, Xundiao},
  title    = {{VIF-YOLO}: A Visible-Infrared Fusion {YOLO} Model for Real-Time Human Detection in Dense Smoke Environments},
  journal  = {Computers, Materials \& Continua},
  volume   = {87},
  number   = {1},
  year     = {2026},
  issn     = {1546-2226},
  doi      = {10.32604/cmc.2025.074682},
  url      = {http://www.techscience.com/cmc/v87n1/66101},
  abstract = {In fire rescue scenarios, traditional manual operations are highly dangerous, as dense smoke, low visibility, extreme heat, and toxic gases not only hinder rescue efficiency but also endanger firefighters' safety. Although intelligent rescue robots can enter hazardous environments in place of humans, smoke poses major challenges for human detection algorithms. These challenges include the attenuation of visible and infrared signals, complex thermal fields, and interference from background objects, all of which make it difficult to accurately identify trapped individuals. To address this problem, we propose VIF-YOLO, a visible--infrared fusion model for real-time human detection in dense smoke environments. The framework introduces a lightweight multimodal fusion (LMF) module based on learnable low-rank representation blocks to end-to-end integrate visible and infrared images, preserving fine details while enhancing salient features. In addition, an efficient multiscale attention (EMA) mechanism is incorporated into the YOLOv10n backbone to improve feature representation under low-light conditions. Extensive experiments on our newly constructed multimodal smoke human detection (MSHD) dataset demonstrate that VIF-YOLO achieves mAP50 of 99.5\%, precision of 99.2\%, and recall of 99.3\%, outperforming YOLOv10n by a clear margin. Furthermore, when deployed on the NVIDIA Jetson Xavier NX, VIF-YOLO attains 40.6 FPS with an average inference latency of 24.6 ms, validating its real-time capability on edge-computing platforms. These results confirm that VIF-YOLO provides accurate, robust, and fast detection across complex backgrounds and diverse smoke conditions, ensuring reliable and rapid localization of individuals in need of rescue.},
}



