
@article{cmc.2025.067367,
  author   = {Wang, Chongyang and Li, Qiongyan and Liu, Shu and Cheng, Pengle and Huang, Ying},
  title    = {{Transformer-Based} Fusion of Infrared and Visible Imagery for Smoke Recognition in Commercial Areas},
  journal  = {Computers, Materials \& Continua},
  volume   = {84},
  number   = {3},
  pages    = {5157--5176},
  year     = {2025},
  issn     = {1546-2226},
  doi      = {10.32604/cmc.2025.067367},
  url      = {http://www.techscience.com/cmc/v84n3/63206},
  abstract = {With rapid urbanization, fires pose significant challenges in urban governance. Traditional fire detection methods often struggle to detect smoke in complex urban scenes due to environmental interferences and variations in viewing angles. This study proposes a novel multimodal smoke detection method that fuses infrared and visible imagery using a transformer-based deep learning model. By capturing both thermal and visual cues, our approach significantly enhances the accuracy and robustness of smoke detection in business parks scenes. We first established a dual-view dataset comprising infrared and visible light videos, implemented an innovative image feature fusion strategy, and designed a deep learning model based on the transformer architecture and attention mechanism for smoke classification. Experimental results demonstrate that our method outperforms existing methods, under the condition of multi-view input, it achieves an accuracy rate of 90.88\%, precision rate of 98.38\%, recall rate of 92.41\% and false positive and false negative rates both below 5\%, underlining the effectiveness of the proposed multimodal and multi-view fusion approach. The attention mechanism plays a crucial role in improving detection performance, particularly in identifying subtle smoke features.},
}



