% Journal article record (publisher export, cleaned up).
% NOTE(review): YEAR is taken from the year component of the DOI; confirm it
% once the article is assigned to an issue. VOLUME, NUMBER and PAGES were blank
% or an unfilled placeholder in the export and are omitted until known.
@Article{sdhm.2026.077736,
AUTHOR = {Li, Haoyu and Wang, Jiayi and Wu, Zhaoyu and Yan, Shuo and Zhang, Ziqi and Gao, Yang and Peng, Genwang and Cao, Zhiwei},
TITLE = {A Multimodal Defect Detection Method for Key Components of Rail Transit Systems},
JOURNAL = {Structural Durability \& Health Monitoring},
YEAR = {2026},
URL = {http://www.techscience.com/sdhm/online/detail/26281},
ISSN = {1930-2991},
ABSTRACT = {Key components of rail transit systems, such as tracks and vehicle bodies, are prone to developing various types and manifestations of defects during long-term operation. These defects not only accelerate component aging and failure but also pose serious threats to train operational safety. Among existing intelligent detection methods, they mostly rely solely on visible light images demonstrate limited robustness in complex scenarios. This limitation stems from their high dependence on ambient lighting conditions, rendering them insufficient to meet practical railway inspection requirements. While mainstream multimodal detection methods incorporate the complementary strengths of heterogeneous data sources, they fail to fully leverage the intrinsic associative value of data across different modalities. Furthermore, the occurrence frequency and sample size of various rail transit defects exhibit significant disparities, resulting in severe sample imbalance across detection datasets. This substantially reduces the detection accuracy for rare defect categories. To address these critical technical challenges, this paper proposes a multimodal defect detection method for key components of rail transit systems. The method introduces a multimodal input architecture by integrating Red, Green, Blue (RGB) visual data with depth geometric data. It incorporates a self-learning deep feature fusion module that enables complementary enhancement and deep coupling of heterogeneous modal information. This is achieved through targeted feature extraction and multi-round interactive fusion across different modalities. Additionally, we propose a weighted composite balanced loss function that employs dynamic adaptive weighting factors to adjust the model optimization direction in real time. This method effectively mitigates training bias caused by sample imbalance and improves detection performance for minority defect classes. Finally, experimental results on the track fastener defect RGBD dataset and the vehicle body fastener defect RGBD dataset demonstrate that the proposed method achieves optimal defect detection accuracy and meets real-time inspection requirements.},
DOI = {10.32604/sdhm.2026.077736}
}