
@Article{cmc.2025.074899,
AUTHOR = {Yu, Zhichao and Yu, Jiahui and Fong, Simon James and Wu, Yaoyang},
TITLE = {{YOLO-Drive}: Robust Driver Distraction Recognition under Fine-Grained and Overlapping Behaviors},
JOURNAL = {Computers, Materials \& Continua},
VOLUME = {87},
YEAR = {2026},
NUMBER = {2},
PAGES = {--},
URL = {http://www.techscience.com/cmc/v87n2/66596},
ISSN = {1546-2226},
ABSTRACT = {Accurately recognizing driver distraction is critical for preventing traffic accidents, yet current detection models face two persistent challenges. First, distractions are often fine-grained, involving subtle cues such as brief eye closures or partial yawns, which are easily missed by conventional detectors. Second, in real-world scenarios, drivers frequently exhibit overlapping behaviors, such as simultaneously holding a cup, closing their eyes, and yawning, leading to multiple detection boxes and degraded model performance. Existing approaches fail to robustly address these complexities, resulting in limited reliability in safety-critical applications. To overcome these pain points, we propose YOLO-Drive, a novel framework that enhances YOLO-based driver monitoring with EfficientViM and Polarized Spectral--Spatial Attention (PSSA) modules. EfficientViM provides lightweight yet powerful global--local feature extraction, enabling accurate recognition of subtle driver states. PSSA further amplifies discriminative features across spatial and spectral domains, ensuring robust separation of concurrent distraction cues. By explicitly modeling fine-grained and overlapping behaviors, our approach delivers significant improvements in both precision and robustness. Extensive experiments on benchmark driver distraction datasets demonstrate that YOLO-Drive consistently outperforms state-of-the-art models, achieving higher detection accuracy while maintaining real-time efficiency. These results validate YOLO-Drive as a practical and reliable solution for advanced driver monitoring systems, addressing long-standing challenges of subtle cue recognition and multi-cue distraction detection.},
DOI = {10.32604/cmc.2025.074899}
}



