
% Journal article record exported from the publisher site (Tech Science Press).
% Review notes:
%   - volume, number and pages were empty or placeholder values in the export
%     and are omitted here; add them once the article is assigned to an issue.
%   - year is inferred from the DOI suffix (cmc.2026...) -- TODO confirm against
%     the final published record.
%   - author names are stored as "Last, First" and joined with " and ", which is
%     the only name separator BibTeX recognises.
@article{cmc.2026.082431,
  author   = {Zhu, Kemeng and Zhu, Dingju and Mao, Shihua and Wu, Jinchen and Kong, Depeng and Yung, Kaileung and Ip, Andrew W. H.},
  title    = {Scale-Robust Cross-Scale Representation Learning for Aerial Crop Pest Recognition},
  journal  = {Computers, Materials \& Continua},
  year     = {2026},
  issn     = {1546-2226},
  doi      = {10.32604/cmc.2026.082431},
  url      = {http://www.techscience.com/cmc/online/detail/26798},
  abstract = {Unmanned aerial vehicles (UAVs) have become an increasingly important platform for agricultural remote sensing, yet the accurate recognition of pests and diseases is frequently compromised by drastic scale variability and complex environmental backgrounds. To address these challenges, this study introduces a novel attention-driven approach centered on a Multi-Scale Grouped Channel–Spatial Dual Attention (MS-GCDA) mechanism. The MS-GCDA module achieves robust feature calibration by decoupling and jointly modeling multi-scale spatial contexts and grouped channel dependencies, which significantly enhances the model’s sensitivity to fine-grained disease symptoms while suppressing background clutter. This core mechanism is integrated into Augmented EfficientNet (AugEffNet), a lightweight architecture further optimized for edge deployment through a learnable Latent Feature Projection (LFP) strategy that bridges the gap between high-dimensional feature extraction and real-time inference. Experimental evaluations on a self-collected luffa dataset and the public AppleLeaf9 benchmark demonstrate the superior efficacy of the proposed MS-GCDA mechanism. The framework achieves an accuracy of 97.27\% and an F1-score of 97.05\%, outperforming representative lightweight models. Notably, while the MS-GCDA ensures diagnostic precision under varying flight altitudes, the integration of LFP optimizes the deployment pipeline, resulting in a 94.7\% reduction in inference latency (from 127.57 to 6.68 ms) and a decrease in computational cost to 122.88M Floating Point Operations per Second (FLOPs). Real-world field experiments validate that the synergy between scale-aware attention and efficient feature projection provides a reliable and scalable solution for autonomous plant protection in precision agriculture.},
}



