
% Review article in Computers, Materials & Continua (Tech Science Press).
% The publisher export left volume/number empty and pages as an unfilled
% template placeholder; those fields are omitted here until real values exist
% (the URL path ".../cmc/online/..." suggests an online-first record).
% NOTE(review): year is inferred from the DOI suffix "cmc.2026" -- confirm
% against the final published issue and add volume/number/pages then.
@article{cmc.2026.075316,
  author   = {Yao, Xinjie and Zhu, Junjie and Hong, Tao and Zhao, Dengyu and Liu, Weikai and Xie, Guangsheng},
  title    = {Attention-Based Medical Image Analysis: Architectures, Applications, and Future Directions},
  journal  = {Computers, Materials \& Continua},
  year     = {2026},
  url      = {http://www.techscience.com/cmc/online/detail/26874},
  issn     = {1546-2226},
  abstract = {The attention mechanism, as a key technology for enhancing the performance of deep learning, is gaining increasingly widespread attention in medical image analysis due to its ability to focus on critical features and suppress redundant information. In recent years, the continuous evolution of attention methods has significantly improved their accuracy and robustness in key medical tasks such as lesion detection, tissue segmentation, and multimodal fusion, providing crucial support for building reliable clinical decision support systems. This paper systematically reviews the advances in attention-based methods for medical image analysis, comparing their performance with mainstream models like U-shaped convolutional neural network (UNet), convolutional neural network (CNN), and Vision Transformer (ViT) across multiple tasks, and summarizes various evaluation metrics, including the Dice coefficient, area under the ROC (Receiver Operating Characteristic) curve (AUC), and mean Average Precision (mAP). The review highlights that attention mechanisms bring significant improvements across core tasks such as segmentation, classification, detection, registration, and multimodal fusion: the Dice coefficient increases by 5\%--12\% in segmentation tasks, AUC improves by 3\%--8\% in classification tasks, mAP rises by 7\%--15\% in detection tasks, alignment accuracy enhances by 10\%--20\% in registration tasks, and retrieval accuracy reaches 85\%--95\% in multimodal fusion. The design characteristics and performance gains of key architectures such as channel attention, spatial attention, and hybrid attention are further analyzed. Nevertheless, current research still faces critical challenges, including scarce annotated data, limited cross-center generalization, complexity in multimodal fusion, insufficient model interpretability, and high computational costs. Accordingly, future research directions are proposed to promote the in-depth development and clinical translation of attention mechanisms in medical image analysis.},
  doi      = {10.32604/cmc.2026.075316},
}



