
% Journal article in Computers, Materials & Continua, vol. 85, no. 1 (2025).
% Authors are listed in "Last, First" form and joined with " and ", which is
% the only name separator BibTeX recognises.
@article{cmc.2025.064268,
  author   = {Algarni, Asaad and Naseer, Aysha and Alshehri, Mohammed and AlQahtani, Yahya and Alshahrani, Abdulmonem and Park, Jeongmin},
  title    = {Hybrid {HRNet-Swin} {Transformer}: Multi-Scale Feature Fusion for Aerial Segmentation and Classification},
  journal  = {Computers, Materials \& Continua},
  volume   = {85},
  number   = {1},
  pages    = {1981--1998},
  year     = {2025},
  issn     = {1546-2226},
  doi      = {10.32604/cmc.2025.064268},
  url      = {http://www.techscience.com/cmc/v85n1/63509},
  abstract = {Remote sensing plays a pivotal role in environmental monitoring, disaster relief, and urban planning, where accurate scene classification of aerial images is essential. However, conventional convolutional neural networks (CNNs) struggle with long-range dependencies and preserving high-resolution features, limiting their effectiveness in complex aerial image analysis. To address these challenges, we propose a Hybrid HRNet-Swin Transformer model that synergizes the strengths of HRNet-W48 for high-resolution segmentation and the Swin Transformer for global feature extraction. This hybrid architecture ensures robust multi-scale feature fusion, capturing fine-grained details and broader contextual relationships in aerial imagery. Our methodology begins with preprocessing steps, including normalization, histogram equalization, and noise reduction, to enhance input data quality. The HRNet-W48 backbone maintains high-resolution feature maps throughout the network, enabling precise segmentation, while the Swin Transformer leverages hierarchical self-attention to model long-range dependencies efficiently. By integrating these components, our model achieves superior performance in segmentation and classification tasks compared to traditional CNNs and standalone transformer models. We evaluate our approach on two benchmark datasets: UC Merced and WHU-RS19. Experimental results demonstrate that the proposed hybrid model outperforms existing methods, achieving state-of-the-art accuracy while maintaining computational efficiency. Specifically, it excels in preserving fine spatial details and contextual understanding, critical for applications like land-use classification and disaster assessment.},
}



