
@article{cmc.2025.071656,
  author   = {Wang, Dan and Li, Zhoubin and Xia, Yuze and Yu, Zhenhua},
  title    = {{GLAMSNet}: A Gated-Linear Aspect-Aware Multimodal Sentiment Network with Alignment Supervision and External Knowledge Guidance},
  journal  = {Computers, Materials \& Continua},
  year     = {2025},
  volume   = {85},
  number   = {3},
  pages    = {5823--5845},
  issn     = {1546-2226},
  doi      = {10.32604/cmc.2025.071656},
  url      = {http://www.techscience.com/cmc/v85n3/64207},
  abstract = {Multimodal Aspect-Based Sentiment Analysis (MABSA) aims to detect sentiment polarity toward specific aspects by leveraging both textual and visual inputs. However, existing models suffer from weak aspect-image alignment, modality imbalance dominated by textual signals, and limited reasoning for implicit or ambiguous sentiments requiring external knowledge. To address these issues, we propose a unified framework named Gated-Linear Aspect-Aware Multimodal Sentiment Network (GLAMSNet). First of all, an input encoding module is employed to construct modality-specific and aspect-aware representations. Subsequently, we introduce an image--aspect correlation matching module to provide hierarchical supervision for visual-textual alignment. Building upon these components, we further design a Gated-Linear Aspect-Aware Fusion (GLAF) module to enhance aspect-aware representation learning by adaptively filtering irrelevant textual information and refining semantic alignment under aspect guidance. Additionally, an External Language Model Knowledge-Guided mechanism is integrated to incorporate sentiment-aware prior knowledge from GPT-4o, enabling robust semantic reasoning especially under noisy or ambiguous inputs. Experimental studies conducted based on Twitter-15 and Twitter-17 datasets demonstrate that the proposed model outperforms most state-of-the-art methods, achieving 79.36\% accuracy and 74.72\% F1-score, and 74.31\% accuracy and 72.01\% F1-score, respectively.},
}



