
@Article{cmc.2025.059745,
AUTHOR = {Jiakang Sun and Ke Chen and Xinyang He and Xu Liu and Ke Li and Cheng Peng},
TITLE = {UniTrans: Unified Parameter-Efficient Transfer Learning and Multimodal Alignment for Large Multimodal Foundation Model},
JOURNAL = {Computers, Materials \& Continua},
VOLUME = {83},
YEAR = {2025},
NUMBER = {1},
PAGES = {219--238},
URL = {http://www.techscience.com/cmc/v83n1/60076},
ISSN = {1546-2226},
ABSTRACT = {With advances in parameter-efficient transfer learning, it has become feasible to adapt large pre-trained language models to downstream tasks under low-cost, low-resource conditions. However, extending these techniques to multimodal knowledge transfer introduces a significant challenge: ensuring alignment across modalities while minimizing the number of additional parameters required for downstream task adaptation. This paper introduces UniTrans, a framework for efficient knowledge transfer across multiple modalities. UniTrans leverages Vector-based Cross-modal Random Matrix Adaptation to enable fine-tuning with minimal parameter overhead. To further enhance modality alignment, we introduce two key components: the Multimodal Consistency Alignment Module and the Query-Augmentation Side Network, both optimized for scenarios with extremely limited trainable parameters. Extensive evaluations on a range of cross-modal downstream tasks show that our approach surpasses state-of-the-art methods while using just 5% of their trainable parameters, and on certain benchmarks it even outperforms fully fine-tuned models.},
DOI = {10.32604/cmc.2025.059745}
}
