
@Article{cmc.2025.071210,
AUTHOR = {Alghamdi, Ans D.},
TITLE = {{MultiAgent-CoT}: A Multi-Agent {Chain-of-Thought} Reasoning Model for Robust Multimodal Dialogue Understanding},
JOURNAL = {Computers, Materials \& Continua},
VOLUME = {86},
YEAR = {2026},
NUMBER = {2},
PAGES = {1--35},
URL = {http://www.techscience.com/cmc/v86n2/64779},
ISSN = {1546-2226},
ABSTRACT = {Multimodal dialogue systems often fail to maintain coherent reasoning over extended conversations and suffer from hallucination due to limited context modeling capabilities. Current approaches struggle with cross-modal alignment, temporal consistency, and robust handling of noisy or incomplete inputs across multiple modalities. We propose MultiAgent-Chain of Thought (CoT), a novel multi-agent chain-of-thought reasoning framework where specialized agents for text, vision, and speech modalities collaboratively construct shared reasoning traces through inter-agent message passing and consensus voting mechanisms. Our architecture incorporates self-reflection modules, conflict resolution protocols, and dynamic rationale alignment to enhance consistency, factual accuracy, and user engagement. The framework employs a hierarchical attention mechanism with cross-modal fusion and implements adaptive reasoning depth based on dialogue complexity. Comprehensive evaluations on Situated Interactive MultiModal Conversations (SIMMC) 2.0, VisDial v1.0, and newly introduced challenging scenarios demonstrate statistically significant improvements in grounding accuracy ($p < 0.01$), chain-of-thought interpretability, and robustness to adversarial inputs compared to state-of-the-art monolithic transformer baselines and existing multi-agent approaches.},
DOI = {10.32604/cmc.2025.071210}
}



