% Journal article (CMES vol. 147, no. 1, 2026).
% Authors are separated with " and " in "Last, First" form so BibTeX parses four distinct names.
% NOTE(review): the publisher export carried the placeholder page range 0--0; the pages field
% is omitted until the real page range or article number is known -- add it when available.
@article{cmes.2026.079954,
  author   = {Yeon, Sumin and Lee, Minjae and Bae, Jiho and Lee, Suwon},
  title    = {Resolving Ambiguity in Pointing Gestures Using Contextual Reasoning from Large Language Models},
  journal  = {Computer Modeling in Engineering \& Sciences},
  volume   = {147},
  number   = {1},
  year     = {2026},
  issn     = {1526-1506},
  doi      = {10.32604/cmes.2026.079954},
  url      = {http://www.techscience.com/CMES/v147n1/67152},
  abstract = {In everyday life, people effectively convey their intentions through pointing gestures without explicitly naming objects. In particular, pointing gestures used in conjunction with linguistic expressions such as “this” and “that” play a crucial role in intuitively indicating objects or locations in space. Although research on the recognition of such nonverbal gestures has been actively pursued within the field of human-computer interaction (HCI), accurately interpreting a user’s intent remains challenging in situations where the pointing gesture is ambiguous. This paper proposes an integrated system that combines a large language model (LLM), capable of understanding complex human language expressions, with pointing gestures designed to designate targets in space, thereby effectively processing multimodal user commands. The system is designed to accurately recognize user intentions even in complex and uncertain environments (e.g., indoor spaces with multiple objects) by synergistically leveraging spatial information obtained from pointing gestures and contextual reasoning provided by the LLM. To validate the proposed approach, we constructed a dataset comprising complex real-world environments and diverse utterances, and conducted experiments to meticulously analyze the system’s performance and limitations. This study demonstrates the potential for natural expansion of language-based spatial understanding within HCI, and suggests avenues for future research in related fields.},
}