% Journal article (Computers, Materials and Continua, vol. 86, no. 2, 2026):
% literature review and case studies on human-AI interaction in reinforcement learning.
% Authors are separated by " and " in "Last, First" form so BibTeX parses each name;
% the acronym in the title is brace-protected against style-driven lowercasing.
@Article{cmc.2025.072146,
AUTHOR = {Xiao, Shaoping and Wang, Zhaoan and Li, Junchao and Noeller, Caden and Jiang, Jiefeng and Wang, Jun},
TITLE = {Implementation of {Human-AI} Interaction in Reinforcement Learning: Literature Review and Case Studies},
JOURNAL = {Computers, Materials \& Continua},
VOLUME = {86},
YEAR = {2026},
NUMBER = {2},
PAGES = {1--62},
URL = {http://www.techscience.com/cmc/v86n2/64794},
ISSN = {1546-2226},
ABSTRACT = {The integration of human factors into artificial intelligence (AI) systems has emerged as a critical research frontier, particularly in reinforcement learning (RL), where human-AI interaction (HAII) presents both opportunities and challenges. As RL continues to demonstrate remarkable success in model-free and partially observable environments, its real-world deployment increasingly requires effective collaboration with human operators and stakeholders. This article systematically examines HAII techniques in RL through both theoretical analysis and practical case studies. We establish a conceptual framework built upon three fundamental pillars of effective human-AI collaboration: computational trust modeling, system usability, and decision understandability. Our comprehensive review organizes HAII methods into five key categories: (1) learning from human feedback, including various shaping approaches; (2) learning from human demonstration through inverse RL and imitation learning; (3) shared autonomy architectures for dynamic control allocation; (4) human-in-the-loop querying strategies for active learning; and (5) explainable RL techniques for interpretable policy generation. Recent state-of-the-art works are critically reviewed, with particular emphasis on advances incorporating large language models in human-AI interaction research. To illustrate some concepts, we present three detailed case studies: an empirical trust model for farmers adopting AI-driven agricultural management systems, the implementation of ethical constraints in robotic motion planning through human-guided RL, and an experimental investigation of human trust dynamics using a multi-armed bandit paradigm. These applications demonstrate how HAII principles can enhance RL systems' practical utility while bridging the gap between theoretical RL and real-world human-centered applications, ultimately contributing to more deployable and socially beneficial intelligent systems.},
DOI = {10.32604/cmc.2025.072146}
}