
@Article{jai.2025.069841,
AUTHOR = {Eleena Sarah Mathew},
TITLE = {Enhancing Security in Large Language Models: A Comprehensive Review of Prompt Injection Attacks and Defenses},
JOURNAL = {Journal on Artificial Intelligence},
VOLUME = {7},
YEAR = {2025},
NUMBER = {1},
PAGES = {347--363},
URL = {http://www.techscience.com/jai/v7n1/64040},
ISSN = {2579-003X},
ABSTRACT = {This review paper explores advanced methods to prompt Large Language Models (LLMs) into generating objectionable or unintended behaviors through adversarial prompt injection attacks. We examine a series of novel projects like HOUYI, Robustly Aligned LLM (RA-LLM), StruQ, and Virtual Prompt Injection that compel LLMs to produce affirmative responses to harmful queries. Several new benchmarks, such as PromptBench, AdvBench, AttackEval, INJECAGENT, and RobustnessSuite, have been created to evaluate the performance and resilience of LLMs against these adversarial attacks. Results show significant success rates in misleading models like Vicuna-7B, LLaMA-2-7B-Chat, GPT-3.5, and GPT-4. The review highlights limitations in existing defense mechanisms and proposes future directions for enhancing LLM alignment and safety protocols, including the concept of LLM SELF DEFENSE. Our study emphasizes the need for improved robustness in LLMs, which will potentially shape the future of Artificial Intelligence (AI) driven applications and security protocols. Understanding the vulnerabilities of LLMs is crucial for developing effective defenses against adversarial prompt injection attacks. This paper proposes a systemic classification framework that discusses various types of prompt injection attacks and defenses. We also go through a broad spectrum of state-of-the-art attack methods (such as HouYi and Virtual Prompt Injection) alongside advanced defense mechanisms (like RA-LLM, StruQ, and LLM Self-Defense), providing critical insights into vulnerabilities and robustness. We also integrate and compare results from multiple recent benchmarks, including PromptBench, INJECENT, and BIPIA.},
DOI = {10.32604/jai.2025.069841}
}



