
@article{cmes.2025.059192,
  author   = {Thobhani, Alaa and Zou, Beiji and Kui, Xiaoyan and Abdussalam, Amr and Asim, Muhammad and Shah, Sajid and ELAffendi, Mohammed},
  title    = {A Survey on Enhancing Image Captioning with Advanced Strategies and Techniques},
  journal  = {Computer Modeling in Engineering \& Sciences},
  volume   = {142},
  number   = {3},
  pages    = {2247--2280},
  year     = {2025},
  issn     = {1526-1506},
  doi      = {10.32604/cmes.2025.059192},
  url      = {http://www.techscience.com/CMES/v142n3/59756},
  abstract = {Image captioning has seen significant research efforts over the last decade. The goal is to generate meaningful semantic sentences that describe visual content depicted in photographs and are syntactically accurate. Many real-world applications rely on image captioning, such as helping people with visual impairments to see their surroundings. To formulate a coherent and relevant textual description, computer vision techniques are utilized to comprehend the visual content within an image, followed by natural language processing methods. Numerous approaches and models have been developed to deal with this multifaceted problem. Several models prove to be state-of-the-art solutions in this field. This work offers an exclusive perspective emphasizing the most critical strategies and techniques for enhancing image caption generation. Rather than reviewing all previous image captioning work, we analyze various techniques that significantly improve image caption generation and achieve significant performance improvements, including encompassing image captioning with visual attention methods, exploring semantic information types in captions, and employing multi-caption generation techniques. Further, advancements such as neural architecture search, few-shot learning, multi-phase learning, and cross-modal embedding within image caption networks are examined for their transformative effects. The comprehensive quantitative analysis conducted in this study identifies cutting-edge methodologies and sheds light on their profound impact, driving forward the forefront of image captioning technology.},
}



