
@article{cmc.2026.077501,
  author   = {Pan, Ronghao and Bernal-Beltrán, Tomás and Rodríguez-González, Alejandro and Menasalvas-Ruíz, Ernestina and Valencia-García, Rafael},
  title    = {Evaluating {Spanish} Medical Entity Recognition: {Large Language Models} with Prompting versus Fine-Tuning},
  journal  = {Computers, Materials \& Continua},
  volume   = {87},
  number   = {3},
  year     = {2026},
  issn     = {1546-2226},
  doi      = {10.32604/cmc.2026.077501},
  url      = {http://www.techscience.com/cmc/v87n3/66969},
  abstract = {The digitization of healthcare has resulted in the production of large amounts of structured and unstructured clinical data, creating the need for accurate and efficient named entity recognition (NER) to support medical procedures. This study evaluates and compares three approaches to NER in the medical domain in Spanish: using Large Language Models (LLMs) with In-Context Learning techniques (Zero-Shot, Few-Shot, and Chain-of-Thought); fine-tuning of LLMs; and fine-tuning of encoder-only models. Experiments were conducted on the Meddocan, Meddoprof, Meddoplace and Symptemist benchmark datasets. Fine-tuned encoder-only models achieve the best performance across all datasets, reaching macro-F1 scores of up to 76.71 on Meddocan, 71.51 on Meddoplace, 66.07 on Meddoprof and 63.50 on Symptemist. While LLMs with prompting offer flexibility and require no task-specific training, their performance varies significantly depending on the entity type. In addition, we evaluated fine-tuning of LLMs using QLoRA, but the improvements were limited due to the small amount of training data available per entity type, which made model adaptation less effective.},
}



