@Article{cmc.2026.077579,
AUTHOR = {Seohyun Yoo, Joonseo Hyeon, Jaehyuk Cho},
TITLE = {Effective Data Balancing and Fine-Tuning Techniques for Medical sLLMs in Resource-Constrained Domains},
JOURNAL = {Computers, Materials \& Continua},
VOLUME = {87},
YEAR = {2026},
NUMBER = {3},
PAGES = {--},
URL = {http://www.techscience.com/cmc/v87n3/66972},
ISSN = {1546-2226},
ABSTRACT = {Despite remarkable advances in medical large language models (LLMs), their deployment in real clinical settings remains impractical due to prohibitive computational requirements and privacy regulations that restrict cloud-based solutions. Small LLMs (sLLMs) offer a promising alternative for on-premise deployment, yet they require domain-specific fine-tuning that still exceeds the hardware capacity of most healthcare institutions. Furthermore, the impact of multilingual data composition on medical sLLM performance remains poorly understood. We present a resource-efficient fine-tuning pipeline that integrates Quantized Low-Rank Adaptation (QLoRA), Fully Sharded Data Parallelism (FSDP), and Sequence Packing, validated across two model scales: MedGemma 4B for efficiency analysis and LLaMA 3.3 70B for data balance experiments. Our approach achieves 58.3% reduction in video random access memory (VRAM) usage (from 48 GB to 20 GB) and 5× training speedup on MedGemma 4B using NVIDIA L40s GPUs. Critically, experiments on LLaMA 3.3 70B reveal that English-heavy data mixing (10:3 ratio) degrades Korean medical law performance by 1.23 percentage points while providing only marginal English gains (+1.49 pp), demonstrating catastrophic forgetting in multilingual medical fine-tuning. Our work provides three contributions: (1) a practical fine-tuning pipeline operable within 20 GB VRAM, (2) empirical evidence that data balance—not volume—determines multilingual medical QA performance, and (3) actionable guidelines for deploying medical sLLMs in non-English clinical environments.},
DOI = {10.32604/cmc.2026.077579}
}