@Comment{Review note for entry cmc.2026.084208: the source record had empty VOLUME and NUMBER fields and an unfilled PAGES placeholder, so those fields are omitted here; add them once the issue and page range are known. YEAR is inferred from the year embedded in the DOI and citation key -- TODO confirm against the published version.}

@Article{cmc.2026.084208,
AUTHOR = {Fang, Xing and Chen, Yuanfang and Lin, Qiang and Yang, Kun and Lee, Gyu Myoung},
TITLE = {{LaRP-CLIP}: Layer-Aware Refinement with Prototype Guidance for Zero-Shot Anomaly Detection},
JOURNAL = {Computers, Materials \& Continua},
YEAR = {2026},
URL = {http://www.techscience.com/cmc/online/detail/27281},
ISSN = {1546-2226},
ABSTRACT = {The deployment of supervised anomaly detection is typically limited by the high cost of annotation, privacy constraints, and the scarcity of anomalous samples. These constraints have motivated the use of vision-language pre-trained models for zero-shot anomaly detection. However, existing CLIP-based methods still face three limitations: a shared set of prompts is applied across feature layers, anomaly maps are fused by fixed strategies, and image-level anomaly scores are determined solely by global image-text similarity. These limitations reduce the accuracy of pixel-level localization and weaken the reliability of image-level anomaly prediction. To overcome these limitations, LaRP-CLIP is proposed. It introduces layer-aware prompt decoupling to better match feature layers with different semantic characteristics, adaptive fusion with error-prior-guided local refinement to produce cleaner and more precise anomaly maps, and a prototype branch to improve image-level scoring. Experiments on four industrial datasets and seven medical datasets show that LaRP-CLIP achieves strong performance in both image-level detection and pixel-level localization.},
DOI = {10.32604/cmc.2026.084208}
}