% Entry cmc.2026.083365: journal article in Computers, Materials and Continua.
% Cleanup applied to the publisher export:
%   - AUTHOR: names are now separated by " and " and written as "Last, First";
%     the exported comma-separated list was parsed by BibTeX as one person.
%   - YEAR: the export left it empty; 2026 is inferred from the DOI suffix and
%     the citation key. TODO confirm against the published record.
%   - VOLUME, NUMBER, PAGES: dropped because the export left them empty or as
%     the unfilled template token "{pages}". Presumably not yet assigned
%     (the URL path contains "online"); TODO add them once known.
%   - ABSTRACT: percent signs are escaped so that styles which typeset or load
%     the abstract do not treat the rest of the line as a LaTeX comment.
@Article{cmc.2026.083365,
AUTHOR = {Nguyen-Trong, Khanh and Nguyen-Thi-Thanh, Tan},
TITLE = {From Public Benchmarks to a Low-Resource Target Domain: A Comparative Study of Wood Surface Defect Detection},
JOURNAL = {Computers, Materials \& Continua},
YEAR = {2026},
URL = {http://www.techscience.com/cmc/online/detail/27324},
ISSN = {1546-2226},
ABSTRACT = {Automated wood surface defect detection is difficult to evaluate reliably because defects are often small, low-contrast, and visually confounded by natural wood texture, while reported performance can vary substantially with benchmark design and domain shift. To address this issue, we conduct a comparative study across three practically relevant settings: a curated seven-class benchmark, a broader in-domain seven-class protocol derived from the same source dataset, and supervised adaptation to a low-resource Vietnamese target domain. We compare lightweight two-stage detectors based on Faster Region-based Convolutional Neural Network (Faster R-CNN) with MobileNetV3-FPN against a compact You Only Look Once version 8 (YOLOv8s) baseline, while also testing two small-object-oriented YOLO refinements as targeted diagnostic variants rather than as the primary claimed contribution. Across in-domain experiments, the compact YOLOv8s baseline delivers the strongest performance, achieving 84.38\% AP50 on the curated benchmark, whereas performance drops to 81.16\% AP50 under the broader protocol, indicating that benchmark breadth materially changes the apparent difficulty of the task and the relative strength of competing models. In the target-domain setting, source-initialized fine-tuning improves optimization behavior and can outperform target-only training in a representative single run, but repeated-seed evaluation does not confirm a stable held-out-test advantage under the same adaptation budget. These findings suggest that conclusions drawn from a single curated benchmark may overstate model robustness, and that for wood defect detection, protocol breadth and source-to-target shift should be treated as central evaluation factors rather than secondary experimental details.},
DOI = {10.32604/cmc.2026.083365}
}