
@article{cmc.2025.073798,
  author   = {Tang, Simin and Zhang, Zhiyong and Pan, Junyan and Quan, Gaoyuan and Wang, Weiguo and Jing, Junchang},
  title    = {{AFI}: Blackbox Backdoor Detection Method Based on Adaptive Feature Injection},
  journal  = {Computers, Materials \& Continua},
  volume   = {87},
  number   = {1},
  pages    = {--},
  year     = {2026},
  issn     = {1546-2226},
  url      = {http://www.techscience.com/cmc/v87n1/66075},
  doi      = {10.32604/cmc.2025.073798},
  abstract = {At inference time, deep neural networks are susceptible to backdoor attacks, which can produce attacker-controlled outputs when inputs contain carefully crafted triggers. Existing defense methods often focus on specific attack types or incur high costs, such as data cleaning or model fine-tuning. In contrast, we argue that it is possible to achieve effective and generalizable defense without removing triggers or incurring high model-cleaning costs. From the attacker's perspective and based on characteristics of vulnerable neuron activation anomalies, we propose an Adaptive Feature Injection (AFI) method for black-box backdoor detection. AFI employs a pre-trained image encoder to extract multi-level deep features and constructs a dynamic weight fusion mechanism for precise identification and interception of poisoned samples. Specifically, we select the control samples with the largest feature differences from the clean dataset via feature-space analysis, and generate blended sample pairs with the test sample using dynamic linear interpolation. The detection statistic is computed by measuring the divergence {$G(x)$} in model output responses. We systematically evaluate the effectiveness of AFI against representative backdoor attacks, including BadNets, Blend, WaNet, and IAB, on three benchmark datasets: MNIST, CIFAR-10, and ImageNet. Experimental results show that AFI can effectively detect poisoned samples, achieving average detection rates of 95.20\%, 94.15\%, and 86.49\% on these datasets, respectively. Compared with existing methods, AFI demonstrates strong cross-domain generalization ability and robustness to unknown attacks.},
}



