% Journal article: Wei and Shu, "Gradient-Guided Assembly Instruction Relocation ...", CMC 86(1), 2026.
% NOTE(review): in the abstract, "calculating the norm" appears to have lost a math symbol
% during export (the specific norm is not recoverable from this file) -- confirm against the publisher page.
@article{cmc.2025.069562,
  author   = {Wei, Ran and Shu, Hui},
  title    = {Gradient-Guided Assembly Instruction Relocation for Adversarial Attacks Against Binary Code Similarity Detection},
  journal  = {Computers, Materials \& Continua},
  volume   = {86},
  number   = {1},
  pages    = {1--23},
  year     = {2026},
  issn     = {1546-2226},
  doi      = {10.32604/cmc.2025.069562},
  url      = {http://www.techscience.com/cmc/v86n1/64473},
  abstract = {Transformer-based models have significantly advanced binary code similarity detection (BCSD) by leveraging their semantic encoding capabilities for efficient function matching across diverse compilation settings. Although adversarial examples can strategically undermine the accuracy of BCSD models and protect critical code, existing techniques predominantly depend on inserting artificial instructions, which incur high computational costs and offer limited diversity of perturbations. To address these limitations, we propose AIMA, a novel gradient-guided assembly instruction relocation method. Our method decouples the detection model into tokenization, embedding, and encoding layers to enable efficient gradient computation. Since token IDs of instructions are discrete and non-differentiable, we compute gradients in the continuous embedding space to evaluate the influence of each token. The most critical tokens are identified by calculating the norm of their embedding gradients. We then establish a mapping between instructions and their corresponding tokens to aggregate token-level importance into instruction-level significance. To maximize adversarial impact, a sliding window algorithm selects the most influential contiguous segments for relocation, ensuring optimal perturbation with minimal length. This approach efficiently locates critical code regions without expensive search operations. The selected segments are relocated outside their original function boundaries via a jump mechanism, which preserves runtime control flow and functionality while introducing ``deletion'' effects in the static instruction sequence. Extensive experiments show that AIMA reduces similarity scores by up to 35.8\% in state-of-the-art BCSD models. When incorporated into training data, it also enhances model robustness, achieving a 5.9\% improvement in AUROC.},
}