
@article{cmc.2026.078473,
  author   = {Bai, Tao and Tang, Yang and Shao, Kuan and Zhang, Zhenyong and Liu, Yuanteng},
  title    = {Privacy-Preserving {Transformer} Inference with Optimized Homomorphic Encryption and Secure Collaborative Computing},
  journal  = {Computers, Materials \& Continua},
  year     = {2026},
  url      = {http://www.techscience.com/cmc/online/detail/26447},
  issn     = {1546-2226},
  abstract = {In recent years, the rapid development of artificial intelligence has greatly promoted the application of Machine Learning as a Service (MLaaS). Users can upload their requirements through front-end applications, and the server provides model inference services after receiving the user input. However, MLaaS may lead to serious privacy breaches. Large language model services are typical representatives of MLaaS, and the Transformer is a typical structure in large language models. Therefore, this paper proposes a privacy-protected Transformer inference scheme based on the CKKS fully homomorphic encryption scheme to optimize computational and communication efficiency. Firstly, this paper implements efficient matrix multiplication based on ring multiplication and optimizes the matrix partition parameters to adapt to different types (including ciphertext-plaintext and ciphertext-ciphertext) and different matrix dimensions. Secondly, this paper optimizes and designs secure Softmax, LayerNorm, and Gelu protocols based on parameter fuzzing and collaborative computing to perform efficient, secure atomic computations over ciphertexts. Finally, experiments on text classification were conducted on the IMDB and AGNEWS datasets. The results show that, under our experimental settings (including an AMD Ryzen 7 5700G CPU with 32 GB RAM and 8-thread parallel computing using the Lattigo library), the scheme proposed in this paper completes the inference process within 3 s, with communication costs below 1 GB, and the computing accuracy is comparable to that of plaintext computing.},
  doi      = {10.32604/cmc.2026.078473},
  note     = {Online first; volume, number, and pages not yet assigned},
}



