
@comment{Journal article entry. Author names are stored in "Last, First" form and separated by the keyword "and", as BibTeX requires; LaTeX special characters in the abstract are escaped.}
@article{cmc.2025.061396,
  author   = {Wu, Rongsen and Xu, Jie and Zhang, Yuhang and Zhao, Changming and Xie, Yiweng and Wu, Zelei and Li, Yunji and Guo, Jinhong and Tang, Shiyang},
  title    = {Video Action Recognition Method Based on Personalized Federated Learning and Spatiotemporal Features},
  journal  = {Computers, Materials \& Continua},
  volume   = {83},
  number   = {3},
  pages    = {4961--4978},
  year     = {2025},
  issn     = {1546-2226},
  doi      = {10.32604/cmc.2025.061396},
  url      = {http://www.techscience.com/cmc/v83n3/60986},
  abstract = {With the rapid development of artificial intelligence and Internet of Things technologies, video action recognition technology is widely applied in various scenarios, such as personal life and industrial production. However, while enjoying the convenience brought by this technology, it is crucial to effectively protect the privacy of users' video data. Therefore, this paper proposes a video action recognition method based on personalized federated learning and spatiotemporal features. Under the framework of federated learning, a video action recognition method leveraging spatiotemporal features is designed. For the local spatiotemporal features of the video, a new differential information extraction scheme is proposed to extract differential features with a single RGB frame as the center, and a spatial-temporal module based on local information is designed to improve the effectiveness of local feature extraction; for the global temporal features, a method of extracting action rhythm features using differential technology is proposed, and a time module based on global information is designed. Different translational strides are used in the module to obtain bidirectional differential features under different action rhythms. Additionally, to address user data privacy issues, the method divides model parameters into local private parameters and public parameters based on the structure of the video action recognition model. This approach enhances model training performance and ensures the security of video data. The experimental results show that under personalized federated learning conditions, an average accuracy of 97.792\% was achieved on the UCF-101 dataset, which is non-independent and identically distributed (non-IID). This research provides technical support for privacy protection in video action recognition.},
}



