% Journal article record. Author names are separated by the keyword "and" in
% "Last, First" form, and percent signs in the abstract are escaped so the
% text stays valid LaTeX if a bibliography style prints it.
@article{cmc.2020.09648,
  author   = {Wang, Shiqi and Yang, Yimin and Wei, Ruizhong and Wu, Qingming Jonathan},
  title    = {3-Dimensional Bag of Visual Words Framework on Action Recognition},
  journal  = {Computers, Materials \& Continua},
  volume   = {63},
  number   = {3},
  pages    = {1081--1091},
  year     = {2020},
  issn     = {1546-2226},
  doi      = {10.32604/cmc.2020.09648},
  url      = {http://www.techscience.com/cmc/v63n3/38863},
  abstract = {Human motion recognition plays a crucial role in the video analysis
    framework. However, a given video may contain a variety of noises, such as an unstable
    background and redundant actions, that are completely different from the key actions.
    These noises pose a great challenge to human motion recognition. To solve this problem,
    we propose a new method based on the 3-Dimensional (3D) Bag of Visual Words
    (BoVW) framework. Our method includes two parts: The first part is the video action
    feature extractor, which can identify key actions by analyzing action features. In the
    video action encoder, by analyzing the action characteristics of a given video, we use the
    deep 3D CNN pre-trained model to obtain expressive coding information. A classifier
    with subnetwork nodes is used for the final classification. The extensive experiments
    demonstrate that our method leads to an impressive effect on complex video analysis.
    Our approach achieves state-of-the-art performance on the datasets of UCF101 (85.3\%)
    and HMDB51 (54.5\%).}
}