% Journal article entry. Author names are in "Last, First" form and joined with the word and, which is the only name separator BibTeX recognises; literal percent signs in the abstract are escaped so they cannot start a TeX comment.
@Article{cmes.2025.064588,
AUTHOR = {Shin, Jungpil and Hasan, Md. Al Mehedi and Maniruzzaman, Md. and Nishimura, Satoshi and Alfarhood, Sultan},
TITLE = {Video-Based Human Activity Recognition Using Hybrid Deep Learning Model},
JOURNAL = {Computer Modeling in Engineering \& Sciences},
VOLUME = {143},
YEAR = {2025},
NUMBER = {3},
PAGES = {3615--3638},
URL = {http://www.techscience.com/CMES/v143n3/62814},
ISSN = {1526-1506},
ABSTRACT = {Activity recognition is a challenging topic in the field of computer vision that has various applications, including surveillance systems, industrial automation, and human-computer interaction. Today, the demand for automation has greatly increased across industries worldwide. Real-time detection requires edge devices with limited computational time. This study proposes a novel hybrid deep learning system for human activity recognition (HAR), aiming to enhance the recognition accuracy and reduce the computational time. The proposed system combines a pre-trained image classification model with a sequence analysis model. First, the dataset was divided into a training set (70\%), validation set (10\%), and test set (20\%). Second, all the videos were converted into frames and deep-based features were extracted from each frame using convolutional neural networks (CNNs) with a vision transformer. Following that, bidirectional long short-term memory (BiLSTM)- and temporal convolutional network (TCN)-based models were trained using the training set, and their performances were evaluated using the validation set and test set. Four benchmark datasets (UCF11, UCF50, UCF101, and JHMDB) were used to evaluate the performance of the proposed HAR-based system. The experimental results showed that the combination of ConvNeXt and the TCN-based model achieved a recognition accuracy of 97.73\% for UCF11, 98.81\% for UCF50, 98.46\% for UCF101, and 83.38\% for JHMDB, respectively. This represents improvements in the recognition accuracy of 4\%, 2.67\%, 3.67\%, and 7.08\% for the UCF11, UCF50, UCF101, and JHMDB datasets, respectively, over existing models. Moreover, the proposed HAR-based system obtained superior recognition accuracy, shorter computational times, and minimal memory usage compared to the existing models.},
DOI = {10.32604/cmes.2025.064588}
}