% Journal article entry, cleaned up from a publisher auto-export.
% NOTE(review): the export left the year blank; 2026 is inferred from the DOI and
% citation key (cmc.2026.077569) -- confirm against the publisher page.
% The volume, number and pages fields were empty or an unfilled template placeholder
% in the export and are omitted here; add them once the article is assigned to an issue.
% Author names keep the word order given in the export, only the separators are fixed.
@article{cmc.2026.077569,
  author   = {Ramakrishna Gandi and Geetha A. and Ramasubbareddy B.},
  title    = {{NeuroVision}: Multimodal Emotion Recognition via Dynamic Frame Enhancement and {EEG}-Guided Fusion},
  journal  = {Computers, Materials \& Continua},
  year     = {2026},
  doi      = {10.32604/cmc.2026.077569},
  url      = {http://www.techscience.com/cmc/online/detail/27068},
  issn     = {1546-2226},
  abstract = {In the fields of affective computing, human-computer interaction, and psychological evaluation, the capacity to recognize emotions is crucial. Unimodal systems in the form of visual systems or of the physiological type are usually not designed to capture the complexity that exists in emotional states. The paper proposes NeuroVision: Multimodal Emotion Recognition System, combining facial video frames information and electroencephalogram (EEG) based information to enhance the accuracy and stability of the system. The system applies ResNet50 on the spatial information of facial expressions, Vision Transformer (ViT) on the temporal movements in the video, and an EEG-MLP Encoder to read the signal, without preprocessing, that captures pure neural patterns. The fully connected layers receive the fused characteristics and label them into three emotional conditions, using a softmax operational condition, which are: Happy, Sad, and Neutral. This model was evaluated on the LUMED-2 set, and the obtained accuracy of classification (96.9\%) is higher than that observed by the existing unimodal and multimodal systems. Performing a lot of evaluations, like learning curves, confusion matrices, and benchmarking, proves that NeuroVision is effective and shows generalization capability, and can be used in real-time with an adaptive system, like mental health tracking and responsive interfaces.},
}