
% Journal article (Computers, Materials & Continua, vol. 85, no. 2, 2025).
% Author names are separated by " and " (BibTeX's required name separator);
% acronyms in the title are brace-protected against style-driven recasing.
@Article{cmc.2025.067390,
AUTHOR = {Almuhaideb, Sarab and Altwaijry, Najwa and Al-Turaiki, Isra and Khan, Ahmad Raza and Rizvi, Hamza Ali},
TITLE = {A Comparative Study of Data Representation Techniques for Deep Learning-Based Classification of Promoter and Histone-Associated {DNA} Regions},
JOURNAL = {Computers, Materials \& Continua},
VOLUME = {85},
YEAR = {2025},
NUMBER = {2},
PAGES = {3095--3128},
URL = {http://www.techscience.com/cmc/v85n2/63823},
ISSN = {1546-2226},
ABSTRACT = {Many bioinformatics applications require determining the class of a newly sequenced Deoxyribonucleic acid (DNA) sequence, making DNA sequence classification an integral step in performing bioinformatics analysis, where large biomedical datasets are transformed into valuable knowledge. Existing methods rely on a feature extraction step and suffer from high computational time requirements. In contrast, newer approaches leveraging deep learning have shown significant promise in enhancing accuracy and efficiency. In this paper, we investigate the performance of various deep learning architectures: Convolutional Neural Network (CNN), CNN-Long Short-Term Memory (CNN-LSTM), CNN-Bidirectional Long Short-Term Memory (CNN-BiLSTM), Residual Network (ResNet), and InceptionV3 for DNA sequence classification. Various numerical and visual data representation techniques are utilized to represent the input datasets, including: label encoding, $k$-mer sentence encoding, $k$-mer one-hot vector, Frequency Chaos Game Representation (FCGR) and 5-Color Map (ColorSquare). Three datasets are used for the training of the models including H3, H4 and DNA Sequence Dataset (Yeast, Human, Arabidopsis Thaliana). Experiments are performed to determine which combination of DNA representation and deep learning architecture yields improved performance for the classification task. Our results indicate that using a hybrid CNN-LSTM neural network trained on DNA sequences represented as one-hot encoded $k$-mer sequences yields the best performance, achieving an accuracy of 92.1\%.},
DOI = {10.32604/cmc.2025.067390}
}



