
% Journal article in CMES 142(3), 2025. Authors are separated with " and " and
% given in "Last, First" form so BibTeX/Biber parse three distinct names.
@article{cmes.2025.062302,
  author   = {Hazber, Mohamed A. G. and Senan, Ebrahim Mohammed and Alrashidi, Hezam Saud},
  title    = {Feature Engineering Methods for Analyzing Blood Samples for Early Diagnosis of Hepatitis Using Machine Learning Approaches},
  journal  = {Computer Modeling in Engineering \& Sciences},
  volume   = {142},
  number   = {3},
  pages    = {3229--3254},
  year     = {2025},
  issn     = {1526-1506},
  doi      = {10.32604/cmes.2025.062302},
  url      = {http://www.techscience.com/CMES/v142n3/59787},
  abstract = {Hepatitis is an infection that affects the liver through contaminated foods or blood transfusions, and it has many types, from normal to serious. Hepatitis is diagnosed through many blood tests and factors; Artificial Intelligence (AI) techniques have played an important role in early diagnosis and help physicians make decisions. This study evaluated the performance of Machine Learning (ML) algorithms on the hepatitis data set. The dataset contains missing values that have been processed and outliers removed. The dataset was counterbalanced by the Synthetic Minority Over-sampling Technique (SMOTE). The features of the data set were processed in two ways: first, the application of the Recursive Feature Elimination (RFE) algorithm to arrange the percentage of contribution of each feature to the diagnosis of hepatitis, then selection of important features using the t-distributed Stochastic Neighbor Embedding (t-SNE) and Principal Component Analysis (PCA) algorithms. Second, the SelectKBest function was applied to give scores for each attribute, followed by the t-SNE and PCA algorithms. Finally, the classification algorithms K-Nearest Neighbors (KNN), Support Vector Machine (SVM), Artificial Neural Network (ANN), Decision Tree (DT), and Random Forest (RF) were fed by the dataset after processing the features in different methods (RFE with t-SNE and PCA and SelectKBest with t-SNE and PCA). All algorithms yielded promising results for diagnosing hepatitis data sets. The RF with RFE and PCA methods achieved accuracy, Precision, Recall, and AUC of 97.18\%, 96.72\%, 97.29\%, and 94.2\%, respectively, during the training phase. During the testing phase, it reached accuracy, Precision, Recall, and AUC by 96.31\%, 95.23\%, 97.11\%, and 92.67\%, respectively.},
}



