% Jha and Ahmad (2021), Computer Modeling in Engineering and Sciences 129(2):
% LSA-RAS feature generation and selection for lymph disease recognition.
% Authors are joined with " and " so BibTeX parses them as two separate names;
% the percent sign in the abstract is escaped so LaTeX does not treat it as a comment.
@Article{cmes.2021.016817,
  AUTHOR   = {Jha, Sunil Kr. and Ahmad, Zulfiqar},
  TITLE    = {An Effective Feature Generation and Selection Approach for Lymph Disease Recognition},
  JOURNAL  = {Computer Modeling in Engineering \& Sciences},
  VOLUME   = {129},
  NUMBER   = {2},
  PAGES    = {567--594},
  YEAR     = {2021},
  ISSN     = {1526-1506},
  DOI      = {10.32604/cmes.2021.016817},
  URL      = {http://www.techscience.com/CMES/v129n2/44816},
  ABSTRACT = {Health care data mining is noteworthy in disease diagnosis and recognition procedures.
    There exist several potentials to further improve the performance of machine learning based-classification methods in healthcare data analysis.
    The selection of a substantial subset of features is one of the feasible approaches to achieve improved recognition results of classification methods in disease diagnosis prediction.
    In the present study, a novel combined approach of feature generation using latent semantic analysis (LSA) and selection using ranker search (RAS) has been proposed to improve the performance of classification methods in lymph disease diagnosis prediction.
    The performance of the proposed combined approach (LSA-RAS) for feature generation and selection is validated using three function-based and two tree-based classification methods.
    The performance of the LSA-RAS selected features is compared with the original attributes and other subsets of attributes and features chosen by nine different attributes and features selection approaches in the analysis of a most widely used benchmark and open access lymph disease dataset.
    The LSA-RAS selected features improve the recognition accuracy of the classification methods significantly in the diagnosis prediction of the lymph disease.
    The tree-based classification methods have better recognition accuracy than the function-based classification methods.
    The best performance (recognition accuracy of 93.91\%) is achieved for the logistic model tree (LMT) classification method using the feature subset generated by the proposed combined approach (LSA-RAS).},
}