
% Journal article record. Authors are separated with " and " (required by BibTeX name parsing);
% case-sensitive tokens in the title are brace-protected; the abstract is plain text/LaTeX (no HTML).
@Article{oncologie.2022.021256,
AUTHOR = {Rong, Minxi and Li, Yong and Guo, Xiaoli and Zong, Tao and Ma, Zhiyuan and Li, Penglei},
TITLE = {An {ISSA-RF} Algorithm for Prediction Model of Drug Compound Molecules Antagonizing {ERα} Gene Activity},
JOURNAL = {Oncologie},
VOLUME = {24},
YEAR = {2022},
NUMBER = {2},
PAGES = {309--327},
URL = {http://www.techscience.com/oncologie/v24n2/48742},
ISSN = {1765-2839},
ABSTRACT = {Objectives: The ERα biological activity prediction model is constructed by the compound molecular data of the
anti-breast cancer therapeutic target ERα and its biological activity data, which improves the screening efficiency
of anti-breast cancer drug candidates and saves the time and cost of drug development. Methods: In this paper,
Ridge model is used to screen out molecular descriptors with a high degree of influence on the biological activity
of ERα and divide datasets with different numbers of the molecular descriptors by screening results. Random
Forest (RF) is trained by Root Mean Square Error (RMSE) and Coefficient of determination ($R^2$) to determine
the parameter range of RF optimized by Improved Sparrow Search Algorithm (ISSA-RF) which adds adaptive
weights compared with the ordinary Sparrow Search Algorithm (SSA). Then the divided datasets were put into
the ISSA-RF with defined parameter ranges to construct a regression prediction model for the biological activity
of compounds on ERα, and compared with Genetic Algorithm Optimized Support Vector Machine (GA-SVM),
Back Propagation Neural Network (BP), Extreme Gradient Boosting (XGBoost) for analysis and comparison.
Results: We have tried a variety of combinations of molecular descriptors with different numbers and the above
four models all achieve the best accuracy model on the dataset constructed when using 100 molecular descriptors.
The ISSA-RF model proposed in this paper has a high degree of agreement between the predicted biological value
of ERα and the actual value and prediction accuracy (RMSE) is 0.6876389. Conclusions: In the training model,
ISSA-RF is proposed and it is proved that adding adaptive weights can greatly optimize the fitness accuracy of the
sparrow algorithm. In the experimental part, this paper uses a variety of molecular descriptors for training, which
reduces the chance of model training accuracy caused by the number of different molecular descriptors, and limits
the search range of the ISSA-RF model to avoid the local optimization of the model. Secondly, the parameter
optimization time is greatly reduced. In conclusion, the prediction model of drug compound molecules that
antagonize ERα gene activity (ISSA-RF) proposed in this paper improves the accuracy and efficiency of anti-breast
cancer drug candidates, and provides a new idea for building a quantitative structure-activity relationship model.},
DOI = {10.32604/oncologie.2022.021256}
}



