% Journal article record. Author names are stored in "Last, First" form and
% joined with " and "; LaTeX-special characters in the abstract are escaped.
@article{cmes.2020.010347,
  author   = {Yang, Jia and Liu, Haiyuan and He, Hao},
  title    = {Prediction of Intrinsically Disordered Proteins with a Low Computational Complexity Method},
  journal  = {Computer Modeling in Engineering \& Sciences},
  volume   = {125},
  number   = {1},
  pages    = {111--123},
  year     = {2020},
  issn     = {1526-1506},
  doi      = {10.32604/cmes.2020.010347},
  url      = {http://www.techscience.com/CMES/v125n1/40207},
  abstract = {The prediction of intrinsically disordered proteins is a hot research area in bio-information. Due to the high cost of experimental methods to evaluate disordered regions of protein sequences, it is becoming increasingly important to predict those regions through computational methods. In this paper, we developed a novel scheme by employing sequence complexity to calculate six features for each residue of a protein sequence, which includes the Shannon entropy, the topological entropy, the sample entropy and three amino acid preferences including Remark 465, Deleage/Roux, and Bfactor(2STD). Particularly, we introduced the sample entropy for calculating time series complexity by mapping the amino acid sequence to a time series of 0--9. To our knowledge, the sample entropy has not been previously used for predicting IDPs and hence is being used for the first time in our study. In addition, the scheme used a properly sized sliding window in every protein sequence which greatly improved the prediction performance. Finally, we used seven machine learning algorithms and tested with 10-fold cross-validation to get the results on the dataset R80 collected by Yang et al. and of the dataset DIS1556 from the Database of Protein Disorder (DisProt) (https://www.disprot.org) containing experimentally determined intrinsically disordered proteins (IDPs). The results showed that k-Nearest Neighbor was more appropriate and an overall prediction accuracy of 92\%. Furthermore, our method just used six features and hence required lower computational complexity.},
}