% Journal article: Yang, Liu, He (2020), CMES vol. 125, no. 1, pp. 111--123.
% Authors are stored in "Last, First" form joined by " and " so BibTeX parses three names.
% In the abstract, the percent sign is escaped and the DisProt URL is kept on one line.
@article{cmes.2020.010347,
  author   = {Yang, Jia and Liu, Haiyuan and He, Hao},
  title    = {Prediction of Intrinsically Disordered Proteins with a Low Computational Complexity Method},
  journal  = {Computer Modeling in Engineering \& Sciences},
  volume   = {125},
  number   = {1},
  pages    = {111--123},
  year     = {2020},
  issn     = {1526-1506},
  doi      = {10.32604/cmes.2020.010347},
  url      = {http://www.techscience.com/CMES/v125n1/40207},
  abstract = {The prediction of intrinsically disordered proteins is a hot research area
    in bio-information. Due to the high cost of experimental methods to evaluate
    disordered regions of protein sequences, it is becoming increasingly important to
    predict those regions through computational methods. In this paper, we developed
    a novel scheme by employing sequence complexity to calculate six features for
    each residue of a protein sequence, which includes the Shannon entropy, the
    topological entropy, the sample entropy and three amino acid preferences including
    Remark 465, Deleage/Roux, and Bfactor(2STD). Particularly, we introduced the
    sample entropy for calculating time series complexity by mapping the amino acid
    sequence to a time series of 0--9. To our knowledge, the sample entropy has not
    been previously used for predicting IDPs and hence is being used for the first time
    in our study. In addition, the scheme used a properly sized sliding window in
    every protein sequence which greatly improved the prediction performance.
    Finally, we used seven machine learning algorithms and tested with 10-fold
    cross-validation to get the results on the dataset R80 collected by Yang et al.
    and of the dataset DIS1556 from the Database of Protein Disorder (DisProt)
    (https://www.disprot.org) containing experimentally determined intrinsically
    disordered proteins (IDPs). The results showed that k-Nearest Neighbor was more
    appropriate and an overall prediction accuracy of 92\%. Furthermore, our method
    just used six features and hence required lower computational complexity.},
}