% Barakat and ElSabbagh (2022), journal article in Intelligent Automation and Soft Computing 32(2).
% Authors are written in "Last, First" form and joined with the keyword " and " so that
% each person is parsed as a separate name. Text fields use ASCII quotes and LaTeX escapes
% so the entry is safe under both classic BibTeX and Biber.
@article{iasc.2022.022104,
  author   = {Barakat, Nahla H. and ElSabbagh, Ahmed H.},
  title    = {From Similarities to Probabilities: Feature Engineering for Predicting Drugs' Adverse Reactions},
  journal  = {Intelligent Automation \& Soft Computing},
  volume   = {32},
  number   = {2},
  pages    = {1207--1224},
  year     = {2022},
  issn     = {2326-005X},
  doi      = {10.32604/iasc.2022.022104},
  url      = {http://www.techscience.com/iasc/v32n2/45595},
  abstract = {Social media recently became convenient platforms for different groups with common concerns to share their experiences, including Adverse Drug Reactions (ADRs). In this paper, we propose a two stage intelligent algorithm which we call ``Simi\_to\_Prob'', that utilizes social media forums; for ranking ADRs, and evaluating the ADRs prevalence considering different age and gender groups as its first stage. In the second stage, ADRs are predicted utilizing a different data set from the Food and Drug Administration (FDA). In particular, Natural Language Processing (NLP) is used on social media to extract ranked lists of ADRs, which are then validated using novel intrinsic evaluation methods. In the second stage, feature engineering is used to extend the input feature space, then a two stage supervised machine learning method is used to predict future ADRs incidences. Our results show correct ranked list of ADRs for three antihypertensive drugs, where high Spearman's rank correlation coefficients (rs) of 0.7458, 0.6678 and 0.5929 were obtained between SIDER database for drug ADRs, and our obtained lists from social media. Furthermore, Relatedness between ADRs and age and gender groups achieved high area under the ROC curve (AUC) reaching 0.959. The second stage results showed high AUCs of 0.96 and 0.99 for the prediction of future ADRs probabilities. The proposed algorithm shows that mining social media can provide reliable source of information, and additional features that can be used to boost supervised machine learning methods' performance in different domains including Pharmacovigilance research.},
}