% Journal article (Computers, Materials & Continua 70(2), 2022). Authors are
% separated by " and " in "Last, First" form, as BibTeX name parsing requires.
@Article{cmc.2022.019625,
  AUTHOR   = {Alsubari, Saleh Nagi and Deshmukh, Sachin N. and Alqarni, Ahmed Abdullah and Alsharif, Nizar and Aldhyani, Theyazn H. H. and Alsaade, Fawaz Waselallah and Khalaf, Osamah I.},
  TITLE    = {Data Analytics for the Identification of Fake Reviews Using Supervised Learning},
  JOURNAL  = {Computers, Materials \& Continua},
  VOLUME   = {70},
  YEAR     = {2022},
  NUMBER   = {2},
  PAGES    = {3189--3204},
  URL      = {http://www.techscience.com/cmc/v70n2/44647},
  ISSN     = {1546-2226},
  ABSTRACT = {Fake reviews, also known as deceptive opinions, are used to mislead
people and have gained more importance recently. This is due to the rapid
increase in online marketing transactions, such as selling and purchasing.
E-commerce provides a facility for customers to post reviews and comment
about the product or service when purchased. New customers usually go
through the posted reviews or comments on the website before making a
purchase decision. However, the current challenge is how new individuals can
distinguish truthful reviews from fake ones, which later deceive customers,
inflict losses, and tarnish the reputation of companies. The present paper
attempts to develop an intelligent system that can detect fake reviews on
e-commerce platforms using n-grams of the review text and sentiment scores
given by the reviewer. The proposed methodology adopted in this study used
a standard fake hotel review dataset for experimenting and data preprocessing
methods and a term frequency-inverse document frequency (TF-IDF)
approach for extracting features and their representation. For detection and
classification, n-grams of review texts were inputted into the constructed
models to be classified as fake or truthful. However, the experiments were
carried out using four different supervised machine-learning techniques and
were trained and tested on a dataset collected from the Trip Advisor website.
The classification results of these experiments showed that na{\"i}ve Bayes (NB),
support vector machine (SVM), adaptive boosting (AB), and random forest
(RF) received 88\%, 93\%, 94\%, and 95\%, respectively, based on testing accuracy
and the F1-score. The obtained results were compared with existing works
that used the same dataset, and the proposed methods outperformed the
comparable methods in terms of accuracy.},
  DOI      = {10.32604/cmc.2022.019625},
}