% Journal article in Computer Modeling in Engineering & Sciences, vol. 143, no. 2 (2025).
% Authors are listed in "Last, First" form and separated by " and ", as BibTeX requires.
@Article{cmes.2025.061653,
AUTHOR = {Kim, Sangmin and Lee, Byeongcheon and Maqsood, Muazzam and Moon, Jihoon and Rho, Seungmin},
TITLE = {Deep Learning-Based Natural Language Processing Model and Optical Character Recognition for Detection of Online Grooming on Social Networking Services},
JOURNAL = {Computer Modeling in Engineering \& Sciences},
VOLUME = {143},
YEAR = {2025},
NUMBER = {2},
PAGES = {2079--2108},
URL = {http://www.techscience.com/CMES/v143n2/61423},
ISSN = {1526-1506},
ABSTRACT = {The increased accessibility of social networking services (SNSs) has facilitated communication and information sharing among users. However, it has also heightened concerns about digital safety, particularly for children and adolescents who are increasingly exposed to online grooming crimes. Early and accurate identification of grooming conversations is crucial in preventing long-term harm to victims. However, research on grooming detection in South Korea remains limited, as existing models trained primarily on English text and fail to reflect the unique linguistic features of SNS conversations, leading to inaccurate classifications. To address these issues, this study proposes a novel framework that integrates optical character recognition (OCR) technology with KcELECTRA, a deep learning-based natural language processing (NLP) model that shows excellent performance in processing the colloquial Korean language. In the proposed framework, the KcELECTRA model is fine-tuned by an extensive dataset, including Korean social media conversations, Korean ethical verification data from AI-Hub, and Korean hate speech data from HuggingFace, to enable more accurate classification of text extracted from social media conversation images. Experimental results show that the proposed framework achieves an accuracy of 0.953, outperforming existing transformer-based models. Furthermore, OCR technology shows high accuracy in extracting text from images, demonstrating that the proposed framework is effective for online grooming detection. The proposed framework is expected to contribute to the more accurate detection of grooming text and the prevention of grooming-related crimes.},
DOI = {10.32604/cmes.2025.061653}
}