% Journal article in Computer Systems Science and Engineering 35(3), 2020.
% Author names use the "Last, First" form joined by " and " so each of the
% five authors is parsed separately; the empty ISSN field from the publisher
% export was dropped to avoid empty-field warnings.
@article{csse.2020.35.223,
  author   = {Zhu, Guangli and Liu, Wenting and Zhang, Shunxiang and Chen, Xiang and Yin, Chang},
  title    = {The Method for Extracting New Login Sentiment Words from {Chinese} Micro-Blog Based on Improved Mutual Information},
  journal  = {Computer Systems Science and Engineering},
  volume   = {35},
  number   = {3},
  pages    = {223--232},
  year     = {2020},
  doi      = {10.32604/csse.2020.35.223},
  url      = {http://www.techscience.com/csse/v35n3/40093},
  abstract = {The current method of extracting new login sentiment words not only ignores the diversity of patterns constituted by new multi-character words (the number of words is greater than two), but also disregards the influence of other new words co-occurring with a new word connoting sentiment. To solve this problem, this paper proposes a method for extracting new login sentiment words from Chinese micro-blog based on improved mutual information. First, micro-blog data are preprocessed, taking into consideration some nonsense signals such as web links and punctuation. Based on preprocessed data, the candidate strings are obtained by applying the N-gram segmentation method. Then, the extraction algorithm for new login words is proposed, which combines multi-character mutual information (MMI) and left and right adjacent entropy. In this algorithm, the MMI describes the internal cohesion of the candidate string of multiple words in a variety of constituted patterns. Then, the candidate strings are extended and filtered according to frequency, MMI, and right and left adjacency entropy, to extract new login words. Finally, the algorithm for the extraction of new login sentiment words is proposed. In this algorithm, the Sentiment Similarity between words (SW) is determined in order to measure the sentiment similarity of a new login word to other sentiment words and other new login sentiment words. Then, the sentiment tendency values of new login words are obtained by calculating the SW to extract new login sentiment words. Experimental results show that this method is very effective for the extraction of new login sentiment words.},
}