
% Wei et al. (2020), CEWR model; Computers, Materials & Continua 64(3).
% Authors are separated by " and " (required by BibTeX name parsing);
% literal percent signs in the abstract are escaped so it can be typeset.
@Article{cmc.2020.010813,
AUTHOR = {Wei, Tingxin and Qu, Weiguang and Zhou, Junsheng and Long, Yunfei and Gu, Yanhui and Xia, Zhentao},
TITLE = {Improving {Chinese} Word Representation with Conceptual Semantics},
JOURNAL = {Computers, Materials \& Continua},
VOLUME = {64},
YEAR = {2020},
NUMBER = {3},
PAGES = {1897--1913},
URL = {http://www.techscience.com/cmc/v64n3/39466},
ISSN = {1546-2226},
ABSTRACT = {The meaning of a word includes a conceptual meaning and a distributive
meaning. Word embedding based on distribution suffers from insufficient conceptual
semantic representation caused by data sparsity, especially for low-frequency words. In
knowledge bases, manually annotated semantic knowledge is stable and the essential
attributes of words are accurately denoted. In this paper, we propose a Conceptual
Semantics Enhanced Word Representation (CEWR) model, computing the synset
embedding and hypernym embedding of Chinese words based on the Tongyici Cilin
thesaurus, and aggregating it with distributed word representation to have both distributed
information and the conceptual meaning encoded in the representation of words. We
evaluate the CEWR model on two tasks: word similarity computation and short text
classification. The Spearman correlation between model results and human judgement are
improved to 64.71\%, 81.84\%, and 85.16\% on Wordsim297, MC30, and RG65,
respectively. Moreover, CEWR improves the F1 score by 3\% in the short text
classification task. The experimental results show that CEWR can represent words in a
more informative approach than distributed word embedding. This proves that conceptual
semantics, especially hypernymous information, is a good complement to distributed
word representation.},
DOI = {10.32604/cmc.2020.010813}
}



