
% Journal article in Computers, Materials & Continua, vol. 63, no. 3 (2020).
% Authors are separated with " and " and written "Last, First" so BibTeX
% parses five distinct names; the acronyms in the title are brace-protected
% so sentence-casing styles keep them uppercase.
@article{cmc.2020.09962,
  author   = {Zhang, Anman and Li, Bohan and Wang, Wenhuan and Wan, Shuo and Chen, Weitong},
  title    = {{MII}: A Novel Text Classification Model Combining Deep Active Learning with {BERT}},
  journal  = {Computers, Materials \& Continua},
  volume   = {63},
  number   = {3},
  pages    = {1499--1514},
  year     = {2020},
  issn     = {1546-2226},
  doi      = {10.32604/cmc.2020.09962},
  url      = {http://www.techscience.com/cmc/v63n3/38889},
  abstract = {Active learning has been widely utilized to reduce the labeling cost of
supervised learning. By selecting specific instances to train the model, the performance of
the model was improved within limited steps. However, rare work paid attention to the
effectiveness of active learning on it. In this paper, we proposed a deep active learning
model with bidirectional encoder representations from transformers (BERT) for text
classification. BERT takes advantage of the self-attention mechanism to integrate
contextual information, which is beneficial to accelerate the convergence of training. As
for the process of active learning, we design an instance selection strategy based on
posterior probabilities Margin, Intra-correlation and Inter-correlation (MII). Selected
instances are characterized by small margin, low intra-cohesion and high inter-cohesion.
We conduct extensive experiments and analytics with our methods. The effect of learner
is compared while the effect of sampling strategy and text classification is assessed from
three real datasets. The results show that our method outperforms the baselines in terms
of accuracy.},
}



