
% Journal article in Computers, Materials and Continua, vol. 64, no. 1 (2020).
% Author names are separated by " and " (never commas) so BibTeX parses six distinct authors.
@article{cmc.2020.09861,
  author   = {Jiang, Yu and Yu, Dengwen and Zhao, Mingzhao and Bai, Hongtao and Wang, Chong and He, Lili},
  title    = {Analysis of Semi-Supervised Text Clustering Algorithm on Marine Data},
  journal  = {Computers, Materials \& Continua},
  volume   = {64},
  number   = {1},
  pages    = {207--216},
  year     = {2020},
  issn     = {1546-2226},
  doi      = {10.32604/cmc.2020.09861},
  url      = {http://www.techscience.com/cmc/v64n1/39138},
  abstract = {Semi-supervised clustering improves learning performance as long as it uses a
    small number of labeled samples to assist un-tagged samples for learning. This paper
    implements and compares unsupervised and semi-supervised clustering analysis of
    BOA-Argo ocean text data. Unsupervised K-Means and Affinity Propagation (AP) are two
    classical clustering algorithms. The Election-AP algorithm is proposed to handle the final
    cluster number in AP clustering as it has proved to be difficult to control in a suitable
    range. Semi-supervised samples thermocline data in the BOA-Argo dataset according to
    the thermocline standard definition, and use this data for semi-supervised cluster analysis.
    Several semi-supervised clustering algorithms were chosen for comparison of learning
    performance: Constrained-K-Means, Seeded-K-Means, SAP (Semi-supervised Affinity
    Propagation), LSAP (Loose Seed AP) and CSAP (Compact Seed AP). In order to adapt
    the single label, this paper improves the above algorithms to SCKM (improved
    Constrained-K-Means), SSKM (improved Seeded-K-Means), and SSAP (improved
    Semi-supervised Affinity Propagation) to perform semi-supervised clustering analysis
    on the data. A DSAP (Double Seed AP) semi-supervised clustering algorithm based on
    compact seeds is proposed as the experimental data shows that DSAP has a better
    clustering effect. The unsupervised and semi-supervised clustering results are used to
    analyze the potential patterns of marine data.},
}



