
% Journal article in Computers, Materials & Continua, vol. 64, no. 1 (2020).
% Author names are kept in "First Last" order and joined with " and ",
% the only name separator BibTeX recognises.
@Article{cmc.2020.010066,
AUTHOR = {Shu Fang and Lei Huang and Yi Wan and Weize Sun and Jingxin Xu},
TITLE = {Outlier Detection for Water Supply Data Based on {Joint Auto-Encoder}},
JOURNAL = {Computers, Materials \& Continua},
VOLUME = {64},
YEAR = {2020},
NUMBER = {1},
PAGES = {541--555},
URL = {http://www.techscience.com/cmc/v64n1/39157},
ISSN = {1546-2226},
ABSTRACT = {With the development of science and technology, the status of the water
environment has received more and more attention. In this paper, we propose a deep
learning model, named a Joint Auto-Encoder network, to solve the problem of outlier
detection in water supply data. The Joint Auto-Encoder network first expands the size of
training data and extracts the useful features from the input data, and then reconstructs
the input data effectively into an output. The outliers are detected based on the network's
reconstruction errors, with a larger reconstruction error indicating a higher rate to be an
outlier. For water supply data, there are mainly two types of outliers: outliers with large
values and those with values closed to zero. We set two separate thresholds, $\tau_1$ and $\tau_2$,
for the reconstruction errors to detect the two types of outliers respectively. The data
samples with reconstruction errors exceeding the thresholds are voted to be outliers. The
two thresholds can be calculated by the classification confusion matrix and the receiver
operating characteristic (ROC) curve. We have also performed comparisons between the
Joint Auto-Encoder and the vanilla Auto-Encoder in this paper on both the synthesis data
set and the MNIST data set. As a result, our model has proved to outperform the vanilla
Auto-Encoder and some other outlier detection approaches with the recall rate of 98.94
percent in water supply data.},
DOI = {10.32604/cmc.2020.010066}
}



