
% Journal article: Intelligent Automation & Soft Computing, vol. 26, no. 5 (2020).
% Author names are separated by " and " (never commas) so each one is parsed as
% a distinct person; "Last, First" form keeps the surname split unambiguous.
@article{iasc.2020.011705,
  author   = {Yang, Yang and Zhao, Qian and Ruan, Linna and Gao, Zhipeng and Huo, Yonghua and Qiu, Xuesong},
  title    = {Oversampling Methods Combined Clustering and Data Cleaning for Imbalanced Network Data},
  journal  = {Intelligent Automation \& Soft Computing},
  volume   = {26},
  number   = {5},
  pages    = {1139--1155},
  year     = {2020},
  issn     = {2326-005X},
  doi      = {10.32604/iasc.2020.011705},
  url      = {http://www.techscience.com/iasc/v26n5/40831},
  abstract = {In network anomaly detection, network traffic data are often
              imbalanced, that is, certain classes of network traffic data have
              a large sample data volume while other classes have few, resulting
              in reduced overall network traffic anomaly detection on a minority
              class of samples. For imbalanced data, researchers have proposed
              the use of oversampling techniques to balance data sets; in
              particular, an oversampling method called the SMOTE provides a
              simple and effective solution for balancing data sets. However,
              current oversampling methods suffer from the generation of noisy
              samples and poor information quality. Hence, this study proposes
              an oversampling method for imbalanced network traffic data that
              combines the SMOTE algorithm and FINCH clustering algorithm to
              filter out minority sample clusters, proposes a scheme to allocate
              the number of synthetic samples per cluster according to the
              clustering sparsity and sample weight, and finally uses
              multi-layer sensors for noisy sample cleaning during sampling. We
              compare the proposed method with other oversampling methods,
              verifying that a data set processed using this method works better
              in network traffic anomaly detection.},
}



