@Article{cmc.2020.010727,
AUTHOR = {Jie Xu, Qun Wang, Yifan Wang, Khan Asif},
TITLE = {High Accuracy Network Cardinalities Estimation by Step Sampling Revision on GPU},
JOURNAL = {Computers, Materials \& Continua},
VOLUME = {64},
YEAR = {2020},
NUMBER = {3},
PAGES = {1819--1844},
URL = {http://www.techscience.com/cmc/v64n3/39461},
ISSN = {1546-2226},
ABSTRACT = {Host cardinality estimation is an important research field in network
management and network security. The host cardinality estimation algorithm based on
the linear estimator array is a common method. Existing algorithms do not take memory
footprint into account when selecting the number of estimators used by each host. This
paper analyzes the relationship between memory occupancy and estimation accuracy and
compares the effects of different parameters on algorithm accuracy. The cardinality
estimating algorithm is a kind of random algorithm, and there is a deviation between the
estimated results and the actual cardinalities. The deviation is affected by some
systematical factors, such as the random parameters inherent in linear estimator and the
random functions used to map a host to different linear estimators. These random factors
cannot be reduced by merging multiple estimators, and existing algorithms cannot
remove the deviation caused by such factors. In this paper, we regard the estimation
deviation as a random variable and proposed a sampling method, recorded as the linear
estimator array step sampling algorithm (*L2S*), to reduce the influence of the random
deviation. *L2S* improves the accuracy of the estimated cardinalities by evaluating and
remove the expected value of random deviation. The cardinality estimation algorithm
based on the estimator array is a computationally intensive algorithm, which takes a lot of
time when processing high-speed network data in a serial environment. To solve this
problem, a method is proposed to port the cardinality estimating algorithm based on the
estimator array to the Graphics Processing Unit (GPU). Experiments on real-world highspeed network traffic show that *L2S* can reduce the absolute bias by more than 22% on
average, and the extra time is less than 61 milliseconds on average.},
DOI = {10.32604/cmc.2020.010727}
}