
@article{cmc.2020.010727,
  author   = {Xu, Jie and Wang, Qun and Wang, Yifan and Khan, Asif},
  title    = {High Accuracy Network Cardinalities Estimation by Step Sampling Revision on {GPU}},
  journal  = {Computers, Materials \& Continua},
  year     = {2020},
  volume   = {64},
  number   = {3},
  pages    = {1819--1844},
  issn     = {1546-2226},
  doi      = {10.32604/cmc.2020.010727},
  url      = {http://www.techscience.com/cmc/v64n3/39461},
  abstract = {Host cardinality estimation is an important research field in network
management and network security. The host cardinality estimation algorithm based on
the linear estimator array is a common method. Existing algorithms do not take memory
footprint into account when selecting the number of estimators used by each host. This
paper analyzes the relationship between memory occupancy and estimation accuracy and
compares the effects of different parameters on algorithm accuracy. The cardinality
estimating algorithm is a kind of random algorithm, and there is a deviation between the
estimated results and the actual cardinalities. The deviation is affected by some
systematical factors, such as the random parameters inherent in linear estimator and the
random functions used to map a host to different linear estimators. These random factors
cannot be reduced by merging multiple estimators, and existing algorithms cannot
remove the deviation caused by such factors. In this paper, we regard the estimation
deviation as a random variable and proposed a sampling method, recorded as the linear
estimator array step sampling algorithm (L2S), to reduce the influence of the random
deviation. L2S improves the accuracy of the estimated cardinalities by evaluating and
remove the expected value of random deviation. The cardinality estimation algorithm
based on the estimator array is a computationally intensive algorithm, which takes a lot of
time when processing high-speed network data in a serial environment. To solve this
problem, a method is proposed to port the cardinality estimating algorithm based on the
estimator array to the Graphics Processing Unit (GPU). Experiments on real-world high-speed
network traffic show that L2S can reduce the absolute bias by more than 22\% on
average, and the extra time is less than 61 milliseconds on average.},
}



