
% Journal article record. Author names are given as "Last, First" and joined
% with the literal word "and", which is the only name separator BibTeX accepts.
@article{cmc.2019.05325,
  author   = {Zhang, Zhaohui and Chen, Jian and Chen, Ligong and Liu, Qiuwen and Yang, Lijun and Wang, Pengwei and Zheng, Yongjun},
  title    = {A Scalable Method of Maintaining Order Statistics for Big Data Stream},
  journal  = {Computers, Materials \& Continua},
  volume   = {60},
  number   = {1},
  pages    = {117--132},
  year     = {2019},
  issn     = {1546-2226},
  doi      = {10.32604/cmc.2019.05325},
  url      = {http://www.techscience.com/cmc/v60n1/28353},
  abstract = {Recently, there are some online quantile algorithms that work on how to analyze the order statistics about the high-volume and high-velocity data stream, but the drawback of these algorithms is not scalable because they take the GK algorithm as the subroutine, which is not known to be mergeable. Another drawback is that they can't maintain the correctness, which means the error will increase during the process of the window sliding. In this paper, we use a novel data structure to store the sketch that maintains the order statistics over sliding windows. Therefore three algorithms have been proposed based on the data structure. And the fixed-size window algorithm can keep the sketch of the last W elements. It is also scalable because of the mergeable property. The time-based window algorithm can always keep the sketch of the data in the last T time units. Finally, we provide the window aggregation algorithm which can help extend our algorithm into the distributed system. This provides a speed performance boost and makes it more suitable for modern applications such as system/network monitoring and anomaly detection. The experimental results show that our algorithm can not only achieve acceptable performance but also can actually maintain the correctness and be mergeable.},
}



