
@Article{cmc.2020.05564,
AUTHOR = {Sun, Guang and Fan, Xiaoping and Jiang, Wangdong and Zhou, Hangjun and Li, Fenghua and Yang, Rong},
TITLE = {Expanding Hot Code Path for Data Cleaning on Software Graph},
JOURNAL = {Computers, Materials \& Continua},
VOLUME = {63},
YEAR = {2020},
NUMBER = {2},
PAGES = {743--753},
URL = {http://www.techscience.com/cmc/v63n2/38541},
ISSN = {1546-2226},
ABSTRACT = {Graph analysis can be done at scale by using Spark GraphX which loading data
into memory and running graph analysis in parallel. In this way, we should take data out of
graph databases and put it into memory. Considering the limitation of memory size, the
premise of accelerating graph analytical process reduces the graph data to a suitable size
without too much loss of similarity to the original graph. This paper presents our method of
data cleaning on the software graph. We use SEQUITUR data compression algorithm to
find out hot code path and store it as a whole paths directed acyclic graph. Hot code path is
inherent regularity of a program. About 10 to 200 hot code path account for 40\%--99\% of a
program's execution cost. These hot paths are acyclic contribute more than 0.1\%--1.0\% of
some execution metric. We expand hot code path to a suitable size which is good for
runtime and keeps similarity to the original graph.},
DOI = {10.32604/cmc.2020.05564}
}



