% Journal article: Zhang et al., Computers, Materials & Continua 62(2), 2020.
% Authors are separated by " and " and written "Last, First" so that BibTeX
% parses five distinct names. The abstract is kept as published.
@article{cmc.2020.06553,
  author   = {Zhang, Huan and Zheng, Kangfeng and Wang, Xiujuan and Luo, Shoushan and Wu, Bin},
  title    = {Strategy Selection for Moving Target Defense in Incomplete Information Game},
  journal  = {Computers, Materials \& Continua},
  volume   = {62},
  number   = {2},
  pages    = {763--786},
  year     = {2020},
  issn     = {1546-2226},
  doi      = {10.32604/cmc.2020.06553},
  url      = {http://www.techscience.com/cmc/v62n2/38275},
  abstract = {As a core component of the network, web applications have become one of the
preferred targets for attackers because the static configuration of web applications
simplifies the exploitation of vulnerabilities by attackers. Although the moving target
defense (MTD) has been proposed to increase the attack difficulty for the attackers, there
is no solo approach can cope with different attacks; in addition, it is impossible to
implement all these approaches simultaneously due to the resource limitation. Thus, the
selection of an optimal defense strategy based on MTD has become the focus of research.
In general, the confrontation of two players in the security domain is viewed as a
stochastic game, and the reward matrices are known to both players. However, in a real
security confrontation, this scenario represents an incomplete information game. Each
player can only observe the actions performed by the opponent, and the observed actions
are not completely accurate. To accurately describe the attacker's reward function to
reach the Nash equilibrium, this work simulated and updated the strategy selection
distribution of the attacker by observing and investigating the strategy selection history of
the attacker. Next, the possible rewards of the attacker in each confrontation via the
observation matrix were corrected. On this basis, the Nash-Q learning algorithm with
reward quantification was proposed to select the optimal strategy. Moreover, the
performances of the Minimax-Q learning algorithm and Naive-Q learning algorithm were
compared and analyzed in the MTD environment. Finally, the experimental results
showed that the strategy selection algorithm can enable defenders to select a more
reasonable defensive strategy and achieve the maximum possible reward.}
}