% Journal article: Peng, Hu, Zhao and Ghosh (2020),
% International Journal of Systems Science, vol. 51, no. 15, pp. 2837--2848.
@article{5a3d0597413f4ad58e9002deeca9b33c,
  author   = "Peng, Zhinan and Hu, Jiangping and Zhao, Yiyi and Ghosh, Bijoy K.",
  title    = "Understanding the mechanism of human--computer game: a distributed reinforcement learning perspective",
  journal  = "International Journal of Systems Science",
  year     = "2020",
  month    = nov,
  day      = "17",
  volume   = "51",
  number   = "15",
  pages    = "2837--2848",
  doi      = "10.1080/00207721.2020.1803436",
  issn     = "0020-7721",
  language = "English",
  keywords = "Human--computer game, bipartite consensus, coopetition network, multi-agent system, reinforcement learning",
  abstract = "In this paper, the mechanism of the human--computer game is investigated with the help of multi-agent systems (MASs) and reinforcement learning (RL). The game is formulated as a bipartite consensus problem while the interactions among humans and computers are modelled as a multi-agent system over a coopetition network. The coopetition network associated with the multi-agent system is represented by a signed graph, where positive/negative edges denote cooperative/competitive interactions. We assume the decision mechanism of the agents are model free and each agent has to make a distributed decision by learning the input and output data from himself/itself and his/its neighbours. The individual decision is developed with the neighbours' state information and a performance index function. A policy iteration (PI) algorithm is proposed to solve the Hamilton-Jacobi-Bellman equation and obtain the optimal decision strategy. Furthermore, an actor-critic neural network is adopted to approximate the performance index and the optimal decision strategy in an online manner. The simulation results are finally given to validate the proposed reinforcement learning approach.",
  note     = "Funding Information: This work was supported partially by National Nature Science Foundation of China under Grants No. 61473061, No. 71503206, No. 61104104, the Sichuan Science and Technology Program under Grant No. 2020YFSY0012, the Fundamental Research Funds for the Central Universities under Grant JBK2002021, and the Program for New Century Excellent Talents in University under Grant No. NCET-13-0091. Publisher Copyright: {\textcopyright} 2020 Informa UK Limited, trading as Taylor \& Francis Group.",
}