
@article{cmc.2025.059880,
  author   = {Wang, Yiqin and Wang, Yufeng and Ma, Jianhua and Jin, Qun},
  title    = {{MARCS}: A Mobile Crowdsensing Framework Based on Data {Shapley} Value Enabled Multi-Agent Deep Reinforcement Learning},
  journal  = {Computers, Materials \& Continua},
  volume   = {82},
  number   = {3},
  pages    = {4431--4449},
  year     = {2025},
  issn     = {1546-2226},
  doi      = {10.32604/cmc.2025.059880},
  url      = {http://www.techscience.com/cmc/v82n3/59913},
  abstract = {Opportunistic mobile crowdsensing (MCS) non-intrusively exploits human mobility trajectories, and the participants' smart devices as sensors have become promising paradigms for various urban data acquisition tasks. However, in practice, opportunistic MCS has several challenges from both the perspectives of MCS participants and the data platform. On the one hand, participants face uncertainties in conducting MCS tasks, including their mobility and implicit interactions among participants, and participants' economic returns given by the MCS data platform are determined by not only their own actions but also other participants' strategic actions. On the other hand, the platform can only observe the participants' uploaded sensing data that depends on the unknown effort/action exerted by participants to the platform, while, for optimizing its overall objective, the platform needs to properly reward certain participants for incentivizing them to provide high-quality data. To address the challenge of balancing individual incentives and platform objectives in MCS, this paper proposes MARCS, an online sensing policy based on multi-agent deep reinforcement learning (MADRL) with centralized training and decentralized execution (CTDE). Specifically, the interactions between MCS participants and the data platform are modeled as a partially observable Markov game, where participants, acting as agents, use DRL-based policies to make decisions based on local observations, such as task trajectories and platform payments. To align individual and platform goals effectively, the platform leverages Shapley value to estimate the contribution of each participant's sensed data, using these estimates as immediate rewards to guide agent training. The experimental results on real mobility trajectory datasets indicate that the revenue of MARCS reaches almost 35\%, 53\%, and 100\% higher than DDPG, Actor-Critic, and model predictive control (MPC) respectively on the participant side and similar results on the platform side, which show superior performance compared to baselines.},
}



