
% Journal article: Computers, Materials and Continua, vol. 81, no. 3 (2024).
% Authors are stored in "Last, First" form and joined with " and ", which is
% the only name separator BibTeX recognises; the acronym in the title is
% braced so that sentence-casing styles do not lowercase it.
@Article{cmc.2024.055634,
AUTHOR = {Trad, Taha Yacine and Choutri, Kheireddine and Lagha, Mohand and Meshoul, Souham and Khenfri, Fouad and Fareh, Raouf and Shaiba, Hadil},
TITLE = {Real-Time Implementation of Quadrotor {UAV} Control System Based on a Deep Reinforcement Learning Approach},
JOURNAL = {Computers, Materials \& Continua},
VOLUME = {81},
YEAR = {2024},
NUMBER = {3},
PAGES = {4757--4786},
URL = {http://www.techscience.com/cmc/v81n3/59024},
ISSN = {1546-2226},
ABSTRACT = {The popularity of quadrotor Unmanned Aerial Vehicles (UAVs) stems from their simple propulsion systems and structural design. However, their complex and nonlinear dynamic behavior presents a significant challenge for control, necessitating sophisticated algorithms to ensure stability and accuracy in flight. Various strategies have been explored by researchers and control engineers, with learning-based methods like reinforcement learning, deep learning, and neural networks showing promise in enhancing the robustness and adaptability of quadrotor control systems. This paper investigates a Reinforcement Learning (RL) approach for both high and low-level quadrotor control systems, focusing on attitude stabilization and position tracking tasks. A novel reward function and actor-critic network structures are designed to stimulate high-order observable states, improving the agent’s understanding of the quadrotor’s dynamics and environmental constraints. To address the challenge of RL hyperparameter tuning, a new framework is introduced that combines Simulated Annealing (SA) with a reinforcement learning algorithm, specifically Simulated Annealing-Twin Delayed Deep Deterministic Policy Gradient (SA-TD3). This approach is evaluated for path-following and stabilization tasks through comparative assessments with two commonly used control methods: Backstepping and Sliding Mode Control (SMC). While the implementation of the well-trained agents exhibited unexpected behavior during real-world testing, a reduced neural network used for altitude control was successfully implemented on a Parrot Mambo mini drone. The results showcase the potential of the proposed SA-TD3 framework for real-world applications, demonstrating improved stability and precision across various test scenarios and highlighting its feasibility for practical deployment.},
DOI = {10.32604/cmc.2024.055634}
}



