
% Journal article: Computer Modeling in Engineering & Sciences 123(1), 2020.
% Authors are stored in "Last, First" form and joined with " and " so BibTeX
% parses six separate names. Markup in the abstract is LaTeX, not HTML.
@Article{cmes.2020.09014,
  AUTHOR   = {Dong, Jianhua and Wu, Lifeng and Liu, Xiaogang and Fan, Cheng and Leng, Menghui and Yang, Qiliang},
  TITLE    = {Simulation of Daily Diffuse Solar Radiation Based on Three Machine Learning Models},
  JOURNAL  = {Computer Modeling in Engineering \& Sciences},
  VOLUME   = {123},
  YEAR     = {2020},
  NUMBER   = {1},
  PAGES    = {49--73},
  URL      = {http://www.techscience.com/CMES/v123n1/38484},
  ISSN     = {1526-1506},
  ABSTRACT = {Solar radiation is an important parameter in the fields of computer modeling,
engineering technology and energy development. This paper evaluated the ability of three
machine learning models, i.e., Extreme Gradient Boosting (XGBoost), Support Vector
Machine (SVM) and Multivariate Adaptive Regression Splines (MARS), to estimate the
daily diffuse solar radiation ($R_d$). The regular meteorological data of 1966--2015 at five
stations in China were taken as the input parameters (including mean average temperature
($T_a$), theoretical sunshine duration ($N$), actual sunshine duration ($n$), daily average air
relative humidity ($\mathit{RH}$), and extra-terrestrial solar radiation ($R_a$)). And their estimation
accuracies were subjected to comparative analysis. The three models were first trained
using meteorological data from 1966 to 2000. Then, the 2001--2015 data was used to test
the trained machine learning model. The results show that the XGBoost had better
accuracy than the other two models in coefficient of determination ($R^2$), root mean square
error (RMSE), mean bias error (MBE) and normalized root mean square error (NRMSE).
The MARS performed better in the training phase than the testing phase, but became less
accurate in the testing phase, with the $R^2$ value falling by 2.7--16.9\% on average. By
contrast, the $R^2$ values of SVM and XGBoost increased by 2.9--12.2\% and 1.9--14.3\%,
respectively. Despite trailing slightly behind the SVM at the Beijing station, the XGBoost
showed good performance at the rest of the stations in the two phases. In the training
phase, the accuracy growth is small but observable. In addition, the XGBoost had a
slightly lower RMSE than the SVM, a signal of its edge in stability. Therefore, the three
machine learning models can estimate the daily $R_d$ based on local inputs and the
XGBoost stands out for its excellent performance and stability.},
  DOI      = {10.32604/cmes.2020.09014}
}



