
@article{cmes.2023.027467,
  author   = {Xin, Jing and Du, Kenan and Feng, Jiale and Shan, Mao},
  title    = {An Improved High Precision {3D} Semantic Mapping of Indoor Scenes from {RGB-D} Images},
  journal  = {Computer Modeling in Engineering \& Sciences},
  volume   = {137},
  number   = {3},
  pages    = {2621--2640},
  year     = {2023},
  issn     = {1526-1506},
  doi      = {10.32604/cmes.2023.027467},
  url      = {http://www.techscience.com/CMES/v137n3/53732},
  abstract = {This paper proposes an improved high-precision 3D semantic mapping method for indoor scenes using RGB-D
images. The current semantic mapping algorithms suffer from low semantic annotation accuracy and insufficient
real-time performance. To address these issues, we first adopt the Elastic Fusion algorithm to select key frames
from indoor environment image sequences captured by the Kinect sensor and construct the indoor environment
space model. Then, an indoor RGB-D image semantic segmentation network is proposed, which uses multi-scale
feature fusion to quickly and accurately obtain object labeling information at the pixel level of the spatial point
cloud model. Finally, Bayesian updating is used to conduct incremental semantic label fusion on the established
spatial point cloud model. We also employ dense conditional random fields (CRF) to optimize the 3D semantic
map model, resulting in a high-precision spatial semantic map of indoor scenes. Experimental results show that
the proposed semantic mapping system can process image sequences collected by RGB-D sensors in real-time and
output accurate semantic segmentation results of indoor scene images and the current local spatial semantic map.
Finally, it constructs a globally consistent high-precision indoor scenes 3D semantic map.},
}



