% Song, Liu, and Liu (2021), CMES 128(3): DMAS beamformer on a hybrid CPU-GPU platform.
% Authors are separated by " and "; acronyms in the title are brace-protected
% against style recasing; "%" in the abstract is escaped for LaTeX.
@article{cmes.2021.016008,
  author   = {Song, Ke and Liu, Paul and Liu, Dongquan},
  title    = {Implementing Delay Multiply and Sum Beamformer on a Hybrid {CPU-GPU} Platform for Medical Ultrasound Imaging Using {OpenMP} and {CUDA}},
  journal  = {Computer Modeling in Engineering \& Sciences},
  volume   = {128},
  number   = {3},
  pages    = {1133--1150},
  year     = {2021},
  issn     = {1526-1506},
  doi      = {10.32604/cmes.2021.016008},
  url      = {http://www.techscience.com/CMES/v128n3/44008},
  abstract = {A novel beamforming algorithm named Delay Multiply and Sum (DMAS), which excels at enhancing the resolution and contrast of ultrasonic image, has recently been proposed. However, there are nested loops in this algorithm, so the calculation complexity is higher compared to the Delay and Sum (DAS) beamformer which is widely used in industry. Thus, we proposed a simple vector-based method to lower its complexity. The key point is to transform the nested loops into several vector operations, which can be efficiently implemented on many parallel platforms, such as Graphics Processing Units (GPUs), and multi-core Central Processing Units (CPUs). Consequently, we considered to implement this algorithm on such a platform. In order to maximize the use of computing power, we use the GPUs and multi-core CPUs in mixture. The platform used in our test is a low cost Personal Computer (PC), where a GPU and a multi-core CPU are installed. The results show that the hybrid use of a CPU and a GPU can get a significant performance improvement in comparison with using a GPU or using a multi-core CPU alone. The performance of the hybrid system is increased by about 47\%--63\% compared to a single GPU. When 32 elements are used in receiving, the frame rate basically can reach 30 fps. In the best case, the frame rate can be increased to 40 fps.},
}