
@article{cmc.2020.010182,
  author   = {Zhou, Lin and Lu, Siyuan and Zhong, Qiuyue and Chen, Ying and Tang, Yibin and Zhou, Yan},
  title    = {Binaural Speech Separation Algorithm Based on Long and Short Time Memory Networks},
  journal  = {Computers, Materials \& Continua},
  year     = {2020},
  volume   = {63},
  number   = {3},
  pages    = {1373--1386},
  issn     = {1546-2226},
  doi      = {10.32604/cmc.2020.010182},
  url      = {http://www.techscience.com/cmc/v63n3/38881},
  abstract = {Speaker separation in complex acoustic environment is one of challenging
tasks in speech separation. In practice, speakers are very often unmoving or moving
slowly in normal communication. In this case, the spatial features among the consecutive
speech frames become highly correlated such that it is helpful for speaker separation by
providing additional spatial information. To fully exploit this information, we design a
separation system on Recurrent Neural Network (RNN) with long short-term memory
(LSTM) which effectively learns the temporal dynamics of spatial features. In detail, a
LSTM-based speaker separation algorithm is proposed to extract the spatial features in
each time-frequency (TF) unit and form the corresponding feature vector. Then, we treat
speaker separation as a supervised learning problem, where a modified ideal ratio mask
(IRM) is defined as the training function during LSTM learning. Simulations show that
the proposed system achieves attractive separation performance in noisy and reverberant
environments. Specifically, during the untrained acoustic test with limited priors, e.g.,
unmatched signal to noise ratio (SNR) and reverberation, the proposed LSTM based
algorithm can still outperforms the existing DNN based method in the measures of PESQ
and STOI. It indicates our method is more robust in untrained conditions.},
}



