
% Journal article: end-to-end speech recognition of Tamil (Intelligent Automation & Soft Computing, vol. 32, no. 2, 2022).
% Authors are joined with "and" (BibTeX's name separator); literal percent signs in the abstract are escaped for LaTeX.
@Article{iasc.2022.022021,
  AUTHOR   = {Changrampadi, Mohamed Hashim and Shahina, A. and Narayanan, M. Badri and Khan, A. Nayeemulla},
  TITLE    = {End-to-End Speech Recognition of {Tamil} Language},
  JOURNAL  = {Intelligent Automation \& Soft Computing},
  VOLUME   = {32},
  YEAR     = {2022},
  NUMBER   = {2},
  PAGES    = {1309--1323},
  URL      = {http://www.techscience.com/iasc/v32n2/45592},
  ISSN     = {2326-005X},
  ABSTRACT = {Research in speech recognition is progressing with numerous state-of-the-art results in recent times. However, relatively fewer research is being carried out in Automatic Speech Recognition (ASR) for languages with low resources. We present a method to develop speech recognition model with minimal resources using Mozilla DeepSpeech architecture. We have utilized freely available online computational resources for training, enabling similar approaches to be carried out for research in a low-resourced languages in a financially constrained environments. We also present novel ways to build an efficient language model from publicly available web resources to improve accuracy in ASR. The proposed ASR model gives the best result of 24.7\% Word Error Rate (WER), compared to 55\% WER by Google speech-to-text. We have also demonstrated a semi-supervised development of speech corpus using our trained ASR model, indicating a cost effective approach of building large vocabulary corpus for low resource language. The trained Tamil ASR model and the training sets are released in public domain and are available on GitHub.},
  DOI      = {10.32604/iasc.2022.022021}
}



