% Journal article: IEICE Transactions on Information and Systems, vol. E90-D, no. 2 (Feb 2007).
% The citation key is kept exactly as exported so existing \cite commands keep resolving.
% The abstract is stored in `abstract` (not `note`) so bibliography styles do not print it.
@article{oai:naist.repo.nii.ac.jp:00003831,
  author   = {Gomez, Randy and Toda, Tomoki and Saruwatari, Hiroshi and Shikano, Kiyohiro},
  title    = {Reducing Computation Time of the Rapid Unsupervised Speaker Adaptation based on {HMM}-Sufficient Statistics},
  journal  = {IEICE Transactions on Information and Systems},
  volume   = {E90-D},
  number   = {2},
  pages    = {554--561},
  year     = {2007},
  month    = feb,
  abstract = {In real-time speech recognition applications, there is a need to implement a fast and reliable adaptation algorithm. We propose a method to reduce adaptation time of the rapid unsupervised speaker adaptation based on HMM-Sufficient Statistics. We use only a single arbitrary utterance without transcriptions in selecting the N-best speakers' Sufficient Statistics created offline to provide data for adaptation to a target speaker. Further reduction of N-best implies a reduction in adaptation time. However, it degrades recognition performance due to insufficiency of data needed to robustly adapt the model. Linear interpolation of the global HMM-Sufficient Statistics offsets this negative effect and achieves a 50\% reduction in adaptation time without compromising the recognition performance. Furthermore, we compared our method with Vocal Tract Length Normalization (VTLN), Maximum A Posteriori (MAP) and Maximum Likelihood Linear Regression (MLLR). Moreover, we tested in office, car, crowd and booth noise environments in 10dB, 15dB, 20dB and 25dB SNRs.},
}