WEKO3
アイテム
{"_buckets": {"deposit": "27c5d769-64f4-4bcc-a715-37041e9afb5c"}, "_deposit": {"created_by": 4, "id": "3831", "owners": [4], "pid": {"revision_id": 0, "type": "depid", "value": "3831"}, "status": "published"}, "_oai": {"id": "oai:naist.repo.nii.ac.jp:00003831", "sets": ["35"]}, "author_link": ["6342", "341", "6343", "6344"], "item_7_biblio_info_9": {"attribute_name": "書誌情報", "attribute_value_mlt": [{"bibliographicIssueDates": {"bibliographicIssueDate": "2007-02", "bibliographicIssueDateType": "Issued"}, "bibliographicIssueNumber": "2", "bibliographicPageEnd": "561", "bibliographicPageStart": "554", "bibliographicVolumeNumber": "E90-D", "bibliographic_titles": [{"bibliographic_title": "IEICE Transactions on Information and Systems", "bibliographic_titleLang": "en"}]}]}, "item_7_description_7": {"attribute_name": "抄録", "attribute_value_mlt": [{"subitem_description": "In real-time speech recognition applications, there is a need to implement a fast and reliable adaptation algorithm. We propose a method to reduce adaptation time of the rapid unsupervised speaker adaptation based on HMM-Sufficient Statistics. We use only a single arbitrary utterance without transcriptions in selecting the N-best speakers\u0027 Sufficient Statistics created offline to provide data for adaptation to a target speaker. Further reduction of N-best implies a reduction in adaptation time. However, it degrades recognition performance due to insufficiency of data needed to robustly adapt the model. Linear interpolation of the global HMM-Sufficient Statistics offsets this negative effect and achieves a 50% reduction in adaptation time without compromising the recognition performance. Furthermore, we compared our method with Vocal Tract Length Normalization (VTLN), Maximum A Posteriori (MAP) and Maximum Likelihood Linear Regression (MLLR). 
Moreover, we tested in office, car, crowd and booth noise environments in 10dB, 15dB, 20dB and 25dB SNRs.", "subitem_description_language": "en", "subitem_description_type": "Abstract"}]}, "item_7_publisher_10": {"attribute_name": "出版者", "attribute_value_mlt": [{"subitem_publisher": "電子情報通信学会", "subitem_publisher_language": "ja"}]}, "item_7_relation_17": {"attribute_name": "DOI", "attribute_value_mlt": [{"subitem_relation_type": "isIdenticalTo", "subitem_relation_type_id": {"subitem_relation_type_id_text": "https://doi.org/10.1093/ietisy/e90-d.2.554", "subitem_relation_type_select": "DOI"}}]}, "item_7_rights_18": {"attribute_name": "権利", "attribute_value_mlt": [{"subitem_rights": "Copyright (C) 2007 電子情報通信学会.", "subitem_rights_language": "ja"}]}, "item_7_source_id_12": {"attribute_name": "ISSN", "attribute_value_mlt": [{"subitem_source_identifier": "0916-8532", "subitem_source_identifier_type": "ISSN"}]}, "item_7_source_id_14": {"attribute_name": "書誌レコードID", "attribute_value_mlt": [{"subitem_source_identifier": "AA10826272", "subitem_source_identifier_type": "NCID"}]}, "item_7_text_25": {"attribute_name": "NAIST ID", "attribute_value_mlt": [{"subitem_text_value": "73292716"}]}, "item_7_version_type_20": {"attribute_name": "著者版フラグ", "attribute_value_mlt": [{"subitem_version_resource": "http://purl.org/coar/version/c_970fb48d4fbd8a85", "subitem_version_type": "VoR"}]}, "item_access_right": {"attribute_name": "アクセス権", "attribute_value_mlt": [{"subitem_access_right": "open access", "subitem_access_right_uri": "http://purl.org/coar/access_right/c_abf2"}]}, "item_creator": {"attribute_name": "著者", "attribute_type": "creator", "attribute_value_mlt": [{"creatorNames": [{"creatorName": "Gomez, Randy", "creatorNameLang": "en"}], "nameIdentifiers": [{"nameIdentifier": "6342", "nameIdentifierScheme": "WEKO"}]}, {"creatorNames": [{"creatorName": "Toda, Tomoki", "creatorNameLang": "en"}], "nameIdentifiers": [{"nameIdentifier": "341", "nameIdentifierScheme": "WEKO"}, 
{"nameIdentifier": "90403328", "nameIdentifierScheme": "e-Rad", "nameIdentifierURI": "https://kaken.nii.ac.jp/ja/search/?qm=90403328"}]}, {"creatorNames": [{"creatorName": "Saruwatari, Hiroshi", "creatorNameLang": "en"}], "nameIdentifiers": [{"nameIdentifier": "6343", "nameIdentifierScheme": "WEKO"}]}, {"creatorNames": [{"creatorName": "Shikano, Kiyohiro", "creatorNameLang": "en"}], "nameIdentifiers": [{"nameIdentifier": "6344", "nameIdentifierScheme": "WEKO"}]}]}, "item_files": {"attribute_name": "ファイル情報", "attribute_type": "file", "attribute_value_mlt": [{"accessrole": "open_date", "date": [{"dateType": "Available", "dateValue": "2023-03-02"}], "displaytype": "detail", "download_preview_message": "", "file_order": 0, "filename": "IEICETransInfoSys_E90D_2_554.pdf", "filesize": [{"value": "6.0 MB"}], "format": "application/pdf", "future_date_message": "", "is_thumbnail": false, "licensetype": "license_note", "mimetype": "application/pdf", "size": 6000000.0, "url": {"label": "fulltext", "objectType": "fulltext", "url": "https://naist.repo.nii.ac.jp/record/3831/files/IEICETransInfoSys_E90D_2_554.pdf"}, "version_id": "3c7a97af-9d13-4811-bc70-2f731fe9a527"}]}, "item_keyword": {"attribute_name": "キーワード", "attribute_value_mlt": [{"subitem_subject": "HMM-sufficient statistics", "subitem_subject_language": "en", "subitem_subject_scheme": "Other"}, {"subitem_subject": "unsupervised", "subitem_subject_language": "en", "subitem_subject_scheme": "Other"}, {"subitem_subject": "rapid adaptation", "subitem_subject_language": "en", "subitem_subject_scheme": "Other"}, {"subitem_subject": "speech recognition", "subitem_subject_language": "en", "subitem_subject_scheme": "Other"}]}, "item_language": {"attribute_name": "言語", "attribute_value_mlt": [{"subitem_language": "eng"}]}, "item_resource_type": {"attribute_name": "資源タイプ", "attribute_value_mlt": [{"resourcetype": "journal article", "resourceuri": "http://purl.org/coar/resource_type/c_6501"}]}, "item_title": "Reducing Computation 
Time of the Rapid Unsupervised Speaker Adaptation based on HMM-Sufficient Statistics", "item_titles": {"attribute_name": "タイトル", "attribute_value_mlt": [{"subitem_title": "Reducing Computation Time of the Rapid Unsupervised Speaker Adaptation based on HMM-Sufficient Statistics", "subitem_title_language": "en"}]}, "item_type_id": "7", "owner": "4", "path": ["35"], "permalink_uri": "http://hdl.handle.net/10061/7823", "pubdate": {"attribute_name": "PubDate", "attribute_value": "2012-07-05"}, "publish_date": "2012-07-05", "publish_status": "0", "recid": "3831", "relation": {}, "relation_version_is_last": true, "title": ["Reducing Computation Time of the Rapid Unsupervised Speaker Adaptation based on HMM-Sufficient Statistics"], "weko_shared_id": -1}
Reducing Computation Time of the Rapid Unsupervised Speaker Adaptation based on HMM-Sufficient Statistics
http://hdl.handle.net/10061/7823
http://hdl.handle.net/10061/7823 14a0b296-b1a7-4db2-a236-be290f9439e7
名前 / ファイル | ライセンス | アクション |
---|---|---|
fulltext (6.0 MB) | | |
Item type | 学術雑誌論文 / Journal Article(1) | |||||
---|---|---|---|---|---|---|
公開日 | 2012-07-05 | |||||
タイトル | ||||||
タイトル | Reducing Computation Time of the Rapid Unsupervised Speaker Adaptation based on HMM-Sufficient Statistics | |||||
言語 | ||||||
言語 | eng | |||||
キーワード | ||||||
主題Scheme | Other | |||||
主題 | HMM-sufficient statistics | |||||
キーワード | ||||||
主題Scheme | Other | |||||
主題 | unsupervised | |||||
キーワード | ||||||
主題Scheme | Other | |||||
主題 | rapid adaptation | |||||
キーワード | ||||||
主題Scheme | Other | |||||
主題 | speech recognition | |||||
資源タイプ | ||||||
資源タイプ | journal article | |||||
アクセス権 | ||||||
アクセス権 | open access | |||||
著者 | Gomez, Randy; Toda, Tomoki; Saruwatari, Hiroshi; Shikano, Kiyohiro | |||||
抄録 | ||||||
内容記述タイプ | Abstract | |||||
内容記述 | In real-time speech recognition applications, there is a need to implement a fast and reliable adaptation algorithm. We propose a method to reduce adaptation time of the rapid unsupervised speaker adaptation based on HMM-Sufficient Statistics. We use only a single arbitrary utterance without transcriptions in selecting the N-best speakers' Sufficient Statistics created offline to provide data for adaptation to a target speaker. Further reduction of N-best implies a reduction in adaptation time. However, it degrades recognition performance due to insufficiency of data needed to robustly adapt the model. Linear interpolation of the global HMM-Sufficient Statistics offsets this negative effect and achieves a 50% reduction in adaptation time without compromising the recognition performance. Furthermore, we compared our method with Vocal Tract Length Normalization (VTLN), Maximum A Posteriori (MAP) and Maximum Likelihood Linear Regression (MLLR). Moreover, we tested in office, car, crowd and booth noise environments in 10dB, 15dB, 20dB and 25dB SNRs. | |||||
書誌情報 | en : IEICE Transactions on Information and Systems 巻 E90-D, 号 2, p. 554-561, 発行日 2007-02 | |||||
出版者 | ||||||
出版者 | 電子情報通信学会 | |||||
ISSN | ||||||
収録物識別子タイプ | ISSN | |||||
収録物識別子 | 0916-8532 | |||||
DOI | ||||||
関連タイプ | isIdenticalTo | |||||
識別子タイプ | DOI | |||||
関連識別子 | https://doi.org/10.1093/ietisy/e90-d.2.554 | |||||
書誌レコードID | ||||||
収録物識別子タイプ | NCID | |||||
収録物識別子 | AA10826272 | |||||
権利 | ||||||
権利情報 | Copyright (C) 2007 電子情報通信学会. | |||||
著者版フラグ | ||||||
出版タイプ | VoR |