%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Bibliography for Language Model Adaptation
% Compiled by Kevin Duh (mylastname at ee dot washington.edu)
% Jan 2005
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%
% Conventions used in this file:
%   - Venue names are string macros (defined just below). Reference them
%     WITHOUT braces or quotes (booktitle = icassp); a braced value such as
%     {icassp} is printed literally instead of being expanded.
%   - Authors are written "Last, First" and separated by " and ".
%   - Page ranges use a double hyphen; months use the standard three-letter
%     macros (jan ... dec).
%   - "location" holds the conference city. Classic BibTeX styles ignore
%     this field; biblatex prints it.
%   - Text outside entries is ignored by BibTeX, which is why these percent
%     lines work as comments. Never place an at-sign in such text.

@string{icassp     = "International Conference on Acoustics, Speech, and Signal Processing"}
@string{hlt        = "Human Language Technology"}
@string{csl        = "Computer Speech and Language"}
@string{ieee-sap   = "IEEE Transactions on Speech and Audio Processing"}
@string{ieee-pami  = "IEEE Trans. Pattern Analysis and Machine Intelligence"}
@string{eurospeech = "Proc. of Eurospeech"}
@string{emnlp      = "Conference on Empirical Methods in Natural Language Processing (EMNLP)"}
@string{acl        = "Assoc. of Computational Linguistics"}
@string{icslp      = "International Conference on Spoken Language Processing"}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% OVERVIEW and TUTORIAL PAPERS
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

% NOTE(review): the citation key keeps its historical spelling
% ("bellergarda") so existing citations continue to resolve.
@InProceedings{bellergarda01overview,
  author    = {Bellegarda, Jerome R.},
  title     = {An Overview of Statistical Language Model Adaptation},
  booktitle = {Adaptation Methods for Speech Recognition, ISCA Tutorial and Research Workshop (ITRW)},
  location  = {Sophia Antipolis, France},
  year      = 2001,
  month     = aug,
  note      = {Very good overview with lots of references. Classifies LM
               Adaptation into 3 broad categories: model interpolation,
               constraint specification, and meta-information extraction.
               Model interpolation methods include analogs of LM backoff and
               interpolation, MAP adaptation, and cache models. Constraint
               specification usually uses the exponential models associated
               with MaxEnt; however, rather than finding the maximum entropy
               distribution of all distributions that satisfy the in-domain
               data's features, we find the distribution that is closest to
               the out-domain distribution (i.e. prior). Meta-information
               extraction is any technique that uses knowledge beyond the
               word sequences for adaptation. This includes knowledge of
               topic (e.g. topic mixtures), semantics (e.g. latent semantic
               analysis), and syntactic information (e.g. structure language
               models). Concludes by saying that combination of methods could
               be promising.}
}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Studies of Cross-domain Variability and Brittleness
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

@InProceedings{Rosenfeld95,
  author    = {Rosenfeld, R.},
  title     = {Optimizing lexical and n-gram coverage via judicious use of linguistic data},
  booktitle = eurospeech,
  year      = 1995,
  volume    = 3,
  pages     = {1763--1766},
  note      = {Examined the effect of various types and amounts of North
               American Business language data on the OOV rate and n-gram
               coverage. Concluded that the source of the data and recency
               affect OOV rate significantly and calls for careful choosing
               of vocabulary list. On the other hand, similar study on n-gram
               coverage shows an opposite result, with more data leading to
               better perplexity/WER. Good experimental methodology.}
}

@Article{schwarm04,
  author  = {Schwarm, S. and Bulyko, I. and Ostendorf, M.},
  title   = {Adaptive Language Modeling with Varied Sources to Cover New Vocabulary Items},
  journal = ieee-sap,
  year    = 2004,
  volume  = 12,
  number  = 3,
  pages   = {334--342},
  month   = may,
  note    = {Looked at stylistic and content differences between several
             corpora for adaptation (e.g. published text, email, meeting,
             conversational web, topic-related web). Used order-dependent
             mixture weights; concluded that web data is useful for
             high-order n-grams but topic-match was more important in
             reducing WER. Looked at perplexity and bigram hit rate for
             assessing usefulness of new data sources.}
}

@Article{iyer99relevance,
  author  = {Iyer, R. and Ostendorf, M.},
  title   = {Relevance Weighting for combining multi-domain data for n-gram language modeling},
  journal = csl,
  year    = 1999,
  volume  = 13,
  number  = 3,
  pages   = {267--282}
}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Model Interpolation Techniques
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

@InProceedings{iyer97transforming,
  author    = {Iyer, R. and Ostendorf, M.},
  title     = {Transforming Out-of-Domain Estimates to Improve In-Domain Language Models},
  booktitle = eurospeech,
  year      = 1997,
  volume    = 4,
  pages     = {1975--1978},
  note      = {Use POS conditioning for weighting out-of-domain data}
}

@PhdThesis{iyer98phd,
  author = {Iyer, R.},
  title  = {Improving and Predicting Performance of Statistical Language Models in Sparse Domains},
  school = {Boston University},
  year   = 1998
}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% MAP Adaptation Techniques
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

@InProceedings{federico96,
  author    = {Federico, M.},
  title     = {{Bayesian} Estimation Methods for N-Gram Language Model Adaptation},
  booktitle = icslp,
  pages     = {240--243},
  year      = 1996,
  month     = oct
}

@InProceedings{chen99map,
  author    = {Chen, L. and Huang, T.},
  title     = {An Improved {MAP} Method for Language Model Adaptation},
  booktitle = eurospeech,
  pages     = {1923--1926},
  year      = 1999,
  volume    = 5,
  month     = sep,
  location  = {Budapest, Hungary}
}

% NOTE(review): third author is spelled "Tawahara" in the original data;
% verify against the paper before citing.
@InProceedings{masataki97,
  author    = {Masataki, H. and Sagisaka, Y. and Tawahara, T.},
  title     = {Task Adaptation Using {MAP} Estimation in N-gram Language Model},
  booktitle = icassp,
  pages     = {783--786},
  year      = 1997,
  volume    = 1,
  month     = may,
  location  = {Munich, Germany}
}

@InProceedings{bacchiani04adapt,
  author    = {Bacchiani, M. and Roark, B. and Saraclar, M.},
  title     = {Language Model Adaptation with {MAP} Estimation and the Perceptron Algorithm},
  booktitle = hlt,
  year      = 2004
}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Exponential Models:
% MDI Adaptation techniques
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

@Article{rosenfeld96maximum,
  author  = {Rosenfeld, R.},
  title   = {A Maximum Entropy Approach to Adaptive Statistical Language Modeling},
  journal = csl,
  pages   = {187--228},
  year    = 1996,
  note    = {Longer version: Carnegie Mellon Tech. Rep. CMU-CS-94-138.},
  url     = {citeseer.ist.psu.edu/rosenfeld96maximum.html}
}

@InProceedings{federico99mdi,
  author    = {Federico, M.},
  title     = {Efficient Language Model Adaptation Through {MDI} Estimation},
  booktitle = eurospeech,
  pages     = {1583--1586},
  year      = 1999,
  volume    = 4,
  month     = sep
}

@InProceedings{rao97mdi,
  author    = {Rao, P. S. and Dharanipragada, S. and Roukos, S.},
  title     = {{MDI} Adaptation of Language Models Across Corpora},
  booktitle = eurospeech,
  location  = {Rhodes, Greece},
  pages     = {1979--1982},
  year      = 1997,
  volume    = 4,
  month     = sep
}

@InProceedings{kneser97adapt,
  author    = {Kneser, R. and Peters, J. and Klakow, D.},
  title     = {Language Model Adaptation Using Dynamic Marginals},
  booktitle = eurospeech,
  location  = {Rhodes, Greece},
  pages     = {1971--1974},
  year      = 1997,
  volume    = 4
}

%%%%%%%%%%%%%%%%%%%%%%%%%
% Topic Mixtures
%%%%%%%%%%%%%%%%%%%%%%%%%

@InProceedings{iyer96,
  author    = {Iyer, R. and Ostendorf, M.},
  title     = {Modeling long range dependencies in languages: Topic Mixtures vs. Dynamic Cache Models},
  booktitle = icslp,
  year      = 1996,
  pages     = {236--239},
  note      = {Sentence-level mixtures for capturing long-range
               within-sentence effects and topic dependent effects across
               sentences. Incorporating cache models in mixture gets
               additional perplexity and word error rate improvements}
}

@InProceedings{iyer94sentence,
  author    = {Iyer, R. and Ostendorf, M. and Rohlicek, J. R.},
  title     = {Language Modeling with Sentence-Level Mixtures},
  booktitle = {Proc. ARPA Speech and Natural Language Workshop},
  pages     = {82--86},
  year      = 1994,
  month     = mar,
  publisher = {Morgan Kaufmann Publishers}
}

@InProceedings{kneser93adapt,
  author    = {Kneser, R. and Steinbiss, V.},
  title     = {On the Dynamic Adaptation of Stochastic Language Models},
  booktitle = icassp,
  pages     = {586--588},
  year      = 1993,
  month     = may
}

@InProceedings{seymore97topic,
  author    = {Seymore, K. and Rosenfeld, R.},
  title     = {Using Story Topics for Language Model Adaptation},
  booktitle = eurospeech,
  location  = {Rhodes, Greece},
  pages     = {1987--1990},
  year      = 1997,
  volume    = 4,
  month     = sep
}

@InProceedings{adda99,
  author    = {Adda, G. and Jardino, M. and Gauvain, J. L.},
  title     = {Language Modeling for Broadcast News Transcription},
  booktitle = eurospeech,
  location  = {Budapest, Hungary},
  pages     = {1759--1762},
  year      = 1999,
  volume    = 4,
  month     = sep
}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Using Semantics and Syntax in Adaptation
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

@InProceedings{kneser97semantic,
  author    = {Kneser, R. and Peters, J.},
  title     = {Semantic Clustering for Adaptive Language Modeling},
  booktitle = icassp,
  pages     = {779--782},
  year      = 1997,
  volume    = 2,
  month     = may
}

@InProceedings{martin97,
  author    = {Martin, S. and Liermann, Jorg and Ney, Hermann},
  title     = {Adaptive topic-dependent language modeling using word-based varigrams},
  booktitle = eurospeech,
  year      = 1997,
  volume    = 3,
  pages     = {1447--1450},
  note      = {Extensions to standard interpolated word-trigram and cache
               model: extend trigram into a varigram of m useful words, and
               add topic-specific trigram models.}
}

@InProceedings{bellegarda01adapt,
  author    = {Bellegarda, J. R.},
  title     = {A Novel Approach to the Adaptation of Latent Semantic Information},
  booktitle = {Proc. 2001 ISCA Workshop on Adaptation Methods},
  year      = 2001
}

@InProceedings{chelba01portability,
  author    = {Chelba, C.},
  title     = {Portability of Syntactic Structure for Language Modeling},
  booktitle = icassp,
  year      = 2001,
  month     = may,
  location  = {Salt Lake City, Utah}
}

@InProceedings{zhang99,
  author    = {Zhang, R. and Black, E. and Finch, A.},
  title     = {Using Detailed Linguistic Structure in Language Modeling},
  booktitle = eurospeech,
  location  = {Budapest, Hungary},
  pages     = {1815--1818},
  year      = 1999,
  volume    = 4,
  month     = sep
}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Adaptation using Information Retrieval, Active Learning, etc.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%
% Conventions for the entries below: venue names such as icassp, icslp, hlt,
% csl, acl, emnlp and ieee-pami are string macros defined at the top of this
% file and must be referenced WITHOUT braces; authors are "Last, First"
% joined by " and "; page ranges use a double hyphen; months use the
% standard three-letter macros. Never place an at-sign in comment text.

@InProceedings{mahajan99,
  author    = {Mahajan, Milind and Beeferman, Doug and Huang, X. D.},
  title     = {Improved Topic-Dependent Language Modeling Using Information Retrieval Techniques},
  booktitle = icassp,
  year      = 1999
}

@InProceedings{bigi04,
  author    = {Bigi, Brigitte and Huang, Yan and De Mori, Renato},
  title     = {Vocabulary and Language Model Adaptation using Information Retrieval},
  booktitle = icslp,
  year      = 2004,
  note      = {Use first-pass sentence hypotheses to retrieve relevant
               ``documents'', which is used to train and adapt the base
               language model. Experiments in French as well as in English
               (Hub4, Switchboard, and NAB). Use KL divergence as distance
               metric in retrieval.}
}

@InProceedings{zhao04,
  author    = {Zhao, Bing and Eck, Matthias and Vogel, Stephen},
  title     = {Language Model Adaptation for Statistical Machine Translation via Structured Query Models},
  booktitle = {COLING},
  year      = 2004,
  note      = {Language model adaptation for Machine Translation. Self-note:
               It may be interesting to look at how LM adaptation in MT and
               ASR differs (or do they?).}
}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Other techniques for LM Adaptation
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

% NOTE(review): author given names are kept exactly as originally entered;
% verify spellings against the paper before citing.
@InProceedings{gao00,
  author    = {Gao, Jiafeng and Lu, Mingjing and Lee, Kai-Fu},
  title     = {N-gram Distribution Based Language Model Adaptation},
  booktitle = icslp,
  year      = 2000,
  note      = {LM adaptation based on pruning n-grams}
}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Data augmentation
% - Methods to increase training data size
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

@InProceedings{bulyko03web,
  author    = {Bulyko, I. and Ostendorf, M. and Stolcke, A.},
  title     = {Getting More Mileage from Web Text Sources for Conversational Speech Language Modeling using Class-Dependent Mixtures},
  booktitle = hlt,
  year      = 2003,
  note      = {Retrieving conversational-style sentences from the web.
               Class-dependent interpolation.}
}

@InProceedings{galescu98,
  author    = {Galescu, Lucian and Ringger, Eric and Allen, James},
  title     = {Rapid Language Model Development for New Task Domains},
  booktitle = {First International Conference on Language Resources and Evaluation (LREC)},
  year      = 1998,
  month     = may,
  note      = {1. Build CFG for new domain and use it to generate artificial
               corpus. 2. Use out-domain corpora in class-based LM framework
               to estimate in-domain LM.}
}

@InProceedings{berger98just-in-time,
  author    = {Berger, A. and Miller, R.},
  title     = {Just-in-time language modeling},
  booktitle = icassp,
  volume    = 2,
  pages     = {705--708},
  year      = 1998
}

@InProceedings{zhu01www,
  author    = {Zhu, X. and Rosenfeld, R.},
  title     = {Improving trigram language modeling with the {World Wide Web}},
  booktitle = icassp,
  year      = 2001,
  volume    = 1,
  pages     = {533--536}
}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Unsupervised Adaptation
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

@InProceedings{giuliani01unsupervised,
  author    = {Giuliani, Diego and Federico, Marcello},
  title     = {Unsupervised Language and Acoustic Model Adaptation for Cross Domain Portability},
  booktitle = {Adaptation Methods for Speech Recognition, ISCA Tutorial and Research Workshop (ITRW)},
  location  = {Sophia Antipolis, France},
  year      = 2001,
  month     = aug,
  note      = {Porting a broadcast news speech recognizer to tourist
               information conversational speech domain. Iterative adaptation
               procedure that alternatively generates sentence hypotheses and
               AM/LM adaptation.}
}

% NOTE(review): the key "yokoyama0" looks truncated (the year is 2003); it
% is kept unchanged so existing citations continue to resolve.
@InProceedings{yokoyama0,
  author    = {Yokoyama, T. and Shinozaki, T. and Iwano, K. and Furui, S.},
  title     = {Unsupervised Class-based Language Model Adaptation for Spontaneous Speech Recognition},
  booktitle = {ISCA/IEEE Workshop on Spontaneous Speech Proc. and Recog. (SSPR), Tokyo, Japan},
  year      = 2003
}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Adaptation for Conversational Speech
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

@InProceedings{bertoldi01,
  author    = {Bertoldi, N. and Brugnara, F. and Cettolo, M. and Federico, M. and Giuliani, D.},
  title     = {From Broadcast news to Spontaneous Dialogue Transcription: Portability Issues},
  booktitle = icassp,
  year      = 2001,
  location  = {Salt Lake City, Utah},
  month     = may
}

% NOTE(review): first author's given name is kept as originally entered
% ("Takaoki"); verify against the paper before citing.
@InProceedings{hori03,
  author    = {Hori, Takaoki and Willett, Daniel and Minami, Yasuhiro},
  title     = {Language Model Adaptation Using {WFST}-Based Speaking-style Translation},
  booktitle = icassp,
  year      = 2003,
  note      = {Use speech recognition and machine translation together as one
               WFST for adaptation of written Japanese to spontaneous spoken
               Japanese}
}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Adaptation for Resource-Deficient Languages
% Cross-lingual Adaptation
% Adaptation for Languages with rich morphology, etc.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

@Article{khudanpur04contemporaneous,
  author  = {Khudanpur, Sanjeev and Kim, Woosung},
  title   = {Contemporaneous Text as Side-Information in Statistical Language Modeling},
  journal = csl,
  year    = 2004,
  volume  = 18,
  number  = 2,
  pages   = {143--162},
  month   = apr
}

@InProceedings{kim04lsa,
  author    = {Kim, Woosung and Khudanpur, Sanjeev},
  title     = {Cross-Lingual Latent Semantic Analysis for Language Modeling},
  booktitle = icassp,
  pages     = {257--260},
  year      = 2004,
  month     = may
}

@InProceedings{kim03trigger,
  author    = {Kim, Woosung and Khudanpur, Sanjeev},
  title     = {Cross-Lingual Lexical Triggers in Statistical Language Modeling},
  booktitle = emnlp,
  year      = 2003,
  pages     = {17--24},
  month     = jul
}

@InProceedings{nakajima02,
  author    = {Nakajima, Hideharu and Yamamoto, Hirofumi and Watanabe, Taro},
  title     = {Language Model Adaptation with Additional Text Generated by Machine Translation},
  booktitle = {COLING 2002},
  year      = 2002,
  note      = {Use IBM Model 4 to generate sentences for target domain.
               Experiments on English-Japanese.}
}

% NOTE(review): author spellings are still unverified (see the self-note
% inside the entry); check them against the paper before citing.
@InProceedings{maucec01,
  author    = {Maucec, M. and Kacic, Z. and Horvat, Bogomir},
  title     = {A Framework for Language Model Adaptation for Highly-Inflected {Slovenian} Language},
  booktitle = {Adaptation Methods for Speech Recognition, ISCA Tutorial and Research Workshop (ITRW)},
  location  = {Sophia Antipolis, France},
  year      = 2001,
  month     = aug,
  note      = {Claims that current adaptation techniques may not work well
               with morphologically-rich languages. Their solution:
               Decomposed words into smaller morphological components to
               reduce vocabulary growth, but map words into (semantic/topic)
               equivalence classes prior to applying IR-based adaptation.
               Self-note: Interesting paper. Make sure I correct the spelling
               of their names before citing.}
}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Adapting Parsers
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

@InProceedings{hwa99,
  author    = {Hwa, Rebecca},
  title     = {Supervised Grammar Induction Using Training Data with limited Constituent Information},
  booktitle = acl,
  year      = 1999
}

@InProceedings{gildea01variation,
  author    = {Gildea, Daniel},
  title     = {Corpus Variation and Parser Performance},
  booktitle = emnlp,
  year      = 2001
}

@InProceedings{roark03pcfg,
  author    = {Roark, B. and Bacchiani, M.},
  title     = {Supervised and unsupervised {PCFG} adaptation to Novel Domains},
  booktitle = {HLT-NAACL},
  year      = 2003,
  month     = may
}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Online or Cache Adaptation
% (as opposed to off-line task-level adaptation)
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

@InProceedings{jelinek91dynamicLM,
  author    = {Jelinek, F. and Merialdo, B. and Roukos, S. and Strauss, M.},
  title     = {A Dynamic {LM} for Speech Recognition},
  booktitle = {Proc. ARPA Workshop on Speech and Natural Language},
  pages     = {293--295},
  year      = 1991
}

@Article{kuhn92cache,
  author  = {Kuhn, R. and de Mori, R.},
  title   = {A Cache based natural language model for speech recognition},
  journal = ieee-pami,
  year    = 1992
}

@InProceedings{kalai99online,
  author    = {Kalai, A. and Chen, S. and Blum, A. and Rosenfeld, R.},
  title     = {On-line Algorithms for combining language models},
  booktitle = icassp,
  year      = 1999
}

@InProceedings{clark97,
  author    = {Clarkson, P. and Robinson, A.},
  title     = {Language model adaptation using mixtures and an exponentially decaying cache},
  booktitle = icassp,
  year      = 1997,
  volume    = 2,
  pages     = {799--802}
}

@InProceedings{whittaker01temporal,
  author    = {Whittaker, E. W. D.},
  title     = {Temporal Adaptation of Language Models},
  booktitle = {Adaptation Methods for Speech Recognition, ISCA Tutorial and Research Workshop (ITRW)},
  location  = {Sophia Antipolis, France},
  year      = 2001,
  month     = aug,
  note      = {LM Adaptation for information retrieval of spoken news/radio
               programs (i.e. SpeechBot)}
}