2000.bib

@comment{{This file has been generated by bib2bib 1.92}}
@comment{{Command line: /home/korin/bibtex2html-1.92-LINUX/bib2bib -oc /home/korin/projects/publications/new_output/transitdata/2000-citations -ob /home/korin/projects/publications/new_output/transitdata/2000.bib -c 'year : "2000"' /home/korin/projects/publications/filtlists/full_publications_list.bib}}
@article{gotoh-roysoc00,
  author     = {Gotoh, Y. and Renals, S.},
  title      = {Information Extraction from Broadcast News},
  journal    = {Philosophical Transactions of the Royal Society of London, Series A},
  volume     = {358},
  pages      = {1295--1310},
  year       = {2000},
  ps         = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2000/rs00-preprint.ps.gz},
  pdf        = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2000/rs00-preprint.pdf},
  abstract   = {This paper discusses the development of trainable statistical models for extracting content from television and radio news broadcasts. In particular we concentrate on statistical finite state models for identifying proper names and other named entities in broadcast speech. Two models are presented: the first models name class information as a word attribute; the second explicitly models both word-word and class-class transitions. A common n-gram based formulation is used for both models. The task of named entity identification is characterized by relatively sparse training data and issues related to smoothing are discussed. Experiments are reported using the DARPA/NIST Hub-4E evaluation for North American Broadcast News.},
  categories = {stobs,ie,lm,bnews,sheffield}
}
@inproceedings{kessens-00,
  author = {Kessens, J.M. and Wester, M. and Strik, H.},
  title = {Automatic Detection and Verification of {D}utch Phonological Rules},
  booktitle = {PHONUS 5: Proceedings of the ``Workshop on Phonetics and Phonology in {ASR}''},
  address = {Saarbruecken},
  pages = {117--128},
  year = {2000},
  pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2000/kessens.2000.2.pdf},
  abstract = {In this paper, we propose two methods for automatically obtaining hypotheses about pronunciation variation. To this end, we used two different approaches in which we employed a continuous speech recognizer to derive this information from the speech signal. For the first method, the output of a phone recognition was compared to a reference transcription in order obtain hypotheses about pronunciation variation. Since phone recognition contains errors, we used forced recognition in order to exclude unreliable hypotheses. For the second method, forced recognition was also used, but the hypotheses about the deletion of phones were not constrained beforehand. This was achieved by allowing each phone to be deleted. After forced recognition, we selected the most frequently applied rules as the set of deletion rules. Since previous research showed that forced recognition is a reliable tool for testing hypotheses about pronunciation variation, we can expect that this will also hold for the hypotheses about pronunciation variation which we found using each of the two methods. Another reason for expecting the rule hypotheses to be reliable is that we found that 37-53\% of the rules are related to Dutch phonological processes that have been described in the literature.},
  categories = {asr, pm, VIOS, Nijmegen}
}
@inproceedings{Ban00,
  author = {Bangham, J.A. and Cox, S.J. and Lincoln, M. and Marshall, I. and Tutt, M. and Wells, M.},
  title = {Signing for the deaf using virtual humans},
  booktitle = {IEE Colloquium on Speech and Language processing for Disabled and Elderly},
  year = {2000},
  pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2000/iee2000-04PaperAFinal.pdf},
  abstract = {Research at Televirtual (Norwich) and the University of East Anglia, funded predominantly by the Independent Television Commission and more recently by the UK Post Office also, has investigated the feasibility of using virtual signing as a communication medium for presenting information to the Deaf. We describe and demonstrate the underlying virtual signer technology, and discuss the language processing techniques and discourse models which have been investigated for information communication in a transaction application in Post Offices, and for presentation of more general textual material in texts such as subtitles accompanying television programmes.},
  categories = {visicast,sign language,translation,UEA}
}
@article{Stolcke_2000_a,
  author = {Stolcke, Andreas and Coccaro, N. and Bates, R. and Taylor, P. and Van Ess-Dykema, C. and Ries, K. and Shriberg, Elizabeth and Jurafsky, D. and Martin, R. and Meteer, M.},
  ps = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2000/Stolcke_2000_a.ps},
  title = {Dialog Act Modeling for Automatic Tagging and Recognition of Conversational Speech},
  journal = {Computational Linguistics},
  number = {3},
  volume = {26},
  pages = {339--373},
  year = {2000},
  pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2000/Stolcke_2000_a.pdf},
  categories = {prosody, recognition, language modelling, dialogue, id4s}
}
@inproceedings{strom00,
  author = {Syrdal, Ann K. and Wightman, Colin W. and Conkie, Alistair and Stylianou, Yannis and Beutnagel, Mark and Schroeter, Juergen and Strom, Volker and Lee, Ki-Seung},
  ps = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2000/strom00.ps},
  title = {Corpus-based Techniques in the {AT\&T} {NEXTGEN} Synthesis System},
  booktitle = {Proc.~Int.~Conf.~on Spoken Language Processing},
  address = {Beijing},
  year = {2000},
  pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2000/strom00.pdf},
  abstract = {The AT\&T text-to-speech (TTS) synthesis system has been used as a framework for experimenting with a perceptually-guided data-driven approach to speech synthesis, with a primary focus on data-driven elements in the "back end". Statistical training techniques applied to a large corpus are used to make decisions about predicted speech events and selected speech inventory units. Our recent advances in automatic phonetic and prosodic labelling and a new faster harmonic plus noise model (HNM) and unit preselection implementations have significantly improved TTS quality and speeded up both development time and runtime.}
}
@article{renals-specom00,
  author     = {Renals, S. and Abberley, D. and Kirby, D. and Robinson, T.},
  title      = {Indexing and Retrieval of Broadcast News},
  journal    = {Speech Communication},
  volume     = {32},
  pages      = {5--20},
  year       = {2000},
  ps         = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2000/specom00-preprint.ps.gz},
  pdf        = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2000/specom00-preprint.pdf},
  abstract   = {This paper describes a spoken document retrieval (SDR) system for British and North American Broadcast News. The system is based on a connectionist large vocabulary speech recognizer and a probabilistic information retrieval system. We discuss the development of a realtime Broadcast News speech recognizer, and its integration into an SDR system. Two advances were made for this task: automatic segmentation and statistical query expansion using a secondary corpus. Precision and recall results using the Text Retrieval Conference (TREC) SDR evaluation infrastructure are reported throughout the paper, and we discuss the application of these developments to a large scale SDR task based on an archive of British English broadcast news.},
  categories = {thisl,bnews,trec,ir,recognition,sheffield}
}
@inproceedings{Matsuda2000ICSLP10,
  author = {Matsuda, Shigeki and Nakai, Mitsuru and Shimodaira, Hiroshi and Sagayama, Shigeki},
  title = {Feature-dependent Allophone Clustering},
  booktitle = {Proc. ICSLP2000},
  pages = {413--416},
  month = oct,
  year = {2000},
  pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2000/Matsuda2000ICSLP10.pdf},
  abstract = {We propose a novel method for clustering allophones called Feature-Dependent Allophone Clustering (FD-AC) that determines feature-dependent HMM topology automatically. Existing methods for allophone clustering are based on parameter sharing between the allophone models that resemble each other in behaviors of feature vector sequences. However, all the features of the vector sequences may not necessarily have a common allophone clustering structures. It is considered that the vector sequences can be better modeled by allocating the optimal allophone clustering structure to each feature. In this paper, we propose Feature-Dependent Successive State Splitting (FD-SSS) as an implementation of FD-AC. In speaker-dependent continuous phoneme recognition experiments, HMMs created by FD-SSS reduced the error rates by about 10\% compared with the conventional HMMs that have a common allophone clustering structure for all the features.},
  categories = {asr, atr, jaist}
}
@article{carreira-nc00,
  author = {Carreira-Perpi{\~n}{\'a}n, M. and Renals, S.},
  ps = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2000/nc00-preprint.ps.gz},
  title = {Practical identifiability of finite mixtures of multivariate {Bernoulli} distributions},
  journal = {Neural Computation},
  pages = {141--152},
  volume = {12},
  year = {2000},
  pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2000/nc00-preprint.pdf},
  abstract = {The class of finite mixtures of multivariate Bernoulli distributions is known to be nonidentifiable, i.e., different values of the mixture parameters can correspond to exactly the same probability distribution. In principle, this would mean that sample estimates using this model would give rise to different interpretations. We give empirical support to the fact that estimation of this class of mixtures can still produce meaningful results in practice, thus lessening the importance of the identifiability problem. We also show that the EM algorithm is guaranteed to converge to a proper maximum likelihood estimate, owing to a property of the log-likelihood surface. Experiments with synthetic data sets show that an original generating distribution can be estimated from a sample. Experiments with an electropalatography (EPG) data set show important structure in the data.},
  categories = {ml,lv,artic,sheffield}
}
@article{Taylor_2000_b,
  author = {Taylor, Paul},
  ps = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2000/Taylor_2000_b.ps},
  title = {Analysis and Synthesis of Intonation using the Tilt Model},
  journal = {Journal of the Acoustical Society of America},
  number = {3},
  volume = {107},
  year = {2000},
  pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2000/Taylor_2000_b.pdf},
  pages = {1697--1714},
  categories = {prosody, intonation, id4s}
}
@mastersthesis{Gutkin:00,
  author = {Gutkin, Alexander},
  ps = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2000/gutkin_mphil.ps.gz},
  school = {Department of Engineering, University of Cambridge},
  title = {{Log-Linear} {Interpolation} of {Language} {Models}},
  address = {UK},
  month = dec,
  year = {2000},
  pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2000/gutkin_mphil.pdf},
  type = {{MPhil.} thesis},
  categories = {statistical speech recognition, language modelling}
}
@phdthesis{Dusterhoff_2000_a,
  author = {Dusterhoff, Kurt},
  ps = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2000/Dusterhoff_2000_a.ps},
  school = {University of Edinburgh},
  title = {Synthesizing Fundamental Frequency Using Models Automatically Trained from Data},
  year = {2000},
  pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2000/Dusterhoff_2000_a.pdf},
  categories = {intonation, synthesis, prosody}
}
@inproceedings{Wester-00,
  author = {Wester, M. and Kessens, J.M. and Strik, H.},
  title = {Pronunciation variation in {ASR}: Which variation to model?},
  booktitle = {Proc. {ICSLP} '00},
  address = {Beijing},
  pages = {488--491},
  volume = {IV},
  year = {2000},
  pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2000/wester.2000.1.pdf},
  abstract = {This paper describes how the performance of a continuous speech recognizer for Dutch has been improved by modeling within-word and cross-word pronunciation variation. A relative improvement of 8.8\% in WER was found compared to baseline system performance. However, as WERs do not reveal the full effect of modeling pronunciation variation, we performed a detailed analysis of the differences in recognition results that occur due to modeling pronunciation variation and found that indeed a lot of the differences in recognition results are not reflected in the error rates. Furthermore, error analysis revealed that testing sets of variants in isolation does not predict their behavior in combination. However, these results appeared to be corpus dependent.},
  categories = {asr, pm, VIOS, Nijmegen}
}
@phdthesis{Wright_2000_a,
  author     = {Wright, Helen},
  title      = {Modelling Prosodic and Dialogue Information for Automatic Speech Recognition},
  school     = {University of Edinburgh},
  year       = {2000},
  ps         = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2000/Wright_2000_a.ps},
  pdf        = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2000/Wright_2000_a.pdf},
  categories = {prosody, dialogue, recognition, id4s}
}
@inproceedings{wrench2000b,
  author = {Wrench, A. and Richmond, K.},
  ps = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2000/Wrench_2000_a.ps},
  title = {Continuous Speech Recognition Using Articulatory Data},
  booktitle = {Proc. {ICSLP} 2000},
  address = {Beijing, China},
  year = {2000},
  pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2000/Wrench_2000_a.pdf},
  abstract = {In this paper we show that there is measurable information in the articulatory system which can help to disambiguate the acoustic signal. We measure directly the movement of the lips, tongue, jaw, velum and larynx and parameterise this articulatory feature space using principal components analysis. The parameterisation is developed and evaluated using a speaker dependent phone recognition task on a specially recorded TIMIT corpus of 460 sentences. The results show that there is useful supplementary information contained in the articulatory data which yields a small but significant improvement in phone recognition accuracy of 2\%. However, preliminary attempts to estimate the articulatory data from the acoustic signal and use this to supplement the acoustic input have not yielded any significant improvement in phone accuracy.},
  categories = {artic, asr, ann, mlp, hmm, inversion, mocha,edinburgh}
}
@inproceedings{Wester-Fosler-00,
  author = {Wester, M. and Fosler-Lussier, E.},
  title = {A comparison of data-derived and knowledge-based modeling of pronunciation variation},
  booktitle = {Proc. ICSLP '00},
  address = {Beijing},
  pages = {270--273},
  volume = {I},
  year = {2000},
  pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2000/wester.2000.2.pdf},
  abstract = {This paper focuses on modeling pronunciation variation in two different ways: data-derived and knowledge-based. The knowledge-based approach consists of using phonological rules to generate variants. The data-derived approach consists of performing phone recognition, followed by various pruning and smoothing methods to alleviate some of the errors in the phone recognition. Using phonological rules led to a small improvement in WER; whereas, using a data-derived approach in which the phone recognition was smoothed using simple decision trees (d-trees) prior to lexicon generation led to a significant improvement compared to the baseline. Furthermore, we found that 10\% of variants generated by the phonological rules were also found using phone recognition, and this increased to 23\% when the phone recognition output was smoothed by using d-trees. In addition, we propose a metric to measure confusability in the lexicon and we found that employing this confusion metric to prune variants results in roughly the same improvement as using the d-tree method.},
  categories = {asr, pm, VIOS, Berkeley}
}
@inproceedings{koumpis-icslp00,
  author = {Koumpis, K. and Renals, S.},
  ps = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2000/icslp00.ps.gz},
  title = {Transcription and Summarization of Voicemail Speech},
  booktitle = {Proc. ICSLP},
  address = {Beijing},
  pages = {688--691},
  volume = {2},
  year = {2000},
  pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2000/icslp00.pdf},
  abstract = {This paper describes the development of a system to transcribe and summarize voicemail messages. The results of the research presented in this paper are two-fold. First, a hybrid connectionist approach to the Voicemail transcription task shows that competitive performance can be achieved using a context-independent system with fewer parameters than those based on mixtures of Gaussian likelihoods. Second, an effective and robust combination of statistical with prior knowledge sources for term weighting is used to extract information from the decoder's output in order to deliver summaries to the message recipients via a GSM Short Message Service (SMS) gateway.},
  categories = {voicemail,summarization,sheffield}
}
@inproceedings{gotoh-icassp00,
  author = {Gotoh, Y. and Renals, S.},
  ps = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2000/icassp2000.ps.gz},
  title = {Variable word rate n-grams},
  booktitle = {Proc. IEEE ICASSP},
  address = {Istanbul},
  pages = {1591--1594},
  year = {2000},
  pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2000/icassp2000.pdf},
  abstract = {The rate of occurrence of words is not uniform but varies from document to document. Despite this observation, parameters for conventional n-gram language models are usually derived using the assumption of a constant word rate. In this paper we investigate the use of variable word rate assumption, modelled by a Poisson distribution or a continuous mixture of Poissons. We present an approach to estimating the relative frequencies of words or n-grams taking prior information of their occurrences into account. Discounting and smoothing schemes are also considered. Using the Broadcast News task, the approach demonstrates a reduction of perplexity up to 10\%.},
  categories = {stobs,lm,bnews,sheffield}
}
@article{Taylor_2000_a,
  author = {Taylor, P. A.},
  ps = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2000/Taylor_2000_a.ps},
  title = {Concept-to-Speech by Phonological Structure Matching},
  journal = {Philosophical Transactions of the Royal Society of London, Series A},
  year = {2000},
  pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2000/Taylor_2000_a.pdf},
  categories = {prosody, synthesis, unit selection, waveform generation, festival, unisyn}
}
@inproceedings{frankel00:NN_LDM,
  author     = {Frankel, J. and Richmond, K. and King, S. and Taylor, P.},
  title      = {An automatic speech recognition system using neural networks and linear dynamic models to recover and model articulatory traces},
  booktitle  = {Proc. {ICSLP}},
  year       = {2000},
  ps         = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2000/Frankel_et_al_ICSLP2000.ps},
  pdf        = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2000/Frankel_et_al_ICSLP2000.pdf},
  abstract   = {In this paper we describe a speech recognition system using linear dynamic models and articulatory features. Experiments are reported in which measured articulation from the MOCHA corpus has been used, along with those where the articulatory parameters are estimated from the speech signal using a recurrent neural network.},
  categories = {am,artic,asr,ldm,mocha,edinburgh,inversion,ann}
}
@inproceedings{gotoh-asr2000,
  author     = {Gotoh, Y. and Renals, S.},
  title      = {Sentence Boundary Detection in Broadcast Speech Transcripts},
  booktitle  = {ISCA ITRW: ASR2000},
  address    = {Paris},
  pages      = {228--235},
  year       = {2000},
  ps         = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2000/asr2000.ps.gz},
  pdf        = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2000/asr2000.pdf},
  abstract   = {This paper presents an approach to identifying sentence boundaries in broadcast speech transcripts. We describe finite state models that extract sentence boundary information statistically from text and audio sources. An n-gram language model is constructed from a collection of British English news broadcasts and scripts. An alternative model is estimated from pause duration information in speech recogniser outputs aligned with their programme script counterparts. Experimental results show that the pause duration model alone outperforms the language modelling approach and that, by combining these two models, it can be improved further and precision and recall scores of over 70\% were attained for the task.},
  categories = {stobs,ie,lm,prosody,bnews,sheffield}
}
@inproceedings{Matsuda2000ICASSP,
  author = {Matsuda, Shigeki and Nakai, Mitsuru and Shimodaira, Hiroshi and Sagayama, Shigeki},
  title = {Asynchronous-Transition {HMM}},
  booktitle = {Proc. ICASSP 2000},
  address = {Istanbul, Turkey},
  volume = {II},
  month = jun,
  pages = {1001--1004},
  year = {2000},
  pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2000/Matsuda2000ICASSP.pdf},
  abstract = {We propose a new class of hidden Markov model (HMM) called asynchronous-transition HMM (AT-HMM). Opposed to conventional HMMs where hidden state transition occurs simultaneously to all features, the new class of HMM allows state transitions asynchronous between individual features to better model asynchronous timings of acoustic feature changes. In this paper, we focus on a particular class of AT-HMM with sequential constraints introducing a concept of ``state tying across time''. To maximize the advantage of the new model, we also introduce feature-wise state tying technique. Speaker-dependent speech recognition experiments demonstrated that reduced error rates more than 30\% and 50\% in phoneme and isolated word recognition, respectively, compared with conventional HMMs.},
  categories = {asr, atr, jaist}
}
@inproceedings{wester00:_using_dutch_asr,
  author = {Wester, M. and Kessens, J.M. and Strik, H.},
  title = {Using {D}utch phonological rules to model pronunciation variation in {ASR}},
  booktitle = {PHONUS 5: Proceedings of the ``Workshop on Phonetics and Phonology in {ASR}''},
  address = {Saarbruecken},
  pages = {105--116},
  year = {2000},
  pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2000/wester.2000.3.pdf},
  abstract = {In this paper, we describe how the performance of a continuous speech recognizer for Dutch has been improved by modeling within-word and cross-word pronunciation variation. Within-word variants were automatically generated by applying five phonological rules to the words in the lexicon. Cross-word pronunciation variation was modeled by adding multi-words and their variants to the lexicon. The best results were obtained when the cross-word method was combined with the within-word method: a relative improvement of 8.8\% in the WER was found compared to baseline system performance. We also describe an error analysis that was carried out to investigate whether rules in isolation can predict the performance of rules in combination.},
  categories = {asr, pm, VIOS, Nijmegen}
}
@phdthesis{mayo:00,
  author     = {Mayo, C.},
  title      = {The relationship between phonemic awareness and cue weighting in speech perception: longitudinal and cross-sectional child studies},
  school     = {Queen Margaret University College},
  year       = {2000},
  pdf        = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2000/thesis.pdf},
  categories = {speech perception, development, cue weighting, phonemic awareness, literacy}
}
@inproceedings{Goubanova-Taylor:2000,
  author    = {Goubanova, O. and Taylor, P.},
  title     = {Using {B}ayesian {B}elief Networks for model duration in text-to-speech systems},
  booktitle = {CD-ROM Proc. ICSLP 2000},
  address   = {Beijing, China},
  year      = {2000}
}
@inproceedings{Morais_2000_a,
  author     = {Morais, Edmilson and Taylor, Paul and Violaro, Fabio},
  title      = {Concatenative Text-To-Speech Synthesis Based On Prototype Waveform Interpolation (A Time Frequency Approach)},
  booktitle  = {Proc. ICSLP 2000},
  address    = {Beijing, China},
  year       = {2000},
  ps         = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2000/Morais_2000_a.ps},
  pdf        = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2000/Morais_2000_a.pdf},
  categories = {waveform generation, festival}
}
@inproceedings{king00:recognition_syll,
  author = {King, S. and Taylor, P. and Frankel, J. and Richmond, K.},
  ps = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2000/King_et_al_Phonus2000.ps},
  title = {Speech recognition via phonetically-featured syllables},
  booktitle = {PHONUS},
  address = {Institute of Phonetics, University of the Saarland},
  pages = {15--34},
  volume = {5},
  year = {2000},
  pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2000/King_et_al_Phonus2000.pdf},
  abstract = {We describe recent work on two new automatic speech recognition systems. The first part of this paper describes the components of a system based on phonological features (which we call EspressoA) in which the values of these features are estimated from the speech signal before being used as the basis for recognition. In the second part of the paper, another system (which we call EspressoB) is described in which articulatory parameters are used instead of phonological features and a linear dynamical system model is used to perform recognition from automatically estimated values of these articulatory parameters.},
  categories = {am,artic,asr,ldm,phonetic_feature,mocha,timit,edinburgh}
}
@inproceedings{Shimodaira2000ICSLP10,
  author = {Shimodaira, Hiroshi and Akae, Toshihiko and Nakai, Mitsuru and Sagayama, Shigeki},
  title = {Jacobian Adaptation of {HMM} with Initial Model Selection for Noisy Speech Recognition},
  booktitle = {Proc. ICSLP2000},
  month = oct,
  pages = {1003--1006},
  year = {2000},
  pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2000/Shimodaira2000ICSLP10.pdf},
  abstract = {An extension of Jacobian Adaptation (JA) of HMMs for degraded speech recognition is presented in which appropriate set of initial models is selected from a number of initial-model sets designed for different noise environments. Based on the first order Taylor series approximation in the acoustic feature domain, JA adapts the acoustic model parameters trained in the initial noise environment A to the new environment B much faster than PMC that creates the acoustic models for the target environment from scratch. Despite the advantage of JA to PMC, JA has a theoretical limitation that the change of acoustic parameters from the environment A to B should be small in order that the linear approximation holds. To extend the coverage of JA, the ideas of multiple sets of initial models and their automatic selection scheme are discussed. Speaker-dependent isolated-word recognition experiments are carried out to evaluate the proposed method.},
  categories = {asr, jaist}
}
@article{king:taylor:csl2000,
  author = {King, Simon and Taylor, Paul},
  ps = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2000/King_Taylor_csl2000.ps},
  title = {Detection of Phonological Features in Continuous Speech using Neural Networks},
  journal = {Computer {S}peech and {L}anguage},
  number = {4},
  pages = {333--353},
  volume = {14},
  year = {2000},
  pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2000/King_Taylor_csl2000.pdf},
  abstract = {We report work on the first component of a two stage speech recognition architecture based on phonological features rather than phones. The paper reports experiments on three phonological feature systems: 1) the Sound Pattern of English (SPE) system which uses binary features, 2)a multi valued (MV) feature system which uses traditional phonetic categories such as manner, place etc, and 3) Government Phonology (GP) which uses a set of structured primes. All experiments used recurrent neural networks to perform feature detection. In these networks the input layer is a standard framewise cepstral representation, and the output layer represents the values of the features. The system effectively produces a representation of the most likely phonological features for each input frame. All experiments were carried out on the TIMIT speaker independent database. The networks performed well in all cases, with the average accuracy for a single feature ranging from 86 to 93 percent. We describe these experiments in detail, and discuss the justification and potential advantages of using phonological features rather than phones for the basis of speech recognition.},
  categories = {}
}
@inproceedings{abberley-trec00,
  author = {Abberley, D. and Renals, S. and Ellis, D. and Robinson, T.},
  ps = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2000/trec8.ps.gz},
  title = {The {THISL} {SDR} system at {TREC}--8},
  booktitle = {Proc. Eighth Text Retrieval Conference (TREC--8)},
  year = {2000},
  pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2000/trec8.pdf},
  abstract = {This paper describes the participation of the THISL group at the TREC-8 Spoken Document Retrieval (SDR) track. The THISL SDR system consists of the realtime version of the Abbot large vocabulary speech recognition system and the thislIR text retrieval system. The TREC-8 evaluation assessed SDR performance on a corpus of 500 hours of broadcast news material collected over a five month period. The main test condition involved retrieval of stories defined by manual segmentation of the corpus in which non-news material, such as commercials, were excluded. An optional test condition required retrieval of the same stories from the unsegmented audio stream. The THISL SDR system participated at both test conditions. The results show that a system such as THISL can produce respectable information retrieval performance on a realistically-sized corpus of unsegmented audio material.},
  categories = {thisl,bnews,trec,ir,recognition,eval,sheffield}
}