2000.bib

@comment{{This file has been generated by bib2bib 1.92}}
@comment{{Command line: /home/korin/bibtex2html-1.92-LINUX/bib2bib -oc /home/korin/projects/publications/new_output/transitdata/2000-citations -ob /home/korin/projects/publications/new_output/transitdata/2000.bib -c 'year : "2000"' /home/korin/projects/publications/filtlists/full_publications_list.bib}}
@article{gotoh-roysoc00,
  author = {Y.~Gotoh and S.~Renals},
  title = {Information Extraction from Broadcast News},
  journal = {Philosophical Transactions of the Royal Society of
                   London, Series A},
  volume = {358},
  pages = {1295--1310},
  abstract = {This paper discusses the development of trainable
                   statistical models for extracting content from
                   television and radio news broadcasts. In particular we
                   concentrate on statistical finite state models for
                   identifying proper names and other named entities in
                   broadcast speech. Two models are presented: the first
                   models name class information as a word attribute; the
                   second explicitly models both word-word and class-class
                   transitions. A common n-gram based formulation is used
                   for both models. The task of named entity
                   identification is characterized by relatively sparse
                   training data and issues related to smoothing are
                   discussed. Experiments are reported using the
                   DARPA/NIST Hub-4E evaluation for North American
                   Broadcast News.},
  categories = {stobs,ie,lm,bnews,sheffield},
  pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2000/rs00-preprint.pdf},
  ps = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2000/rs00-preprint.ps.gz},
  year = 2000
}
@inproceedings{kessens-00,
  author = {J.M. Kessens and M. Wester and H. Strik},
  title = {Automatic Detection and Verification of {D}utch
                   Phonological Rules},
  booktitle = {PHONUS 5: Proceedings of the ``Workshop on Phonetics
                    and Phonology in {ASR}''},
  pages = {117--128},
  address = {Saarbr{\"u}cken},
  abstract = {In this paper, we propose two methods for
                   automatically obtaining hypotheses about pronunciation
                   variation. To this end, we used two different
                   approaches in which we employed a continuous speech
                   recognizer to derive this information from the speech
                   signal. For the first method, the output of a phone
                   recognition was compared to a reference transcription
                    in order to obtain hypotheses about pronunciation
                   variation. Since phone recognition contains errors, we
                   used forced recognition in order to exclude unreliable
                   hypotheses. For the second method, forced recognition
                   was also used, but the hypotheses about the deletion of
                   phones were not constrained beforehand. This was
                   achieved by allowing each phone to be deleted. After
                   forced recognition, we selected the most frequently
                   applied rules as the set of deletion rules. Since
                   previous research showed that forced recognition is a
                   reliable tool for testing hypotheses about
                   pronunciation variation, we can expect that this will
                   also hold for the hypotheses about pronunciation
                   variation which we found using each of the two methods.
                   Another reason for expecting the rule hypotheses to be
                   reliable is that we found that 37-53\% of the rules are
                   related to Dutch phonological processes that have been
                   described in the literature.},
  categories = {asr, pm, VIOS, Nijmegen},
  pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2000/kessens.2000.2.pdf},
  year = 2000
}
@inproceedings{Ban00,
  author = {Bangham, J.A. and Cox, S.J. and Lincoln, M. and
                    Marshall, I. and Tutt, M. and Wells, M.},
  title = {Signing for the deaf using virtual humans},
  booktitle = {IEE Colloquium on Speech and Language processing for
                   Disabled and Elderly},
  abstract = {Research at Televirtual (Norwich) and the University
                   of East Anglia, funded predominantly by the Independent
                   Television Commission and more recently by the UK Post
                   Office also, has investigated the feasibility of using
                   virtual signing as a communication medium for
                   presenting information to the Deaf. We describe and
                   demonstrate the underlying virtual signer technology,
                   and discuss the language processing techniques and
                   discourse models which have been investigated for
                   information communication in a transaction application
                   in Post Offices, and for presentation of more general
                   textual material in texts such as subtitles
                   accompanying television programmes.},
  categories = {visicast,sign language,translation,UEA},
  pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2000/iee2000-04PaperAFinal.pdf},
  year = 2000
}
@article{Stolcke_2000_a,
  author = {Andreas Stolcke and N. Coccaro and R. Bates and P.
                   Taylor and C. Van Ess-Dykema and K. Ries and Elizabeth
                    Shriberg and D. Jurafsky and R. Martin and M. Meteer},
  title = {Dialog Act Modeling for Automatic Tagging and
                   Recognition of Conversational Speech},
  journal = {Computational Linguistics},
  volume = 26,
  number = 3,
  categories = {prosody, recognition, language modelling, dialogue,
                   id4s},
  pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2000/Stolcke_2000_a.pdf},
  ps = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2000/Stolcke_2000_a.ps},
  year = 2000
}
@inproceedings{strom00,
  author = {Ann K. Syrdal and Colin W. Wightman and Alistair
                   Conkie and Yannis Stylianou and Mark Beutnagel and
                   Juergen Schroeter and Volker Strom and Ki-Seung Lee},
  title = {Corpus-based Techniques in the {AT\&T} {NEXTGEN} Synthesis
                   System},
  booktitle = {Proc.~Int.~Conf.~on Spoken Language Processing},
  address = {Beijing},
  abstract = {The AT\&T text-to-speech (TTS) synthesis system has
                   been used as a framework for experimenting with a
                   perceptually-guided data-driven approach to speech
                   synthesis, with a primary focus on data-driven elements
                    in the ``back end''. Statistical training techniques
                   applied to a large corpus are used to make decisions
                   about predicted speech events and selected speech
                   inventory units. Our recent advances in automatic
                   phonetic and prosodic labelling and a new faster
                    harmonic plus noise model (HNM) and unit preselection
                   implementations have significantly improved TTS quality
                   and speeded up both development time and runtime.},
  pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2000/strom00.pdf},
  ps = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2000/strom00.ps},
  year = 2000
}
@article{renals-specom00,
  author = {S.~Renals and D.~Abberley and D.~Kirby and T.~Robinson},
  title = {Indexing and Retrieval of Broadcast News},
  journal = {Speech Communication},
  volume = {32},
  pages = {5--20},
  abstract = {This paper describes a spoken document retrieval (SDR)
                   system for British and North American Broadcast News.
                   The system is based on a connectionist large vocabulary
                   speech recognizer and a probabilistic information
                   retrieval system. We discuss the development of a
                   realtime Broadcast News speech recognizer, and its
                   integration into an SDR system. Two advances were made
                   for this task: automatic segmentation and statistical
                   query expansion using a secondary corpus. Precision and
                   recall results using the Text Retrieval Conference
                   (TREC) SDR evaluation infrastructure are reported
                   throughout the paper, and we discuss the application of
                   these developments to a large scale SDR task based on
                   an archive of British English broadcast news.},
  categories = {thisl,bnews,trec,ir,recognition,sheffield},
  pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2000/specom00-preprint.pdf},
  ps = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2000/specom00-preprint.ps.gz},
  year = 2000
}
@inproceedings{Matsuda2000ICSLP10,
  author = {Shigeki Matsuda and Mitsuru Nakai and Hiroshi
                   Shimodaira and Shigeki Sagayama},
  title = {Feature-dependent Allophone Clustering},
  booktitle = {Proc. ICSLP2000},
  pages = {413--416},
  abstract = { We propose a novel method for clustering allophones
                   called Feature-Dependent Allophone Clustering (FD-AC)
                   that determines feature-dependent HMM topology
                   automatically. Existing methods for allophone
                   clustering are based on parameter sharing between the
                   allophone models that resemble each other in behaviors
                   of feature vector sequences. However, all the features
                   of the vector sequences may not necessarily have a
                   common allophone clustering structures. It is
                   considered that the vector sequences can be better
                   modeled by allocating the optimal allophone clustering
                   structure to each feature. In this paper, we propose
                   Feature-Dependent Successive State Splitting (FD-SSS)
                   as an implementation of FD-AC. In speaker-dependent
                   continuous phoneme recognition experiments, HMMs
                    created by FD-SSS reduced the error rates by about 10\%
                   compared with the conventional HMMs that have a common
                   allophone clustering structure for all the features. },
  categories = {asr, atr, jaist},
  month = oct,
  pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2000/Matsuda2000ICSLP10.pdf},
  year = 2000
}
@article{carreira-nc00,
  author = {M.~Carreira-Perpi{\~n}{\'a}n and S.~Renals},
  title = {Practical identifiability of finite mixtures of
                   multivariate {Bernoulli} distributions},
  journal = {Neural Computation},
  volume = {12},
  pages = {141--152},
  abstract = {The class of finite mixtures of multivariate Bernoulli
                   distributions is known to be nonidentifiable, i.e.,
                   different values of the mixture parameters can
                   correspond to exactly the same probability
                   distribution. In principle, this would mean that sample
                   estimates using this model would give rise to different
                   interpretations. We give empirical support to the fact
                   that estimation of this class of mixtures can still
                   produce meaningful results in practice, thus lessening
                   the importance of the identifiability problem. We also
                   show that the EM algorithm is guaranteed to converge to
                   a proper maximum likelihood estimate, owing to a
                   property of the log-likelihood surface. Experiments
                   with synthetic data sets show that an original
                   generating distribution can be estimated from a sample.
                   Experiments with an electropalatography (EPG) data set
                   show important structure in the data.},
  categories = {ml,lv,artic,sheffield},
  pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2000/nc00-preprint.pdf},
  ps = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2000/nc00-preprint.ps.gz},
  year = 2000
}
@article{Taylor_2000_b,
  author = {Paul Taylor},
  title = {Analysis and Synthesis of Intonation using the Tilt
                   Model},
  journal = {Journal of the Acoustical Society of America},
  volume = 107,
  number = 3,
  pages = {1697--1714},
  categories = {prosody, intonation, id4s},
  pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2000/Taylor_2000_b.pdf},
  ps = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2000/Taylor_2000_b.ps},
  year = 2000
}
@mastersthesis{Gutkin:00,
  author = {Alexander Gutkin},
  title = {{L}og-{L}inear {I}nterpolation of {L}anguage {M}odels},
  school = {Department of Engineering, University of Cambridge},
  type = {{MPhil.} thesis},
  address = {UK},
  categories = {statistical speech recognition, language modelling},
  month = dec,
  pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2000/gutkin_mphil.pdf},
  ps = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2000/gutkin_mphil.ps.gz},
  year = 2000
}
@phdthesis{Dusterhoff_2000_a,
  author = {Kurt Dusterhoff},
  title = {Synthesizing Fundamental Frequency Using Models
                   Automatically Trained from Data},
  school = {University of Edinburgh},
  categories = {intonation, synthesis, prosody},
  pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2000/Dusterhoff_2000_a.pdf},
  ps = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2000/Dusterhoff_2000_a.ps},
  year = 2000
}
@inproceedings{Wester-00,
  author = {M. Wester and J.M. Kessens and H. Strik},
  title = {Pronunciation variation in {ASR}: Which variation to
                   model?},
  booktitle = {Proc. of {ICSLP} '00},
  volume = {IV},
  pages = {488--491},
  address = {Beijing},
  abstract = {This paper describes how the performance of a
                   continuous speech recognizer for Dutch has been
                   improved by modeling within-word and cross-word
                   pronunciation variation. A relative improvement of
                   8.8\% in WER was found compared to baseline system
                   performance. However, as WERs do not reveal the full
                   effect of modeling pronunciation variation, we
                   performed a detailed analysis of the differences in
                   recognition results that occur due to modeling
                   pronunciation variation and found that indeed a lot of
                   the differences in recognition results are not
                   reflected in the error rates. Furthermore, error
                   analysis revealed that testing sets of variants in
                   isolation does not predict their behavior in
                   combination. However, these results appeared to be
                   corpus dependent.},
  categories = {asr, pm, VIOS, Nijmegen},
  pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2000/wester.2000.1.pdf},
  year = 2000
}
@phdthesis{Wright_2000_a,
  author = {Helen Wright},
  title = {Modelling Prosodic and Dialogue Information for
                   Automatic Speech Recognition},
  school = {University of Edinburgh},
  categories = {prosody, dialogue, recognition, id4s},
  pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2000/Wright_2000_a.pdf},
  ps = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2000/Wright_2000_a.ps},
  year = 2000
}
@inproceedings{wrench2000b,
  author = {Wrench, A. and Richmond, K.},
  title = {Continuous Speech Recognition Using Articulatory Data},
  booktitle = {Proc. {ICSLP} 2000},
  address = {Beijing, China},
  abstract = {In this paper we show that there is measurable
                   information in the articulatory system which can help
                   to disambiguate the acoustic signal. We measure
                   directly the movement of the lips, tongue, jaw, velum
                   and larynx and parameterise this articulatory feature
                    space using principal components analysis. The
                   parameterisation is developed and evaluated using a
                   speaker dependent phone recognition task on a specially
                   recorded TIMIT corpus of 460 sentences. The results
                   show that there is useful supplementary information
                   contained in the articulatory data which yields a small
                   but significant improvement in phone recognition
                   accuracy of 2\%. However, preliminary attempts to
                   estimate the articulatory data from the acoustic signal
                   and use this to supplement the acoustic input have not
                   yielded any significant improvement in phone accuracy.},
  categories = {artic, asr, ann, mlp, hmm, inversion, mocha,edinburgh},
  pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2000/Wrench_2000_a.pdf},
  ps = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2000/Wrench_2000_a.ps},
  year = 2000
}
@inproceedings{Wester-Fosler-00,
  author = {M. Wester and E. Fosler-Lussier},
  title = {A comparison of data-derived and knowledge-based
                   modeling of pronunciation variation},
  booktitle = {Proc. of ICSLP '00},
  volume = {I},
  pages = {270--273},
  address = {Beijing},
  abstract = {This paper focuses on modeling pronunciation variation
                   in two different ways: data-derived and
                   knowledge-based. The knowledge-based approach consists
                   of using phonological rules to generate variants. The
                   data-derived approach consists of performing phone
                   recognition, followed by various pruning and smoothing
                   methods to alleviate some of the errors in the phone
                   recognition. Using phonological rules led to a small
                   improvement in WER; whereas, using a data-derived
                   approach in which the phone recognition was smoothed
                   using simple decision trees (d-trees) prior to lexicon
                   generation led to a significant improvement compared to
                   the baseline. Furthermore, we found that 10\% of
                   variants generated by the phonological rules were also
                   found using phone recognition, and this increased to
                   23\% when the phone recognition output was smoothed by
                   using d-trees. In addition, we propose a metric to
                   measure confusability in the lexicon and we found that
                   employing this confusion metric to prune variants
                   results in roughly the same improvement as using the
                   d-tree method.},
  categories = {asr, pm, VIOS, Berkeley},
  pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2000/wester.2000.2.pdf},
  year = 2000
}
@inproceedings{koumpis-icslp00,
  author = {K.~Koumpis and S. Renals},
  title = {Transcription and Summarization of Voicemail Speech},
  booktitle = {Proc. ICSLP},
  volume = {2},
  pages = {688--691},
  address = {Beijing},
  abstract = {This paper describes the development of a system to
                   transcribe and summarize voicemail messages. The
                   results of the research presented in this paper are
                   two-fold. First, a hybrid connectionist approach to the
                   Voicemail transcription task shows that competitive
                   performance can be achieved using a context-independent
                   system with fewer parameters than those based on
                   mixtures of Gaussian likelihoods. Second, an effective
                   and robust combination of statistical with prior
                   knowledge sources for term weighting is used to extract
                   information from the decoders output in order to
                   deliver summaries to the message recipients via a GSM
                   Short Message Service (SMS) gateway.},
  categories = {voicemail,summarization,sheffield},
  pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2000/icslp00.pdf},
  ps = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2000/icslp00.ps.gz},
  year = 2000
}
@inproceedings{gotoh-icassp00,
  author = {Y.~Gotoh and S.~Renals},
  title = {Variable word rate n-grams},
  booktitle = {Proc IEEE ICASSP},
  pages = {1591--1594},
  address = {Istanbul},
  abstract = {The rate of occurrence of words is not uniform but
                   varies from document to document. Despite this
                   observation, parameters for conventional n-gram
                   language models are usually derived using the
                   assumption of a constant word rate. In this paper we
                   investigate the use of variable word rate assumption,
                   modelled by a Poisson distribution or a continuous
                   mixture of Poissons. We present an approach to
                   estimating the relative frequencies of words or n-grams
                   taking prior information of their occurrences into
                   account. Discounting and smoothing schemes are also
                   considered. Using the Broadcast News task, the approach
                   demonstrates a reduction of perplexity up to 10\%.},
  categories = {stobs,lm,bnews,sheffield},
  pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2000/icassp2000.pdf},
  ps = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2000/icassp2000.ps.gz},
  year = 2000
}
@article{Taylor_2000_a,
  author = {P A Taylor},
  title = {Concept-to-Speech by Phonological Structure Matching},
  journal = {Philosophical Transactions of the Royal Society,
                   Series A},
  categories = {prosody, synthesis, unit selection, waveform
                   generation, festival, unisyn},
  pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2000/Taylor_2000_a.pdf},
  ps = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2000/Taylor_2000_a.ps},
  year = 2000
}
@inproceedings{frankel00:NN_LDM,
  author = {Frankel, J. and Richmond, K. and King, S. and Taylor,
                   P.},
  title = {An automatic speech recognition system using neural
                   networks and linear dynamic models to recover and model
                   articulatory traces},
  booktitle = {Proc. {ICSLP}},
  abstract = {In this paper we describe a speech recognition system
                   using linear dynamic models and articulatory features.
                   Experiments are reported in which measured articulation
                   from the MOCHA corpus has been used, along with those
                   where the articulatory parameters are estimated from
                   the speech signal using a recurrent neural network.},
  categories = {am,artic,asr,ldm,mocha,edinburgh,inversion,ann},
  pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2000/Frankel_et_al_ICSLP2000.pdf},
  ps = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2000/Frankel_et_al_ICSLP2000.ps},
  year = 2000
}
@inproceedings{gotoh-asr2000,
  author = {Y.~Gotoh and S.~Renals},
  title = {Sentence Boundary Detection in Broadcast Speech
                   Transcripts},
  booktitle = {ISCA ITRW: ASR2000},
  pages = {228--235},
  address = {Paris},
  abstract = {This paper presents an approach to identifying
                   sentence boundaries in broadcast speech transcripts. We
                   describe finite state models that extract sentence
                   boundary information statistically from text and audio
                   sources. An n-gram language model is constructed from a
                   collection of British English news broadcasts and
                   scripts. An alternative model is estimated from pause
                   duration information in speech recogniser outputs
                   aligned with their programme script counterparts.
                   Experimental results show that the pause duration model
                   alone outperforms the language modelling approach and
                   that, by combining these two models, it can be improved
                   further and precision and recall scores of over 70\%
                   were attained for the task.},
  categories = {stobs,ie,lm,prosody,bnews,sheffield},
  pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2000/asr2000.pdf},
  ps = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2000/asr2000.ps.gz},
  year = 2000
}
@inproceedings{Matsuda2000ICASSP,
  author = {Shigeki Matsuda and Mitsuru Nakai and Hiroshi
                   Shimodaira and Shigeki Sagayama},
  title = {Asynchronous-Transition {HMM}},
  booktitle = {Proc. ICASSP 2000},
  volume = {II},
  address = {Istanbul, Turkey},
  pages = {1001--1004},
  abstract = { We propose a new class of hidden Markov model (HMM)
                   called asynchronous-transition HMM (AT-HMM). Opposed to
                   conventional HMMs where hidden state transition occurs
                   simultaneously to all features, the new class of HMM
                   allows state transitions asynchronous between
                   individual features to better model asynchronous
                   timings of acoustic feature changes. In this paper, we
                   focus on a particular class of AT-HMM with sequential
                   constraints introducing a concept of ``state tying
                   across time''. To maximize the advantage of the new
                   model, we also introduce feature-wise state tying
                   technique. Speaker-dependent speech recognition
                   experiments demonstrated that reduced error rates more
                   than 30\% and 50\% in phoneme and isolated word
                   recognition, respectively, compared with conventional
                   HMMs. },
  categories = {asr, atr, jaist},
  month = jun,
  pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2000/Matsuda2000ICASSP.pdf},
  year = 2000
}
@inproceedings{wester00:_using_dutch_asr,
  author = {M. Wester and J.M. Kessens and H. Strik},
  title = {Using {D}utch phonological rules to model
                   pronunciation variation in {ASR}},
  booktitle = {Phonus 5: proceedings of the ``workshop on phonetics
                    and phonology in {ASR}''},
  pages = {105--116},
  address = {Saarbr{\"u}cken},
  abstract = {In this paper, we describe how the performance of a
                   continuous speech recognizer for Dutch has been
                   improved by modeling within-word and cross-word
                   pronunciation variation. Within-word variants were
                   automatically generated by applying five phonological
                   rules to the words in the lexicon. Cross-word
                   pronunciation variation was modeled by adding
                   multi-words and their variants to the lexicon. The best
                   results were obtained when the cross-word method was
                   combined with the within-word method: a relative
                   improvement of 8.8\% in the WER was found compared to
                   baseline system performance. We also describe an error
                   analysis that was carried out to investigate whether
                   rules in isolation can predict the performance of rules
                   in combination.},
  categories = {asr, pm, VIOS, Nijmegen},
  pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2000/wester.2000.3.pdf},
  year = 2000
}
@phdthesis{mayo:00,
  author = {Mayo, C.},
  title = {The relationship between phonemic awareness and cue
                   weighting in speech perception: longitudinal and
                   cross-sectional child studies},
  school = {Queen Margaret University College},
  categories = {speech perception, development, cue weighting,
                   phonemic awareness, literacy},
  pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2000/thesis.pdf},
  year = 2000
}
@inproceedings{Goubanova-Taylor:2000,
  author = {Goubanova, O. and Taylor, P.},
  title = {Using {B}ayesian {B}elief Networks for model duration
                   in text-to-speech systems},
  booktitle = {CD-ROM Proc. ICSLP 2000},
  address = {Beijing, China},
  year = 2000
}
@inproceedings{Morais_2000_a,
  author = {Edmilson Morais and Paul Taylor and Fabio Violaro},
  title = {Concatenative Text-To-Speech Synthesis Based On
                   Prototype Waveform Interpolation (A Time Frequency
                   Approach)},
  booktitle = {Proc. ICSLP 2000},
  address = {Beijing, China},
  categories = {waveform generation, festival},
  pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2000/Morais_2000_a.pdf},
  ps = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2000/Morais_2000_a.ps},
  year = 2000
}
@inproceedings{king00:recognition_syll,
  author = {King, S. and Taylor, P. and Frankel, J. and Richmond,
                   K.},
  title = {Speech recognition via phonetically-featured syllables},
  booktitle = {PHONUS},
  volume = {5},
  pages = {15--34},
  address = {Institute of Phonetics, University of the Saarland},
  abstract = {We describe recent work on two new automatic speech
                   recognition systems. The first part of this paper
                   describes the components of a system based on
                   phonological features (which we call EspressoA) in
                   which the values of these features are estimated from
                   the speech signal before being used as the basis for
                   recognition. In the second part of the paper, another
                   system (which we call EspressoB) is described in which
                   articulatory parameters are used instead of
                   phonological features and a linear dynamical system
                   model is used to perform recognition from automatically
                   estimated values of these articulatory parameters.},
  categories = {am,artic,asr,ldm,phonetic_feature,mocha,timit,edinburgh},
  pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2000/King_et_al_Phonus2000.pdf},
  ps = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2000/King_et_al_Phonus2000.ps},
  year = 2000
}
@inproceedings{Shimodaira2000ICSLP10,
  author = {Hiroshi Shimodaira and Toshihiko Akae and Mitsuru
                   Nakai and Shigeki Sagayama},
  title = {{Jacobian} Adaptation of {HMM} with Initial Model
                    Selection for Noisy Speech Recognition},
  booktitle = {Proc. ICSLP2000},
  pages = {1003--1006},
  abstract = { An extension of Jacobian Adaptation (JA) of HMMs for
                   degraded speech recognition is presented in which
                   appropriate set of initial models is selected from a
                   number of initial-model sets designed for different
                   noise environments. Based on the first order Taylor
                   series approximation in the acoustic feature domain, JA
                   adapts the acoustic model parameters trained in the
                   initial noise environment A to the new environment B
                   much faster than PMC that creates the acoustic models
                   for the target environment from scratch. Despite the
                   advantage of JA to PMC, JA has a theoretical limitation
                   that the change of acoustic parameters from the
                   environment A to B should be small in order that the
                   linear approximation holds. To extend the coverage of
                   JA, the ideas of multiple sets of initial models and
                   their automatic selection scheme are discussed.
                   Speaker-dependent isolated-word recognition experiments
                   are carried out to evaluate the proposed method. },
  categories = {asr, jaist},
  month = oct,
  pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2000/Shimodaira2000ICSLP10.pdf},
  year = 2000
}
@article{king:taylor:csl2000,
  author = {Simon King and Paul Taylor},
  title = {Detection of Phonological Features in Continuous
                   Speech using Neural Networks},
  journal = {Computer Speech and Language},
  volume = 14,
  number = 4,
  pages = {333--353},
  abstract = {We report work on the first component of a two stage
                   speech recognition architecture based on phonological
                   features rather than phones. The paper reports
                   experiments on three phonological feature systems: 1)
                   the Sound Pattern of English (SPE) system which uses
                   binary features, 2)a multi valued (MV) feature system
                   which uses traditional phonetic categories such as
                   manner, place etc, and 3) Government Phonology (GP)
                   which uses a set of structured primes. All experiments
                   used recurrent neural networks to perform feature
                   detection. In these networks the input layer is a
                   standard framewise cepstral representation, and the
                   output layer represents the values of the features. The
                   system effectively produces a representation of the
                   most likely phonological features for each input frame.
                   All experiments were carried out on the TIMIT speaker
                   independent database. The networks performed well in
                   all cases, with the average accuracy for a single
                   feature ranging from 86 to 93 percent. We describe
                   these experiments in detail, and discuss the
                   justification and potential advantages of using
                   phonological features rather than phones for the basis
                   of speech recognition.},
  categories = {},
  pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2000/King_Taylor_csl2000.pdf},
  ps = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2000/King_Taylor_csl2000.ps},
  year = 2000
}
@inproceedings{abberley-trec00,
  author = {D.~Abberley and S.~Renals and D.~Ellis and T.~Robinson},
  title = {The {THISL} {SDR} system at {TREC}--8},
  booktitle = {Proc. Eighth Text Retrieval Conference (TREC--8)},
  abstract = {This paper describes the participation of the THISL
                   group at the TREC-8 Spoken Document Retrieval (SDR)
                   track. The THISL SDR system consists of the realtime
                   version of the Abbot large vocabulary speech
                   recognition system and the thislIR text retrieval
                   system. The TREC-8 evaluation assessed SDR performance
                   on a corpus of 500 hours of broadcast news material
                   collected over a five month period. The main test
                   condition involved retrieval of stories defined by
                   manual segmentation of the corpus in which non-news
                   material, such as commercials, were excluded. An
                   optional test condition required retrieval of
                   the same stories from the unsegmented audio stream. The
                   THISL SDR system participated at both test conditions.
                   The results show that a system such as THISL can
                   produce respectable information retrieval performance
                   on a realistically-sized corpus of unsegmented audio
                   material.},
  categories = {thisl,bnews,trec,ir,recognition,eval,sheffield},
  pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2000/trec8.pdf},
  ps = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2000/trec8.ps.gz},
  year = 2000
}