
@comment{{This file has been generated by bib2bib 1.92}}
@comment{{Command line: /home/korin/bibtex2html-1.92-LINUX/bib2bib -oc /home/korin/projects/publications/new_output/transitdata/1999-citations -ob /home/korin/projects/publications/new_output/transitdata/1999.bib -c 'year : "1999"' /home/korin/projects/publications/filtlists/full_publications_list.bib}}
@inproceedings{fitt_isard_eurospeech99,
  author = {Sue Fitt and Steve Isard},
  title = {Synthesis of regional {E}nglish using a keyword
                   lexicon},
  booktitle = {Proc. Eurospeech 1999},
  volume = 2,
  pages = {823-826},
  address = {Budapest},
  abstract = {We discuss the use of an accent-independent keyword
                   lexicon to synthesise speakers with different regional
                   accents. The paper describes the system architecture
                   and the transcription system used in the lexicon, and
                   then focuses on the construction of word-lists for
                   recording speakers. We illustrate by mentioning some of
                   the features of Scottish and Irish English, which we
                   are currently synthesising, and describe how these are
                   captured by keyword synthesis.},
  categories = {speech synthesis, lexicon, accents, regional
                   pronunciation},
  month = sep,
  pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1999/Fitt_1999_a.pdf},
  ps = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1999/Fitt_1999_a.ps},
  year = 1999
}
@inproceedings{Poesio_1999_a,
  author = {M. Poesio and R. Henschel and J. Hitzeman and R. Kibble
                   and S. Montague and K. van Deemter},
  title = {Towards An Annotation Scheme for Noun Phrase
                   Generation},
  booktitle = {Proceedings of the EACL Workshop on Linguistically
                   Interpreted Corpora (LINC-99)},
  address = {Norway},
  categories = {markup, GNOME},
  pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1999/Poesio_1999_a.pdf},
  ps = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1999/Poesio_1999_a.ps},
  year = 1999
}
@inproceedings{mayo:99,
  author = {Mayo, C.},
  title = {Perceptual weighting and phonemic awareness in
                   pre-reading and early-reading children},
  booktitle = {XIVth International Congress of Phonetic Sciences},
  address = {San Francisco},
  categories = {speech perception, development, cue weighting,
                   phonemic awareness, literacy},
  pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1999/0479.pdf},
  year = 1999
}
@inproceedings{strom99,
  author = {V. Strom and H. Heine},
  title = {Utilizing Prosody for Unconstrained Morpheme
                   Recognition},
  booktitle = {Proc. European Conf. on Speech Communication and
                   Technology},
  address = {Budapest},
  abstract = {Speech recognition systems for languages with a rich
                   inflectional morphology (like German) suffer from the
                   limitations of a word-based full-form lexicon.
                   Although the morphological and acoustical knowledge
                   about words is coded implicitly within the lexicon
                   entries (which are usually closely related to the
                   orthography of the language at hand) this knowledge is
                   usually not explicitly available for other tasks (e.g.
                   detecting OOV words, prosodic analysis). This paper
                   presents an HMM-based `word' recognizer that uses
                   morpheme-like units on the string level for
                   recognizing spontaneous German conversational speech
                   (Verbmobil corpus). The system has no explicit word
                   knowledge but uses a morpheme-bigram to capture the
                   German word and sentence structure to some extent. The
                   morpheme recognizer is tightly coupled with a prosodic
                   classifier in order to compensate for some of the
                   additional ambiguity introduced by using morphemes
                   instead of words.},
  pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1999/paper.eurospeech99.pdf},
  ps = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1999/paper.eurospeech99.ps},
  year = 1999
}
@article{Kessens-Wester-99,
  author = {J.M. Kessens and M. Wester and H. Strik},
  title = {Improving the performance of a {D}utch {CSR} by
                   modeling within-word and cross-word pronunciation
                   variation},
  journal = {Speech Communication},
  volume = {29},
  pages = {193-207},
  abstract = {This article describes how the performance of a Dutch
                   continuous speech recognizer was improved by modeling
                   pronunciation variation. We propose a general procedure
                   for modeling pronunciation variation. In short, it
                   consists of adding pronunciation variants to the
                   lexicon, retraining phone models and using language
                   models to which the pronunciation variants have been
                   added. First, within-word pronunciation variants were
                   generated by applying a set of five optional
                   phonological rules to the words in the baseline
                   lexicon. Next, a limited number of cross-word processes
                   were modeled, using two different methods. In the first
                   approach, cross-word processes were modeled by directly
                   adding the cross-word variants to the lexicon, and in
                   the second approach this was done by using multi-words.
                   Finally, the combination of the within-word method with
                   the two cross-word methods was tested. The word error
                   rate (WER) measured for the baseline system was
                   12.75\%. Compared to the baseline, a small but
                   statistically significant improvement of 0.68\% in WER
                   was measured for the within-word method, whereas both
                   cross-word methods in isolation led to small,
                   non-significant improvements. The combination of the
                   within-word method and cross-word method 2 led to the
                   best result: an absolute improvement of 1.12\% in WER
                   was found compared to the baseline, which is a relative
                   improvement of 8.8\% in WER.},
  categories = {asr, pm, VIOS, Nijmegen},
  pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1999/journalversion.pdf},
  year = 1999
}
@inproceedings{cook-darpa99,
  author = {G.~Cook and K.~Al-Ghoneim and D.~Ellis and
                   E.~Fosler-Lussier and Y.~Gotoh and B.~Kingsbury and
                   N.~Morgan and S.~Renals and T.~Robinson and G.~Williams},
  title = {The {SPRACH} system for the transcription of broadcast
                   news},
  booktitle = {Proc. DARPA Broadcast News Workshop},
  pages = {161--166},
  abstract = {This paper describes the SPRACH system developed for
                   the 1998 Hub-4E broadcast news evaluation. The system
                   is based on the connectionist-HMM framework and uses
                   both recurrent neural network and multi-layer
                   perceptron acoustic models. We describe both a system
                   designed for the primary transcription hub, and a
                   system for the less than 10 times real-time spoke. We
                   then describe recent developments to CHRONOS, a
                   time-first stack decoder. We show how these
                   developments have simplified the evaluation system, and
                   led to significant reductions in the error rate of the
                   10x real-time system. We also present a system designed
                   to operate in real-time with negligible search error.},
  categories = {sprach,bnews,recognition,am,hybrid,abbot,search,eval,sheffield},
  http = {http://homepages.inf.ed.ac.uk/srenals/pubs/1999/darpa99-sprach.html},
  pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1999/darpa99-sprach.pdf},
  ps = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1999/darpa99-sprach.ps.gz},
  year = 1999
}
@inproceedings{Kessens-ICPhS-99,
  author = {J.M. Kessens and M. Wester and H. Strik},
  title = {Modeling within-word and cross-word pronunciation
                   variation to improve the performance of a {D}utch {CSR}},
  booktitle = {Proc. of ICPhS '99},
  pages = {1665-1668},
  address = {San Francisco},
  abstract = {This paper describes how the performance of a
                   continuous speech recognizer for Dutch has been
                   improved by modeling within-word and cross-word
                   pronunciation variation. Within-word variants were
                   automatically generated by applying five phonological
                   rules to the words in the lexicon. For the within-word
                   method, a significant improvement is found compared to
                   the baseline. Cross-word pronunciation variation was
                   modeled using two different methods: 1) adding
                   cross-word variants directly to the lexicon, 2) only
                   adding multi-words and their variants to the lexicon.
                   Overall, cross-word method 2 leads to better results
                   than cross-word method 1. The best results were
                   obtained when cross-word method 2 was combined with the
                   within-word method: a relative improvement of 8.8\% WER
                   was found compared to the baseline.},
  categories = {asr, pm, VIOS, Nijmegen},
  pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1999/kessens.1999.1.pdf},
  year = 1999
}
@inproceedings{Hitzeman_1999_a,
  author = {Janet Hitzeman and Alan W. Black and Paul Taylor and
                   Chris Mellish and Jon Oberlander},
  title = {An Annotation Scheme for Concept-to-Speech Synthesis},
  booktitle = {Proceedings of the European Workshop on Natural
                   Language Generation},
  pages = {59-66},
  address = {Toulouse, France},
  categories = {synthesis, intonation, sole},
  pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1999/Hitzeman_1999_a.pdf},
  ps = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1999/Hitzeman_1999_a.ps},
  year = 1999
}
@inproceedings{robinson-eurospeech99,
  author = {T.~Robinson and D.~Abberley and D.~Kirby and S.~Renals},
  title = {Recognition, indexing and retrieval of {British}
                   broadcast news with the {THISL} system},
  booktitle = {Proc. Eurospeech},
  pages = {1067--1070},
  address = {Budapest},
  abstract = {This paper describes the THISL spoken document
                   retrieval system for British and North American
                   Broadcast News. The system is based on the Abbot large
                   vocabulary speech recognizer and a probabilistic text
                   retrieval system. We discuss the development of a
                   realtime British English Broadcast News system, and its
                   integration into a spoken document retrieval system.
                   Detailed evaluation is performed using a similar North
                   American Broadcast News system, to take advantage of
                   the TREC SDR evaluation methodology. We report results
                   on this evaluation, with particular reference to the
                   effect of query expansion and of automatic segmentation
                   algorithms.},
  categories = {thisl,bnews,trec,ir,recognition,sheffield},
  pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1999/eurospeech99-thisl.pdf},
  ps = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1999/eurospeech99-thisl.ps.gz},
  year = 1999
}
@inproceedings{Keeni1999IJCNN,
  author = {Kanad Keeni and Kenji Nakayama and Hiroshi Shimodaira},
  title = {{Estimation of Initial Weights and Hidden Units for
                   Fast Learning of Multi-layer Neural Networks for
                   Pattern Classification}},
  booktitle = {IEEE International Joint Conference on Neural Networks
                   (IJCNN'99)},
  categories = {ann, jaist},
  month = jul,
  year = 1999
}
@inproceedings{Wright_1999_a,
  author = {H. Wright and Massimo Poesio and Stephen Isard},
  title = {Using high level dialogue information for dialogue act
                   recognition using prosodic features},
  booktitle = {Proceedings of an {ESCA} Tutorial and Research
                   Workshop on Dialogue and Prosody},
  pages = {139-143},
  address = {Eindhoven, The Netherlands},
  categories = {dialogue, prosody, asr},
  pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1999/Wright_1999_a.pdf},
  ps = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1999/Wright_1999_a.ps},
  year = 1999
}
@phdthesis{LIN99-phd,
  author = {Lincoln, M.},
  title = {Characterization of Speakers for Improved Automatic
                   Speech Recognition},
  school = {University of East Anglia},
  abstract = {Automatic speech recognition technology is becoming
                   increasingly widespread in many applications. For
                   dictation tasks, where a single talker is to use the
                   system for long periods of time, the high recognition
                   accuracies obtained are in part due to the user
                   performing a lengthy enrolment procedure to tune the
                   parameters of the recogniser to their particular voice
                   characteristics and speaking style. Interactive speech
                   systems, where the speaker is using the system for only
                   a short period of time (for example to obtain
                   information) do not have the luxury of long enrolments
                   and have to adapt rapidly to new speakers and speaking
                   styles. This thesis discusses the variations between
                   speakers and speaking styles which result in decreased
                   recognition performance when there is a mismatch
                   between the talker and the system's models. An
                   unsupervised method to rapidly identify and normalise
                   differences in vocal tract length is presented and
                   shown to give improvements in recognition accuracy for
                   little computational overhead. Two unsupervised methods
                   of identifying speakers with similar speaking styles
                   are also presented. The first, a data-driven technique,
                   is shown to accurately classify British and American
                   accented speech, and is also used to improve
                   recognition accuracy by clustering groups of similar
                   talkers. The second uses the phonotactic information
                   available within pronunciation dictionaries to model
                   British and American accented speech. This model is
                   then used to rapidly and accurately classify speakers.},
  categories = {adaptation, ASR, speaker characteristics, BT, UEA},
  pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1999/thesis.pdf},
  year = 1999
}
@inproceedings{gotoh-esca99,
  author = {Y.~Gotoh and S.~Renals},
  title = {Statistical annotation of named entities in spoken
                   audio},
  booktitle = {Proc. ESCA Workshop on Accessing Information In Spoken
                   Audio},
  pages = {43--48},
  address = {Cambridge},
  abstract = {In this paper we describe a stochastic finite state
                   model for named entity (NE) identification, based on
                   explicit word-level n-gram relations. NE categories are
                   incorporated in the model as word attributes. We
                   present an overview of the approach, describing how the
                   extensible vocabulary model may be used for NE
                   identification. We report development and evaluation
                   results on a North American Broadcast News task. This
                   approach resulted in average precision and recall
                   scores of around 83\% on hand transcribed data, and
                   73\% on the SPRACH recogniser output. We also present
                   an error analysis and a comparison of our approach with
                   an alternative statistical approach.},
  categories = {sprach,stobs,ie,lm,bnews,sheffield},
  pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1999/esca99-ne.pdf},
  ps = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1999/esca99-ne.ps.gz},
  year = 1999
}
@inproceedings{carreira-icphs99,
  author = {M.~Carreira-Perpiñán and S.~Renals},
  title = {A latent-variable modelling approach to the
                   acoustic-to-articulatory mapping problem},
  booktitle = {Proc. 14th Int. Congress of Phonetic Sciences},
  pages = {2013-2016},
  address = {San Francisco},
  abstract = {We present a latent variable approach to the
                   acoustic-to-articulatory mapping problem, where
                   different vocal tract configurations can give rise to
                   the same acoustics. In latent variable modelling, the
                   combined acoustic and articulatory data are assumed to
                   have been generated by an underlying low-dimensional
                   process. A parametric probabilistic model is estimated
                   and mappings are derived from the respective
                   conditional distributions. This has the advantage over
                   other methods, such as articulatory codebooks or neural
                   networks, of directly addressing the nonuniqueness
                   problem. We demonstrate our approach with
                   electropalatographic and acoustic data from the ACCOR
                   database.},
  categories = {ml,lv,artic,sheffield},
  pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1999/icphs99.pdf},
  ps = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1999/icphs99.ps.gz},
  year = 1999
}
@inproceedings{fitt_icphs99,
  author = {Sue Fitt},
  title = {The treatment of vowels preceding 'r' in a keyword
                   lexicon of {E}nglish},
  booktitle = {Proc. ICPhS 1999},
  abstract = {Work is progressing on a keyword lexicon aimed at
                   enabling the synthesis of various regional accents of
                   English. This paper focuses on a particular issue, that
                   of vowels before orthographic 'r'. These vowels are
                   discussed with respect to rhotic and non-rhotic
                   accents, in terms of both keyword sets and phonetic
                   realisation. Criteria for the use of keysymbols are
                   discussed, and it is noted that these criteria result
                   in inclusion of post-vocalic /r/ in the lexicon, with
                   deletion by rule for non-rhotic accents. It is noted
                   that some keyvowels in our original set have had to be
                   split, while others may prove to be redundant.},
  categories = {speech synthesis, lexicon, accents, regional
                   pronunciation, rhotic},
  month = aug,
  pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1999/Fitt_1999_b.pdf},
  ps = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1999/Fitt_1999_b.ps},
  year = 1999
}
@article{renals-sap99,
  author = {S.~Renals and M.~Hochberg},
  title = {Start-synchronous search for large vocabulary
                   continuous speech recognition},
  journal = {IEEE Trans. on Speech and Audio Processing},
  volume = {7},
  pages = {542--553},
  abstract = {In this paper, we present a novel, efficient search
                   strategy for large vocabulary continuous speech
                   recognition. The search algorithm, based on a stack
                   decoder framework, utilizes phone-level posterior
                   probability estimates (produced by a connectionist/HMM
                   acoustic model) as a basis for phone deactivation
                   pruning - a highly efficient method of reducing the
                   required computation. The single-pass algorithm is
                   naturally factored into the time-asynchronous
                   processing of the word sequence and the
                   time-synchronous processing of the HMM state sequence.
                   This enables the search to be decoupled from the
                   language model while still maintaining the
                   computational benefits of time-synchronous processing.
                   The incorporation of the language model in the search
                   is discussed and computationally cheap approximations
                   to the full language model are introduced. Experiments
                   were performed on the North American Business News task
                   using a 60,000 word vocabulary and a trigram language
                   model. Results indicate that the computational cost of
                   the search may be reduced by more than a factor of 40
                   with a relative search error of less than 2\% using the
                   techniques discussed in the paper.},
  categories = {sprach,recognition,search,bnews,sheffield},
  pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1999/sap99-preprint.pdf},
  ps = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1999/sap99-preprint.ps.gz},
  year = 1999
}
@inproceedings{clark_icphs99,
  author = {Robert A. J. Clark},
  title = {Using Prosodic Structure to Improve Pitch Range
                   Variation in Text to Speech Synthesis},
  booktitle = {Proc. {XIV}th International Congress of Phonetic
                   Sciences},
  volume = 1,
  pages = {69--72},
  categories = {synthesis, prosody, intonation, festival},
  pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1999/clark_icphs99.pdf},
  ps = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1999/clark_icphs99.ps},
  year = 1999
}
@inproceedings{gotoh-icassp99,
  author = {Y.~Gotoh and S.~Renals and G.~Williams},
  title = {Named entity tagged language models},
  booktitle = {Proc IEEE ICASSP},
  pages = {513--516},
  address = {Phoenix AZ},
  abstract = {We introduce Named Entity (NE) Language Modelling, a
                   stochastic finite state machine approach to identifying
                   both words and NE categories from a stream of spoken
                   data. We provide an overview of our approach to NE
                   tagged language model (LM) generation together with
                   results of the application of such a LM to the task of
                   out-of-vocabulary (OOV) word reduction in large
                   vocabulary speech recognition. Using the Wall Street
                   Journal and Broadcast News corpora, it is shown that
                   the tagged LM was able to reduce the overall word error
                   rate by 14\%, detecting up to 70\% of previously OOV
                   words. We also describe an example of the direct
                   tagging of spoken data with NE categories.},
  categories = {sprach,ie,lm,bnews,sheffield},
  pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1999/icassp99.pdf},
  ps = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1999/icassp99.ps.gz},
  year = 1999
}
@inproceedings{renals-eurospeech99,
  author = {S.~Renals and Y.~Gotoh},
  title = {Integrated transcription and identification of named
                   entities in broadcast speech},
  booktitle = {Proc. Eurospeech},
  pages = {1039--1042},
  address = {Budapest},
  abstract = {This paper presents an approach to integrating
                   functions for both transcription and named entity (NE)
                   identification into a large vocabulary continuous
                   speech recognition system. It builds on the NE tagged
                   language modelling approach, which was recently applied
                   to the development of the statistical NE annotation
                   system. We also present results for a proper name
                   identification experiment using the Hub-4E open
                   evaluation data.},
  categories = {sprach,stobs,ie,lm,bnews,sheffield},
  pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1999/eurospeech99-ne.pdf},
  ps = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1999/eurospeech99-ne.ps.gz},
  year = 1999
}
@inproceedings{clarkdusterhoff_eurospeech99,
  author = {Robert A. J. Clark and Kurt E. Dusterhoff},
  title = {Objective Methods for Evaluating Synthetic Intonation},
  booktitle = {Proc. {E}urospeech 1999},
  volume = 4,
  pages = {1623--1626},
  categories = {synthesis, prosody, intonation},
  pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1999/clarkdusterhoff_eurospeech99.pdf},
  ps = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1999/clarkdusterhoff_eurospeech99.ps},
  year = 1999
}
@inproceedings{richmond99,
  author = {Richmond, K.},
  title = {Estimating Velum Height from Acoustics During
                   Continuous Speech},
  booktitle = {Proc. Eurospeech},
  volume = 1,
  pages = {149--152},
  address = {Budapest, Hungary},
  abstract = {This paper reports on present work, in which a
                   recurrent neural network is trained to estimate `velum
                   height' during continuous speech. Parallel
                   acoustic-articulatory data comprising more than 400
                   read {TIMIT} sentences is obtained using
                   electromagnetic articulography (EMA). This data is
                   processed and used as training data for a range of
                   neural network sizes. The network demonstrating the
                   highest accuracy is identified. This performance is
                   then evaluated in detail by analysing the network's
                   output for each phonetic segment contained in 50
                   hand-labelled utterances set aside for testing
                   purposes.},
  categories = {artic, ann, mlp, inversion, mocha, edinburgh},
  pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1999/Richmond_1999_a.pdf},
  ps = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1999/Richmond_1999_a.ps},
  year = 1999
}
@article{williams-csl99,
  author = {G.~Williams and S.~Renals},
  title = {Confidence measures from local posterior probability
                   estimates},
  journal = {Computer Speech and Language},
  volume = {13},
  pages = {395--411},
  abstract = {In this paper we introduce a set of related confidence
                   measures for large vocabulary continuous speech
                   recognition (LVCSR) based on local phone posterior
                   probability estimates output by an acceptor HMM
                   acoustic model. In addition to their computational
                   efficiency, these confidence measures are attractive as
                   they may be applied at the state-, phone-, word- or
                   utterance-levels, potentially enabling discrimination
                   between different causes of low confidence recognizer
                   output, such as unclear acoustics or mismatched
                   pronunciation models. We have evaluated these
                   confidence measures for utterance verification using a
                   number of different metrics. Experiments reveal several
                   trends in `profitability of rejection', as measured by
                   the unconditional error rate of a hypothesis test.
                   These trends suggest that crude pronunciation models
                   can mask the relatively subtle reductions in confidence
                   caused by out-of-vocabulary (OOV) words and
                   disfluencies, but not the gross model mismatches
                   elicited by non-speech sounds. The observation that a
                   purely acoustic confidence measure can provide improved
                   performance over a measure based upon both acoustic and
                   language model information for data drawn from the
                   Broadcast News corpus, but not for data drawn from the
                   North American Business News corpus suggests that the
                   quality of model fit offered by a trigram language
                   model is reduced for Broadcast News data. We also argue
                   that acoustic confidence measures may be used to inform
                   the search for improved pronunciation models.},
  categories = {recognition,conf,hybrid,bnews,sheffield},
  pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1999/csl99-preprint.pdf},
  ps = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1999/csl99-preprint.ps.gz},
  year = 1999
}
@inproceedings{Wester-ICPhS-99,
  author = {M. Wester and J.M. Kessens},
  title = {Comparison between expert listeners and continuous
                   speech recognizers in selecting pronunciation variants},
  booktitle = {Proc. of ICPhS '99},
  pages = {723-726},
  address = {San Francisco},
  abstract = {In this paper, the performance of an automatic
                   transcription tool is evaluated. The transcription tool
                   is a continuous speech recognizer (CSR) which can be
                   used to select pronunciation variants (i.e. detect
                   insertions and deletions of phones). The performance of
                   the CSR was compared to a reference transcription based
                   on the judgments of expert listeners. We investigated
                   to what extent the degree of agreement between the
                   listeners and the CSR was affected by employing various
                   sets of phone models (PMs). Overall, the PMs perform
                   more similarly to the listeners when pronunciation
                   variation is modeled. However, the various sets of PMs
                   lead to different results for insertion and deletion
                   processes. Furthermore, we found that to a certain
                   degree, word error rates can be used to predict which
                   set of PMs to use in the transcription tool.},
  categories = {asr, pm, VIOS, Nijmegen},
  pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1999/wester.1999.1.pdf},
  year = 1999
}
@inproceedings{Rokui1999ICANN09,
  author = {Jun Rokui and Hiroshi Shimodaira},
  title = {{Multistage Building Learning based on
                   Misclassification Measure}},
  booktitle = {9th International Conference on Artificial Neural
                   Networks (ICANN'99)},
  address = {Edinburgh, UK},
  categories = {nn, mce, jaist},
  month = sep,
  year = 1999
}
@inproceedings{Dusterhoff_1999_b,
  author = {Kurt E. Dusterhoff},
  title = {Automatic Intonation Analysis Using Acoustic Data},
  booktitle = {Proceedings of the ESCA Tutorial and Research Workshop
                   on Dialogue and Prosody},
  address = {Eindhoven},
  categories = {intonation, prosody, recognition},
  pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1999/Dusterhoff_1999_b.pdf},
  ps = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1999/Dusterhoff_1999_b.ps},
  year = 1999
}
@inproceedings{Dusterhoff_1999_a,
  author = {Kurt E. Dusterhoff and Alan W. Black and Paul A.
                   Taylor},
  title = {Using Decision Trees within the Tilt Intonation Model
                   to Predict F0 Contours},
  booktitle = {Eurospeech 99},
  address = {Budapest},
  categories = {intonation, synthesis, festival},
  pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1999/Dusterhoff_1999_a.pdf},
  ps = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1999/Dusterhoff_1999_a.ps},
  year = 1999
}
@inproceedings{Williams_1999_a,
  author = {Briony Williams},
  title = {A {W}elsh speech database: preliminary results},
  booktitle = {Eurospeech 99},
  address = {Budapest, Hungary},
  categories = {database, phonetics, welsh, welshdata},
  pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1999/Williams_1999_a.pdf},
  ps = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1999/Williams_1999_a.ps},
  year = 1999
}
@inproceedings{Keeni1999ICCIMA,
  author = {Kanad Keeni and Kenji Nakayama and Hiroshi Shimodaira},
  title = {{A Training Scheme for Pattern Classification Using
                   Multi-layer Feed-forward Neural Networks}},
  booktitle = {IEEE International Conference on Computational
                   Intelligence and Multimedia Applications},
  pages = {307--311},
  categories = {ann, jaist},
  month = sep,
  year = 1999
}
@article{cs-CL-9907021,
  author = {Günther Görz and Jörg Spilker and Volker Strom and
                   Hans Weber},
  title = {Architectural Considerations for Conversational
                   Systems -- The Verbmobil/INTARC Experience},
  journal = {Proceedings of the First International Workshop on Human
                   Computer Conversation},
  volume = {cs.CL/9907021},
  abstract = { The paper describes the speech to speech translation
                   system INTARC, developed during the first phase of the
                   Verbmobil project. The general design goals of the
                   INTARC system architecture were time synchronous
                   processing as well as incrementality and interactivity
                   as a means to achieve a higher degree of robustness and
                   scalability. Interactivity means the ability, in
                   addition to the bottom-up (in terms of processing
                   levels) data flow, to process top-down restrictions
                   considering the same signal segment for all processing
                   levels. The construction of INTARC 2.0, which has been
                   operational since fall 1996, followed an engineering
                   approach focussing on the integration of symbolic
                   (linguistic) and stochastic (recognition) techniques
                   which led to a generalization of the concept of a ``one
                   pass'' beam search.},
  address = {Bellagio, Italy},
  pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1999/INTARC99.pdf},
  ps = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1999/INTARC99.ps},
  year = 1999
}
@inproceedings{mayoturk:99,
  author = {Mayo, C.},
  title = {The development of phonemic awareness and perceptual
                   weighting in relation to early and later literacy
                   acquisition},
  booktitle = {20th Annual Child Phonology Conference},
  address = {Bangor, Wales},
  categories = {speech perception, development, cue weighting,
                   phonemic awareness, literacy},
  year = 1999
}
@article{gotoh-nle99,
  author = {Y.~Gotoh and S.~Renals},
  title = {Topic-based mixture language modelling},
  journal = {Journal of Natural Language Engineering},
  volume = {5},
  pages = {355--375},
  abstract = {This paper describes an approach for constructing a
                   mixture of language models based on simple statistical
                   notions of semantics using probabilistic models
                   developed for information retrieval. The approach
                   encapsulates corpus-derived semantic information and is
                   able to model varying styles of text. Using such
                   information, the corpus texts are clustered in an
                   unsupervised manner and a mixture of topic-specific
                   language models is automatically created. The principal
                   contribution of this work is to characterise the
                   document space resulting from information retrieval
                   techniques and to demonstrate the approach for mixture
                   language modelling. A comparison is made between manual
                   and automatic clustering in order to elucidate how the
                   global content information is expressed in the space.
                   We also compare (in terms of association with manual
                   clustering and language modelling accuracy) alternative
                   term-weighting schemes and the effect of singular
                   valued decomposition dimension reduction (latent
                   semantic analysis). Test set perplexity results using
                   the British National Corpus indicate that the approach
                   can improve the potential of statistical language
                   modelling. Using an adaptive procedure, the
                   conventional model may be tuned to track text data with
                   a slight increase in computational cost.},
  categories = {sprach,stobs,lm,bnc,sheffield},
  pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1999/jnle99-preprint.pdf},
  ps = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1999/jnle99-preprint.ps.gz},
  year = 1999
}
@inproceedings{Taylor_1999_a,
  author = {Paul Taylor and Alan W. Black},
  title = {Speech Synthesis by Phonological Structure Matching},
  booktitle = {Eurospeech 99},
  address = {Budapest, Hungary},
  categories = {synthesis, unit selection, waveform generation,
                   unisyn, festival},
  pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1999/Taylor_1999_a.pdf},
  ps = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1999/Taylor_1999_a.ps},
  year = 1999
}
@inproceedings{renals-mmsp99,
  author = {S.~Renals and D.~Abberley and D.~Kirby and T.~Robinson},
  title = {The {THISL} System for Indexing and Retrieval of
                   Broadcast News},
  booktitle = {Proc. IEEE Workshop on Multimedia Signal Processing},
  pages = {77--82},
  address = {Copenhagen},
  abstract = {This paper describes the THISL news retrieval system
                   which maintains an archive of BBC radio and television
                   news recordings. The system uses the Abbot large
                   vocabulary continuous speech recognition system to
                   transcribe news broadcasts, and the thislIR text
                   retrieval system to index and access the transcripts.
                   Decoding and indexing is performed automatically, and
                   the archive is updated with three hours of new material
                   every day. A web-based interface to the retrieval
                   system has been devised to facilitate access to the
                   archive.},
  categories = {thisl,bnews,trec,ir,recognition,sheffield},
  http = {http://homepages.inf.ed.ac.uk/srenals/pubs/1999/mmsp99-54/},
  pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1999/mmsp99.pdf},
  ps = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1999/mmsp99.ps.gz},
  year = 1999
}
@inproceedings{McKenna_1999_a,
  author = {John McKenna and Stephen Isard},
  title = {Tailoring Kalman Filtering Towards Speaker
                   Characterisation},
  booktitle = {Proc. {E}urospeech '99},
  volume = 6,
  pages = {2793-2796},
  address = {Budapest},
  categories = {signal processing, synthesis},
  pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1999/McKenna_1999_a.pdf},
  ps = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1999/McKenna_1999_a.ps},
  year = 1999
}
@inproceedings{abberley-esca99,
  author = {D.~Abberley and D.~Kirby and S.~Renals and T.~Robinson},
  title = {The {THISL} broadcast news retrieval system},
  booktitle = {Proc. ESCA Workshop on Accessing Information In Spoken
                   Audio},
  pages = {19--24},
  address = {Cambridge},
  abstract = {This paper describes the THISL spoken document
                   retrieval system for British and North American
                   Broadcast News. The system is based on the
                   \textsc{Abbot} large vocabulary speech recognizer,
                   using a recurrent network acoustic model, and a
                   probabilistic text retrieval system. We discuss the
                   development of a realtime British English Broadcast
                   News system, and its integration into a spoken document
                   retrieval system. Detailed evaluation is performed
                   using a similar North American Broadcast News system,
                   to take advantage of the TREC SDR evaluation
                   methodology. We report results on this evaluation, with
                   particular reference to the effect of query expansion
                   and of automatic segmentation algorithms.},
  categories = {thisl,bnews,trec,ir,recognition,sheffield},
  http = {http://homepages.inf.ed.ac.uk/srenals/pubs/1999/esca99-thisl/},
  pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1999/esca99-thisl.pdf},
  ps = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1999/esca99-thisl.ps.gz},
  year = 1999
}
@inproceedings{king:wrench:icphs1999,
  author = {Simon King and Alan Wrench},
  title = {Dynamical System Modelling of Articulator Movement},
  booktitle = {Proc. {ICPhS} 99},
  pages = {2259-2262},
  address = {San Francisco},
  abstract = {We describe the modelling of articulatory movements
                   using (hidden) dynamical system models trained on
                   Electro-Magnetic Articulograph (EMA) data. These models
                   can be used for automatic speech recognition and to
                   give insights into articulatory behaviour. They belong
                   to a class of continuous-state Markov models, which we
                   believe can offer improved performance over
                   conventional Hidden Markov Models (HMMs) by better
                   accounting for the continuous nature of the underlying
                   speech production process -- that is, the movements of
                   the articulators. To assess the performance of our
                   models, a simple speech recognition task was used, on
                   which the models show promising results.},
  categories = {asr, artic, ema},
  month = aug,
  pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1999/King_Wrench_icphs1999.pdf},
  ps = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1999/King_Wrench_icphs1999.ps},
  year = 1999
}
@inproceedings{renals-darpa99,
  author = {S.~Renals and Y.~Gotoh and R.~Gaizauskas and
                   M.~Stevenson},
  title = {The {SPRACH/LaSIE} system for named entity
                   identification in broadcast news},
  booktitle = {Proc. DARPA Broadcast News Workshop},
  pages = {47--50},
  abstract = {We have developed two conceptually different systems
                   that are able to identify named entities from spoken
                   audio. One (referred to as SPRACH-S) has a stochastic
                   finite state machine structure for use with an acoustic
                   model that identifies both words and named entities
                   from speech data. The other (referred to as SPRACH-R)
                   is a rule-based system which uses matching against
                   stored name lists, part-of-speech tagging, and light
                   phrasal parsing with specialised named entity grammars.
                   We provide an overview of the two approaches and
                   present results on the Hub-4E IE-NE evaluation task.},
  categories = {sprach,stobs,ie,lm,bnews,sheffield},
  http = {http://homepages.inf.ed.ac.uk/srenals/pubs/1999/darpa99-ne.html},
  pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1999/darpa99-ne.pdf},
  ps = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1999/darpa99-ne.ps.gz},
  year = 1999
}
@inproceedings{abberley-trec99,
  author = {D.~Abberley and S.~Renals and G.~Cook and T.~Robinson},
  title = {Retrieval of broadcast news documents with the {THISL}
                   system},
  booktitle = {Proc. Seventh Text Retrieval Conference (TREC--7)},
  pages = {181--190},
  abstract = {This paper describes the THISL system that
                   participated in the TREC-7 evaluation, Spoken Document
                   Retrieval (SDR) Track, and presents the results
                   obtained, together with some analysis. The THISL system
                   is based on the {\sc Abbot} speech recognition system
                   and the thislIR text retrieval system. In this
                   evaluation we were concerned with investigating the
                   suitability for SDR of a recognizer running at less
                   than ten times realtime, the use of multiple
                   transcriptions and word graphs, the effect of simple
                   query expansion algorithms and the effect of varying
                   standard IR parameters.},
  categories = {thisl,bnews,trec,ir,recognition,eval,sheffield},
  pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1999/trec7.pdf},
  ps = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1999/trec7.ps.gz},
  year = 1999
}