1999.bib
@comment{{This file has been generated by bib2bib 1.92}}
@comment{{Command line: /home/korin/bibtex2html-1.92-LINUX/bib2bib -oc /home/korin/projects/publications/new_output/transitdata/1999-citations -ob /home/korin/projects/publications/new_output/transitdata/1999.bib -c 'year : "1999"' /home/korin/projects/publications/filtlists/full_publications_list.bib}}
@inproceedings{fitt_isard_eurospeech99,
  author     = {Fitt, Sue and Isard, Steve},
  title      = {Synthesis of regional {English} using a keyword lexicon},
  booktitle  = {Proc. Eurospeech 1999},
  volume     = {2},
  pages      = {823--826},
  address    = {Budapest},
  month      = sep,
  year       = {1999},
  abstract   = {We discuss the use of an accent-independent keyword
                lexicon to synthesise speakers with different regional
                accents. The paper describes the system architecture
                and the transcription system used in the lexicon, and
                then focuses on the construction of word-lists for
                recording speakers. We illustrate by mentioning some of
                the features of Scottish and Irish English, which we
                are currently synthesising, and describe how these are
                captured by keyword synthesis.},
  categories = {speech synthesis, lexicon, accents, regional
                pronunciation},
  pdf        = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1999/Fitt_1999_a.pdf},
  ps         = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1999/Fitt_1999_a.ps}
}
@inproceedings{Poesio_1999_a,
  author     = {Poesio, M. and Henschel, R. and Hitzeman, J. and
                Kibble, R. and Montague, S. and van Deemter, K.},
  title      = {Towards An Annotation Scheme for Noun Phrase
                Generation},
  booktitle  = {Proceedings of the EACL workshop on linguistically
                interpreted corpora (LINC-99)},
  address    = {Norway},
  year       = {1999},
  categories = {markup, GNOME},
  pdf        = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1999/Poesio_1999_a.pdf},
  ps         = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1999/Poesio_1999_a.ps}
}
@inproceedings{mayo:99,
  author     = {Mayo, C.},
  title      = {Perceptual weighting and phonemic awareness in
                pre-reading and early-reading children},
  booktitle  = {XIVth International Congress of Phonetic Sciences, San
                Francisco},
  year       = {1999},
  categories = {speech perception, development, cue weighting,
                phonemic awareness, literacy},
  pdf        = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1999/0479.pdf}
}
@inproceedings{strom99,
  author     = {Strom, V. and Heine, H.},
  title      = {Utilizing Prosody for Unconstrained Morpheme
                Recognition},
  booktitle  = {Proc. European Conf. on Speech Communication and
                Technology},
  address    = {Budapest},
  year       = {1999},
  abstract   = {Speech recognition systems for languages with a rich
                inflectional morphology (like German) suffer from the
                limitations of a word-based full-form lexicon.
                Although the morphological and acoustical knowledge
                about words is coded implicitly within the lexicon
                entries (which are usually closely related to the
                orthography of the language at hand) this knowledge is
                usually not explicitly available for other tasks (e.g.
                detecting OOV words, prosodic analysis). This paper
                presents an HMM-based `word' recognizer that uses
                morpheme-like units on the string level for
                recognizing spontaneous German conversational speech
                (Verbmobil corpus). The system has no explicit word
                knowledge but uses a morpheme-bigram to capture the
                German word and sentence structure to some extent. The
                morpheme recognizer is tightly coupled with a prosodic
                classifier in order to compensate for some of the
                additional ambiguity introduced by using morphemes
                instead of words.},
  pdf        = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1999/paper.eurospeech99.pdf},
  ps         = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1999/paper.eurospeech99.ps}
}
@article{Kessens-Wester-99,
  author     = {Kessens, J. M. and Wester, M. and Strik, H.},
  title      = {Improving the performance of a {Dutch} {CSR} by
                modeling within-word and cross-word pronunciation
                variation},
  journal    = {Speech Communication},
  volume     = {29},
  pages      = {193--207},
  year       = {1999},
  abstract   = {This article describes how the performance of a Dutch
                continuous speech recognizer was improved by modeling
                pronunciation variation. We propose a general procedure
                for modeling pronunciation variation. In short, it
                consists of adding pronunciation variants to the
                lexicon, retraining phone models and using language
                models to which the pronunciation variants have been
                added. First, within-word pronunciation variants were
                generated by applying a set of five optional
                phonological rules to the words in the baseline
                lexicon. Next, a limited number of cross-word processes
                were modeled, using two different methods. In the first
                approach, cross-word processes were modeled by directly
                adding the cross-word variants to the lexicon, and in
                the second approach this was done by using multi-words.
                Finally, the combination of the within-word method with
                the two cross-word methods was tested. The word error
                rate (WER) measured for the baseline system was
                12.75\%. Compared to the baseline, a small but
                statistically significant improvement of 0.68\% in WER
                was measured for the within-word method, whereas both
                cross-word methods in isolation led to small,
                non-significant improvements. The combination of the
                within-word method and cross-word method 2 led to the
                best result: an absolute improvement of 1.12\% in WER
                was found compared to the baseline, which is a relative
                improvement of 8.8\% in WER.},
  categories = {asr, pm, VIOS, Nijmegen},
  pdf        = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1999/journalversion.pdf}
}
@inproceedings{cook-darpa99,
  author     = {Cook, G. and Al-Ghoneim, K. and Ellis, D. and
                Fosler-Lussier, E. and Gotoh, Y. and Kingsbury, B. and
                Morgan, N. and Renals, S. and Robinson, T. and
                Williams, G.},
  title      = {The {SPRACH} system for the transcription of broadcast
                news},
  booktitle  = {Proc. DARPA Broadcast News Workshop},
  pages      = {161--166},
  year       = {1999},
  abstract   = {This paper describes the SPRACH system developed for
                the 1998 Hub-4E broadcast news evaluation. The system
                is based on the connectionist-HMM framework and uses
                both recurrent neural network and multi-layer
                perceptron acoustic models. We describe both a system
                designed for the primary transcription hub, and a
                system for the less-than 10 times real-time spoke. We
                then describe recent developments to CHRONOS, a
                time-first stack decoder. We show how these
                developments have simplified the evaluation system, and
                led to significant reductions in the error rate of the
                10x real-time system. We also present a system designed
                to operate in real-time with negligible search error.},
  categories = {sprach,bnews,recognition,am,hybrid,abbot,search,eval,sheffield},
  http       = {http://homepages.inf.ed.ac.uk/srenals/pubs/1999/darpa99-sprach.html},
  pdf        = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1999/darpa99-sprach.pdf},
  ps         = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1999/darpa99-sprach.ps.gz}
}
@inproceedings{Kessens-ICPhS-99,
  author     = {Kessens, J. M. and Wester, M. and Strik, H.},
  title      = {Modeling within-word and cross-word pronunciation
                variation to improve the performance of a {Dutch} {CSR}},
  booktitle  = {Proc. of ICPhS '99},
  pages      = {1665--1668},
  address    = {San Francisco},
  year       = {1999},
  abstract   = {This paper describes how the performance of a
                continuous speech recognizer for Dutch has been
                improved by modeling within-word and cross-word
                pronunciation variation. Within-word variants were
                automatically generated by applying five phonological
                rules to the words in the lexicon. For the within-word
                method, a significant improvement is found compared to
                the baseline. Cross-word pronunciation variation was
                modeled using two different methods: 1) adding
                cross-word variants directly to the lexicon, 2) only
                adding multi-words and their variants to the lexicon.
                Overall, cross-word method 2 leads to better results
                than cross-word method 1. The best results were
                obtained when cross-word method 2 was combined with the
                within-word method: a relative improvement of 8.8\% WER
                was found compared to the baseline.},
  categories = {asr, pm, VIOS, Nijmegen},
  pdf        = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1999/kessens.1999.1.pdf}
}
@inproceedings{Hitzeman_1999_a,
  author     = {Hitzeman, Janet and Black, Alan W. and Taylor, Paul and
                Mellish, Chris and Oberlander, Jon},
  title      = {An Annotation Scheme for Concept-to-Speech Synthesis},
  booktitle  = {Proceedings of the European Workshop on Natural
                Language Generation},
  pages      = {59--66},
  address    = {Toulouse, France},
  year       = {1999},
  categories = {synthesis, intonation, sole},
  pdf        = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1999/Hitzeman_1999_a.pdf},
  ps         = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1999/Hitzeman_1999_a.ps}
}
@inproceedings{robinson-eurospeech99,
  author     = {Robinson, T. and Abberley, D. and Kirby, D. and
                Renals, S.},
  title      = {Recognition, indexing and retrieval of {British}
                broadcast news with the {THISL} system},
  booktitle  = {Proc. Eurospeech},
  pages      = {1067--1070},
  address    = {Budapest},
  year       = {1999},
  abstract   = {This paper described the THISL spoken document
                retrieval system for British and North American
                Broadcast News. The system is based on the Abbot large
                vocabulary speech recognizer and a probabilistic text
                retrieval system. We discuss the development of a
                realtime British English Broadcast News system, and its
                integration into a spoken document retrieval system.
                Detailed evaluation is performed using a similar North
                American Broadcast News system, to take advantage of
                the TREC SDR evaluation methodology. We report results
                on this evaluation, with particular reference to the
                effect of query expansion and of automatic segmentation
                algorithms.},
  categories = {thisl,bnews,trec,ir,recognition,sheffield},
  pdf        = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1999/eurospeech99-thisl.pdf},
  ps         = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1999/eurospeech99-thisl.ps.gz}
}
@inproceedings{Keeni1999IJCNN,
  author     = {Keeni, Kanad and Nakayama, Kenji and Shimodaira, Hiroshi},
  title      = {Estimation of Initial Weights and Hidden Units for
                Fast Learning of Multi-layer Neural Networks for
                Pattern Classification},
  booktitle  = {IEEE International Joint Conference on Neural Networks
                (IJCNN'99)},
  month      = jul,
  year       = {1999},
  categories = {ann, jaist}
}
@inproceedings{Wright_1999_a,
  author     = {Wright, H. and Poesio, Massimo and Isard, Stephen},
  title      = {Using high level dialogue information for dialogue act
                recognition using prosodic features},
  booktitle  = {Proceedings of an {ESCA} Tutorial and Research
                Workshop on Dialogue and Prosody},
  pages      = {139--143},
  address    = {Eindhoven, The Netherlands},
  year       = {1999},
  categories = {dialogue, prosody, asr},
  pdf        = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1999/Wright_1999_a.pdf},
  ps         = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1999/Wright_1999_a.ps}
}
@phdthesis{LIN99-phd,
  author     = {Lincoln, M.},
  title      = {Characterization of Speakers for Improved Automatic
                Speech Recognition},
  school     = {University of East Anglia},
  year       = {1999},
  abstract   = {Automatic speech recognition technology is becoming
                increasingly widespread in many applications. For
                dictation tasks, where a single talker is to use the
                system for long periods of time, the high recognition
                accuracies obtained are in part due to the user
                performing a lengthy enrolment procedure to tune the
                parameters of the recogniser to their particular voice
                characteristics and speaking style. Interactive speech
                systems, where the speaker is using the system for only
                a short period of time (for example to obtain
                information) do not have the luxury of long enrolments
                and have to adapt rapidly to new speakers and speaking
                styles. This thesis discusses the variations between
                speakers and speaking styles which result in decreased
                recognition performance when there is a mismatch
                between the talker and the system's models. An
                unsupervised method to rapidly identify and normalise
                differences in vocal tract length is presented and
                shown to give improvements in recognition accuracy for
                little computational overhead. Two unsupervised methods
                of identifying speakers with similar speaking styles
                are also presented. The first, a data-driven technique,
                is shown to accurately classify British and American
                accented speech, and is also used to improve
                recognition accuracy by clustering groups of similar
                talkers. The second uses the phonotactic information
                available within pronunciation dictionaries to model
                British and American accented speech. This model is
                then used to rapidly and accurately classify speakers.},
  categories = {adaptation, ASR, speaker characteristics, BT, UEA},
  pdf        = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1999/thesis.pdf}
}
@inproceedings{gotoh-esca99,
  author     = {Gotoh, Y. and Renals, S.},
  title      = {Statistical annotation of named entities in spoken
                audio},
  booktitle  = {Proc. ESCA Workshop on Accessing Information In Spoken
                Audio},
  pages      = {43--48},
  address    = {Cambridge},
  year       = {1999},
  abstract   = {In this paper we describe stochastic finite state
                model for named entity (NE) identification, based on
                explicit word-level n-gram relations. NE categories are
                incorporated in the model as word attributes. We
                present an overview of the approach, describing how the
                extensible vocabulary model may be used for NE
                identification. We report development and evaluation
                results on a North American Broadcast News task. This
                approach resulted in average precision and recall
                scores of around 83\% on hand transcribed data, and
                73\% on the SPRACH recogniser output. We also present
                an error analysis and a comparison of our approach with
                an alternative statistical approach.},
  categories = {sprach,stobs,ie,lm,bnews,sheffield},
  pdf        = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1999/esca99-ne.pdf},
  ps         = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1999/esca99-ne.ps.gz}
}
@inproceedings{carreira-icphs99,
  author     = {Carreira-Perpi{\~n}{\'a}n, M. and Renals, S.},
  title      = {A latent-variable modelling approach to the
                acoustic-to-articulatory mapping problem},
  booktitle  = {Proc. 14th Int. Congress of Phonetic Sciences},
  pages      = {2013--2016},
  address    = {San Francisco},
  year       = {1999},
  abstract   = {We present a latent variable approach to the
                acoustic-to-articulatory mapping problem, where
                different vocal tract configurations can give rise to
                the same acoustics. In latent variable modelling, the
                combined acoustic and articulatory data are assumed to
                have been generated by an underlying low-dimensional
                process. A parametric probabilistic model is estimated
                and mappings are derived from the respective
                conditional distributions. This has the advantage over
                other methods, such as articulatory codebooks or neural
                networks, of directly addressing the nonuniqueness
                problem. We demonstrate our approach with
                electropalatographic and acoustic data from the ACCOR
                database.},
  categories = {ml,lv,artic,sheffield},
  pdf        = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1999/icphs99.pdf},
  ps         = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1999/icphs99.ps.gz}
}
@inproceedings{fitt_icphs99,
  author     = {Fitt, Sue},
  title      = {The treatment of vowels preceding `r' in a keyword
                lexicon of {English}},
  booktitle  = {Proc. ICPhS 1999},
  month      = aug,
  year       = {1999},
  abstract   = {Work is progressing on a keyword lexicon aimed at
                enabling the synthesis of various regional accents of
                English. This paper focuses on a particular issue, that
                of vowels before orthographic `r'. These vowels are
                discussed with respect to rhotic and non-rhotic
                accents, in terms of both keyword sets and phonetic
                realisation. Criteria for the use of keysymbols are
                discussed, and it is noted that these criteria result
                in inclusion of post-vocalic /r/ in the lexicon, with
                deletion by rule for non-rhotic accents. It is noted
                that some keyvowels in our original set have had to be
                split, while others may prove to be redundant.},
  categories = {speech synthesis, lexicon, accents, regional
                pronunciation, rhotic},
  pdf        = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1999/Fitt_1999_b.pdf},
  ps         = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1999/Fitt_1999_b.ps}
}
@article{renals-sap99,
  author     = {Renals, S. and Hochberg, M.},
  title      = {Start-synchronous search for large vocabulary
                continuous speech recognition},
  journal    = {IEEE Trans. on Speech and Audio Processing},
  volume     = {7},
  pages      = {542--553},
  year       = {1999},
  abstract   = {In this paper, we present a novel, efficient search
                strategy for large vocabulary continuous speech
                recognition. The search algorithm, based on a stack
                decoder framework, utilizes phone-level posterior
                probability estimates (produced by a connectionist/HMM
                acoustic model) as a basis for phone deactivation
                pruning - a highly efficient method of reducing the
                required computation. The single-pass algorithm is
                naturally factored into the time-asynchronous
                processing of the word sequence and the
                time-synchronous processing of the HMM state sequence.
                This enables the search to be decoupled from the
                language model while still maintaining the
                computational benefits of time-synchronous processing.
                The incorporation of the language model in the search
                is discussed and computationally cheap approximations
                to the full language model are introduced. Experiments
                were performed on the North American Business News task
                using a 60,000 word vocabulary and a trigram language
                model. Results indicate that the computational cost of
                the search may be reduced by more than a factor of 40
                with a relative search error of less than 2\% using the
                techniques discussed in the paper.},
  categories = {sprach,recognition,search,bnews,sheffield},
  pdf        = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1999/sap99-preprint.pdf},
  ps         = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1999/sap99-preprint.ps.gz}
}
@inproceedings{clark_icphs99,
  author     = {Clark, Robert A. J.},
  title      = {Using Prosodic Structure to Improve Pitch Range
                Variation in Text to Speech Synthesis},
  booktitle  = {Proc. {XIV}th international congress of phonetic
                sciences},
  volume     = {1},
  pages      = {69--72},
  year       = {1999},
  categories = {synthesis, prosody, intonation, festival},
  pdf        = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1999/clark_icphs99.pdf},
  ps         = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1999/clark_icphs99.ps}
}
@inproceedings{gotoh-icassp99,
  author     = {Gotoh, Y. and Renals, S. and Williams, G.},
  title      = {Named entity tagged language models},
  booktitle  = {Proc. IEEE ICASSP},
  pages      = {513--516},
  address    = {Phoenix AZ},
  year       = {1999},
  abstract   = {We introduce Named Entity (NE) Language Modelling, a
                stochastic finite state machine approach to identifying
                both words and NE categories from a stream of spoken
                data. We provide an overview of our approach to NE
                tagged language model (LM) generation together with
                results of the application of such a LM to the task of
                out-of-vocabulary (OOV) word reduction in large
                vocabulary speech recognition. Using the Wall Street
                Journal and Broadcast News corpora, it is shown that
                the tagged LM was able to reduce the overall word error
                rate by 14\%, detecting up to 70\% of previously OOV
                words. We also describe an example of the direct
                tagging of spoken data with NE categories.},
  categories = {sprach,ie,lm,bnews,sheffield},
  pdf        = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1999/icassp99.pdf},
  ps         = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1999/icassp99.ps.gz}
}
@inproceedings{renals-eurospeech99,
  author     = {Renals, S. and Gotoh, Y.},
  title      = {Integrated transcription and identification of named
                entities in broadcast speech},
  booktitle  = {Proc. Eurospeech},
  pages      = {1039--1042},
  address    = {Budapest},
  year       = {1999},
  abstract   = {This paper presents an approach to integrating
                functions for both transcription and named entity (NE)
                identification into a large vocabulary continuous
                speech recognition system. It builds on NE tagged
                language modelling approach, which was recently applied
                for development of the statistical NE annotation
                system. We also present results for proper name
                identification experiment using the Hub-4E open
                evaluation data.},
  categories = {sprach,stobs,ie,lm,bnews,sheffield},
  pdf        = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1999/eurospeech99-ne.pdf},
  ps         = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1999/eurospeech99-ne.ps.gz}
}
@inproceedings{clarkdusterhoff_eurospeech99,
  author     = {Clark, Robert A. J. and Dusterhoff, Kurt E.},
  title      = {Objective Methods for Evaluating Synthetic Intonation},
  booktitle  = {Proc. {Eurospeech} 1999},
  volume     = {4},
  pages      = {1623--1626},
  year       = {1999},
  categories = {synthesis, prosody, intonation},
  pdf        = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1999/clarkdusterhoff_eurospeech99.pdf},
  ps         = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1999/clarkdusterhoff_eurospeech99.ps}
}
@inproceedings{richmond99,
  author     = {Richmond, K.},
  title      = {Estimating Velum Height from Acoustics During
                Continuous Speech},
  booktitle  = {Proc. Eurospeech},
  volume     = {1},
  pages      = {149--152},
  address    = {Budapest, Hungary},
  year       = {1999},
  abstract   = {This paper reports on present work, in which a
                recurrent neural network is trained to estimate `velum
                height' during continuous speech. Parallel
                acoustic-articulatory data comprising more than 400
                read {TIMIT} sentences is obtained using
                electromagnetic articulography (EMA). This data is
                processed and used as training data for a range of
                neural network sizes. The network demonstrating the
                highest accuracy is identified. This performance is
                then evaluated in detail by analysing the network's
                output for each phonetic segment contained in 50
                hand-labelled utterances set aside for testing
                purposes.},
  categories = {artic, ann, mlp, inversion, mocha, edinburgh},
  pdf        = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1999/Richmond_1999_a.pdf},
  ps         = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1999/Richmond_1999_a.ps}
}
@article{williams-csl99,
  author     = {Williams, G. and Renals, S.},
  title      = {Confidence measures from local posterior probability
                estimates},
  journal    = {Computer Speech and Language},
  volume     = {13},
  pages      = {395--411},
  year       = {1999},
  abstract   = {In this paper we introduce a set of related confidence
                measures for large vocabulary continuous speech
                recognition (LVCSR) based on local phone posterior
                probability estimates output by an acceptor HMM
                acoustic model. In addition to their computational
                efficiency, these confidence measures are attractive as
                they may be applied at the state-, phone-, word- or
                utterance-levels, potentially enabling discrimination
                between different causes of low confidence recognizer
                output, such as unclear acoustics or mismatched
                pronunciation models. We have evaluated these
                confidence measures for utterance verification using a
                number of different metrics. Experiments reveal several
                trends in `profitability of rejection', as measured by
                the unconditional error rate of a hypothesis test.
                These trends suggest that crude pronunciation models
                can mask the relatively subtle reductions in confidence
                caused by out-of-vocabulary (OOV) words and
                disfluencies, but not the gross model mismatches
                elicited by non-speech sounds. The observation that a
                purely acoustic confidence measure can provide improved
                performance over a measure based upon both acoustic and
                language model information for data drawn from the
                Broadcast News corpus, but not for data drawn from the
                North American Business News corpus suggests that the
                quality of model fit offered by a trigram language
                model is reduced for Broadcast News data. We also argue
                that acoustic confidence measures may be used to inform
                the search for improved pronunciation models.},
  categories = {recognition,conf,hybrid,bnews,sheffield},
  pdf        = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1999/csl99-preprint.pdf},
  ps         = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1999/csl99-preprint.ps.gz}
}
@inproceedings{Wester-ICPhS-99,
  author     = {Wester, M. and Kessens, J. M.},
  title      = {Comparison between expert listeners and continuous
                speech recognizers in selecting pronunciation variants},
  booktitle  = {Proc. of ICPhS '99},
  pages      = {723--726},
  address    = {San Francisco},
  year       = {1999},
  abstract   = {In this paper, the performance of an automatic
                transcription tool is evaluated. The transcription tool
                is a continuous speech recognizer (CSR) which can be
                used to select pronunciation variants (i.e. detect
                insertions and deletions of phones). The performance of
                the CSR was compared to a reference transcription based
                on the judgments of expert listeners. We investigated
                to what extent the degree of agreement between the
                listeners and the CSR was affected by employing various
                sets of phone models (PMs). Overall, the PMs perform
                more similarly to the listeners when pronunciation
                variation is modeled. However, the various sets of PMs
                lead to different results for insertion and deletion
                processes. Furthermore, we found that to a certain
                degree, word error rates can be used to predict which
                set of PMs to use in the transcription tool.},
  categories = {asr, pm, VIOS, Nijmegen},
  pdf        = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1999/wester.1999.1.pdf}
}
@inproceedings{Rokui1999ICANN09,
  author     = {Rokui, Jun and Shimodaira, Hiroshi},
  title      = {Multistage Building Learning based on
                Misclassification Measure},
  booktitle  = {9-th International Conference on Artificial Neural
                Networks, Edinburgh, UK},
  month      = sep,
  year       = {1999},
  categories = {nn, mce, jaist}
}
@inproceedings{Dusterhoff_1999_b,
  author     = {Dusterhoff, Kurt E.},
  title      = {Automatic Intonation Analysis Using Acoustic Data},
  booktitle  = {Proceedings, ESCA TRW on Dialogue and Prosody},
  address    = {Eindhoven},
  year       = {1999},
  categories = {intonation, prosody, recognition},
  pdf        = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1999/Dusterhoff_1999_b.pdf},
  ps         = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1999/Dusterhoff_1999_b.ps}
}
@inproceedings{Dusterhoff_1999_a,
  author     = {Dusterhoff, Kurt E. and Black, Alan W. and
                Taylor, Paul A.},
  title      = {Using Decision Trees within the {Tilt} Intonation Model
                to Predict {F0} Contours},
  booktitle  = {Eurospeech 99},
  address    = {Budapest},
  year       = {1999},
  categories = {intonation, synthesis, festival},
  pdf        = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1999/Dusterhoff_1999_a.pdf},
  ps         = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1999/Dusterhoff_1999_a.ps}
}
@inproceedings{Williams_1999_a,
  author     = {Williams, Briony},
  title      = {A {Welsh} speech database: preliminary results},
  booktitle  = {Eurospeech 99},
  address    = {Budapest, Hungary},
  year       = {1999},
  categories = {database, phonetics, welsh, welshdata},
  pdf        = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1999/Williams_1999_a.pdf},
  ps         = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1999/Williams_1999_a.ps}
}
@inproceedings{Keeni1999ICCIMA,
  author     = {Keeni, Kanad and Nakayama, Kenji and Shimodaira, Hiroshi},
  title      = {A Training Scheme for Pattern Classification Using
                Multi-layer Feed-forward Neural Networks},
  booktitle  = {IEEE International Conference on Computational
                Intelligence and Multimedia Applications},
  pages      = {307--311},
  month      = sep,
  year       = {1999},
  categories = {ann, jaist}
}
@inproceedings{cs-CL-9907021,
  author       = {G{\"o}rz, G{\"u}nther and Spilker, J{\"o}rg and
                  Strom, Volker and Weber, Hans},
  title        = {Architectural Considerations for Conversational
                  Systems -- The {Verbmobil/INTARC} Experience},
  booktitle    = {Proceedings of the First International Workshop on
                  Human Computer Conversation},
  address      = {Bellagio, Italy},
  year         = {1999},
  eprint       = {cs/9907021},
  archiveprefix = {arXiv},
  primaryclass = {cs.CL},
  abstract     = {The paper describes the speech to speech translation
                  system INTARC, developed during the first phase of the
                  Verbmobil project. The general design goals of the
                  INTARC system architecture were time synchronous
                  processing as well as incrementality and interactivity
                  as a means to achieve a higher degree of robustness and
                  scalability. Interactivity means that in addition to
                  the bottom-up (in terms of processing levels) data flow
                  the ability to process top-down restrictions
                  considering the same signal segment for all processing
                  levels. The construction of INTARC 2.0, which has been
                  operational since fall 1996, followed an engineering
                  approach focussing on the integration of symbolic
                  (linguistic) and stochastic (recognition) techniques
                  which led to a generalization of the concept of a ``one
                  pass'' beam search.},
  pdf          = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1999/INTARC99.pdf},
  ps           = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1999/INTARC99.ps}
}
@inproceedings{mayoturk:99,
  author     = {Mayo, C.},
  title      = {The development of phonemic awareness and perceptual
                weighting in relation to early and later literacy
                acquisition},
  booktitle  = {20th Annual Child Phonology Conference, Bangor, Wales},
  year       = {1999},
  categories = {speech perception, development, cue weighting,
                phonemic awareness, literacy}
}
@article{gotoh-nle99,
  author     = {Gotoh, Y. and Renals, S.},
  title      = {Topic-based mixture language modelling},
  journal    = {Journal of Natural Language Engineering},
  volume     = {5},
  pages      = {355--375},
  year       = {1999},
  abstract   = {This paper describes an approach for constructing a
                mixture of language models based on simple statistical
                notions of semantics using probabilistic models
                developed for information retrieval. The approach
                encapsulates corpus-derived semantic information and is
                able to model varying styles of text. Using such
                information, the corpus texts are clustered in an
                unsupervised manner and a mixture of topic-specific
                language models is automatically created. The principal
                contribution of this work is to characterise the
                document space resulting from information retrieval
                techniques and to demonstrate the approach for mixture
                language modelling. A comparison is made between manual
                and automatic clustering in order to elucidate how the
                global content information is expressed in the space.
                We also compare (in terms of association with manual
                clustering and language modelling accuracy) alternative
                term-weighting schemes and the effect of singular
                valued decomposition dimension reduction (latent
                semantic analysis). Test set perplexity results using
                the British National Corpus indicate that the approach
                can improve the potential of statistical language
                modelling. Using an adaptive procedure, the
                conventional model may be tuned to track text data with
                a slight increase in computational cost.},
  categories = {sprach,stobs,lm,bnc,sheffield},
  pdf        = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1999/jnle99-preprint.pdf},
  ps         = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1999/jnle99-preprint.ps.gz}
}
@inproceedings{Taylor_1999_a,
  author     = {Taylor, Paul and Black, Alan W.},
  title      = {Speech Synthesis by Phonological Structure Matching},
  booktitle  = {Eurospeech 99},
  address    = {Budapest, Hungary},
  year       = {1999},
  categories = {synthesis, unit selection, waveform generation,
                unisyn, festival},
  pdf        = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1999/Taylor_1999_a.pdf},
  ps         = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1999/Taylor_1999_a.ps}
}
@inproceedings{renals-mmsp99,
  author     = {Renals, S. and Abberley, D. and Kirby, D. and
                Robinson, T.},
  title      = {The {THISL} System for Indexing and Retrieval of
                Broadcast News},
  booktitle  = {Proc. IEEE Workshop on Multimedia Signal Processing},
  pages      = {77--82},
  address    = {Copenhagen},
  year       = {1999},
  abstract   = {This paper describes the THISL news retrieval system
                which maintains an archive of BBC radio and television
                news recordings. The system uses the Abbot large
                vocabulary continuous speech recognition system to
                transcribe news broadcasts, and the thislIR text
                retrieval system to index and access the transcripts.
                Decoding and indexing is performed automatically, and
                the archive is updated with three hours of new material
                every day. A web-based interface to the retrieval
                system has been devised to facilitate access to the
                archive.},
  categories = {thisl,bnews,trec,ir,recognition,sheffield},
  http       = {http://homepages.inf.ed.ac.uk/srenals/pubs/1999/mmsp99-54/},
  pdf        = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1999/mmsp99.pdf},
  ps         = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1999/mmsp99.ps.gz}
}
@comment{{McKenna and Isard, Eurospeech 99, volume 6. The page range is written with "--" so styles typeset an en dash; "Kalman" is brace-protected so title-downcasing styles keep its capital.}}
@inproceedings{McKenna_1999_a,
author = {John McKenna and Stephen Isard},
title = {Tailoring {Kalman} Filtering Towards Speaker
Characterisation},
booktitle = {Proc. {E}urospeech '99},
volume = 6,
pages = {2793--2796},
address = {Budapest},
categories = {signal processing, synthesis},
pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1999/McKenna_1999_a.pdf},
ps = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1999/McKenna_1999_a.ps},
year = 1999
}
@comment{{Abberley, Kirby, Renals and Robinson: THISL broadcast news retrieval system paper, ESCA Workshop on Accessing Information In Spoken Audio. Links are carried in the http, pdf and ps fields.}}
@inproceedings{abberley-esca99,
  author     = {D.~Abberley and D.~Kirby and S.~Renals and T.~Robinson},
  title      = {The {THISL} broadcast news retrieval system},
  booktitle  = {Proc. ESCA Workshop on Accessing Information In Spoken
                Audio},
  pages      = {19--24},
  address    = {Cambridge},
  abstract   = {This paper described the THISL spoken document
                retrieval system for British and North American
                Broadcast News. The system is based on the
                \textsc{Abbot} large vocabulary speech recognizer,
                using a recurrent network acoustic model, and a
                probabilistic text retrieval system. We discuss the
                development of a realtime British English Broadcast
                News system, and its integration into a spoken document
                retrieval system. Detailed evaluation is performed
                using a similar North American Broadcast News system,
                to take advantage of the TREC SDR evaluation
                methodology. We report results on this evaluation, with
                particular reference to the effect of query expansion
                and of automatic segmentation algorithms.},
  categories = {thisl,bnews,trec,ir,recognition,sheffield},
  http       = {http://homepages.inf.ed.ac.uk/srenals/pubs/1999/esca99-thisl/},
  pdf        = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1999/esca99-thisl.pdf},
  ps         = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1999/esca99-thisl.ps.gz},
  year       = {1999}
}
@comment{{King and Wrench, ICPhS 99 (San Francisco). The page range is written with "--" so styles typeset an en dash.}}
@inproceedings{king:wrench:icphs1999,
author = {Simon King and Alan Wrench},
title = {Dynamical System Modelling of Articulator Movement},
booktitle = {Proc. {ICPhS} 99},
pages = {2259--2262},
address = {San Francisco},
abstract = {We describe the modelling of articulatory movements
using (hidden) dynamical system models trained on
Electro-Magnetic Articulograph (EMA) data. These models
can be used for automatic speech recognition and to
give insights into articulatory behaviour. They belong
to a class of continuous-state Markov models, which we
believe can offer improved performance over
conventional Hidden Markov Models (HMMs) by better
accounting for the continuous nature of the underlying
speech production process -- that is, the movements of
the articulators. To assess the performance of our
models, a simple speech recognition task was used, on
which the models show promising results.},
categories = {asr, artic, ema},
month = aug,
pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1999/King_Wrench_icphs1999.pdf},
ps = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1999/King_Wrench_icphs1999.ps},
year = 1999
}
@comment{{Renals, Gotoh, Gaizauskas and Stevenson: SPRACH/LaSIE named entity identification paper, DARPA Broadcast News Workshop. No address field is recorded for this entry.}}
@inproceedings{renals-darpa99,
  author     = {S.~Renals and Y.~Gotoh and R.~Gaizauskas and
                M.~Stevenson},
  title      = {The {SPRACH/LaSIE} system for named entity
                identification in broadcast news},
  booktitle  = {Proc. DARPA Broadcast News Workshop},
  pages      = {47--50},
  abstract   = {We have developed two conceptually different systems
                that are able to identify named entities from spoken
                audio. One (referred to as SPRACH-S) has a stochastic
                finite state machine structure for use with an acoustic
                model that identifies both words and named entities
                from speech data. The other (referred to as SPRACH-R)
                is a rule-based system which uses matching against
                stored name lists, part-of-speech tagging, and light
                phrasal parsing with specialised named entity grammars.
                We provide an overview of the two approaches and
                present results on the Hub-4E IE-NE evaluation task.},
  categories = {sprach,stobs,ie,lm,bnews,sheffield},
  http       = {http://homepages.inf.ed.ac.uk/srenals/pubs/1999/darpa99-ne.html},
  pdf        = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1999/darpa99-ne.pdf},
  ps         = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1999/darpa99-ne.ps.gz},
  year       = {1999}
}
@comment{{Abberley, Renals, Cook and Robinson: THISL system at the TREC-7 Spoken Document Retrieval track. "TREC-7" is written with a plain hyphen in both booktitle and abstract; small caps use \textsc rather than the obsolete \sc switch.}}
@inproceedings{abberley-trec99,
author = {D.~Abberley and S.~Renals and G.~Cook and T.~Robinson},
title = {Retrieval of broadcast news documents with the {THISL}
system},
booktitle = {Proc. Seventh Text Retrieval Conference (TREC-7)},
pages = {181--190},
abstract = {This paper describes the THISL system that
participated in the TREC-7 evaluation, Spoken Document
Retrieval (SDR) Track, and presents the results
obtained, together with some analysis. The THISL system
is based on the \textsc{Abbot} speech recognition system
and the thislIR text retrieval system. In this
evaluation we were concerned with investigating the
suitability for SDR of a recognizer running at less
than ten times realtime, the use of multiple
transcriptions and word graphs, the effect of simple
query expansion algorithms and the effect of varying
standard IR parameters.},
categories = {thisl,bnews,trec,ir,recognition,eval,sheffield},
pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1999/trec7.pdf},
ps = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1999/trec7.ps.gz},
year = 1999
}