1998.bib
@comment{{This file has been generated by bib2bib 1.92}}
@comment{{Command line: /home/korin/bibtex2html-1.92-LINUX/bib2bib -oc /home/korin/projects/publications/new_output/transitdata/1998-citations -ob /home/korin/projects/publications/new_output/transitdata/1998.bib -c 'year : "1998"' /home/korin/projects/publications/filtlists/full_publications_list.bib}}
@inproceedings{Hitzeman_1998_c,
author = {Janet Hitzeman and Massimo Poesio},
title = {Long Distance Pronominalization and Global Focus},
booktitle = {COLING-ACL '98},
volume = 1,
pages = {550-556},
address = {Montreal, Quebec, Canada},
categories = {text analysis, sole},
pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1998/Hitzeman_1998_c.pdf},
ps = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1998/Hitzeman_1998_c.ps},
  school = {Université de Montréal},
year = 1998
}
@inproceedings{Shimodaira1998ICSLP,
author = {Hiroshi Shimodaira and Jun Rokui and Mitsuru Nakai},
title = {{Improving The Generalization Performance Of The
MCE/GPD Learning}},
booktitle = {ICSLP'98, Australia},
abstract = { A novel method to prevent the over-fitting effect and
improve the generalization performance of the Minimum
Classification Error (MCE) / Generalized Probabilistic
Descent (GPD) learning is proposed. The MCE/GPD method,
which is one of the newest discriminative-learning
approaches proposed by Katagiri and Juang in 1992,
results in better recognition performance in various
areas of pattern recognition than the
maximum-likelihood (ML) based approach where a
posteriori probabilities are estimated. Despite its
superiority in recognition performance, it still
suffers from the problem of over-fitting to the
                 training samples, as is the case with other learning
                 algorithms. In the present study, a regularization
                 technique is applied to the MCE method to overcome
this problem. Feed-forward neural networks are employed
as a recognition platform to evaluate the recognition
performance of the proposed method. Recognition
experiments are conducted on several sorts of datasets.
The proposed method shows better generalization
performance than the original one. },
categories = {lifelike-agent, jaist},
month = dec,
pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1998/Shimodaira1998ICSLP.pdf},
year = 1998
}
@inproceedings{Keeni1998ICPR,
author = {Kanad Keeni and Kenji Nakayama and Hiroshi Shimodaira},
title = {{Automatic Generation of Initial Weights and
Estimation of Hidden Units for Pattern Classification
Using Neural Networks}},
booktitle = {14th International Conference on Pattern Recognition
(ICPR'98)},
pages = {1568--1571},
categories = {ann, jaist},
month = aug,
year = 1998
}
@inproceedings{abberley-icassp98,
author = {D.~Abberley and S.~Renals and G.~Cook},
title = {Retrieval of broadcast news documents with the {THISL}
system},
booktitle = {Proc IEEE ICASSP},
pages = {3781--3784},
address = {Seattle},
abstract = {This paper describes a spoken document retrieval
system, combining the Abbot large vocabulary continuous
speech recognition (LVCSR) system developed by
Cambridge University, Sheffield University and
SoftSound, and the PRISE information retrieval engine
developed by NIST. The system was constructed to enable
us to participate in the TREC 6 Spoken Document
Retrieval experimental evaluation. Our key aims in this
                 work were to produce a complete system for the SDR
task, to investigate the effect of a word error rate of
30-50\% on retrieval performance and to investigate the
integration of LVCSR and word spotting in a retrieval
task.},
categories = {thisl,bnews,trec,ir,recognition,sheffield},
pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1998/icassp98.pdf},
ps = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1998/icassp98.ps.gz},
year = 1998
}
@article{Taylor_1998_b,
author = {Paul A. Taylor and S. King and S. D. Isard and H.
Wright},
title = {Intonation and Dialogue Context as Constraints for
Speech Recognition},
journal = {Language and Speech},
volume = 41,
number = {3},
pages = {493-512},
categories = {asr, intonation, dialogue, lm, id4s},
pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1998/Taylor_1998_b.pdf},
ps = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1998/Taylor_1998_b.ps},
year = 1998
}
@inproceedings{renals-twente98,
author = {S.~Renals and D.~Abberley},
title = {The {THISL} spoken document retrieval system},
booktitle = {Proc. 14th Twente Workshop on Language Technology},
pages = {129--140},
  abstract = {THISL is an ESPRIT Long Term Research Project focused
                 on the development and construction of a system to
                 retrieve items from an archive of television and radio
                 news broadcasts. In this paper we outline our spoken
                 document retrieval system based on the Abbot speech
                 recognizer and a text retrieval system based on Okapi
                 term-weighting. The system has been evaluated as part
of the TREC-6 and TREC-7 spoken document retrieval
evaluations and we report on the results of the TREC-7
evaluation based on a document collection of 100 hours
of North American broadcast news.},
categories = {thisl,bnews,trec,ir,recognition,sheffield},
pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1998/twente98.pdf},
ps = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1998/twente98.ps.gz},
year = 1998
}
@inproceedings{king:stephenson:isard:taylor:strachan:icslp1998,
author = {Simon King and Todd Stephenson and Stephen Isard and
Paul Taylor and Alex Strachan},
title = {Speech Recognition via Phonetically Featured Syllables},
  booktitle = {Proc. {ICSLP} '98},
pages = {1031-1034},
address = {Sydney, Australia},
abstract = {We describe a speech recogniser which uses a speech
production-motivated phonetic-feature description of
speech. We argue that this is a natural way to describe
the speech signal and offers an efficient intermediate
parameterisation for use in speech recognition. We also
propose to model this description at the syllable
rather than phone level. The ultimate goal of this work
is to generate syllable models whose parameters
explicitly describe the trajectories of the phonetic
features of the syllable. We hope to move away from
Hidden Markov Models (HMMs) of context-dependent phone
units. As a step towards this, we present a preliminary
system which consists of two parts: recognition of the
phonetic features from the speech signal using a neural
network; and decoding of the feature-based description
into phonemes using HMMs.},
categories = {asr},
month = dec,
pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1998/King_Stephenson_Isard_Taylor_Strachan_icslp1998.pdf},
ps = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1998/King_Stephenson_Isard_Taylor_Strachan_icslp1998.ps},
year = 1998
}
@inproceedings{carreira-nnsp98,
author = {M.~Carreira-Perpiñán and S.~Renals},
title = {Experimental evaluation of latent variable models for
dimensionality reduction},
booktitle = {IEEE Proc. Neural Networks for Signal Processing},
volume = {8},
pages = {165--173},
address = {Cambridge},
abstract = {We use electropalatographic (EPG) data as a test bed
for dimensionality reduction methods based in latent
variable modelling, in which an underlying lower
dimension representation is inferred directly from the
data. Several models (and mixtures of them) are
investigated, including factor analysis and the
generative topographic mapping (GTM). Experiments
indicate that nonlinear latent variable modelling
reveals a low-dimensional structure in the data
inaccessible to the investigated linear models.},
categories = {ml,lv,artic,sheffield},
pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1998/nnsp98.pdf},
ps = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1998/nnsp98.ps.gz},
year = 1998
}
@inproceedings{mayoturk:98,
author = {Mayo, C.},
title = {The developmental relationship between perceptual
weighting and phonemic awareness},
booktitle = {LabPhon 6, University of York, UK},
categories = {speech perception, development, cue weighting,
phonemic awareness, literacy},
year = 1998
}
@inproceedings{wester-98-sposs,
author = {M. Wester and J.M. Kessens and C. Cucchiarini and H.
Strik},
title = {Selection of Pronunciation Variants in Spontaneous
Speech: Comparing the Performance of Man and Machine},
booktitle = {Proc. of the ESCA Workshop on the Sound Patterns of
Spontaneous Speech: Production and Perception},
pages = {157-160},
address = {Aix-en-Provence},
  abstract = {In this paper, the performance of an automatic
                 transcription tool is evaluated. The transcription tool
                 is a continuous speech recognizer (CSR) running in
                 forced recognition mode. For the evaluation, the
                 performance of the CSR was compared to that of nine
                 expert listeners. Both man and machine carried out
                 exactly the same task: deciding whether a segment was
                 present or not in 467 cases. It turned out that the
                 performance of the CSR was comparable to that of the
                 experts.},
categories = {asr, pm, VIOS, Nijmegen},
pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1998/wester.1998.5.pdf},
year = 1998
}
@inproceedings{Jang_1998_a,
author = {Tae-Yeoub Jang and Minsuck Song and Kiyeong Lee},
title = {Disambiguation of Korean Utterances Using Automatic
Intonation Recognition},
booktitle = {Proceedings of ICSLP98},
volume = 3,
pages = {603-606},
address = {Sydney, Australia},
categories = {intonation, prosody, recognition},
pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1998/Jang_1998_a.pdf},
ps = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1998/Jang_1998_a.ps},
year = 1998
}
@inproceedings{Wright_1998_b,
author = {Helen Wright},
title = {Automatic Utterance Type Detection Using
Suprasegmental Features},
booktitle = {ICSLP'98},
volume = 4,
pages = {1403},
address = {Sydney, Australia},
categories = {recognition, prosody, intonation, dialogue},
pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1998/Wright_1998_b.pdf},
ps = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1998/Wright_1998_b.ps},
year = 1998
}
@inproceedings{Sproat_1998_b,
author = {Richard Sproat and Andrew Hunt and Mari Ostendorf and
Paul Taylor and Alan Black and Kevin Lenzo},
title = {Sable: a standard for {TTS} markup},
  booktitle = {Third {ESCA} Workshop on Speech Synthesis},
pages = {27-30},
address = {Jenolan Caves, Blue Mountains, Australia},
categories = {markup, sable},
pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1998/Sproat_1998_b.pdf},
ps = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1998/Sproat_1998_b.ps},
year = 1998
}
@inproceedings{Syrdal_1998_a,
author = {Ann Syrdal and Gregor Moehler and Kurt Dusterhoff and
Alistair Conkie and Alan W Black},
title = {Three Methods of Intonation Modeling},
booktitle = {3rd ESCA Workshop on Speech Synthesis},
pages = {305-310},
address = {Jenolan Caves},
categories = {intonation, synthesis, satissfy, festival},
pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1998/Syrdal_1998_a.pdf},
ps = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1998/Syrdal_1998_a.ps},
year = 1998
}
@inproceedings{ODonnell_1998_a,
author = {Michael O'Donnell and Alistair Knott and Janet
Hitzeman and Hua Cheng},
title = {Integrating Referring and Informing in NP Planning},
booktitle = {Coling-ACL Workshop on the Computational Treatment of
Nominals},
address = {Montreal, Quebec, Canada},
categories = {ilex},
pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1998/ODonnell_1998_a.pdf},
ps = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1998/ODonnell_1998_a.ps},
  school = {Université de Montréal},
year = 1998
}
@article{Taylor_1998_f,
author = {Paul Taylor and Alan Black},
title = {Assigning Phrase Breaks from Part of Speech Sequences},
journal = {Computer Speech and Language},
volume = 12,
pages = {99-117},
categories = {prosody, language modelling, text analysis, synthesis,
phrasing, satissfy},
pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1998/Taylor_1998_f.pdf},
ps = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1998/Taylor_1998_f.ps},
year = 1998
}
@article{Iida1998IEICE06,
author = {Eiji Iida and Susumu Kunifuji and Hiroshi Shimodaira
and Masayuki Kimura},
  title = {{A Scale-Down Solution of $N^2-1$ Puzzle}},
journal = {Trans. IEICE(D-I)},
volume = {J81-D-I},
number = {6},
pages = {604--614},
note = {(in Japanese)},
categories = {puzzle, jaist},
month = jun,
year = 1998
}
@inproceedings{Taylor_1998_e,
author = {Paul A Taylor},
title = {The {T}ilt Intonation Model},
  booktitle = {ICSLP98},
address = {Sydney},
categories = {intonation, prosody},
pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1998/Taylor_1998_e.pdf},
ps = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1998/Taylor_1998_e.ps},
year = 1998
}
@inproceedings{barker-icslp98,
author = {J.~Barker and G.~Williams and S.~Renals},
title = {Acoustic confidence measures for segmenting broadcast
news},
booktitle = {Proc. ICSLP},
pages = {2719--2722},
address = {Sydney},
abstract = {In this paper we define an acoustic confidence measure
based on the estimates of local posterior probabilities
produced by a HMM/ANN large vocabulary continuous
speech recognition system. We use this measure to
segment continuous audio into regions where it is and
is not appropriate to expend recognition effort. The
segmentation is computationally inexpensive and
provides reductions in both overall word error rate and
decoding time. The technique is evaluated using
material from the Broadcast News corpus.},
categories = {recognition,conf,hybrid,bnews,segmentation,sheffield},
pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1998/icslp98-seg.pdf},
ps = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1998/icslp98-seg.ps.gz},
year = 1998
}
@inproceedings{Taylor_1998_d,
author = {Paul A Taylor and Alan Black and Richard Caley},
title = {The Architecture of the Festival Speech Synthesis
System},
  booktitle = {The Third ESCA Workshop on Speech Synthesis},
pages = {147-151},
address = {Jenolan Caves, Australia},
categories = {systems, synthesis, festival},
pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1998/Taylor_1998_d.pdf},
ps = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1998/Taylor_1998_d.ps},
year = 1998
}
@inproceedings{Dusterhoff_1998_a,
author = {K. Dusterhoff},
title = {An investigation into the effectiveness of
                 sub-syllable acoustics in automatic intonation
analysis},
booktitle = {Proceedings of University of Edinburgh
Linguistics/Applied Linguistics Postgraduate Conference},
categories = {intonation, automatic analysis},
pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1998/Dusterhoff_1998_a.pdf},
ps = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1998/Dusterhoff_1998_a.ps},
year = 1998
}
@inproceedings{abberley-trec98,
author = {D.~Abberley and S.~Renals and G.~Cook and T.~Robinson},
title = {The 1997 {THISL} spoken document retrieval system},
booktitle = {Proc. Sixth Text Retrieval Conference (TREC--6)},
pages = {747--752},
abstract = {The THISL spoken document retrieval system is based on
the Abbot Large Vocabulary Continuous Speech
Recognition (LVCSR) system developed by Cambridge
University, Sheffield University and SoftSound, and
uses PRISE (NIST) for indexing and retrieval. We
participated in full SDR mode. Our approach was to
transcribe the spoken documents at the word level using
Abbot, indexing the resulting text transcriptions using
PRISE. The LVCSR system uses a recurrent network-based
acoustic model (with no adaptation to different
conditions) trained on the 50 hour Broadcast News
training set, a 65,000 word vocabulary and a trigram
language model derived from Broadcast News text. Words
in queries which were out-of-vocabulary (OOV) were word
spotted at query time (utilizing the posterior phone
probabilities output by the acoustic model), added to
the transcriptions of the relevant documents and the
collection was then re-indexed. We generated
pronunciations at run-time for OOV words using the
Festival TTS system (University of Edinburgh).},
categories = {thisl,bnews,trec,ir,recognition,eval,sheffield},
pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1998/trec6.pdf},
ps = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1998/trec6.ps.gz},
year = 1998
}
@inproceedings{Lin98,
author = {Lincoln, M. and Cox, S.J. and Ringland, S.},
title = {A Comparison of Two Unsupervised Approaches to Accent
Identification},
booktitle = {Int. Conf. on Spoken Language Processing},
pages = {109-112},
address = {Sydney},
abstract = {The ability to automatically identify a speaker's
accent would be very useful for a speech recognition
system as it would enable the system to use both a
                 pronunciation dictionary and speech models specific to
the accent, techniques which have been shown to improve
accuracy. Here, we describe some experiments in
                 unsupervised accent classification. Two techniques have
                 been investigated to classify British- and
                 American-accented speech: an acoustic approach, in which
we analyse the pattern of usage of the distributions in
the recogniser by a speaker to decide on his most
probable accent, and a high-level approach in which we
                 use a phonotactic model for classification of the
accent. Results show that both techniques give
excellent performance on this task which is maintained
when testing is done on data from an independent
dataset.},
categories = {accent identification, speaker characteristics, BT,
UEA},
pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1998/lincoln-icslp-98.pdf},
year = 1998
}
@phdthesis{king:thesis1998,
author = {Simon King},
title = {Using Information Above the Word Level for Automatic
Speech Recognition},
school = {University of Edinburgh},
abstract = {This thesis introduces a general method for using
information at the utterance level and across
utterances for automatic speech recognition. The method
involves classification of utterances into types. Using
constraints at the utterance level via this
classification method allows information sources to be
exploited which cannot necessarily be used directly for
word recognition. The classification power of three
sources of information is investigated: the language
model in the speech recogniser, dialogue context and
intonation. The method is applied to a challenging
task: the recognition of spontaneous dialogue speech.
The results show success in automatic utterance type
classification, and subsequent word error rate
reduction over a baseline system, when all three
information sources are probabilistically combined.},
categories = {asr, lm, intonation, dialogue, systems},
pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1998/King_thesis1998.pdf},
ps = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1998/King_thesis1998.ps},
year = 1998
}
@inproceedings{williams-icslp98,
author = {G.~Williams and S.~Renals},
title = {Confidence measures derived from an acceptor {HMM}},
booktitle = {Proc. ICSLP},
pages = {831--834},
address = {Sydney},
abstract = {In this paper we define a number of confidence
measures derived from an acceptor HMM and evaluate
their performance for the task of utterance
verification using the North American Business News
(NAB) and Broadcast News (BN) corpora. Results are
presented for decodings made at both the word and phone
level which show the relative profitability of
rejection provided by the diverse set of confidence
measures. The results indicate that language model
dependent confidence measures have reduced performance
on BN data relative to that for the more grammatically
constrained NAB data. An explanation linking the
observations that rejection is more profitable for
noisy acoustics, for a reduced vocabulary and at the
phone level is also given.},
categories = {recognition,conf,hybrid,bnews,sheffield},
pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1998/icslp98-conf.pdf},
ps = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1998/icslp98-conf.ps.gz},
year = 1998
}
@inproceedings{wester-98-icslp,
author = {M. Wester and J.M. Kessens and H. Strik},
title = {Modeling pronunciation variation for a {D}utch {CSR}:
testing three methods},
booktitle = {Proc. ICSLP '98},
pages = {2535-2538},
address = {Sydney},
abstract = {This paper describes how the performance of a
continuous speech recognizer for Dutch has been
improved by modeling pronunciation variation. We used
three methods to model pronunciation variation. First,
within-word variation was dealt with. Phonological
rules were applied to the words in the lexicon, thus
automatically generating pronunciation variants.
Secondly, cross-word pronunciation variation was
modeled using two different approaches. The first
approach was to model cross-word processes by adding
the variants as separate words to the lexicon and in
the second approach this was done by using multi-words.
For each of the methods, recognition experiments were
carried out. A significant improvement was found for
modeling within-word variation. Furthermore, modeling
                 cross-word processes using multi-words leads to
significantly better results than modeling them using
separate words in the lexicon.},
categories = {asr, pm, VIOS, Nijmegen},
pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1998/wester.1998.3.pdf},
year = 1998
}
@inproceedings{wester-98-kerkrade,
author = { M. Wester and J.M. Kessens and H. Strik},
title = {Improving the Performance of a {D}utch {CSR} by
Modeling Pronunciation Variation},
booktitle = {Proc. of the Workshop Modeling Pronunciation Variation
for Automatic Speech Recognition},
pages = {145-150},
address = {Kerkrade},
abstract = {This paper describes how the performance of a
continuous speech recognizer for Dutch has been
improved by modeling pronunciation variation. We used
three methods in order to model pronunciation
                 variation. First, within-word variation was dealt with.
Phonological rules were applied to the words in the
lexicon, thus automatically generating pronunciation
variants. Secondly, cross-word pronunciation variation
was accounted for by adding multi-words and their
variants to the lexicon. Thirdly, probabilities of
pronunciation variants were incorporated in the
language model (LM), and thresholds were used to choose
which pronunciation variants to add to the LMs. For
each of the methods, recognition experiments were
carried out. A significant improvement in error rates
was measured.},
categories = {asr, pm, VIOS, Nijmegen},
pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1998/wester.1998.4.pdf},
year = 1998
}
@phdthesis{fitt_thesis98,
author = {Sue Fitt},
title = {Processing unfamiliar words - a study in the
perception and production of native and foreign
placenames},
school = {The Centre for Speech Technology Research, Edinburgh
University},
abstract = {This thesis sets out to examine some of the linguistic
processes which take place when speakers are faced with
unfamiliar and potentially foreign place names, and the
possible psycholinguistic origins of these processes.
It is concluded that lexical networks are used to map
from input to output, and that phonological rule-based
models do not fully account for the data. Previous
studies of nativisation have tended to catalogue the
phonological and spelling changes which have taken
place in historical examples, and explanations have
generally been limited to comparison of details of the
borrowed and borrowing languages, rather than being set
in a solid linguistic framework describing the ways in
which speakers and readers process words. There have
been psycholinguistic studies of unfamiliar words, but
these have generally ignored the foreign dimension, and
have been limited in scope. Traditional linguistic
work, meanwhile, focuses on descriptions, either
abstract or more related to mental processes, of the
language that we know and use every day. Studies of
foreign language learning also have a rather different
focus from the current work, as they examine what
happens when we attempt, over a period of time, to
acquire new sounds, vocabulary and grammar. This study
takes an experimental approach to nativisation,
presenting Edinburgh secondary school pupils with a
series of unfamiliar spoken and written European town
names, and asking them to reproduce the names either in
writing or speech, along with a judgement of origin.
The resulting pronunciations and spellings are examined
for accuracy, errors and changes, both in perception
and production. Different explanations of the output
are considered, and it is concluded that models which
apply a set of linguistic rules to the input in order
to generate an output cannot account for the variety of
data produced. Lexicon-based models, on the other hand,
using activation of known words or word-sets, and
analogy with word-parts, are more able to explain both
the details of individual responses and the variety of
responses across subjects.},
categories = {pronunciation, onomastics, names, phonology,
pseudowords, orthography},
pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1998/Fitt_1998_c.pdf},
ps = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1998/thesis.ps},
year = 1998
}
@inproceedings{wester-98-sd,
author = {M. Wester and J.M. Kessens and H. Strik},
title = {Two automatic approaches for analyzing the frequency
of connected speech processes in {D}utch},
booktitle = {Proc. ICSLP Student Day '98},
pages = {3351-3356},
address = {Sydney},
abstract = {This paper describes two automatic approaches used to
study connected speech processes (CSPs) in Dutch. The
first approach was from a linguistic point of view -
the top-down method. This method can be used for
verification of hypotheses about CSPs. The second
approach - the bottom-up method - uses a constrained
phone recognizer to generate phone transcriptions. An
alignment was carried out between the two
transcriptions and a reference transcription. A
comparison between the two methods showed that 68\%
agreement was achieved on the CSPs. Although phone
accuracy is only 63\%, the bottom-up approach is useful
for studying CSPs. From the data generated using the
bottom-up method, indications of which CSPs are present
in the material can be found. These indications can be
used to generate hypotheses which can then be tested
using the top-down method.},
categories = {asr, pm, VIOS, Nijmegen},
pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1998/wester.1998.2.pdf},
year = 1998
}
@inproceedings{Keeni1998ICCLSDP,
author = {Kanad Keeni and Hiroshi Shimodaira and Kenji Nakayama
and Kazunori Kotani},
title = {{On Parameter Initialization of Multi-layer
Feed-forward Neural Networks for Pattern Recognition}},
booktitle = {International Conference on Computational Linguistics,
Speech and Document Processing (ICCLSDP-'98), Calcutta,
India},
pages = {D8--12},
categories = {ann, jaist},
month = feb,
year = 1998
}
@inproceedings{Shimodaira1998SPR,
author = {Hiroshi Shimodaira and Jun Rokui and Mitsuru Nakai},
title = {{Modified Minimum Classification Error Learning and
Its Application to Neural Networks}},
booktitle = {2nd International Workshop on Statistical Techniques
in Pattern Recognition (SPR'98), Sydney, Australia},
abstract = { A novel method to improve the generalization
performance of the Minimum Classification Error (MCE) /
Generalized Probabilistic Descent (GPD) learning is
proposed. The MCE/GPD learning proposed by Juang and
Katagiri in 1992 results in better recognition
performance than the maximum-likelihood (ML) based
learning in various areas of pattern recognition.
                 Despite its superiority in recognition performance, it
                 still suffers, like other learning algorithms, from the
                 problem of ``over-fitting'' to the training
                 samples. In the present study, a regularization
                 technique has been applied to the MCE learning to
overcome this problem. Feed-forward neural networks are
employed as a recognition platform to evaluate the
recognition performance of the proposed method.
Recognition experiments are conducted on several sorts
of data sets. },
categories = {mce, ann, jaist},
pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1998/spr98.pdf},
year = 1998
}
@inproceedings{Hitzeman_1998_d,
author = {Janet Hitzeman and Alan W. Black and Paul Taylor and
Chris Mellish and Jon Oberlander},
title = {On the Use of Automatically Generated Discourse-Level
Information in a Concept-to-Speech Synthesis System},
booktitle = {ICSLP98},
volume = 6,
pages = {2763-2768},
address = {Sydney, Australia},
categories = {intonation, synthesis, sole},
pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1998/Hitzeman_1998_d.pdf},
ps = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1998/Hitzeman_1998_d.ps},
year = 1998
}
@inproceedings{Williams_1998_b,
author = {Briony Williams},
title = {Levels of annotation for a {W}elsh speech database for
phonetic research},
  booktitle = {Workshop on Language Resources for European Minority
                 Languages},
  address = {Granada, Spain},
  month = may,
categories = {database, phonetics, welshdata},
pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1998/Williams_1998_b.pdf},
ps = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1998/Williams_1998_b.ps},
year = 1998
}
@inproceedings{Molloy_1998_a,
author = {Laurence Molloy and Stephen Isard},
title = {Suprasegmental Duration Modelling with Elastic
Constraints in Automatic Speech Recognition},
booktitle = {ICSLP},
volume = 7,
pages = {2975-2978},
address = {Sydney, Australia},
categories = {suprasegmentals, duration, asr, phonetics, prosody},
pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1998/Molloy_1998_a.pdf},
ps = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1998/Molloy_1998_a.ps},
year = 1998
}
@inproceedings{Nakai1998ICSLP,
author = {Mitsuru Nakai and Hiroshi Shimodaira},
title = {{The Use of F0 Reliability Function for Prosodic
Command Analysis on F0 Contour Generation Model}},
booktitle = {Proc. ICSLP'98},
categories = {asr, atr, jaist},
month = dec,
pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1998/Nakai1998ICSLP.pdf},
year = 1998
}
@phdthesis{DissStrom,
author = {V. Strom},
  title = {Automatische Erkennung von Satzmodus, Akzentuierung
                 und Phrasengrenzen},
  note = {(in German; English: Automatic recognition of sentence mode, accentuation and phrase boundaries)},
  school = {University of Bonn},
pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1998/Diss.Volker.pdf},
ps = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1998/Diss.Volker.ps},
year = 1998
}
@inproceedings{Pagel_1998_a,
author = {Vincent Pagel and Kevin Lenzo and Alan W Black},
title = {Letter to sound rules for accented lexicon compression},
booktitle = {ICSLP98},
volume = 5,
pages = {2015-2020},
categories = {pronunciation, synthesis, satissfy, festival},
pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1998/Pagel_1998_a.pdf},
ps = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1998/Pagel_1998_a.ps},
year = 1998
}
@inproceedings{Keeni1998ICONIP,
author = {Kanad Keeni and Kenji Nakayama and Hiroshi Shimodaira},
title = {{Automatic Generation of Initial Weights and Target
Outputs of Multi-layer Neural Networks and its
Application to Pattern Classification}},
booktitle = {International Conference on Neural Information
Processing (ICONIP'98)},
pages = {1622--1625},
categories = {ann, jaist},
month = oct,
year = 1998
}
@inproceedings{shig98,
author = {Yoshinori Shiga and Hiroshi Matsuura and Tsuneo Nitta},
title = {Segmental duration control based on an articulatory
model},
booktitle = {Proc. ICSLP},
volume = 5,
pages = {2035--2038},
abstract = {This paper proposes a new method that determines
segmental duration for text-to-speech conversion based
on the movement of articulatory organs which compose an
articulatory model. The articulatory model comprises
four time-variable articulatory parameters representing
the conditions of articulatory organs whose physical
restriction seems to significantly influence the
segmental duration. The parameters are controlled
according to an input sequence of phonetic symbols,
following which segmental duration is determined based
on the variation of the articulatory parameters. The
proposed method is evaluated through an experiment
using a Japanese speech database that consists of 150
phonetically balanced sentences. The results indicate
that the mean square error of predicted segmental
duration is approximately 15[ms] for the closed set and
15--17[ms] for the open set. The error is within
20[ms], the level of acceptability for distortion of
segmental duration without loss of naturalness, and
hence the method is proved to effectively predict
segmental duration.},
categories = {speech, synthesis, duration, articulatory model,
toshiba},
pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1998/shiga_icslp98.pdf},
ps = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1998/shiga_icslp98.ps},
year = 1998
}
@article{carreira-specom98,
author = {M.~Carreira-Perpiñán and S.~Renals},
title = {Dimensionality reduction of electropalatographic data
using latent variable models},
journal = {Speech Communication},
volume = {26},
pages = {259--282},
abstract = {We consider the problem of obtaining a reduced
dimension representation of electropalatographic (EPG)
data. An unsupervised learning approach based on latent
variable modelling is adopted, in which an underlying
lower dimension representation is inferred directly
from the data. Several latent variable models are
investigated, including factor analysis and the
generative topographic mapping (GTM). Experiments were
carried out using a subset of the EUR-ACCOR database,
and the results indicate that these automatic methods
capture important, adaptive structure in the EPG data.
Nonlinear latent variable modelling clearly outperforms
the investigated linear models in terms of
log-likelihood and reconstruction error and suggests a
substantially smaller intrinsic dimensionality for the
EPG data than that claimed by previous studies. A
two-dimensional representation is produced with
applications to speech therapy, language learning and
articulatory dynamics.},
categories = {ml,lv,artic,sheffield},
pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1998/specom98.pdf},
ps = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1998/specom98.ps.gz},
year = 1998
}
@inproceedings{Stolcke_1998_b,
author = {Andreas Stolcke and E. Shriberg and R. Bates and P.
Taylor and K. Ries and D. Jurafsky and N. Coccaro and
R. Martin and M. Meteer and C. Van Ess-Dykema},
  title = {Dialog Act Modelling for Conversational Speech},
booktitle = {AAAI Spring Symposium on Applying Machine Learning to
Discourse Processing},
categories = {prosody, recognition, language modelling, intonation,
dialogue, satissfy},
pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1998/Stolcke_1998_b.pdf},
ps = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1998/Stolcke_1998_b.ps},
year = 1998
}
@inproceedings{williams-escapron98,
author = {G.~Williams and S.~Renals },
title = {Confidence measures for evaluating pronunciation
models},
booktitle = {ESCA Workshop on Modeling pronunciation variation for
automatic speech recognition},
pages = {151--155},
address = {Kerkrade, Netherlands},
abstract = {In this paper, we investigate the use of confidence
measures for the evaluation of pronunciation models and
the employment of these evaluations in an automatic
baseform learning process. The confidence measures and
pronunciation models are obtained from the Abbot hybrid
Hidden Markov Model/Artificial Neural Network Large
Vocabulary Continuous Speech Recognition system.
Experiments were carried out for a number of baseform
learning schemes using the ARPA North American Business
News and the Broadcast News corpora from which it was
found that a confidence measure based scheme provided
the largest reduction in Word Error Rate.},
categories = {recognition,conf,hybrid,abbot,wsj,bnews,pron,sheffield},
pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1998/esca98.pdf},
ps = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1998/esca98.ps.gz},
year = 1998
}
@inproceedings{mayoturk:98b,
author = {Mayo, C.},
title = {A longitudinal study of perceptual weighting and
                 phonemic awareness},
booktitle = {Chicago Linguistics Society 34},
categories = {speech perception, development, cue weighting,
phonemic awareness, literacy},
year = 1998
}
@inproceedings{wester-98-voicedata,
author = {M. Wester},
title = {Automatic Classification of Voice Quality: Comparing
Regression Models and Hidden {M}arkov Models},
booktitle = {Proc. of VOICEDATA98, Symposium on Databases in Voice
Quality Research and Education},
pages = {92-97},
address = {Utrecht},
abstract = {In this paper, two methods for automatically
classifying voice quality are compared: regression
analysis and hidden Markov models (HMMs). The findings
of this research show that HMMs can be used to classify
voice quality. The HMMs performed better than the
regression models in classifying breathiness and
overall degree of deviance, and the two methods showed
similar results on the roughness scale. However, the
results are not spectacular. This is mainly due to the
type of material that was available and the number of
listeners who assessed the material. Nonetheless, I
argue in this paper that these findings are interesting
because they are a promising step towards developing a
system for classifying voice quality.},
categories = {voice quality, Nijmegen},
pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1998/wester.1998.1.pdf},
year = 1998
}
@inproceedings{Black_1998_a,
author = {Alan W Black and Kevin Lenzo and Vincent Pagel},
title = {Issues in Building General Letter to Sound Rules},
  booktitle = {The Third ESCA Workshop on Speech Synthesis},
pages = {77-80},
categories = {pronunciation, synthesis, satissfy, festival},
pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1998/Black_1998_a.pdf},
ps = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1998/Black_1998_a.ps},
year = 1998
}
@article{Shriberg_1998_a,
author = {Elizabeth Shriberg and R. Bates and P. Taylor and A.
Stolcke and K. Ries and D. Jurafsky and N. Coccaro and
R. Martin and M. Meteer and C. Van Ess-Dykema},
title = {Can Prosody Aid the Automatic Classification of Dialog
Acts in Conversational Speech?},
journal = {Language and Speech},
volume = 41,
number = {3-4},
categories = {prosody, recognition, language modelling, intonation,
dialogue, satissfy},
pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1998/Shriberg_1998_a.pdf},
ps = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1998/Shriberg_1998_a.ps},
year = 1998
}
@inproceedings{Rokui1998ICONIP,
author = {Jun Rokui and Hiroshi Shimodaira},
title = {{Modified Minimum Classification Error Learning and
Its Application to Neural Networks}},
booktitle = {ICONIP'98, Kitakyushu, Japan},
categories = {ann, mce, jaist},
month = oct,
year = 1998
}
@inbook{Williams_1998_a,
author = {Briony Williams},
title = {The phonetic manifestation of stress in {W}elsh},
  booktitle = {Word prosodic systems in the languages of {E}urope
                 (ed. H. van der Hulst)},
categories = {prosody, phonetics, welshdata},
pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1998/Williams_1998_a.pdf},
ps = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1998/Williams_1998_a.ps},
year = 1998
}
@inproceedings{Iida1998IIZUKA,
author = {Eiji Iida and Hiroshi Shimodaira and Susumu Kunifuji
and Masayuki Kimura},
  title = {{A System to Perform Human Problem Solving}},
booktitle = {The 5th International Conference on Soft Computing and
Information / Intelligent Systems (IIZUKA'98)},
categories = {jaist},
month = oct,
year = 1998
}
@inproceedings{fitt_isard_icslp98,
author = {Sue Fitt and Steve Isard},
title = {Representing the environments for phonological
processes in an accent-independent lexicon for
synthesis of {E}nglish},
booktitle = {Proc. ICSLP 1998},
volume = 3,
pages = {847-850},
address = {Sydney, Australia},
abstract = {This paper reports on work developing an
accent-independent lexicon for use in synthesising
speech in English. Lexica which use phonemic
transcriptions are only suitable for one accent, and
developing a lexicon for a new accent is a long and
laborious process. Potential solutions to this problem
include the use of conversion rules to generate lexica
of regional pronunciations from standard accents and
encoding of regional variation by means of keywords.
The latter proposal forms the basis of the current
work. However, even if we use a keyword system for
lexical transcription there are a number of remaining
theoretical and methodological problems if we are to
synthesise and recognise accents to a high degree of
accuracy; these problems are discussed in the following
paper.},
categories = {speech synthesis, lexicon, accents, regional
pronunciation, phonology},
month = dec,
pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1998/Fitt_1998_b.pdf},
ps = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1998/Fitt_1998_b.ps},
year = 1998
}
@inproceedings{Kessens98,
author = {J.M. Kessens and M. Wester and C. Cucchiarini and H.
Strik},
title = {The Selection of Pronunciation Variants: Comparing the
Performance of Man and Machine},
booktitle = {Proc. of ICSLP '98},
pages = {2715-2718},
address = {Sydney},
abstract = {In this paper the performance of an automatic
transcription tool is evaluated. The transcription tool
is a Continuous Speech Recognizer (CSR) running in
forced recognition mode. For evaluation the performance
of the CSR was compared to that of nine expert
listeners. Both man and the machine carried out exactly
the same task: deciding whether a segment was present
or not in 467 cases. It turned out that the performance
of the CSR is comparable to that of the experts.},
categories = {asr, pm, VIOS, Nijmegen},
pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1998/kessens.1998.1.pdf},
year = 1998
}
@inproceedings{Sproat_1998_a,
author = {Richard Sproat and Andrew Hunt and Mari Ostendorf and
Paul Taylor and Alan Black and Kevin Lenzo},
title = {Sable: a standard for {TTS} markup},
  booktitle = {ICSLP98},
volume = 5,
pages = {1719-1724},
address = {Sydney, Australia},
categories = {markup, sable},
pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1998/Sproat_1998_a.pdf},
ps = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1998/Sproat_1998_a.ps},
year = 1998
}