1998.bib

@comment{{This file has been generated by bib2bib 1.92}}
@comment{{Command line: /home/korin/bibtex2html-1.92-LINUX/bib2bib -oc /home/korin/projects/publications/new_output/transitdata/1998-citations -ob /home/korin/projects/publications/new_output/transitdata/1998.bib -c 'year : "1998"' /home/korin/projects/publications/filtlists/full_publications_list.bib}}
@inproceedings{Hitzeman_1998_c,
  author = {Janet Hitzeman and Massimo Poesio},
  title = {Long Distance Pronominalization and Global Focus},
  booktitle = {COLING-ACL '98},
  volume = 1,
  pages = {550-556},
  address = {Montreal, Quebec, Canada},
  categories = {text analysis, sole},
  pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1998/Hitzeman_1998_c.pdf},
  ps = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1998/Hitzeman_1998_c.ps},
  school = {Universite de Montreal},
  year = 1998
}
@inproceedings{Shimodaira1998ICSLP,
  author = {Hiroshi Shimodaira and Jun Rokui and Mitsuru Nakai},
  title = {{Improving The Generalization Performance Of The
                   MCE/GPD Learning}},
  booktitle = {ICSLP'98, Australia},
  abstract = { A novel method to prevent the over-fitting effect and
                   improve the generalization performance of the Minimum
                   Classification Error (MCE) / Generalized Probabilistic
                   Descent (GPD) learning is proposed. The MCE/GPD method,
                   which is one of the newest discriminative-learning
                   approaches proposed by Katagiri and Juang in 1992,
                   results in better recognition performance in various
                   areas of pattern recognition than the
                   maximum-likelihood (ML) based approach where a
                   posteriori probabilities are estimated. Despite its
                   superiority in recognition performance, it still
                   suffers from the problem of over-fitting to the
                   training samples, as is the case with other learning
                   algorithms. In the present study, a regularization
                   technique is applied to the MCE method to overcome
                   this problem. Feed-forward neural networks are employed
                   as a recognition platform to evaluate the recognition
                   performance of the proposed method. Recognition
                   experiments are conducted on several sorts of datasets.
                   The proposed method shows better generalization
                   performance than the original one. },
  categories = {lifelike-agent, jaist},
  month = dec,
  pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1998/Shimodaira1998ICSLP.pdf},
  year = 1998
}
@inproceedings{Keeni1998ICPR,
  author = {Kanad Keeni and Kenji Nakayama and Hiroshi Shimodaira},
  title = {{Automatic Generation of Initial Weights and
                   Estimation of Hidden Units for Pattern Classification
                   Using Neural Networks}},
  booktitle = {14th International Conference on Pattern Recognition
                   (ICPR'98)},
  pages = {1568--1571},
  categories = {ann, jaist},
  month = aug,
  year = 1998
}
@inproceedings{abberley-icassp98,
  author = {D.~Abberley and S.~Renals and G.~Cook},
  title = {Retrieval of broadcast news documents with the {THISL}
                   system},
  booktitle = {Proc IEEE ICASSP},
  pages = {3781--3784},
  address = {Seattle},
  abstract = {This paper describes a spoken document retrieval
                   system, combining the Abbot large vocabulary continuous
                   speech recognition (LVCSR) system developed by
                   Cambridge University, Sheffield University and
                   SoftSound, and the PRISE information retrieval engine
                   developed by NIST. The system was constructed to enable
                   us to participate in the TREC 6 Spoken Document
                   Retrieval experimental evaluation. Our key aims in this
                   work were to produce a complete system for the SDR
                   task, to investigate the effect of a word error rate of
                   30-50\% on retrieval performance and to investigate the
                   integration of LVCSR and word spotting in a retrieval
                   task.},
  categories = {thisl,bnews,trec,ir,recognition,sheffield},
  pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1998/icassp98.pdf},
  ps = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1998/icassp98.ps.gz},
  year = 1998
}
@article{Taylor_1998_b,
  author = {Paul A. Taylor and S. King and S. D. Isard and H.
                   Wright},
  title = {Intonation and Dialogue Context as Constraints for
                   Speech Recognition},
  journal = {Language and Speech},
  volume = 41,
  number = {3},
  pages = {493-512},
  categories = {asr, intonation, dialogue, lm, id4s},
  pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1998/Taylor_1998_b.pdf},
  ps = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1998/Taylor_1998_b.ps},
  year = 1998
}
@inproceedings{renals-twente98,
  author = {S.~Renals and D.~Abberley},
  title = {The {THISL} spoken document retrieval system},
  booktitle = {Proc. 14th Twente Workshop on Language Technology},
  pages = {129--140},
  abstract = {THISL is an ESPRIT Long Term Research Project focused on
                   the development and construction of a system to retrieve
                   items from an archive of television and radio news
                   broadcasts. In this paper we outline our spoken
                   document retrieval system based on the Abbot speech
                   recognizer and a text retrieval system based on Okapi
                   term-weighting. The system has been evaluated as part
                   of the TREC-6 and TREC-7 spoken document retrieval
                   evaluations and we report on the results of the TREC-7
                   evaluation based on a document collection of 100 hours
                   of North American broadcast news.},
  categories = {thisl,bnews,trec,ir,recognition,sheffield},
  pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1998/twente98.pdf},
  ps = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1998/twente98.ps.gz},
  year = 1998
}
@inproceedings{king:stephenson:isard:taylor:strachan:icslp1998,
  author = {Simon King and Todd Stephenson and Stephen Isard and
                   Paul Taylor and Alex Strachan},
  title = {Speech Recognition via Phonetically Featured Syllables},
  booktitle = {Proc. {ICSLP} '98},
  pages = {1031-1034},
  address = {Sydney, Australia},
  abstract = {We describe a speech recogniser which uses a speech
                   production-motivated phonetic-feature description of
                   speech. We argue that this is a natural way to describe
                   the speech signal and offers an efficient intermediate
                   parameterisation for use in speech recognition. We also
                   propose to model this description at the syllable
                   rather than phone level. The ultimate goal of this work
                   is to generate syllable models whose parameters
                   explicitly describe the trajectories of the phonetic
                   features of the syllable. We hope to move away from
                   Hidden Markov Models (HMMs) of context-dependent phone
                   units. As a step towards this, we present a preliminary
                   system which consists of two parts: recognition of the
                   phonetic features from the speech signal using a neural
                   network; and decoding of the feature-based description
                   into phonemes using HMMs.},
  categories = {asr},
  month = dec,
  pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1998/King_Stephenson_Isard_Taylor_Strachan_icslp1998.pdf},
  ps = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1998/King_Stephenson_Isard_Taylor_Strachan_icslp1998.ps},
  year = 1998
}
@inproceedings{carreira-nnsp98,
  author = {M.~Carreira-Perpiñán and S.~Renals},
  title = {Experimental evaluation of latent variable models for
                   dimensionality reduction},
  booktitle = {IEEE Proc. Neural Networks for Signal Processing},
  volume = {8},
  pages = {165--173},
  address = {Cambridge},
  abstract = {We use electropalatographic (EPG) data as a test bed
                   for dimensionality reduction methods based on latent
                   variable modelling, in which an underlying lower
                   dimension representation is inferred directly from the
                   data. Several models (and mixtures of them) are
                   investigated, including factor analysis and the
                   generative topographic mapping (GTM). Experiments
                   indicate that nonlinear latent variable modelling
                   reveals a low-dimensional structure in the data
                   inaccessible to the investigated linear models.},
  categories = {ml,lv,artic,sheffield},
  pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1998/nnsp98.pdf},
  ps = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1998/nnsp98.ps.gz},
  year = 1998
}
@inproceedings{mayoturk:98,
  author = {Mayo, C.},
  title = {The developmental relationship between perceptual
                   weighting and phonemic awareness},
  booktitle = {LabPhon 6, University of York, UK},
  categories = {speech perception, development, cue weighting,
                   phonemic awareness, literacy},
  year = 1998
}
@inproceedings{wester-98-sposs,
  author = {M. Wester and J.M. Kessens and C. Cucchiarini and H.
                   Strik},
  title = {Selection of Pronunciation Variants in Spontaneous
                   Speech: Comparing the Performance of Man and Machine},
  booktitle = {Proc. of the ESCA Workshop on the Sound Patterns of
                   Spontaneous Speech: Production and Perception},
  pages = {157-160},
  address = {Aix-en-Provence},
  abstract = {In this paper, the performance of an automatic
                   transcription tool is evaluated. The transcription
                   tool is a continuous speech recognizer (CSR) running
                   in forced recognition mode. For the evaluation, the
                   performance of the CSR was compared to that of nine
                   expert listeners. The machine and the human listeners
                   carried out exactly the same task: deciding whether a
                   segment was present or not in 467 cases. The
                   performance of the CSR turned out to be comparable to
                   that of the experts.},
  categories = {asr, pm, VIOS, Nijmegen},
  pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1998/wester.1998.5.pdf},
  year = 1998
}
@inproceedings{Jang_1998_a,
  author = {Tae-Yeoub Jang and Minsuck Song and Kiyeong Lee},
  title = {Disambiguation of Korean Utterances Using Automatic
                   Intonation Recognition},
  booktitle = {Proceedings of ICSLP98},
  volume = 3,
  pages = {603-606},
  address = {Sydney, Australia},
  categories = {intonation, prosody, recognition},
  pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1998/Jang_1998_a.pdf},
  ps = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1998/Jang_1998_a.ps},
  year = 1998
}
@inproceedings{Wright_1998_b,
  author = {Helen Wright},
  title = {Automatic Utterance Type Detection Using
                   Suprasegmental Features},
  booktitle = {ICSLP'98},
  volume = 4,
  pages = {1403},
  address = {Sydney, Australia},
  categories = {recognition, prosody, intonation, dialogue},
  pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1998/Wright_1998_b.pdf},
  ps = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1998/Wright_1998_b.ps},
  year = 1998
}
@inproceedings{Sproat_1998_b,
  author = {Richard Sproat and Andrew Hunt and Mari Ostendorf and
                   Paul Taylor and Alan Black and Kevin Lenzo},
  title = {Sable: a standard for {TTS} markup},
  booktitle = {Third {ESCA} workshop on speech synthesis},
  pages = {27-30},
  address = {Jenolan Caves, Blue Mountains, Australia},
  categories = {markup, sable},
  pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1998/Sproat_1998_b.pdf},
  ps = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1998/Sproat_1998_b.ps},
  year = 1998
}
@inproceedings{Syrdal_1998_a,
  author = {Ann Syrdal and Gregor Moehler and Kurt Dusterhoff and
                   Alistair Conkie and Alan W Black},
  title = {Three Methods of Intonation Modeling},
  booktitle = {3rd ESCA Workshop on Speech Synthesis},
  pages = {305-310},
  address = {Jenolan Caves},
  categories = {intonation, synthesis, satissfy, festival},
  pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1998/Syrdal_1998_a.pdf},
  ps = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1998/Syrdal_1998_a.ps},
  year = 1998
}
@inproceedings{ODonnell_1998_a,
  author = {Michael O'Donnell and Alistair Knott and Janet
                   Hitzeman and Hua Cheng},
  title = {Integrating Referring and Informing in NP Planning},
  booktitle = {Coling-ACL Workshop on the Computational Treatment of
                   Nominals},
  address = {Montreal, Quebec, Canada},
  categories = {ilex},
  pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1998/ODonnell_1998_a.pdf},
  ps = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1998/ODonnell_1998_a.ps},
  school = {Universite de Montreal},
  year = 1998
}
@article{Taylor_1998_f,
  author = {Paul Taylor and Alan Black},
  title = {Assigning Phrase Breaks from Part of Speech Sequences},
  journal = {Computer Speech and Language},
  volume = 12,
  pages = {99-117},
  categories = {prosody, language modelling, text analysis, synthesis,
                   phrasing, satissfy},
  pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1998/Taylor_1998_f.pdf},
  ps = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1998/Taylor_1998_f.ps},
  year = 1998
}
@article{Iida1998IEICE06,
  author = {Eiji Iida and Susumu Kunifuji and Hiroshi Shimodaira
                   and Masayuki Kimura},
  title = {{A Scale-Down Solution of $N^2-1$ Puzzle}},
  journal = {Trans. IEICE (D-I)},
  volume = {J81-D-I},
  number = {6},
  pages = {604--614},
  note = {(in Japanese)},
  categories = {puzzle, jaist},
  month = jun,
  year = 1998
}
@inproceedings{Taylor_1998_e,
  author = {Paul A Taylor},
  title = {The {T}ilt Intonation Model},
  booktitle = {I{CSLP}98},
  address = {Sydney},
  categories = {intonation, prosody},
  pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1998/Taylor_1998_e.pdf},
  ps = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1998/Taylor_1998_e.ps},
  year = 1998
}
@inproceedings{barker-icslp98,
  author = {J.~Barker and G.~Williams and S.~Renals},
  title = {Acoustic confidence measures for segmenting broadcast
                   news},
  booktitle = {Proc. ICSLP},
  pages = {2719--2722},
  address = {Sydney},
  abstract = {In this paper we define an acoustic confidence measure
                   based on the estimates of local posterior probabilities
                   produced by a HMM/ANN large vocabulary continuous
                   speech recognition system. We use this measure to
                   segment continuous audio into regions where it is and
                   is not appropriate to expend recognition effort. The
                   segmentation is computationally inexpensive and
                   provides reductions in both overall word error rate and
                   decoding time. The technique is evaluated using
                   material from the Broadcast News corpus.},
  categories = {recognition,conf,hybrid,bnews,segmentation,sheffield},
  pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1998/icslp98-seg.pdf},
  ps = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1998/icslp98-seg.ps.gz},
  year = 1998
}
@inproceedings{Taylor_1998_d,
  author = {Paul A Taylor and Alan Black and Richard Caley},
  title = {The Architecture of the Festival Speech Synthesis
                   System},
  booktitle = {The Third ESCA Workshop on Speech Synthesis},
  pages = {147-151},
  address = {Jenolan Caves, Australia},
  categories = {systems, synthesis, festival},
  pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1998/Taylor_1998_d.pdf},
  ps = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1998/Taylor_1998_d.ps},
  year = 1998
}
@inproceedings{Dusterhoff_1998_a,
  author = {K. Dusterhoff},
  title = {An investigation into the effectiveness of
                   sub-syllable acoustics in automatic intonation
                   analysis},
  booktitle = {Proceedings of University of Edinburgh
                   Linguistics/Applied Linguistics Postgraduate Conference},
  categories = {intonation, automatic analysis},
  pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1998/Dusterhoff_1998_a.pdf},
  ps = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1998/Dusterhoff_1998_a.ps},
  year = 1998
}
@inproceedings{abberley-trec98,
  author = {D.~Abberley and S.~Renals and G.~Cook and T.~Robinson},
  title = {The 1997 {THISL} spoken document retrieval system},
  booktitle = {Proc. Sixth Text Retrieval Conference (TREC-6)},
  pages = {747--752},
  abstract = {The THISL spoken document retrieval system is based on
                   the Abbot Large Vocabulary Continuous Speech
                   Recognition (LVCSR) system developed by Cambridge
                   University, Sheffield University and SoftSound, and
                   uses PRISE (NIST) for indexing and retrieval. We
                   participated in full SDR mode. Our approach was to
                   transcribe the spoken documents at the word level using
                   Abbot, indexing the resulting text transcriptions using
                   PRISE. The LVCSR system uses a recurrent network-based
                   acoustic model (with no adaptation to different
                   conditions) trained on the 50 hour Broadcast News
                   training set, a 65,000 word vocabulary and a trigram
                   language model derived from Broadcast News text. Words
                   in queries which were out-of-vocabulary (OOV) were word
                   spotted at query time (utilizing the posterior phone
                   probabilities output by the acoustic model), added to
                   the transcriptions of the relevant documents and the
                   collection was then re-indexed. We generated
                   pronunciations at run-time for OOV words using the
                   Festival TTS system (University of Edinburgh).},
  categories = {thisl,bnews,trec,ir,recognition,eval,sheffield},
  pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1998/trec6.pdf},
  ps = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1998/trec6.ps.gz},
  year = 1998
}
@inproceedings{Lin98,
  author = {Lincoln, M. and Cox, S.J. and Ringland, S.},
  title = {A Comparison of Two Unsupervised Approaches to Accent
                   Identification},
  booktitle = {Int. Conf. on Spoken Language Processing},
  pages = {109-112},
  address = {Sydney},
  abstract = {The ability to automatically identify a speaker's
                   accent would be very useful for a speech recognition
                   system as it would enable the system to use both a
                   pronunciation dictionary and speech models specific to
                   the accent, techniques which have been shown to improve
                   accuracy. Here, we describe some experiments in
                   unsupervised accent classification. Two techniques have
                   been investigated to classify British- and
                   American-accented speech: an acoustic approach, in which
                   we analyse the pattern of usage of the distributions in
                   the recogniser by a speaker to decide on his most
                   probable accent, and a high-level approach in which we
                   use a phonotactic model for classification of the
                   accent. Results show that both techniques give
                   excellent performance on this task which is maintained
                   when testing is done on data from an independent
                   dataset.},
  categories = {accent identification, speaker characteristics, BT,
                   UEA},
  pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1998/lincoln-icslp-98.pdf},
  year = 1998
}
@phdthesis{king:thesis1998,
  author = {Simon King},
  title = {Using Information Above the Word Level for Automatic
                   Speech Recognition},
  school = {University of Edinburgh},
  abstract = {This thesis introduces a general method for using
                   information at the utterance level and across
                   utterances for automatic speech recognition. The method
                   involves classification of utterances into types. Using
                   constraints at the utterance level via this
                   classification method allows information sources to be
                   exploited which cannot necessarily be used directly for
                   word recognition. The classification power of three
                   sources of information is investigated: the language
                   model in the speech recogniser, dialogue context and
                   intonation. The method is applied to a challenging
                   task: the recognition of spontaneous dialogue speech.
                   The results show success in automatic utterance type
                   classification, and subsequent word error rate
                   reduction over a baseline system, when all three
                   information sources are probabilistically combined.},
  categories = {asr, lm, intonation, dialogue, systems},
  pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1998/King_thesis1998.pdf},
  ps = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1998/King_thesis1998.ps},
  year = 1998
}
@inproceedings{williams-icslp98,
  author = {G.~Williams and S.~Renals},
  title = {Confidence measures derived from an acceptor {HMM}},
  booktitle = {Proc. ICSLP},
  pages = {831--834},
  address = {Sydney},
  abstract = {In this paper we define a number of confidence
                   measures derived from an acceptor HMM and evaluate
                   their performance for the task of utterance
                   verification using the North American Business News
                   (NAB) and Broadcast News (BN) corpora. Results are
                   presented for decodings made at both the word and phone
                   level which show the relative profitability of
                   rejection provided by the diverse set of confidence
                   measures. The results indicate that language model
                   dependent confidence measures have reduced performance
                   on BN data relative to that for the more grammatically
                   constrained NAB data. An explanation linking the
                   observations that rejection is more profitable for
                   noisy acoustics, for a reduced vocabulary and at the
                   phone level is also given.},
  categories = {recognition,conf,hybrid,bnews,sheffield},
  pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1998/icslp98-conf.pdf},
  ps = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1998/icslp98-conf.ps.gz},
  year = 1998
}
@inproceedings{wester-98-icslp,
  author = {M. Wester and J.M. Kessens and H. Strik},
  title = {Modeling pronunciation variation for a {D}utch {CSR}:
                   testing three methods},
  booktitle = {Proc. ICSLP '98},
  pages = {2535-2538},
  address = {Sydney},
  abstract = {This paper describes how the performance of a
                   continuous speech recognizer for Dutch has been
                   improved by modeling pronunciation variation. We used
                   three methods to model pronunciation variation. First,
                   within-word variation was dealt with. Phonological
                   rules were applied to the words in the lexicon, thus
                   automatically generating pronunciation variants.
                   Secondly, cross-word pronunciation variation was
                   modeled using two different approaches. The first
                   approach was to model cross-word processes by adding
                   the variants as separate words to the lexicon and in
                   the second approach this was done by using multi-words.
                   For each of the methods, recognition experiments were
                   carried out. A significant improvement was found for
                   modeling within-word variation. Furthermore, modeling
                   cross-word processes using multi-words leads to
                   significantly better results than modeling them using
                   separate words in the lexicon.},
  categories = {asr, pm, VIOS, Nijmegen},
  pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1998/wester.1998.3.pdf},
  year = 1998
}
@inproceedings{wester-98-kerkrade,
  author = { M. Wester and J.M. Kessens and H. Strik},
  title = {Improving the Performance of a {D}utch {CSR} by
                   Modeling Pronunciation Variation},
  booktitle = {Proc. of the Workshop Modeling Pronunciation Variation
                   for Automatic Speech Recognition},
  pages = {145-150},
  address = {Kerkrade},
  abstract = {This paper describes how the performance of a
                   continuous speech recognizer for Dutch has been
                   improved by modeling pronunciation variation. We used
                   three methods in order to model pronunciation
                   variation. First, within-word variation was dealt with.
                   Phonological rules were applied to the words in the
                   lexicon, thus automatically generating pronunciation
                   variants. Secondly, cross-word pronunciation variation
                   was accounted for by adding multi-words and their
                   variants to the lexicon. Thirdly, probabilities of
                   pronunciation variants were incorporated in the
                   language model (LM), and thresholds were used to choose
                   which pronunciation variants to add to the LMs. For
                   each of the methods, recognition experiments were
                   carried out. A significant improvement in error rates
                   was measured.},
  categories = {asr, pm, VIOS, Nijmegen},
  pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1998/wester.1998.4.pdf},
  year = 1998
}
@phdthesis{fitt_thesis98,
  author = {Sue Fitt},
  title = {Processing unfamiliar words - a study in the
                   perception and production of native and foreign
                   placenames},
  school = {The Centre for Speech Technology Research, Edinburgh
                   University},
  abstract = {This thesis sets out to examine some of the linguistic
                   processes which take place when speakers are faced with
                   unfamiliar and potentially foreign place names, and the
                   possible psycholinguistic origins of these processes.
                   It is concluded that lexical networks are used to map
                   from input to output, and that phonological rule-based
                   models do not fully account for the data. Previous
                   studies of nativisation have tended to catalogue the
                   phonological and spelling changes which have taken
                   place in historical examples, and explanations have
                   generally been limited to comparison of details of the
                   borrowed and borrowing languages, rather than being set
                   in a solid linguistic framework describing the ways in
                   which speakers and readers process words. There have
                   been psycholinguistic studies of unfamiliar words, but
                   these have generally ignored the foreign dimension, and
                   have been limited in scope. Traditional linguistic
                   work, meanwhile, focuses on descriptions, either
                   abstract or more related to mental processes, of the
                   language that we know and use every day. Studies of
                   foreign language learning also have a rather different
                   focus from the current work, as they examine what
                   happens when we attempt, over a period of time, to
                   acquire new sounds, vocabulary and grammar. This study
                   takes an experimental approach to nativisation,
                   presenting Edinburgh secondary school pupils with a
                   series of unfamiliar spoken and written European town
                   names, and asking them to reproduce the names either in
                   writing or speech, along with a judgement of origin.
                   The resulting pronunciations and spellings are examined
                   for accuracy, errors and changes, both in perception
                   and production. Different explanations of the output
                   are considered, and it is concluded that models which
                   apply a set of linguistic rules to the input in order
                   to generate an output cannot account for the variety of
                   data produced. Lexicon-based models, on the other hand,
                   using activation of known words or word-sets, and
                   analogy with word-parts, are more able to explain both
                   the details of individual responses and the variety of
                   responses across subjects.},
  categories = {pronunciation, onomastics, names, phonology,
                   pseudowords, orthography},
  pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1998/Fitt_1998_c.pdf},
  ps = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1998/thesis.ps},
  year = 1998
}
@inproceedings{wester-98-sd,
  author = {M. Wester and J.M. Kessens and H. Strik},
  title = {Two automatic approaches for analyzing the frequency
                   of connected speech processes in {D}utch},
  booktitle = {Proc. ICSLP Student Day '98},
  pages = {3351-3356},
  address = {Sydney},
  abstract = {This paper describes two automatic approaches used to
                   study connected speech processes (CSPs) in Dutch. The
                   first approach was from a linguistic point of view -
                   the top-down method. This method can be used for
                   verification of hypotheses about CSPs. The second
                   approach - the bottom-up method - uses a constrained
                   phone recognizer to generate phone transcriptions. An
                   alignment was carried out between the two
                   transcriptions and a reference transcription. A
                   comparison between the two methods showed that 68\%
                   agreement was achieved on the CSPs. Although phone
                   accuracy is only 63\%, the bottom-up approach is useful
                   for studying CSPs. From the data generated using the
                   bottom-up method, indications of which CSPs are present
                   in the material can be found. These indications can be
                   used to generate hypotheses which can then be tested
                   using the top-down method.},
  categories = {asr, pm, VIOS, Nijmegen},
  pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1998/wester.1998.2.pdf},
  year = 1998
}
@inproceedings{Keeni1998ICCLSDP,
  author = {Kanad Keeni and Hiroshi Shimodaira and Kenji Nakayama
                   and Kazunori Kotani},
  title = {{On Parameter Initialization of Multi-layer
                   Feed-forward Neural Networks for Pattern Recognition}},
  booktitle = {International Conference on Computational Linguistics,
                   Speech and Document Processing (ICCLSDP-'98), Calcutta,
                   India},
  pages = {D8--12},
  categories = {ann, jaist},
  month = feb,
  year = 1998
}
@inproceedings{Shimodaira1998SPR,
  author = {Hiroshi Shimodaira and Jun Rokui and Mitsuru Nakai},
  title = {{Modified Minimum Classification Error Learning and
                   Its Application to Neural Networks}},
  booktitle = {2nd International Workshop on Statistical Techniques
                   in Pattern Recognition (SPR'98), Sydney, Australia},
  abstract = { A novel method to improve the generalization
                   performance of the Minimum Classification Error (MCE) /
                   Generalized Probabilistic Descent (GPD) learning is
                   proposed. The MCE/GPD learning proposed by Juang and
                   Katagiri in 1992 results in better recognition
                   performance than the maximum-likelihood (ML) based
                   learning in various areas of pattern recognition.
                   Despite its superiority in recognition performance,
                   like other learning algorithms it still suffers
                   from the problem of ``over-fitting'' to the training
                   samples. In the present study, a regularization
                   technique has been applied to the MCE learning to
                   overcome this problem. Feed-forward neural networks are
                   employed as a recognition platform to evaluate the
                   recognition performance of the proposed method.
                   Recognition experiments are conducted on several sorts
                   of data sets. },
  categories = {mce, ann, jaist},
  pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1998/spr98.pdf},
  year = 1998
}
@inproceedings{Hitzeman_1998_d,
  author = {Janet Hitzeman and Alan W. Black and Paul Taylor and
                   Chris Mellish and Jon Oberlander},
  title = {On the Use of Automatically Generated Discourse-Level
                   Information in a Concept-to-Speech Synthesis System},
  booktitle = {ICSLP98},
  volume = 6,
  pages = {2763-2768},
  address = {Sydney, Australia},
  categories = {intonation, synthesis, sole},
  pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1998/Hitzeman_1998_d.pdf},
  ps = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1998/Hitzeman_1998_d.ps},
  year = 1998
}
@inproceedings{Williams_1998_b,
  author = {Briony Williams},
  title = {Levels of annotation for a {W}elsh speech database for
                   phonetic research},
  booktitle = {Workshop on Language Resources for European Minority
                   Languages},
  address = {Granada, Spain},
  month = may,
  categories = {database, phonetics, welshdata},
  pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1998/Williams_1998_b.pdf},
  ps = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1998/Williams_1998_b.ps},
  year = 1998
}
@inproceedings{Molloy_1998_a,
  author = {Laurence Molloy and Stephen Isard},
  title = {Suprasegmental Duration Modelling with Elastic
                   Constraints in Automatic Speech Recognition},
  booktitle = {ICSLP},
  volume = 7,
  pages = {2975-2978},
  address = {Sydney, Australia},
  categories = {suprasegmentals, duration, asr, phonetics, prosody},
  pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1998/Molloy_1998_a.pdf},
  ps = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1998/Molloy_1998_a.ps},
  year = 1998
}
@inproceedings{Nakai1998ICSLP,
  author = {Mitsuru Nakai and Hiroshi Shimodaira},
  title = {{The Use of F0 Reliability Function for Prosodic
                   Command Analysis on F0 Contour Generation Model}},
  booktitle = {Proc. ICSLP'98},
  categories = {asr, atr, jaist},
  month = dec,
  pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1998/Nakai1998ICSLP.pdf},
  year = 1998
}
@phdthesis{DissStrom,
  author = {V. Strom},
  title = {Automatische Erkennung von Satzmodus, Akzentuierung
                   und Phrasengrenzen},
  school = {University of Bonn},
  pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1998/Diss.Volker.pdf},
  ps = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1998/Diss.Volker.ps},
  year = 1998
}
@inproceedings{Pagel_1998_a,
  author = {Vincent Pagel and Kevin Lenzo and Alan W Black},
  title = {Letter to sound rules for accented lexicon compression},
  booktitle = {ICSLP98},
  volume = 5,
  pages = {2015-2020},
  categories = {pronunciation, synthesis, satissfy, festival},
  pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1998/Pagel_1998_a.pdf},
  ps = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1998/Pagel_1998_a.ps},
  year = 1998
}
@inproceedings{Keeni1998ICONIP,
  author = {Kanad Keeni and Kenji Nakayama and Hiroshi Shimodaira},
  title = {{Automatic Generation of Initial Weights and Target
                   Outputs of Multi-layer Neural Networks and its
                   Application to Pattern Classification}},
  booktitle = {International Conference on Neural Information
                   Processing (ICONIP'98)},
  pages = {1622--1625},
  categories = {ann, jaist},
  month = oct,
  year = 1998
}
@inproceedings{shig98,
  author = {Yoshinori Shiga and Hiroshi Matsuura and Tsuneo Nitta},
  title = {Segmental duration control based on an articulatory
                   model},
  booktitle = {Proc. ICSLP},
  volume = 5,
  pages = {2035--2038},
  abstract = {This paper proposes a new method that determines
                   segmental duration for text-to-speech conversion based
                   on the movement of articulatory organs which compose an
                   articulatory model. The articulatory model comprises
                   four time-variable articulatory parameters representing
                   the conditions of articulatory organs whose physical
                   restriction seems to significantly influence the
                   segmental duration. The parameters are controlled
                   according to an input sequence of phonetic symbols,
                   following which segmental duration is determined based
                   on the variation of the articulatory parameters. The
                   proposed method is evaluated through an experiment
                   using a Japanese speech database that consists of 150
                   phonetically balanced sentences. The results indicate
                   that the mean square error of predicted segmental
                   duration is approximately 15[ms] for the closed set and
                   15--17[ms] for the open set. The error is within
                   20[ms], the level of acceptability for distortion of
                   segmental duration without loss of naturalness, and
                   hence the method is proved to effectively predict
                   segmental duration.},
  categories = {speech, synthesis, duration, articulatory model,
                   toshiba},
  pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1998/shiga_icslp98.pdf},
  ps = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1998/shiga_icslp98.ps},
  year = 1998
}
@article{carreira-specom98,
  author = {M.~Carreira-Perpiñán and S.~Renals},
  title = {Dimensionality reduction of electropalatographic data
                   using latent variable models},
  journal = {Speech Communication},
  volume = {26},
  pages = {259--282},
  abstract = {We consider the problem of obtaining a reduced
                   dimension representation of electropalatographic (EPG)
                   data. An unsupervised learning approach based on latent
                   variable modelling is adopted, in which an underlying
                   lower dimension representation is inferred directly
                   from the data. Several latent variable models are
                   investigated, including factor analysis and the
                   generative topographic mapping (GTM). Experiments were
                   carried out using a subset of the EUR-ACCOR database,
                   and the results indicate that these automatic methods
                   capture important, adaptive structure in the EPG data.
                   Nonlinear latent variable modelling clearly outperforms
                   the investigated linear models in terms of
                   log-likelihood and reconstruction error and suggests a
                   substantially smaller intrinsic dimensionality for the
                   EPG data than that claimed by previous studies. A
                   two-dimensional representation is produced with
                   applications to speech therapy, language learning and
                   articulatory dynamics.},
  categories = {ml,lv,artic,sheffield},
  pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1998/specom98.pdf},
  ps = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1998/specom98.ps.gz},
  year = 1998
}
@inproceedings{Stolcke_1998_b,
  author = {Andreas Stolcke and E. Shriberg and R. Bates and P.
                   Taylor and K. Ries and D. Jurafsky and N. Coccaro and
                   R. Martin and M. Meteer and C. Van Ess-Dykema},
  title = {Dialog Act modelling for Conversational Speech},
  booktitle = {AAAI Spring Symposium on Applying Machine Learning to
                   Discourse Processing},
  categories = {prosody, recognition, language modelling, intonation,
                   dialogue, satissfy},
  pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1998/Stolcke_1998_b.pdf},
  ps = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1998/Stolcke_1998_b.ps},
  year = 1998
}
@inproceedings{williams-escapron98,
  author = {G.~Williams and S.~Renals },
  title = {Confidence measures for evaluating pronunciation
                   models},
  booktitle = {ESCA Workshop on Modeling pronunciation variation for
                   automatic speech recognition},
  pages = {151--155},
  address = {Kerkrade, Netherlands},
  abstract = {In this paper, we investigate the use of confidence
                   measures for the evaluation of pronunciation models and
                   the employment of these evaluations in an automatic
                   baseform learning process. The confidence measures and
                   pronunciation models are obtained from the Abbot hybrid
                   Hidden Markov Model/Artificial Neural Network Large
                   Vocabulary Continuous Speech Recognition system.
                   Experiments were carried out for a number of baseform
                   learning schemes using the ARPA North American Business
                   News and the Broadcast News corpora from which it was
                   found that a confidence measure based scheme provided
                   the largest reduction in Word Error Rate.},
  categories = {recognition,conf,hybrid,abbot,wsj,bnews,pron,sheffield},
  pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1998/esca98.pdf},
  ps = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1998/esca98.ps.gz},
  year = 1998
}
@inproceedings{mayoturk:98b,
  author = {Mayo, C.},
  title = {A longitudinal study of perceptual weighting and
                   phonemic awareness},
  booktitle = {Chicago Linguistic Society 34},
  categories = {speech perception, development, cue weighting,
                   phonemic awareness, literacy},
  year = 1998
}
@inproceedings{wester-98-voicedata,
  author = {M. Wester},
  title = {Automatic Classification of Voice Quality: Comparing
                   Regression Models and Hidden {M}arkov Models},
  booktitle = {Proc. of VOICEDATA98, Symposium on Databases in Voice
                   Quality Research and Education},
  pages = {92-97},
  address = {Utrecht},
  abstract = {In this paper, two methods for automatically
                   classifying voice quality are compared: regression
                   analysis and hidden Markov models (HMMs). The findings
                   of this research show that HMMs can be used to classify
                   voice quality. The HMMs performed better than the
                   regression models in classifying breathiness and
                   overall degree of deviance, and the two methods showed
                   similar results on the roughness scale. However, the
                   results are not spectacular. This is mainly due to the
                   type of material that was available and the number of
                   listeners who assessed the material. Nonetheless, I
                   argue in this paper that these findings are interesting
                   because they are a promising step towards developing a
                   system for classifying voice quality.},
  categories = {voice quality, Nijmegen},
  pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1998/wester.1998.1.pdf},
  year = 1998
}
@inproceedings{Black_1998_a,
  author = {Alan W Black and Kevin Lenzo and Vincent Pagel},
  title = {Issues in Building General Letter to Sound Rules},
  booktitle = {The Third ESCA Workshop on Speech Synthesis},
  pages = {77-80},
  categories = {pronunciation, synthesis, satissfy, festival},
  pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1998/Black_1998_a.pdf},
  ps = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1998/Black_1998_a.ps},
  year = 1998
}
@article{Shriberg_1998_a,
  author = {Elizabeth Shriberg and R. Bates and P. Taylor and A.
                   Stolcke and K. Ries and D. Jurafsky and N. Coccaro and
                   R. Martin and M. Meteer and C. Van Ess-Dykema},
  title = {Can Prosody Aid the Automatic Classification of Dialog
                   Acts in Conversational Speech?},
  journal = {Language and Speech},
  volume = 41,
  number = {3-4},
  categories = {prosody, recognition, language modelling, intonation,
                   dialogue, satissfy},
  pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1998/Shriberg_1998_a.pdf},
  ps = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1998/Shriberg_1998_a.ps},
  year = 1998
}
@inproceedings{Rokui1998ICONIP,
  author = {Jun Rokui and Hiroshi Shimodaira},
  title = {{Modified Minimum Classification Error Learning and
                   Its Application to Neural Networks}},
  booktitle = {ICONIP'98, Kitakyushu, Japan},
  categories = {ann, mce, jaist},
  month = oct,
  year = 1998
}
@inbook{Williams_1998_a,
  author = {Briony Williams},
  title = {The phonetic manifestation of stress in {W}elsh},
  booktitle = {Word Prosodic Systems in the Languages of {E}urope},
  editor = {H. van der Hulst},
  categories = {prosody, phonetics, welshdata},
  pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1998/Williams_1998_a.pdf},
  ps = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1998/Williams_1998_a.ps},
  year = 1998
}
@inproceedings{Iida1998IIZUKA,
  author = {Eiji Iida and Hiroshi Shimodaira and Susumu Kunifuji
                   and Masayuki Kimura},
  title = {{A System to Perform Human Problem Solving}},
  booktitle = {The 5th International Conference on Soft Computing and
                   Information / Intelligent Systems (IIZUKA'98)},
  categories = {jaist},
  month = oct,
  year = 1998
}
@inproceedings{fitt_isard_icslp98,
  author = {Sue Fitt and Steve Isard},
  title = {Representing the environments for phonological
                   processes in an accent-independent lexicon for
                   synthesis of {E}nglish},
  booktitle = {Proc. ICSLP 1998},
  volume = 3,
  pages = {847-850},
  address = {Sydney, Australia},
  abstract = {This paper reports on work developing an
                   accent-independent lexicon for use in synthesising
                   speech in English. Lexica which use phonemic
                   transcriptions are only suitable for one accent, and
                   developing a lexicon for a new accent is a long and
                   laborious process. Potential solutions to this problem
                   include the use of conversion rules to generate lexica
                   of regional pronunciations from standard accents and
                   encoding of regional variation by means of keywords.
                   The latter proposal forms the basis of the current
                   work. However, even if we use a keyword system for
                   lexical transcription there are a number of remaining
                   theoretical and methodological problems if we are to
                   synthesise and recognise accents to a high degree of
                   accuracy; these problems are discussed in the following
                   paper.},
  categories = {speech synthesis, lexicon, accents, regional
                   pronunciation, phonology},
  month = dec,
  pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1998/Fitt_1998_b.pdf},
  ps = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1998/Fitt_1998_b.ps},
  year = 1998
}
@inproceedings{Kessens98,
  author = {J.M. Kessens and M. Wester and C. Cucchiarini and H.
                   Strik},
  title = {The Selection of Pronunciation Variants: Comparing the
                   Performance of Man and Machine},
  booktitle = {Proc. of ICSLP '98},
  pages = {2715-2718},
  address = {Sydney},
  abstract = {In this paper the performance of an automatic
                   transcription tool is evaluated. The transcription tool
                   is a Continuous Speech Recognizer (CSR) running in
                   forced recognition mode. For evaluation the performance
                   of the CSR was compared to that of nine expert
                   listeners. Both man and the machine carried out exactly
                   the same task: deciding whether a segment was present
                   or not in 467 cases. It turned out that the performance
                   of the CSR is comparable to that of the experts.},
  categories = {asr, pm, VIOS, Nijmegen},
  pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1998/kessens.1998.1.pdf},
  year = 1998
}
@inproceedings{Sproat_1998_a,
  author = {Richard Sproat and Andrew Hunt and Mari Ostendorf and
                   Paul Taylor and Alan Black and Kevin Lenzo},
  title = {Sable: a standard for {TTS} markup},
  booktitle = {I{CSLP}98},
  volume = 5,
  pages = {1719-1724},
  address = {Sydney, Australia},
  categories = {markup, sable},
  pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1998/Sproat_1998_a.pdf},
  ps = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1998/Sproat_1998_a.ps},
  year = 1998
}