The Centre for Speech Technology Research, The University of Edinburgh

Publications by Steve Isard

stepheni.bib

% Williams and Isard, Eurospeech 1997 (Rhodes): keyvowel approach to synthesising regional accents of English.
@inproceedings{Williams_1997_a,
  author     = {Williams, Briony J. and Isard, Stephen},
  title      = {A keyvowel approach to the synthesis of regional accents
                of {English}},
  booktitle  = {Eurospeech '97},
  address    = {Rhodes, Greece},
  year       = {1997},
  pdf        = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1997/Williams_1997_a.pdf},
  ps         = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1997/Williams_1997_a.ps},
  categories = {synthesis}
}
% Dzikovska et al., SIGDIAL 2011 demo session: the Beetle II tutorial dialogue system.
@inproceedings{dzikovskaSIGDIAL20112,
  author    = {Dzikovska, Myroslava and Isard, Amy and Bell, Peter and
               Moore, Johanna and Steinhauser, Natalie and
               Campbell, Gwendolyn},
  title     = {{Beetle II}: an adaptable tutorial dialogue system},
  booktitle = {Proceedings of the SIGDIAL 2011 Conference, demo session},
  pages     = {338--340},
  address   = {Portland, Oregon},
  publisher = {Association for Computational Linguistics},
  month     = jun,
  year      = {2011},
  url       = {http://www.aclweb.org/anthology/W11-2041},
  abstract  = {We present Beetle II, a tutorial dialogue system which accepts
               unrestricted language input and supports experimentation with
               different tutorial planning and dialogue strategies. Our first
               system evaluation compared two tutorial policies and
               demonstrated that the system can be used to study the impact
               of different approaches to tutoring. The system is also
               designed to allow experimentation with a variety of natural
               language techniques, and discourse and dialogue strategies.}
}
% Fitt and Isard, Eurospeech 1999 (Budapest): regional English synthesis from a keyword lexicon.
@inproceedings{fitt_isard_eurospeech99,
  author     = {Fitt, Sue and Isard, Steve},
  title      = {Synthesis of regional {English} using a keyword lexicon},
  booktitle  = {Proc. Eurospeech 1999},
  volume     = {2},
  pages      = {823--826},
  address    = {Budapest},
  month      = sep,
  year       = {1999},
  pdf        = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1999/Fitt_1999_a.pdf},
  ps         = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1999/Fitt_1999_a.ps},
  categories = {speech synthesis, lexicon, accents, regional pronunciation},
  abstract   = {We discuss the use of an accent-independent keyword lexicon
                to synthesise speakers with different regional accents. The
                paper describes the system architecture and the
                transcription system used in the lexicon, and then focuses
                on the construction of word-lists for recording speakers. We
                illustrate by mentioning some of the features of Scottish
                and Irish English, which we are currently synthesising, and
                describe how these are captured by keyword synthesis.}
}
% Carletta et al., Ninth Twente Workshop on Language Technology (1995): coding dialogue structure in a corpus.
@incollection{Carletta_1995_a,
  author     = {Carletta, Jean and Isard, Amy and Isard, Stephen and
                Kowtko, Jacqueline and Doherty-Sneddon, Gwyneth and
                Anderson, Anne H.},
  title      = {The Coding of Dialogue Structure in a Corpus},
  booktitle  = {Proceedings of the Ninth Twente Workshop on Language
                Technology: Corpus-based Approaches to Dialogue Modelling},
  editor     = {Andernach, J. A. and van de Burgt, S. P. and
                van der Hoeven, G. F.},
  publisher  = {Universiteit Twente},
  address    = {Enschede},
  year       = {1995},
  categories = {dialogue}
}
% Isard, King, Taylor and Kowtko, Snowbird 1995: prosodic information in a dialogue speech recogniser.
@inproceedings{isard:king:taylor:kowtko:snowbird95,
  author     = {Isard, Stephen and King, Simon and Taylor, Paul A. and
                Kowtko, Jacqueline},
  title      = {Prosodic Information in a Speech Recognition System intended
                for Dialogue},
  booktitle  = {IEEE Workshop in speech recognition},
  address    = {Snowbird, Utah},
  year       = {1995},
  categories = {},
  abstract   = {We report on an automatic speech recognition system intended
                for use in dialogue, whose original aspect is its use of
                prosodic information for two different purposes. The first
                is to improve the word level accuracy of the system. The
                second is to constrain the language model applied to a given
                utterance by taking into account the way that dialogue
                context and intonational tune interact to limit the
                possibilities for what an utterance might be.}
}
% Dzikovska et al., AIED 2011 interactive event (LNCS 6738): adaptive tutorial dialogue in Beetle II.
@inproceedings{DBLP:conf/aied/DzikovskaIBMSCTCS11,
  author    = {Dzikovska, Myroslava and Isard, Amy and Bell, Peter and
               Moore, Johanna D. and Steinhauser, Natalie B. and
               Campbell, Gwendolyn E. and Taylor, Leanne S. and
               Caine, Simon and Scott, Charlie},
  title     = {Adaptive Intelligent Tutorial Dialogue in the {Beetle II}
               System},
  booktitle = {Artificial Intelligence in Education - 15th International
               Conference (AIED 2011), interactive event},
  series    = {Lecture Notes in Computer Science},
  volume    = {6738},
  pages     = {621},
  address   = {Auckland, New Zealand},
  publisher = {Springer},
  year      = {2011},
  doi       = {10.1007/978-3-642-21869-9_122}
}
% Taylor, King, Isard and Wright, Language and Speech 41(3), 1998: intonation and dialogue context as ASR constraints.
@article{Taylor_1998_b,
  author     = {Taylor, Paul A. and King, Simon and Isard, Stephen D. and
                Wright, Helen},
  title      = {Intonation and Dialogue Context as Constraints for Speech
                Recognition},
  journal    = {Language and Speech},
  volume     = {41},
  number     = {3},
  pages      = {493--512},
  year       = {1998},
  pdf        = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1998/Taylor_1998_b.pdf},
  ps         = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1998/Taylor_1998_b.ps},
  categories = {asr, intonation, dialogue, lm, id4s}
}
% King et al., ICSLP 1998 (Sydney): speech recognition via phonetically featured syllables.
@inproceedings{king:stephenson:isard:taylor:strachan:icslp1998,
  author     = {King, Simon and Stephenson, Todd and Isard, Stephen and
                Taylor, Paul and Strachan, Alex},
  title      = {Speech Recognition via Phonetically Featured Syllables},
  booktitle  = {Proc. {ICSLP} '98},
  pages      = {1031--1034},
  address    = {Sydney, Australia},
  month      = dec,
  year       = {1998},
  pdf        = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1998/King_Stephenson_Isard_Taylor_Strachan_icslp1998.pdf},
  ps         = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1998/King_Stephenson_Isard_Taylor_Strachan_icslp1998.ps},
  categories = {asr},
  abstract   = {We describe a speech recogniser which uses a speech
                production-motivated phonetic-feature description of speech.
                We argue that this is a natural way to describe the speech
                signal and offers an efficient intermediate parameterisation
                for use in speech recognition. We also propose to model this
                description at the syllable rather than phone level. The
                ultimate goal of this work is to generate syllable models
                whose parameters explicitly describe the trajectories of the
                phonetic features of the syllable. We hope to move away from
                Hidden Markov Models (HMMs) of context-dependent phone
                units. As a step towards this, we present a preliminary
                system which consists of two parts: recognition of the
                phonetic features from the speech signal using a neural
                network; and decoding of the feature-based description into
                phonemes using HMMs.}
}
% Wright, Poesio and Isard, ESCA workshop on Dialogue and Prosody (1999): dialogue act recognition from prosody.
@inproceedings{Wright_1999_a,
  author     = {Wright, Helen and Poesio, Massimo and Isard, Stephen},
  title      = {Using high level dialogue information for dialogue act
                recognition using prosodic features},
  booktitle  = {Proceedings of an {ESCA} Tutorial and Research Workshop on
                Dialogue and Prosody},
  pages      = {139--143},
  address    = {Eindhoven, The Netherlands},
  year       = {1999},
  pdf        = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1999/Wright_1999_a.pdf},
  ps         = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1999/Wright_1999_a.ps},
  categories = {dialogue, prosody, asr}
}
% Carletta et al., Computational Linguistics 23(1), 1997: reliability of a dialogue structure coding scheme.
@article{Carletta_1997_b,
  author     = {Carletta, Jean and Isard, Amy and Isard, Stephen and
                Kowtko, Jacqueline C. and Doherty-Sneddon, Gwyneth and
                Anderson, Anne H.},
  title      = {The reliability of a dialogue structure coding scheme},
  journal    = {Computational Linguistics},
  volume     = {23},
  number     = {1},
  pages      = {13--31},
  year       = {1997},
  pdf        = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1997/Carletta_1997_b.pdf},
  ps         = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1997/Carletta_1997_b.ps},
  categories = {dialogue coding}
}
% Taylor et al., ICSLP 1996 (Philadelphia): prosodic information constraining language models for dialogue.
@inproceedings{taylor:shimodaira:isard:king:kowtko:icslp1996,
  author     = {Taylor, Paul A. and Shimodaira, Hiroshi and Isard, Stephen
                and King, Simon and Kowtko, Jacqueline},
  title      = {Using Prosodic Information to Constrain Language Models for
                Spoken Dialogue},
  booktitle  = {Proc. {ICSLP} '96},
  address    = {Philadelphia},
  year       = {1996},
  pdf        = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1996/Taylor_1996_a.pdf},
  ps         = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1996/Taylor_1996_a.ps},
  categories = {asr, intonation, dialogue, lm,id4s},
  abstract   = {We present work intended to improve speech recognition
                performance for computer dialogue by taking into account the
                way that dialogue context and intonational tune interact to
                limit the possibilities for what an utterance might be. We
                report here on the extra constraint achieved in a bigram
                language model expressed in terms of entropy by using
                separate submodels for different sorts of dialogue acts and
                trying to predict which submodel to apply by analysis of the
                intonation of the sentence being recognised.}
}
% Wright-Hastie, Poesio and Isard, Speech Communication 36, 2002: predicting dialogue structure from prosody.
@article{Wright-Hastie_2002,
  author     = {Wright-Hastie, Helen and Poesio, Massimo and Isard, Stephen},
  title      = {Automatically predicting dialogue structure using prosodic
                features},
  journal    = {Speech Communication},
  volume     = {36},
  number     = {1--2},
  pages      = {63--79},
  year       = {2002},
  categories = {dialogue, prosody, recognition}
}
% Bell, Dzikovska and Isard, Interspeech 2012: spoken language interface for a tutorial dialogue system.
@inproceedings{bell12_tutoring,
  author    = {Bell, Peter and Dzikovska, Myroslava and Isard, Amy},
  title     = {Designing a spoken language interface for a tutorial dialogue
               system},
  booktitle = {Proc. Interspeech},
  address   = {Portland, Oregon, USA},
  month     = sep,
  year      = {2012},
  pdf       = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2012/tutoring_is2012.pdf},
  abstract  = {We describe our work in building a spoken language interface
               for a tutorial dialogue system. Our goal is to allow natural,
               unrestricted student interaction with the computer tutor,
               which has been shown to improve the student's learning gain,
               but presents challenges for speech recognition and spoken
               language understanding. We discuss the choice of system
               components and present the results of development experiments
               in both acoustic and language modelling for speech
               recognition in this domain.}
}
% Taylor and Isard, ICSLP 1992 (Banff): a model of intonation for recognition and synthesis.
@inproceedings{Taylor_1992_a,
  author     = {Taylor, Paul A. and Isard, Stephen D.},
  title      = {A New Model of Intonation for Use with Speech Recognition
                and Synthesis},
  booktitle  = {International Conference on Spoken Language Processing},
  address    = {Banff, Canada},
  year       = {1992},
  pdf        = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1992/Taylor_1992_a.pdf},
  ps         = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1992/Taylor_1992_a.ps},
  categories = {synthesis, intonation}
}
% Campbell and Isard, Journal of Phonetics 19, 1991: segmental durations in a syllable frame.
@article{Campbell_1991_a,
  author     = {Campbell, W. N. and Isard, Stephen D.},
  title      = {Segmental Durations in a Syllable Frame},
  journal    = {Journal of Phonetics},
  volume     = {19},
  pages      = {37--47},
  year       = {1991},
  categories = {synthesis, intonation}
}
% Molloy and Isard, ICSLP 1998 (Sydney): suprasegmental duration modelling with elastic constraints in ASR.
@inproceedings{Molloy_1998_a,
  author     = {Molloy, Laurence and Isard, Stephen},
  title      = {Suprasegmental Duration Modelling with Elastic Constraints
                in Automatic Speech Recognition},
  booktitle  = {ICSLP},
  volume     = {7},
  pages      = {2975--2978},
  address    = {Sydney, Australia},
  year       = {1998},
  pdf        = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1998/Molloy_1998_a.pdf},
  ps         = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1998/Molloy_1998_a.ps},
  categories = {suprasegmentals, duration, asr, phonetics, prosody}
}
% Hockey et al., Eurospeech 1997: predicting responses to yes/no questions.
@inproceedings{Hockey_1997_a,
  author     = {Hockey, Beth Ann and Rossen-Knill, Deborah and
                Spejewski, Beverly and Stone, Matthew and Isard, Stephen},
  title      = {Can You Predict Responses to Yes/no Questions? Yes, No, and
                Stuff},
  booktitle  = {Eurospeech '97},
  pages      = {2267--2270},
  year       = {1997},
  categories = {dialogue}
}
% Taylor and Isard, SST-90 (Melbourne): automatic diphone segmentation with hidden Markov models.
@inproceedings{Taylor_1990,
  author     = {Taylor, Paul A. and Isard, Stephen D.},
  title      = {Automatic Diphone Segmentation using Hidden {Markov} Models},
  booktitle  = {{SST-90}, Third International Australian Conference in
                Speech Science and Technology},
  address    = {Melbourne, Australia},
  year       = {1990},
  categories = {synthesis, waveform generation}
}
% Taylor and Isard, Eurospeech 1991 (Genova): automatic diphone segmentation.
@inproceedings{Taylor_1991_b,
  author     = {Taylor, Paul A. and Isard, Stephen D.},
  title      = {Automatic Diphone Segmentation},
  booktitle  = {Proc. Eurospeech '91},
  address    = {Genova, Italy},
  year       = {1991},
  categories = {synthesis}
}
% McKenna and Isard, Eurospeech 1999 (Budapest): Kalman filtering for speaker characterisation.
@inproceedings{McKenna_1999_a,
  author     = {McKenna, John and Isard, Stephen},
  title      = {Tailoring {Kalman} Filtering Towards Speaker
                Characterisation},
  booktitle  = {Proc. {Eurospeech} '99},
  volume     = {6},
  pages      = {2793--2796},
  address    = {Budapest},
  year       = {1999},
  pdf        = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1999/McKenna_1999_a.pdf},
  ps         = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1999/McKenna_1999_a.ps},
  categories = {signal processing, synthesis}
}
% Isard and Miller, IEEE Conference Publication no 258 (1986): diphone synthesis techniques.
@inproceedings{Isard_S_1986,
  author     = {Isard, Stephen D. and Miller, D. A.},
  title      = {Diphone Synthesis Techniques},
  booktitle  = {{IEEE} Conference Publication no 258},
  pages      = {77--82},
  year       = {1986},
  categories = {synthesis, waveform generation}
}
% Taylor et al., Eurospeech 1997 (Rhodes): intonation used to constrain language models in ASR.
% NOTE(review): the abstract ends with the fragment "point in a game." -- this looks like
% truncation residue; confirm against the published abstract before trimming it.
@inproceedings{taylor:king:isard:wright:kowtko:eurospeech1997,
  author     = {Taylor, Paul A. and King, Simon and Isard, Stephen and
                Wright, Helen and Kowtko, Jacqueline},
  title      = {Using Intonation to Constrain Language Models in Speech
                Recognition},
  booktitle  = {Proc. {Eurospeech} '97},
  address    = {Rhodes},
  year       = {1997},
  pdf        = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1997/Taylor_King_Isard_Wright_Kowtko_eurospeech1997.pdf},
  categories = {asr, intonation, dialogue, lm,id4s},
  abstract   = {This paper describes a method for using intonation to reduce
                word error rate in a speech recognition system designed to
                recognise spontaneous dialogue speech. We use a form of
                dialogue analysis based on the theory of conversational
                games. Different move types under this analysis conform to
                different language models. Different move types are also
                characterised by different intonational tunes. Our overall
                recognition strategy is first to predict from intonation the
                type of game move that a test utterance represents, and then
                to use a bigram language model for that type of move during
                recognition. point in a game.}
}
% Fitt and Isard, ICSLP 1998 (Sydney): phonological environments in an accent-independent synthesis lexicon.
@inproceedings{fitt_isard_icslp98,
  author     = {Fitt, Sue and Isard, Steve},
  title      = {Representing the environments for phonological processes in
                an accent-independent lexicon for synthesis of {English}},
  booktitle  = {Proc. ICSLP 1998},
  volume     = {3},
  pages      = {847--850},
  address    = {Sydney, Australia},
  month      = dec,
  year       = {1998},
  pdf        = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1998/Fitt_1998_b.pdf},
  ps         = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1998/Fitt_1998_b.ps},
  categories = {speech synthesis, lexicon, accents, regional pronunciation,
                phonology},
  abstract   = {This paper reports on work developing an accent-independent
                lexicon for use in synthesising speech in English. Lexica
                which use phonemic transcriptions are only suitable for one
                accent, and developing a lexicon for a new accent is a long
                and laborious process. Potential solutions to this problem
                include the use of conversion rules to generate lexica of
                regional pronunciations from standard accents and encoding
                of regional variation by means of keywords. The latter
                proposal forms the basis of the current work. However, even
                if we use a keyword system for lexical transcription there
                are a number of remaining theoretical and methodological
                problems if we are to synthesise and recognise accents to a
                high degree of accuracy; these problems are discussed in the
                following paper.}
}
% Conkie and Isard, chapter in "Progress in Speech Synthesis" (Springer, 1996): optimal coupling of diphones.
@incollection{Isard_S_1996,
  author     = {Conkie, A. and Isard, Stephen D.},
  title      = {Optimal Coupling of Diphones},
  booktitle  = {Progress in Speech Synthesis},
  editor     = {van Santen, Jan P. H. and Sproat, Richard W. and
                Olive, Joseph P. and Hirschberg, Julia},
  publisher  = {Springer},
  year       = {1996},
  categories = {synthesis, waveform generation}
}
% Isard and Pearson, SPEECH '88 FASE Symposium (London): British English contours for synthesis.
@inproceedings{Isard_S_1988,
  author     = {Isard, Stephen D. and Pearson, Mark},
  title      = {A Repertoire of {British} {English} Contours for Speech
                Synthesis},
  booktitle  = {SPEECH '88, 7th FASE Symposium},
  address    = {London},
  year       = {1988},
  categories = {synthesis, intonation}
}
% Campbell et al., ICSLP 1990: duration, pitch and diphones in the CSTR TTS system.
% NOTE(review): fourth author's surname changed from "Verhoven" to "Verhoeven" -- confirm against the proceedings.
@inproceedings{Campbell_1990,
  author     = {Campbell, W. N. and Isard, Stephen D. and
                Monaghan, A. I. C. and Verhoeven, J.},
  title      = {Duration, Pitch and Diphones in the {CSTR TTS} system},
  booktitle  = {ICSLP '90},
  year       = {1990},
  categories = {synthesis, intonation, systems}
}