Publications by Sue Fitt
sue.bib
@inproceedings{fitt_eurospeech97,
  author     = {Fitt, Sue},
  title      = {The generation of regional pronunciations of {E}nglish for speech synthesis},
  booktitle  = {Proc. Eurospeech 1997},
  year       = {1997},
  month      = sep,
  address    = {Rhodes, Greece},
  ps         = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1997/Fitt_1997_a.ps},
  pdf        = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1997/Fitt_1997_a.pdf},
  abstract   = {Most speech synthesisers and recognisers for English currently use pronunciation lexicons in standard British or American accents, but as use of speech technology grows there will be more demand for the incorporation of regional accents. This paper describes the use of rules to transform existing lexicons of standard British and American pronunciations to a set of regional British and American accents. The paper briefly discusses some features of the regional accents in the project, and the framework used for generating pronunciations. Certain theoretical and practical problems are highlighted; for some of these, solutions are suggested, but it is shown that some difficulties cannot be resolved by automatic rules. However, although the method described cannot produce phonetic transcriptions with 100\% accuracy, it is more accurate than using letter-to-sound rules, and faster than producing transcriptions by hand.},
  categories = {speech synthesis, lexicon, accents, regional pronunciation}
}
@inproceedings{fitt_isard_eurospeech99,
  author     = {Fitt, Sue and Isard, Steve},
  title      = {Synthesis of regional {E}nglish using a keyword lexicon},
  booktitle  = {Proc. Eurospeech 1999},
  year       = {1999},
  month      = sep,
  volume     = {2},
  pages      = {823--826},
  address    = {Budapest},
  ps         = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1999/Fitt_1999_a.ps},
  pdf        = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1999/Fitt_1999_a.pdf},
  abstract   = {We discuss the use of an accent-independent keyword lexicon to synthesise speakers with different regional accents. The paper describes the system architecture and the transcription system used in the lexicon, and then focuses on the construction of word-lists for recording speakers. We illustrate by mentioning some of the features of Scottish and Irish English, which we are currently synthesising, and describe how these are captured by keyword synthesis.},
  categories = {speech synthesis, lexicon, accents, regional pronunciation}
}
@inproceedings{fitt_richmond_interspeech06,
  author     = {Fitt, Sue and Richmond, Korin},
  title      = {Redundancy and productivity in the speech technology lexicon - can we do better?},
  booktitle  = {Proc. Interspeech 2006},
  year       = {2006},
  month      = sep,
  pdf        = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2006/Fitt_2006.pdf},
  abstract   = {Current lexica for speech technology typically contain much redundancy, while omitting useful information. A comparison with lexica in other media and for other purposes is instructive, as it highlights some features we may borrow for text-to-speech and speech recognition lexica. We describe some aspects of the new lexicon we are producing, Combilex, whose structure and implementation is specifically designed to reduce redundancy and improve the representation of productive elements of English. Most importantly, many English words are predictable derivations of baseforms, or compounds. Storing the lexicon as a combination of baseforms and derivational rules speeds up lexicon development, and improves coverage and maintainability.},
  categories = {dictionary, lexicon, pronunciation, English accents, productivity, derivation, redundancy, relational database}
}
@inproceedings{fitt_icphs99,
  author     = {Fitt, Sue},
  title      = {The treatment of vowels preceding 'r' in a keyword lexicon of {E}nglish},
  booktitle  = {Proc. ICPhS 1999},
  year       = {1999},
  month      = aug,
  ps         = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1999/Fitt_1999_b.ps},
  pdf        = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1999/Fitt_1999_b.pdf},
  abstract   = {Work is progressing on a keyword lexicon aimed at enabling the synthesis of various regional accents of English. This paper focuses on a particular issue, that of vowels before orthographic 'r'. These vowels are discussed with respect to rhotic and non-rhotic accents, in terms of both keyword sets and phonetic realisation. Criteria for the use of keysymbols are discussed, and it is noted that these criteria result in inclusion of post-vocalic /r/ in the lexicon, with deletion by rule for non-rhotic accents. It is noted that some keyvowels in our original set have had to be split, while others may prove to be redundant.},
  categories = {speech synthesis, lexicon, accents, regional pronunciation, rhotic}
}
@inproceedings{richmond_interspeech2010,
  author     = {Richmond, Korin and Clark, Robert and Fitt, Sue},
  title      = {On Generating {C}ombilex Pronunciations via Morphological Analysis},
  booktitle  = {Proc. Interspeech},
  year       = {2010},
  month      = sep,
  pages      = {1974--1977},
  address    = {Makuhari, Japan},
  pdf        = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2010/IS100683.pdf},
  abstract   = {Combilex is a high-quality lexicon that has been developed specifically for speech technology purposes and recently released by CSTR. Combilex benefits from many advanced features. This paper explores one of these: the ability to generate fully-specified transcriptions for morphologically derived words automatically. This functionality was originally implemented to encode the pronunciations of derived words in terms of their constituent morphemes, thus accelerating lexicon development and ensuring a high level of consistency. In this paper, we propose this method of modelling pronunciations can be exploited further by combining it with a morphological parser, thus yielding a method to generate full transcriptions for unknown derived words. Not only could this accelerate adding new derived words to Combilex, but it could also serve as an alternative to conventional letter-to-sound rules. This paper presents preliminary work indicating this is a promising direction.},
  keywords   = {combilex lexicon, letter-to-sound rules, grapheme-to-phoneme conversion, morphological decomposition}
}
@inproceedings{schmidt_fitt_scott_jack_eurospeech93,
  author     = {Schmidt, Mark and Fitt, Sue and Scott, Christina and Jack, Mervin},
  title      = {Phonetic transcription standards for {E}uropean names ({ONOMASTICA})},
  booktitle  = {Proc. Eurospeech 1993},
  year       = {1993},
  month      = sep,
  ps         = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1993/Fitt_1993_a.ps},
  pdf        = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1993/Schmidt_1993_a.pdf},
  abstract   = {This paper details the standards identified for phonetic transcription of names as part of the ONOMASTICA project, a European-wide research initiative for the construction of a multi-language pronunciation lexicon of proper names. The main design criteria adopted by the consortium for the development of this multi-language pronunciation dictionary are discussed, including aspects such as phonetic transcription standards, definitions of quality, quality control mechanisms and language specific details concerning phonetic transcription and the annotation of the language of origin.},
  categories = {multi-language dictionary, proper names, phonetic transcription standards, quality control}
}
@inproceedings{fitt_icos96,
  author     = {Fitt, Sue},
  title      = {Spelling unfamiliar names},
  booktitle  = {Proc. International Congress of Onomastic Sciences 1996},
  year       = {1996},
  ps         = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1996/Fitt_1996_a.ps},
  pdf        = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1996/Fitt_1996_a.pdf},
  abstract   = {This paper will examine the written transcription of unfamiliar spoken names. It is well documented that the writing of personal and place names by people who are unfamiliar with the spelling of the name contributes to the evolution of names. The current paper describes a study which examines the processes involved, using experiments in which Scottish subjects are asked to write down unfamiliar spoken British and European town names.},
  categories = {names, onomastics, orthography, phonology}
}
@phdthesis{fitt_thesis98,
  author     = {Fitt, Sue},
  title      = {Processing unfamiliar words - a study in the perception and production of native and foreign placenames},
  school     = {The Centre for Speech Technology Research, Edinburgh University},
  year       = {1998},
  ps         = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1998/thesis.ps},
  pdf        = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1998/Fitt_1998_c.pdf},
  abstract   = {This thesis sets out to examine some of the linguistic processes which take place when speakers are faced with unfamiliar and potentially foreign place names, and the possible psycholinguistic origins of these processes. It is concluded that lexical networks are used to map from input to output, and that phonological rule-based models do not fully account for the data. Previous studies of nativisation have tended to catalogue the phonological and spelling changes which have taken place in historical examples, and explanations have generally been limited to comparison of details of the borrowed and borrowing languages, rather than being set in a solid linguistic framework describing the ways in which speakers and readers process words. There have been psycholinguistic studies of unfamiliar words, but these have generally ignored the foreign dimension, and have been limited in scope. Traditional linguistic work, meanwhile, focuses on descriptions, either abstract or more related to mental processes, of the language that we know and use every day. Studies of foreign language learning also have a rather different focus from the current work, as they examine what happens when we attempt, over a period of time, to acquire new sounds, vocabulary and grammar. This study takes an experimental approach to nativisation, presenting Edinburgh secondary school pupils with a series of unfamiliar spoken and written European town names, and asking them to reproduce the names either in writing or speech, along with a judgement of origin. The resulting pronunciations and spellings are examined for accuracy, errors and changes, both in perception and production. Different explanations of the output are considered, and it is concluded that models which apply a set of linguistic rules to the input in order to generate an output cannot account for the variety of data produced. 
Lexicon-based models, on the other hand, using activation of known words or word-sets, and analogy with word-parts, are more able to explain both the details of individual responses and the variety of responses across subjects.},
  categories = {pronunciation, onomastics, names, phonology, pseudowords, orthography}
}
@inproceedings{richmond2009a,
  author     = {Richmond, Korin and Clark, Robert and Fitt, Sue},
  title      = {Robust {LTS} rules with the {Combilex} speech technology lexicon},
  booktitle  = {Proc. Interspeech},
  year       = {2009},
  month      = sep,
  pages      = {1295--1298},
  address    = {Brighton, UK},
  pdf        = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2009/IS090308.pdf},
  abstract   = {Combilex is a high quality pronunciation lexicon aimed at speech technology applications that has recently been released by CSTR. Combilex benefits from several advanced features. This paper evaluates one of these: the explicit alignment of phones to graphemes in a word. This alignment can help to rapidly develop robust and accurate letter-to-sound (LTS) rules, without needing to rely on automatic alignment methods. To evaluate this, we used Festival's LTS module, comparing its standard automatic alignment with Combilex's explicit alignment. Our results show using Combilex's alignment improves LTS accuracy: 86.50\% words correct as opposed to 84.49\%, with our most general form of lexicon. In addition, building LTS models is greatly accelerated, as the need to list allowed alignments is removed. Finally, loose comparison with other studies indicates Combilex is a superior quality lexicon in terms of consistency and size.},
  keywords   = {combilex, letter-to-sound rules, grapheme-to-phoneme conversion}
}
@inproceedings{fitt_eurospeech01_b,
  author     = {Fitt, Sue},
  title      = {Morphological approaches for an {E}nglish pronunciation lexicon},
  booktitle  = {Proc. Eurospeech 2001},
  year       = {2001},
  month      = sep,
  address    = {Aalborg},
  ps         = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2001/Fitt_2001_a.ps},
  pdf        = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2001/Fitt_2001_a.pdf},
  abstract   = {Most pronunciation lexica for speech synthesis in English take no account of morphology. Here we demonstrate the benefits of including a morphological breakdown in the transcription. These include maintaining consistency, developing the symbol set and providing the environmental description for allophones and phonetic variables. Our approach does not use a full morphological generator, but includes morphological boundaries in the lexicon.},
  categories = {speech synthesis, morphology, lexica}
}
@inproceedings{fitt_isard_icslp98,
  author     = {Fitt, Sue and Isard, Steve},
  title      = {Representing the environments for phonological processes in an accent-independent lexicon for synthesis of {E}nglish},
  booktitle  = {Proc. ICSLP 1998},
  year       = {1998},
  month      = dec,
  volume     = {3},
  pages      = {847--850},
  address    = {Sydney, Australia},
  ps         = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1998/Fitt_1998_b.ps},
  pdf        = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1998/Fitt_1998_b.pdf},
  abstract   = {This paper reports on work developing an accent-independent lexicon for use in synthesising speech in English. Lexica which use phonemic transcriptions are only suitable for one accent, and developing a lexicon for a new accent is a long and laborious process. Potential solutions to this problem include the use of conversion rules to generate lexica of regional pronunciations from standard accents and encoding of regional variation by means of keywords. The latter proposal forms the basis of the current work. However, even if we use a keyword system for lexical transcription there are a number of remaining theoretical and methodological problems if we are to synthesise and recognise accents to a high degree of accuracy; these problems are discussed in the following paper.},
  categories = {speech synthesis, lexicon, accents, regional pronunciation, phonology}
}
@inproceedings{fitt_eurospeech01a,
  author     = {Fitt, Sue},
  title      = {Using real words for recording diphones},
  booktitle  = {Proc. Eurospeech 2001},
  year       = {2001},
  month      = sep,
  ps         = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2001/Fitt_2001_b.ps},
  pdf        = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2001/Fitt_2001_b.pdf},
  abstract   = {This paper focuses on the creation of word-lists for making diphone recordings for speech synthesis. Such lists often consist of nonsense words, which has the advantage that the phonetic environment can be constrained, and it is easy to produce lists containing all possible combinations. However, this approach has the disadvantage that non-experts may find it difficult to read the nonsense-word transcriptions. For this reason, we investigate here the issues associated with the use of real words in creating diphone recordings.},
  categories = {speech synthesis, recordings, diphones}
}
@inproceedings{richmond2007b,
  author     = {Richmond, K. and Strom, V. and Clark, R. and Yamagishi, J. and Fitt, S.},
  title      = {Festival Multisyn Voices for the 2007 Blizzard Challenge},
  booktitle  = {Proc. Blizzard Challenge Workshop (in Proc. SSW6)},
  year       = {2007},
  month      = aug,
  key        = {richmond2007b},
  address    = {Bonn, Germany},
  pdf        = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2007/blizzard2007paper.pdf},
  abstract   = {This paper describes selected aspects of the Festival Multisyn entry to the Blizzard Challenge 2007. We provide an overview of the process of building the three required voices from the speech data provided. This paper focuses on new features of Multisyn which are currently under development and which have been employed in the system used for this Blizzard Challenge. These differences are the application of a more flexible phonetic lattice representation during forced alignment labelling and the use of a pitch accent target cost component. Finally, we also examine aspects of the speech data provided for this year's Blizzard Challenge and raise certain issues for discussion concerning the aim of comparing voices made with differing subsets of the data provided.},
  categories = {tts, blizzard, multisyn, unit selection}
}
@inproceedings{fitt_eurospeech95,
  author     = {Fitt, Sue},
  title      = {The pronunciation of unfamiliar native and non-native town names},
  booktitle  = {Proc. Eurospeech 1995},
  year       = {1995},
  month      = sep,
  address    = {Madrid, Spain},
  ps         = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1995/Fitt_1995_a.ps},
  pdf        = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1995/Fitt_1995_a.pdf},
  abstract   = {This paper will discuss pronunciations of unfamiliar names, both British and foreign, by native speakers of English. Most studies which look at peoples' pronunciations of unfamiliar of pseudowords are based on English word-patterns, rather than a cross-language selection, while algorithms for determining the pronunciation of names from a variety of languages do not necessarily tell us how real people behave in such a situation. This paper shows that subjects may use different systems or sub-systems of rules to pronounce unknown names which they perceive to be non-native. If we wish to model human behaviour in novel word pronunciation, we need to take account the fact that, while native speakers are not experts in all foreign languages, neither are they linguistically naive.},
  categories = {pronunciation, onomastics, names, phonology, pseudowords, L2}
}