
@comment{{This file has been generated by bib2bib 1.92}}
@comment{{Command line: /home/korin/bibtex2html-1.92-LINUX/bib2bib -oc /home/korin/projects/publications/new_output/transitdata/2001-citations -ob /home/korin/projects/publications/new_output/transitdata/2001.bib -c 'year : "2001"' /home/korin/projects/publications/filtlists/full_publications_list.bib}}
@inproceedings{koumpis-eurospeech01,
  author = {K.~Koumpis and S.~Renals and M.~Niranjan},
  title = {Extractive Summarization of Voicemail using Lexical
                   and Prosodic Feature Subset Selection},
  booktitle = {Proc. Eurospeech},
  pages = {2377--2380},
  address = {Aalborg, Denmark},
  abstract = {This paper presents a novel data-driven approach to
                   summarizing spoken audio transcripts utilizing lexical
                   and prosodic features. The former are obtained from a
                   speech recognizer and the latter are extracted
                   automatically from speech waveforms. We employ a
                   feature subset selection algorithm, based on ROC
                   curves, which examines different combinations of
                   features at different target operating conditions. The
                   approach is evaluated on the IBM Voicemail corpus,
                   demonstrating that it is possible and desirable to
                   avoid complete commitment to a single best classifier
                   or feature set.},
  categories = {voicemail,summarization,prosody,sheffield},
  pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2001/eurospeech01.pdf},
  ps = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2001/eurospeech01.ps.gz},
  year = 2001
}
@inproceedings{Sagayama2001ISCA08a,
  author = {Shigeki Sagayama and Yutaka Kato and Mitsuru Nakai and
                   Hiroshi Shimodaira},
  title = {{Jacobian Approach to Joint Adaptation to Noise,
                   Channel and Vocal Tract Length}},
  booktitle = {Proc. ISCA Workshop on Adaptation Methods (Sophia
                   Antipolis, France)},
  pages = {117--120},
  categories = {asr, jaist},
  month = aug,
  year = 2001
}
@inproceedings{Goubanova:2001,
  author = {Goubanova, O.},
  title = {Predicting segmental durations using {B}ayesian
                   {B}elief Networks},
  booktitle = {CD-ROM Proc. 4th ISCA Tutorial and Research Workshop
                   on Speech Synthesis},
  address = {Scotland, UK},
  year = 2001
}
@inproceedings{frankel01:alternative,
  author = {Frankel, J. and King, S.},
  title = {Speech recognition in the articulatory domain:
                   investigating an alternative to acoustic {HMM}s},
  booktitle = {Proc. Workshop on Innovation in Speech Processing},
  abstract = {We describe a speech recognition system which uses a
                   combination of acoustic and articulatory features as
                   input. Linear dynamic models capture the trajectories
                   which characterize each segment type. We describe
                   classification and recognition tasks for systems based
                   on acoustic data in conjunction with both real and
                   automatically recovered articulatory parameters.},
  categories = {am,artic,asr,ldm,mocha,edinburgh},
  month = apr,
  pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2001/Frankel_King_WISP2001.pdf},
  ps = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2001/Frankel_King_WISP2001.ps},
  year = 2001
}
@inproceedings{koumpis-icoin01,
  author = {K.~Koumpis and C.~Ladas and S. Renals},
  title = {An Advanced Integrated Architecture for Wireless
                   Voicemail Retrieval},
  booktitle = {Proc. 15th IEEE International Conference on
                   Information Networking},
  pages = {403--410},
  abstract = {This paper describes an alternative architecture for
                    voicemail data retrieval on the move. It comprises
                    three distinct components: a speech recognizer, a
                   text summarizer and a WAP push service initiator,
                   enabling mobile users to receive a text summary of
                   their voicemail in realtime without an explicit
                   request. Our approach overcomes the cost and usability
                   limitations of the conventional voicemail retrieval
                   paradigm which requires a connection establishment in
                    order to listen to spoken messages. We report
                    performance results for each component of the system,
                    which has been trained on a database containing 1843
                    North American English messages, as well as on the
                    duration of the corresponding data path. The proposed
                   architecture can be further customized to meet the
                   requirements of a complete voicemail value-added
                   service.},
  categories = {voicemail,summarization,sheffield},
  ps = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2001/icoin01.ps.gz},
  year = 2001
}
@inproceedings{renals-trec01,
  author = {S.~Renals and D.~Abberley},
  title = {The {THISL} {SDR} system at {TREC}-9},
  booktitle = {Proc. Ninth Text Retrieval Conference (TREC-9)},
  abstract = {This paper describes our participation in the TREC-9
                   Spoken Document Retrieval (SDR) track. The THISL SDR
                   system consists of a realtime version of a hybrid
                   connectionist/HMM large vocabulary speech recognition
                   system and a probabilistic text retrieval system. This
                   paper describes the configuration of the speech
                   recognition and text retrieval systems, including
                   segmentation and query expansion. We report our results
                   for development tests using the TREC-8 queries, and for
                   the TREC-9 evaluation.},
  categories = {thisl,bnews,trec,ir,recognition,eval,abbot,sheffield},
  pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2001/trec9-proc.pdf},
  ps = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2001/trec9-proc.ps.gz},
  year = 2001
}
@inproceedings{christensen-prosody01,
  author = {H.~Christensen and Y.~Gotoh and S.~Renals},
  title = {Punctuation Annotation using Statistical Prosody
                   Models},
  booktitle = {Proc. ISCA Workshop on Prosody in Speech Recognition
                   and Understanding},
  address = {Red Bank, NJ, USA},
  abstract = {This paper is about the development of statistical
                   models of prosodic features to generate linguistic
                   meta-data for spoken language. In particular, we are
                   concerned with automatically punctuating the output of
                   a broadcast news speech recogniser. We present a
                   statistical finite state model that combines prosodic,
                   linguistic and punctuation class features. Experimental
                   results are presented using the Hub-4 Broadcast News
                   corpus, and in the light of our results we discuss the
                   issue of a suitable method of evaluating the present
                   task.},
  categories = {stobs,ie,lm,prosody,bnews,sheffield},
  pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2001/pros01-punc.pdf},
  ps = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2001/pros01-punc.ps.gz},
  year = 2001
}
@article{mayosturkwatson:01,
  author = {Mayo, C. and Turk, A. and Watson, J.},
  title = {Flexibility of acoustic cue weighting in children's
                   speech perception},
  journal = {Journal of the Acoustical Society of America},
  volume = 109,
  pages = {2313},
  categories = {speech perception, development, cue weighting},
  pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2001/JASA-Mayo-Turk-Watson.pdf},
  year = 2001
}
@inproceedings{Fujinaga2001ICASSP,
  author = {Katsuhisa Fujinaga and Mitsuru Nakai and Hiroshi
                   Shimodaira and Shigeki Sagayama},
  title = {{Multiple-Regression Hidden Markov Model}},
  booktitle = {Proc. ICASSP 2001},
  categories = {asr, jaist},
  month = may,
  pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2001/Fujinaga2001ICASSP.pdf},
  year = 2001
}
@inproceedings{frankel01:ASR,
  author = {Frankel, J. and King, S.},
  title = {{ASR} - Articulatory Speech Recognition},
  booktitle = {Proc. {E}urospeech},
  pages = {599--602},
  address = {Aalborg, Denmark},
  abstract = {In this paper we report recent work on a speech
                   recognition system using a combination of acoustic and
                   articulatory features as input. Linear dynamic models
                   are used to capture the trajectories which characterize
                   each segment type. We describe classification and
                   recognition tasks for systems based on acoustic data in
                   conjunction with both real and automatically recovered
                   articulatory parameters.},
  categories = {am,artic,asr,ldm,mocha,edinburgh},
  month = sep,
  pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2001/Frankel_King_Eurospeech2001.pdf},
  ps = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2001/Frankel_King_Eurospeech2001.ps},
  year = 2001
}
@article{Wester-01,
  author = {M. Wester and J. M. Kessens and C. Cucchiarini and H.
                   Strik},
  title = {Obtaining phonetic transcriptions: a comparison
                   between expert listeners and a continuous speech
                   recognizer},
  journal = {Language and Speech},
  volume = {44},
  number = {3},
  pages = {377--403},
  abstract = {In this article, we address the issue of using a
                   continuous speech recognition tool to obtain phonetic
                   or phonological representations of speech. Two
                   experiments were carried out in which the performance
                   of a continuous speech recognizer (CSR) was compared to
                   the performance of expert listeners in a task of
                   judging whether a number of prespecified phones had
                   been realized in an utterance. In the first experiment,
                   nine expert listeners and the CSR carried out exactly
                   the same task: deciding whether a segment was present
                   or not in 467 cases. In the second experiment, we
                   expanded on the first experiment by focusing on two
                   phonological processes: schwa-deletion and
                   schwa-insertion. The results of these experiments show
                   that significant differences in performance were found
                   between the CSR and the listeners, but also between
                   individual listeners. Although some of these
                   differences appeared to be statistically significant,
                   their magnitude is such that they may very well be
                   acceptable depending on what the transcriptions are
                   needed for. In other words, although the CSR is not
                   infallible, it makes it possible to explore large
                   datasets, which might outweigh the errors introduced by
                   the mistakes the CSR makes. For these reasons, we can
                   conclude that the CSR can be used instead of a listener
                   to carry out this type of task: deciding whether a
                   phone is present or not.},
  categories = {automatic transcription, pm, VIOS, Nijmegen},
  pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2001/wester.2001.3.pdf},
  year = 2001
}
@article{Shimodaira2001NIPS,
  author = {Hiroshi Shimodaira and Ken-ichi Noma and Mitsuru Nakai
                   and Shigeki Sagayama},
  title = {{Dynamic Time-Alignment Kernel in Support Vector
                   Machine}},
  journal = {Advances in Neural Information Processing Systems 14,
                   NIPS2001},
  volume = {2},
  pages = {921--928},
  abstract = { A new class of Support Vector Machine (SVM) that is
                   applicable to sequential-pattern recognition such as
                   speech recognition is developed by incorporating an
                   idea of non-linear time alignment into the kernel
                   function. Since the time-alignment operation of
                   sequential pattern is embedded in the new kernel
                   function, standard SVM training and classification
                   algorithms can be employed without further
                    modifications. The proposed SVM (DTAK-SVM) is evaluated
                    in speaker-dependent speech recognition experiments on
                    hand-segmented phoneme recognition. Preliminary
                    experimental results show recognition performance
                    comparable to that of hidden Markov models (HMMs). },
  categories = {ml, svm, jaist},
  month = dec,
  pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2001/Shimodaira2001NIPS.pdf},
  year = 2001
}
@inproceedings{Chang-Euro-01,
  author = {S. Chang and S. Greenberg and M. Wester},
  title = {An Elitist Approach to Articulatory-Acoustic Feature
                   Classification},
  booktitle = {Proc. of Eurospeech '01},
  pages = {1729--1733},
  address = {Aalborg},
  abstract = {A novel framework for automatic articulatory-acoustic
                   feature extraction has been developed for enhancing the
                   accuracy of place- and manner-of-articulation
                   classification in spoken language. The elitist approach
                   focuses on frames for which neural network (MLP)
                   classifiers are highly confident, and discards the
                   rest. Using this method, it is possible to achieve a
                   frame-level accuracy of 93\% for manner information on
                   a corpus of American English sentences passed through a
                   telephone network (NTIMIT). Place information is
                   extracted for each manner class independently,
                    resulting in an appreciable gain in place-feature
                    classification relative to performance for a
                    manner-independent system. The elitist framework
                    provides a
                   potential means of automatically annotating a corpus at
                   the phonetic level without recourse to a word-level
                    transcript and could thus be of utility for developing
                    training materials for automatic speech recognition and
                    speech synthesis applications, as well as aiding the
                    empirical study of spoken language.},
  categories = {aaf, NTIMIT, Berkeley},
  pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2001/wester.2001.2.pdf},
  year = 2001
}
@inproceedings{Keeni2001SPPRA,
  author = {Kanad Keeni and Kunio Goto and Hiroshi Shimodaira},
  title = {{On Extraction of E-Mail Address from Fax Message for
                   Automatic Delivery to Individual Recipient}},
  booktitle = {Proc. IASTED International Conference on Signal
                    Processing, Pattern Recognition and Applications},
  categories = {nn, jaist},
  month = jul,
  year = 2001
}
@inproceedings{fitt_eurospeech01_b,
  author = {Sue Fitt},
  title = {Morphological approaches for an {E}nglish
                   pronunciation lexicon},
  booktitle = {Proc. Eurospeech 2001},
  address = {Aalborg},
  abstract = {Most pronunciation lexica for speech synthesis in
                   English take no account of morphology. Here we
                   demonstrate the benefits of including a morphological
                   breakdown in the transcription. These include
                   maintaining consistency, developing the symbol set and
                   providing the environmental description for allophones
                   and phonetic variables. Our approach does not use a
                    full morphological generator, but includes morphological
                   boundaries in the lexicon.},
  categories = {speech synthesis, morphology, lexica},
  month = sep,
  pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2001/Fitt_2001_a.pdf},
  ps = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2001/Fitt_2001_a.ps},
  year = 2001
}
@inproceedings{Nakai2001ICDAR,
  author = {Mitsuru Nakai and Naoto Akira and Hiroshi Shimodaira
                   and Shigeki Sagayama},
  title = {{Substroke Approach to {HMM}-based On-line Kanji
                   Handwriting Recognition}},
  booktitle = {Proc. of ICDAR'01},
  pages = {491--495},
  abstract = { A new method is proposed for on-line handwriting
                   recognition of Kanji characters. The method employs
                   substroke HMMs as minimum units to constitute Japanese
                   Kanji characters and utilizes the direction of pen
                   motion. The main motivation is to fully utilize the
                    continuous speech recognition algorithm by relating
                    sentence speech to Kanji characters, phonemes to
                    substrokes, and grammar to Kanji structure. The
                    proposed system consists of input feature analysis,
                    substroke HMMs, a character structure dictionary and a
                   decoder. The present approach has the following
                   advantages over the conventional methods that employ
                   whole character HMMs. 1) Much smaller memory
                   requirement for dictionary and models. 2) Fast
                   recognition by employing efficient substroke network
                   search. 3) Capability of recognizing characters not
                   included in the training data if defined as a sequence
                    of substrokes in the dictionary. 4) Capability of
                    recognizing characters written with various stroke
                    orders, given multiple definitions per character in
                    the dictionary. 5) Easy adaptation of the HMMs to the
                    user from a few sample characters. },
  categories = {hwr, jaist},
  month = sep,
  pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2001/Nakai2001ICDAR.pdf},
  year = 2001
}
@inproceedings{koumpis-prosody01,
  author = {K.~Koumpis and S.~Renals},
  title = {The role of prosody in a voicemail summarization
                   system},
  booktitle = {Proc. ISCA Workshop on Prosody in Speech Recognition
                   and Understanding},
  address = {Red Bank, NJ, USA},
  abstract = {When a speaker leaves a voicemail message there are
                   prosodic cues that emphasize the important points in
                   the message, in addition to lexical content. In this
                   paper we compare and visualize the relative
                   contribution of these two types of features within a
                   voicemail summarization system. We describe the
                   system's ability to generate summaries of two test
                    sets, having been trained and validated on 700 messages
                    from the IBM Voicemail corpus. Results measuring the
                   quality of summary artifacts show that combined lexical
                   and prosodic features are at least as robust as
                   combined lexical features alone across all operating
                   conditions.},
  categories = {voicemail,summarization,prosody,sheffield},
  pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2001/pros01-vm.pdf},
  ps = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2001/pros01-vm.ps.gz},
  year = 2001
}
@inproceedings{Sagayama2001ISCA08b,
  author = {Shigeki Sagayama and Koichi Shinoda and Mitsuru Nakai
                   and Hiroshi Shimodaira},
  title = {{Analytic Methods for Acoustic Model Adaptation: A
                   Review}},
  booktitle = {Proc. ISCA Workshop on Adaptation Methods (Sophia
                    Antipolis, France)},
  pages = {67--76},
  note = {Invited Paper},
  categories = {asr, jaist},
  month = aug,
  year = 2001
}
@inproceedings{Wester-Chang-01,
  author = {M. Wester and S. Greenberg and S. Chang},
  title = {A {D}utch Treatment of an Elitist Approach to
                   Articulatory-Acoustic Feature Classification},
  booktitle = {Proc. of Eurospeech '01},
  pages = {1729--1732},
  address = {Aalborg},
  abstract = {A novel approach to articulatory-acoustic feature
                   extraction has been developed for enhancing the
                   accuracy of classification associated with place and
                   manner of articulation information. This elitist
                   approach is tested on a corpus of spontaneous Dutch
                   using two different systems, one trained on a subset of
                   the same corpus, the other trained on a corpus from a
                   different language (American English). The feature
                    dimensions, voicing and manner of articulation,
                    transfer relatively well between the two languages. However,
                   place information transfers less well. Manner-specific
                   training can be used to improve classification of
                   articulatory place information.},
  categories = {aaf, NTIMIT, VIOS, Berkeley},
  pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2001/wester.2001.1.pdf},
  year = 2001
}
@inproceedings{fitt_eurospeech01a,
  author = {Sue Fitt},
  title = {Using real words for recording diphones},
  booktitle = {Proc. Eurospeech 2001},
  abstract = {This paper focuses on the creation of word-lists for
                   making diphone recordings for speech synthesis. Such
                   lists often consist of nonsense words, which has the
                   advantage that the phonetic environment can be
                   constrained, and it is easy to produce lists containing
                   all possible combinations. However, this approach has
                   the disadvantage that non-experts may find it difficult
                   to read the nonsense-word transcriptions. For this
                   reason, we investigate here the issues associated with
                   the use of real words in creating diphone recordings.},
  categories = {speech synthesis, recordings, diphones},
  month = sep,
  pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2001/Fitt_2001_b.pdf},
  ps = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2001/Fitt_2001_b.ps},
  year = 2001
}
@inproceedings{richmond2001,
  author = {Richmond, K.},
  title = {Mixture Density Networks, Human Articulatory Data and
                   Acoustic-to-Articulatory Inversion of Continuous Speech},
  booktitle = {Proc. Workshop on Innovation in Speech Processing},
  pages = {259--276},
  organization = {Institute of Acoustics},
  categories = {artic, ann, mlp, mdn, inversion, mocha, edinburgh},
  key = {richmond2001},
  month = apr,
  ps = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2001/Richmond_2001_a.ps},
  year = 2001
}