2001.bib
@comment{{This file has been generated by bib2bib 1.92}}
@comment{{Command line: /home/korin/bibtex2html-1.92-LINUX/bib2bib -oc /home/korin/projects/publications/new_output/transitdata/2001-citations -ob /home/korin/projects/publications/new_output/transitdata/2001.bib -c 'year : "2001"' /home/korin/projects/publications/filtlists/full_publications_list.bib}}
@inproceedings{koumpis-eurospeech01,
  author     = {K.~Koumpis and S.~Renals and M.~Niranjan},
  title      = {Extractive Summarization of Voicemail using Lexical
                and Prosodic Feature Subset Selection},
  booktitle  = {Proc. Eurospeech},
  address    = {Aalborg, Denmark},
  pages      = {2377--2380},
  year       = {2001},
  abstract   = {This paper presents a novel data-driven approach to
                summarizing spoken audio transcripts utilizing lexical
                and prosodic features. The former are obtained from a
                speech recognizer and the latter are extracted
                automatically from speech waveforms. We employ a
                feature subset selection algorithm, based on ROC
                curves, which examines different combinations of
                features at different target operating conditions. The
                approach is evaluated on the IBM Voicemail corpus,
                demonstrating that it is possible and desirable to
                avoid complete commitment to a single best classifier
                or feature set.},
  categories = {voicemail,summarization,prosody,sheffield},
  pdf        = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2001/eurospeech01.pdf},
  ps         = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2001/eurospeech01.ps.gz},
}
@inproceedings{Sagayama2001ISCA08a,
  author     = {Shigeki Sagayama and Yutaka Kato and Mitsuru Nakai and
                Hiroshi Shimodaira},
  title      = {{Jacobian} Approach to Joint Adaptation to Noise,
                Channel and Vocal Tract Length},
  booktitle  = {Proc. ISCA Workshop on Adaptation Methods},
  address    = {Sophia Antipolis, France},
  pages      = {117--120},
  month      = aug,
  year       = {2001},
  categories = {asr, jaist},
}
@inproceedings{Goubanova:2001,
  author    = {Goubanova, O.},
  title     = {Predicting segmental durations using {Bayesian}
               {Belief} Networks},
  booktitle = {CD-ROM Proc. 4th ISCA Tutorial and Research Workshop
               on Speech Synthesis},
  address   = {Scotland, UK},
  year      = {2001},
}
@inproceedings{frankel01:alternative,
  author     = {Frankel, J. and King, S.},
  title      = {Speech recognition in the articulatory domain:
                investigating an alternative to acoustic {HMMs}},
  booktitle  = {Proc. Workshop on Innovation in Speech Processing},
  month      = apr,
  year       = {2001},
  abstract   = {We describe a speech recognition system which uses a
                combination of acoustic and articulatory features as
                input. Linear dynamic models capture the trajectories
                which characterize each segment type. We describe
                classification and recognition tasks for systems based
                on acoustic data in conjunction with both real and
                automatically recovered articulatory parameters.},
  categories = {am,artic,asr,ldm,mocha,edinburgh},
  pdf        = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2001/Frankel_King_WISP2001.pdf},
  ps         = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2001/Frankel_King_WISP2001.ps},
}
@inproceedings{koumpis-icoin01,
  author     = {K.~Koumpis and C.~Ladas and S.~Renals},
  title      = {An Advanced Integrated Architecture for Wireless
                Voicemail Retrieval},
  booktitle  = {Proc. 15th IEEE International Conference on
                Information Networking},
  pages      = {403--410},
  year       = {2001},
  abstract   = {This paper describes an alternative architecture for
                voicemail data retrieval on the move. It is comprised
                of three distinct components: a speech recognizer, a
                text summarizer and a WAP push service initiator,
                enabling mobile users to receive a text summary of
                their voicemail in realtime without an explicit
                request. Our approach overcomes the cost and usability
                limitations of the conventional voicemail retrieval
                paradigm which requires a connection establishment in
                order to listen to spoken messages. We report
                performance results on all different components of the
                system which has been trained on a database containing
                1843 North American English messages as well as on the
                duration of the corresponding data path. The proposed
                architecture can be further customized to meet the
                requirements of a complete voicemail value-added
                service.},
  categories = {voicemail,summarization,sheffield},
  ps         = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2001/icoin01.ps.gz},
}
@inproceedings{renals-trec01,
  author     = {S.~Renals and D.~Abberley},
  title      = {The {THISL} {SDR} system at {TREC}--9},
  booktitle  = {Proc. Ninth Text Retrieval Conference (TREC--9)},
  year       = {2001},
  abstract   = {This paper describes our participation in the TREC-9
                Spoken Document Retrieval (SDR) track. The THISL SDR
                system consists of a realtime version of a hybrid
                connectionist/HMM large vocabulary speech recognition
                system and a probabilistic text retrieval system. This
                paper describes the configuration of the speech
                recognition and text retrieval systems, including
                segmentation and query expansion. We report our results
                for development tests using the TREC-8 queries, and for
                the TREC-9 evaluation.},
  categories = {thisl,bnews,trec,ir,recognition,eval,abbot,sheffield},
  pdf        = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2001/trec9-proc.pdf},
  ps         = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2001/trec9-proc.ps.gz},
}
@inproceedings{christensen-prosody01,
  author     = {H.~Christensen and Y.~Gotoh and S.~Renals},
  title      = {Punctuation Annotation using Statistical Prosody
                Models},
  booktitle  = {Proc. ISCA Workshop on Prosody in Speech Recognition
                and Understanding},
  address    = {Red Bank, NJ, USA},
  year       = {2001},
  abstract   = {This paper is about the development of statistical
                models of prosodic features to generate linguistic
                meta-data for spoken language. In particular, we are
                concerned with automatically punctuating the output of
                a broadcast news speech recogniser. We present a
                statistical finite state model that combines prosodic,
                linguistic and punctuation class features. Experimental
                results are presented using the Hub-4 Broadcast News
                corpus, and in the light of our results we discuss the
                issue of a suitable method of evaluating the present
                task.},
  categories = {stobs,ie,lm,prosody,bnews,sheffield},
  pdf        = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2001/pros01-punc.pdf},
  ps         = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2001/pros01-punc.ps.gz},
}
@article{mayosturkwatson:01,
  author     = {Mayo, C. and Turk, A. and Watson, J.},
  title      = {Flexibility of acoustic cue weighting in children's
                speech perception},
  journal    = {Journal of the Acoustical Society of America},
  volume     = {109},
  pages      = {2313},
  year       = {2001},
  categories = {speech perception, development, cue weighting},
  pdf        = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2001/JASA-Mayo-Turk-Watson.pdf},
}
@inproceedings{Fujinaga2001ICASSP,
  author     = {Katsuhisa Fujinaga and Mitsuru Nakai and Hiroshi
                Shimodaira and Shigeki Sagayama},
  title      = {Multiple-Regression Hidden {Markov} Model},
  booktitle  = {Proc. ICASSP 2001},
  month      = may,
  year       = {2001},
  categories = {asr, jaist},
  pdf        = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2001/Fujinaga2001ICASSP.pdf},
}
@inproceedings{frankel01:ASR,
  author     = {Frankel, J. and King, S.},
  title      = {{ASR} -- Articulatory Speech Recognition},
  booktitle  = {Proc. Eurospeech},
  address    = {Aalborg, Denmark},
  pages      = {599--602},
  month      = sep,
  year       = {2001},
  abstract   = {In this paper we report recent work on a speech
                recognition system using a combination of acoustic and
                articulatory features as input. Linear dynamic models
                are used to capture the trajectories which characterize
                each segment type. We describe classification and
                recognition tasks for systems based on acoustic data in
                conjunction with both real and automatically recovered
                articulatory parameters.},
  categories = {am,artic,asr,ldm,mocha,edinburgh},
  pdf        = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2001/Frankel_King_Eurospeech2001.pdf},
  ps         = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2001/Frankel_King_Eurospeech2001.ps},
}
@article{Wester-01,
  author     = {M. Wester and J. M. Kessens and C. Cucchiarini and H.
                Strik},
  title      = {Obtaining phonetic transcriptions: a comparison
                between expert listeners and a continuous speech
                recognizer},
  journal    = {Language and Speech},
  volume     = {44},
  number     = {3},
  pages      = {377--403},
  year       = {2001},
  abstract   = {In this article, we address the issue of using a
                continuous speech recognition tool to obtain phonetic
                or phonological representations of speech. Two
                experiments were carried out in which the performance
                of a continuous speech recognizer (CSR) was compared to
                the performance of expert listeners in a task of
                judging whether a number of prespecified phones had
                been realized in an utterance. In the first experiment,
                nine expert listeners and the CSR carried out exactly
                the same task: deciding whether a segment was present
                or not in 467 cases. In the second experiment, we
                expanded on the first experiment by focusing on two
                phonological processes: schwa-deletion and
                schwa-insertion. The results of these experiments show
                that significant differences in performance were found
                between the CSR and the listeners, but also between
                individual listeners. Although some of these
                differences appeared to be statistically significant,
                their magnitude is such that they may very well be
                acceptable depending on what the transcriptions are
                needed for. In other words, although the CSR is not
                infallible, it makes it possible to explore large
                datasets, which might outweigh the errors introduced by
                the mistakes the CSR makes. For these reasons, we can
                conclude that the CSR can be used instead of a listener
                to carry out this type of task: deciding whether a
                phone is present or not.},
  categories = {automatic transcription, pm, VIOS, Nijmegen},
  pdf        = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2001/wester.2001.3.pdf},
}
@comment{{NOTE(review): Shimodaira2001NIPS is filed as an article although
  NIPS is a conference proceedings; inproceedings with a booktitle may be the
  better fit. The entry type is left unchanged here so that existing styles
  and scripts keep grouping it as before -- confirm before changing.}}
@article{Shimodaira2001NIPS,
  author     = {Hiroshi Shimodaira and Ken-ichi Noma and Mitsuru Nakai
                and Shigeki Sagayama},
  title      = {Dynamic Time-Alignment Kernel in Support Vector
                Machine},
  journal    = {Advances in Neural Information Processing Systems 14,
                NIPS2001},
  volume     = {2},
  pages      = {921--928},
  month      = dec,
  year       = {2001},
  abstract   = {A new class of Support Vector Machine (SVM) that is
                applicable to sequential-pattern recognition such as
                speech recognition is developed by incorporating an
                idea of non-linear time alignment into the kernel
                function. Since the time-alignment operation of
                sequential pattern is embedded in the new kernel
                function, standard SVM training and classification
                algorithms can be employed without further
                modifications. The proposed SVM (DTAK-SVM) is evaluated
                in speaker-dependent speech recognition experiments of
                hand-segmented phoneme recognition. Preliminary
                experimental results show comparable recognition
                performance with hidden Markov models (HMMs).},
  categories = {ml, svm, jaist},
  pdf        = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2001/Shimodaira2001NIPS.pdf},
}
@comment{{NOTE(review): Chang-Euro-01 and Wester-Chang-01 are both recorded as
  starting on page 1729 of the same Eurospeech '01 proceedings, so one of the
  two page ranges is presumably wrong. The numbers are left as found here --
  verify against the published proceedings.}}
@inproceedings{Chang-Euro-01,
  author     = {S. Chang and S. Greenberg and M. Wester},
  title      = {An Elitist Approach to Articulatory-Acoustic Feature
                Classification},
  booktitle  = {Proc. of Eurospeech '01},
  address    = {Aalborg},
  pages      = {1729--1733},
  year       = {2001},
  abstract   = {A novel framework for automatic articulatory-acoustic
                feature extraction has been developed for enhancing the
                accuracy of place- and manner-of-articulation
                classification in spoken language. The elitist approach
                focuses on frames for which neural network (MLP)
                classifiers are highly confident, and discards the
                rest. Using this method, it is possible to achieve a
                frame-level accuracy of 93\% for manner information on
                a corpus of American English sentences passed through a
                telephone network (NTIMIT). Place information is
                extracted for each manner class independently,
                resulting in an appreciable gain in place-feature
                classification relative to performance for a
                manner-independent system. The elitist framework provides a
                potential means of automatically annotating a corpus at
                the phonetic level without recourse to a word-level
                transcript and could thus be of utility for developing
                training materials for automatic speech recognition and
                speech synthesis applications, as well as aid the
                empirical study of spoken language.},
  categories = {aaf, NTIMIT, Berkeley},
  pdf        = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2001/wester.2001.2.pdf},
}
@inproceedings{Keeni2001SPPRA,
  author     = {Kanad Keeni and Kunio Goto and Hiroshi Shimodaira},
  title      = {On Extraction of E-Mail Address from Fax Message for
                Automatic Delivery to Individual Recipient},
  booktitle  = {IASTED International Conference on Signal Processing,
                Pattern Recognition and Applications},
  month      = jul,
  year       = {2001},
  categories = {nn, jaist},
}
@inproceedings{fitt_eurospeech01_b,
  author     = {Sue Fitt},
  title      = {Morphological approaches for an {English}
                pronunciation lexicon},
  booktitle  = {Proc. Eurospeech 2001},
  address    = {Aalborg},
  month      = sep,
  year       = {2001},
  abstract   = {Most pronunciation lexica for speech synthesis in
                English take no account of morphology. Here we
                demonstrate the benefits of including a morphological
                breakdown in the transcription. These include
                maintaining consistency, developing the symbol set and
                providing the environmental description for allophones
                and phonetic variables. Our approach does not use a
                full morphological generator, but includes morphological
                boundaries in the lexicon.},
  categories = {speech synthesis, morphology, lexica},
  pdf        = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2001/Fitt_2001_a.pdf},
  ps         = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2001/Fitt_2001_a.ps},
}
@inproceedings{Nakai2001ICDAR,
  author     = {Mitsuru Nakai and Naoto Akira and Hiroshi Shimodaira
                and Shigeki Sagayama},
  title      = {Substroke Approach to {HMM}-based On-line {Kanji}
                Handwriting Recognition},
  booktitle  = {Proc. of ICDAR'01},
  pages      = {491--495},
  month      = sep,
  year       = {2001},
  abstract   = {A new method is proposed for on-line handwriting
                recognition of Kanji characters. The method employs
                substroke HMMs as minimum units to constitute Japanese
                Kanji characters and utilizes the direction of pen
                motion. The main motivation is to fully utilize the
                continuous speech recognition algorithm by relating
                sentence speech to Kanji character, phonemes to
                substrokes, and grammar to Kanji structure. The
                proposed system consists input feature analysis,
                substroke HMMs, a character structure dictionary and a
                decoder. The present approach has the following
                advantages over the conventional methods that employ
                whole character HMMs. 1) Much smaller memory
                requirement for dictionary and models. 2) Fast
                recognition by employing efficient substroke network
                search. 3) Capability of recognizing characters not
                included in the training data if defined as a sequence
                of substrokes in the dictionary. 4) Capability of
                recognizing characters written by various different
                stroke orders with multiple definitions per one
                character in the dictionary. 5) Easiness in HMM
                adaptation to the user with a few sample character
                data.},
  categories = {hwr, jaist},
  pdf        = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2001/Nakai2001ICDAR.pdf},
}
@inproceedings{koumpis-prosody01,
  author     = {K.~Koumpis and S.~Renals},
  title      = {The role of prosody in a voicemail summarization
                system},
  booktitle  = {Proc. ISCA Workshop on Prosody in Speech Recognition
                and Understanding},
  address    = {Red Bank, NJ, USA},
  year       = {2001},
  abstract   = {When a speaker leaves a voicemail message there are
                prosodic cues that emphasize the important points in
                the message, in addition to lexical content. In this
                paper we compare and visualize the relative
                contribution of these two types of features within a
                voicemail summarization system. We describe the
                system's ability to generate summaries of two test
                sets, having trained and validated using 700 messages
                from the IBM Voicemail corpus. Results measuring the
                quality of summary artifacts show that combined lexical
                and prosodic features are at least as robust as
                combined lexical features alone across all operating
                conditions.},
  categories = {voicemail,summarization,prosody,sheffield},
  pdf        = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2001/pros01-vm.pdf},
  ps         = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2001/pros01-vm.ps.gz},
}
@inproceedings{Sagayama2001ISCA08b,
  author     = {Shigeki Sagayama and Koichi Shinoda and Mitsuru Nakai
                and Hiroshi Shimodaira},
  title      = {Analytic Methods for Acoustic Model Adaptation: A
                Review},
  booktitle  = {Proc. ISCA Workshop on Adaptation Methods},
  address    = {Sophia Antipolis, France},
  pages      = {67--76},
  month      = aug,
  year       = {2001},
  note       = {Invited Paper},
  categories = {asr, jaist},
}
@inproceedings{Wester-Chang-01,
  author     = {M. Wester and S. Greenberg and S. Chang},
  title      = {A {Dutch} Treatment of an Elitist Approach to
                Articulatory-Acoustic Feature Classification},
  booktitle  = {Proc. of Eurospeech '01},
  address    = {Aalborg},
  pages      = {1729--1732},
  year       = {2001},
  abstract   = {A novel approach to articulatory-acoustic feature
                extraction has been developed for enhancing the
                accuracy of classification associated with place and
                manner of articulation information. This elitist
                approach is tested on a corpus of spontaneous Dutch
                using two different systems, one trained on a subset of
                the same corpus, the other trained on a corpus from a
                different language (American English). The feature
                dimensions, voicing and manner of articulation transfer
                relatively well between the two languages. However,
                place information transfers less well. Manner-specific
                training can be used to improve classification of
                articulatory place information.},
  categories = {aaf, NTIMIT, VIOS, Berkeley},
  pdf        = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2001/wester.2001.1.pdf},
}
@inproceedings{fitt_eurospeech01a,
  author     = {Sue Fitt},
  title      = {Using real words for recording diphones},
  booktitle  = {Proc. Eurospeech 2001},
  month      = sep,
  year       = {2001},
  abstract   = {This paper focuses on the creation of word-lists for
                making diphone recordings for speech synthesis. Such
                lists often consist of nonsense words, which has the
                advantage that the phonetic environment can be
                constrained, and it is easy to produce lists containing
                all possible combinations. However, this approach has
                the disadvantage that non-experts may find it difficult
                to read the nonsense-word transcriptions. For this
                reason, we investigate here the issues associated with
                the use of real words in creating diphone recordings.},
  categories = {speech synthesis, recordings, diphones},
  pdf        = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2001/Fitt_2001_b.pdf},
  ps         = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2001/Fitt_2001_b.ps},
}
@inproceedings{richmond2001,
  author       = {Richmond, K.},
  title        = {Mixture Density Networks, Human Articulatory Data and
                  Acoustic-to-Articulatory Inversion of Continuous Speech},
  booktitle    = {Proc. Workshop on Innovation in Speech Processing},
  organization = {Institute of Acoustics},
  pages        = {259--276},
  month        = apr,
  year         = {2001},
  key          = {richmond2001},
  categories   = {artic, ann, mlp, mdn, inversion, mocha, edinburgh},
  ps           = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2001/Richmond_2001_a.ps},
}