@comment{{This file has been generated by bib2bib 1.92}}
@comment{{Command line: /home/korin/bibtex2html-1.92-LINUX/bib2bib -oc /home/korin/projects/publications/new_output/transitdata/2004-citations -ob /home/korin/projects/publications/new_output/transitdata/2004.bib -c 'year : "2004"' /home/korin/projects/publications/filtlists/full_publications_list.bib}}
@inproceedings{leo_04-2,
author = {Enrico Zovato and Stefano Sandri and Silvia Quazza and
Leonardo Badino},
title = {Prosodic analysis of a multi-style corpus in the
perspective of emotional speech synthesis},
booktitle = {Proc. ICSLP 2004},
address = {Jeju, Korea},
pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2004/ThA1404p.8_p890.pdf},
year = 2004
}
@inproceedings{shig042,
author = {Yoshinori Shiga and Simon King},
title = {Source-Filter Separation for Articulation-to-Speech
Synthesis},
booktitle = {Proc. ICSLP},
address = {Jeju, Korea},
abstract = {In this paper we examine a method for separating out
the vocal-tract filter response from the voice source
characteristic using a large articulatory database. The
method realises such separation for voiced speech using
an iterative approximation procedure under the
assumption that the speech production process is a
linear system composed of a voice source and a
vocal-tract filter, and that each of the components is
controlled independently by different sets of factors.
Experimental results show that the spectral variation
is evidently influenced by the fundamental frequency or
the power of speech, and that the tendency of the
variation may be related closely to speaker identity.
The method enables independent control over the voice
source characteristic in our articulation-to-speech
synthesis.},
categories = {artic, lbg, clustering, mocha, source-filter,
edinburgh},
month = oct,
pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2004/shiga_icslp04b.pdf},
ps = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2004/shiga_icslp04b.ps},
year = 2004
}
@article{Wray04-LC04,
author = {Wray, A. and Cox, S.J. and Lincoln, M. and Tryggvason,
J.},
title = {A Formulaic Approach to Translation at the Post
Office: Reading the Signs},
journal = {Language and Communication},
volume = {24},
number = {1},
pages = {59--75},
abstract = {TESSA is an interactive translation system designed to
support transactions between a post office clerk and a
deaf customer. The system translates the clerk's speech
into British Sign Language (BSL), displayed on a
screen, using a specially-developed avatar (virtual
human). TESSA is a context-constrained exemplification
of one of two basic approaches to machine translation,
neither of which can currently fulfil all of the
demands of successful automatic translation. Drawing on
recent research in theoretical psycholinguistics, we
show how TESSA is a convincing prototype model of one
aspect of real human language processing. Ways are
suggested of exploiting this parallel, potentially
offering new possibilities for the future design of
artificial language systems.},
categories = {visicast,sign language,translation,UEA},
pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2004/WrayCoxetal2004.pdf},
year = 2004
}
@inproceedings{vepa-king-isca04,
author = {Vepa, J. and King, S.},
title = {Subjective evaluation of join cost and smoothing
methods},
booktitle = {Proc. 5th {ISCA} Speech Synthesis Workshop},
address = {Pittsburgh, USA},
abstract = {In our previous papers, we have proposed join cost
functions derived from spectral distances, which have
good correlations with perceptual scores obtained for a
range of concatenation discontinuities. To further
validate their ability to predict concatenation
discontinuities, we have chosen the best three spectral
distances and evaluated them subjectively in a
listening test. The units for synthesis stimuli are
obtained from a state-of-the-art unit selection
text-to-speech system: `rVoice' from Rhetorical Systems
Ltd. We also compared three different smoothing methods
in this listening test. In this paper, we report
listeners' preferences for each join cost in
combination with each smoothing method.},
categories = {join cost, Kalman filter, smoothing, evaluation,
rVoice, edinburgh},
month = jun,
pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2004/vepa_tts04.pdf},
year = 2004
}
@inproceedings{christensen-ecir04,
author = {H. Christensen and B. Kolluru and Y. Gotoh and S.
Renals},
title = {From text summarisation to style-specific
summarisation for broadcast news},
booktitle = {Proc. ECIR 2004},
abstract = {In this paper we report on a series of experiments
investigating the path from text-summarisation to
style-specific summarisation of spoken news stories. We
show that the portability of traditional text
summarisation features to broadcast news is dependent
on the diffusiveness of the information in the
broadcast news story. An analysis of two categories of
news stories (containing only read speech or some
spontaneous speech) demonstrates the importance of the
style and the quality of the transcript, when
extracting the summary-worthy information content.
Further experiments indicate the advantages of doing
style-specific summarisation of broadcast news.},
categories = {s3l,summarization,bnews,edinburgh},
pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2004/ecir04.pdf},
ps = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2004/ecir04.ps.gz},
year = 2004
}
@inproceedings{vepa_king_icslp2004,
author = {Jithendra Vepa and Simon King},
title = {Subjective Evaluation Of Join Cost Functions Used In
Unit Selection Speech Synthesis},
booktitle = {Proc. 8th International Conference on Spoken Language
Processing (ICSLP)},
address = {Jeju, Korea},
abstract = {In our previous papers, we have proposed join cost
functions derived from spectral distances, which have
good correlations with perceptual scores obtained for a
range of concatenation discontinuities. To further
validate their ability to predict concatenation
discontinuities, we have chosen the best three spectral
distances and evaluated them subjectively in a
listening test. The unit sequences for synthesis
stimuli are obtained from a state-of-the-art unit
selection text-to-speech system: rVoice from Rhetorical
Systems Ltd. In this paper, we report listeners'
preferences for each of the three join cost functions.},
month = oct,
pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2004/vepa_king_icslp2004.pdf},
year = 2004
}
@inproceedings{dielmann-icassp04,
author = {A. Dielmann and S. Renals},
title = {Dynamic {Bayesian} Networks for Meeting Structuring},
booktitle = {Proc. IEEE ICASSP},
abstract = {This paper is about the automatic structuring of
multiparty meetings using audio information. We have
used a corpus of 53 meetings, recorded using a
microphone array and lapel microphones for each
participant. The task was to segment meetings into a
sequence of meeting actions, or phases. We have adopted
a statistical approach using dynamic Bayesian networks
(DBNs). Two DBN architectures were investigated: a
two-level hidden Markov model (HMM) in which the
acoustic observations were concatenated; and a
multistream DBN in which two separate observation
sequences were modelled. Additionally we have also
explored the use of counter variables to constrain the
number of action transitions. Experimental results
indicate that the DBN architectures are an improvement
over a simple baseline HMM, with the multistream DBN
with counter constraints producing an action error rate
of 6\%.},
categories = {m4,multimodal,dbn,meetings,edinburgh},
pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2004/icassp04-m4.pdf},
ps = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2004/icassp04-m4.ps.gz},
year = 2004
}
@incollection{vepa:king:joincostchapter2004,
author = {Jithendra Vepa and Simon King},
title = {Join Cost for Unit Selection Speech Synthesis},
booktitle = {Speech Synthesis},
publisher = {Prentice Hall},
editor = {Alwan, Abeer and Narayanan, Shri},
ps = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2004/Vepa_King_joincostchapter2004.ps},
year = 2004
}
@inproceedings{mayoturk:04b,
author = {Mayo, C. and Turk, A.},
title = {The Development of Perceptual Cue Weighting Within and
Across Monosyllabic Words},
booktitle = {LabPhon 9},
  address = {University of Illinois at Urbana-Champaign},
categories = {speech perception, development, cue weighting},
year = 2004
}
@inproceedings{wester04:asynch,
author = {Wester, M. and Frankel, J. and King, S.},
title = {Asynchronous Articulatory Feature Recognition Using
Dynamic {B}ayesian Networks},
booktitle = {Proc. IEICE Beyond HMM Workshop},
address = {Kyoto},
abstract = {This paper builds on previous work where dynamic
Bayesian networks (DBN) were proposed as a model for
articulatory feature recognition. Using DBNs makes it
possible to model the dependencies between features, an
addition to previous approaches which was found to
improve feature recognition performance. The DBN
results were promising, giving close to the accuracy of
artificial neural nets (ANNs). However, the system was
trained on canonical labels, leading to an overly
strong set of constraints on feature co-occurrence. In
this study, we describe an embedded training scheme
which learns a set of data-driven asynchronous feature
changes where supported in the data. Using a subset of
the OGI Numbers corpus, we describe articulatory
feature recognition experiments using both
canonically-trained and asynchronous DBNs. Performance
using DBNs is found to exceed that of ANNs trained on
an identical task, giving a higher recognition
accuracy. Furthermore, inter-feature dependencies
result in a more structured model, giving rise to fewer
feature combinations in the recognition output. In
addition to an empirical evaluation of this modelling
approach, we give a qualitative analysis, comparing
asynchrony found through our data-driven methods to the
asynchrony which may be expected on the basis of
linguistic knowledge.},
categories = {am,artic,asr,dbn,oginumbers,edinburgh},
month = dec,
pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2004/Wester_et_al_IEICE.pdf},
ps = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2004/Wester_et_al_IEICE.ps},
year = 2004
}
@inproceedings{Toney2004,
author = {Toney, D. and Feinberg, D. and Richmond, K.},
title = {Acoustic Features for Profiling Mobile Users of
Conversational Interfaces},
booktitle = {6th International Symposium on Mobile Human-Computer
Interaction - {MobileHCI} 2004},
editor = {Brewster, S. and Dunlop, M.},
pages = {394--398},
address = {Glasgow, Scotland},
publisher = {Springer},
abstract = {Conversational interfaces allow human users to use
spoken language to interact with computer-based
information services. In this paper, we examine the
potential for personalizing speech-based human-computer
interaction according to the user's gender and age. We
describe a system that uses acoustic features of the
user's speech to automatically estimate these physical
characteristics. We discuss the difficulties of
implementing this process in relation to the high level
of environmental noise that is typical of mobile
human-computer interaction.},
month = sep,
year = 2004
}
@inproceedings{shig043,
author = {Yoshinori Shiga and Simon King},
title = {Estimating detailed spectral envelopes using
articulatory clustering},
booktitle = {Proc. ICSLP},
address = {Jeju, Korea},
abstract = {This paper presents an articulatory-acoustic mapping
where detailed spectral envelopes are estimated. During
the estimation, the harmonics of a range of F0 values
are derived from the spectra of multiple voiced speech
signals vocalized with similar articulator settings.
The envelope formed by these harmonics is represented
by a cepstrum, which is computed by fitting the peaks
of all the harmonics based on the weighted least square
method in the frequency domain. The experimental result
shows that the spectral envelopes are estimated with
the highest accuracy when the cepstral order is 48--64
for a female speaker, which suggests that representing
the real response of the vocal tract requires
high-quefrency elements that conventional speech
synthesis methods are forced to discard in order to
eliminate the pitch component of speech.},
categories = {artic, lbg, clustering, mocha, harmonic, envelope,
edinburgh},
month = oct,
pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2004/shiga_icslp04a.pdf},
ps = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2004/shiga_icslp04a.ps},
year = 2004
}
@inproceedings{frankel04:artic_dbn,
author = {Frankel, J. and Wester, M. and King, S.},
title = {Articulatory feature recognition using dynamic
{B}ayesian networks},
booktitle = {Proc. {ICSLP}},
  address = {Jeju, Korea},
abstract = {This paper describes the use of dynamic Bayesian
networks for the task of articulatory feature
recognition. We show that by modeling the dependencies
between a set of 6 multi-leveled articulatory features,
recognition accuracy is increased over an equivalent
system in which features are considered independent.
Results are compared to those found using artificial
neural networks on an identical task.},
categories = {am,artic,asr,dbn,timit,edinburgh},
month = sep,
pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2004/Frankel_et_al_ICSLP2004.pdf},
ps = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2004/Frankel_et_al_ICSLP2004.ps},
year = 2004
}
@inproceedings{clarkrichmondking_ssw504,
author = {Robert A.J. Clark and Korin Richmond and Simon King},
title = {Festival 2 -- build your own general purpose unit
selection speech synthesiser},
booktitle = {Proc. 5th {ISCA} Speech Synthesis Workshop},
  address = {Pittsburgh, USA},
abstract = {This paper describes version 2 of the Festival speech
synthesis system. Festival 2 provides a development
environment for concatenative speech synthesis, and now
includes a general purpose unit selection speech
synthesis engine. We discuss various aspects of unit
selection speech synthesis, focusing on the research
issues that relate to voice design and the automation
of the voice development process.},
categories = {synthesis, festival, unitselection},
pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2004/clarkrichmondking_ssw504.pdf},
ps = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2004/clarkrichmondking_ssw504.ps},
year = 2004
}
@inproceedings{bakerclarkwhite_ssw504,
author = {Rachel Baker and Robert A.J. Clark and Michael White},
title = {Synthesising Contextually Appropriate Intonation in
Limited Domains},
booktitle = {Proc. 5th {ISCA} Speech Synthesis Workshop},
address = {Pittsburgh, USA},
categories = {synthesis, prosody, intonation, festival},
pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2004/bakerclarkwhite_ssw504.pdf},
ps = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2004/bakerclarkwhite_ssw504.ps},
year = 2004
}
@inproceedings{Gutkin:King:icslp04,
author = {Alexander Gutkin and Simon King},
title = {Phone classification in pseudo-{E}uclidean Vector
Spaces},
booktitle = {Proc. 8th International Conference on Spoken Language
Processing (ICSLP)},
volume = {II},
pages = {1453--1457},
address = {Jeju Island, Korea},
abstract = { Recently we have proposed a structural framework for
modelling speech, which is based on patterns of
phonological distinctive features, a linguistically
well-motivated alternative to standard vector-space
acoustic models like HMMs. This framework gives
considerable representational freedom by working with
features that have explicit linguistic interpretation,
but at the expense of the ability to apply the wide
range of analytical decision algorithms available in
vector spaces, restricting oneself to more
computationally expensive and less-developed symbolic
metric tools. In this paper we show that a
dissimilarity-based distance-preserving transition from
the original structural representation to a
corresponding pseudo-Euclidean vector space is
possible. Promising results of phone classification
experiments conducted on the TIMIT database are
reported. },
categories = {structural,recognition,acoustic,phonetic_feature,timit,edinburgh},
issn = {1225-441X},
month = oct,
pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2004/gutkin_king_icslp04.pdf},
ps = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2004/gutkin_king_icslp04.ps.gz},
year = 2004
}
@inproceedings{leo_04-3,
author = {Leonardo Badino},
title = {Chinese Text Word Segmentation Considering Semantic
Links among Sentences},
booktitle = {Proc. ICSLP 2004},
address = {Jeju, Korea},
pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2004/ThB2202p.22_p965.pdf},
year = 2004
}
@inproceedings{Gutkin:King:icpr04,
author = {Alexander Gutkin and Simon King},
title = {{S}tructural {R}epresentation of {S}peech for
{P}honetic {C}lassification},
booktitle = {Proc. 17th International Conference on Pattern
Recognition (ICPR)},
volume = 3,
pages = {438--441},
address = {Cambridge, UK},
publisher = {IEEE Computer Society Press},
abstract = { This paper explores the issues involved in using
symbolic metric algorithms for automatic speech
recognition (ASR), via a structural representation of
speech. This representation is based on a set of
phonological distinctive features which is a
linguistically well-motivated alternative to the
``beads-on-a-string'' view of speech that is standard
in current ASR systems. We report the promising results
of phoneme classification experiments conducted on a
standard continuous speech task. },
categories = {structural,recognition,acoustic,phonetic_feature,timit,edinburgh},
isbn = {0-7695-2128-2},
month = aug,
pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2004/gutkin_king_icpr04.pdf},
ps = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2004/gutkin_king_icpr04.ps.gz},
year = 2004
}
@inproceedings{dielmann-mmsp04,
author = {A. Dielmann and S. Renals},
title = {Multi-stream segmentation of meetings},
booktitle = {Proc. IEEE Workshop on Multimedia Signal Processing},
abstract = {This paper investigates the automatic segmentation of
meetings into a sequence of group actions or phases.
Our work is based on a corpus of multiparty meetings
collected in a meeting room instrumented with video
cameras, lapel microphones and a microphone array. We
have extracted a set of feature streams, in this case
extracted from the audio data, based on speaker turns,
prosody and a transcript of what was spoken. We have
related these signals to the higher level semantic
categories via a multistream statistical model based on
dynamic Bayesian networks (DBNs). We report on a set of
experiments in which different DBN architectures are
compared, together with the different feature streams.
The resultant system has an action error rate of 9\%.},
categories = {m4,multimodal,dbn,meetings,edinburgh},
pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2004/dielmann-mmsp04.pdf},
ps = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2004/dielmann-mmsp04.ps.gz},
year = 2004
}
@inproceedings{leo_04-4,
author = {Leonardo Badino and Claudia Barolo and Silvia Quazza},
title = {Language independent phoneme mapping for foreign {TTS}},
booktitle = {Proc. 5th {ISCA} Speech Synthesis Workshop},
address = {Pittsburgh, USA},
pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2004/2026.pdf},
year = 2004
}
@inproceedings{abdelhaleem-icassp04,
author = {Y. H. Abdel-Haleem and S. Renals and N. D. Lawrence},
title = {Acoustic space dimensionality selection and
combination using the maximum entropy principle},
booktitle = {Proc. IEEE ICASSP},
abstract = {In this paper we propose a discriminative approach to
acoustic space dimensionality selection based on
maximum entropy modelling. We form a set of constraints
by composing the acoustic space with the space of phone
classes, and use a continuous feature formulation of
maximum entropy modelling to select an optimal feature
set. The suggested approach has two steps: (1) the
selection of the best acoustic space that efficiently
and economically represents the acoustic data and its
variability; (2) the combination of selected acoustic
features in the maximum entropy framework to estimate
the posterior probabilities over the phonetic labels
given the acoustic input. Specific contributions of
this paper include a parameter estimation algorithm
(generalized improved iterative scaling) that enables
the use of negative features, the parameterization of
constraint functions using Gaussian mixture models, and
experimental results using the TIMIT database.},
categories = {ml,maxent,am,recognition,edinburgh,sheffield},
pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2004/icassp04-me.pdf},
year = 2004
}
@inproceedings{shig040,
author = {Yoshinori Shiga},
title = {Source-filter separation based on an articulatory
corpus},
booktitle = {One day meeting for young speech researchers ({UK}
meeting)},
address = {University College London, London, United Kingdom},
abstract = {A new approach is presented for estimating voice
source and vocal-tract filter characteristics based on
an articulatory database. From the viewpoint of
acoustics, in order to estimate the transfer function
of a system, both the input and output of the system
need to be observed. In the case of the source-filter
separation problem, however, only the output (i.e.
speech) is observable, and the response of the system
(vocal tract) and the input (voice source) must be
estimated simultaneously. The estimation is hence
theoretically impossible, and consequently the
estimation problem is generally solved approximately by
applying rather oversimplified models. The proposed
approach separates these two characteristics under the
assumption that each of the characteristics is
controlled independently by a different set of factors.
The separation is achieved by iterative approximation
based on the above assumption using a large speech
corpus including electro-magnetic articulograph data.
The proposed approach enables the independent control
of the source and filter characteristics, and thus
contributes toward improving speech quality in speech
synthesis.},
categories = {artic, lbg, clustering, mocha, source-filter,
edinburgh},
month = apr,
year = 2004
}
@inproceedings{calhoun:04,
author = {Calhoun, Sasha},
title = {Phonetic dimensions of intonational categories: the
case of {L}+{H}* and {H}*},
booktitle = {Prosody 2004},
address = {Nara, Japan},
note = {poster},
abstract = {ToBI, in its conception, was an attempt to describe
intonation in terms of phonological categories. An
effect of the success of ToBI in doing this has been to
make it standard to try to characterise all
intonational phonological distinctions in terms of ToBI
distinctions, i.e. segmental alignment of pitch targets
and pitch height as either High or Low. Here we report
a series of experiments which attempted to do this,
linking two supposed phonological categories, theme and
rheme accents, to two controversial ToBI pitch accents
L+H* and H* respectively. Our results suggest a
reanalysis of the dimensions of phonological
intonational distinctions. It is suggested that there
are three layers affecting the intonational contour:
global extrinsic, local extrinsic and intrinsic; and
the theme-rheme distinction may lie in the local
extrinsic layer. It is the similarity both of the
phonetic effects and the semantic information conveyed
by the last two layers that has led to the confusion in
results such as those reported here.},
categories = {prosody, intonational phonology, information
structure, metrical structure, production and
perception experiment},
month = mar,
pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2004/calhounsp04.pdf},
ps = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2004/calhounsp04.ps},
year = 2004
}
@inproceedings{Gutkin:etal:ets-cam04,
author = {Alexander Gutkin and David Gay and Lev Goldfarb and
Mirjam Wester},
title = {On the {A}rticulatory {R}epresentation of {S}peech
within the {E}volving {T}ransformation {S}ystem
{F}ormalism},
booktitle = {Pattern Representation and the Future of Pattern
Recognition (Proc. Satellite Workshop of 17th
International Conference on Pattern Recognition)},
editor = {Lev Goldfarb},
pages = {57--76},
address = {Cambridge, UK},
abstract = { This paper deals with the formulation of an
alternative, structural, approach to the speech
representation and recognition problem. In this
approach, we require both the representation and the
learning algorithms to be linguistically meaningful and
to naturally represent the linguistic data at hand.
This allows the speech recognition system to discover
the emergent combinatorial structure of the linguistic
classes. The proposed approach is developed within the
ETS formalism, the first formalism in applied
mathematics specifically designed to address the issues
of class and object/event representation. We present an
initial application of ETS to the articulatory
modelling of speech based on elementary physiological
gestures that can be reliably represented as the ETS
primitives. We discuss the advantages of this gestural
approach over prevalent methods and its promising
potential to mathematical modelling and representation
in linguistics. },
categories = {structural,recognition,ets,artic,mocha,edinburgh,unb},
month = aug,
pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2004/ets_cam04_dasr.pdf},
ps = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2004/ets_cam04_dasr.ps.gz},
year = 2004
}
@inproceedings{shig041,
author = {Yoshinori Shiga and Simon King},
title = {Accurate spectral envelope estimation for
articulation-to-speech synthesis},
booktitle = {Proc. 5th {ISCA} Speech Synthesis Workshop},
pages = {19--24},
address = {CMU, Pittsburgh, USA},
abstract = {This paper introduces a novel articulatory-acoustic
mapping in which detailed spectral envelopes are
estimated based on the cepstrum, inclusive of the
high-quefrency elements which are discarded in
conventional speech synthesis to eliminate the pitch
component of speech. For this estimation, the method
deals with the harmonics of multiple voiced-speech
spectra so that several sets of harmonics can be
obtained at various pitch frequencies to form a
spectral envelope. The experimental result shows that
the method estimates spectral envelopes with the
highest accuracy when the cepstral order is 48--64,
which suggests that the higher-order coefficients are
required to represent detailed envelopes reflecting the
real vocal-tract responses.},
categories = {artic, lbg, clustering, mocha, harmonic, envelope,
edinburgh},
month = jun,
pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2004/shiga_ssw504.pdf},
ps = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2004/shiga_ssw504.ps},
year = 2004
}
@inproceedings{leo_04-1,
author = {Leonardo Badino and Claudia Barolo and Silvia Quazza},
title = {A General Approach to {TTS} Reading of Mixed-Language
Texts},
booktitle = {Proc. ICSLP 2004},
address = {Jeju, Korea},
pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2004/WeA2401o.5_p1083.pdf},
year = 2004
}
@article{mayoturk:04,
author = {Mayo, C. and Turk, A.},
title = {Adult-child differences in acoustic cue weighting are
influenced by segmental context: Children are not
always perceptually biased towards transitions},
journal = {Journal of the Acoustical Society of America},
volume = 115,
pages = {3184--3194},
categories = {speech perception, development, cue weighting},
pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2004/mayo-turk-2004a.pdf},
year = 2004
}