Publications by Olga Goubanova
s9808417.bib
@comment{ICPhS 2003 conference paper: Bayesian belief network model of vowel duration.
  NOTE(review): pages holds a single number (2349); confirm whether an end page is missing.}
@inproceedings{Goubanova:2003,
  author     = {Goubanova, Olga},
  title      = {{Bayesian} Modelling Of Vowel Segment Duration For
                Text-to-Speech Synthesis Using Distinctive Features},
  booktitle  = {Proc. ICPhS 2003},
  volume     = {3},
  pages      = {2349},
  address    = {Barcelona, Spain},
  abstract   = {We report the results of applying the Bayesian Belief
                Network (BN) approach to predicting vowel duration. A
                Bayesian inference of the vowel duration is performed
                on a hybrid Bayesian network consisting of discrete and
                continuous nodes, with the nodes in the network
                representing the linguistic factors that affect segment
                duration. New to the present research, we model segment
                identity factor as a set of distinctive features. The
                features chosen were height, frontness, length, and
                roundness. We also experimented with a word class
                feature that implicitly represents word frequency
                information. We contrasted the results of the belief
                network model with those of the sums of products (SoP)
                model and classification and regression tree (CART)
                model. We trained and tested all three models on the
                same data. In terms of the RMS error and correlation
                coefficient, our BN model performs no worse than SoP
                model, and it significantly outperforms CART model.},
  categories = {Bayesian, text-to-speech synthesis, duration modelling},
  ps         = {http://www.cstr.ed.ac.uk/downloads/publications/2003/OGoubanova_icphs2k3.ps},
  year       = {2003},
}
@comment{2001 ISCA speech synthesis workshop paper on segmental duration prediction.
  NOTE(review): no pages field is recorded for this entry; add one if known.}
@inproceedings{Goubanova:2001,
  author    = {Goubanova, Olga},
  title     = {Predicting segmental durations using {Bayesian}
               {Belief} Networks},
  booktitle = {CD-ROM Proc. 4th ISCA Tutorial and Research Workshop
               on Speech Synthesis},
  address   = {Scotland, UK},
  year      = {2001},
}
@comment{Speech Communication journal article (2008): Bayesian networks for phone duration prediction.}
@article{goubanova:king:specom2008,
  author     = {Goubanova, Olga and King, Simon},
  title      = {{Bayesian} networks for phone duration prediction},
  journal    = {Speech Communication},
  volume     = {50},
  number     = {4},
  pages      = {301--311},
  abstract   = {In a text-to-speech system, the duration of each phone
                may be predicted by a duration model. This model is
                usually trained using a database of phones with known
                durations; each phone (and the context it appears in)
                is characterised by a feature vector that is composed
                of a set of linguistic factor values. We describe the
                use of a graphical model -- a Bayesian network -- for
                predicting the duration of a phone, given the values
                for these factors. The network has one discrete
                variable for each of the linguistic factors and a
                single continuous variable for the phone's duration.
                Dependencies between variables (or the lack of them)
                are represented in the BN structure by arcs (or missing
                arcs) between pairs of nodes. During training, both the
                topology of the network and its parameters are learned
                from labelled data. We compare the results of the BN
                model with results for sums of products and CART models
                on the same data. In terms of the root mean square
                error, the BN model performs much better than both CART
                and SoP models. In terms of correlation coefficient,
                the BN model performs better than the SoP model, and as
                well as the CART model. A BN model has certain
                advantages over CART and SoP models. Training SoP
                models requires a high degree of expertise. CART models
                do not deal with interactions between factors in any
                explicit way. As we demonstrate, a BN model can also
                make accurate predictions of a phone's duration, even
                when the values for some of the linguistic factors are
                unknown.},
  categories = {Text-to-speech; Bayesian networks; Duration modelling;
                Sums of products; Classification and regression trees},
  doi        = {10.1016/j.specom.2007.10.002},
  month      = apr,
  year       = {2008},
}
@comment{ICSLP 2002 conference paper on forms of introduction in Map Task dialogues.
  NOTE(review): no pages field is recorded for this entry; add one if known.}
@inproceedings{Goubanova:2002,
  author    = {Goubanova, Olga},
  title     = {Forms of Introduction in Map Task Dialogues: Case of
               {L2} {Russian} Speakers},
  booktitle = {Proc. ICSLP 2002},
  address   = {Denver, USA},
  year      = {2002},
}
@comment{ICSLP 2000 conference paper on Bayesian belief networks for duration modelling.
  NOTE(review): the co-author's given name is not available in this file, so it stays
  as an initial; no pages field is recorded either. Fill both in if known.}
@inproceedings{Goubanova-Taylor:2000,
  author    = {Goubanova, Olga and Taylor, P.},
  title     = {Using {Bayesian} {Belief} Networks for model duration
               in text-to-speech systems},
  booktitle = {CD-ROM Proc. ICSLP 2000},
  address   = {Beijing, China},
  year      = {2000},
}
@comment{Interspeech 2005 conference paper: Bayesian belief network model of consonant duration.
  NOTE(review): no pages field is recorded for this entry; add one if known.}
@inproceedings{goubanova_king_isp05,
  author    = {Goubanova, Olga and King, Simon},
  title     = {Predicting Consonant Duration with {Bayesian} Belief
               Networks},
  booktitle = {Proc. Interspeech 2005},
  address   = {Lisbon, Portugal},
  abstract  = {Consonant duration is influenced by a number of
               linguistic factors such as the consonant's identity,
               within-word position, stress level of the previous and
               following vowels, phrasal position of the word
               containing the target consonant, its syllabic position,
               identity of the previous and following segments. In our
               work, consonant duration is predicted from a Bayesian
               belief network (BN) consisting of discrete nodes for
               the linguistic factors and a single continuous node for
               the consonant's duration. Interactions between factors
               are represented as conditional dependency arcs in this
               graphical model. Given the parameters of the belief
               network, the duration of each consonant in the test set
               is then predicted as the value with the maximum
               probability. We compare the results of the belief
               network model with those of sums-of-products (SoP) and
               classification and regression tree (CART) models using
               the same data. In terms of RMS error, our BN model
               performs better than both CART and SoP models. In terms
               of the correlation coefficient, our BN model performs
               better than SoP model, and no worse than CART model. In
               addition, the Bayesian model reliably predicts
               consonant duration in cases of missing or hidden
               linguistic factors.},
  pdf       = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2005/goubanova_king_isp2005.pdf},
  year      = {2005},
}