2013.bib

@comment{{This file has been generated by bib2bib 1.92}}
@comment{{Command line: /home/korin/bibtex2html-1.92-LINUX/bib2bib -oc /home/korin/projects/publications/new_output/transitdata/2013-citations -ob /home/korin/projects/publications/new_output/transitdata/2013.bib -c 'year : "2013"' /home/korin/projects/publications/filtlists/full_publications_list.bib}}
@article{6289354,
  author = {Ling, Z. and Richmond, K. and Yamagishi, J.},
  title = {Articulatory Control of {HMM}-based Parametric Speech
                   Synthesis using Feature-Space-Switched Multiple
                   Regression},
  journal = {IEEE Transactions on Audio, Speech, and Language
                   Processing},
  volume = {21},
  number = {1},
  pages = {207--219},
  abstract = {In previous work we proposed a method to control the
                   characteristics of synthetic speech flexibly by
                   integrating articulatory features into a hidden Markov
                   model (HMM) based parametric speech synthesiser. In
                   this method, a unified acoustic-articulatory model is
                   trained, and context-dependent linear transforms are
                   used to model the dependency between the two feature
                   streams. In this paper, we go significantly further and
                   propose a feature-space-switched multiple regression
                   HMM to improve the performance of articulatory control.
                   A multiple regression HMM (MRHMM) is adopted to model
                   the distribution of acoustic features, with
                   articulatory features used as exogenous explanatory
                   variables. A separate Gaussian mixture model (GMM) is
                   introduced to model the articulatory space, and
                   articulatory-to-acoustic regression matrices are
                   trained for each component of this GMM, instead of for
                   the context-dependent states in the HMM. Furthermore,
                   we propose a task-specific context feature tailoring
                   method to ensure compatibility between state context
                   features and articulatory features that are manipulated
                   at synthesis time. The proposed method is evaluated on
                   two tasks, using a speech database with acoustic
                   waveforms and articulatory movements recorded in
                   parallel by electromagnetic articulography (EMA). In a
                   vowel identity modification task, the new method
                   achieves better performance when reconstructing target
                   vowels by varying articulatory inputs than our previous
                   approach. A second vowel creation task shows our new
                   method is highly effective at producing a new vowel
                   from appropriate articulatory representations; even
                   though no acoustic samples for this vowel are present
                   in the training data, the synthesised vowel is shown
                   to sound highly natural.},
  doi = {10.1109/TASL.2012.2215600},
  issn = {1558-7916},
  year = 2013
}
@article{Dines2011,
  author = {John Dines and Hui Liang and Lakshmi Saheer and
                   Matthew Gibson and William Byrne and Keiichiro Oura and
                   Keiichi Tokuda and Junichi Yamagishi and Simon King and
                   Mirjam Wester and Teemu Hirsimäki and Reima
                   Karhila and Mikko Kurimo},
  title = {Personalising speech-to-speech translation:
                   Unsupervised cross-lingual speaker adaptation for
                   {HMM}-based speech synthesis},
  journal = {Computer Speech and Language},
  volume = {27},
  number = {2},
  pages = {420--437},
  abstract = {In this paper we present results of unsupervised
                   cross-lingual speaker adaptation applied to
                   text-to-speech synthesis. The application of our
                   research is the personalisation of speech-to-speech
                   translation in which we employ an HMM statistical
                   framework for both speech recognition and synthesis.
                   This framework provides a logical mechanism to adapt
                   synthesised speech output to the voice of the user by
                   way of speech recognition. In this work we present
                   results of several different unsupervised and
                   cross-lingual adaptation approaches as well as an
                   end-to-end speaker adaptive speech-to-speech
                   translation system. Our experiments show that we can
                   successfully apply speaker adaptation in both
                   unsupervised and cross-lingual scenarios and our
                   proposed algorithms seem to generalise well for several
                   language pairs. We also discuss important future
                   directions including the need for better evaluation
                   metrics.},
  doi = {10.1016/j.csl.2011.08.003},
  issn = {0885-2308},
  keywords = {Speech-to-speech translation, Cross-lingual speaker
                   adaptation, HMM-based speech synthesis, Speaker
                   adaptation, Voice conversion},
  url = {http://www.sciencedirect.com/science/article/pii/S0885230811000441},
  month = feb,
  year = 2013
}