The Centre for Speech Technology Research, The University of Edinburgh

Publications by Pawel Swietojanski

s1136550.bib

@inproceedings{swi2012_dnn,
  author    = {Swietojanski, P. and Ghoshal, A. and Renals, S.},
  title     = {Unsupervised Cross-Lingual Knowledge Transfer in {DNN}-Based {LVCSR}},
  booktitle = {Proc. {IEEE} Workshop on Spoken Language Technology},
  address   = {Miami, Florida, USA},
  month     = dec,
  year      = 2012,
  pdf       = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2012/ps_slt2012.pdf},
  abstract  = {We investigate the use of cross-lingual acoustic data to
               initialise deep neural network (DNN) acoustic models by means
               of unsupervised restricted Boltzmann machine (RBM) pretraining.
               DNNs for German are pretrained using one or all of German,
               Portuguese, Spanish and Swedish. The DNNs are used in a tandem
               configuration, where the network outputs are used as features
               for a hidden Markov model (HMM) whose emission densities are
               modeled by Gaussian mixture models (GMMs), as well as in a
               hybrid configuration, where the network outputs are used as the
               HMM state likelihoods. The experiments show that unsupervised
               pretraining is more crucial for the hybrid setups, particularly
               with limited amounts of transcribed training data. More
               importantly, unsupervised pretraining is shown to be
               language-independent.}
}
@inproceedings{bell12_mlan,
  author    = {Bell, P. and Gales, M. and Lanchantin, P. and Liu, X. and
               Long, Y. and Renals, S. and Swietojanski, P. and Woodland, P.},
  title     = {Transcription of Multi-Genre Media Archives Using
               Out-of-Domain Data},
  booktitle = {Proc. {IEEE} Workshop on Spoken Language Technology},
  address   = {Miami, Florida, USA},
  month     = dec,
  year      = 2012,
  pdf       = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2012/mlan_slt2012.pdf},
  abstract  = {We describe our work on developing a speech recognition system
               for multi-genre media archives. The high diversity of the data
               makes this a challenging recognition task, which may benefit
               from systems trained on a combination of in-domain and
               out-of-domain data. Working with tandem HMMs, we present
               Multi-level Adaptive Networks (MLAN), a novel technique for
               incorporating information from out-of-domain posterior features
               using deep neural networks. We show that it provides a
               substantial reduction in WER over other systems, with relative
               WER reductions of 15\% over a PLP baseline, 9\% over in-domain
               tandem features and 8\% over the best out-of-domain tandem
               features.}
}