Publications by Liang Lu
llu.bib
@inproceedings{llu2012map,
author = {Lu, L. and Ghoshal, A. and Renals, S.},
title = {Maximum a Posteriori Adaptation of Subspace {Gaussian}
Mixture Models for Cross-Lingual Speech Recognition},
booktitle = {Proc. ICASSP},
abstract = {This paper concerns cross-lingual acoustic modeling in
the case when there are limited target language
resources. We build on an approach in which a subspace
Gaussian mixture model (SGMM) is adapted to the target
language by reusing the globally shared parameters
estimated from out-of-language training data. In
current cross-lingual systems, these parameters are
fixed when training the target system, which can give
rise to a mismatch between the source and target
systems. We investigate a maximum a posteriori (MAP)
adaptation approach to alleviate the potential
mismatch. In particular, we focus on the adaptation of
phonetic subspace parameters using a matrix variate
Gaussian prior distribution. Experiments on the
GlobalPhone corpus using the MAP adaptation approach
results in word error rate reductions, compared with
the cross-lingual baseline systems and systems updated
using maximum likelihood, for training conditions with
1 hour and 5 hours of target language data.},
keywords = {Subspace Gaussian Mixture Model, Maximum a Posteriori
Adaptation, Cross-lingual Speech Recognition},
pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2012/llu-icassp-2012.pdf},
year = 2012
}
@article{lu_spl_2011,
author = {Lu, L. and Ghoshal, A. and Renals, S.},
title = {Regularized Subspace {Gaussian} Mixture Models for Speech
Recognition},
journal = {IEEE Signal Processing Letters},
volume = {18},
number = {7},
pages = {419--422},
abstract = {Subspace Gaussian mixture models (SGMMs) provide a
compact representation of the Gaussian parameters in an
acoustic model, but may still suffer from over-fitting
with insufficient training data. In this letter, the
SGMM state parameters are estimated using a penalized
maximum-likelihood objective, based on $\ell_1$ and
$\ell_2$ regularization, as well as their combination,
referred to as the elastic net, for robust model
estimation. Experiments on the 5000-word Wall Street
Journal transcription task show word error rate
reduction and improved model robustness with
regularization.},
keywords = {Acoustic Modelling, Regularization, Sparsity, Subspace
Gaussian Mixture Model},
pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2011/lu-spl-2011.pdf},
year = 2011
}
@inproceedings{lu2012jud,
author = {Lu, L. and Ghoshal, A. and Renals, S.},
title = {Joint Uncertainty Decoding with Unscented Transform
for Noise Robust Subspace {Gaussian} Mixture Model},
booktitle = {Proc. SAPA-SCALE Workshop},
abstract = {Common noise compensation techniques use vector Taylor
series (VTS) to approximate the mismatch function.
Recent work shows that the approximation accuracy may
be improved by sampling. One such sampling technique is
the unscented transform (UT), which draws samples
deterministically from clean speech and noise model to
derive the noise corrupted speech parameters. This
paper applies UT to noise compensation of the subspace
Gaussian mixture model (SGMM). Since UT requires
relatively smaller number of samples for accurate
estimation, it has significantly lower computational
cost compared to other random sampling techniques.
However, the number of surface Gaussians in an SGMM is
typically very large, making the direct application of
UT, for compensating individual Gaussian components,
computationally impractical. In this paper, we avoid
the computational burden by employing UT in the
framework of joint uncertainty decoding (JUD), which
groups all the Gaussian components into small number of
classes, sharing the compensation parameters by class.
We evaluate the JUD-UT technique for an SGMM system
using the Aurora 4 corpus. Experimental results
indicate that UT can lead to increased accuracy
compared to VTS approximation if the JUD phase factor
is untuned, and to similar accuracy if the phase factor
is tuned empirically.},
keywords = {noise compensation, SGMM, JUD, UT},
pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2012/llu-sapa2012.pdf},
year = 2012
}
@inproceedings{lu2012noise,
author = {Lu, L. and Chin, K. K. and Ghoshal, A. and Renals, S.},
title = {Noise Compensation for Subspace {Gaussian} Mixture
Models},
booktitle = {Proc. INTERSPEECH},
abstract = {Joint uncertainty decoding (JUD) is an effective
model-based noise compensation technique for
conventional Gaussian mixture model (GMM) based speech
recognition systems. In this paper, we apply JUD to
subspace Gaussian mixture model (SGMM) based acoustic
models. The total number of Gaussians in the SGMM
acoustic model is usually much larger than for
conventional GMMs, which limits the application of
approaches which explicitly compensate each Gaussian,
such as vector Taylor series (VTS). However, by
clustering the Gaussian components into a number of
regression classes, JUD-based noise compensation can be
successfully applied to SGMM systems. We evaluate the
JUD/SGMM technique using the Aurora 4 corpus, and the
experimental results indicated that it is more accurate
than conventional GMM-based systems using either VTS or
JUD noise compensation.},
keywords = {acoustic modelling, noise compensation, SGMM, JUD},
pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2012/llu-is2012.pdf},
year = 2012
}
@inproceedings{lu_asru_2011,
author = {Lu, L. and Ghoshal, A. and Renals, S.},
title = {Regularized Subspace {Gaussian} Mixture Models for
Cross-lingual Speech Recognition},
booktitle = {Proc. ASRU},
abstract = {We investigate cross-lingual acoustic modelling for
low resource languages using the subspace Gaussian
mixture model (SGMM). We assume the presence of
acoustic models trained on multiple source languages,
and use the global subspace parameters from those
models for improved modelling in a target language with
limited amounts of transcribed speech. Experiments on
the GlobalPhone corpus using Spanish, Portuguese, and
Swedish as source languages and German as target
language (with 1 hour and 5 hours of transcribed audio)
show that multilingually trained SGMM shared parameters
result in lower word error rates (WERs) than using
those from a single source language. We also show that
regularizing the estimation of the SGMM state vectors
by penalizing their $\ell_1$-norm help to overcome
numerical instabilities and lead to lower WER.},
keywords = {Subspace Gaussian Mixture Model, Cross-lingual, model
regularization},
pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2011/lu-asru-2011.pdf},
year = 2011
}