The Centre for Speech Technology Research, The University of Edinburgh

Publications by Arlo Faria

afaria.bib

% Eurospeech 2005 paper: pitch-based estimation of VTLN warp factors.
@inproceedings{faria-eurospeech05,
  author     = {Faria, Arlo and Gelbart, D.},
  title      = {Efficient Pitch-based Estimation of {VTLN} Warp Factors},
  booktitle  = {Proc. Eurospeech},
  year       = {2005},
  abstract   = {To reduce inter-speaker variability, vocal tract length
                normalization (VTLN) is commonly used to transform acoustic
                features for automatic speech recognition (ASR). The warp
                factors used in this process are usually derived by maximum
                likelihood (ML) estimation, involving an exhaustive search
                over possible values. We describe an alternative approach:
                exploit the correlation between a speaker's average pitch
                and vocal tract length, and model the probability
                distribution of warp factors conditioned on pitch
                observations. This can be used directly for warp factor
                estimation, or as a smoothing prior in combination with ML
                estimates. Pitch-based warp factor estimation for VTLN is
                effective and requires relatively little memory and
                computation. Such an approach is well-suited for
                environments with constrained resources, or where pitch is
                already being computed for other purposes.},
  categories = {vocal tract length normalization,speaker adaptation},
  pdf        = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2005/pbvtln-latest.pdf},
}