The Centre for Speech Technology Research, The University of Edinburgh

Publications by Partha Lal

s0565860.bib

% ICASSP 2007 paper summarising the 2006 JHU Summer Workshop on articulatory features.
% Accented author names use BibTeX special-character escapes so sorting works under classic BibTeX.
@inproceedings{livescu07:JHU_summary,
  author = {Livescu, K. and {\c{C}}etin, {\"O}. and Hasegawa-Johnson, M.
                   and King, S. and Bartels, C. and Borges, N. and Kantor,
                   A. and Lal, P. and Yung, L. and Bezman, A. and
                   Dawson-Haggerty, S. and Woods, B. and Frankel, J. and
                   Magimai-Doss, M. and Saenko, K.},
  title = {Articulatory feature-based methods for acoustic and
                   audio-visual speech recognition: {Summary} from the
                   2006 {JHU} {Summer} {Workshop}},
  booktitle = {Proc. ICASSP},
  address = {Honolulu},
  abstract = {We report on investigations, conducted at the 2006
                   Johns Hopkins Workshop, into the use of articulatory
                   features (AFs) for observation and pronunciation models
                   in speech recognition. In the area of observation
                   modeling, we use the outputs of AF classifiers both
                   directly, in an extension of hybrid HMM/neural network
                   models, and as part of the observation vector, an
                   extension of the tandem approach. In the area of
                   pronunciation modeling, we investigate a model having
                   multiple streams of AF states with soft synchrony
                   constraints, for both audio-only and audio-visual
                   recognition. The models are implemented as dynamic
                   Bayesian networks, and tested on tasks from the
                   Small-Vocabulary Switchboard (SVitchboard) corpus and
                   the CUAVE audio-visual digits corpus. Finally, we
                   analyze AF classification and forced alignment using a
                   newly collected set of feature-level manual
                   transcriptions.},
  month = apr,
  pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2007/livescu_icassp07_sum.pdf},
  year = 2007
}
% Interspeech 2006 paper by Partha Lal comparing algorithms for rating sung clips.
@inproceedings{lal_interspeech06,
  author     = {Partha Lal},
  title      = {A Comparison of Singing Evaluation Algorithms},
  booktitle  = {Proc. Interspeech 2006},
  year       = {2006},
  month      = sep,
  categories = {automated singing evaluation, pitch tracking, entertainment applications},
  pdf        = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2006/singing.pdf},
  abstract   = {This paper describes a system that compares user renditions of
                short sung clips with the original version of those clips. The
                F0 of both recordings was estimated and then Viterbi-aligned
                with each other. The total difference in pitch after alignment
                was used as a distance metric and transformed into a rating out
                of ten, to indicate to the user how close he or she was to the
                original singer. An existing corpus of sung speech was used for
                initial design and optimisation of the system. We then
                collected further development and evaluation corpora - these
                recordings were judged for closeness to an original recording
                by two human judges. The rankings assigned by those judges were
                used to design and optimise the system. The design was then
                implemented and deployed as part of a telephone-based
                entertainment application.}
}