@comment{Publications by Partha Lal}
@comment{s0565860.bib}
@inproceedings{livescu07:JHU_summary,
  author = {Livescu, K. and {\c{C}}etin, {\"O}. and Hasegawa-Johnson, M.
                and King, S. and Bartels, C. and Borges, N. and Kantor,
                A. and Lal, P. and Yung, L. and Bezman, A. and
                Dawson-Haggerty, S. and Woods, B. and Frankel, J. and
                Magimai-Doss, M. and Saenko, K.},
  title = {Articulatory feature-based methods for acoustic and
                audio-visual speech recognition: {S}ummary from the
                2006 {JHU} {S}ummer {W}orkshop},
  booktitle = {Proc. ICASSP},
  address = {Honolulu},
  abstract = {We report on investigations, conducted at the 2006
                Johns Hopkins Workshop, into the use of articulatory
                features (AFs) for observation and pronunciation models
                in speech recognition. In the area of observation
                modeling, we use the outputs of AF classifiers both
                directly, in an extension of hybrid HMM/neural network
                models, and as part of the observation vector, an
                extension of the tandem approach. In the area of
                pronunciation modeling, we investigate a model having
                multiple streams of AF states with soft synchrony
                constraints, for both audio-only and audio-visual
                recognition. The models are implemented as dynamic
                Bayesian networks, and tested on tasks from the
                Small-Vocabulary Switchboard (SVitchboard) corpus and
                the CUAVE audio-visual digits corpus. Finally, we
                analyze AF classification and forced alignment using a
                newly collected set of feature-level manual
                transcriptions.},
  month = apr,
  pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2007/livescu_icassp07_sum.pdf},
  year = 2007
}
@inproceedings{lal_interspeech06,
  author = {Lal, Partha},
  title = {A Comparison of Singing Evaluation Algorithms},
  booktitle = {Proc. Interspeech},
  abstract = {This paper describes a system that compares user
                renditions of short sung clips with the original
                version of those clips. The F0 of both recordings was
                estimated and then Viterbi-aligned with each other. The
                total difference in pitch after alignment was used as a
                distance metric and transformed into a rating out of
                ten, to indicate to the user how close he or she was to
                the original singer. An existing corpus of sung speech
                was used for initial design and optimisation of the
                system. We then collected further development and
                evaluation corpora - these recordings were judged for
                closeness to an original recording by two human judges.
                The rankings assigned by those judges were used to
                design and optimise the system. The design was then
                implemented and deployed as part of a telephone-based
                entertainment application.},
  categories = {automated singing evaluation, pitch tracking,
                entertainment applications},
  month = sep,
  pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2006/singing.pdf},
  year = 2006
}