Publications by Michael Berger
@misc{Carnival_SIGGRAPH_2010,
author = {Michael Berger and Gregor Hofer and Hiroshi Shimodaira},
title = {Carnival: a modular framework for automated facial
animation},
howpublished = {Poster at SIGGRAPH 2010},
note = {Bronze award winner, ACM Student Research Competition},
  abstract = {We present a software framework for speech- or
text-driven animation--including a platform-independent
API and an application implementing it--which unifies
state-of-the-art speech technology and graphics
technology within a single system.},
  address = {Los Angeles, CA, USA},
pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2010/carnival.pdf},
year = 2010
}
@inproceedings{felps_interspeech2010,
author = {Felps, Daniel and Geng, Christian and Berger, Michael
and Richmond, Korin and Gutierrez-Osuna, Ricardo},
title = {Relying on critical articulators to estimate vocal
tract spectra in an articulatory-acoustic database},
booktitle = {Proc. Interspeech},
pages = {1990--1993},
abstract = {We present a new phone-dependent feature weighting
scheme that can be used to map articulatory
configurations (e.g. EMA) onto vocal tract spectra
(e.g. MFCC) through table lookup. The approach consists
of assigning feature weights according to a feature's
ability to predict the acoustic distance between
frames. Since an articulator's predictive accuracy is
phone-dependent (e.g., lip location is a better
predictor for bilabial sounds than for palatal sounds),
a unique weight vector is found for each phone.
Inspection of the weights reveals a correspondence with
the expected critical articulators for many phones. The
proposed method reduces overall cepstral error by 6\%
when compared to a uniform weighting scheme. Vowels
show the greatest benefit, though improvements occur
for 80\% of the tested phones.},
keywords = {speech production, speech synthesis},
month = {September},
pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2010/IS100076.pdf},
year = 2010
}
@article{McGowanBerger2009,
author = {Richard S. McGowan and Michael A. Berger},
title = {Acoustic-articulatory mapping in vowels by locally
weighted regression},
journal = {Journal of the Acoustical Society of America},
volume = {126},
number = {4},
  pages = {2011--2032},
abstract = {A method for mapping between simultaneously measured
articulatory and acoustic data is proposed. The method
uses principal components analysis on the articulatory
and acoustic variables, and mapping between the domains
by locally weighted linear regression, or loess
[Cleveland, W. S. (1979) J. Am. Stat. Assoc. 74,
829--836]. The latter method permits local variation in
the slopes of the linear regression, assuming that the
function being approximated is smooth. The methodology
is applied to vowels of four speakers in the Wisconsin
X-ray Microbeam Speech Production Database, with
formant analysis. Results are examined in terms of (1)
examples of forward (articulation-to-acoustics)
mappings and inverse mappings, (2) distributions of
local slopes and constants, (3) examples of
correlations among slopes and constants, (4)
root-mean-square error, and (5) sensitivity of formant
frequencies to articulatory change. It is shown that
the results are qualitatively correct and that loess
performs better than global regression. The forward
mappings show different root-mean-square error
                 properties than the inverse mappings, indicating that
this method is better suited for the forward mappings
than the inverse mappings, at least for the data chosen
for the current study. Some preliminary results on
sensitivity of the first two formant frequencies to the
two most important articulatory principal components
are presented.},
categories = {Articulatory inversion, locally weighted regression,
X-ray microbeam, formant analysis},
pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2009/aam.pdf},
year = 2009
}
@misc{Hofer_Berger:sigg2010,
author = {Gregor Hofer and Korin Richmond and Michael Berger},
title = {Lip Synchronization by Acoustic Inversion},
  howpublished = {Poster at SIGGRAPH 2010},
  address = {Los Angeles, CA, USA},
pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2010/lipsync-sig10.pdf},
year = 2010
}
@article{10.1109/MCG.2011.71,
author = {Michael A. Berger and Gregor Hofer and Hiroshi
Shimodaira},
title = {Carnival -- Combining Speech Technology and Computer
Animation},
journal = {IEEE Computer Graphics and Applications},
volume = {31},
  pages = {80--89},
address = {Los Alamitos, CA, USA},
doi = {10.1109/MCG.2011.71},
issn = {0272-1716},
publisher = {IEEE Computer Society},
year = 2011
}