2006.bib
@comment{{This file has been generated by bib2bib 1.92}}
@comment{{Command line: /home/korin/bibtex2html-1.92-LINUX/bib2bib -oc /home/korin/projects/publications/new_output/transitdata/2006-citations -ob /home/korin/projects/publications/new_output/transitdata/2006.bib -c 'year : "2006"' /home/korin/projects/publications/filtlists/full_publications_list.bib}}
@inproceedings{cuayahuitletal_interspeech06,
  author     = {Heriberto Cuay{\'a}huitl and Steve Renals and Oliver
                Lemon and Hiroshi Shimodaira},
  title      = {Learning Multi-Goal Dialogue Strategies Using
                Reinforcement Learning With Reduced State-Action Spaces},
  booktitle  = {Proc. of INTERSPEECH},
  abstract   = {Learning dialogue strategies using the reinforcement
                learning framework is problematic due to its expensive
                computational cost. In this paper we propose an
                algorithm that reduces a state-action space to one
                which includes only valid state-actions. We performed
                experiments on full and reduced spaces using three
                systems (with 5, 9 and 20 slots) in the travel domain
                using a simulated environment. The task was to learn
                multi-goal dialogue strategies optimizing single and
                multiple confirmations. Average results using
                strategies learnt on reduced spaces reveal the
                following benefits against full spaces: 1) less
                computer memory (94\% reduction), 2) faster learning
                (93\% faster convergence) and better performance (8.4\%
                less time steps and 7.7\% higher reward).},
  categories = {reinforcement learning, spoken dialogue systems},
  month      = sep,
  pdf        = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2006/rss-icslp2006.pdf},
  year       = {2006}
}
@incollection{alhames-mlmi05,
  author     = {M. Al-Hames and A. Dielmann and D. Gatica-Perez and S.
                Reiter and S. Renals and G. Rigoll and D. Zhang},
  title      = {Multimodal Integration for Meeting Group Action
                Segmentation and Recognition},
  booktitle  = {Proc. Multimodal Interaction and Related Machine
                Learning Algorithms Workshop (MLMI--05)},
  publisher  = {Springer},
  editor     = {S. Renals and S. Bengio},
  volume     = {3869},
  series     = {Lecture Notes in Computer Science},
  pages      = {52--63},
  abstract   = {We address the problem of segmentation and recognition
                of sequences of multimodal human interactions in
                meetings. These interactions can be seen as a rough
                structure of a meeting, and can be used either as input
                for a meeting browser or as a first step towards a
                higher semantic analysis of the meeting. A common
                lexicon of multimodal group meeting actions, a shared
                meeting data set, and a common evaluation procedure
                enable us to compare the different approaches. We
                compare three different multimodal feature sets and four
                modelling infrastructures: a higher semantic feature
                approach, multi-layer HMMs, a multistream DBN, as well
                as a multi-stream mixed-state DBN for disturbed data.},
  categories = {m4,ami,multimodal,dbn,meetings,edinburgh,IDIAP,munich},
  year       = {2006}
}
@inproceedings{fitt_richmond_interspeech06,
  author     = {Sue Fitt and Korin Richmond},
  title      = {Redundancy and productivity in the speech technology
                lexicon -- can we do better?},
  booktitle  = {Proc. Interspeech 2006},
  abstract   = {Current lexica for speech technology typically contain
                much redundancy, while omitting useful information. A
                comparison with lexica in other media and for other
                purposes is instructive, as it highlights some features
                we may borrow for text-to-speech and speech recognition
                lexica. We describe some aspects of the new lexicon we
                are producing, Combilex, whose structure and
                implementation is specifically designed to reduce
                redundancy and improve the representation of productive
                elements of English. Most importantly, many English
                words are predictable derivations of baseforms, or
                compounds. Storing the lexicon as a combination of
                baseforms and derivational rules speeds up lexicon
                development, and improves coverage and maintainability.},
  categories = {dictionary, lexicon, pronunciation, English accents,
                productivity, derivation, redundancy, relational
                database},
  month      = sep,
  pdf        = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2006/Fitt_2006.pdf},
  year       = {2006}
}
@book{renals2006-mlmi06,
  editor     = {Steve Renals and Samy Bengio and Jonathan Fiscus},
  title      = {Machine learning for multimodal interaction
                (Proceedings of {MLMI} '06)},
  publisher  = {Springer-Verlag},
  volume     = {4299},
  series     = {Lecture Notes in Computer Science},
  year       = {2006}
}
@inproceedings{zhang-icslp2006,
  author     = {Le Zhang and Steve Renals},
  title      = {Phone Recognition Analysis for Trajectory {HMM}},
  booktitle  = {Proc. Interspeech 2006},
  address    = {Pittsburgh, USA},
  abstract   = { The trajectory {HMM} has been shown to be useful for
                model-based speech synthesis where a smoothed
                trajectory is generated using temporal constraints
                imposed by dynamic features. To evaluate the
                performance of such model on an ASR task, we present a
                trajectory decoder based on tree search with delayed
                path merging. Experiment on a speaker-dependent phone
                recognition task using the MOCHA-TIMIT database shows
                that the MLE-trained trajectory model, while retaining
                attractive properties of being a proper generative
                model, tends to favour over-smoothed trajectory among
                competing hypothesises, and does not perform better
                than a conventional {HMM}. We use this to build an
                argument that models giving better fit on training data
                may suffer a reduction of discrimination by being too
                faithful to training data. This partially explains why
                alternative acoustic models that try to explicitly
                model temporal constraints do not achieve significant
                improvements in ASR. },
  key        = {asr},
  month      = sep,
  pdf        = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2006/zhang-icslp2006.pdf},
  year       = {2006}
}
@inproceedings{janin06:rt06s,
  author     = {Janin, A. and Stolcke, A. and Anguera, X. and Boakye,
                K. and {\c{C}}etin, {\"O}. and Frankel, J. and Zheng, J.},
  title      = {The {ICSI-SRI} Spring 2006 Meeting Recognition System},
  booktitle  = {Proc. MLMI},
  address    = {Washington, DC, USA},
  abstract   = {We describe the development of the ICSI-SRI speech
                recognition system for the National Institute of
                Standards and Technology (NIST) Spring 2006 Meeting
                Rich Transcription (RT-06S) evaluation, highlighting
                improvements made since last year, including
                improvements to the delay-and-sum algorithm, the
                nearfield segmenter, language models, posterior-based
                features, HMM adaptation methods, and adapting to a
                small amount of new lecture data. Results are reported
                on RT-05S and RT-06S meeting data. Compared to the
                RT-05S conference system, we achieved an overall
                improvement of 4\% relative in the MDM and SDM
                conditions, and 11\% relative in the IHM condition. On
                lecture data, we achieved an overall improvement of 8\%
                relative in the SDM condition, 12\% on MDM, 14\% on
                ADM, and 15\% on IHM.},
  categories = {am,asr},
  month      = may,
  pdf        = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2006/Janin_et_al_RT06s.pdf},
  ps         = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2006/Janin_et_al_RT06s.ps},
  year       = {2006}
}
@inproceedings{NistevalAMI06,
  author     = {T. Hain and L. Burget and J. Dines and
                G. Garau and M. Karafiat and M. Lincoln and J. Vepa and
                V. Wan},
  title      = {The {AMI} Meeting Transcription System: Progress and
                Performance},
  booktitle  = {Proceedings of the Rich Transcription 2006 Spring
                Meeting Recognition Evaluation},
  abstract   = {We present the AMI 2006 system for the transcription
                of speech in meetings. The system was jointly developed
                by multiple sites on the basis of the 2005 system for
                participation in the NIST RT'05 evaluations. The paper
                describes major developments such as improvements in
                automatic segmentation, cross-domain model adaptation,
                inclusion of MLP based features, improvements in
                decoding, language modelling and vocal tract length
                normalisation, the use of a new decoder, and a new
                system architecture. This is followed by a
                comprehensive description of the final system and its
                performance in the NIST RT'06s evaluations. In
                comparison to the previous year word error rate results
                on the individual headset microphone task were reduced
                by 20\% relative.},
  categories = {LVCSR, NIST Meeting Transcription Evaluation RT06S},
  pdf        = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2006/AMIasr.nist06.pdf},
  year       = {2006}
}
@article{Ximera06,
  author     = {Hisashi Kawai and Tomoki Toda and Junichi Yamagishi
                and Toshio Hirai and Jinfu Ni and Nobuyuki Nishizawa
                and Minoru Tsuzaki and Keiichi Tokuda},
  title      = {{XIMERA}: a concatenative speech synthesis system with
                large scale corpora},
  journal    = {IEICE Trans. Information and Systems},
  volume     = {J89-D-II},
  number     = {12},
  pages      = {2688--2698},
  month      = dec,
  year       = {2006}
}
@incollection{king:ELL2_2006b,
  author     = {Simon King},
  title      = {Handling variation in speech and language processing},
  booktitle  = {Encyclopedia of Language and Linguistics},
  publisher  = {Elsevier},
  editor     = {Keith Brown},
  edition    = {Second},
  year       = {2006}
}
@inproceedings{murray06,
  author     = {G. Murray and S. Renals and J. Moore and J. Carletta},
  title      = {Incorporating Speaker and Discourse Features into
                Speech Summarization},
  booktitle  = {Proceedings of the Human Language Technology
                Conference - North American Chapter of the Association
                for Computational Linguistics Meeting (HLT-NAACL) 2006,
                New York City, USA},
  abstract   = {The research presented herein explores the usefulness
                of incorporating speaker and discourse features in an
                automatic speech summarization system applied to
                meeting recordings from the ICSI Meetings corpus. By
                analyzing speaker activity, turn-taking and discourse
                cues, it is hypothesized that a system can outperform
                solely text-based methods inherited from the field of
                text summarization. The summarization methods are
                described, two evaluation methods are applied and
                compared, and the results clearly show that utilizing
                such features is advantageous and efficient. Even
                simple methods relying on discourse cues and speaker
                activity can outperform text summarization approaches.},
  categories = {summarization, speech summarization, prosody, latent
                semantic analysis},
  month      = jun,
  pdf        = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2006/hlt2006-final.pdf},
  year       = {2006}
}
@inproceedings{cuayahuitletal_slt06,
  author     = {Heriberto Cuay{\'a}huitl and Steve Renals and Oliver
                Lemon and Hiroshi Shimodaira},
  title      = {Reinforcement Learning of Dialogue Strategies With
                Hierarchical Abstract Machines},
  booktitle  = {Proc. of IEEE/ACL Workshop on Spoken Language
                Technology (SLT)},
  abstract   = {In this paper we propose partially specified dialogue
                strategies for dialogue strategy optimization, where
                part of the strategy is specified deterministically and
                the rest optimized with Reinforcement Learning (RL). To
                do this we apply RL with Hierarchical Abstract Machines
                (HAMs). We also propose to build simulated users using
                HAMs, incorporating a combination of hierarchical
                deterministic and probabilistic behaviour. We performed
                experiments using a single-goal flight booking dialogue
                system, and compare two dialogue strategies
                (deterministic and optimized) using three types of
                simulated user (novice, experienced and expert). Our
                results show that HAMs are promising for both dialogue
                optimization and simulation, and provide evidence that
                indeed partially specified dialogue strategies can
                outperform deterministic ones (on average 4.7 fewer
                system turns) with faster learning than the traditional
                RL framework.},
  categories = {reinforcement learning, spoken dialogue systems},
  month      = dec,
  pdf        = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2006/ham-slt2006.pdf},
  year       = {2006}
}
@article{vepa_king_tsap05,
  author     = {Jithendra Vepa and Simon King},
  title      = {Subjective Evaluation of Join Cost and Smoothing
                Methods for Unit Selection Speech Synthesis},
  journal    = {IEEE Transactions on Audio, Speech, and Language
                Processing},
  volume     = {14},
  number     = {5},
  pages      = {1763--1771},
  abstract   = {In unit selection-based concatenative speech
                synthesis, join cost (also known as concatenation
                cost), which measures how well two units can be joined
                together, is one of the main criteria for selecting
                appropriate units from the inventory. Usually, some
                form of local parameter smoothing is also needed to
                disguise the remaining discontinuities. This paper
                presents a subjective evaluation of three join cost
                functions and three smoothing methods. We describe the
                design and performance of a listening test. The three
                join cost functions were taken from our previous study,
                where we proposed join cost functions derived from
                spectral distances, which have good correlations with
                perceptual scores obtained for a range of concatenation
                discontinuities. This evaluation allows us to further
                validate their ability to predict concatenation
                discontinuities. The units for synthesis stimuli are
                obtained from a state-of-the-art unit selection
                text-to-speech system: rVoice from Rhetorical Systems
                Ltd. In this paper, we report listeners' preferences
                for each join cost in combination with each smoothing
                method.},
  categories = {TTS, join cost, listening test},
  month      = sep,
  pdf        = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2005/vepa_king_ieee2005.pdf},
  year       = {2006}
}
@article{frankel06:adapt,
  author     = {Frankel, J. and King, S.},
  title      = {Observation Process Adaptation for Linear Dynamic
                Models},
  journal    = {Speech Communication},
  volume     = {48},
  number     = {9},
  pages      = {1192--1199},
  abstract   = {This work introduces two methods for adapting the
                observation process parameters of linear dynamic models
                (LDM) or other linear-Gaussian models. The first method
                uses the expectation-maximization (EM) algorithm to
                estimate transforms for location and covariance
                parameters, and the second uses a generalized EM (GEM)
                approach which reduces computation in making updates
                from $O(p^6)$ to $O(p^3)$, where $p$ is the feature
                dimension. We present the results of speaker adaptation
                on TIMIT phone classification and recognition
                experiments with relative error reductions of up to
                $6\%$. Importantly, we find minimal differences in the
                results from EM and GEM. We therefore propose that the
                GEM approach be applied to adaptation of hidden Markov
                models which use non-diagonal covariances. We provide
                the necessary update equations.},
  categories = {am,asr,ldm,timit,edinburgh},
  month      = sep,
  pdf        = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2006/Frankel_King_SPECOM2006.pdf},
  ps         = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2006/Frankel_King_SPECOM2006.ps},
  year       = {2006}
}
@inproceedings{clark_blizzard2006,
  author     = {Clark, R. and Richmond, K. and Strom, V. and King, S.},
  title      = {Multisyn Voices for the {Blizzard} {Challenge} 2006},
  booktitle  = {Proc. Blizzard Challenge Workshop (Interspeech
                Satellite)},
  address    = {Pittsburgh, USA},
  note       = {(http://festvox.org/blizzard/blizzard2006.html)},
  abstract   = {This paper describes the process of building unit
                selection voices for the Festival Multisyn engine using
                the ATR dataset provided for the Blizzard Challenge
                2006. We begin by discussing recent improvements that
                we have made to the Multisyn voice building process,
                prompted by our participation in the Blizzard Challenge
                2006. We then go on to discuss our interpretation of
                the results observed. Finally, we conclude with some
                comments and suggestions for the formulation of future
                Blizzard Challenges.},
  categories = {tts, blizzard, multisyn, unit selection},
  key        = {clark_blizzard2006},
  month      = sep,
  pdf        = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2006/cstr_blizzard2006.pdf},
  year       = {2006}
}
@inproceedings{lal_interspeech06,
  author     = {Partha Lal},
  title      = {A Comparison of Singing Evaluation Algorithms},
  booktitle  = {Proc. Interspeech 2006},
  abstract   = {This paper describes a system that compares user
                renditions of short sung clips with the original
                version of those clips. The F0 of both recordings was
                estimated and then Viterbi-aligned with each other. The
                total difference in pitch after alignment was used as a
                distance metric and transformed into a rating out of
                ten, to indicate to the user how close he or she was to
                the original singer. An existing corpus of sung speech
                was used for initial design and optimisation of the
                system. We then collected further development and
                evaluation corpora - these recordings were judged for
                closeness to an original recording by two human judges.
                The rankings assigned by those judges were used to
                design and optimise the system. The design was then
                implemented and deployed as part of a telephone-based
                entertainment application.},
  categories = {automated singing evaluation, pitch tracking,
                entertainment applications},
  month      = sep,
  pdf        = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2006/singing.pdf},
  year       = {2006}
}
@phdthesis{calhoun:06,
  author     = {Calhoun, Sasha},
  title      = {Information Structure and the Prosodic Structure of
                {English}: a Probabilistic Relationship},
  school     = {University of Edinburgh},
  abstract   = {This thesis looks at how information structure is
                signalled prosodically in English. It has been
                standardly held that information structure is primarily
                signalled by the distribution of pitch accents within
                syntax structure, as well as intonation event type.
                Rather, it is argued that previous work has
                underestimated the importance, and richness, of
                metrical prosodic structure and its role in signalling
                information structure. A new approach is proposed: to
                view information structure as a strong constraint on
                the mapping of words onto metrical prosodic structure.
                Focal elements (kontrast) align with nuclear
                prominence, while accents on other words are not
                usually directly 'meaningful'. Information units
                (theme/rheme) try to align with prosodic phrases. This
                mapping is probabilistic, so it is also influenced by
                lexical and syntactic effects, as well as rhythmical
                constraints and other features including emphasis.
                Qualitative and quantitative analysis is presented in
                support of these claims using the NXT Switchboard
                corpus which has been annotated with substantial new
                layers of semantic and prosodic features.},
  year       = {2006}
}
@inproceedings{hachey06,
  author     = {B. Hachey and G. Murray and D. Reitter},
  title      = {Dimensionality Reduction Aids Term Co-Occurrence Based
                Multi-Document Summarization},
  booktitle  = {Proceedings of ACL Summarization Workshop 2006,
                Sydney, Australia},
  abstract   = {A key task in an extraction system for query-oriented
                multi-document summarisation, necessary for computing
                relevance and redundancy, is modelling text semantics.
                In the Embra system, we use a representation derived
                from the singular value decomposition of a term
                co-occurrence matrix. We present methods to show the
                reliability of performance improvements. We find that
                Embra performs better with dimensionality reduction.},
  categories = {summarization, latent semantic analysis},
  month      = jun,
  pdf        = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2006/coling-acl2006.pdf},
  year       = {2006}
}
@inproceedings{clark_king:proc:2006,
  author     = {Robert A. J. Clark and Simon King},
  title      = {Joint Prosodic and Segmental Unit Selection Speech
                Synthesis},
  booktitle  = {Proc. Interspeech 2006},
  address    = {Pittsburgh, USA},
  abstract   = {We describe a unit selection technique for
                text-to-speech synthesis which jointly searches the
                space of possible diphone sequences and the space of
                possible prosodic unit sequences in order to produce
                synthetic speech with more natural prosody. We
                demonstrate that this search, although currently
                computationally expensive, can achieve improved
                intonation compared to a baseline in which only the
                space of possible diphone sequences is searched. We
                discuss ways in which the search could be made
                sufficiently efficient for use in a real-time system.},
  month      = sep,
  pdf        = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2006/clarkking_interspeech_2006.pdf},
  ps         = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2006/clarkking_interspeech_2006.ps},
  year       = {2006}
}
@inproceedings{bell_burrows_taylor_sp2006,
  author     = {Peter Bell and Tina Burrows and Paul Taylor},
  title      = {Adaptation of Prosodic Phrasing Models},
  booktitle  = {Proc. Speech Prosody 2006},
  address    = {Dresden, Germany},
  abstract   = {There is considerable variation in the prosodic
                phrasing of speech between different speakers and
                speech styles. Due to the time and cost of obtaining
                large quantities of data to train a model for every
                variation, it is desirable to develop models that can
                be adapted to new conditions with a limited amount of
                training data. We describe a technique for adapting
                HMM-based phrase boundary prediction models which
                alters a statistic distribution of prosodic phrase
                lengths. The adapted models show improved prediction
                performance across different speakers and types of
                spoken material.},
  month      = may,
  pdf        = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2006/phrasing_sp2006.pdf},
  year       = {2006}
}
@incollection{king:ELL2_2006a,
  author     = {Simon King},
  title      = {Language variation in speech technologies},
  booktitle  = {Encyclopedia of Language and Linguistics},
  publisher  = {Elsevier},
  editor     = {Keith Brown},
  edition    = {Second},
  year       = {2006}
}
@inproceedings{hsueh2006asm,
  author     = {Hsueh, P. and Moore, J. and Renals, S.},
  title      = {Automatic Segmentation of Multiparty Dialogue},
  booktitle  = {Proc. EACL06},
  abstract   = {In this paper, we investigate the problem of
                automatically predicting segment boundaries in spoken
                multiparty dialogue. We extend prior work in two ways.
                We first apply approaches that have been proposed for
                predicting top-level topic shifts to the problem of
                identifying subtopic boundaries. We then explore the
                impact on performance of using ASR output as opposed to
                human transcription. Examination of the effect of
                features shows that predicting top-level and predicting
                subtopic boundaries are two distinct tasks: (1) for
                predicting subtopic boundaries, the lexical
                cohesion-based approach alone can achieve competitive
                results, (2) for predicting top-level boundaries, the
                machine learning approach that combines
                lexical-cohesion and conversational features performs
                best, and (3) conversational cues, such as cue phrases
                and overlapping speech, are better indicators for the
                top-level prediction task. We also find that the
                transcription errors inevitable in ASR output have a
                negative impact on models that combine lexical-cohesion
                and conversational features, but do not change the
                general preference of approach for the two tasks.},
  pdf        = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2006/21_1_hsuehmoorerenals.pdf},
  year       = {2006}
}
@inproceedings{strom06,
  author     = {Volker Strom and Robert Clark and Simon King},
  title      = {Expressive Prosody for Unit-selection Speech Synthesis},
  booktitle  = {Proc.~Interspeech},
  address    = {Pittsburgh},
  abstract   = {Current unit selection speech synthesis voices cannot
                produce emphasis or interrogative contours because of a
                lack of the necessary prosodic variation in the
                recorded speech database. A method of recording script
                design is proposed which addresses this shortcoming.
                Appropriate components were added to the target cost
                function of the Festival Multisyn engine, and a
                perceptual evaluation showed a clear preference over
                the baseline system.},
  pdf        = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2006/strom06.pdf},
  ps         = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2006/strom06.ps},
  year       = {2006}
}
@incollection{al-hames2006-mlmi06,
  author     = {Marc Al-Hames and Thomas Hain and Jan Cernocky and
                Sascha Schreiber and Mannes Poel and Ronald Mueller and
                Sebastien Marcel and David {van Leeuwen} and Jean-Marc
                Odobez and Sileye Ba and Herv{\'e} Bourlard and Fabien
                Cardinaux and Daniel Gatica-Perez and Adam Janin and
                Petr Motlicek and Stephan Reiter and Steve Renals and
                Jeroen {van Rest} and Rutger Rienks and Gerhard Rigoll
                and Kevin Smith and Andrew Thean and Pavel Zemcik},
  title      = {Audio-video processing in meetings: Seven questions
                and current {AMI} answers},
  booktitle  = {Machine Learning for Multimodal Interaction (Proc.
                MLMI '06)},
  publisher  = {Springer},
  editor     = {S. Renals and S. Bengio and J. G. Fiscus},
  volume     = {4299},
  series     = {Lecture Notes in Computer Science},
  pages      = {24--35},
  year       = {2006}
}
@book{renals2006-mlmi05,
  editor     = {Steve Renals and Samy Bengio},
  title      = {Machine learning for multimodal interaction
                (Proceedings of {MLMI} '05)},
  publisher  = {Springer-Verlag},
  volume     = {3869},
  series     = {Lecture Notes in Computer Science},
  year       = {2006}
}
@inproceedings{Shimodaira:kes06,
  author     = {Chie Shimodaira and Hiroshi Shimodaira and Susumu
                Kunifuji},
  title      = {A Divergent-Style Learning Support Tool for {English}
                Learners Using a Thesaurus Diagram},
  booktitle  = {Proc. KES2006},
  address    = {Bournemouth, United Kingdom},
  abstract   = { This paper proposes an English learning support tool
                which provides users with divergent information to find
                the right words and expressions. In contrast to a
                number of software tools for English translation and
                composition, the proposed tool is designed to give
                users not only the right answer to the user's query but
                also a lot of words and examples which are relevant to
                the query. Based on the lexical information provided by
                the lexical database, WordNet, the proposed tool
                provides users with a thesaurus diagram, in which
                synonym sets and relation links are presented in
                multiple windows to help users to choose adequate words
                and understand similarities and differences between
                words. Subjective experiments are carried out to
                evaluate the system. },
  categories = {knowledge engineering},
  month      = oct,
  pdf        = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2006/kes2006.pdf},
  year       = {2006}
}
@inproceedings{murray06b,
  author     = {G. Murray and S. Renals and M. Taboada},
  title      = {Prosodic Correlates of Rhetorical Relations},
  booktitle  = {Proceedings of HLT/NAACL ACTS Workshop, 2006, New York
                City, USA},
  abstract   = {This paper investigates the usefulness of prosodic
                features in classifying rhetorical relations between
                utterances in meeting recordings. Five rhetorical
                relations of \textit{contrast}, \textit{elaboration},
                \textit{summary}, \textit{question} and \textit{cause}
                are explored. Three training methods - supervised,
                unsupervised, and combined - are compared, and
                classification is carried out using support vector
                machines. The results of this pilot study are
                encouraging but mixed, with pairwise classification
                achieving an average of 68\% accuracy in discerning
                between relation pairs using only prosodic features,
                but multi-class classification performing only slightly
                better than chance.},
  categories = {rhetorical structure theory, prosody, unsupervised
                learning},
  month      = jun,
  pdf        = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2006/dacts-hlt.pdf},
  year       = {2006}
}
@inproceedings{Shimodaira:iwfhr06,
  author     = {Junko Tokuno and Mitsuru Nakai and Hiroshi Shimodaira
                and Shigeki Sagayama and Masaki Nakagawa},
  title      = {On-line Handwritten Character Recognition Selectively
                Employing Hierarchical Spatial Relationships among
                Subpatterns},
  booktitle  = {Proc. IWFHR-10},
  address    = {La Baule, France},
  abstract   = { This paper proposes an on-line handwritten character
                pattern recognition method that examines spatial
                relationships among subpatterns which are components of
                a character pattern. Conventional methods evaluating
                spatial relationships among subpatterns have not
                considered characteristics of deformed handwritings and
                evaluate all the spatial relationships equally.
                However, the deformations of spatial features are
                different within a character pattern. In our approach,
                we assume that the distortions of spatial features are
                dependent on the hierarchy of character patterns so
                that we selectively evaluate hierarchical spatial
                relationships of subpatterns by employing Bayesian
                network as a post-processor of our sub-stroke based HMM
                recognition system. Experiments of on-line handwritten
                Kanji character recognition with a lexicon of 1,016
                elementary characters revealed that the approach we
                propose improves the recognition accuracy for different
                types of deformations. },
  categories = {online handwriting recognition},
  month      = oct,
  year       = {2006}
}
@inproceedings{richmond2006,
  author     = {Richmond, K.},
  title      = {A Trajectory Mixture Density Network for the
                Acoustic-Articulatory Inversion Mapping},
  booktitle  = {Proc. Interspeech},
  address    = {Pittsburgh, USA},
  abstract   = {This paper proposes a trajectory model which is based
                on a mixture density network trained with target
                features augmented with dynamic features together with
                an algorithm for estimating maximum likelihood
                trajectories which respects constraints between the
                static and derived dynamic features. This model was
                evaluated on an inversion mapping task. We found the
                introduction of the trajectory model successfully
                reduced root mean square error by up to $7.5\%$, as
                well as increasing correlation scores.},
  categories = {acoustic-articulatory, inversion mapping, MDN, MLPG,
                trajectory modelling},
  month      = sep,
  pdf        = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2006/interspeech2006_richmond.pdf},
  year       = {2006}
}
@inproceedings{murray06c,
  author     = {G. Murray and S. Renals},
  title      = {Dialogue Act Compression Via Pitch Contour
                Preservation},
  booktitle  = {Proceedings of the 9th International Conference on
                Spoken Language Processing, Pittsburgh, USA},
  abstract   = {This paper explores the usefulness of prosody in
                automatically compressing dialogue acts from meeting
                speech. Specifically, this work attempts to compress
                utterances by preserving the pitch contour of the
                original whole utterance. Two methods of doing this are
                described in detail and are evaluated
                \textit{subjectively} using human annotators and
                \textit{objectively} using edit distance with a
                human-authored gold-standard. Both metrics show that
                such a prosodic approach is much better than the random
                baseline approach and significantly better than a
                simple text compression method.},
  categories = {automatic compression, prosody, summarization},
  month      = sep,
  pdf        = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2006/inter2006.pdf},
  year       = {2006}
}