
@comment{{This file has been generated by bib2bib 1.92}}
@comment{{Command line: /home/korin/bibtex2html-1.92-LINUX/bib2bib -oc /home/korin/projects/publications/new_output/transitdata/2006-citations -ob /home/korin/projects/publications/new_output/transitdata/2006.bib -c 'year : "2006"' /home/korin/projects/publications/filtlists/full_publications_list.bib}}
@inproceedings{cuayahuitletal_interspeech06,
  author = {Heriberto Cuayáhuitl and Steve Renals and Oliver
                   Lemon and Hiroshi Shimodaira},
  title = {Learning Multi-Goal Dialogue Strategies Using
                   Reinforcement Learning With Reduced State-Action Spaces},
  booktitle = {Proc. of INTERSPEECH},
  abstract = {Learning dialogue strategies using the reinforcement
                   learning framework is problematic due to its expensive
                   computational cost. In this paper we propose an
                   algorithm that reduces a state-action space to one
                   which includes only valid state-actions. We performed
                   experiments on full and reduced spaces using three
                   systems (with 5, 9 and 20 slots) in the travel domain
                   using a simulated environment. The task was to learn
                   multi-goal dialogue strategies optimizing single and
                    multiple confirmations. Average results using
                    strategies learnt on reduced spaces reveal the
                    following benefits over full spaces: 1) less
                    computer memory (94\% reduction), 2) faster learning
                    (93\% faster convergence) and 3) better performance (8.4\%
                    fewer time steps and 7.7\% higher reward).},
  categories = {reinforcement learning, spoken dialogue systems},
  month = sep,
  pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2006/rss-icslp2006.pdf},
  year = 2006
}
@incollection{alhames-mlmi05,
  author = {M. Al-Hames and A. Dielmann and D. Gatica-Perez and S.
                   Reiter and S. Renals and G. Rigoll and D. Zhang},
  title = {Multimodal Integration for Meeting Group Action
                   Segmentation and Recognition},
  booktitle = {Proc. Multimodal Interaction and Related Machine
                   Learning Algorithms Workshop (MLMI--05)},
  publisher = {Springer},
  editor = {S. Renals and S. Bengio},
  pages = {52--63},
  abstract = {We address the problem of segmentation and recognition
                   of sequences of multimodal human interactions in
                   meetings. These interactions can be seen as a rough
                   structure of a meeting, and can be used either as input
                   for a meeting browser or as a first step towards a
                   higher semantic analysis of the meeting. A common
                   lexicon of multimodal group meeting actions, a shared
                   meeting data set, and a common evaluation procedure
                   enable us to compare the different approaches. We
                   compare three different multimodal feature sets and our
                   modelling infrastructures: a higher semantic feature
                   approach, multi-layer HMMs, a multistream DBN, as well
                   as a multi-stream mixed-state DBN for disturbed data.},
  categories = {m4,ami,multimodal,dbn,meetings,edinburgh,IDIAP,munich},
  year = 2006
}
@inproceedings{fitt_richmond_interspeech06,
  author = {Sue Fitt and Korin Richmond},
  title = {Redundancy and productivity in the speech technology
                   lexicon - can we do better?},
  booktitle = {Proc. Interspeech 2006},
  abstract = {Current lexica for speech technology typically contain
                   much redundancy, while omitting useful information. A
                   comparison with lexica in other media and for other
                   purposes is instructive, as it highlights some features
                   we may borrow for text-to-speech and speech recognition
                   lexica. We describe some aspects of the new lexicon we
                   are producing, Combilex, whose structure and
                    implementation are specifically designed to reduce
                   redundancy and improve the representation of productive
                   elements of English. Most importantly, many English
                   words are predictable derivations of baseforms, or
                   compounds. Storing the lexicon as a combination of
                   baseforms and derivational rules speeds up lexicon
                   development, and improves coverage and maintainability.},
  categories = {dictionary, lexicon, pronunciation, English accents,
                   productivity, derivation, redundancy, relational
                   database},
  month = sep,
  pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2006/Fitt_2006.pdf},
  year = 2006
}
@book{renals2006-mlmi06,
  editor = {Steve Renals and Samy Bengio and Jonathan Fiscus},
  title = {Machine learning for multimodal interaction
                   (Proceedings of {MLMI} '06)},
  publisher = {Springer-Verlag},
  volume = {4299},
  series = {Lecture Notes in Computer Science},
  year = 2006
}
@inproceedings{zhang-icslp2006,
  author = {Le Zhang and Steve Renals},
  title = {Phone Recognition Analysis for Trajectory {HMM}},
  booktitle = {Proc. Interspeech 2006},
  address = {Pittsburgh, USA},
  abstract = { The trajectory {HMM} has been shown to be useful for
                   model-based speech synthesis where a smoothed
                   trajectory is generated using temporal constraints
                    imposed by dynamic features. To evaluate the
                    performance of such a model on an ASR task, we present
                    a trajectory decoder based on tree search with delayed
                    path merging. An experiment on a speaker-dependent
                    phone recognition task using the MOCHA-TIMIT database
                    shows that the MLE-trained trajectory model, while
                    retaining the attractive property of being a proper
                    generative model, tends to favour over-smoothed
                    trajectories among competing hypotheses, and does not
                    perform better than a conventional {HMM}. We use this
                    to argue that models which fit the training data more
                    closely may suffer a loss of discrimination by being
                    too faithful to the training data. This partially explains why
                   alternative acoustic models that try to explicitly
                   model temporal constraints do not achieve significant
                   improvements in ASR. },
  key = {asr},
  month = sep,
  pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2006/zhang-icslp2006.pdf},
  year = 2006
}
@inproceedings{janin06:rt06s,
  author = {Janin, A. and Stolcke, A. and Anguera, X. and Boakye,
                   K. and Çetin, Ö. and Frankel, J. and Zheng, J.},
  title = {The {ICSI-SRI} Spring 2006 Meeting Recognition System},
  booktitle = {Proc. MLMI},
  address = {Washington DC.},
  abstract = {We describe the development of the ICSI-SRI speech
                   recognition system for the National Institute of
                   Standards and Technology (NIST) Spring 2006 Meeting
                   Rich Transcription (RT-06S) evaluation, highlighting
                   improvements made since last year, including
                   improvements to the delay-and-sum algorithm, the
                   nearfield segmenter, language models, posterior-based
                   features, HMM adaptation methods, and adapting to a
                   small amount of new lecture data. Results are reported
                   on RT-05S and RT-06S meeting data. Compared to the
                   RT-05S conference system, we achieved an overall
                   improvement of 4\% relative in the MDM and SDM
                   conditions, and 11\% relative in the IHM condition. On
                   lecture data, we achieved an overall improvement of 8\%
                   relative in the SDM condition, 12\% on MDM, 14\% on
                   ADM, and 15\% on IHM.},
  categories = {am,asr},
  month = may,
  pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2006/Janin_et_al_RT06s.pdf},
  ps = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2006/Janin_et_al_RT06s.ps},
  year = 2006
}
@inproceedings{NistevalAMI06,
  author = {T. Hain and L. Burget and J. Dines and G. Garau and
                    M. Karafiat and M. Lincoln and J. Vepa and V. Wan},
  title = {The {AMI} Meeting Transcription System: Progress and
                   Performance},
  booktitle = {Proceedings of the Rich Transcription 2006 Spring
                   Meeting Recognition Evaluation},
  abstract = {We present the AMI 2006 system for the transcription
                   of speech in meetings. The system was jointly developed
                   by multiple sites on the basis of the 2005 system for
                   participation in the NIST RT'05 evaluations. The paper
                   describes major developments such as improvements in
                   automatic segmentation, cross-domain model adaptation,
                   inclusion of MLP based features, improvements in
                   decoding, language modelling and vocal tract length
                   normalisation, the use of a new decoder, and a new
                   system architecture. This is followed by a
                   comprehensive description of the final system and its
                    performance in the NIST RT'06s evaluations. In
                    comparison to the previous year, word error rates
                    on the individual headset microphone task were reduced
                    by 20\% relative.},
  categories = {LVCSR, NIST Meeting Transcription Evaluation RT06S},
  pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2006/AMIasr.nist06.pdf},
  year = 2006
}
@article{Ximera06,
  author = {Hisashi Kawai and Tomoki Toda and Junichi Yamagishi
                   and Toshio Hirai and Jinfu Ni and Nobuyuki Nishizawa
                   and Minoru Tsuzaki and Keiichi Tokuda},
  title = {XIMERA: a concatenative speech synthesis system with
                   large scale corpora},
  journal = {IEICE Trans. Information and Systems},
  volume = {J89-D-II},
  number = 12,
  pages = {2688--2698},
  month = dec,
  year = 2006
}
@incollection{king:ELL2_2006b,
  author = {Simon King},
  title = {Handling variation in speech and language processing},
  booktitle = {Encyclopedia of Language and Linguistics},
  publisher = {Elsevier},
  editor = {Keith Brown},
  edition = {2nd},
  year = 2006
}
@inproceedings{murray06,
  author = {G. Murray and S. Renals and J. Moore and J. Carletta},
  title = {Incorporating Speaker and Discourse Features into
                   Speech Summarization},
  booktitle = {Proceedings of the Human Language Technology
                   Conference - North American Chapter of the Association
                   for Computational Linguistics Meeting (HLT-NAACL) 2006,
                   New York City, USA},
  abstract = {The research presented herein explores the usefulness
                   of incorporating speaker and discourse features in an
                   automatic speech summarization system applied to
                   meeting recordings from the ICSI Meetings corpus. By
                   analyzing speaker activity, turn-taking and discourse
                   cues, it is hypothesized that a system can outperform
                   solely text-based methods inherited from the field of
                   text summarization. The summarization methods are
                   described, two evaluation methods are applied and
                   compared, and the results clearly show that utilizing
                   such features is advantageous and efficient. Even
                   simple methods relying on discourse cues and speaker
                   activity can outperform text summarization approaches.},
  categories = {summarization, speech summarization, prosody, latent
                   semantic analysis},
  month = jun,
  pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2006/hlt2006-final.pdf},
  year = 2006
}
@inproceedings{cuayahuitletal_slt06,
  author = {Heriberto Cuayáhuitl and Steve Renals and Oliver
                   Lemon and Hiroshi Shimodaira},
  title = {Reinforcement Learning of Dialogue Strategies With
                   Hierarchical Abstract Machines},
  booktitle = {Proc. of IEEE/ACL Workshop on Spoken Language
                   Technology (SLT)},
  abstract = {In this paper we propose partially specified dialogue
                   strategies for dialogue strategy optimization, where
                   part of the strategy is specified deterministically and
                   the rest optimized with Reinforcement Learning (RL). To
                   do this we apply RL with Hierarchical Abstract Machines
                   (HAMs). We also propose to build simulated users using
                   HAMs, incorporating a combination of hierarchical
                   deterministic and probabilistic behaviour. We performed
                   experiments using a single-goal flight booking dialogue
                    system, and compared two dialogue strategies
                   (deterministic and optimized) using three types of
                   simulated user (novice, experienced and expert). Our
                   results show that HAMs are promising for both dialogue
                   optimization and simulation, and provide evidence that
                   indeed partially specified dialogue strategies can
                   outperform deterministic ones (on average 4.7 fewer
                   system turns) with faster learning than the traditional
                   RL framework.},
  categories = {reinforcement learning, spoken dialogue systems},
  month = dec,
  pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2006/ham-slt2006.pdf},
  year = 2006
}
@article{vepa_king_tsap05,
  author = {Jithendra Vepa and Simon King},
  title = {Subjective Evaluation of Join Cost and Smoothing
                   Methods for Unit Selection Speech Synthesis},
  journal = {IEEE Transactions on Speech and Audio Processing},
  volume = {14},
  number = {5},
  pages = {1763--1771},
  abstract = {In unit selection-based concatenative speech
                   synthesis, join cost (also known as concatenation
                   cost), which measures how well two units can be joined
                   together, is one of the main criteria for selecting
                   appropriate units from the inventory. Usually, some
                   form of local parameter smoothing is also needed to
                   disguise the remaining discontinuities. This paper
                   presents a subjective evaluation of three join cost
                   functions and three smoothing methods. We describe the
                   design and performance of a listening test. The three
                   join cost functions were taken from our previous study,
                   where we proposed join cost functions derived from
                   spectral distances, which have good correlations with
                   perceptual scores obtained for a range of concatenation
                   discontinuities. This evaluation allows us to further
                   validate their ability to predict concatenation
                   discontinuities. The units for synthesis stimuli are
                   obtained from a state-of-the-art unit selection
                   text-to-speech system: rVoice from Rhetorical Systems
                   Ltd. In this paper, we report listeners' preferences
                   for each join cost in combination with each smoothing
                   method.},
  categories = {TTS, join cost, listening test},
  month = sep,
  pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2005/vepa_king_ieee2005.pdf},
  year = 2006
}
@article{frankel06:adapt,
  author = {Frankel, J. and King, S.},
  title = {Observation Process Adaptation for Linear Dynamic
                   Models},
  journal = {Speech Communication},
  volume = 48,
  number = 9,
  pages = {1192--1199},
  abstract = {This work introduces two methods for adapting the
                   observation process parameters of linear dynamic models
                   (LDM) or other linear-Gaussian models. The first method
                   uses the expectation-maximization (EM) algorithm to
                   estimate transforms for location and covariance
                   parameters, and the second uses a generalized EM (GEM)
                   approach which reduces computation in making updates
                   from $O(p^6)$ to $O(p^3)$, where $p$ is the feature
                   dimension. We present the results of speaker adaptation
                   on TIMIT phone classification and recognition
                   experiments with relative error reductions of up to
                   $6\%$. Importantly, we find minimal differences in the
                   results from EM and GEM. We therefore propose that the
                   GEM approach be applied to adaptation of hidden Markov
                   models which use non-diagonal covariances. We provide
                   the necessary update equations.},
  categories = {am,asr,ldm,timit,edinburgh},
  month = sep,
  pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2006/Frankel_King_SPECOM2006.pdf},
  ps = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2006/Frankel_King_SPECOM2006.ps},
  year = 2006
}
@inproceedings{clark_blizzard2006,
  author = {Clark, R. and Richmond, K. and Strom, V. and King, S.},
  title = {Multisyn Voices for the {B}lizzard {C}hallenge 2006},
  booktitle = {Proc. Blizzard Challenge Workshop (Interspeech
                   Satellite)},
  address = {Pittsburgh, USA},
  note = {(http://festvox.org/blizzard/blizzard2006.html)},
  abstract = {This paper describes the process of building unit
                   selection voices for the Festival Multisyn engine using
                   the ATR dataset provided for the Blizzard Challenge
                   2006. We begin by discussing recent improvements that
                   we have made to the Multisyn voice building process,
                   prompted by our participation in the Blizzard Challenge
                   2006. We then go on to discuss our interpretation of
                   the results observed. Finally, we conclude with some
                   comments and suggestions for the formulation of future
                   Blizzard Challenges.},
  categories = {tts, blizzard, multisyn, unit selection},
  key = {clark_blizzard2006},
  month = sep,
  pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2006/cstr_blizzard2006.pdf},
  year = 2006
}
@inproceedings{lal_interspeech06,
  author = {Partha Lal},
  title = {A Comparison of Singing Evaluation Algorithms},
  booktitle = {Proc. Interspeech 2006},
  abstract = {This paper describes a system that compares user
                   renditions of short sung clips with the original
                    version of those clips. The F0 contours of both recordings
                    were estimated and then Viterbi-aligned with each other. The
                   total difference in pitch after alignment was used as a
                   distance metric and transformed into a rating out of
                   ten, to indicate to the user how close he or she was to
                   the original singer. An existing corpus of sung speech
                   was used for initial design and optimisation of the
                   system. We then collected further development and
                   evaluation corpora - these recordings were judged for
                   closeness to an original recording by two human judges.
                   The rankings assigned by those judges were used to
                   design and optimise the system. The design was then
                   implemented and deployed as part of a telephone-based
                   entertainment application.},
  categories = {automated singing evaluation, pitch tracking,
                   entertainment applications},
  month = sep,
  pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2006/singing.pdf},
  year = 2006
}
@phdthesis{calhoun:06,
  author = {Calhoun, Sasha},
  title = {Information Structure and the Prosodic Structure of
                   {E}nglish: a Probabilistic Relationship},
  school = {University of Edinburgh},
  abstract = {This thesis looks at how information structure is
                   signalled prosodically in English. It has been
                   standardly held that information structure is primarily
                   signalled by the distribution of pitch accents within
                    syntactic structure, as well as by intonation event type.
                    Here, however, it is argued that previous work has
                   underestimated the importance, and richness, of
                   metrical prosodic structure and its role in signalling
                   information structure. A new approach is proposed: to
                   view information structure as a strong constraint on
                   the mapping of words onto metrical prosodic structure.
                   Focal elements (kontrast) align with nuclear
                   prominence, while accents on other words are not
                   usually directly 'meaningful'. Information units
                   (theme/rheme) try to align with prosodic phrases. This
                   mapping is probabilistic, so it is also influenced by
                   lexical and syntactic effects, as well as rhythmical
                   constraints and other features including emphasis.
                   Qualitative and quantitative analysis is presented in
                   support of these claims using the NXT Switchboard
                   corpus which has been annotated with substantial new
                   layers of semantic and prosodic features.},
  year = 2006
}
@inproceedings{hachey06,
  author = {B. Hachey and G. Murray and D. Reitter},
  title = {Dimensionality Reduction Aids Term Co-Occurrence Based
                   Multi-Document Summarization},
  booktitle = {Proceedings of ACL Summarization Workshop 2006,
                   Sydney, Australia},
  abstract = {A key task in an extraction system for query-oriented
                   multi-document summarisation, necessary for computing
                   relevance and redundancy, is modelling text semantics.
                   In the Embra system, we use a representation derived
                   from the singular value decomposition of a term
                   co-occurrence matrix. We present methods to show the
                   reliability of performance improvements. We find that
                   Embra performs better with dimensionality reduction.},
  categories = {summarization, latent semantic analysis},
  month = jun,
  pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2006/coling-acl2006.pdf},
  year = 2006
}
@inproceedings{clark_king:proc:2006,
  author = {Robert A. J. Clark and Simon King},
  title = {Joint Prosodic and Segmental Unit Selection Speech
                   Synthesis},
  booktitle = {Proc. Interspeech 2006},
  address = {Pittsburgh, USA},
  abstract = {We describe a unit selection technique for
                   text-to-speech synthesis which jointly searches the
                   space of possible diphone sequences and the space of
                   possible prosodic unit sequences in order to produce
                   synthetic speech with more natural prosody. We
                    demonstrate that this search, although currently
                   computationally expensive, can achieve improved
                   intonation compared to a baseline in which only the
                   space of possible diphone sequences is searched. We
                   discuss ways in which the search could be made
                   sufficiently efficient for use in a real-time system.},
  month = sep,
  pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2006/clarkking_interspeech_2006.pdf},
  ps = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2006/clarkking_interspeech_2006.ps},
  year = 2006
}
@inproceedings{bell_burrows_taylor_sp2006,
  author = {Peter Bell and Tina Burrows and Paul Taylor},
  title = {Adaptation of Prosodic Phrasing Models},
  booktitle = {Proc. Speech Prosody 2006},
  address = {Dresden, Germany},
  abstract = {There is considerable variation in the prosodic
                    phrasing of speech between different speakers and
                   speech styles. Due to the time and cost of obtaining
                   large quantities of data to train a model for every
                   variation, it is desirable to develop models that can
                   be adapted to new conditions with a limited amount of
                   training data. We describe a technique for adapting
                   HMM-based phrase boundary prediction models which
                    alters the statistical distribution of prosodic phrase
                   lengths. The adapted models show improved prediction
                   performance across different speakers and types of
                   spoken material.},
  month = may,
  pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2006/phrasing_sp2006.pdf},
  year = 2006
}
@incollection{king:ELL2_2006a,
  author = {Simon King},
  title = {Language variation in speech technologies},
  booktitle = {Encyclopedia of Language and Linguistics},
  publisher = {Elsevier},
  editor = {Keith Brown},
  edition = {2nd},
  year = 2006
}
@inproceedings{hsueh2006asm,
  author = {Hsueh, P. and Moore, J. and Renals, S.},
  title = {Automatic Segmentation of Multiparty Dialogue},
  booktitle = {Proc. EACL06},
  abstract = {In this paper, we investigate the problem of
                    automatically predicting segment boundaries in spoken
                    multiparty dialogue. We extend prior work in two ways.
                    We first apply approaches that have been proposed for
                    predicting top-level topic shifts to the problem of
                    identifying subtopic boundaries. We then explore the
                    impact on performance of using ASR output as opposed to
                    human transcription. Examination of the effect of
                    features shows that predicting top-level and predicting
                    subtopic boundaries are two distinct tasks: (1) for
                    predicting subtopic boundaries, the lexical
                    cohesion-based approach alone can achieve competitive
                    results, (2) for predicting top-level boundaries, the
                    machine learning approach that combines
                    lexical-cohesion and conversational features performs
                    best, and (3) conversational cues, such as cue phrases
                    and overlapping speech, are better indicators for the
                    top-level prediction task. We also find that the
                    transcription errors inevitable in ASR output have a
                    negative impact on models that combine lexical-cohesion
                    and conversational features, but do not change the
                    general preference of approach for the two tasks.},
  pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2006/21_1_hsuehmoorerenals.pdf},
  year = 2006
}
@inproceedings{strom06,
  author = {Volker Strom and Robert Clark and Simon King},
  title = {Expressive Prosody for Unit-selection Speech Synthesis},
  booktitle = {Proc.~Interspeech},
  address = {Pittsburgh},
  abstract = {Current unit selection speech synthesis voices cannot
                   produce emphasis or interrogative contours because of a
                   lack of the necessary prosodic variation in the
                   recorded speech database. A method of recording script
                   design is proposed which addresses this shortcoming.
                   Appropriate components were added to the target cost
                   function of the Festival Multisyn engine, and a
                   perceptual evaluation showed a clear preference over
                   the baseline system.},
  pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2006/strom06.pdf},
  ps = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2006/strom06.ps},
  year = 2006
}
@incollection{al-hames2006-mlmi06,
  author = {Marc Al-Hames and Thomas Hain and Jan Cernocky and
                   Sascha Schreiber and Mannes Poel and Ronald Mueller and
                   Sebastien Marcel and David {van Leeuwen} and Jean-Marc
                   Odobez and Sileye Ba and Hervé Bourlard and Fabien
                   Cardinaux and Daniel Gatica-Perez and Adam Janin and
                   Petr Motlicek and Stephan Reiter and Steve Renals and
                   Jeroen {van Rest} and Rutger Rienks and Gerhard Rigoll
                   and Kevin Smith and Andrew Thean and Pavel Zemcik},
  title = {Audio-video processing in meetings: Seven questions
                   and current {AMI} answers},
  booktitle = {Machine Learning for Multimodal Interaction (Proc.
                   MLMI '06)},
  publisher = {Springer},
  editor = {S. Renals and S. Bengio and J. G. Fiscus},
  volume = {4299},
  series = {Lecture Notes in Computer Science},
  pages = {24--35},
  year = 2006
}
@book{renals2006-mlmi05,
  editor = {Steve Renals and Samy Bengio},
  title = {Machine learning for multimodal interaction
                   (Proceedings of {MLMI} '05)},
  publisher = {Springer-Verlag},
  volume = {3869},
  series = {Lecture Notes in Computer Science},
  year = 2006
}
@inproceedings{Shimodaira:kes06,
  author = {Chie Shimodaira and Hiroshi Shimodaira and Susumu
                   Kunifuji},
  title = {{A Divergent-Style Learning Support Tool for English
                   Learners Using a Thesaurus Diagram}},
  booktitle = {{Proc. KES2006}},
  address = {Bournemouth, United Kingdom},
  abstract = { This paper proposes an English learning support tool
                   which provides users with divergent information to find
                   the right words and expressions. In contrast to a
                   number of software tools for English translation and
                    composition, the proposed tool is designed to give
                    users not only the right answer to their query but
                    also many words and examples relevant to
                    the query. Based on the lexical information provided by
                    the lexical database WordNet, the proposed tool
                    provides users with a thesaurus diagram, in which
                    synonym sets and relation links are presented in
                    multiple windows to help users choose appropriate words
                   and understand similarities and differences between
                   words. Subjective experiments are carried out to
                   evaluate the system. },
  categories = {knowledge engineering},
  month = oct,
  pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2006/kes2006.pdf},
  year = 2006
}
@inproceedings{murray06b,
  author = {G. Murray and S. Renals and M. Taboada},
  title = {Prosodic Correlates of Rhetorical Relations},
  booktitle = {Proceedings of HLT/NAACL ACTS Workshop, 2006, New York
                   City, USA},
  abstract = {This paper investigates the usefulness of prosodic
                   features in classifying rhetorical relations between
                   utterances in meeting recordings. Five rhetorical
                   relations of \textit{contrast}, \textit{elaboration},
                   \textit{summary}, \textit{question} and \textit{cause}
                   are explored. Three training methods - supervised,
                   unsupervised, and combined - are compared, and
                   classification is carried out using support vector
                   machines. The results of this pilot study are
                   encouraging but mixed, with pairwise classification
                   achieving an average of 68\% accuracy in discerning
                   between relation pairs using only prosodic features,
                   but multi-class classification performing only slightly
                   better than chance.},
  categories = {rhetorical structure theory, prosody, unsupervised
                   learning},
  month = jun,
  pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2006/dacts-hlt.pdf},
  year = 2006
}
@inproceedings{Shimodaira:iwfhr06,
  author = {Junko Tokuno and Mitsuru Nakai and Hiroshi Shimodaira
                   and Shigeki Sagayama and Masaki Nakagawa},
  title = {{On-line Handwritten Character Recognition Selectively
                   employing Hierarchical Spatial Relationships among
                   Subpatterns}},
  booktitle = {{Proc. IWFHR-10}},
  address = {La Baule, France},
  abstract = { This paper proposes an on-line handwritten character
                   pattern recognition method that examines spatial
                   relationships among subpatterns which are components of
                    a character pattern. Conventional methods for evaluating
                    spatial relationships among subpatterns have not
                    considered the characteristics of deformed handwriting and
                    evaluate all spatial relationships equally.
                    However, the deformations of spatial features
                    differ within a character pattern. In our approach,
                    we assume that the distortions of spatial features
                    depend on the hierarchy of character patterns, so
                    we selectively evaluate the hierarchical spatial
                    relationships of subpatterns by employing a Bayesian
                    network as a post-processor of our sub-stroke based HMM
                    recognition system. Experiments on on-line handwritten
                   Kanji character recognition with a lexicon of 1,016
                   elementary characters revealed that the approach we
                   propose improves the recognition accuracy for different
                   types of deformations. },
  categories = {online handwriting recognition},
  month = oct,
  year = 2006
}
@inproceedings{richmond2006,
  author = {Richmond, K.},
  title = {A Trajectory Mixture Density Network for the
                   Acoustic-Articulatory Inversion Mapping},
  booktitle = {Proc. Interspeech},
  address = {Pittsburgh, USA},
  abstract = {This paper proposes a trajectory model based
                    on a mixture density network trained with target
                    features augmented with dynamic features, together with
                    an algorithm for estimating maximum likelihood
                    trajectories which respects the constraints between the
                    static and derived dynamic features. This model was
                   evaluated on an inversion mapping task. We found the
                   introduction of the trajectory model successfully
                   reduced root mean square error by up to $7.5\%$, as
                   well as increasing correlation scores.},
  categories = {acoustic-articulatory, inversion mapping, MDN, MLPG,
                   trajectory modelling},
  month = sep,
  pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2006/interspeech2006_richmond.pdf},
  year = 2006
}
@inproceedings{murray06c,
  author = {G. Murray and S. Renals},
  title = {Dialogue Act Compression Via Pitch Contour
                   Preservation},
  booktitle = {Proceedings of the 9th International Conference on
                   Spoken Language Processing, Pittsburgh, USA},
  abstract = {This paper explores the usefulness of prosody in
                   automatically compressing dialogue acts from meeting
                   speech. Specifically, this work attempts to compress
                   utterances by preserving the pitch contour of the
                   original whole utterance. Two methods of doing this are
                   described in detail and are evaluated
                   \textit{subjectively} using human annotators and
                   \textit{objectively} using edit distance with a
                   human-authored gold-standard. Both metrics show that
                   such a prosodic approach is much better than the random
                   baseline approach and significantly better than a
                   simple text compression method.},
  categories = {automatic compression, prosody, summarization},
  month = sep,
  pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2006/inter2006.pdf},
  year = 2006
}