The Centre for Speech Technology Research, The University of Edinburgh

Publications by Heriberto Cuayáhuitl

s0456904.bib

@phdthesis{cuayahuitl_thesis2009,
  author = {Heriberto Cuayáhuitl},
  title = {Hierarchical Reinforcement Learning for Spoken
                   Dialogue Systems},
  school = {School of Informatics, University of Edinburgh},
  abstract = {This thesis focuses on the problem of scalable
                   optimization of dialogue behaviour in speech-based
                   conversational systems using reinforcement learning.
                   Most previous investigations in dialogue strategy
                   learning have proposed flat reinforcement learning
                   methods, which are more suitable for small-scale spoken
                   dialogue systems. This research formulates the problem
                   in terms of Semi-Markov Decision Processes (SMDPs), and
                   proposes two hierarchical reinforcement learning
                   methods to optimize sub-dialogues rather than full
                   dialogues. The first method uses a hierarchy of SMDPs,
                   where every SMDP ignores irrelevant state variables and
                   actions in order to optimize a sub-dialogue. The second
                   method extends the first one by constraining every SMDP
                   in the hierarchy with prior expert knowledge. The
                   latter method proposes a learning algorithm called
                   'HAM+HSMQ-Learning', which combines two existing
                   algorithms in the literature of hierarchical
                   reinforcement learning. Whilst the first method
                   generates fully-learnt behaviour, the second one
                   generates semi-learnt behaviour. In addition, this
                   research proposes a heuristic dialogue simulation
                   environment for automatic dialogue strategy learning.
                   Experiments were performed on simulated and real
                   environments based on a travel planning spoken dialogue
                   system. Experimental results provided evidence to
                   support the following claims: First, both methods scale
                   well at the cost of near-optimal solutions, resulting
                   in slightly longer dialogues than the optimal
                   solutions. Second, dialogue strategies learnt with
                   coherent user behaviour and conservative recognition
                   error rates can outperform a reasonable hand-coded
                   strategy. Third, semi-learnt dialogue behaviours are a
                   better alternative (because of their higher overall
                   performance) than hand-coded or fully-learnt dialogue
                   behaviours. Last, hierarchical reinforcement learning
                   dialogue agents are feasible and promising for the
                   (semi) automatic design of adaptive behaviours in
                   larger-scale spoken dialogue systems. This research
                   makes the following contributions to spoken dialogue
                   systems which learn their dialogue behaviour. First,
                   the Semi-Markov Decision Process (SMDP) model was
                   proposed to learn spoken dialogue strategies in a
                   scalable way. Second, the concept of 'partially
                   specified dialogue strategies' was proposed for
                   integrating simultaneously hand-coded and learnt spoken
                   dialogue behaviours into a single learning framework.
                   Third, an evaluation with real users of hierarchical
                   reinforcement learning dialogue agents was essential to
                   validate their effectiveness in a realistic
                   environment.},
  categories = {spoken dialogue systems, (semi-)automatic dialogue
                   strategy design, hierarchical control, prior expert
                   knowledge, Semi-Markov decision processes, hierarchical
                   reinforcement learning},
  month = jan,
  pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2009/PhDThesis-HeribertoCuayahuitl-Final.pdf},
  year = 2009
}
@inproceedings{cuayahuitletal_interspeech06,
  author = {Heriberto Cuayáhuitl and Steve Renals and Oliver
                   Lemon and Hiroshi Shimodaira},
  title = {Learning Multi-Goal Dialogue Strategies Using
                   Reinforcement Learning With Reduced State-Action Spaces},
  booktitle = {Proc. of INTERSPEECH},
  abstract = {Learning dialogue strategies using the reinforcement
                   learning framework is problematic due to its expensive
                   computational cost. In this paper we propose an
                   algorithm that reduces a state-action space to one
                   which includes only valid state-actions. We performed
                   experiments on full and reduced spaces using three
                   systems (with 5, 9 and 20 slots) in the travel domain
                   using a simulated environment. The task was to learn
                   multi-goal dialogue strategies optimizing single and
                   multiple confirmations. Average results using
                   strategies learnt on reduced spaces reveal the
                   following benefits against full spaces: 1) less
                   computer memory (94\% reduction), 2) faster learning
                   (93\% faster convergence) and 3) better performance (8.4\%
                   less time steps and 7.7\% higher reward).},
  categories = {reinforcement learning, spoken dialogue systems},
  month = sep,
  pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2006/rss-icslp2006.pdf},
  year = 2006
}
@article{cuayahuitl2009,
  author = {Cuayáhuitl, Heriberto and Renals, Steve and Lemon,
                   Oliver and Shimodaira, Hiroshi},
  title = {Evaluation of a hierarchical reinforcement learning
                   spoken dialogue system},
  journal = {Computer Speech and Language},
  volume = {24},
  number = {2},
  pages = {395--429},
  abstract = {We describe an evaluation of spoken dialogue
                   strategies designed using hierarchical reinforcement
                   learning agents. The dialogue strategies were learnt in
                   a simulated environment and tested in a laboratory
                   setting with 32 users. These dialogues were used to
                   evaluate three types of machine dialogue behaviour:
                   hand-coded, fully-learnt and semi-learnt. These
                   experiments also served to evaluate the realism of
                   simulated dialogues using two proposed metrics
                   contrasted with `Precision-Recall'. The learnt
                   dialogue behaviours used the Semi-Markov Decision
                   Process (SMDP) model, and we report the first
                   evaluation of this model in a realistic conversational
                   environment. Experimental results in the travel
                   planning domain provide evidence to support the
                   following claims: (a) hierarchical semi-learnt dialogue
                   agents are a better alternative (with higher overall
                   performance) than deterministic or fully-learnt
                   behaviour; (b) spoken dialogue strategies learnt with
                   highly coherent user behaviour and conservative
                   recognition error rates (keyword error rate of 20\%)
                   can outperform a reasonable hand-coded strategy; and
                   (c) hierarchical reinforcement learning dialogue agents
                   are feasible and promising for the (semi) automatic
                   design of optimized dialogue behaviours in larger-scale
                   systems.},
  doi = {10.1016/j.csl.2009.07.001},
  pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2009/cuayahuitl-csl09.pdf},
  year = 2009
}
@inproceedings{cuayahuitletal_slt06,
  author     = {Heriberto Cuayáhuitl and Steve Renals
                and Oliver Lemon and Hiroshi Shimodaira},
  title      = {Reinforcement Learning of Dialogue Strategies
                With Hierarchical Abstract Machines},
  booktitle  = {Proc. of IEEE/ACL Workshop on
                Spoken Language Technology (SLT)},
  month      = dec,
  year       = 2006,
  abstract   = {In this paper we propose partially specified dialogue
                strategies for dialogue strategy optimization, where part
                of the strategy is specified deterministically and the
                rest optimized with Reinforcement Learning (RL). To do
                this we apply RL with Hierarchical Abstract Machines
                (HAMs). We also propose to build simulated users using
                HAMs, incorporating a combination of hierarchical
                deterministic and probabilistic behaviour. We performed
                experiments using a single-goal flight booking dialogue
                system, and compare two dialogue strategies
                (deterministic and optimized) using three types of
                simulated user (novice, experienced and expert). Our
                results show that HAMs are promising for both dialogue
                optimization and simulation, and provide evidence that
                indeed partially specified dialogue strategies can
                outperform deterministic ones (on average 4.7 fewer
                system turns) with faster learning than the traditional
                RL framework.},
  categories = {reinforcement learning, spoken dialogue systems},
  pdf        = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2006/ham-slt2006.pdf}
}
@inproceedings{cuayahuitletal_interspeech07,
  author = {Heriberto Cuayáhuitl and Steve Renals and Oliver
                   Lemon and Hiroshi Shimodaira},
  title = {Hierarchical Dialogue Optimization Using {Semi-Markov}
                   Decision Processes},
  booktitle = {Proc. of INTERSPEECH},
  abstract = {This paper addresses the problem of dialogue
                   optimization on large search spaces. For such a
                   purpose, in this paper we propose to learn dialogue
                   strategies using multiple Semi-Markov Decision
                   Processes and hierarchical reinforcement learning. This
                   approach factorizes state variables and actions in
                   order to learn a hierarchy of policies. Our experiments
                   are based on a simulated flight booking dialogue system
                   and compare flat versus hierarchical reinforcement
                   learning. Experimental results show that the proposed
                   approach produced a dramatic search space reduction
                   (99.36\%), and converged four orders of magnitude
                   faster than flat reinforcement learning with a very
                   small loss in optimality (on average 0.3 system turns).
                   Results also report that the learnt policies
                   outperformed a hand-crafted one under three different
                   conditions of ASR confidence levels. This approach is
                   appealing to dialogue optimization due to faster
                   learning, reusable subsolutions, and scalability to
                   larger problems.},
  categories = {Spoken dialogue systems, semi-Markov decision
                   processes, hierarchical reinforcement learning},
  month = aug,
  pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2007/SMDPs-interspeech2007.pdf},
  year = 2007
}
@inproceedings{cuayahuitletal_asru05,
  author = {Heriberto Cuayáhuitl and Steve Renals and Oliver
                   Lemon and Hiroshi Shimodaira},
  title = {Human-Computer Dialogue Simulation Using Hidden {Markov}
                   Models},
  booktitle = {Proc. of IEEE Workshop on Automatic Speech Recognition
                   and Understanding (ASRU)},
  abstract = {This paper presents a probabilistic method to simulate
                   task-oriented human-computer dialogues at the intention
                   level, that may be used to improve or to evaluate the
                   performance of spoken dialogue systems. Our method uses
                   a network of Hidden Markov Models (HMMs) to predict
                   system and user intentions, where a ``language model''
                   predicts sequences of goals and the component HMMs
                   predict sequences of intentions. We compare standard
                   HMMs, Input HMMs and Input-Output HMMs in an effort to
                   better predict sequences of intentions. In addition, we
                   propose a dialogue similarity measure to evaluate the
                   realism of the simulated dialogues. We performed
                   experiments using the DARPA Communicator corpora and
                   report results with three different metrics: dialogue
                   length, dialogue similarity and precision-recall.},
  categories = {dialogue simulation, hidden Markov models},
  month = nov,
  pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2005/hcp-asru2005.pdf},
  year = 2005
}