The Centre for Speech Technology Research, The University of Edinburgh

Publications by Hiroshi Shimodaira

hshimoda.bib

@article{Kawamoto2002IPSJ07,
  author = {Shin-ichi Kawamoto and Hiroshi Shimodaira and others},
  title = {{Design of Software Toolkit for Anthropomorphic Spoken
                   Dialog Agent Software with Customization-oriented
                   Features}},
  journal = {Information Processing Society of Japan (IPSJ) Journal},
  volume = {43},
  number = {7},
  pages = {2249--2263},
  note = {(in Japanese)},
  month = jul,
  year = 2002
}
@inproceedings{cuayahuitletal_interspeech06,
  author = {Heriberto Cuay{\'a}huitl and Steve Renals and Oliver
                   Lemon and Hiroshi Shimodaira},
  title = {Learning Multi-Goal Dialogue Strategies Using
                   Reinforcement Learning With Reduced State-Action Spaces},
  booktitle = {Proc. of INTERSPEECH},
  abstract = {Learning dialogue strategies using the reinforcement
                   learning framework is problematic due to its expensive
                   computational cost. In this paper we propose an
                   algorithm that reduces a state-action space to one
                   which includes only valid state-actions. We performed
                   experiments on full and reduced spaces using three
                   systems (with 5, 9 and 20 slots) in the travel domain
                   using a simulated environment. The task was to learn
                   multi-goal dialogue strategies optimizing single and
                   multiple confirmations. Average results using
                   strategies learnt on reduced spaces reveal the
                   following benefits against full spaces: 1) less
                   computer memory (94\% reduction), 2) faster learning
                   (93\% faster convergence) and better performance (8.4\%
                   less time steps and 7.7\% higher reward).},
  categories = {reinforcement learning, spoken dialogue systems},
  month = sep,
  pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2006/rss-icslp2006.pdf},
  year = 2006
}
@article{Ho2003Applied,
  author = {Tu Bao Ho and Trong Dung Nguyen and Hiroshi Shimodaira
                   and Masayuki Kimura},
  title = {{A Knowledge Discovery System with Support for Model
                   Selection and Visualization}},
  journal = {Applied Intelligence},
  volume = {19},
  pages = {125--141},
  categories = {KDD},
  year = 2003
}
@article{Keeni1996IEICE,
  author = {Kanad Keeni and Hiroshi Shimodaira and Tetsuro Nishino
                   and Yasuo Tan},
  title = {{Recognition of Devanagari Characters Using Neural
                   Networks}},
  journal = {{IEICE} Transactions on Information and Systems},
  volume = {E79-D},
  number = {5},
  pages = {523--528},
  categories = {character-recognition, ann, jaist},
  month = may,
  year = 1996
}
@misc{Carnival_SIGGRAPH_2010,
  author = {Michael Berger and Gregor Hofer and Hiroshi Shimodaira},
  title = {Carnival: a modular framework for automated facial
                   animation},
  howpublished = {Poster at SIGGRAPH 2010},
  note = {Bronze award winner, ACM Student Research Competition},
  abstract = {We present a software framework for speech- or
                   text-driven animation---including a platform-independent
                   API and an application implementing it---which unifies
                   state-of-the-art speech technology and graphics
                   technology within a single system.},
  address = {Los Angeles, Calif., USA},
  pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2010/carnival.pdf},
  year = 2010
}
@inproceedings{Sagayama2001ISCA08a,
  author     = {Sagayama, Shigeki and Kato, Yutaka and Nakai, Mitsuru and Shimodaira, Hiroshi},
  title      = {{Jacobian Approach to Joint Adaptation to Noise, Channel and Vocal Tract Length}},
  booktitle  = {Proc. ISCA Workshop on Adaptation Methods (Sophia Antipolis, France)},
  pages      = {117--120},
  month      = aug,
  year       = {2001},
  categories = {asr, jaist}
}
@inproceedings{Shimodaira1997Eurospeech,
  author = {Hiroshi Shimodaira and Mitsuru Nakai and Akihiro
                   Kumata},
  title = {{Restoration of Pitch Pattern of Speech Based on a
                   Pitch Generation Model}},
  booktitle = {Proc. EuroSpeech'97},
  pages = {512--524},
  abstract = {In this paper a model-based approach for restoring a
                   continuous fundamental frequency (F0) contour from the
                   noisy output of an F0 extractor is investigated. In
                   contrast to the conventional pitch trackers based on
                   numerical curve-fitting, the proposed method employs a
                   quantitative pitch generation model, which is often
                   used for synthesizing F0 contour from prosodic event
                   commands for estimating continuous F0 pattern. An
                   inverse filtering technique is introduced for obtaining
                   the initial candidates of the prosodic commands. In
                   order to find the optimal command sequence from the
                   commands efficiently, a beam-search algorithm and an
                   N-best technique are employed. Preliminary experiments
                   for a male speaker of the ATR B-set database showed
                   promising results both in quality of the restored
                   pattern and estimation of the prosodic events.},
  categories = {f0, jaist},
  month = sep,
  pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1997/euro97.pdf},
  year = 1997
}
@inproceedings{Shimodaira1998ICSLP,
  author     = {Shimodaira, Hiroshi and Rokui, Jun and Nakai, Mitsuru},
  title      = {{Improving The Generalization Performance Of The MCE/GPD Learning}},
  booktitle  = {ICSLP'98, Australia},
  month      = dec,
  year       = {1998},
  categories = {lifelike-agent, jaist},
  pdf        = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1998/Shimodaira1998ICSLP.pdf},
  abstract   = { A novel method to prevent the over-fitting effect and
    improve the generalization performance of the Minimum
    Classification Error (MCE) / Generalized Probabilistic
    Descent (GPD) learning is proposed. The MCE/GPD method,
    which is one of the newest discriminative-learning
    approaches proposed by Katagiri and Juang in 1992,
    results in better recognition performance in various
    areas of pattern recognition than the
    maximum-likelihood (ML) based approach where a
    posteriori probabilities are estimated. Despite its
    superiority in recognition performance, it still
    suffers from the problem of over-fitting to the
    training samples as it is with other learning
    algorithms. In the present study, a regularization
    technique is employed to the MCE method to overcome
    this problem. Feed-forward neural networks are employed
    as a recognition platform to evaluate the recognition
    performance of the proposed method. Recognition
    experiments are conducted on several sorts of datasets.
    The proposed method shows better generalization
    performance than the original one. }
}
@inproceedings{Keeni1998ICPR,
  author     = {Keeni, Kanad and Nakayama, Kenji and Shimodaira, Hiroshi},
  title      = {{Automatic Generation of Initial Weights and Estimation of Hidden Units for Pattern Classification Using Neural Networks}},
  booktitle  = {14th International Conference on Pattern Recognition (ICPR'98)},
  pages      = {1568--1571},
  month      = aug,
  year       = {1998},
  categories = {ann, jaist}
}
@article{cuayahuitl2009,
  author = {Cuay{\'a}huitl, Heriberto and Renals, Steve and Lemon,
                   Oliver and Shimodaira, Hiroshi},
  title = {Evaluation of a hierarchical reinforcement learning
                   spoken dialogue system},
  journal = {Computer Speech and Language},
  volume = {24},
  number = {2},
  pages = {395--429},
  abstract = {We describe an evaluation of spoken dialogue
                   strategies designed using hierarchical reinforcement
                   learning agents. The dialogue strategies were learnt in
                   a simulated environment and tested in a laboratory
                   setting with 32 users. These dialogues were used to
                   evaluate three types of machine dialogue behaviour:
                   hand-coded, fully-learnt and semi-learnt. These
                   experiments also served to evaluate the realism of
                   simulated dialogues using two proposed metrics
                   contrasted with `Precision-Recall'. The learnt
                   dialogue behaviours used the Semi-Markov Decision
                   Process (SMDP) model, and we report the first
                   evaluation of this model in a realistic conversational
                   environment. Experimental results in the travel
                   planning domain provide evidence to support the
                   following claims: (a) hierarchical semi-learnt dialogue
                   agents are a better alternative (with higher overall
                   performance) than deterministic or fully-learnt
                   behaviour; (b) spoken dialogue strategies learnt with
                   highly coherent user behaviour and conservative
                   recognition error rates (keyword error rate of 20\%)
                   can outperform a reasonable hand-coded strategy; and
                   (c) hierarchical reinforcement learning dialogue agents
                   are feasible and promising for the (semi) automatic
                   design of optimized dialogue behaviours in larger-scale
                   systems.},
  doi = {10.1016/j.csl.2009.07.001},
  pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2009/cuayahuitl-csl09.pdf},
  year = 2009
}
@inproceedings{Keeni1997ICDAR,
  author = {Kanad Keeni and Hiroshi Shimodaira and Kenji Nakayama},
  title = {{On Distributed Representation of Output Layer for
                   Recognizing Japanese Kana Characters Using Neural
                   Networks}},
  booktitle = {Proceedings of the 4th International Conference on
                   Document Analysis and Recognition, ICDAR'97},
  pages = {600--603},
  note = {Ulm, Germany},
  categories = {hwr, ann, jaist},
  month = jul,
  year = 1997
}
@inproceedings{Matsuda2000ICSLP10,
  author = {Shigeki Matsuda and Mitsuru Nakai and Hiroshi
                   Shimodaira and Shigeki Sagayama},
  title = {{Feature-dependent Allophone Clustering}},
  booktitle = {Proc. ICSLP2000},
  pages = {413--416},
  abstract = {We propose a novel method for clustering allophones
                   called Feature-Dependent Allophone Clustering (FD-AC)
                   that determines feature-dependent HMM topology
                   automatically. Existing methods for allophone
                   clustering are based on parameter sharing between the
                   allophone models that resemble each other in behaviors
                   of feature vector sequences. However, all the features
                   of the vector sequences may not necessarily have a
                   common allophone clustering structures. It is
                   considered that the vector sequences can be better
                   modeled by allocating the optimal allophone clustering
                   structure to each feature. In this paper, we propose
                   Feature-Dependent Successive State Splitting (FD-SSS)
                   as an implementation of FD-AC. In speaker-dependent
                   continuous phoneme recognition experiments, HMMs
                   created by FD-SSS reduced the error rates by about 10\%
                   compared with the conventional HMMs that have a common
                   allophone clustering structure for all the features.},
  categories = {asr, atr, jaist},
  month = oct,
  pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2000/Matsuda2000ICSLP10.pdf},
  year = 2000
}
@inproceedings{Tokuno2002IWFHR,
  author = {Junko Tokuno and Nobuhito Inami and Shigeki Matsuda
                   and Mitsuru Nakai and Hiroshi Shimodaira and Shigeki
                   Sagayama},
  title = {{Context-Dependent Substroke Model for {HMM}-based
                   On-line Handwriting Recognition}},
  booktitle = {Proc. of IWFHR-8},
  pages = {78--83},
  abstract = {This paper describes an effective modeling technique
                   in the on-line recognition for cursive Kanji
                   handwritings and Hiragana handwritings. Our
                   conventional recognition system based on substroke HMMs
                   (hidden Markov models) employs straight-type substrokes
                   as primary models and has achieved high recognition
                   rate in the recognition of careful Kanji handwritings.
                   On the other hand, the recognition rate for cursive
                   handwritings is comparatively low, since they consist
                   of mainly curve-strokes. Therefore, we propose a
                   technique of using multiple models for each substroke
                   by considering the substroke context, which is a
                   preceding substroke and a following substroke. In order
                   to construct these context-dependent models
                   efficiently, we use the SSS (Successive State
                   Splitting) algorithm developed in speech recognition.
                   Through the experiments, the recognition rate improved
                   from 88\% to 92\% for cursive Kanji handwritings and
                   from 90\% to 98\% for Hiragana handwritings.},
  month = aug,
  pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2002/Tokuno2002IWFHR.pdf},
  year = 2002
}
@inproceedings{Nakai1995ICASSP,
  author = {Mitsuru Nakai and Harald Singer and Yoshinori Sagisaka
                   and Hiroshi Shimodaira},
  title = {{Automatic Prosodic Segmentation by F0 Clustering
                   Using Superpositional Modeling}},
  booktitle = {Proc. ICASSP-95, PR08.6},
  pages = {624--627},
  categories = {F0, atr, jaist},
  month = may,
  pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1995/Nakai1995ICASSP.pdf},
  year = 1995
}
@inproceedings{Koba1995HCIb,
  author = {Hisao Koba and Hiroshi Shimodaira and Masayuki Kimura},
  title = {{Intelligent Automatic Document Transcription System
                   for Braille: To Improve Accessibility to Printed Matter
                   for the Visually Impaired}},
  booktitle = {HCI International'95},
  month = jul,
  year = 1995
}
@inproceedings{Shimodaira:mlmi05,
  author     = {Shimodaira, Hiroshi and Uematsu, Keisuke and Kawamoto, Shin'ichi and Hofer, Gregor and Nakai, Mitsuru},
  title      = {{Analysis and Synthesis of Head Motion for Lifelike Conversational Agents}},
  booktitle  = {Proc. MLMI2005},
  month      = jul,
  year       = {2005},
  categories = {lifelike agents},
  pdf        = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2005/mlmi2005.pdf}
}
@inproceedings{Keeni1999IJCNN,
  author = {Kanad Keeni and Kenji Nakayama and Hiroshi Shimodaira},
  title = {{Estimation of Initial Weights and Hidden Units for
                   Fast Learning of Multi-layer Neural Networks for
                   Pattern Classification}},
  booktitle = {IEEE International Joint Conference on Neural Networks
                   (IJCNN'99)},
  categories = {ann, jaist},
  month = jul,
  year = 1999
}
@inproceedings{Shimodaira1994ICASSP,
  author = {Hiroshi Shimodaira and Mitsuru Nakai},
  title = {Prosodic Phrase Segmentation by Pitch Pattern
                   Clustering},
  booktitle = {Proc. ICASSP-94, 76.5, vol.II},
  pages = {185--188},
  abstract = {This paper proposes a novel method for detecting the
                   optimal sequence of prosodic phrases from continuous
                   speech based on data-driven approach. The pitch pattern
                   of input speech is divided into prosodic segments which
                   minimized the overall distortion with pitch pattern
                   templates of accent phrases by using the One Pass
                   search algorithm. The pitch pattern templates are
                   designed by clustering a large number of training
                   samples of accent phrases. On the ATR continuous speech
                   database uttered by 10 speakers, the rate of correct
                   segmentation was 91.7 \% maximum for the same sex data
                   of training and testing, 88.6 \% for the opposite sex.},
  categories = {F0, jaist},
  month = mar,
  pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1994/Shimodaira1994ICASSP.pdf},
  year = 1994
}
@article{Iida1998IEICE06,
  author = {Eiji Iida and Susumu Kunifuji and Hiroshi Shimodaira
                   and Masayuki Kimura},
  title = {{A Scale-Down Solution of {$N^2-1$} Puzzle}},
  journal = {Trans. IEICE(D-I)},
  volume = {J81-D-I},
  number = {6},
  pages = {604--614},
  note = {(in Japanese)},
  categories = {puzzle, jaist},
  month = jun,
  year = 1998
}
@article{Rokui2002IPSJ07,
  author = {Jun Rokui and Mitsuru Nakai and Hiroshi Shimodaira and
                   Shigeki Sagayama},
  title = {{Speaker Normalization Using Linear Transformation of
                   Vocal Tract Length Based on Maximum Likelihood
                   Estimation}},
  journal = {Information Processing Society of Japan (IPSJ) Journal},
  volume = {43},
  number = {7},
  pages = {2030--2037},
  note = {(in Japanese)},
  categories = {asr, jaist},
  month = jul,
  year = 2002
}
@inproceedings{taylor:shimodaira:isard:king:kowtko:icslp1996,
  author = {Paul A. Taylor and Hiroshi Shimodaira and Stephen
                   Isard and Simon King and Jacqueline Kowtko},
  title = {Using Prosodic Information to Constrain Language
                   Models for Spoken dialogue},
  booktitle = {Proc. {ICSLP} '96},
  address = {Philadelphia},
  abstract = {We present work intended to improve speech recognition
                   performance for computer dialogue by taking into
                   account the way that dialogue context and intonational
                   tune interact to limit the possibilities for what an
                   utterance might be. We report here on the extra
                   constraint achieved in a bigram language model
                   expressed in terms of entropy by using separate
                   submodels for different sorts of dialogue acts and
                   trying to predict which submodel to apply by analysis
                   of the intonation of the sentence being recognised.},
  categories = {asr, intonation, dialogue, lm,id4s},
  pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1996/Taylor_1996_a.pdf},
  ps = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1996/Taylor_1996_a.ps},
  year = 1996
}
@article{Otsuki2002IPSJ,
  author = {Tomoshi Otsuki and Naoki Saitou and Mitsuru Nakai and
                   Hiroshi Shimodaira and Shigeki Sagayama},
  title = {{Musical Rhythm Recognition Using Hidden Markov Model}},
  journal = {Information Processing Society of Japan (IPSJ) Journal},
  volume = {43},
  number = {2},
  note = {(in Japanese)},
  month = feb,
  year = 2002
}
@inproceedings{Bao1997-1,
  author = {Tu Bao Ho and Trong Dung Nguyen and Hiroshi Shimodaira
                   and Masayuki Kimura},
  title = {{An Interactive-Graphic Environment for Discovering
                   and Using Conceptual Knowledge}},
  booktitle = {7th European-Japanese Conference on Information
                   Modelling and Knowledge Bases},
  pages = {327--343},
  categories = {kdd, jaist},
  month = may,
  year = 1997
}
@article{Matsuda2003IEICE06,
  author     = {Matsuda, Shigeki and Nakai, Mitsuru and Shimodaira, Hiroshi and Sagayama, Shigeki},
  title      = {{Speech Recognition Using Asynchronous Transition {HMM}}},
  journal    = {IEICE Trans. D-II},
  volume     = {J86-D-II},
  number     = {6},
  pages      = {741--754},
  month      = jun,
  year       = {2003},
  note       = {(in Japanese)},
  categories = {asr, jaist},
  abstract   = {We propose asynchronous-transition HMM (AT-HMM) that
    is based on asynchronous transition structures among
    individual features of acoustic feature vector
    sequences. Conventional HMM represents vector sequences
    by using a chain of states, each state has vector
    distributions of multi-dimensions. Therefore, the
    conventional HMM assumes that individual features
    change synchronously. However, this assumption seems
    over-simplified for modeling the temporal behavior of
    acoustic features, since cepstrum and its
    time-derivative can not synchronize with each other. In
    speaker-dependent continuous phoneme recognition task,
    the AT-HMMs reduced errors by 10\% to 40\%. In
    speaker-independent task, the performance of the
    AT-HMMs was comparable to conventional HMMs.}
}
@article{Nakai2005IEICE01,
  author = {Mitsuru Nakai and Shigeki Sagayama and Hiroshi
                   Shimodaira},
  title = {{On-line Handwriting Recognition Based on Sub-stroke
                   {HMM}}},
  journal = {Trans. IEICE D-II},
  volume = {J88-D2},
  number = {8},
  note = {(in press) (in Japanese)},
  abstract = {This paper describes context-dependent sub-stroke
                   HMMs for on-line handwritten character recognition. As
                   there are so many characters in Japanese, modeling each
                   character by an HMM leads to an infeasible
                   character-recognition system requiring huge amount of
                   memory and enormous computation time. The sub-stroke
                   HMM approach has overcome these problems by minimizing
                   modeling unit. However, one of the drawbacks of this
                   approach is that the recognition accuracy deteriorates
                   for scribbled characters. In this paper, we show that
                   the context-dependent sub-stroke modeling which depends
                   on how the sub-stroke connects to the adjacent
                   substrokes is effective to achieve robust recognition
                   of low quality characters.},
  categories = {online handwritten character recognition},
  month = aug,
  year = 2005
}
@inproceedings{Fujinaga2001ICASSP,
  author     = {Fujinaga, Katsuhisa and Nakai, Mitsuru and Shimodaira, Hiroshi and Sagayama, Shigeki},
  title      = {{Multiple-Regression Hidden Markov Model}},
  booktitle  = {Proc. ICASSP 2001},
  month      = may,
  year       = {2001},
  categories = {asr, jaist},
  pdf        = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2001/Fujinaga2001ICASSP.pdf}
}
@inproceedings{Shimodaira2003ICDAR,
  author = {Hiroshi Shimodaira and Takashi Sudo and Mitsuru Nakai
                   and Shigeki Sagayama},
  title = {{On-line Overlaid-Handwriting Recognition Based on
                   Substroke {HMM}s}},
  booktitle = {ICDAR'03},
  pages = {1043--1047},
  abstract = {This paper proposes a novel handwriting recognition
                   interface for wearable computing where users write
                   characters continuously without pauses on a small
                   single writing box. Since characters are written on the
                   same writing area, they are overlaid with each other.
                   Therefore the task is regarded as a special case of the
                   continuous character recognition problem. In contrast
                   to the conventional continuous character recognition
                   problem, location information of strokes does not help
                   very much in the proposed framework. To tackle the
                   problem, substroke based hidden Markov models (HMMs)
                   and a stochastic bigram language model are employed.
                   Preliminary experiments were carried out on a dataset
                   of 578 handwriting sequences with a character bigram
                   consisting of 1,016 Japanese educational Kanji and 71
                   Hiragana characters. The proposed method demonstrated
                   promising performance with 69.2\% of handwriting
                   sequences being correctly recognized when different
                   stroke order was permitted, and the rate was improved
                   up to 88.0\% when characters were written with fixed
                   stroke order.},
  categories = {HWR, jaist},
  month = aug,
  pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2003/Shimodaira2003ICDAR.pdf},
  year = 2003
}
@article{Nakai1994IEICE06,
  author     = {Nakai, Mitsuru and Shimodaira, Hiroshi and Sagayama, Shigeki},
  title      = {{Prosodic Phrase Segmentation Based on Pitch-Pattern Clustering}},
  journal    = {Electronics and Communications in Japan, Part 3},
  volume     = {77},
  number     = {6},
  pages      = {80--91},
  month      = jun,
  year       = {1994},
  note       = {(in Japanese)},
  categories = {F0, jaist}
}
@inproceedings{cuayahuitletal_slt06,
  author = {Heriberto Cuay{\'a}huitl and Steve Renals and Oliver
                   Lemon and Hiroshi Shimodaira},
  title = {Reinforcement Learning of Dialogue Strategies With
                   Hierarchical Abstract Machines},
  booktitle = {Proc. of IEEE/ACL Workshop on Spoken Language
                   Technology (SLT)},
  abstract = {In this paper we propose partially specified dialogue
                   strategies for dialogue strategy optimization, where
                   part of the strategy is specified deterministically and
                   the rest optimized with Reinforcement Learning (RL). To
                   do this we apply RL with Hierarchical Abstract Machines
                   (HAMs). We also propose to build simulated users using
                   HAMs, incorporating a combination of hierarchical
                   deterministic and probabilistic behaviour. We performed
                   experiments using a single-goal flight booking dialogue
                   system, and compare two dialogue strategies
                   (deterministic and optimized) using three types of
                   simulated user (novice, experienced and expert). Our
                   results show that HAMs are promising for both dialogue
                   optimization and simulation, and provide evidence that
                   indeed partially specified dialogue strategies can
                   outperform deterministic ones (on average 4.7 fewer
                   system turns) with faster learning than the traditional
                   RL framework.},
  categories = {reinforcement learning, spoken dialogue systems},
  month = dec,
  pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2006/ham-slt2006.pdf},
  year = 2006
}
@inproceedings{Nakai1997Eurospeech,
  author     = {Nakai, Mitsuru and Shimodaira, Hiroshi},
  title      = {{On Representation of Fundamental Frequency of Speech for Prosody Analysis Using Reliability Function}},
  booktitle  = {Proc. EuroSpeech'97},
  pages      = {243--246},
  month      = sep,
  year       = {1997},
  categories = {f0, jaist},
  pdf        = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1997/Nakai1997Eurospeech.pdf}
}
@inproceedings{dziemianko_interspeech2009,
  author = {Michal Dziemianko and Gregor Hofer and Hiroshi
                   Shimodaira},
  title = {{HMM}-Based Automatic Eye-Blink Synthesis from Speech},
  booktitle = {Proc. Interspeech},
  pages = {1799--1802},
  address = {Brighton, UK},
  abstract = {In this paper we present a novel technique to
                   automatically synthesise eye blinking from a speech
                   signal. Animating the eyes of a talking head is
                   important as they are a major focus of attention during
                   interaction. The developed system predicts eye blinks
                   from the speech signal and generates animation
                   trajectories automatically employing a ``Trajectory
                   Hidden Markov Model''. The evaluation of the
                   recognition performance showed that the timing of
                   blinking can be predicted from speech with an F-score
                   value upwards of 52\%, which is well above chance.
                   Additionally, a preliminary perceptual evaluation was
                   conducted, that confirmed that adding eye blinking
                   significantly improves the perception the character.
                   Finally it showed that the speech synchronised
                   synthesised blinks outperform random blinking in
                   naturalness ratings.},
  categories = {animation, motion synthesis, time series analysis,
                   trajectory model},
  month = sep,
  pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2009/dziemianko_interspeech2009.pdf},
  year = 2009
}
@inproceedings{Keeni2003ICEIS,
  author    = {Keeni, Kanad and Goto, Kunio and Shimodaira, Hiroshi},
  title     = {{On fast learning of Multi-layer Feed-forward Neural Networks Using Back Propagation}},
  booktitle = {International Conference on Enterprise and Information Systems (ICEIS2003)},
  pages     = {266--271},
  month     = apr,
  year      = {2003},
  abstract  = {This study discusses the subject of training data
    selection for neural networks using back propagation.
    We have made only one assumption that there are no
    overlapping of training data belonging to different
    classes, in other words the training data is
    linearly/semi-linearly separable . Training data is
    analyzed and the data that affect the learning process
    are selected based on the idea of Critical points. The
    proposed method is applied to a classification problem
    where the task is to recognize the characters A,C and
    B,D. The experimental results show that in case of
    batch mode the proposed method takes almost 1/7 of real
    and 1/10 of user training time required for
    conventional method. On the other hand in case of
    online mode the proposed method takes 1/3 of training
    epochs, 1/9 of real and 1/20 of user and 1/3 system
    time required for the conventional method. The
    classification rate of training and testing data are
    the same as it is with the conventional method. }
}
% NIPS 2001 paper introducing the dynamic time-alignment kernel SVM (DTAK-SVM).
@article{Shimodaira2001NIPS,
  author     = {Shimodaira, Hiroshi and Noma, Ken-ichi and Nakai, Mitsuru
                   and Sagayama, Shigeki},
  title      = {{Dynamic Time-Alignment Kernel in Support Vector
                   Machine}},
  journal    = {Advances in Neural Information Processing Systems 14,
                   NIPS2001},
  volume     = {2},
  pages      = {921--928},
  month      = dec,
  year       = {2001},
  categories = {ml, svm, jaist},
  pdf        = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2001/Shimodaira2001NIPS.pdf},
  abstract   = { A new class of Support Vector Machine (SVM) that is
                   applicable to sequential-pattern recognition such as
                   speech recognition is developed by incorporating an
                   idea of non-linear time alignment into the kernel
                   function. Since the time-alignment operation of
                   sequential pattern is embedded in the new kernel
                   function, standard SVM training and classification
                   algorithms can be employed without further
                   modifications. The proposed SVM (DTAK-SVM) is evaluated
                   in speaker-dependent speech recognition experiments of
                   hand-segmented phoneme recognition. Preliminary
                   experimental results show comparable recognition
                   performance with hidden Markov models (HMMs). },
}
% Chapter 22 (pages 343--360) of the book "Computing Prosody", 1997.
% Stored as incollection so that the chapter title and the book title are both
% printed; the chapter number lives in its own field.
% NOTE(review): publisher and editor are not recorded in this entry; standard
% styles expect a publisher for incollection -- add once confirmed.
@incollection{Nakai1997Book,
  author = {Mitsuru Nakai and Harald Singer and Yoshinori Sagisaka
                   and Hiroshi Shimodaira},
  title = {{Accent Phrase Segmentation by F0 Clustering Using
                   Superpositional Modeling}},
  booktitle = {Computing Prosody},
  chapter = {22},
  pages = {343--360},
  categories = {f0, atr, jaist},
  month = jan,
  year = 1997
}
% Poster at Siggraph 2007 on speech-driven head motion synthesis.
@misc{Hofer_Shimodaira:sigg:2007,
  author       = {Hofer, Gregor and Shimodaira, Hiroshi and Yamagishi, Junichi},
  title        = {Speech-driven Head Motion Synthesis based on a
                   Trajectory Model},
  howpublished = {Poster at Siggraph 2007},
  address      = {San Diego, USA},
  year         = {2007},
  pdf          = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2007/siggraph07.pdf},
}
% IEICE technical report (in Japanese) on synchronising an agent's head motion
% with its utterance.
@inproceedings{Matsushita2002HIS03,
  author = {Yoshinori Matsushita and Shin-ichi Kawamoto and
                   Mitsuru Nakai and Hiroshi Shimodaira and Shigeki
                   Sagayama},
  title = {{A Head-Behavior Synchronization Model with Utterance
                   for Anthropomorphic Spoken-Dialog Agent}},
  booktitle = {Technical Report of IEICE, HIS2001},
  note = {(in Japanese)},
  abstract = { A novel method of synchronously synthesizing the head
                   motion of an anthropomorphic spoken dialog agent with
                   its utterance is proposed. Although much efforts have
                   been taken to synchronize the lip motion with
                   utterance, very few research exist for such head-motion
                   control. A neural network is employed to learn the
                   relationship between the acoustic features of the
                   utterance and the head motion that are measured by a
                   motion-capturing system. The proposed method enables to
                   simulate the facial animation automatically that moves
                   synchronously with any given utterances. Subjective
                   evaluation of the performance of the method is reported
                   as well. },
  categories = {lifelike-agent, jaist},
  month = mar,
  year = 2002
}
% ICCLSDP-'98 paper on parameter initialization of multi-layer feed-forward networks.
@inproceedings{Keeni1998ICCLSDP,
  author     = {Keeni, Kanad and Shimodaira, Hiroshi and Nakayama, Kenji
                   and Kotani, Kazunori},
  title      = {{On Parameter Initialization of Multi-layer
                   Feed-forward Neural Networks for Pattern Recognition}},
  booktitle  = {International Conference on Computational Linguistics,
                   Speech and Document Processing (ICCLSDP-'98), Calcutta,
                   India},
  pages      = {D8--12},
  month      = feb,
  year       = {1998},
  categories = {ann, jaist},
}
% EuroSpeech'93 paper on accent phrase segmentation with pitch pattern templates.
@inproceedings{Shimodaira1993Eurospeech,
  author = {Hiroshi Shimodaira and Mitsuru Nakai},
  title = {Accent Phrase Segmentation Using Transition
                   Probabilities Between Pitch Pattern Templates},
  booktitle = {Proc. EuroSpeech'93},
  pages = {1767--1770},
  abstract = { This paper proposes a novel method for segmenting
                   continuous speech into accent phrases by using a
                   prosodic feature 'pitch pattern'. The pitch pattern
                   extracted from input speech signals is divided into the
                   accent segments automatically by using the One-Stage DP
                   algorithm, in which reference templates representing
                   various types of accent patterns and connectivity
                   between them are used to find out the optimum sequence
                   of accent segments. In case of making the reference
                   templates from a large number of training data, the LBG
                   clustering algorithm is used to represent typical
                   accent patterns by a small number of templates.
                   Evaluation tests were carried out using the ATR
                   continuous speech database of a male speaker.
                   Experimental results showed more than 91 \% of phrase
                   boundaries were correctly detected. },
  categories = {F0, jaist},
  month = sep,
  ps = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1993/euro93.ps.gz},
  year = 1993
}
% SPR'98 workshop paper on regularised MCE/GPD learning applied to neural networks.
% NOTE(review): page numbers and month are not recorded for this entry.
@inproceedings{Shimodaira1998SPR,
  author = {Hiroshi Shimodaira and Jun Rokui and Mitsuru Nakai},
  title = {{Modified Minimum Classification Error Learning and
                   Its Application to Neural Networks}},
  booktitle = {2nd International Workshop on Statistical Techniques
                   in Pattern Recognition (SPR'98), Sydney, Australia},
  abstract = { A novel method to improve the generalization
                   performance of the Minimum Classification Error (MCE) /
                   Generalized Probabilistic Descent (GPD) learning is
                   proposed. The MCE/GPD learning proposed by Juang and
                   Katagiri in 1992 results in better recognition
                   performance than the maximum-likelihood (ML) based
                   learning in various areas of pattern recognition.
                   Despite its superiority in recognition performance, as
                   well as other learning algorithms, it still suffers
                   from the problem of ``over-fitting'' to the training
                   samples. In the present study, a regularization
                   technique has been employed to the MCE learning to
                   overcome this problem. Feed-forward neural networks are
                   employed as a recognition platform to evaluate the
                   recognition performance of the proposed method.
                   Recognition experiments are conducted on several sorts
                   of data sets. },
  categories = {mce, ann, jaist},
  pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1998/spr98.pdf},
  year = 1998
}
% 1997 conference paper on output-layer representation for Kana character recognition.
% The conference location is kept in the note field.
@inproceedings{Keeni1997ICPPOL,
  author = {Kanad Keeni and Hiroshi Shimodaira},
  title = {{On Representation of Output Layer for Recognizing
                   Japanese Kana Characters Using Neural Networks}},
  booktitle = {Proc. the 17th International Conference on Computer
                   Processing of Oriental Languages},
  pages = {305--308},
  note = {Baptist University, Kowloon Tong, Hong Kong},
  categories = {ann, jaist},
  month = apr,
  year = 1997
}
% ICASSP 2002 paper on Jacobian joint adaptation to noise, channel and vocal tract length.
@inproceedings{Shimodaira2002ICASSP,
  author = {Hiroshi Shimodaira and Nobuyoshi Sakai and Mitsuru
                   Nakai and Shigeki Sagayama},
  title = {{Jacobian Joint Adaptation to Noise, Channel and Vocal
                   Tract Length}},
  booktitle = {Proc. of ICASSP2002},
  pages = {197--200},
  abstract = {A new Jacobian approach that linearly decomposes the
                   composite of additive noise, multiplicative noise
                   (channel transfer function) and speaker's vocal tract
                   length, and adapts the acoustic model parameters
                   simultaneously to these factors is proposed in this
                   paper. Due to the fact that these factors non-linearly
                   degrade the observed features for speech recognition,
                   existing approaches fail to adapt the acoustic models
                   adequately. Approximating the nonlinear operation by a
                   linear model enables to employ the least square error
                   estimation of the factors and adapt the acoustic model
                   parameters with small amount of speech samples. Speech
                   recognition experiments on ATR isolated word database
                   demonstrate significant reduction of error rates, which
                   supports the effectiveness of the proposed scheme. },
  categories = {asr, jaist},
  month = may,
  pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2002/Shimodaira2002ICASSP.pdf},
  year = 2002
}
% Trans. IEICE (A) article (in Japanese) on prosodic phrase segmentation.
@article{Nakai1994IEICE02,
  author     = {Nakai, Mitsuru and Shimodaira, Hiroshi and Sagayama, Shigeki},
  title      = {Prosodic Phrase Segmentation Based on Pitch-Pattern
                   Clustering},
  journal    = {Trans. IEICE (A)},
  volume     = {J77-A},
  number     = {2},
  pages      = {206--214},
  month      = feb,
  year       = {1994},
  note       = {(in Japanese)},
  categories = {F0, jaist},
}
% IASTED 2001 conference paper on extracting e-mail addresses from fax messages.
@inproceedings{Keeni2001SPPRA,
  author = {Kanad Keeni and Kunio Goto and Hiroshi Shimodaira},
  title = {{On Extraction of E-Mail Address from Fax Message for
                   Automatic Delivery to Individual Recipient}},
  booktitle = {IASTED International Conference on Signal Processing
                   Pattern Recognition and Application},
  categories = {nn, jaist},
  month = jul,
  year = 2001
}
% Book chapter (pages 187--212) on the Galatea toolkit in the Springer volume
% "Life-Like Characters", edited by Helmut Prendinger et al.
% Stored as incollection: book title, editor and publisher each get their own
% field instead of being packed into a journal name.
@incollection{Kawamoto2003Book,
  author = {Shin-ichi Kawamoto and Hiroshi Shimodaira and Shigeki
                   Sagayama and others},
  title = {{Galatea: Open-Source Software for Developing
                   Anthropomorphic Spoken Dialog Agents}},
  booktitle = {Life-Like Characters. Tools, Affective Functions, and
                   Applications},
  editor = {Helmut Prendinger and others},
  publisher = {Springer},
  pages = {187--212},
  abstract = {Galatea is a software toolkit to develop a human-like
                   spoken dialog agent. In order to easily integrate the
                   modules of different characteristics including speech
                   recognizer, speech synthesizer, facial-image
                   synthesizer and dialog controller, each module is
                   modeled as a virtual machine having a simple common
                   interface and connected to each other through a broker
                   (communication manager). Galatea employs model-based
                   speech and facial-image synthesizers whose model
                   parameters are adapted easily to those for an existing
                   person if his/her training data is given. The software
                   toolkit that runs on both UNIX/Linux and Windows
                   operating systems will be publicly available in the
                   middle of 2003. },
  categories = {lifelike-agent, jaist},
  month = nov,
  pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2003/Kawamoto2003Book.pdf},
  year = 2003
}
% ICSLP'98 paper on an F0 reliability function for prosodic command analysis.
@inproceedings{Nakai1998ICSLP,
  author     = {Nakai, Mitsuru and Shimodaira, Hiroshi},
  title      = {{The Use of F0 Reliability Function for Prosodic
                   Command Analysis on F0 Contour Generation Model}},
  booktitle  = {Proc. ICSLP'98},
  month      = dec,
  year       = {1998},
  categories = {asr, atr, jaist},
  pdf        = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1998/Nakai1998ICSLP.pdf},
}
% Interspeech 2007 paper on HMM-based head motion prediction from speech.
@inproceedings{Hofer_Shimodaira:proc:2007,
  author    = {Hofer, Gregor and Shimodaira, Hiroshi},
  title     = {Automatic Head Motion Prediction from Speech Data},
  booktitle = {Proc. Interspeech 2007},
  address   = {Antwerp, Belgium},
  month     = aug,
  year      = {2007},
  pdf       = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2007/interspeech07.pdf},
  abstract  = {In this paper we present a novel approach to generate
                   a sequence of head motion units given some speech. The
                   modelling approach is based on the notion that head
                   motion can be divided into a number of short
                   homogeneous units that can each be modelled
                   individually. The system is based on Hidden Markov
                   Models (HMM), which are trained on motion units and act
                   as a sequence generator. They can be evaluated by an
                   accuracy measure. A database of motion capture data was
                   collected and manually annotated for head motion and is
                   used to train the models. It was found that the model
                   is good at distinguishing high activity regions from
                   regions with less activity with accuracies around 75
                   percent. Furthermore the model is able to distinguish
                   different head motion patterns based on speech features
                   somewhat reliably, with accuracies reaching almost 70
                   percent.},
}
% Poster at SCA 2007 on lip motion synthesis with a trajectory hidden Markov model.
@misc{Hofer_Shimodaira:sca:2007,
  author       = {Hofer, Gregor and Shimodaira, Hiroshi and Yamagishi, Junichi},
  title        = {Lip motion synthesis using a context dependent
                   trajectory hidden {M}arkov model},
  howpublished = {Poster at SCA 2007},
  address      = {San Diego, USA},
  year         = {2007},
  pdf          = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2007/sca07.pdf},
}
% ICSLP94 paper on accent phrase segmentation via N-best pitch pattern template sequences.
@inproceedings{Nakai1994ICSLP,
  author = {Mitsuru Nakai and Hiroshi Shimodaira},
  title = {{Accent Phrase Segmentation by Finding N-best
                   Sequences of Pitch Pattern Templates}},
  booktitle = {Proc. ICSLP94, 8.10},
  pages = {347--350},
  categories = {F0, jaist},
  month = sep,
  pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1994/Nakai1994ICSLP.pdf},
  year = 1994
}
% ICDAR'01 paper on substroke HMMs for on-line Kanji handwriting recognition.
@inproceedings{Nakai2001ICDAR,
  author     = {Nakai, Mitsuru and Akira, Naoto and Shimodaira, Hiroshi
                   and Sagayama, Shigeki},
  title      = {{Substroke Approach to {HMM}-based On-line Kanji
                   Handwriting Recognition}},
  booktitle  = {Proc. of ICDAR'01},
  pages      = {491--495},
  month      = sep,
  year       = {2001},
  categories = {hwr, jaist},
  pdf        = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2001/Nakai2001ICDAR.pdf},
  abstract   = { A new method is proposed for on-line handwriting
                   recognition of Kanji characters. The method employs
                   substroke HMMs as minimum units to constitute Japanese
                   Kanji characters and utilizes the direction of pen
                   motion. The main motivation is to fully utilize the
                   continuous speech recognition algorithm by relating
                   sentence speech to Kanji character, phonemes to
                   substrokes, and grammar to Kanji structure. The
                   proposed system consists input feature analysis,
                   substroke HMMs, a character structure dictionary and a
                   decoder. The present approach has the following
                   advantages over the conventional methods that employ
                   whole character HMMs. 1) Much smaller memory
                   requirement for dictionary and models. 2) Fast
                   recognition by employing efficient substroke network
                   search. 3) Capability of recognizing characters not
                   included in the training data if defined as a sequence
                   of substrokes in the dictionary. 4) Capability of
                   recognizing characters written by various different
                   stroke orders with multiple definitions per one
                   character in the dictionary. 5) Easiness in HMM
                   adaptation to the user with a few sample character
                   data. },
}
% Trans. IEICE (D-II) article (in Japanese) on voiced sound detection in heavy noise.
% Journal name is spelled the same way as the other IEICE D-II entries in this file.
@article{Kanno1997IEICE01,
  author = {Sukeyasu Kanno and Hiroshi Shimodaira},
  title = {{Voiced Sound Detection under Nonstationary and Heavy
                   Noisy Environment Using the Prediction Error of
                   Low-Frequency Spectrum}},
  journal = {Trans. IEICE (D-II)},
  volume = {J80-D-II},
  number = {1},
  pages = {26--35},
  note = {(in Japanese)},
  categories = {asr, jaist},
  month = jan,
  year = 1997
}
% ICONIP'98 paper on automatic generation of initial weights and target outputs.
@inproceedings{Keeni1998ICONIP,
  author = {Kanad Keeni and Kenji Nakayama and Hiroshi Shimodaira},
  title = {{Automatic Generation of Initial Weights and Target
                   Outputs of Multi-layer Neural Networks and its
                   Application to Pattern Classification}},
  booktitle = {International Conference on Neural Information
                   Processing (ICONIP'98)},
  pages = {1622--1625},
  categories = {ann, jaist},
  month = oct,
  year = 1998
}
% 1999 ICANN paper on multistage building learning based on a misclassification measure.
% NOTE(review): page numbers are not recorded for this entry.
@inproceedings{Rokui1999ICANN09,
  author = {Jun Rokui and Hiroshi Shimodaira},
  title = {{Multistage Building Learning based on
                   Misclassification Measure}},
  booktitle = {9-th International Conference on Artificial Neural
                   Networks, Edinburgh, UK},
  categories = {nn, mce, jaist},
  month = sep,
  year = 1999
}
% INTERSPEECH 2007 paper on hierarchical dialogue optimization with semi-Markov
% decision processes. The accented letter in the first author's surname is
% written as a LaTeX escape so the entry also works with 8-bit BibTeX.
@inproceedings{cuayahuitletal_interspeech07,
  author = {Heriberto Cuay{\'a}huitl and Steve Renals and Oliver
                   Lemon and Hiroshi Shimodaira},
  title = {Hierarchical Dialogue Optimization Using Semi-Markov
                   Decision Processes},
  booktitle = {Proc. of INTERSPEECH},
  abstract = {This paper addresses the problem of dialogue
                   optimization on large search spaces. For such a
                   purpose, in this paper we propose to learn dialogue
                   strategies using multiple Semi-Markov Decision
                   Processes and hierarchical reinforcement learning. This
                   approach factorizes state variables and actions in
                   order to learn a hierarchy of policies. Our experiments
                   are based on a simulated flight booking dialogue system
                   and compare flat versus hierarchical reinforcement
                   learning. Experimental results show that the proposed
                   approach produced a dramatic search space reduction
                   (99.36\%), and converged four orders of magnitude
                   faster than flat reinforcement learning with a very
                   small loss in optimality (on average 0.3 system turns).
                   Results also report that the learnt policies
                   outperformed a hand-crafted one under three different
                   conditions of ASR confidence levels. This approach is
                   appealing to dialogue optimization due to faster
                   learning, reusable subsolutions, and scalability to
                   larger problems.},
  categories = {Spoken dialogue systems, semi-Markov decision
                   processes, hierarchical reinforcement learning},
  month = aug,
  pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2007/SMDPs-interspeech2007.pdf},
  year = 2007
}
% ICPR 2002 paper on pen pressure features for substroke-HMM handwriting recognition.
@inproceedings{Nakai2002ICPR,
  author = {Mitsuru Nakai and Takashi Sudo and Hiroshi Shimodaira
                   and Shigeki Sagayama},
  title = {{Pen Pressure Features for Writer-Independent On-Line
                   Handwriting Recognition Based on Substroke {HMM}}},
  booktitle = {Proc. of ICPR2002, III},
  pages = {220--223},
  categories = {hwr, jaist},
  month = aug,
  pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2002/Nakai2002ICPR.pdf},
  year = 2002
}
% 1999 IEEE conference paper on a training scheme for multi-layer feed-forward networks.
@inproceedings{Keeni1999ICCIMA,
  author = {Kanad Keeni and Kenji Nakayama and Hiroshi Shimodaira},
  title = {{A Training Scheme for Pattern Classification Using
                   Multi-layer Feed-forward Neural Networks}},
  booktitle = {IEEE International Conference on Computational
                   Intelligence and Multimedia Applications},
  pages = {307--311},
  categories = {ann, jaist},
  month = sep,
  year = 1999
}
% Interspeech 2008 paper on speech-driven lip motion generation with a trajectory HMM.
% Quotation marks in the abstract use TeX-style quotes, as in other abstracts of
% this file.
@inproceedings{lips08-gregpr,
  author = {Gregor Hofer and Junichi Yamagishi and Hiroshi
                   Shimodaira},
  title = {Speech-driven Lip Motion Generation with a Trajectory
                   {HMM}},
  booktitle = {Proc. Interspeech 2008},
  pages = {2314--2317},
  address = {Brisbane, Australia},
  abstract = {Automatic speech animation remains a challenging
                   problem that can be described as finding the optimal
                   sequence of animation parameter configurations given
                   some speech. In this paper we present a novel technique
                   to automatically synthesise lip motion trajectories
                   from a speech signal. The developed system predicts lip
                   motion units from the speech signal and generates
                   animation trajectories automatically employing a
                   ``Trajectory Hidden Markov Model''. Using the MLE
                   criterion, its parameter generation algorithm produces
                   the optimal smooth motion trajectories that are used to
                   drive control points on the lips directly.
                   Additionally, experiments were carried out to find a
                   suitable model unit that produces the most accurate
                   results. Finally a perceptual evaluation was conducted,
                   that showed that the developed motion units perform
                   better than phonemes.},
  categories = {visual speech synthesis, trajectory HMM, HTS},
  key = {lips08-gregpr},
  month = sep,
  pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2008/GregorLipsChallenge08.pdf},
  year = 2008
}
% ICASSP 2000 paper proposing the asynchronous-transition HMM (AT-HMM).
@inproceedings{Matsuda2000ICASSP,
  author     = {Matsuda, Shigeki and Nakai, Mitsuru and Shimodaira, Hiroshi
                   and Sagayama, Shigeki},
  title      = {{Asynchronous-Transition {HMM}}},
  booktitle  = {Proc. ICASSP 2000 (Istanbul, Turkey), Vol. II},
  pages      = {1001--1004},
  month      = jun,
  year       = {2000},
  categories = {asr, atr, jaist},
  pdf        = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2000/Matsuda2000ICASSP.pdf},
  abstract   = { We propose a new class of hidden Markov model (HMM)
                   called asynchronous-transition HMM (AT-HMM). Opposed to
                   conventional HMMs where hidden state transition occurs
                   simultaneously to all features, the new class of HMM
                   allows state transitions asynchronous between
                   individual features to better model asynchronous
                   timings of acoustic feature changes. In this paper, we
                   focus on a particular class of AT-HMM with sequential
                   constraints introducing a concept of ``state tying
                   across time''. To maximize the advantage of the new
                   model, we also introduce feature-wise state tying
                   technique. Speaker-dependent speech recognition
                   experiments demonstrated that reduced error rates more
                   than 30\% and 50\% in phoneme and isolated word
                   recognition, respectively, compared with conventional
                   HMMs. },
}
% ICDAR 2003 paper on generating a hierarchical dictionary for stroke-order free
% Kanji handwriting recognition. Words fused or split in the abstract have been
% separated or rejoined; the wording itself is unchanged.
@inproceedings{Nakai2003ICDAR,
  author = {Mitsuru Nakai and Hiroshi Shimodaira and Shigeki
                   Sagayama},
  title = {{Generation of Hierarchical Dictionary for
                   Stroke-order Free Kanji Handwriting Recognition Based
                   on Substroke {HMM}}},
  booktitle = {Proc. of ICDAR2003},
  pages = {514--518},
  abstract = {This paper describes a method of generating a
                   Kanji hierarchical structured dictionary for
                   stroke-number and stroke-order free handwriting
                   recognition based on sub-stroke HMM. In stroke-based
                   methods, a large number of stroke-order variations can
                   be easily expressed by just adding different stroke
                   sequences to the dictionary and it is not necessary to
                   train new reference patterns. The hierarchical
                   structured dictionary has an advantage that thousands
                   of stroke-order variations of Kanji characters can be
                   produced using a small number of stroke-order rules
                   defining Kanji parts. Moreover, the recognition speed
                   is fast since common sequences are shared in a
                   substroke network, even if the total number of
                   stroke-order combinations becomes enormous practically.
                   In experiments, 300 different stroke-order rules of
                   Kanji parts were statistically chosen by using 60
                   writers' handwritings of 1,016 educational
                   Kanji characters. By adding these new stroke-order rules
                   to the dictionary, about 9,000 variations of different
                   stroke-orders were generated for 2,965 JIS 1st level
                   Kanji characters. As a result, we successfully improved
                   the recognition accuracy from 82.6\% to 90.2\% for
                   stroke-order free handwritings.},
  categories = {HWR, jaist},
  month = aug,
  pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2003/Nakai2003ICDAR.pdf},
  year = 2003
}
% ICSLP-92 paper on robust pitch detection using narrow band spectrum analysis.
@inproceedings{Shimodaira1992ICSLP,
  author     = {Shimodaira, Hiroshi and Nakai, Mitsuru},
  title      = {Robust Pitch Detection by Narrow Band Spectrum
                   Analysis},
  booktitle  = {Proc. ICSLP-92},
  pages      = {1597--1600},
  month      = oct,
  year       = {1992},
  categories = {F0, jaist},
  pdf        = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1992/icslp92.pdf},
  abstract   = {This paper proposes a new technique for detecting
                   pitch patterns which is useful for automatic speech
                   recognition, by using a narrow band spectrum analysis.
                   The motivation of this approach is that humans perceive
                   some kind of pitch in whispers where no fundamental
                   frequencies can be observed, while most of the pitch
                   determination algorithm (PDA) fails to detect such
                   perceptual pitch. The narrow band spectrum analysis
                   enable us to find pitch structure distributed locally
                   in frequency domain. Incorporating this technique into
                   PDA's is realized to applying the technique to the lag
                   window based PDA. Experimental results show that pitch
                   detection performance could be improved by 4\% for
                   voiced sounds and 8\% for voiceless sounds.},
}
% IEEE Computer Graphics and Applications article (vol. 31, 2011) on Carnival.
% NOTE(review): issue number and month are not recorded for this entry.
@article{10.1109/MCG.2011.71,
  author = {Michael A. Berger and Gregor Hofer and Hiroshi
                   Shimodaira},
  title = {Carnival -- Combining Speech Technology and Computer
                   Animation},
  journal = {IEEE Computer Graphics and Applications},
  volume = {31},
  pages = {80--89},
  address = {Los Alamitos, CA, USA},
  doi = {10.1109/MCG.2011.71},
  issn = {0272-1716},
  publisher = {IEEE Computer Society},
  year = 2011
}
% ASRU 2005 workshop paper on simulating human-computer dialogues with HMMs.
% The accented letter in the first author's surname is written as a LaTeX escape
% so the entry also works with 8-bit BibTeX.
@inproceedings{cuayahuitletal_asru05,
  author = {Heriberto Cuay{\'a}huitl and Steve Renals and Oliver
                   Lemon and Hiroshi Shimodaira},
  title = {Human-Computer Dialogue Simulation Using Hidden Markov
                   Models},
  booktitle = {Proc. of IEEE Workshop on Automatic Speech Recognition
                   and Understanding (ASRU)},
  abstract = {This paper presents a probabilistic method to simulate
                   task-oriented human-computer dialogues at the intention
                   level, that may be used to improve or to evaluate the
                   performance of spoken dialogue systems. Our method uses
                   a network of Hidden Markov Models (HMMs) to predict
                   system and user intentions, where a ``language model''
                   predicts sequences of goals and the component HMMs
                   predict sequences of intentions. We compare standard
                   HMMs, Input HMMs and Input-Output HMMs in an effort to
                   better predict sequences of intentions. In addition, we
                   propose a dialogue similarity measure to evaluate the
                   realism of the simulated dialogues. We performed
                   experiments using the DARPA Communicator corpora and
                   report results with three different metrics: dialogue
                   length, dialogue similarity and precision-recall.},
  categories = {dialogue simulation, hidden markov models},
  month = nov,
  pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2005/hcp-asru2005.pdf},
  year = 2005
}
% Invited review paper at the 2001 ISCA Workshop on Adaptation Methods.
@inproceedings{Sagayama2001ISCA08b,
  author = {Shigeki Sagayama and Koichi Shinoda and Mitsuru Nakai
                   and Hiroshi Shimodaira},
  title = {{Analytic Methods for Acoustic Model Adaptation: A
                   Review}},
  booktitle = {Proc. ISCA Workshop on Adaptation Methods (Sophia
                   Antipolis France)},
  pages = {67--76},
  note = {Invited Paper},
  categories = {asr, jaist},
  month = aug,
  year = 2001
}
% KES2006 paper on a thesaurus-diagram learning support tool for English learners.
@inproceedings{Shimodaira:kes06,
  author     = {Shimodaira, Chie and Shimodaira, Hiroshi and Kunifuji, Susumu},
  title      = {{A Divergent-Style Learning Support Tool for English
                   Learners Using a Thesaurus Diagram}},
  booktitle  = {{Proc. KES2006}},
  address    = {Bournemouth, United Kingdom},
  month      = oct,
  year       = {2006},
  categories = {knowledge engineering},
  pdf        = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2006/kes2006.pdf},
  abstract   = { This paper proposes an English learning support tool
                   which provides users with divergent information to find
                   the right words and expressions. In contrast to a
                   number of software tools for English translation and
                   composition, the proposed tool is designed to give
                   users not only the right answer to the user's query but
                   also a lot of words and examples which are relevant to
                   the query. Based on the lexical information provided by
                   the lexical database, WordNet, the proposed tool
                   provides users with a thesaurus diagram, in which
                   synonym sets and relation links are presented in
                   multiple windows to help users to choose adequate words
                   and understand similarities and differences between
                   words. Subjective experiments are carried out to
                   evaluate the system. },
}
% ICONIP2003 paper on filtering network intrusion detection alarms with neural networks.
% NOTE(review): page numbers are not recorded for this entry.
@inproceedings{Keeni2003ICONIP,
  author = {Kanad Keeni and Kunio Goto and Hiroshi Shimodaira},
  title = {{Automatic Filtering of Network Intrusion Detection
                   System Alarms Using Multi-layer Feed-forward Neural
                   Networks}},
  booktitle = {International Conference on Neural Information
                   Processing (ICONIP2003)},
  categories = {ann},
  month = jun,
  year = 2003
}
% IEICE Transactions (D-II) journal article, in Japanese: context-dependent
% sub-stroke HMMs for on-line Kanji handwriting recognition.
% Journal and volume are spelled the same way as in the Nakai1997IEICE entry.
% NOTE(review): the note still says "in press" and no pages are recorded -
% confirm the final publication details.
@article{Tokuno2005IEICE01,
  author = {Junko Tokuno and Nobuhito Inami and Mitsuru Nakai and
                   Hiroshi Shimodaira and Shigeki Sagayama},
  title = {{Context-dependent Sub-stroke Model for {HMM}-based
                   On-line Handwriting Recognition}},
  journal = {Trans. IEICE (D-II)},
  volume = {J88-D-II},
  number = {8},
  note = {(in press), (in Japanese)},
  abstract = { A new method is proposed for on-line Kanji
                   handwriting recognition. The method employs sub-stroke
                   HMMs as minimum units to constitute Kanji characters
                   and utilizes the direction of pen motion. The present
                   approach has the following advantages over the
                   conventional methods that employ character HMMs. 1)
                   Much smaller memory requirement for dictionary and
                   models. 2) Fast recognition by employing efficient
                   sub-stroke network search. 3) Capability of recognizing
                   characters not included in the training data if defined
                   as a sequence of sub-strokes in the dictionary. In
                   experiments, we have achieved a correct recognition
                   rate of above 96\% by using JAIST-IIPL database that
                   includes 1,016 educational Kanji characters. },
  categories = {online handwritten character recognition},
  month = aug,
  year = 2005
}
% IEICE Transactions (D-II) journal article, in Japanese: accent phrase
% segmentation from F0 templates.
@article{Nakai1997IEICE,
  author     = {Mitsuru Nakai and Harald Singer and Yoshimori Sagisaka and
                Hiroshi Shimodaira},
  title      = {{Accent Phrase Segmentation Based on F0 Templates Using a
                Superpositional Prosodic Model}},
  journal    = {Trans. IEICE (D-II)},
  volume     = {J80-D-II},
  number     = {10},
  pages      = {2605--2614},
  month      = oct,
  year       = {1997},
  note       = {(in Japanese)},
  categories = {jaist}
}
% IASTED AIA 2002 conference paper: selecting training data to speed up
% back-propagation learning.
@inproceedings{Keeni2002AIA,
  author = {Kanad Keeni and Hiroshi Shimodaira},
  title = {{On Selection of Training Data for Fast Learning of
                   Neural Networks Using Back Propagation}},
  booktitle = {IASTED International Conference on Artificial
                   Intelligence and Applications (AIA2002)},
  pages = {474--478},
  month = sep,
  year = 2002
}
% ICONIP'98 conference paper: modified minimum classification error (MCE)
% learning applied to neural networks. Page numbers are not recorded.
@inproceedings{Rokui1998ICONIP,
  author     = {Jun Rokui and Hiroshi Shimodaira},
  title      = {{Modified Minimum Classification Error Learning and Its
                Application to Neural Networks}},
  booktitle  = {ICONIP'98, Kitakyushu, Japan},
  month      = oct,
  year       = {1998},
  categories = {ann, mce, jaist}
}
% IIZUKA'98 conference paper on a system for human problem solving.
% Page numbers are not recorded.
@inproceedings{Iida1998IIZUKA,
  author = {Eiji Iida and Hiroshi Shimodaira and Susumu Kunifuji
                   and Masayuki Kimura},
  title = {{A System to Perform Human Problem Solving}},
  booktitle = {The 5th International Conference on Soft Computing and
                   Information / Intelligent Systems (IIZUKA'98)},
  categories = {jaist},
  month = oct,
  year = 1998
}
% ICSLP 2000 conference paper: Jacobian adaptation of HMMs with
% initial-model selection for noisy speech recognition.
@inproceedings{Shimodaira2000ICSLP10,
  author     = {Hiroshi Shimodaira and Toshihiko Akae and Mitsuru Nakai and
                Shigeki Sagayama},
  title      = {{Jacobian Adaptation of {HMM} with Initial Model Selection
                for Noisy Speech Recognition}},
  booktitle  = {Proc. ICSLP2000},
  pages      = {1003--1006},
  month      = oct,
  year       = {2000},
  categories = {asr, jaist},
  pdf        = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2000/Shimodaira2000ICSLP10.pdf},
  abstract   = { An extension of Jacobian Adaptation (JA) of HMMs for
                degraded speech recognition is presented in which
                appropriate set of initial models is selected from a
                number of initial-model sets designed for different
                noise environments. Based on the first order Taylor
                series approximation in the acoustic feature domain, JA
                adapts the acoustic model parameters trained in the
                initial noise environment A to the new environment B
                much faster than PMC that creates the acoustic models
                for the target environment from scratch. Despite the
                advantage of JA to PMC, JA has a theoretical limitation
                that the change of acoustic parameters from the
                environment A to B should be small in order that the
                linear approximation holds. To extend the coverage of
                JA, the ideas of multiple sets of initial models and
                their automatic selection scheme are discussed.
                Speaker-dependent isolated-word recognition experiments
                are carried out to evaluate the proposed method. }
}
% HCI International '95 conference paper on human-interface (HI) design based
% on the costs of a human information-processing model. Pages not recorded.
% NOTE(review): the first author's name is missing from the author field
% (the value begins with a dangling "and"); restore it from the original
% publication. The citation key suggests a surname starting with "Koba" -
% TODO confirm. The field is left untouched rather than guessing a name.
@inproceedings{Koba1995HCIa,
  author = { and Hiroshi Shimodaira},
  title = {{HI Design Based on the Costs of Human
                   Information-processing Model}},
  booktitle = {HCI International'95},
  categories = {HI, jaist},
  month = jul,
  year = 1995
}
% HCI International 2003 conference paper: "blind handwriting" input
% interface for wearable computing, using sub-stroke HMM recognition.
% The first author is written given-name first ("Junko Tokuno"), matching the
% other entries by the same author, so BibTeX parses Tokuno as the surname.
@inproceedings{Tokuno2003HCII,
  author = {Junko Tokuno and Naoto Akira and Mitsuru Nakai and
                   Hiroshi Shimodaira and Shigeki Sagayama},
  title = {{Blind-handwriting Interface for Wearable Computing}},
  booktitle = {Proc. of Human - Computer Interaction (HCI)
                   International 2003, Volume 2},
  pages = {303--307},
  abstract = {This paper proposes a novel input interface that we
                   call "blind handwriting" for wearable computing. The
                   blind handwriting, which is a word similar to "blind
                   typing" of keyboard, is a particular writing style
                   where the user does not see the pen or the finger
                   movement. Without visual feedback, written characters
                   are distorted, as in the case when the user is
                   blindfolded, and therefore existing on-line handwriting
                   recognition systems fail to recognize them correctly.
                   The sub-stroke based hidden Markov model approach is
                   employed to tackle this problem. When the pen or touch
                   pad is used as an input device, the proposed interface
                   demonstrates a recognition rate of 83\% on a test set
                   of 61 people where each person wrote 1016 Japanese
                   Kanji characters. },
  categories = {HWR, jaist},
  month = jun,
  year = 2003
}
% PRICAI-02 workshop paper: open-source software for building an
% anthropomorphic spoken dialog agent.
@inproceedings{Kawamoto2002PRICAI,
  author     = {Shin-ichi Kawamoto and Hiroshi Shimodaira and
                Tsuneo Nitta and Takuya Nishimoto and
                Satoshi Nakamura and Katsunobu Itou and
                Shigeo Morishima and Tatsuo Yotsukura and
                Atsuhiko Kai and Akinobu Lee and
                Yoichi Yamashita and Takao Kobayashi and
                Keiichi Tokuda and Keikichi Hirose and
                Nobuaki Minematsu and Atsushi Yamada and
                Yasuharu Den and Takehito Utsuro and
                Shigeki Sagayama},
  title      = {{Open-source software for developing anthropomorphic
                spoken dialog agent}},
  booktitle  = {Proc. of PRICAI-02, International Workshop on Lifelike
                Animated Agents},
  pages      = {64--69},
  month      = aug,
  year       = {2002},
  categories = {lifelike-agent, jaist},
  pdf        = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2002/Kawamoto2002PRICAI.pdf}
}
% IWFHR-10 conference paper (2006): on-line handwritten Kanji recognition
% with a Bayesian-network post-processor over sub-stroke HMM output.
% Page numbers are not recorded.
@inproceedings{Shimodaira:iwfhr06,
  author     = {Junko Tokuno and Mitsuru Nakai and Hiroshi Shimodaira and
                Shigeki Sagayama and Masaki Nakagawa},
  title      = {{On-line Handwritten Character Recognition Selectively
                employing Hierarchical Spatial Relationships among
                Subpatterns}},
  booktitle  = {{Proc. IWFHR-10}},
  address    = {La Baule, France},
  month      = oct,
  year       = {2006},
  categories = {online handwriting recognition},
  abstract   = { This paper proposes an on-line handwritten character
                pattern recognition method that examines spatial
                relationships among subpatterns which are components of
                a character pattern. Conventional methods evaluating
                spatial relationships among subpatterns have not
                considered characteristics of deformed handwritings and
                evaluate all the spatial relationships equally.
                However, the deformations of spatial features are
                different within a character pattern. In our approach,
                we assume that the distortions of spatial features are
                dependent on the hierarchy of character patterns so
                that we selectively evaluate hierarchical spatial
                relationships of subpatterns by employing Bayesian
                network as a post-processor of our sub-stroke based HMM
                recognition system. Experiments of on-line handwritten
                Kanji character recognition with a lexicon of 1,016
                elementary characters revealed that the approach we
                propose improves the recognition accuracy for different
                types of deformations. }
}
% MMSP 2002 workshop paper: hidden Markov model for automatic transcription
% of MIDI signals. Page numbers are not recorded.
@inproceedings{Takeda2002MMSP,
  author = {Haruto Takeda and Naoki Saito and Tomoshi Otsuki and
                   Mitsuru Nakai and Hiroshi Shimodaira and Shigeki
                   Sagayama},
  title = {{Hidden Markov Model for Automatic Transcription of
                   MIDI Signals}},
  booktitle = {2002 International Workshop on Multimedia Signal
                   Processing},
  month = dec,
  pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2002/Takeda2002MMSP12.pdf},
  year = 2002
}