Publications by Hiroshi Shimodaira
hshimoda.bib
@article{Kawamoto2002IPSJ07,
  author  = {Shin-ichi Kawamoto and Hiroshi Shimodaira and others},
  title   = {{Design of Software Toolkit for Anthropomorphic Spoken
             Dialog Agent Software with Customization-oriented
             Features}},
  journal = {Information Processing Society of Japan (IPSJ) Journal},
  volume  = {43},
  number  = {7},
  pages   = {2249--2263},
  note    = {(in Japanese)},
  month   = jul,
  year    = 2002
}
@inproceedings{cuayahuitletal_interspeech06,
  author     = {Heriberto Cuay{\'a}huitl and Steve Renals and Oliver
                Lemon and Hiroshi Shimodaira},
  title      = {Learning Multi-Goal Dialogue Strategies Using
                Reinforcement Learning With Reduced State-Action Spaces},
  booktitle  = {Proc. of INTERSPEECH},
  abstract   = {Learning dialogue strategies using the reinforcement
                learning framework is problematic due to its expensive
                computational cost. In this paper we propose an
                algorithm that reduces a state-action space to one
                which includes only valid state-actions. We performed
                experiments on full and reduced spaces using three
                systems (with 5, 9 and 20 slots) in the travel domain
                using a simulated environment. The task was to learn
                multi-goal dialogue strategies optimizing single and
                multiple confirmations. Average results using
                strategies learnt on reduced spaces reveal the
                following benefits against full spaces: 1) less
                computer memory (94\% reduction), 2) faster learning
                (93\% faster convergence) and better performance (8.4\%
                less time steps and 7.7\% higher reward).},
  categories = {reinforcement learning, spoken dialogue systems},
  month      = sep,
  pdf        = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2006/rss-icslp2006.pdf},
  year       = 2006
}
@article{Ho2003Applied,
  author     = {Tu Bao Ho and Trong Dung Nguyen and Hiroshi Shimodaira
                and Masayuki Kimura},
  title      = {{A Knowledge Discovery System with Support for Model
                Selection and Visualization}},
  journal    = {Applied Intelligence},
  volume     = {19},
  pages      = {125--141},
  categories = {KDD},
  year       = 2003
}
@article{Keeni1996IEICE,
  author     = {Kanad Keeni and Hiroshi Shimodaira and Tetsuro Nishino
                and Yasuo Tan},
  title      = {{Recognition of Devanagari Characters Using Neural
                Networks}},
  journal    = {IEICE},
  volume     = {E79-D},
  number     = {5},
  pages      = {523--528},
  categories = {character-recognition, ann, jaist},
  month      = may,
  year       = 1996
}
@misc{Carnival_SIGGRAPH_2010,
  author       = {Michael Berger and Gregor Hofer and Hiroshi Shimodaira},
  title        = {Carnival: a modular framework for automated facial
                  animation},
  howpublished = {Poster at SIGGRAPH 2010},
  note         = {Bronze award winner, ACM Student Research Competition},
  abstract     = {We present a software framework for speech- or
                  text-driven animation--including a platform-independent
                  API and an application implementing it--which unifies
                  state-of-the-art speech technology and graphics
                  technology within a single system.},
  address      = {Los Angeles, Calif., USA},
  pdf          = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2010/carnival.pdf},
  year         = 2010
}
@inproceedings{Sagayama2001ISCA08a,
  author     = {Shigeki Sagayama and Yutaka Kato and Mitsuru Nakai and
                Hiroshi Shimodaira},
  title      = {{Jacobian Approach to Joint Adaptation to Noise,
                Channel and Vocal Tract Length}},
  booktitle  = {Proc. ISCA Workshop on Adaptation Methods (Sophia
                Antipolis, France)},
  pages      = {117--120},
  categories = {asr, jaist},
  month      = aug,
  year       = 2001
}
@inproceedings{Shimodaira1997Eurospeech,
  author     = {Hiroshi Shimodaira and Mitsuru Nakai and Akihiro
                Kumata},
  title      = {{Restoration of Pitch Pattern of Speech Based on a
                Pitch Generation Model}},
  booktitle  = {Proc. EuroSpeech'97},
  pages      = {512--524},
  abstract   = { In this paper a model-based approach for restoring a
                continuous fundamental frequency (F0) contour from the
                noisy output of an F0 extractor is investigated. In
                contrast to the conventional pitch trackers based on
                numerical curve-fitting, the proposed method employs a
                quantitative pitch generation model, which is often
                used for synthesizing F0 contour from prosodic event
                commands for estimating continuous F0 pattern. An
                inverse filtering technique is introduced for obtaining
                the initial candidates of the prosodic commands. In
                order to find the optimal command sequence from the
                commands efficiently, a beam-search algorithm and an
                N-best technique are employed. Preliminary experiments
                for a male speaker of the ATR B-set database showed
                promising results both in quality of the restored
                pattern and estimation of the prosodic events. },
  categories = {f0, jaist},
  month      = sep,
  pdf        = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1997/euro97.pdf},
  year       = 1997
}
@inproceedings{Shimodaira1998ICSLP,
  author     = {Hiroshi Shimodaira and Jun Rokui and Mitsuru Nakai},
  title      = {{Improving The Generalization Performance Of The
                MCE/GPD Learning}},
  booktitle  = {ICSLP'98, Australia},
  abstract   = { A novel method to prevent the over-fitting effect and
                improve the generalization performance of the Minimum
                Classification Error (MCE) / Generalized Probabilistic
                Descent (GPD) learning is proposed. The MCE/GPD method,
                which is one of the newest discriminative-learning
                approaches proposed by Katagiri and Juang in 1992,
                results in better recognition performance in various
                areas of pattern recognition than the
                maximum-likelihood (ML) based approach where a
                posteriori probabilities are estimated. Despite its
                superiority in recognition performance, it still
                suffers from the problem of over-fitting to the
                training samples as it is with other learning
                algorithms. In the present study, a regularization
                technique is employed to the MCE method to overcome
                this problem. Feed-forward neural networks are employed
                as a recognition platform to evaluate the recognition
                performance of the proposed method. Recognition
                experiments are conducted on several sorts of datasets.
                The proposed method shows better generalization
                performance than the original one. },
  categories = {lifelike-agent, jaist},
  month      = dec,
  pdf        = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1998/Shimodaira1998ICSLP.pdf},
  year       = 1998
}
@inproceedings{Keeni1998ICPR,
  author     = {Kanad Keeni and Kenji Nakayama and Hiroshi Shimodaira},
  title      = {{Automatic Generation of Initial Weights and
                Estimation of Hidden Units for Pattern Classification
                Using Neural Networks}},
  booktitle  = {14th International Conference on Pattern Recognition
                (ICPR'98)},
  pages      = {1568--1571},
  categories = {ann, jaist},
  month      = aug,
  year       = 1998
}
@article{cuayahuitl2009,
  author   = {Cuay{\'a}huitl, Heriberto and Renals, Steve and Lemon,
              Oliver and Shimodaira, Hiroshi},
  title    = {Evaluation of a hierarchical reinforcement learning
              spoken dialogue system},
  journal  = {Computer Speech and Language},
  volume   = {24},
  number   = {2},
  pages    = {395--429},
  abstract = {We describe an evaluation of spoken dialogue
              strategies designed using hierarchical reinforcement
              learning agents. The dialogue strategies were learnt in
              a simulated environment and tested in a laboratory
              setting with 32 users. These dialogues were used to
              evaluate three types of machine dialogue behaviour:
              hand-coded, fully-learnt and semi-learnt. These
              experiments also served to evaluate the realism of
              simulated dialogues using two proposed metrics
              contrasted with `Precision-Recall'. The learnt
              dialogue behaviours used the Semi-Markov Decision
              Process (SMDP) model, and we report the first
              evaluation of this model in a realistic conversational
              environment. Experimental results in the travel
              planning domain provide evidence to support the
              following claims: (a) hierarchical semi-learnt dialogue
              agents are a better alternative (with higher overall
              performance) than deterministic or fully-learnt
              behaviour; (b) spoken dialogue strategies learnt with
              highly coherent user behaviour and conservative
              recognition error rates (keyword error rate of 20\%)
              can outperform a reasonable hand-coded strategy; and
              (c) hierarchical reinforcement learning dialogue agents
              are feasible and promising for the (semi) automatic
              design of optimized dialogue behaviours in larger-scale
              systems.},
  doi      = {10.1016/j.csl.2009.07.001},
  pdf      = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2009/cuayahuitl-csl09.pdf},
  year     = 2009
}
@inproceedings{Keeni1997ICDAR,
  author     = {Kanad Keeni and Hiroshi Shimodaira and Kenji Nakayama},
  title      = {{On Distributed Representation of Output Layer for
                Recognizing Japanese Kana Characters Using Neural
                Networks}},
  booktitle  = {Proceedings of the 4'th International Conference on
                Document Analysis and Recognition, ICDAR'97},
  pages      = {600--603},
  note       = {Ulm, Germany},
  categories = {hwr, ann, jaist},
  month      = jul,
  year       = 1997
}
@inproceedings{Matsuda2000ICSLP10,
  author     = {Shigeki Matsuda and Mitsuru Nakai and Hiroshi
                Shimodaira and Shigeki Sagayama},
  title      = {{Feature-dependent Allophone Clustering}},
  booktitle  = {Proc. ICSLP2000},
  pages      = {413--416},
  abstract   = { We propose a novel method for clustering allophones
                called Feature-Dependent Allophone Clustering (FD-AC)
                that determines feature-dependent HMM topology
                automatically. Existing methods for allophone
                clustering are based on parameter sharing between the
                allophone models that resemble each other in behaviors
                of feature vector sequences. However, all the features
                of the vector sequences may not necessarily have a
                common allophone clustering structures. It is
                considered that the vector sequences can be better
                modeled by allocating the optimal allophone clustering
                structure to each feature. In this paper, we propose
                Feature-Dependent Successive State Splitting (FD-SSS)
                as an implementation of FD-AC. In speaker-dependent
                continuous phoneme recognition experiments, HMMs
                created by FD-SSS reduced the error rates by about 10\%
                compared with the conventional HMMs that have a common
                allophone clustering structure for all the features. },
  categories = {asr, atr, jaist},
  month      = oct,
  pdf        = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2000/Matsuda2000ICSLP10.pdf},
  year       = 2000
}
@inproceedings{Tokuno2002IWFHR,
  author    = {Junko Tokuno and Nobuhito Inami and Shigeki Matsuda
               and Mitsuru Nakai and Hiroshi Shimodaira and Shigeki
               Sagayama},
  title     = {{Context-Dependent Substroke Model for {HMM}-based
               On-line Handwriting Recognition}},
  booktitle = {Proc. of IWFHR-8},
  pages     = {78--83},
  abstract  = {This paper describes an effective modeling technique
               in the on-line recognition for cursive Kanji
               handwritings and Hiragana handwritings. Our
               conventional recognition system based on substroke HMMs
               (hidden Markov models) employs straight-type substrokes
               as primary models and has achieved high recognition
               rate in the recognition of careful Kanji handwritings.
               On the other hand, the recognition rate for cursive
               handwritings is comparatively low, since they consist
               of mainly curve-strokes. Therefore, we propose a
               technique of using multiple models for each substroke
               by considering the substroke context, which is a
               preceding substroke and a following substroke. In order
               to construct these context-dependent models
               efficiently, we use the SSS (Successive State
               Splitting) algorithm developed in speech recognition.
               Through the experiments, the recognition rate improved
               from 88\% to 92\% for cursive Kanji handwritings and
               from 90\% to 98\% for Hiragana handwritings.},
  month     = aug,
  pdf       = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2002/Tokuno2002IWFHR.pdf},
  year      = 2002
}
@inproceedings{Nakai1995ICASSP,
  author     = {Mitsuru Nakai and Harald Singer and Yoshinori Sagisaka
                and Hiroshi Shimodaira},
  title      = {{Automatic Prosodic Segmentation by F0 Clustering
                Using Superpositional Modeling}},
  booktitle  = {Proc. ICASSP-95, PR08.6},
  pages      = {624--627},
  categories = {F0, atr, jaist},
  month      = may,
  pdf        = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1995/Nakai1995ICASSP.pdf},
  year       = 1995
}
% Given names must start with a capital: in First-Last form BibTeX parses
% a lowercase leading word as a von particle rather than a first name.
@inproceedings{Koba1995HCIb,
  author    = {Hisao Koba and Hiroshi Shimodaira and Masayuki Kimura},
  title     = {{Intelligent Automatic Document Transcription System
               for Braille: To Improve Accessibility to Printed Matter
               for the Visually Impaired}},
  booktitle = {HCI International'95},
  month     = jul,
  year      = 1995
}
@inproceedings{Shimodaira:mlmi05,
  author     = {Hiroshi Shimodaira and Keisuke Uematsu and Shin'ichi
                Kawamoto and Gregor Hofer and Mitsuru Nakai},
  title      = {{Analysis and Synthesis of Head Motion for Lifelike
                Conversational Agents}},
  booktitle  = {Proc. MLMI2005},
  categories = {lifelike agents},
  month      = jul,
  pdf        = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2005/mlmi2005.pdf},
  year       = 2005
}
@inproceedings{Keeni1999IJCNN,
  author     = {Kanad Keeni and Kenji Nakayama and Hiroshi Shimodaira},
  title      = {{Estimation of Initial Weights and Hidden Units for
                Fast Learning of Multi-layer Neural Networks for
                Pattern Classification}},
  booktitle  = {IEEE International Joint Conference on Neural Networks
                (IJCNN'99)},
  categories = {ann, jaist},
  month      = jul,
  year       = 1999
}
@inproceedings{Shimodaira1994ICASSP,
  author     = {Hiroshi Shimodaira and Mitsuru Nakai},
  title      = {Prosodic Phrase Segmentation by Pitch Pattern
                Clustering},
  booktitle  = {Proc. ICASSP-94, 76.5, vol.II},
  pages      = {185--188},
  abstract   = { This paper proposes a novel method for detecting the
                optimal sequence of prosodic phrases from continuous
                speech based on data-driven approach. The pitch pattern
                of input speech is divided into prosodic segments which
                minimized the overall distortion with pitch pattern
                templates of accent phrases by using the One Pass
                search algorithm. The pitch pattern templates are
                designed by clustering a large number of training
                samples of accent phrases. On the ATR continuous speech
                database uttered by 10 speakers, the rate of correct
                segmentation was 91.7 \% maximum for the same sex data
                of training and testing, 88.6 \% for the opposite sex. },
  categories = {F0, jaist},
  month      = mar,
  pdf        = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1994/Shimodaira1994ICASSP.pdf},
  year       = 1994
}
% The superscript in the title is set in math mode; a bare caret in text
% mode makes LaTeX stop with a missing-dollar error.
@article{Iida1998IEICE06,
  author     = {Eiji Iida and Susumu Kunifuji and Hiroshi Shimodaira
                and Masayuki Kimura},
  title      = {{A Scale-Down Solution of $N^2-1$ Puzzle}},
  journal    = {Trans. IEICE(D-I)},
  volume     = {J81-D-I},
  number     = {6},
  pages      = {604--614},
  note       = {(in Japanese)},
  categories = {puzzle, jaist},
  month      = jun,
  year       = 1998
}
@article{Rokui2002IPSJ07,
  author     = {Jun Rokui and Mitsuru Nakai and Hiroshi Shimodaira and
                Shigeki Sagayama},
  title      = {{Speaker Normalization Using Linear Transformation of
                Vocal Tract Length Based on Maximum Likelihood
                Estimation}},
  journal    = {Information Processing Society of Japan (IPSJ) Journal},
  volume     = {43},
  number     = {7},
  pages      = {2030--2037},
  note       = {(in Japanese)},
  categories = {asr, jaist},
  month      = jul,
  year       = 2002
}
@inproceedings{taylor:shimodaira:isard:king:kowtko:icslp1996,
  author     = {Paul A. Taylor and Hiroshi Shimodaira and Stephen
                Isard and Simon King and Jacqueline Kowtko},
  title      = {Using Prosodic Information to Constrain Language
                Models for Spoken dialogue},
  booktitle  = {Proc. {ICSLP} '96},
  address    = {Philadelphia},
  abstract   = {We present work intended to improve speech recognition
                performance for computer dialogue by taking into
                account the way that dialogue context and intonational
                tune interact to limit the possibilities for what an
                utterance might be. We report here on the extra
                constraint achieved in a bigram language model
                expressed in terms of entropy by using separate
                submodels for different sorts of dialogue acts and
                trying to predict which submodel to apply by analysis
                of the intonation of the sentence being recognised.},
  categories = {asr, intonation, dialogue, lm,id4s},
  pdf        = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1996/Taylor_1996_a.pdf},
  ps         = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1996/Taylor_1996_a.ps},
  year       = 1996
}
@article{Otsuki2002IPSJ,
  author  = {Tomoshi Otsuki and Naoki Saitou and Mitsuru Nakai and
             Hiroshi Shimodaira and Shigeki Sagayama},
  title   = {{Musical Rhythm Recognition Using Hidden Markov Model}},
  journal = {Information Processing Society of Japan (IPSJ) Journal},
  volume  = {43},
  number  = {2},
  note    = {(in Japanese)},
  month   = feb,
  year    = 2002
}
@inproceedings{Bao1997-1,
  author     = {Tu Bao Ho and Trong Dung Nguyen and Hiroshi Shimodaira
                and Masayuki Kimura},
  title      = {{An Interactive-Graphic Environment for Discovering
                and Using Conceptual Knowledge}},
  booktitle  = {7th European-Japanese Conference on Information
                Modelling and Knowledge Bases},
  pages      = {327--343},
  categories = {kdd, jaist},
  month      = may,
  year       = 1997
}
@article{Matsuda2003IEICE06,
  author     = {Shigeki Matsuda and Mitsuru Nakai and Hiroshi
                Shimodaira and Shigeki Sagayama},
  title      = {{Speech Recognition Using Asynchronous Transition
                {HMM}}},
  journal    = {IEICE Trans. D-II},
  volume     = {J86-D-II},
  number     = {6},
  pages      = {741--754},
  note       = {(in Japanese)},
  abstract   = {We propose asynchronous-transition HMM (AT-HMM) that
                is based on asynchronous transition structures among
                individual features of acoustic feature vector
                sequences. Conventional HMM represents vector sequences
                by using a chain of states, each state has vector
                distributions of multi-dimensions. Therefore, the
                conventional HMM assumes that individual features
                change synchronously. However, this assumption seems
                over-simplified for modeling the temporal behavior of
                acoustic features, since cepstrum and its
                time-derivative can not synchronize with each other. In
                speaker-dependent continuous phoneme recognition task,
                the AT-HMMs reduced errors by 10\% to 40\%. In
                speaker-independent task, the performance of the
                AT-HMMs was comparable to conventional HMMs.},
  categories = {asr, jaist},
  month      = jun,
  year       = 2003
}
@article{Nakai2005IEICE01,
  author     = {Mitsuru Nakai and Shigeki Sagayama and Hiroshi
                Shimodaira},
  title      = {{On-line Handwriting Recognition Based on Sub-stroke
                {HMM}}},
  journal    = {Trans. IEICE D-II},
  volume     = {J88-D-II},
  number     = {8},
  note       = {(in press) (in Japanese)},
  abstract   = { This paper describes context-dependent sub-stroke
                HMMs for on-line handwritten character recognition. As
                there are so many characters in Japanese, modeling each
                character by an HMM leads to an infeasible
                character-recognition system requiring huge amount of
                memory and enormous computation time. The sub-stroke
                HMM approach has overcomed these problems by minimizing
                modeling unit. However, one of the drawback of this
                approach is that the recognition accuracy deteriorates
                for scribbled characters. In this paper, we show that
                the context-dependent sub-stroke modeling which depends
                on how the sub-stroke connects to the adjacent
                substrokes is effective to achieve robust recognition
                of low quality characters. },
  categories = {online handwritten character recognition},
  month      = aug,
  year       = 2005
}
@inproceedings{Fujinaga2001ICASSP,
  author     = {Katsuhisa Fujinaga and Mitsuru Nakai and Hiroshi
                Shimodaira and Shigeki Sagayama},
  title      = {{Multiple-Regression Hidden Markov Model}},
  booktitle  = {Proc. ICASSP 2001},
  categories = {asr, jaist},
  month      = may,
  pdf        = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2001/Fujinaga2001ICASSP.pdf},
  year       = 2001
}
@inproceedings{Shimodaira2003ICDAR,
  author     = {Hiroshi Shimodaira and Takashi Sudo and Mitsuru Nakai
                and Shigeki Sagayama},
  title      = {{On-line Overlaid-Handwriting Recognition Based on
                Substroke {HMM}s}},
  booktitle  = {ICDAR'03},
  pages      = {1043--1047},
  abstract   = {This paper proposes a novel handwriting recognition
                interface for wearable computing where users write
                characters continuously without pauses on a small
                single writing box. Since characters are written on the
                same writing area, they are overlaid with each other.
                Therefore the task is regarded as a special case of the
                continuous character recognition problem. In contrast
                to the conventional continuous character recognition
                problem, location information of strokes does not help
                very much in the proposed framework. To tackle the
                problem, substroke based hidden Markov models (HMMs)
                and a stochastic bigram language model are employed.
                Preliminary experiments were carried out on a dataset
                of 578 handwriting sequences with a character bigram
                consisting of 1,016 Japanese educational Kanji and 71
                Hiragana characters. The proposed method demonstrated
                promising performance with 69.2\% of handwriting
                sequences being correctly recognized when different
                stroke order was permitted, and the rate was improved
                up to 88.0\% when characters were written with fixed
                stroke order.},
  categories = {HWR, jaist},
  month      = aug,
  pdf        = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2003/Shimodaira2003ICDAR.pdf},
  year       = 2003
}
@article{Nakai1994IEICE06,
  author     = {Mitsuru Nakai and Hiroshi Shimodaira and Shigeki
                Sagayama},
  title      = {{Prosodic Phrase Segmentation Based on Pitch-Pattern
                Clustering}},
  journal    = {Electronics and Communications in Japan, Part 3},
  volume     = {77},
  number     = {6},
  pages      = {80--91},
  note       = {(in Japanese)},
  categories = {F0, jaist},
  month      = jun,
  year       = 1994
}
@inproceedings{cuayahuitletal_slt06,
  author     = {Heriberto Cuay{\'a}huitl and Steve Renals and Oliver
                Lemon and Hiroshi Shimodaira},
  title      = {Reinforcement Learning of Dialogue Strategies With
                Hierarchical Abstract Machines},
  booktitle  = {Proc. of IEEE/ACL Workshop on Spoken Language
                Technology (SLT)},
  abstract   = {In this paper we propose partially specified dialogue
                strategies for dialogue strategy optimization, where
                part of the strategy is specified deterministically and
                the rest optimized with Reinforcement Learning (RL). To
                do this we apply RL with Hierarchical Abstract Machines
                (HAMs). We also propose to build simulated users using
                HAMs, incorporating a combination of hierarchical
                deterministic and probabilistic behaviour. We performed
                experiments using a single-goal flight booking dialogue
                system, and compare two dialogue strategies
                (deterministic and optimized) using three types of
                simulated user (novice, experienced and expert). Our
                results show that HAMs are promising for both dialogue
                optimization and simulation, and provide evidence that
                indeed partially specified dialogue strategies can
                outperform deterministic ones (on average 4.7 fewer
                system turns) with faster learning than the traditional
                RL framework.},
  categories = {reinforcement learning, spoken dialogue systems},
  month      = dec,
  pdf        = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2006/ham-slt2006.pdf},
  year       = 2006
}
@inproceedings{Nakai1997Eurospeech,
  author     = {Mitsuru Nakai and Hiroshi Shimodaira},
  title      = {{On Representation of Fundamental Frequency of Speech
                for Prosody Analysis Using Reliability Function}},
  booktitle  = {Proc. EuroSpeech'97},
  pages      = {243--246},
  categories = {f0, jaist},
  month      = sep,
  pdf        = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1997/Nakai1997Eurospeech.pdf},
  year       = 1997
}
@inproceedings{dziemianko_interspeech2009,
  author     = {Michal Dziemianko and Gregor Hofer and Hiroshi
                Shimodaira},
  title      = {{HMM}-Based Automatic Eye-Blink Synthesis from Speech},
  booktitle  = {Proc. Interspeech},
  pages      = {1799--1802},
  address    = {Brighton, UK},
  abstract   = {In this paper we present a novel technique to
                automatically synthesise eye blinking from a speech
                signal. Animating the eyes of a talking head is
                important as they are a major focus of attention during
                interaction. The developed system predicts eye blinks
                from the speech signal and generates animation
                trajectories automatically employing a ``Trajectory
                Hidden Markov Model''. The evaluation of the
                recognition performance showed that the timing of
                blinking can be predicted from speech with an F-score
                value upwards of 52\%, which is well above chance.
                Additionally, a preliminary perceptual evaluation was
                conducted, that confirmed that adding eye blinking
                significantly improves the perception the character.
                Finally it showed that the speech synchronised
                synthesised blinks outperform random blinking in
                naturalness ratings.},
  categories = {animation, motion synthesis, time series analysis,
                trajectory model},
  month      = sep,
  pdf        = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2009/dziemianko_interspeech2009.pdf},
  year       = 2009
}
@inproceedings{Keeni2003ICEIS,
  author    = {Kanad Keeni and Kunio Goto and Hiroshi Shimodaira},
  title     = {{On fast learning of Multi-layer Feed-forward Neural
               Networks Using Back Propagation}},
  booktitle = {International Conference on Enterprise and Information
               Systems (ICEIS2003)},
  pages     = {266--271},
  abstract  = {This study discusses the subject of training data
               selection for neural networks using back propagation.
               We have made only one assumption that there are no
               overlapping of training data belonging to different
               classes, in other words the training data is
               linearly/semi-linearly separable . Training data is
               analyzed and the data that affect the learning process
               are selected based on the idea of Critical points. The
               proposed method is applied to a classification problem
               where the task is to recognize the characters A,C and
               B,D. The experimental results show that in case of
               batch mode the proposed method takes almost 1/7 of real
               and 1/10 of user training time required for
               conventional method. On the other hand in case of
               online mode the proposed method takes 1/3 of training
               epochs, 1/9 of real and 1/20 of user and 1/3 system
               time required for the conventional method. The
               classification rate of training and testing data are
               the same as it is with the conventional method. },
  month     = apr,
  year      = 2003
}
@article{Shimodaira2001NIPS,
  author     = {Hiroshi Shimodaira and Ken-ichi Noma and Mitsuru Nakai
                and Shigeki Sagayama},
  title      = {{Dynamic Time-Alignment Kernel in Support Vector
                Machine}},
  journal    = {Advances in Neural Information Processing Systems 14,
                NIPS2001},
  volume     = {2},
  pages      = {921--928},
  abstract   = { A new class of Support Vector Machine (SVM) that is
                applicable to sequential-pattern recognition such as
                speech recognition is developed by incorporating an
                idea of non-linear time alignment into the kernel
                function. Since the time-alignment operation of
                sequential pattern is embedded in the new kernel
                function, standard SVM training and classification
                algorithms can be employed without further
                modifications. The proposed SVM (DTAK-SVM) is evaluated
                in speaker-dependent speech recognition experiments of
                hand-segmented phoneme recognition. Preliminary
                experimental results show comparable recognition
                performance with hidden Markov models (HMMs). },
  categories = {ml, svm, jaist},
  month      = dec,
  pdf        = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2001/Shimodaira2001NIPS.pdf},
  year       = 2001
}
% A titled chapter inside a book: the entry type that reads booktitle is
% incollection, and the chapter number goes in its own field.
% NOTE(review): publisher is missing and should be added once confirmed.
@incollection{Nakai1997Book,
  author     = {Mitsuru Nakai and Harald Singer and Yoshinori Sagisaka
                and Hiroshi Shimodaira},
  title      = {{Accent Phrase Segmentation by F0 Clustering Using
                Superpositional Modeling}},
  booktitle  = {Computing Prosody},
  chapter    = {22},
  pages      = {343--360},
  categories = {f0, atr, jaist},
  month      = jan,
  year       = 1997
}
@misc{Hofer_Shimodaira:sigg:2007,
  author       = {Gregor Hofer and Hiroshi Shimodaira and Junichi
                  Yamagishi},
  title        = {Speech-driven Head Motion Synthesis based on a
                  Trajectory Model},
  howpublished = {Poster at SIGGRAPH 2007},
  address      = {San Diego, USA},
  pdf          = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2007/siggraph07.pdf},
  year         = 2007
}
@inproceedings{Matsushita2002HIS03,
  author     = {Yoshinori Matsushita and Shin-ichi Kawamoto and
                Mitsuru Nakai and Hiroshi Shimodaira and Shigeki
                Sagayama},
  title      = {{A Head-Behavior Synchronization Model with Utterance
                for Anthropomorphic Spoken-Dialog Agent}},
  booktitle  = {Technical Report of IEICE, HIS2001},
  note       = {(in Japanese)},
  abstract   = { A novel method of synchronously synthesizing the head
                motion of an anthropomorphic spoken dialog agent with
                its utterance is proposed. Although much efforts have
                been taken to synchronize the lip motion with
                utterance, very few research exist for such head-motion
                control. A neural network is employed to learn the
                relationship between the acoustic features of the
                utterance and the head motion that are measured by a
                motion-capturing system. The proposed method enables to
                simulate the facial animation automatically that moves
                synchronously with any given utterances. Subjective
                evaluation of the performance of the method is reported
                as well. },
  categories = {lifelike-agent, jaist},
  month      = mar,
  year       = 2002
}
@inproceedings{Keeni1998ICCLSDP,
  author     = {Kanad Keeni and Hiroshi Shimodaira and Kenji Nakayama
                and Kazunori Kotani},
  title      = {{On Parameter Initialization of Multi-layer
                Feed-forward Neural Networks for Pattern Recognition}},
  booktitle  = {International Conference on Computational Linguistics,
                Speech and Document Processing (ICCLSDP-'98), Calcutta,
                India},
  pages      = {D8--12},
  categories = {ann, jaist},
  month      = feb,
  year       = 1998
}
@inproceedings{Shimodaira1993Eurospeech,
  author     = {Hiroshi Shimodaira and Mitsuru Nakai},
  title      = {Accent Phrase Segmentation Using Transition
                Probabilities Between Pitch Pattern Templates},
  booktitle  = {Proc. EuroSpeech'93},
  pages      = {1767--1770},
  abstract   = { This paper proposes a novel method for segmenting
                continuous speech into accent phrases by using a
                prosodic feature 'pitch pattern'. The pitch pattern
                extracted from input speech signals is divided into the
                accent segments automatically by using the One-Stage DP
                algorithm, in which reference templates representing
                various types of accent patterns and connectivity
                between them are used to find out the optimum sequence
                of accent segments. In case of making the reference
                templates from a large number of training data, the LBG
                clustering algorithm is used to represent typical
                accent patterns by a small number of templates.
                Evaluation tests were carried out using the ATR
                continuous speech database of a male speaker.
                Experimental results showed more than 91 \% of phrase
                boundaries were correctly detected. },
  categories = {F0, jaist},
  month      = sep,
  ps         = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1993/euro93.ps.gz},
  year       = 1993
}
@inproceedings{Shimodaira1998SPR,
  author     = {Hiroshi Shimodaira and Jun Rokui and Mitsuru Nakai},
  title      = {{Modified Minimum Classification Error Learning and
                Its Application to Neural Networks}},
  booktitle  = {2nd International Workshop on Statistical Techniques
                in Pattern Recognition (SPR'98), Sydney, Australia},
  abstract   = { A novel method to improve the generalization
                performance of the Minimum Classification Error (MCE) /
                Generalized Probabilistic Descent (GPD) learning is
                proposed. The MCE/GPD learning proposed by Juang and
                Katagiri in 1992 results in better recognition
                performance than the maximum-likelihood (ML) based
                learning in various areas of pattern recognition.
                Despite its superiority in recognition performance, as
                well as other learning algorithms, it still suffers
                from the problem of ``over-fitting'' to the training
                samples. In the present study, a regularization
                technique has been employed to the MCE learning to
                overcome this problem. Feed-forward neural networks are
                employed as a recognition platform to evaluate the
                recognition performance of the proposed method.
                Recognition experiments are conducted on several sorts
                of data sets. },
  categories = {mce, ann, jaist},
  pdf        = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1998/spr98.pdf},
  year       = 1998
}
@inproceedings{Keeni1997ICPPOL,
  author     = {Kanad Keeni and Hiroshi Shimodaira},
  title      = {{On Representation of Output Layer for Recognizing
                Japanese Kana Characters Using Neural Networks}},
  booktitle  = {Proc. of the 17th International Conference on Computer
                Processing of Oriental Languages},
  pages      = {305--308},
  note       = {Baptist University, Kowloon Tong, Hong Kong},
  categories = {ann, jaist},
  month      = apr,
  year       = 1997
}
@inproceedings{Shimodaira2002ICASSP,
  author     = {Hiroshi Shimodaira and Nobuyoshi Sakai and Mitsuru
                Nakai and Shigeki Sagayama},
  title      = {{Jacobian Joint Adaptation to Noise, Channel and Vocal
                Tract Length}},
  booktitle  = {Proc. of ICASSP2002},
  pages      = {197--200},
  month      = may,
  year       = {2002},
  categories = {asr, jaist},
  pdf        = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2002/Shimodaira2002ICASSP.pdf},
  abstract   = {A new Jacobian approach that linearly decomposes the
                composite of additive noise, multiplicative noise
                (channel transfer function) and speaker's vocal tract
                length, and adapts the acoustic model parameters
                simultaneously to these factors is proposed in this
                paper. Due to the fact that these factors non-linearly
                degrade the observed features for speech recognition,
                existing approaches fail to adapt the acoustic models
                adequately. Approximating the nonlinear operation by a
                linear model enables to employ the least square error
                estimation of the factors and adapt the acoustic model
                parameters with small amount of speech samples. Speech
                recognition experiments on ATR isolated word database
                demonstrate significant reduction of error rates, which
                supports the effectiveness of the proposed scheme.},
}
@article{Nakai1994IEICE02,
  author     = {Mitsuru Nakai and Hiroshi Shimodaira and Shigeki
                Sagayama},
  title      = {Prosodic Phrase Segmentation Based on Pitch-Pattern
                Clustering},
  journal    = {Trans. IEICE (A)},
  volume     = {J77-A},
  number     = {2},
  pages      = {206--214},
  month      = feb,
  year       = {1994},
  note       = {(in Japanese)},
  categories = {F0, jaist},
}
@inproceedings{Keeni2001SPPRA,
  author     = {Kanad Keeni and Kunio Goto and Hiroshi Shimodaira},
  title      = {{On Extraction of E-Mail Address from Fax Message for
                Automatic Delivery to Individual Recipient}},
  booktitle  = {IASTED International Conference on Signal Processing,
                Pattern Recognition and Applications},
  month      = jul,
  year       = {2001},
  categories = {nn, jaist},
}
@incollection{Kawamoto2003Book,
  author     = {Shin-ichi Kawamoto and Hiroshi Shimodaira and Shigeki
                Sagayama and others},
  title      = {{Galatea: Open-Source Software for Developing
                Anthropomorphic Spoken Dialog Agents}},
  booktitle  = {Life-Like Characters. Tools, Affective Functions, and
                Applications},
  editor     = {Helmut Prendinger and others},
  publisher  = {Springer},
  pages      = {187--212},
  month      = nov,
  year       = {2003},
  categories = {lifelike-agent, jaist},
  pdf        = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2003/Kawamoto2003Book.pdf},
  abstract   = {Galatea is a software toolkit to develop a human-like
                spoken dialog agent. In order to easily integrate the
                modules of different characteristics including speech
                recognizer, speech synthesizer, facial-image
                synthesizer and dialog controller, each module is
                modeled as a virtual machine having a simple common
                interface and connected to each other through a broker
                (communication manager). Galatea employs model-based
                speech and facial-image synthesizers whose model
                parameters are adapted easily to those for an existing
                person if his/her training data is given. The software
                toolkit that runs on both UNIX/Linux and Windows
                operating systems will be publicly available in the
                middle of 2003.},
}
@inproceedings{Nakai1998ICSLP,
  author     = {Mitsuru Nakai and Hiroshi Shimodaira},
  title      = {{The Use of F0 Reliability Function for Prosodic
                Command Analysis on F0 Contour Generation Model}},
  booktitle  = {Proc. ICSLP'98},
  month      = dec,
  year       = {1998},
  categories = {asr, atr, jaist},
  pdf        = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1998/Nakai1998ICSLP.pdf},
}
@inproceedings{Hofer_Shimodaira:proc:2007,
  author     = {Gregor Hofer and Hiroshi Shimodaira},
  title      = {Automatic Head Motion Prediction from Speech Data},
  booktitle  = {Proc. Interspeech 2007},
  address    = {Antwerp, Belgium},
  month      = aug,
  year       = {2007},
  pdf        = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2007/interspeech07.pdf},
  abstract   = {In this paper we present a novel approach to generate
                a sequence of head motion units given some speech. The
                modelling approach is based on the notion that head
                motion can be divided into a number of short
                homogeneous units that can each be modelled
                individually. The system is based on Hidden Markov
                Models (HMM), which are trained on motion units and act
                as a sequence generator. They can be evaluated by an
                accuracy measure. A database of motion capture data was
                collected and manually annotated for head motion and is
                used to train the models. It was found that the model
                is good at distinguishing high activity regions from
                regions with less activity with accuracies around 75
                percent. Furthermore the model is able to distinguish
                different head motion patterns based on speech features
                somewhat reliably, with accuracies reaching almost 70
                percent.},
}
@misc{Hofer_Shimodaira:sca:2007,
  author       = {Gregor Hofer and Hiroshi Shimodaira and Junichi
                  Yamagishi},
  title        = {Lip motion synthesis using a context dependent
                  trajectory hidden {M}arkov model},
  howpublished = {Poster at SCA 2007},
  address      = {San Diego, USA},
  year         = {2007},
  pdf          = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2007/sca07.pdf},
}
@inproceedings{Nakai1994ICSLP,
  author     = {Mitsuru Nakai and Hiroshi Shimodaira},
  title      = {{Accent Phrase Segmentation by Finding N-best
                Sequences of Pitch Pattern Templates}},
  booktitle  = {Proc. ICSLP94, 8.10},
  pages      = {347--350},
  month      = sep,
  year       = {1994},
  categories = {F0, jaist},
  pdf        = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1994/Nakai1994ICSLP.pdf},
}
@inproceedings{Nakai2001ICDAR,
  author     = {Mitsuru Nakai and Naoto Akira and Hiroshi Shimodaira
                and Shigeki Sagayama},
  title      = {{Substroke Approach to {HMM}-based On-line Kanji
                Handwriting Recognition}},
  booktitle  = {Proc. of ICDAR'01},
  pages      = {491--495},
  month      = sep,
  year       = {2001},
  categories = {hwr, jaist},
  pdf        = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2001/Nakai2001ICDAR.pdf},
  abstract   = { A new method is proposed for on-line handwriting
                recognition of Kanji characters. The method employs
                substroke HMMs as minimum units to constitute Japanese
                Kanji characters and utilizes the direction of pen
                motion. The main motivation is to fully utilize the
                continuous speech recognition algorithm by relating
                sentence speech to Kanji character, phonemes to
                substrokes, and grammar to Kanji structure. The
                proposed system consists input feature analysis,
                substroke HMMs, a character structure dictionary and a
                decoder. The present approach has the following
                advantages over the conventional methods that employ
                whole character HMMs. 1) Much smaller memory
                requirement for dictionary and models. 2) Fast
                recognition by employing efficient substroke network
                search. 3) Capability of recognizing characters not
                included in the training data if defined as a sequence
                of substrokes in the dictionary. 4) Capability of
                recognizing characters written by various different
                stroke orders with multiple definitions per one
                character in the dictionary. 5) Easiness in HMM
                adaptation to the user with a few sample character
                data. },
}
@article{Kanno1997IEICE01,
  author     = {Sukeyasu Kanno and Hiroshi Shimodaira},
  title      = {{Voiced Sound Detection under Nonstationary and Heavy
                Noisy Environment Using the Prediction Error of
                Low-Frequency Spectrum}},
  journal    = {Trans. IEICE (D-II)},
  volume     = {J80-D-II},
  number     = {1},
  pages      = {26--35},
  month      = jan,
  year       = {1997},
  note       = {(in Japanese)},
  categories = {asr, jaist},
}
@inproceedings{Keeni1998ICONIP,
  author     = {Kanad Keeni and Kenji Nakayama and Hiroshi Shimodaira},
  title      = {{Automatic Generation of Initial Weights and Target
                Outputs of Multi-layer Neural Networks and its
                Application to Pattern Classification}},
  booktitle  = {International Conference on Neural Information
                Processing (ICONIP'98)},
  pages      = {1622--1625},
  month      = oct,
  year       = {1998},
  categories = {ann, jaist},
}
@inproceedings{Rokui1999ICANN09,
  author     = {Jun Rokui and Hiroshi Shimodaira},
  title      = {{Multistage Building Learning based on
                Misclassification Measure}},
  booktitle  = {9-th International Conference on Artificial Neural
                Networks, Edinburgh, UK},
  month      = sep,
  year       = {1999},
  categories = {nn, mce, jaist},
}
@inproceedings{cuayahuitletal_interspeech07,
  author     = {Heriberto Cuay{\'a}huitl and Steve Renals and Oliver
                Lemon and Hiroshi Shimodaira},
  title      = {Hierarchical Dialogue Optimization Using Semi-Markov
                Decision Processes},
  booktitle  = {Proc. of INTERSPEECH},
  month      = aug,
  year       = {2007},
  categories = {Spoken dialogue systems, semi-Markov decision
                processes, hierarchical reinforcement learning},
  pdf        = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2007/SMDPs-interspeech2007.pdf},
  abstract   = {This paper addresses the problem of dialogue
                optimization on large search spaces. For such a
                purpose, in this paper we propose to learn dialogue
                strategies using multiple Semi-Markov Decision
                Processes and hierarchical reinforcement learning. This
                approach factorizes state variables and actions in
                order to learn a hierarchy of policies. Our experiments
                are based on a simulated flight booking dialogue system
                and compare flat versus hierarchical reinforcement
                learning. Experimental results show that the proposed
                approach produced a dramatic search space reduction
                (99.36\%), and converged four orders of magnitude
                faster than flat reinforcement learning with a very
                small loss in optimality (on average 0.3 system turns).
                Results also report that the learnt policies
                outperformed a hand-crafted one under three different
                conditions of ASR confidence levels. This approach is
                appealing to dialogue optimization due to faster
                learning, reusable subsolutions, and scalability to
                larger problems.},
}
@inproceedings{Nakai2002ICPR,
  author     = {Mitsuru Nakai and Takashi Sudo and Hiroshi Shimodaira
                and Shigeki Sagayama},
  title      = {{Pen Pressure Features for Writer-Independent On-Line
                Handwriting Recognition Based on Substroke {HMM}}},
  booktitle  = {Proc. of ICPR2002, III},
  pages      = {220--223},
  month      = aug,
  year       = {2002},
  categories = {hwr, jaist},
  pdf        = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2002/Nakai2002ICPR.pdf},
}
@inproceedings{Keeni1999ICCIMA,
  author     = {Kanad Keeni and Kenji Nakayama and Hiroshi Shimodaira},
  title      = {{A Training Scheme for Pattern Classification Using
                Multi-layer Feed-forward Neural Networks}},
  booktitle  = {IEEE International Conference on Computational
                Intelligence and Multimedia Applications},
  pages      = {307--311},
  month      = sep,
  year       = {1999},
  categories = {ann, jaist},
}
@inproceedings{lips08-gregpr,
  author     = {Gregor Hofer and Junichi Yamagishi and Hiroshi
                Shimodaira},
  title      = {Speech-driven Lip Motion Generation with a Trajectory
                {HMM}},
  booktitle  = {Proc. Interspeech 2008},
  pages      = {2314--2317},
  address    = {Brisbane, Australia},
  month      = sep,
  year       = {2008},
  key        = {lips08-gregpr},
  categories = {visual speech synthesis, trajectory HMM, HTS},
  pdf        = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2008/GregorLipsChallenge08.pdf},
  abstract   = {Automatic speech animation remains a challenging
                problem that can be described as finding the optimal
                sequence of animation parameter configurations given
                some speech. In this paper we present a novel technique
                to automatically synthesise lip motion trajectories
                from a speech signal. The developed system predicts lip
                motion units from the speech signal and generates
                animation trajectories automatically employing a
                ``Trajectory Hidden Markov Model''. Using the MLE
                criterion, its parameter generation algorithm produces
                the optimal smooth motion trajectories that are used to
                drive control points on the lips directly.
                Additionally, experiments were carried out to find a
                suitable model unit that produces the most accurate
                results. Finally a perceptual evaluation was conducted,
                that showed that the developed motion units perform
                better than phonemes.},
}
@inproceedings{Matsuda2000ICASSP,
  author     = {Shigeki Matsuda and Mitsuru Nakai and Hiroshi
                Shimodaira and Shigeki Sagayama},
  title      = {{Asynchronous-Transition {HMM}}},
  booktitle  = {Proc. ICASSP 2000 (Istanbul, Turkey), Vol. II},
  pages      = {1001--1004},
  month      = jun,
  year       = {2000},
  categories = {asr, atr, jaist},
  pdf        = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2000/Matsuda2000ICASSP.pdf},
  abstract   = { We propose a new class of hidden Markov model (HMM)
                called asynchronous-transition HMM (AT-HMM). Opposed to
                conventional HMMs where hidden state transition occurs
                simultaneously to all features, the new class of HMM
                allows state transitions asynchronous between
                individual features to better model asynchronous
                timings of acoustic feature changes. In this paper, we
                focus on a particular class of AT-HMM with sequential
                constraints introducing a concept of ``state tying
                across time''. To maximize the advantage of the new
                model, we also introduce feature-wise state tying
                technique. Speaker-dependent speech recognition
                experiments demonstrated that reduced error rates more
                than 30\% and 50\% in phoneme and isolated word
                recognition, respectively, compared with conventional
                HMMs. },
}
@inproceedings{Nakai2003ICDAR,
  author     = {Mitsuru Nakai and Hiroshi Shimodaira and Shigeki
                Sagayama},
  title      = {{Generation of Hierarchical Dictionary for
                Stroke-order Free Kanji Handwriting Recognition Based
                on Substroke {HMM}}},
  booktitle  = {Proc. of ICDAR2003},
  pages      = {514--518},
  month      = aug,
  year       = {2003},
  categories = {HWR, jaist},
  pdf        = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2003/Nakai2003ICDAR.pdf},
  abstract   = {This paper describes a method of generating a
                Kanji hierarchical structured dictionary for
                stroke-number and stroke-order free handwriting
                recognition based on sub-stroke HMM. In stroke-based
                methods, a large number of stroke-order variations can
                be easily expressed by just adding different stroke
                sequences to the dictionary and it is not necessary to
                train new reference patterns. The hierarchical
                structured dictionary has an advantage that thousands
                of stroke-order variations of Kanji characters can be
                produced using a small number of stroke-order rules
                defining Kanji parts. Moreover, the recognition speed
                is fast since common sequences are shared in a
                substroke network, even if the total number of
                stroke-order combinations becomes enormous practically.
                In experiments, 300 different stroke-order rules of
                Kanji parts were statistically chosen by using 60
                writers' handwritings of 1,016 educational
                Kanji characters. By adding these new stroke-order rules
                to the dictionary, about 9,000 variations of different
                stroke-orders were generated for 2,965 JIS 1st level
                Kanji characters. As a result, we successfully improved
                the recognition accuracy from 82.6\% to 90.2\% for
                stroke-order free handwritings.},
}
@inproceedings{Shimodaira1992ICSLP,
  author     = {Hiroshi Shimodaira and Mitsuru Nakai},
  title      = {Robust Pitch Detection by Narrow Band Spectrum
                Analysis},
  booktitle  = {Proc. ICSLP-92},
  pages      = {1597--1600},
  month      = oct,
  year       = {1992},
  categories = {F0, jaist},
  pdf        = {http://www.cstr.inf.ed.ac.uk/downloads/publications/1992/icslp92.pdf},
  abstract   = {This paper proposes a new technique for detecting
                pitch patterns which is useful for automatic speech
                recognition, by using a narrow band spectrum analysis.
                The motivation of this approach is that humans perceive
                some kind of pitch in whispers where no fundamental
                frequencies can be observed, while most of the pitch
                determination algorithm (PDA) fails to detect such
                perceptual pitch. The narrow band spectrum analysis
                enable us to find pitch structure distributed locally
                in frequency domain. Incorporating this technique into
                PDA's is realized to applying the technique to the lag
                window based PDA. Experimental results show that pitch
                detection performance could be improved by 4\% for
                voiced sounds and 8\% for voiceless sounds.},
}
@article{10.1109/MCG.2011.71,
  author     = {Michael A. Berger and Gregor Hofer and Hiroshi
                Shimodaira},
  title      = {Carnival -- Combining Speech Technology and Computer
                Animation},
  journal    = {IEEE Computer Graphics and Applications},
  volume     = {31},
  pages      = {80--89},
  year       = {2011},
  publisher  = {IEEE Computer Society},
  address    = {Los Alamitos, CA, USA},
  issn       = {0272-1716},
  doi        = {10.1109/MCG.2011.71},
}
@inproceedings{cuayahuitletal_asru05,
  author     = {Heriberto Cuay{\'a}huitl and Steve Renals and Oliver
                Lemon and Hiroshi Shimodaira},
  title      = {Human-Computer Dialogue Simulation Using Hidden Markov
                Models},
  booktitle  = {Proc. of IEEE Workshop on Automatic Speech Recognition
                and Understanding (ASRU)},
  month      = nov,
  year       = {2005},
  categories = {dialogue simulation, hidden markov models},
  pdf        = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2005/hcp-asru2005.pdf},
  abstract   = {This paper presents a probabilistic method to simulate
                task-oriented human-computer dialogues at the intention
                level, that may be used to improve or to evaluate the
                performance of spoken dialogue systems. Our method uses
                a network of Hidden Markov Models (HMMs) to predict
                system and user intentions, where a ``language model''
                predicts sequences of goals and the component HMMs
                predict sequences of intentions. We compare standard
                HMMs, Input HMMs and Input-Output HMMs in an effort to
                better predict sequences of intentions. In addition, we
                propose a dialogue similarity measure to evaluate the
                realism of the simulated dialogues. We performed
                experiments using the DARPA Communicator corpora and
                report results with three different metrics: dialogue
                length, dialogue similarity and precision-recall.},
}
@inproceedings{Sagayama2001ISCA08b,
  author     = {Shigeki Sagayama and Koichi Shinoda and Mitsuru Nakai
                and Hiroshi Shimodaira},
  title      = {{Analytic Methods for Acoustic Model Adaptation: A
                Review}},
  booktitle  = {Proc. ISCA Workshop on Adaptation Methods (Sophia
                Antipolis France)},
  pages      = {67--76},
  month      = aug,
  year       = {2001},
  note       = {Invited Paper},
  categories = {asr, jaist},
}
@inproceedings{Shimodaira:kes06,
  author     = {Chie Shimodaira and Hiroshi Shimodaira and Susumu
                Kunifuji},
  title      = {{A Divergent-Style Learning Support Tool for English
                Learners Using a Thesaurus Diagram}},
  booktitle  = {{Proc. KES2006}},
  address    = {Bournemouth, United Kingdom},
  month      = oct,
  year       = {2006},
  categories = {knowledge engineering},
  pdf        = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2006/kes2006.pdf},
  abstract   = { This paper proposes an English learning support tool
                which provides users with divergent information to find
                the right words and expressions. In contrast to a
                number of software tools for English translation and
                composition, the proposed tool is designed to give
                users not only the right answer to the user's query but
                also a lot of words and examples which are relevant to
                the query. Based on the lexical information provided by
                the lexical database, WordNet, the proposed tool
                provides users with a thesaurus diagram, in which
                synonym sets and relation links are presented in
                multiple windows to help users to choose adequate words
                and understand similarities and differences between
                words. Subjective experiments are carried out to
                evaluate the system. },
}
@inproceedings{Keeni2003ICONIP,
  author     = {Kanad Keeni and Kunio Goto and Hiroshi Shimodaira},
  title      = {{Automatic Filtering of Network Intrusion Detection
                System Alarms Using Multi-layer Feed-forward Neural
                Networks}},
  booktitle  = {International Conference on Neural Information
                Processing (ICONIP2003)},
  month      = jun,
  year       = {2003},
  categories = {ann},
}
@article{Tokuno2005IEICE01,
  author     = {Junko Tokuno and Nobuhito Inami and Mitsuru Nakai and
                Hiroshi Shimodaira and Shigeki Sagayama},
  title      = {{Context-dependent Sub-stroke Model for {HMM}-based
                On-line Handwriting Recognition}},
  journal    = {Trans. IEICE (D-II)},
  volume     = {J88-D-II},
  number     = {8},
  month      = aug,
  year       = {2005},
  note       = {(in press), (in Japanese)},
  categories = {online handwritten character recognition},
  abstract   = {A new method is proposed for on-line Kanji
                handwriting recognition. The method employs sub-stroke
                HMMs as minimum units to constitute Kanji characters
                and utilizes the direction of pen motion. The present
                approach has the following advantages over the
                conventional methods that employ character HMMs. 1)
                Much smaller memory requirement for dictionary and
                models. 2) Fast recognition by employing efficient
                sub-stroke network search. 3) Capability of recognizing
                characters not included in the training data if defined
                as a sequence of sub-strokes in the dictionary. In
                experiments, we have achieved a correct recognition
                rate of above 96\% by using JAIST-IIPL database that
                includes 1,016 educational Kanji characters.},
}
@article{Nakai1997IEICE,
  author     = {Mitsuru Nakai and Harald Singer and Yoshimori Sagisaka
                and Hiroshi Shimodaira},
  title      = {{Accent Phrase Segmentation Based on F0 Templates
                Using a Superpositional Prosodic Model}},
  journal    = {Trans. IEICE (D-II)},
  volume     = {J80-D-II},
  number     = {10},
  pages      = {2605--2614},
  month      = oct,
  year       = {1997},
  note       = {(in Japanese)},
  categories = {jaist},
}
@inproceedings{Keeni2002AIA,
  author     = {Kanad Keeni and Hiroshi Shimodaira},
  title      = {{On Selection of Training Data for Fast Learning of
                Neural Networks Using Back Propagation}},
  booktitle  = {IASTED International Conference on Artificial
                Intelligence and Application (AIA2002)},
  pages      = {474--478},
  month      = sep,
  year       = {2002},
}
@inproceedings{Rokui1998ICONIP,
  author     = {Jun Rokui and Hiroshi Shimodaira},
  title      = {{Modified Minimum Classification Error Learning and
                Its Application to Neural Networks}},
  booktitle  = {ICONIP'98, Kitakyushu, Japan},
  month      = oct,
  year       = {1998},
  categories = {ann, mce, jaist},
}
@inproceedings{Iida1998IIZUKA,
  author     = {Eiji Iida and Hiroshi Shimodaira and Susumu Kunifuji
                and Masayuki Kimura},
  title      = {{A System to Perform Human Problem Solving}},
  booktitle  = {The 5th International Conference on Soft Computing and
                Information / Intelligent Systems (IIZUKA'98)},
  month      = oct,
  year       = {1998},
  categories = {jaist},
}
@inproceedings{Shimodaira2000ICSLP10,
  author     = {Hiroshi Shimodaira and Toshihiko Akae and Mitsuru
                Nakai and Shigeki Sagayama},
  title      = {{Jacobian Adaptation of {HMM} with Initial Model
                Selection for Noisy Speech Recognition}},
  booktitle  = {Proc. ICSLP2000},
  pages      = {1003--1006},
  month      = oct,
  year       = {2000},
  categories = {asr, jaist},
  pdf        = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2000/Shimodaira2000ICSLP10.pdf},
  abstract   = { An extension of Jacobian Adaptation (JA) of HMMs for
                degraded speech recognition is presented in which
                appropriate set of initial models is selected from a
                number of initial-model sets designed for different
                noise environments. Based on the first order Taylor
                series approximation in the acoustic feature domain, JA
                adapts the acoustic model parameters trained in the
                initial noise environment A to the new environment B
                much faster than PMC that creates the acoustic models
                for the target environment from scratch. Despite the
                advantage of JA to PMC, JA has a theoretical limitation
                that the change of acoustic parameters from the
                environment A to B should be small in order that the
                linear approximation holds. To extend the coverage of
                JA, the ideas of multiple sets of initial models and
                their automatic selection scheme are discussed.
                Speaker-dependent isolated-word recognition experiments
                are carried out to evaluate the proposed method. },
}
@inproceedings{Koba1995HCIa,
  author     = { and Hiroshi Shimodaira},
  title      = {{HI Design Based on the Costs of Human
                Information-processing Model}},
  booktitle  = {HCI International '95},
  month      = jul,
  year       = {1995},
  categories = {HI, jaist},
  annote     = {REVIEW: the author field begins with a dangling `and',
                so the first author's name is missing from this record
                and cannot be recovered from the file itself. Restore
                it from the original publication before citing.},
}
@inproceedings{Tokuno2003HCII,
  author     = {Junko Tokuno and Naoto Akira and Mitsuru Nakai and
                Hiroshi Shimodaira and Shigeki Sagayama},
  title      = {{Blind-handwriting Interface for Wearable Computing}},
  booktitle  = {Proc. of Human - Computer Interaction (HCI)
                International 2003, Volume 2},
  pages      = {303--307},
  month      = jun,
  year       = {2003},
  categories = {HWR, jaist},
  abstract   = {This paper proposes a novel input interface that we
                call ``blind handwriting'' for wearable computing. The
                blind handwriting, which is a word similar to ``blind
                typing'' of keyboard, is a particular writing style
                where the user does not see the pen or the finger
                movement. Without visual feedback, written characters
                are distorted, as in the case when the user is
                blindfolded, and therefore existing on-line handwriting
                recognition systems fail to recognize them correctly.
                The sub-stroke based hidden Markov model approach is
                employed to tackle this problem. When the pen or touch
                pad is used as an input device, the proposed interface
                demonstrates a recognition rate of 83\% on a test set
                of 61 people where each person wrote 1016 Japanese
                Kanji characters.},
}
@inproceedings{Kawamoto2002PRICAI,
  author     = {Shin-ichi Kawamoto and Hiroshi Shimodaira and Tsuneo
                Nitta and Takuya Nishimoto and Satoshi Nakamura and
                Katsunobu Itou and Shigeo Morishima and Tatsuo
                Yotsukura and Atsuhiko Kai and Akinobu Lee and Yoichi
                Yamashita and Takao Kobayashi and Keiichi Tokuda and
                Keikichi Hirose and Nobuaki Minematsu and Atsushi
                Yamada and Yasuharu Den and Takehito Utsuro and Shigeki
                Sagayama},
  title      = {{Open-source software for developing anthropomorphic
                spoken dialog agent}},
  booktitle  = {Proc. of PRICAI-02, International Workshop on Lifelike
                Animated Agents},
  pages      = {64--69},
  month      = aug,
  year       = {2002},
  categories = {lifelike-agent, jaist},
  pdf        = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2002/Kawamoto2002PRICAI.pdf},
}
@inproceedings{Shimodaira:iwfhr06,
  author     = {Junko Tokuno and Mitsuru Nakai and Hiroshi Shimodaira
                and Shigeki Sagayama and Masaki Nakagawa},
  title      = {{On-line Handwritten Character Recognition Selectively
                employing Hierarchical Spatial Relationships among
                Subpatterns}},
  booktitle  = {{Proc. IWFHR-10}},
  address    = {La Baule, France},
  month      = oct,
  year       = {2006},
  categories = {online handwriting recognition},
  abstract   = { This paper proposes an on-line handwritten character
                pattern recognition method that examines spatial
                relationships among subpatterns which are components of
                a character pattern. Conventional methods evaluating
                spatial relationships among subpatterns have not
                considered characteristics of deformed handwritings and
                evaluate all the spatial relationships equally.
                However, the deformations of spatial features are
                different within a character pattern. In our approach,
                we assume that the distortions of spatial features are
                dependent on the hierarchy of character patterns so
                that we selectively evaluate hierarchical spatial
                relationships of subpatterns by employing Bayesian
                network as a post-processor of our sub-stroke based HMM
                recognition system. Experiments of on-line handwritten
                Kanji character recognition with a lexicon of 1,016
                elementary characters revealed that the approach we
                propose improves the recognition accuracy for different
                types of deformations. },
}
@inproceedings{Takeda2002MMSP,
  author     = {Haruto Takeda and Naoki Saito and Tomoshi Otsuki and
                Mitsuru Nakai and Hiroshi Shimodaira and Shigeki
                Sagayama},
  title      = {{Hidden Markov Model for Automatic Transcription of
                MIDI Signals}},
  booktitle  = {2002 International Workshop on Multimedia Signal
                Processing},
  month      = dec,
  year       = {2002},
  pdf        = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2002/Takeda2002MMSP12.pdf},
}