The Centre for Speech Technology Research, The University of Edinburgh

Publications by Alex Gutkin

s9900835.bib

% QR-05 workshop paper (Graz, May 2005).
% The title is stored in Title Case; only the proper name of the formalism and
% its acronym are brace-protected (as whole words), so the bibliography style
% decides the casing of everything else.
@inproceedings{Gutkin:Gay:qr05,
  author = {Gutkin, Alexander and Gay, David R.},
  title = {Structural Representation and Matching of
                   Articulatory Speech Structures based on the
                   {Evolving Transformation System} ({ETS})
                   Formalism},
  booktitle = {Proc. 19th International Workshop on Qualitative
                   Reasoning (QR-05)},
  editor = {Hofbaur, Michael and Rinner, Bernhard and Wotawa, Franz},
  pages = {89--96},
  address = {Graz, Austria},
  abstract = { A formal structural representation of speech
                   consistent with the principles of combinatorial
                   structure theory is presented in this paper. The
                   representation is developed within the Evolving
                   Transformation System (ETS) formalism and encapsulates
                   speech processes at the articulatory level. We show how
                   the class structure of several consonantal phonemes of
                   English can be expressed with the help of articulatory
                   gestures---the atomic combinatorial units of speech. As
                   a preliminary step towards the design of a speech
                   recognition architecture based on the structural
                   approaches to physiology and articulatory phonology, we
                   present an algorithm for the structural detection of
                   phonemic class elements inside gestural ETS structures
                   derived from continuous speech. Experiments designed to
                   verify the adequacy of the hypothesised gestural class
                   structure conducted on the MOCHA articulatory corpus
                   are then described. Our experimental results support
                   the hypothesis that the articulatory representation
                   captures sufficient information for the accurate
                   structural identification of the phonemic classes in
                   question. },
  categories = {structural,recognition,ets,artic,mocha,edinburgh,unb},
  isbn = {3-9502019-0-4},
  month = may,
  pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2005/gutkin_gay_qr05.pdf},
  year = 2005
}
% MPhil thesis (University of Cambridge, December 2000).
% The title carries no forced casing: it contains no proper nouns or acronyms.
% The degree name in `type` is brace-protected and written without a trailing
% period, which LaTeX would otherwise space as the end of a sentence.
@mastersthesis{Gutkin:00,
  author = {Gutkin, Alexander},
  title = {Log-Linear Interpolation of Language Models},
  school = {Department of Engineering, University of Cambridge},
  type = {{MPhil} thesis},
  address = {UK},
  categories = {statistical speech recognition, language modelling},
  month = dec,
  pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2000/gutkin_mphil.pdf},
  ps = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2000/gutkin_mphil.ps.gz},
  year = 2000
}
% PRIS-2005 workshop paper (Miami, May 2005).
% The title is stored in Title Case without per-letter brace protection; it
% contains no proper nouns or acronyms, so casing is left to the style.
@inproceedings{Gutkin:King:pris05,
  author = {Gutkin, Alexander and King, Simon},
  title = {Inductive String Template-Based Learning of
                   Spoken Language},
  booktitle = {Proc. 5th International Workshop on Pattern
                   Recognition in Information Systems (PRIS-2005), In
                   conjunction with the 7th International Conference on
                   Enterprise Information Systems (ICEIS-2005)},
  editor = {Gamboa, Hugo and Fred, Ana},
  pages = {43--51},
  address = {Miami, USA},
  publisher = {INSTICC Press},
  abstract = { This paper deals with formulation of alternative
                   structural approach to the speech recognition problem.
                   In this approach, we require both the representation
                   and the learning algorithms defined on it to be
                   linguistically meaningful, which allows the speech
                   recognition system to discover the nature of the
                   linguistic classes of speech patterns corresponding to
                   the speech waveforms. We briefly discuss the current
                   formalisms and propose an alternative --- a
                   phonologically inspired string-based inductive speech
                   representation, defined within an analytical framework
                   specifically designed to address the issues of class
                   and object representation. We also present the results
                   of the phoneme classification experiments conducted on
                   the TIMIT corpus of continuous speech. },
  categories = {structural,recognition,acoustic,phonetic_feature,timit,edinburgh},
  isbn = {972-8865-28-7},
  month = may,
  pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2005/gutkin_king_pris05.pdf},
  ps = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2005/gutkin_king_pris05.ps.gz},
  year = 2005
}
% ICSLP 2004 paper (Jeju Island, October 2004).
% "pseudo-Euclidean" is protected as one whole word so the capital E survives
% sentence-casing styles without a brace in the middle of a word.
% The ISSN check character is written as an upper-case X.
@inproceedings{Gutkin:King:icslp04,
  author = {Gutkin, Alexander and King, Simon},
  title = {Phone classification in {pseudo-Euclidean} Vector
                   Spaces},
  booktitle = {Proc. 8th International Conference on Spoken Language
                   Processing (ICSLP)},
  volume = {II},
  pages = {1453--1457},
  address = {Jeju Island, Korea},
  abstract = { Recently we have proposed a structural framework for
                   modelling speech, which is based on patterns of
                   phonological distinctive features, a linguistically
                   well-motivated alternative to standard vector-space
                   acoustic models like HMMs. This framework gives
                   considerable representational freedom by working with
                   features that have explicit linguistic interpretation,
                   but at the expense of the ability to apply the wide
                   range of analytical decision algorithms available in
                   vector spaces, restricting oneself to more
                   computationally expensive and less-developed symbolic
                   metric tools. In this paper we show that a
                   dissimilarity-based distance-preserving transition from
                   the original structural representation to a
                   corresponding pseudo-Euclidean vector space is
                   possible. Promising results of phone classification
                   experiments conducted on the TIMIT database are
                   reported. },
  categories = {structural,recognition,acoustic,phonetic_feature,timit,edinburgh},
  issn = {1225-441X},
  month = oct,
  pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2004/gutkin_king_icslp04.pdf},
  ps = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2004/gutkin_king_icslp04.ps.gz},
  year = 2004
}
% ICPR 2004 paper (Cambridge, August 2004).
% The title is stored in Title Case without per-letter brace protection; it
% contains no proper nouns or acronyms, so casing is left to the style.
@inproceedings{Gutkin:King:icpr04,
  author = {Gutkin, Alexander and King, Simon},
  title = {Structural Representation of Speech for
                   Phonetic Classification},
  booktitle = {Proc. 17th International Conference on Pattern
                   Recognition (ICPR)},
  volume = 3,
  pages = {438--441},
  address = {Cambridge, UK},
  publisher = {IEEE Computer Society Press},
  abstract = { This paper explores the issues involved in using
                   symbolic metric algorithms for automatic speech
                   recognition (ASR), via a structural representation of
                   speech. This representation is based on a set of
                   phonological distinctive features which is a
                   linguistically well-motivated alternative to the
                   ``beads-on-a-string'' view of speech that is standard
                   in current ASR systems. We report the promising results
                   of phoneme classification experiments conducted on a
                   standard continuous speech task. },
  categories = {structural,recognition,acoustic,phonetic_feature,timit,edinburgh},
  isbn = {0-7695-2128-2},
  month = aug,
  pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2004/gutkin_king_icpr04.pdf},
  ps = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2004/gutkin_king_icpr04.ps.gz},
  year = 2004
}
% ICASSP-05 paper (Philadelphia, March 2005).
% The title is stored in Title Case without per-letter brace protection; it
% contains no proper nouns or acronyms, so casing is left to the style.
@inproceedings{Gutkin:King:icassp05,
  author = {Gutkin, Alexander and King, Simon},
  title = {Detection of Symbolic Gestural Events in
                   Articulatory Data for Use in Structural
                   Representations of Continuous Speech},
  booktitle = {Proc. IEEE International Conference on Acoustics,
                   Speech, and Signal Processing (ICASSP-05)},
  volume = {I},
  pages = {885--888},
  address = {Philadelphia, PA, USA},
  publisher = {IEEE Signal Processing Society Press},
  abstract = { One of the crucial issues which often needs to be
                   addressed in structural approaches to speech
                   representation is the choice of fundamental symbolic
                   units of representation. In this paper, a
                   physiologically inspired methodology for defining these
                   symbolic atomic units in terms of primitive
                   articulatory events is proposed. It is shown how the
                   atomic articulatory events (gestures) can be detected
                   directly in the articulatory data. An algorithm for
                   evaluating the reliability of the articulatory events
                   is described and promising results of the experiments
                   conducted on MOCHA articulatory database are presented.
                   },
  categories = {structural,recognition,artic,mocha,edinburgh},
  isbn = {0-7803-8875-5},
  month = mar,
  pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2005/gutkin_king_icassp2005.pdf},
  ps = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2005/gutkin_king_icassp2005.ps.gz},
  year = 2005
}
% ICPR 2004 satellite workshop paper (Cambridge, August 2004).
% Only the proper name of the formalism is brace-protected (as a whole
% phrase); the bibliography style decides the casing of every other word.
@inproceedings{Gutkin:etal:ets-cam04,
  author = {Gutkin, Alexander and Gay, David and Goldfarb, Lev and
                   Wester, Mirjam},
  title = {On the Articulatory Representation of Speech
                   within the {Evolving Transformation System}
                   Formalism},
  booktitle = {Pattern Representation and the Future of Pattern
                   Recognition (Proc. Satellite Workshop of 17th
                   International Conference on Pattern Recognition)},
  editor = {Goldfarb, Lev},
  pages = {57--76},
  address = {Cambridge, UK},
  abstract = { This paper deals with the formulation of an
                   alternative, structural, approach to the speech
                   representation and recognition problem. In this
                   approach, we require both the representation and the
                   learning algorithms to be linguistically meaningful and
                   to naturally represent the linguistic data at hand.
                   This allows the speech recognition system to discover
                   the emergent combinatorial structure of the linguistic
                   classes. The proposed approach is developed within the
                   ETS formalism, the first formalism in applied
                   mathematics specifically designed to address the issues
                   of class and object/event representation. We present an
                   initial application of ETS to the articulatory
                   modelling of speech based on elementary physiological
                   gestures that can be reliably represented as the ETS
                   primitives. We discuss the advantages of this gestural
                   approach over prevalent methods and its promising
                   potential to mathematical modelling and representation
                   in linguistics. },
  categories = {structural,recognition,ets,artic,mocha,edinburgh,unb},
  month = aug,
  pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2004/ets_cam04_dasr.pdf},
  ps = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2004/ets_cam04_dasr.ps.gz},
  year = 2004
}
% PhD thesis (University of Edinburgh, December 2005); the entry's own note
% marks it as an internal version.
% Only the proper name of the formalism and its acronym are brace-protected
% (as whole words); the bibliography style decides the casing of the rest.
@phdthesis{Gutkin:phd:05,
  author = {Gutkin, Alexander},
  title = {Towards Formal Structural Representation of
                   Spoken Language: An {Evolving Transformation
                   System} ({ETS}) Approach},
  school = {School of Informatics, University of Edinburgh},
  address = {UK},
  note = {Internal version},
  categories = {structural,representation,recognition,edinburgh,unb,ets},
  month = dec,
  pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2005/gutkin_phd_full.pdf},
  year = 2005
}
% IJCAI-05 paper (Edinburgh, August 2005).
% The proper name of the formalism is brace-protected together with its
% acronym so that sentence-casing styles do not lowercase it.
@inproceedings{Gutkin:Gay:ijcai05,
  author = {Gutkin, Alexander and Gay, David R.},
  title = {Structural Representation and Matching of Articulatory
                   Speech Structures based on the {Evolving Transformation
                   System} ({ETS}) Formalism},
  booktitle = {Proc. Nineteenth International Joint Conference on
                   Artificial Intelligence (IJCAI-05)},
  address = {Edinburgh, UK},
  categories = {structural,recognition,ets,artic,mocha,edinburgh,unb},
  month = aug,
  pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2005/gutkin_gay_ijcai05.pdf},
  year = 2005
}