The Centre for Speech Technology Research, The University of Edinburgh

Publications by Alex Gutkin

s9900835.bib

% QR-05 workshop paper on ETS-based structural representation and matching of articulatory speech structures.
% The month is given as the predefined BibTeX macro so the bibliography style decides how it is rendered;
% title words are brace-protected as whole words to keep their capitalisation without breaking kerning.
@inproceedings{Gutkin:Gay:qr05,
  author     = {Gutkin, Alexander and Gay, David R.},
  title      = {{Structural} {Representation} and {Matching} of {Articulatory} {Speech} {Structures} based on the {Evolving} {Transformation} {System} ({ETS}) {Formalism}},
  booktitle  = {Proc. 19th International Workshop on Qualitative Reasoning (QR-05)},
  editor     = {Hofbaur, Michael and Rinner, Bernhard and Wotawa, Franz},
  pages      = {89--96},
  address    = {Graz, Austria},
  month      = may,
  year       = {2005},
  isbn       = {3-9502019-0-4},
  pdf        = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2005/gutkin_gay_qr05.pdf},
  abstract   = {A formal structural representation of speech consistent with the principles of combinatorial structure theory is presented in this paper. The representation is developed within the Evolving Transformation System (ETS) formalism and encapsulates speech processes at the articulatory level. We show how the class structure of several consonantal phonemes of English can be expressed with the help of articulatory gestures---the atomic combinatorial units of speech. As a preliminary step towards the design of a speech recognition architecture based on the structural approaches to physiology and articulatory phonology, we present an algorithm for the structural detection of phonemic class elements inside gestural ETS structures derived from continuous speech. Experiments designed to verify the adequacy of the hypothesised gestural class structure conducted on the MOCHA articulatory corpus are then described. Our experimental results support the hypothesis that the articulatory representation captures sufficient information for the accurate structural identification of the phonemic classes in question.},
  categories = {structural,recognition,ets,artic,mocha,edinburgh,unb}
}
% MPhil thesis (University of Cambridge, 2000) on log-linear interpolation of language models.
% The month is given as the predefined BibTeX macro so the bibliography style decides how it is rendered;
% title words are brace-protected as whole words to keep their capitalisation without breaking kerning.
@mastersthesis{Gutkin:00,
  author     = {Gutkin, Alexander},
  title      = {{Log-Linear} {Interpolation} of {Language} {Models}},
  school     = {Department of Engineering, University of Cambridge},
  type       = {{MPhil.} thesis},
  address    = {UK},
  month      = dec,
  year       = {2000},
  pdf        = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2000/gutkin_mphil.pdf},
  ps         = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2000/gutkin_mphil.ps.gz},
  categories = {statistical speech recognition, language modelling}
}
% PRIS-2005 workshop paper on string-template-based inductive learning of spoken language (TIMIT experiments).
% The month is given as the predefined BibTeX macro so the bibliography style decides how it is rendered;
% title words are brace-protected as whole words to keep their capitalisation without breaking kerning.
@inproceedings{Gutkin:King:pris05,
  author     = {Gutkin, Alexander and King, Simon},
  title      = {{Inductive} {String} {Template-Based} {Learning} of {Spoken} {Language}},
  booktitle  = {Proc. 5th International Workshop on Pattern Recognition in Information Systems (PRIS-2005), In conjunction with the 7th International Conference on Enterprise Information Systems (ICEIS-2005)},
  editor     = {Gamboa, Hugo and Fred, Ana},
  pages      = {43--51},
  publisher  = {INSTICC Press},
  address    = {Miami, USA},
  month      = may,
  year       = {2005},
  isbn       = {972-8865-28-7},
  pdf        = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2005/gutkin_king_pris05.pdf},
  ps         = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2005/gutkin_king_pris05.ps.gz},
  abstract   = {This paper deals with formulation of alternative structural approach to the speech recognition problem. In this approach, we require both the representation and the learning algorithms defined on it to be linguistically meaningful, which allows the speech recognition system to discover the nature of the linguistic classes of speech patterns corresponding to the speech waveforms. We briefly discuss the current formalisms and propose an alternative --- a phonologically inspired string-based inductive speech representation, defined within an analytical framework specifically designed to address the issues of class and object representation. We also present the results of the phoneme classification experiments conducted on the TIMIT corpus of continuous speech.},
  categories = {structural,recognition,acoustic,phonetic_feature,timit,edinburgh}
}
% ICSLP 2004 paper on phone classification in pseudo-Euclidean vector spaces (TIMIT experiments).
% The title is stored in Title Case with only the proper adjective protected, so sentence-casing styles
% still render "Phone classification in pseudo-Euclidean vector spaces".
% The ISSN check character is written as an uppercase X; the month uses the predefined BibTeX macro.
@inproceedings{Gutkin:King:icslp04,
  author     = {Gutkin, Alexander and King, Simon},
  title      = {Phone Classification in Pseudo-{Euclidean} Vector Spaces},
  booktitle  = {Proc. 8th International Conference on Spoken Language Processing (ICSLP)},
  volume     = {II},
  pages      = {1453--1457},
  address    = {Jeju Island, Korea},
  month      = oct,
  year       = {2004},
  issn       = {1225-441X},
  pdf        = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2004/gutkin_king_icslp04.pdf},
  ps         = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2004/gutkin_king_icslp04.ps.gz},
  abstract   = {Recently we have proposed a structural framework for modelling speech, which is based on patterns of phonological distinctive features, a linguistically well-motivated alternative to standard vector-space acoustic models like HMMs. This framework gives considerable representational freedom by working with features that have explicit linguistic interpretation, but at the expense of the ability to apply the wide range of analytical decision algorithms available in vector spaces, restricting oneself to more computationally expensive and less-developed symbolic metric tools. In this paper we show that a dissimilarity-based distance-preserving transition from the original structural representation to a corresponding pseudo-Euclidean vector space is possible. Promising results of phone classification experiments conducted on the TIMIT database are reported.},
  categories = {structural,recognition,acoustic,phonetic_feature,timit,edinburgh}
}
% ICPR 2004 paper on a structural, distinctive-feature-based representation of speech for phonetic classification.
% The month is given as the predefined BibTeX macro so the bibliography style decides how it is rendered;
% title words are brace-protected as whole words to keep their capitalisation without breaking kerning.
@inproceedings{Gutkin:King:icpr04,
  author     = {Gutkin, Alexander and King, Simon},
  title      = {{Structural} {Representation} of {Speech} for {Phonetic} {Classification}},
  booktitle  = {Proc. 17th International Conference on Pattern Recognition (ICPR)},
  volume     = {3},
  pages      = {438--441},
  publisher  = {IEEE Computer Society Press},
  address    = {Cambridge, UK},
  month      = aug,
  year       = {2004},
  isbn       = {0-7695-2128-2},
  pdf        = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2004/gutkin_king_icpr04.pdf},
  ps         = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2004/gutkin_king_icpr04.ps.gz},
  abstract   = {This paper explores the issues involved in using symbolic metric algorithms for automatic speech recognition (ASR), via a structural representation of speech. This representation is based on a set of phonological distinctive features which is a linguistically well-motivated alternative to the ``beads-on-a-string'' view of speech that is standard in current ASR systems. We report the promising results of phoneme classification experiments conducted on a standard continuous speech task.},
  categories = {structural,recognition,acoustic,phonetic_feature,timit,edinburgh}
}
% ICASSP-05 paper on detecting symbolic gestural events in articulatory data (MOCHA experiments).
% The month is given as the predefined BibTeX macro so the bibliography style decides how it is rendered;
% title words are brace-protected as whole words to keep their capitalisation without breaking kerning.
@inproceedings{Gutkin:King:icassp05,
  author     = {Gutkin, Alexander and King, Simon},
  title      = {{Detection} of {Symbolic} {Gestural} {Events} in {Articulatory} {Data} for {Use} in {Structural} {Representations} of {Continuous} {Speech}},
  booktitle  = {Proc. IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP-05)},
  volume     = {I},
  pages      = {885--888},
  publisher  = {IEEE Signal Processing Society Press},
  address    = {Philadelphia, PA, USA},
  month      = mar,
  year       = {2005},
  isbn       = {0-7803-8875-5},
  pdf        = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2005/gutkin_king_icassp2005.pdf},
  ps         = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2005/gutkin_king_icassp2005.ps.gz},
  abstract   = {One of the crucial issues which often needs to be addressed in structural approaches to speech representation is the choice of fundamental symbolic units of representation. In this paper, a physiologically inspired methodology for defining these symbolic atomic units in terms of primitive articulatory events is proposed. It is shown how the atomic articulatory events (gestures) can be detected directly in the articulatory data. An algorithm for evaluating the reliability of the articulatory events is described and promising results of the experiments conducted on MOCHA articulatory database are presented.},
  categories = {structural,recognition,artic,mocha,edinburgh}
}
% ICPR 2004 satellite-workshop paper on the articulatory representation of speech within the ETS formalism.
% The month is given as the predefined BibTeX macro so the bibliography style decides how it is rendered;
% title words are brace-protected as whole words to keep their capitalisation without breaking kerning.
% NOTE(review): the second author is recorded as "Gay, David" here but as "Gay, David R." in other
% entries of this file; presumably the same person, confirm before normalising the name.
@inproceedings{Gutkin:etal:ets-cam04,
  author     = {Gutkin, Alexander and Gay, David and Goldfarb, Lev and Wester, Mirjam},
  title      = {On the {Articulatory} {Representation} of {Speech} within the {Evolving} {Transformation} {System} {Formalism}},
  booktitle  = {Pattern Representation and the Future of Pattern Recognition (Proc. Satellite Workshop of 17th International Conference on Pattern Recognition)},
  editor     = {Goldfarb, Lev},
  pages      = {57--76},
  address    = {Cambridge, UK},
  month      = aug,
  year       = {2004},
  pdf        = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2004/ets_cam04_dasr.pdf},
  ps         = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2004/ets_cam04_dasr.ps.gz},
  abstract   = {This paper deals with the formulation of an alternative, structural, approach to the speech representation and recognition problem. In this approach, we require both the representation and the learning algorithms to be linguistically meaningful and to naturally represent the linguistic data at hand. This allows the speech recognition system to discover the emergent combinatorial structure of the linguistic classes. The proposed approach is developed within the ETS formalism, the first formalism in applied mathematics specifically designed to address the issues of class and object/event representation. We present an initial application of ETS to the articulatory modelling of speech based on elementary physiological gestures that can be reliably represented as the ETS primitives. We discuss the advantages of this gestural approach over prevalent methods and its promising potential to mathematical modelling and representation in linguistics.},
  categories = {structural,recognition,ets,artic,mocha,edinburgh,unb}
}
% PhD thesis (University of Edinburgh, 2005) on an ETS approach to structural representation of spoken language.
% The month is given as the predefined BibTeX macro so the bibliography style decides how it is rendered;
% title words are brace-protected as whole words to keep their capitalisation without breaking kerning.
@phdthesis{Gutkin:phd:05,
  author     = {Gutkin, Alexander},
  title      = {{Towards} {Formal} {Structural} {Representation} of {Spoken} {Language}: {An} {Evolving} {Transformation} {System} ({ETS}) {Approach}},
  school     = {School of Informatics, University of Edinburgh},
  address    = {UK},
  month      = dec,
  year       = {2005},
  note       = {Internal version},
  pdf        = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2005/gutkin_phd_full.pdf},
  categories = {structural,representation,recognition,edinburgh,unb,ets}
}
% IJCAI-05 paper on ETS-based structural representation and matching of articulatory speech structures.
% Title words are brace-protected as whole words so that sentence-casing styles do not lowercase
% "Evolving Transformation System"; this matches the protection used for the identically titled
% Gutkin:Gay:qr05 entry. The month uses the predefined BibTeX macro so the style controls its rendering.
@inproceedings{Gutkin:Gay:ijcai05,
  author     = {Gutkin, Alexander and Gay, David R.},
  title      = {{Structural} {Representation} and {Matching} of {Articulatory} {Speech} {Structures} based on the {Evolving} {Transformation} {System} ({ETS}) {Formalism}},
  booktitle  = {Proc. Nineteenth International Joint Conference on Artificial Intelligence (IJCAI-05)},
  address    = {Edinburgh, UK},
  month      = aug,
  year       = {2005},
  pdf        = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2005/gutkin_gay_ijcai05.pdf},
  categories = {structural,recognition,ets,artic,mocha,edinburgh,unb}
}