# s0345701.bib

% Journal article (ACM TSLP). Shares its title with the incollection entry
% murray2008c; the two are distinct publications, not duplicate records.
@article{murray2009,
  author   = {Murray, Gabriel and Kleinbauer, Thomas and Poller, Peter and Becker, Tilman and Renals, Steve and Kilgour, Jonathan},
  title    = {Extrinsic Summarization Evaluation: A Decision Audit Task},
  journal  = {ACM Transactions on Speech and Language Processing},
  volume   = {6},
  number   = {2},
  pages    = {1--29},
  year     = {2009},
  doi      = {10.1145/1596517.1596518},
  url      = {http://doi.acm.org/10.1145/1596517.1596518},
  abstract = {In this work we describe a large-scale extrinsic evaluation of automatic speech summarization technologies for meeting speech. The particular task is a decision audit, wherein a user must satisfy a complex information need, navigating several meetings in order to gain an understanding of how and why a given decision was made. We compare the usefulness of extractive and abstractive technologies in satisfying this information need, and assess the impact of automatic speech recognition (ASR) errors on user performance. We employ several evaluation methods for participant performance, including post-questionnaire data, human subjective and objective judgments, and a detailed analysis of participant browsing behavior. We find that while ASR errors affect user satisfaction on an information retrieval task, users can adapt their browsing behavior to complete the task satisfactorily. Results also indicate that users consider extractive summaries to be intuitive and useful tools for browsing multimodal meeting data. We discuss areas in which automatic summarization techniques can be improved in comparison with gold-standard meeting abstracts.}
}

% Book chapter in LNCS 5237. Shares its title with the journal entry
% murray2009; the two are distinct publications, not duplicate records.
% The LNCS number is the volume of the series, so it is stored in `volume`.
@incollection{murray2008c,
  author    = {Murray, Gabriel and Kleinbauer, Thomas and Poller, Peter and Renals, Steve and Kilgour, Jonathan},
  title     = {Extrinsic Summarization Evaluation: A Decision Audit Task},
  booktitle = {Machine Learning for Multimodal Interaction (Proc. MLMI '08)},
  series    = {Lecture Notes in Computer Science},
  volume    = {5237},
  publisher = {Springer},
  year      = {2008},
  pages     = {349--361},
  doi       = {10.1007/978-3-540-85853-9_32},
  abstract  = {In this work we describe a large-scale extrinsic evaluation of automatic speech summarization technologies for meeting speech. The particular task is a decision audit, wherein a user must satisfy a complex information need, navigating several meetings in order to gain an understanding of how and why a given decision was made. We compare the usefulness of extractive and abstractive technologies in satisfying this information need, and assess the impact of automatic speech recognition (ASR) errors on user performance. We employ several evaluation methods for participant performance, including post-questionnaire data, human subjective and objective judgments, and an analysis of participant browsing behaviour.}
}

% Book chapter in LNCS 5237. The LNCS number is the volume of the series,
% so it is stored in `volume`.
@incollection{murray2008b,
  author    = {Murray, Gabriel and Renals, Steve},
  title     = {Detecting Action Items in Meetings},
  booktitle = {Machine Learning for Multimodal Interaction (Proc. MLMI '08)},
  series    = {Lecture Notes in Computer Science},
  volume    = {5237},
  publisher = {Springer},
  year      = {2008},
  pages     = {208--213},
  doi       = {10.1007/978-3-540-85853-9_19},
  url       = {http://dx.doi.org/10.1007/978-3-540-85853-9_19},
  abstract  = {We present a method for detecting action items in spontaneous meeting speech. Using a supervised approach incorporating prosodic, lexical and structural features, we can classify such items with a high degree of accuracy. We also examine how well various feature subclasses can perform this task on their own.}
}

% DUC 2005 system description. `month` uses the predefined BibTeX macro so the
% bibliography style controls how the month is rendered.
@inproceedings{Hachey05,
  author     = {Hachey, B. and Murray, G. and Reitter, D.},
  title      = {The {Embra} System at {DUC} 2005: Query-oriented Multi-document Summarization with a Very Large Latent Semantic Space},
  booktitle  = {Proceedings of the Document Understanding Conference (DUC) 2005, Vancouver, BC, Canada},
  month      = oct,
  year       = {2005},
  abstract   = {Our summarization system submitted to DUC 2005, Embra (or Edinburgh), is novel in that it relies on building a very large semantic space for the purposes of determining relevance and redundancy in an MMR-style framework. We address specificity by detecting the presence or absence of Named Entities in our extract candidates, and we implemented a sentence-ordering algorithm to maximize sentence cohesion in our final summaries.},
  categories = {summarization, latent semantic analysis}
}

% HLT-NAACL 2006 paper. `month` uses the predefined BibTeX macro so the
% bibliography style controls how the month is rendered.
@inproceedings{murray06,
  author     = {Murray, G. and Renals, S. and Moore, J. and Carletta, J.},
  title      = {Incorporating Speaker and Discourse Features into Speech Summarization},
  booktitle  = {Proceedings of the Human Language Technology Conference - North American Chapter of the Association for Computational Linguistics Meeting (HLT-NAACL) 2006, New York City, USA},
  month      = jun,
  year       = {2006},
  abstract   = {The research presented herein explores the usefulness of incorporating speaker and discourse features in an automatic speech summarization system applied to meeting recordings from the ICSI Meetings corpus. By analyzing speaker activity, turn-taking and discourse cues, it is hypothesized that a system can outperform solely text-based methods inherited from the field of text summarization. The summarization methods are described, two evaluation methods are applied and compared, and the results clearly show that utilizing such features is advantageous and efficient. Even simple methods relying on discourse cues and speaker activity can outperform text summarization approaches.},
  categories = {summarization, speech summarization, prosody, latent semantic analysis}
}

% Interspeech '07 paper. The abstract was repaired after PDF text extraction
% had left stray spaces and line-break hyphens inside words.
@inproceedings{murray2007-interspeech,
  author    = {Murray, Gabriel and Renals, Steve},
  title     = {Towards online speech summarization},
  booktitle = {Proc. Interspeech '07},
  year      = {2007},
  abstract  = {The majority of speech summarization research has focused on extracting the most informative dialogue acts from recorded, archived data. However, a potential use case for speech summarization in the meetings domain is to facilitate a meeting in progress by providing the participants - whether they are attending in-person or remotely - with an indication of the most important parts of the discussion so far. This requires being able to determine whether a dialogue act is extract-worthy before the global meeting context is available. This paper introduces a novel method for weighting dialogue acts using only very limited local context, and shows that high summary precision is possible even when information about the meeting as a whole is lacking. A new evaluation framework consisting of weighted precision, recall and f-score is detailed, and the novel online summarization method is shown to significantly increase recall and f-score compared with a method using no contextual information.}
}

% ACL 2006 Summarization Workshop paper. `month` uses the predefined BibTeX
% macro so the bibliography style controls how the month is rendered.
@inproceedings{hachey06,
  author     = {Hachey, B. and Murray, G. and Reitter, D.},
  title      = {Dimensionality Reduction Aids Term Co-Occurrence Based Multi-Document Summarization},
  booktitle  = {Proceedings of ACL Summarization Workshop 2006, Sydney, Australia},
  month      = jun,
  year       = {2006},
  abstract   = {A key task in an extraction system for query-oriented multi-document summarisation, necessary for computing relevance and redundancy, is modelling text semantics. In the Embra system, we use a representation derived from the singular value decomposition of a term co-occurrence matrix. We present methods to show the reliability of performance improvements. We find that Embra performs better with dimensionality reduction.},
  categories = {summarization, latent semantic analysis}
}

% Book chapter in LNCS 4892. The abstract was repaired after text extraction
% had left line-break hyphens inside words.
@incollection{murray2007-mlmi,
  author    = {Murray, Gabriel and Renals, Steve},
  title     = {Term-weighting for summarization of multi-party spoken dialogues},
  editor    = {Popescu-Belis, A. and Renals, S. and Bourlard, H.},
  booktitle = {Machine Learning for Multimodal Interaction IV},
  series    = {Lecture Notes in Computer Science},
  volume    = {4892},
  publisher = {Springer},
  year      = {2007},
  pages     = {155--166},
  abstract  = {This paper explores the issue of term-weighting in the genre of spontaneous, multi-party spoken dialogues, with the intent of using such term-weights in the creation of extractive meeting summaries. The field of text information retrieval has yielded many term-weighting techniques to import for our purposes; this paper implements and compares several of these, namely tf.idf, Residual IDF and Gain. We propose that term-weighting for multi-party dialogues can exploit patterns in word usage among participant speakers, and introduce the su.idf metric as one attempt to do so. Results for all metrics are reported on both manual and automatic speech recognition (ASR) transcripts, and on both the ICSI and AMI meeting corpora.}
}

% ACL 2005 paper. `month` uses the predefined BibTeX macro so the
% bibliography style controls how the month is rendered.
@inproceedings{Murray05b,
  author     = {Murray, G. and Renals, S. and Carletta, J. and Moore, J.},
  title      = {Evaluating Automatic Summaries of Meeting Recordings},
  booktitle  = {Proceedings of the 43rd Annual Meeting of the Association for Computational Linguistics, Ann Arbor, MI, USA},
  month      = jun,
  year       = {2005},
  abstract   = {The research below explores schemes for evaluating automatic summaries of business meetings, using the ICSI Meeting Corpus. Both automatic and subjective evaluations were carried out, with a central interest being whether or not the two types of evaluations correlate with each other. The evaluation metrics were used to compare and contrast differing approaches to automatic summarization, the deterioration of summary quality on ASR output versus manual transcripts, and to determine whether manual extracts are rated significantly higher than automatic extracts.},
  categories = {ami,summarization, speech summarization, prosody, latent semantic analysis, summarization evaluation, edinburgh}
}

% Book chapter in LNCS 5237. The LNCS number is the volume of the series,
% so it is stored in `volume`. The abstract's TeX quotes are balanced.
@incollection{murray2008a,
  author    = {Murray, Gabriel and Renals, Steve},
  title     = {Meta Comments for Summarizing Meeting Speech},
  booktitle = {Machine Learning for Multimodal Interaction (Proc. MLMI '08)},
  series    = {Lecture Notes in Computer Science},
  volume    = {5237},
  publisher = {Springer},
  year      = {2008},
  pages     = {236--247},
  doi       = {10.1007/978-3-540-85853-9_22},
  url       = {http://dx.doi.org/10.1007/978-3-540-85853-9_22},
  abstract  = {This paper is about the extractive summarization of meeting speech, using the ICSI and AMI corpora. In the first set of experiments we use prosodic, lexical, structural and speaker-related features to select the most informative dialogue acts from each meeting, with the hypothesis being that such a rich mixture of features will yield the best results. In the second part, we present an approach in which the identification of ``meta-comments'' is used to create more informative summaries that provide an increased level of abstraction. We find that the inclusion of these meta comments improves summarization performance according to several evaluation metrics.}
}

% Interspeech 2005 paper. `month` uses the predefined BibTeX macro so the
% bibliography style controls how the month is rendered.
@inproceedings{murray-interspeech05,
  author     = {Murray, G. and Renals, S. and Carletta, J.},
  title      = {Extractive Summarization of Meeting Recordings},
  booktitle  = {Proc. Interspeech},
  month      = sep,
  year       = {2005},
  abstract   = {Several approaches to automatic speech summarization are discussed below, using the ICSI Meetings corpus. We contrast feature-based approaches using prosodic and lexical features with maximal marginal relevance and latent semantic analysis approaches to summarization. While the latter two techniques are borrowed directly from the field of text summarization, feature-based approaches using prosodic information are able to utilize characteristics unique to speech data. We also investigate how the summarization results might deteriorate when carried out on ASR output as opposed to manual transcripts. All of the summaries are of an extractive variety, and are compared using the software ROUGE.},
  categories = {ami,summarization,prosody, latent semantic analysis,edinburgh}
}

% Interspeech 2012 paper. `month` uses the predefined BibTeX macro so the
% bibliography style controls how the month is rendered.
@inproceedings{uriaIS2012,
  author     = {Uria, Benigno and Murray, Iain and Renals, Steve and Richmond, Korin},
  title      = {Deep Architectures for Articulatory Inversion},
  booktitle  = {Proc. Interspeech},
  month      = sep,
  year       = {2012},
  keywords   = {Articulatory inversion, deep neural network, deep belief network, deep regression network, pretraining},
  abstract   = {We implement two deep architectures for the acoustic-articulatory inversion mapping problem: a deep neural network and a deep trajectory mixture density network. We find that in both cases, deep architectures produce more accurate predictions than shallow architectures and that this is due to the higher expressive capability of a deep model and not a consequence of adding more adjustable parameters. We also find that a deep trajectory mixture density network is able to obtain better inversion accuracies than smoothing the results of a deep neural network. Our best model obtained an average root mean square error of 0.885 mm on the MNGU0 test dataset.},
  categories = {Articulatory inversion, deep neural network, deep belief network, deep regression network, pretraining}
}

% HLT/NAACL 2006 workshop paper. `month` uses the predefined BibTeX macro so
% the bibliography style controls how the month is rendered.
@inproceedings{murray06b,
  author     = {Murray, G. and Renals, S. and Taboada, M.},
  title      = {Prosodic Correlates of Rhetorical Relations},
  booktitle  = {Proceedings of HLT/NAACL ACTS Workshop, 2006, New York City, USA},
  month      = jun,
  year       = {2006},
  abstract   = {This paper investigates the usefulness of prosodic features in classifying rhetorical relations between utterances in meeting recordings. Five rhetorical relations of \textit{contrast}, \textit{elaboration}, \textit{summary}, \textit{question} and \textit{cause} are explored. Three training methods - supervised, unsupervised, and combined - are compared, and classification is carried out using support vector machines. The results of this pilot study are encouraging but mixed, with pairwise classification achieving an average of 68\% accuracy in discerning between relation pairs using only prosodic features, but multi-class classification performing only slightly better than chance.},
  categories = {rhetorical structure theory, prosody, unsupervised learning}
}

@inproceedings{murray06c,
author = {Murray, G. and Renals, S.},
title = {Dialogue Act Compression Via Pitch Contour Preservation},
booktitle = {Proceedings of the 9th International Conference on Spoken Language Processing, Pittsburgh, USA},
month = {September},
year = {2006},