% Conference paper published in the CLEF 2014 proceedings volume.
% The citation key is kept unchanged so existing citations still resolve.
% The series name is stored without the exporter's "(including subseries ...)" suffix.
@inproceedings{bd9afb598af84fe484d5327b940d4835,
  author    = {Walker, Andrew and Starkey, Andrew and Pan, Jeff Z. and Siddharthan, Advaith},
  title     = {Making test corpora for question answering more representative},
  booktitle = {Information Access Evaluation. Multilinguality, Multimodality, and Interaction},
  editor    = {Kanoulas, Evangelos and Lupu, Mihai and Clough, Paul and Sanderson, Mark and Hall, Mark and Hanbury, Allan and Toms, Elaine},
  series    = {Lecture Notes in Computer Science},
  publisher = {Springer-Verlag},
  year      = {2014},
  pages     = {1--6},
  doi       = {10.1007/978-3-319-11382-1_1},
  isbn      = {9783319113814},
  language  = {English},
  note      = {5th International Conference of the CLEF Initiative, CLEF 2014, 15--18 September 2014},
  abstract  = {Despite two high profile series of challenges devoted to question answering technologies there remains no formal study into the representativeness that question corpora bear to real end-user inputs. We examine the corpora used presently and historically in the TREC and QALD challenges in juxtaposition with two more from natural sources and identify a degree of disjointedness between the two. We analyse these differences in depth before discussing a candidate approach to question corpora generation and provide a juxtaposition on its own representativeness. We conclude that these artificial corpora have good overall coverage of grammatical structures but the distribution is skewed, meaning performance measures may be inaccurate.},
}