数据集:
bigbio/euadr
Corpora with specific entities and relationships annotated are essential to train and evaluate text-mining systems that are developed to extract specific structured information from a large corpus. In this paper we describe an approach where a named-entity recognition system produces a first annotation and annotators revise this annotation using a web-based interface. The agreement figures achieved show that the inter-annotator agreement is much better than the agreement with the system provided annotations. The corpus has been annotated for drugs, disorders, genes and their inter-relationships. For each of the drug-disorder, drug-target, and target-disorder relations three experts have annotated a set of 100 abstracts. These annotated relationships will be used to train and evaluate text-mining software to capture these relationships in texts.
@article{VANMULLIGEN2012879,
  author   = {van Mulligen, Erik M. and Fourrier-Reglat, Annie and Gurwitz, David and Molokhia, Mariam and Nieto, Ainhoa and Trifiro, Gianluca and Kors, Jan A. and Furlong, Laura I.},
  title    = {The {EU-ADR} corpus: Annotated drugs, diseases, targets, and their relationships},
  journal  = {Journal of Biomedical Informatics},
  volume   = {45},
  number   = {5},
  pages    = {879--884},
  year     = {2012},
  note     = {Text Mining and Natural Language Processing in Pharmacogenomics},
  issn     = {1532-0464},
  doi      = {10.1016/j.jbi.2012.04.004},
  url      = {https://www.sciencedirect.com/science/article/pii/S1532046412000573},
  keywords = {Text mining, Corpus development, Machine learning, Adverse drug reactions},
  abstract = {Corpora with specific entities and relationships annotated are essential to train and evaluate text-mining systems that are developed to extract specific structured information from a large corpus. In this paper we describe an approach where a named-entity recognition system produces a first annotation and annotators revise this annotation using a web-based interface. The agreement figures achieved show that the inter-annotator agreement is much better than the agreement with the system provided annotations. The corpus has been annotated for drugs, disorders, genes and their inter-relationships. For each of the drug–disorder, drug–target, and target–disorder relations three experts have annotated a set of 100 abstracts. These annotated relationships will be used to train and evaluate text-mining software to capture these relationships in texts.},
}