from mowl.datasets import RemoteDataset, PathDataset, OWLClasses
# Default download location used by GDADatasetV2 (a ``.tar.gz`` archive).
GDA2_URL = 'https://bio2vec.net/data/mowl/gda2.tar.gz'
# Default download location used by GDADatasetV2EL (a ``.tar.gz`` archive).
GDA2_EL_URL = 'https://bio2vec.net/data/mowl/gda2_el.tar.gz'
[docs]
class GDADatasetV2(RemoteDataset):
    """Gene--Disease Association Dataset, version 2.

    This dataset served as a benchmark in [hoehndorf2025]_.

    :param url: Location of the dataset archive, forwarded unchanged to
        the parent constructor. Defaults to ``GDA2_URL``.
    """

    def __init__(self, url=GDA2_URL):
        super().__init__(url=url)

    @property
    def evaluation_classes(self):
        """Pair ``(genes, diseases)`` of classes used for evaluation.

        The pair is built on first access and stored in
        ``self._evaluation_classes``; later accesses return the stored
        value. A class counts as a gene when its name contains
        ``"mowl.borg"`` and the text after the last ``"/"`` is numeric,
        and as a disease when its name contains ``"OMIM_"``. The two
        checks are independent of each other.

        :returns: A 2-tuple of :class:`OWLClasses` -- genes first,
            diseases second.
        """
        # Guard clause: reuse the previously computed pair if present.
        if self._evaluation_classes is not None:
            return self._evaluation_classes

        gene_set = set()
        disease_set = set()
        for iri, owl_class in self.classes.as_dict.items():
            # Gene test: name mentions "mowl.borg" and ends in a numeric segment.
            if "mowl.borg" in iri and iri.rsplit("/", 1)[-1].isnumeric():
                gene_set.add(owl_class)
            # Disease test is deliberately a separate ``if``, not ``elif``.
            if "OMIM_" in iri:
                disease_set.add(owl_class)

        self._evaluation_classes = (OWLClasses(gene_set), OWLClasses(disease_set))
        return self._evaluation_classes

    @property
    def evaluation_object_property(self):
        """Name of the object property used for evaluation (a fixed string)."""
        return "http://mowl.borg/associated_with"
[docs]
class GDADatasetV2EL(GDADatasetV2):
    """:class:`GDADatasetV2` variant whose default archive is ``GDA2_EL_URL``.

    Everything except the default ``url`` is inherited from the parent
    class.

    :param url: Location of the dataset archive, forwarded unchanged to
        the parent constructor. Defaults to ``GDA2_EL_URL``.
    """

    def __init__(self, url=GDA2_EL_URL):
        """Delegate to the parent constructor with the given ``url``."""
        super().__init__(url=url)