Source code for mowl.datasets.builtin.ppi_yeast

from ..base import RemoteDataset, OWLClasses
from deprecated.sphinx import versionchanged


YEAST_DATA_URL = 'https://bio2vec.net/data/mowl/ppi_yeast.tar.gz'
YEAST_SLIM_DATA_URL = 'https://bio2vec.net/data/mowl/ppi_yeast_slim.tar.gz'

HUMAN_DATA_URL = 'https://bio2vec.net/data/mowl/ppi_human.tar.gz'

[docs] class PPIYeastDataset(RemoteDataset): """ Dataset containing protein-protein interactions in yeast. The data used for this dataset consists of the \ `Gene Ontology <http://geneontology.org/docs/download-ontology/>`_ released on 20-10-2021 and \ protein interaction data found in \ `String Database <https://string-db.org/cgi/download?sessionId=bJLbZRTBIir4>`_ version 11.5. \ Protein interaction data was randomly split 90:5:5 across training, validation and testing \ ontologies and Gene Ontology functional annotations of proteins is part of the training \ ontology only. Protein interactions are represented as an axiom of the form \ :math:`protein_1 \sqsubseteq interacts\_with .protein_2.` This dataset was used as a benchmark in [chen2025]_. """ def __init__(self, url=None): super().__init__(url=YEAST_DATA_URL if not url else url) @versionchanged(reason="Return pair of classes for evaluation", version="0.4.0") @property def evaluation_classes(self): """Classes that are used in evaluation """ if self._evaluation_classes is None: proteins = set() for owl_name, owl_cls in self.classes.as_dict.items(): if "http://4932" in owl_name: proteins.add(owl_cls) self._evaluation_classes = OWLClasses(proteins), OWLClasses(proteins) return self._evaluation_classes
[docs] def get_evaluation_property(self): return "http://interacts_with"
[docs] class PPIYeastSlimDataset(PPIYeastDataset): """ Reduced version of :class:`PPIYeastDataset`. Tranining ontology is built from the Slim Yeast subset of Gene Ontology. """ def __init__(self): super().__init__(url=YEAST_SLIM_DATA_URL)
[docs] class PPIHumanDataset(RemoteDataset): """ Dataset containing protein-protein interactions in human. This dataset was used as benchmark in [chen2025]_ """ def __init__(self, url=None): super().__init__(url=HUMAN_DATA_URL if not url else url) @versionchanged(reason="Return pair of classes for evaluation", version="0.4.0") @property def evaluation_classes(self): """Classes that are used in evaluation """ if self._evaluation_classes is None: proteins = set() for owl_name, owl_cls in self.classes.as_dict.items(): if "http://9606" in owl_name: proteins.add(owl_cls) self._evaluation_classes = OWLClasses(proteins), OWLClasses(proteins) return self._evaluation_classes
[docs] def get_evaluation_property(self): return "http://interacts_with"