Source code for mowl.base_models.syntactic_model

from mowl.base_models.model import Model
import mowl.error.messages as msg
from mowl.corpus import extract_annotation_corpus, extract_and_save_annotation_corpus, extract_axiom_corpus, extract_and_save_axiom_corpus
import tempfile

from deprecated.sphinx import versionadded


import logging
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)

[docs] class SyntacticModel(Model): """ Base class for syntactic methods. By *syntactic*, we mean methods that use the syntax of the ontology to generate the corpus. :param corpus_filepath: the filepath where the corpus is saved. If None, the corpus file is saved in a temporary file. :type corpus_filepath: str """ def __init__(self, *args, corpus_filepath=None, **kwargs): super().__init__(*args, **kwargs) if corpus_filepath is not None and not isinstance(corpus_filepath, str): raise TypeError("Optional parameter 'corpus_filepath' must be of type str.") self._corpus_filepath = corpus_filepath self._corpus = None self._save_corpus = True self._with_annotations = False @property def corpus_filepath(self): """Path for saving the corpus. :rtype: str """ if self._corpus_filepath is None: corpus_filepath = tempfile.NamedTemporaryFile() self._corpus_filepath = corpus_filepath.name return self._corpus_filepath @property def corpus(self): """ Corpus generated from the ontology. :rtype: list """ if self._corpus is None: raise AttributeError(msg.CORPUS_NOT_GENERATED) return self._corpus
[docs] def generate_corpus(self, save = True, with_annotations=False): """Generates the corpus of the training ontology. It uses the Manchester OWL Syntax. :param save: if True, the corpus is saved into the model filepath, otherwise, the corpus is returned as a list of sentences. Default is True. :type save: bool, optional :param with_annotations: if True, the corpus is generated with the annotations, otherwise, the corpus is generated only with the axioms. Default is False. :type with_annotations: bool, optional """ if save: extract_and_save_axiom_corpus(self.dataset.ontology, self.corpus_filepath, mode="w") if with_annotations: extract_and_save_annotation_corpus(self.dataset.ontology, self.corpus_filepath, mode="a") else: corpus = extract_axiom_corpus(self.dataset.ontology) if with_annotations: corpus += extract_annotation_corpus(self.dataset.ontology) if not save: return corpus else: logger.info(f"Corpus saved in {self.corpus_filepath}") self._save_corpus = save self._with_annotations = with_annotations
[docs] @versionadded(version="0.4.0") def load_corpus(self): """Loads the corpus from the corpus filepath. :rtype: list """ if self._corpus is None: with open(self.corpus_filepath, "r") as f: corpus = f.readlines() corpus = [sentence.strip() for sentence in corpus] self._corpus = corpus return self._corpus
[docs] def train(self): raise NotImplementedError