From 954ff800bfbbce0690a1153a8476341983956780 Mon Sep 17 00:00:00 2001 From: Markus Konrad Date: Fri, 11 Mar 2022 15:36:53 +0100 Subject: [PATCH] fix test_corpus_init on Windows --- tests/test_corpus.py | 27 ++++++++++++++------------- 1 file changed, 14 insertions(+), 13 deletions(-) diff --git a/tests/test_corpus.py b/tests/test_corpus.py index c851d9e..db428ac 100644 --- a/tests/test_corpus.py +++ b/tests/test_corpus.py @@ -176,19 +176,6 @@ def test_corpus_init(): _check_copies(corp, copy(corp), same_nlp_instance=True) _check_copies(corp, deepcopy(corp), same_nlp_instance=False) - if 'en_core_web_md' in spacy.util.get_installed_models(): - corp = c.Corpus(textdata_en, language='en', load_features={'vectors', 'tok2vec', 'tagger', 'morphologizer', - 'parser', 'attribute_ruler', 'lemmatizer', 'ner'}) - assert corp.has_sents - assert corp.language_model == 'en_core_web_md' - _check_corpus_docs(corp, has_sents=True) - assert 'ner' in corp.nlp.pipe_names - - _check_copies(corp, copy(corp), same_nlp_instance=True) - _check_copies(corp, deepcopy(corp), same_nlp_instance=False) - else: - raise RuntimeWarning('language model "en_core_web_md" not installed') - corp = c.Corpus(textdata_en, language='en', load_features={'tok2vec', 'senter'}) assert corp.has_sents assert corp.language_model == 'en_core_web_sm' @@ -261,6 +248,20 @@ def test_corpus_init(): _check_copies(corp, deepcopy(corp), same_nlp_instance=False) +@pytest.mark.skipif('en_core_web_md' not in spacy.util.get_installed_models(), + reason='language model "en_core_web_md" not installed') +def test_corpus_init_md_model_required(): + corp = c.Corpus(textdata_en, language='en', load_features={'vectors', 'tok2vec', 'tagger', 'morphologizer', + 'parser', 'attribute_ruler', 'lemmatizer', 'ner'}) + assert corp.has_sents + assert corp.language_model == 'en_core_web_md' + _check_corpus_docs(corp, has_sents=True) + assert 'ner' in corp.nlp.pipe_names + + _check_copies(corp, copy(corp), same_nlp_instance=True) + _check_copies(corp, deepcopy(corp), same_nlp_instance=False) + + @settings(deadline=None) @given(docs=strategy_str_str_dict_printable(), punctuation=st.one_of(st.none(), st.lists(st.text(string.punctuation, min_size=1, max_size=1))),