From ae12ba408599533d9c94513cc967ffac76576e5e Mon Sep 17 00:00:00 2001 From: xxyzz Date: Tue, 25 Feb 2025 10:19:32 +0800 Subject: [PATCH] [de] add "Hilfsverb haben", "Hilfsverb sein" tags in Flexion pages --- src/wiktextract/extractor/de/flexion.py | 13 ++++++++----- src/wiktextract/extractor/de/tags.py | 3 +++ 2 files changed, 11 insertions(+), 5 deletions(-) diff --git a/src/wiktextract/extractor/de/flexion.py b/src/wiktextract/extractor/de/flexion.py index 130380b8..bf21cfb2 100644 --- a/src/wiktextract/extractor/de/flexion.py +++ b/src/wiktextract/extractor/de/flexion.py @@ -14,13 +14,13 @@ from .models import Form, WordEntry from .tags import GRAMMATICAL_TAGS, translate_raw_tags -LEVEL2_TAGS = frozenset(["untrennbar"]) - def parse_flexion_page( wxr: WiktextractContext, word_entry: WordEntry, page_title: str ) -> None: # https://de.wiktionary.org/wiki/Hilfe:Flexionsseiten + LEVEL2_TAGS = ["Hilfsverb haben", "Hilfsverb sein"] + flexion_page = wxr.wtp.get_page_body( page_title, wxr.wtp.NAMESPACE_DATA["Flexion"]["id"] ) @@ -37,10 +37,13 @@ def parse_flexion_page( section_str = clean_node(wxr, None, node.largs) for word in section_str.split(" "): word = word.strip(", ") - if ( - word in LEVEL2_TAGS or word in GRAMMATICAL_TAGS - ) and not page_title.endswith(f":{word}"): + if word in GRAMMATICAL_TAGS and not page_title.endswith( + f":{word}" + ): shared_raw_tags.append(word) + for raw_tag in LEVEL2_TAGS: + if raw_tag in section_str: + shared_raw_tags.append(raw_tag) case NodeKind.TEMPLATE: if node.template_name.startswith("Deklinationsseite"): process_deklinationsseite_template( diff --git a/src/wiktextract/extractor/de/tags.py b/src/wiktextract/extractor/de/tags.py index e43157ce..944bc627 100644 --- a/src/wiktextract/extractor/de/tags.py +++ b/src/wiktextract/extractor/de/tags.py @@ -338,6 +338,9 @@ # Vorlage:Deutsch Verb schwach trennbar reflexiv "Nebensatzkonjugation": "subordinate-clause", "Hauptsatzkonjugation": "main-clause", + "regelmäßig": "regular", + "untrennbar": "inseparable", + "trennbar": "separable", } GRAMMATICAL_TAGS = {