From 7a4e5f3173e61de9ca311d6ebfa2871abaf4d3a7 Mon Sep 17 00:00:00 2001 From: xxyzz Date: Mon, 24 Feb 2025 14:53:34 +0800 Subject: [PATCH 1/2] =?UTF-8?q?[ku]=20add=20"Nim=C3=AEnok"(appendix)=20nam?= =?UTF-8?q?espace=20pages?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit page "Nimînok:Ferhengok" is used in "Modul:glossary/data" and "Şablon:nimînok" --- src/wiktextract/data/ku/config.json | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/wiktextract/data/ku/config.json b/src/wiktextract/data/ku/config.json index ca8e5e58f..842a7e36b 100644 --- a/src/wiktextract/data/ku/config.json +++ b/src/wiktextract/data/ku/config.json @@ -1,3 +1,9 @@ { - "save_ns_names": ["Main", "Template", "Module", "Tewandin"] + "save_ns_names": [ + "Main", + "Template", + "Module", + "Tewandin", + "Nimînok" + ] } From 3e22526cb5d757804fc6741c5d0187f0d1062db3 Mon Sep 17 00:00:00 2001 From: xxyzz Date: Mon, 24 Feb 2025 15:49:44 +0800 Subject: [PATCH 2/2] [ku] extract "mj" tag template in "kol3" list template --- src/wiktextract/extractor/ku/linkage.py | 10 +++++++++- tests/test_ku_linkage.py | 20 ++++++++++++++++++++ 2 files changed, 29 insertions(+), 1 deletion(-) diff --git a/src/wiktextract/extractor/ku/linkage.py b/src/wiktextract/extractor/ku/linkage.py index 247fd4d6b..09cf8a9ad 100644 --- a/src/wiktextract/extractor/ku/linkage.py +++ b/src/wiktextract/extractor/ku/linkage.py @@ -222,8 +222,11 @@ def extract_linkage_list_item( shared_tags: list[str] = [], ) -> None: raw_tags = [] + forms = [] for node in list_item.children: - if isinstance(node, WikiNode) and node.kind == NodeKind.LINK: + if ( + isinstance(node, WikiNode) and node.kind == NodeKind.LINK + ) or isinstance(node, str): word = clean_node(wxr, None, node) if word != "": if linkage_type != "": @@ -233,10 +236,12 @@ def extract_linkage_list_item( raw_tags=raw_tags, tags=shared_tags, ) + forms.append(l_data) translate_raw_tags(l_data) getattr(word_entry, 
linkage_type).append(l_data) else: form = Form(form=word, raw_tags=raw_tags, tags=shared_tags) + forms.append(form) translate_raw_tags(form) word_entry.forms.append(form) elif isinstance(node, TemplateNode): @@ -268,6 +273,9 @@ def extract_linkage_list_item( raw_tag = clean_node(wxr, None, node).strip("() ") if raw_tag != "": raw_tags.append(raw_tag) + for form in forms: + form.raw_tags.append(raw_tag) + translate_raw_tags(form) def extract_stûn_template( diff --git a/tests/test_ku_linkage.py b/tests/test_ku_linkage.py index 81d659e16..fe61da476 100644 --- a/tests/test_ku_linkage.py +++ b/tests/test_ku_linkage.py @@ -149,3 +149,23 @@ def test_kol_tag(self): self.assertEqual( page_data[0]["derived"], [{"word": "aqil kirin", "tags": ["verb"]}] ) + + def test_kol_mj(self): + self.wxr.wtp.add_page("Şablon:ziman", 10, "Îngilîzî") + self.wxr.wtp.add_page("Şablon:mj", 10, "(xweşbêjî)") + page_data = parse_page( + self.wxr, + "go the way of all flesh", + """== {{ziman|en}} == +=== Biwêj === +# [[texsîrî]] +==== Jê ==== +==== Hevmane ==== +{{kol3|en|cure=Hevmane +|exit {{mj|{{nimînok|xweşbêjî}}}} +}}""", + ) + self.assertEqual( + page_data[0]["synonyms"], + [{"word": "exit", "tags": ["euphemistic"]}], + )