Skip to content

Commit

Permalink
Merge pull request #13 from bretttolbert/catalan-contd
Browse files Browse the repository at this point in the history
improved support for Catalan
  • Loading branch information
bretttolbert authored Dec 14, 2023
2 parents 1d86bd4 + 3e2320a commit 612a803
Show file tree
Hide file tree
Showing 13 changed files with 9,140 additions and 55,657 deletions.
7 changes: 7 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
# Build the package with setuptools, using its `setuptools.build_meta` backend.
[build-system]
requires = ["setuptools"]
build-backend = "setuptools.build_meta"

[project]
name = "verbecc"
version = "1.9.0"
Expand Down Expand Up @@ -60,3 +64,6 @@ Changelog = "https://github.com/bretttolbert/verbecc/blob/master/CHANGELOG.md"

# Extra (non-Python) files to include in the built package.
# NOTE(review): setuptools normally keys this table by package name; confirm
# that "trained_models" (rather than "verbecc") is the intended key and that
# the glob is resolved relative to the right directory.
[tool.setuptools.package-data]
trained_models = ['verbecc/data/models/*']

# Empty `find` table: presumably asks setuptools to discover packages
# automatically with its default settings - TODO confirm against setuptools docs.
[tool.setuptools.packages]
find = {}
25 changes: 12 additions & 13 deletions tests/test_conjugator.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,15 +8,15 @@

cg = conjugator.Conjugator(lang='fr')

def test_get_verbs_list():
    """The French conjugator reports more than 7000 verbs, including 'parler'."""
    all_verbs = cg.get_verbs_list()
    verb_count = len(all_verbs)
    assert verb_count > 7000
    assert 'parler' in all_verbs
def test_get_infinitives():
    """The French conjugator reports more than 7000 infinitives, including 'parler'."""
    known_infinitives = cg.get_infinitives()
    infinitive_count = len(known_infinitives)
    assert infinitive_count > 7000
    assert 'parler' in known_infinitives

def test_get_templates_list():
    """The French conjugator reports at least 146 templates, including 'aim:er'."""
    all_templates = cg.get_templates_list()
    template_count = len(all_templates)
    assert template_count >= 146
    assert 'aim:er' in all_templates
def test_get_template_names():
    """The French conjugator reports at least 146 template names, including 'aim:er'."""
    names = cg.get_template_names()
    name_count = len(names)
    assert name_count >= 146
    assert 'aim:er' in names

test_verbs = [
(u"manger"),
Expand All @@ -28,11 +28,10 @@ def test_get_templates_list():
(u"pleuvoir")
]

@pytest.mark.parametrize("infinitive", test_verbs)
def test_conjugator_conjugate(infinitive):
    """Conjugating a sample verb yields a truthy result.

    The parametrize argument name must match the function parameter name,
    otherwise pytest rejects the test at collection time. pytest supplies one
    verb from ``test_verbs`` per test case, so no inner loop over the list is
    needed (the previous loop also shadowed the parameter).
    """
    output = cg.conjugate(infinitive)
    assert output
@pytest.mark.parametrize("infinitive", test_verbs)
def test_conjugator_conjugate_basic(infinitive):
    """Each verb in ``test_verbs`` conjugates to a truthy result."""
    assert cg.conjugate(infinitive)

def test_conjugator_predict_conjugation_er_verb_indicative_present():
if config.ml:
Expand Down
41 changes: 41 additions & 0 deletions tests/test_inflector_ca.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,22 @@

cg = Conjugator(lang='ca')

@pytest.mark.skip("known failure")
def test_all_verbs_have_templates():
    """Every Catalan verb should reference a known conjugation template.

    Skipped as a known failure: templates have not yet been added for all
    verbs, so this check is expected to fail until that work is finished.
    """
    verbs = cg.get_verbs()
    # Use a set so each membership test is O(1) instead of rescanning the
    # template-name list once per verb.
    known_templates = set(cg.get_template_names())
    missing_templates = {
        verb.template for verb in verbs
        if verb.template not in known_templates
    }
    assert not missing_templates, (
        "verbs reference unknown templates: {!r}".format(missing_templates)
    )

def test_find_verb_by_infinitive():
    """Looking up 'abandonar' returns that verb, mapped to template 'cant:ar'."""
    found = cg.find_verb_by_infinitive('abandonar')
    assert found.infinitive == 'abandonar'
    assert found.template == 'cant:ar'

test_ca_conjugate_mood_tense_data = [
('ser', 'indicatiu', 'present',
['jo sóc', 'tu ets', 'ell és', 'nosaltres som', 'vosaltres sou', 'ells són']),
Expand Down Expand Up @@ -67,6 +83,20 @@
['jo hagi', 'tu hagis', 'ell hagi', 'nosaltres hàgim', 'vosaltres hàgiu', 'ells hagin']),
('tenir', 'indicatiu', 'present',
['jo tinc', 'tu tens', 'ell té', 'nosaltres tenim', 'vosaltres teniu', 'ells tenen']),
('fer', 'indicatiu', 'present',
['jo faig', 'tu fas', 'ell fa', 'nosaltres fem', 'vosaltres feu', 'ells fan']),
('fer', 'indicatiu', 'imperfet',
['jo feia', 'tu feies', 'ell feia', 'nosaltres fèiem', 'vosaltres fèieu', 'ells feien']),
('servir', 'indicatiu', 'present',
['jo serveixo', 'tu serveixes', 'ell serveix', 'nosaltres servim', 'vosaltres serviu', 'ells serveixen']),
('veure', 'indicatiu', 'present',
['jo veig', 'tu veus', 'ell veu', 'nosaltres veiem', 'vosaltres veieu', 'ells veuen']),
('abandonar', 'indicatiu', 'present',
['jo abandono', 'tu abandones', 'ell abandona', 'nosaltres abandonem', 'vosaltres abandoneu', 'ells abandonen']),
('rebre', 'indicatiu', 'present',
['jo rebo', 'tu reps', 'ell rep', 'nosaltres rebem', 'vosaltres rebeu', 'ells reben']),
('cabre', 'indicatiu', 'present',
['jo cabo', 'tu caps', 'ell cap', 'nosaltres cabem', 'vosaltres cabeu', 'ells caben']),
]

@pytest.mark.parametrize("infinitive,mood,tense,expected_result",
Expand All @@ -78,11 +108,22 @@ def test_inflector_ca_get_conj_obs():
co = cg._inflector._get_conj_obs('parlar')
assert co.verb.infinitive == "parlar"
assert co.verb_stem == "parl"
assert co.template.name == "cant:ar"

def test_inflector_ca_get_conj_obs_2():
    """_get_conj_obs('abandonar') exposes the infinitive, stem and template name."""
    conj_obs = cg._inflector._get_conj_obs('abandonar')
    assert conj_obs.verb.infinitive == "abandonar"
    assert conj_obs.verb_stem == "abandon"
    assert conj_obs.template.name == "cant:ar"

def test_inflector_ca_get_verb_stem():
    """The stem of 'parlar' under template 'cant:ar' is 'parl'."""
    assert cg._inflector._get_verb_stem(u"parlar", u"cant:ar") == u"parl"

def test_inflector_ca_get_verb_stem_2():
    """The stem of 'abandonar' under template 'cant:ar' is 'abandon'."""
    assert cg._inflector._get_verb_stem(u"abandonar", u"cant:ar") == u"abandon"

def test_inflector_ca_conjugate_simple_mood_tense():
verb_stem = u"parl"
tense_elem = etree.fromstring(
Expand Down
9 changes: 9 additions & 0 deletions tests/test_inflector_es.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,15 @@

cg = Conjugator(lang='es')

def test_all_verbs_have_templates():
    """Every Spanish verb must reference a known conjugation template.

    Fails, listing the offending template names, if any verb's ``template``
    is absent from ``cg.get_template_names()``.
    """
    verbs = cg.get_verbs()
    # Use a set so each membership test is O(1) instead of rescanning the
    # template-name list once per verb.
    known_templates = set(cg.get_template_names())
    missing_templates = {
        verb.template for verb in verbs
        if verb.template not in known_templates
    }
    assert not missing_templates, (
        "verbs reference unknown templates: {!r}".format(missing_templates)
    )

# presente = Subjunctive Present (yo haya)
# pretérito-perfecto = Subjunctive Perfect (yo haya habido)
# pretérito-imperfecto-1 = Subjunctive Past 1 (yo hubiera)
Expand Down
12 changes: 12 additions & 0 deletions tests/test_inflector_fr.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,22 @@
import pytest
from lxml import etree

from verbecc import Conjugator
from verbecc import inflector_fr
from verbecc.tense_template import TenseTemplate
from verbecc.exceptions import ConjugatorError

cg = Conjugator(lang='fr')

def test_all_verbs_have_templates():
    """Every French verb must reference a known conjugation template.

    Fails, listing the offending template names, if any verb's ``template``
    is absent from ``cg.get_template_names()``.
    """
    verbs = cg.get_verbs()
    # Use a set so each membership test is O(1) instead of rescanning the
    # template-name list once per verb.
    known_templates = set(cg.get_template_names())
    missing_templates = {
        verb.template for verb in verbs
        if verb.template not in known_templates
    }
    assert not missing_templates, (
        "verbs reference unknown templates: {!r}".format(missing_templates)
    )

inf = inflector_fr.InflectorFr()

def test_add_subjunctive_relative_prounoun():
Expand Down
9 changes: 9 additions & 0 deletions tests/test_inflector_it.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,15 @@

cg = Conjugator(lang='it')

def test_all_verbs_have_templates():
    """Every Italian verb must reference a known conjugation template.

    Fails, listing the offending template names, if any verb's ``template``
    is absent from ``cg.get_template_names()``.
    """
    verbs = cg.get_verbs()
    # Use a set so each membership test is O(1) instead of rescanning the
    # template-name list once per verb.
    known_templates = set(cg.get_template_names())
    missing_templates = {
        verb.template for verb in verbs
        if verb.template not in known_templates
    }
    assert not missing_templates, (
        "verbs reference unknown templates: {!r}".format(missing_templates)
    )

test_it_conjugate_mood_tense_data = [
('avere', 'indicativo', 'presente',
['io ho', 'tu hai', 'lui ha', 'noi abbiamo', 'voi avete', 'loro hanno']),
Expand Down
9 changes: 9 additions & 0 deletions tests/test_inflector_pt.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,15 @@

cg = Conjugator(lang='pt')

def test_all_verbs_have_templates():
    """Every Portuguese verb must reference a known conjugation template.

    Fails, listing the offending template names, if any verb's ``template``
    is absent from ``cg.get_template_names()``.
    """
    verbs = cg.get_verbs()
    # Use a set so each membership test is O(1) instead of rescanning the
    # template-name list once per verb.
    known_templates = set(cg.get_template_names())
    missing_templates = {
        verb.template for verb in verbs
        if verb.template not in known_templates
    }
    assert not missing_templates, (
        "verbs reference unknown templates: {!r}".format(missing_templates)
    )

test_pt_conjugate_mood_tense_data = [
('ter', 'indicativo', 'presente',
['eu tenho', 'tu tens', 'ele tem', 'nós temos', 'vós tendes', 'eles têm']),
Expand Down
9 changes: 9 additions & 0 deletions tests/test_inflector_ro.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,15 @@

cg = Conjugator(lang='ro')

def test_all_verbs_have_templates():
    """Every Romanian verb must reference a known conjugation template.

    Fails, listing the offending template names, if any verb's ``template``
    is absent from ``cg.get_template_names()``.
    """
    verbs = cg.get_verbs()
    # Use a set so each membership test is O(1) instead of rescanning the
    # template-name list once per verb.
    known_templates = set(cg.get_template_names())
    missing_templates = {
        verb.template for verb in verbs
        if verb.template not in known_templates
    }
    assert not missing_templates, (
        "verbs reference unknown templates: {!r}".format(missing_templates)
    )

test_ro_conjugate_mood_tense_data = [
('avea', 'participiu', 'participiu', False,
['avut']),
Expand Down
2 changes: 1 addition & 1 deletion verbecc/conjugations_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
class ConjugationsParser:
def __init__(self, lang: str='fr'):
self.templates: List[conjugation_template.ConjugationTemplate] = []
parser = etree.XMLParser(dtd_validation=True, encoding='utf-8')
parser = etree.XMLParser(dtd_validation=True, encoding='utf-8', remove_comments=True)
tree = etree.parse(
resource_filename("verbecc",
"data/conjugations-{}.xml".format(lang)),
Expand Down
17 changes: 12 additions & 5 deletions verbecc/conjugator.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

from typing import Dict, List

from verbecc import verb
from verbecc.exceptions import InvalidLangError
from verbecc import conjugation_template

Expand Down Expand Up @@ -53,13 +54,19 @@ def conjugate_mood_tense(self, infinitive: str, mood_name: str, tense_name: str,
return self._inflector.conjugate_mood_tense(
infinitive, mood_name, tense_name, alternate)

def get_verbs_list(self) -> List[str]:
    """Return the verb list reported by the underlying inflector.

    Thin pass-through to ``self._inflector.get_verbs_list()``.
    """
    return self._inflector.get_verbs_list()
def get_verbs(self) -> List[verb.Verb]:
    """Return the ``Verb`` objects reported by the underlying inflector.

    Thin pass-through to ``self._inflector.get_verbs()``.
    """
    return self._inflector.get_verbs()

def get_templates_list(self):
    """Return the templates list reported by the underlying inflector.

    Thin pass-through to ``self._inflector.get_templates_list()``.
    NOTE(review): the element type is not visible here; the tests check
    string membership (e.g. 'aim:er'), so presumably these are template
    names - confirm against the inflector.
    """
    return self._inflector.get_templates_list()
def get_infinitives(self) -> List[str]:
    """Return the infinitive strings reported by the underlying inflector.

    Thin pass-through to ``self._inflector.get_infinitives()``.
    """
    return self._inflector.get_infinitives()

def find_verb_by_infinitive(self, infinitive: str):
def get_templates(self) -> List[conjugation_template.ConjugationTemplate]:
    """Return the conjugation templates reported by the underlying inflector.

    Thin pass-through to ``self._inflector.get_templates()``.
    """
    return self._inflector.get_templates()

def get_template_names(self) -> List[str]:
    """Return the template names reported by the underlying inflector.

    Thin pass-through to ``self._inflector.get_template_names()``.
    """
    return self._inflector.get_template_names()

def find_verb_by_infinitive(self, infinitive: str) -> verb.Verb:
    """Look up the ``Verb`` for ``infinitive`` via the underlying inflector.

    Thin pass-through to ``self._inflector.find_verb_by_infinitive()``.
    NOTE(review): behavior for an unknown infinitive is decided by the
    inflector and is not visible here.
    """
    return self._inflector.find_verb_by_infinitive(infinitive)

def find_template(self, name: str) -> conjugation_template.ConjugationTemplate:
Expand Down
Loading

0 comments on commit 612a803

Please sign in to comment.