Skip to content

Commit

Permalink
refactor!: Remove specialized new method for INSEE geographical forma…
Browse files Browse the repository at this point in the history
…ts (#41)

# Goal

The goal of this PR is to remove the specific `new_geo` method (used
exclusively to create INSEE geo formats), and handle all `VersionedSet`
instances within a single class `VersionedSetFormat`.

BREAKING CHANGE: the validators' `cog` parameter is renamed to
`version`. Its expected value is unchanged; only the name
changes.

---------

Co-authored-by: Pierre Camilleri <[email protected]>
  • Loading branch information
Sarrabah and pierrecamilleri authored Jan 29, 2025
1 parent 8002c30 commit f792bfa
Show file tree
Hide file tree
Showing 19 changed files with 447 additions and 419 deletions.
52 changes: 26 additions & 26 deletions poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ shapely = "^2.0.2"
optional = true

[tool.poetry.group.linting.dependencies]
black = "^23.12.1"
black = "^24.10"
isort = "^5.13.2"
pyright = "^1.1.369"
flake8 = "^7.0.0"
Expand Down
3 changes: 1 addition & 2 deletions src/frformat/custom_format.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,7 @@ class CustomFormat(ABC, Generic[ValueType]):
formatter: Formatter = DefaultFormatter[ValueType]()

@abstractmethod
def is_valid(self, value: ValueType) -> bool:
...
def is_valid(self, value: ValueType) -> bool: ...

def format(self, value: ValueType) -> str:
if not self.is_valid(value):
Expand Down
2 changes: 1 addition & 1 deletion src/frformat/formats/canton.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,4 +10,4 @@
canton_versioned_data.add_version(Millesime.M2023, CANTON_COG_2023)
canton_versioned_data.add_version(Millesime.M2024, CANTON_COG_2024)

Canton = set_format.new_geo("Canton", name, description, canton_versioned_data)
Canton = set_format.new("Canton", name, description, canton_versioned_data)
2 changes: 1 addition & 1 deletion src/frformat/formats/code_commune_insee.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,6 @@
Millesime.M2024, CODES_COMMUNES_INSEE_COG_2024
)

CodeCommuneInsee = set_format.new_geo(
CodeCommuneInsee = set_format.new(
"CodeCommuneInsee", name, description, code_commune_insee_versioned_data
)
4 changes: 2 additions & 2 deletions src/frformat/formats/code_pays.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
code_pays_IS02_versioned_data.add_version(Millesime.M2023, CODES_PAYS_ISO2_COG_2023)
code_pays_IS02_versioned_data.add_version(Millesime.M2024, CODES_PAYS_ISO2_COG_2024)

CodePaysISO2 = set_format.new_geo(
CodePaysISO2 = set_format.new(
"CodePaysISO2", name, description, code_pays_IS02_versioned_data
)

Expand All @@ -27,6 +27,6 @@
code_pays_IS03_versioned_data.add_version(Millesime.M2023, CODES_PAYS_ISO3_COG_2023)
code_pays_IS03_versioned_data.add_version(Millesime.M2024, CODES_PAYS_ISO3_COG_2024)

CodePaysISO3 = set_format.new_geo(
CodePaysISO3 = set_format.new(
"CodePaysISO3", name, description, code_pays_IS03_versioned_data
)
4 changes: 1 addition & 3 deletions src/frformat/formats/code_region.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,4 @@
code_region_versioned_data.add_version(Millesime.M2023, CODES_REGIONS_COG_2023)
code_region_versioned_data.add_version(Millesime.M2024, CODES_REGIONS_COG_2024)

CodeRegion = set_format.new_geo(
"CodeRegion", name, description, code_region_versioned_data
)
CodeRegion = set_format.new("CodeRegion", name, description, code_region_versioned_data)
2 changes: 1 addition & 1 deletion src/frformat/formats/commune.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,4 +13,4 @@
commune_versioned_data.add_version(Millesime.M2023, COMMUNES_COG_2023)
commune_versioned_data.add_version(Millesime.M2024, COMMUNES_COG_2024)

Commune = set_format.new_geo("Commune", name, description, commune_versioned_data)
Commune = set_format.new("Commune", name, description, commune_versioned_data)
2 changes: 1 addition & 1 deletion src/frformat/formats/departement.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,6 @@
departement_versioned_data.add_version(Millesime.M2023, DEPARTEMENTS_COG_2023)
departement_versioned_data.add_version(Millesime.M2024, DEPARTEMENTS_COG_2024)

Departement = set_format.new_geo(
Departement = set_format.new(
"Departement", name, description, departement_versioned_data
)
2 changes: 1 addition & 1 deletion src/frformat/formats/numero_departement.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,6 @@
Millesime.M2024, NUMEROS_DEPARTEMENTS_COG_2024
)

NumeroDepartement = set_format.new_geo(
NumeroDepartement = set_format.new(
"NumeroDepartement", name, description, numero_departement_versioned_data
)
2 changes: 1 addition & 1 deletion src/frformat/formats/pays.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,4 +11,4 @@
pays_versioned_data = VersionedSet[Millesime]()
pays_versioned_data.add_version(Millesime.M2024, PAYS_COG_2024)

Pays = set_format.new_geo("Pays", name, description, pays_versioned_data)
Pays = set_format.new("Pays", name, description, pays_versioned_data)
3 changes: 2 additions & 1 deletion src/frformat/formats/region.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,9 @@
"Vérifie les régions françaises valides pour un Code Officiel Géographique donné"
)


region_versioned_data = VersionedSet[Millesime]()
region_versioned_data.add_version(Millesime.M2023, REGIONS_COG_2023)
region_versioned_data.add_version(Millesime.M2024, REGIONS_COG_2024)

Region = set_format.new_geo("Region", name, description, region_versioned_data)
Region = set_format.new("Region", name, description, region_versioned_data)
3 changes: 1 addition & 2 deletions src/frformat/formatter.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,7 @@


class Formatter(Protocol, Generic[ValueType]):
def format(self, value: ValueType) -> str:
...
def format(self, value: ValueType) -> str: ...


class DefaultFormatter(Generic[ValueType]):
Expand Down
161 changes: 89 additions & 72 deletions src/frformat/set_format.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,36 +6,65 @@
- GenericSetFormat creates a validator with valid data passed on the fly
- `new` creates specialized versions where data is tied to the class
- `new_geo` creates an even more specialized version for geographical data
from INSEE
"""

from enum import Enum
from functools import total_ordering
from typing import FrozenSet, Type, TypeVar, Union
from typing import FrozenSet, Generic, Type, TypeVar, Union, overload

from frformat import CustomStrFormat, Metadata
from frformat.common import normalize_value
from frformat.options import Options
from frformat.versioned_set import Version, VersionedSet


class GenericSetFormat(CustomStrFormat):
"""A format that checks if a value is among a set of valid values.
@total_ordering
class Millesime(Enum):
    """Version identifiers for versioned datasets, ordered by string value.

    Millesime implements the `Version` protocol methods. Members are compared
    through their string value, so `LATEST` ("latest") sorts after the year
    members ("2023", "2024"). `total_ordering` derives the remaining rich
    comparisons from `__eq__` and `__lt__`.
    """

    M2023 = "2023"
    M2024 = "2024"
    LATEST = "latest"

    def __eq__(self, other: object) -> bool:
        """Return True when `other` exposes the same `value` as this member.

        Operands without a `value` attribute yield `NotImplemented`, so that
        `==` evaluates to False instead of raising `AttributeError`.
        """
        if not hasattr(other, "value"):
            return NotImplemented
        return self.value == other.value  # type: ignore[attr-defined]

    def __hash__(self) -> int:
        """Hash the member by its value, consistently with `__eq__`."""
        # Overriding `__eq__` alone sets the implicit `__hash__` to None, which
        # would make members unusable in sets and as dict keys.
        return hash(self.value)

    def __lt__(self, other: object) -> bool:
        """Return True when this member's value sorts before `other`'s value.

        Operands without a `value` attribute yield `NotImplemented`, so that
        ordering comparisons raise the conventional `TypeError`.
        """
        if not hasattr(other, "value"):
            return NotImplemented
        return self.value < other.value  # type: ignore[attr-defined]

    def get_id(self) -> str:
        """Return the string identifier of this version (the member value)."""
        return self.value

    @classmethod
    def is_sorted(cls) -> bool:
        """Report that these versions are sorted; always True."""
        return True


class SingleSetFormat(CustomStrFormat):
"""This format defines a closed list of valid values"""

_valid_values: FrozenSet = frozenset()
"""Dataset of valid values.
Technical details:
In the generic version, valid data is passed at object initialisation.
Beware, child classes may define an instance `_valid_values` attribute, which
will always take precedence over the class attribute for the validation.
"""

def __init__(self, valid_data: FrozenSet, options: Options = Options()):
def __init__(self, options: Options = Options()):
self._options = options
self._data = valid_data

normalized_extra_values = {
normalize_value(e, self._options) for e in self._options.extra_valid_values
}

self._normalized_values = {
normalize_value(e, self._options) for e in self._data
normalize_value(e, self._options)
for e in self._valid_values
# in child classes, `self._valid_values` can reference an instance
# attribute, if applicable ; otherwise the class attribute will
# be used
}.union(normalized_extra_values)

def is_valid(self, value: str) -> bool:
Expand All @@ -46,12 +75,52 @@ def is_valid(self, value: str) -> bool:
V = TypeVar("V", bound="Version")


class VersionedSetFormat(SingleSetFormat, Generic[V]):
    """Closed set of valid values, available in several versions.

    Usage notes:
    - the type parameter hints at which version class to use when
      initializing the format validator.
    - a description of the format can be consulted with
      `MyClass.metadata.description` or at the top of `help(MyClass)`.

    Technical details:
    - here `_valid_values` is an instance attribute that takes precedence
      over the class attribute of the parent class, because the exact set of
      valid values is only known once a version is chosen at instantiation.
    """

    # Every available version of the dataset; concrete formats override it.
    _versioned_valid_values: VersionedSet = VersionedSet()

    def __init__(self, version: Union[V, str], options: Options = Options()):
        """Select the dataset matching `version` and build the validator.

        `version` is either a version identifier string or an object exposing
        `get_id()`. Raises ValueError when no data exists for that version.
        """
        if isinstance(version, str):
            version_id = version
        else:
            version_id = version.get_id()

        selected_values = self._versioned_valid_values.get_data(version_id)
        if selected_values is None:
            raise ValueError(f"No data available for version: {version_id}")

        # Assigned before delegating: the parent initializer normalizes
        # `self._valid_values`, so the instance attribute must already exist.
        self._valid_values = selected_values
        super().__init__(options)


@overload
def new(
class_name: str, name: str, description: str, valid_data: VersionedSet[V]
) -> Type[VersionedSetFormat[V]]: ...


@overload
def new(
class_name: str, name: str, description: str, valid_data: FrozenSet
) -> Type[SingleSetFormat]: ...


def new(
class_name: str,
name: str,
description: str,
valid_data: Union[VersionedSet[V], FrozenSet[str]],
) -> Type:
) -> Union[Type[VersionedSetFormat[V]], Type[SingleSetFormat]]:
"""Utility function to create a specialized version of a SetFormat.
The returned class is a fully featured format that once initialized
Expand All @@ -62,76 +131,24 @@ def new(
"""
if isinstance(valid_data, VersionedSet):

class VersionedSetFormat(GenericSetFormat):
def __init__(self, version: Union[V, str], options: Options = Options()):
version_id = version if isinstance(version, str) else version.get_id()
data = valid_data.get_data(version_id)
if data is None:
raise ValueError(f"No data available for version: {version_id}")

super().__init__(data, options)
class NewVersionedFormat(VersionedSetFormat):
_versioned_valid_values = valid_data

specialized_set_format = VersionedSetFormat
specialized_set_format = NewVersionedFormat

else:

class SingleSetFormat(GenericSetFormat):
def __init__(self, options: Options = Options()):
super().__init__(valid_data, options)
class NewFormat(SingleSetFormat):
_valid_values = valid_data

specialized_set_format = SingleSetFormat
specialized_set_format = NewFormat

specialized_set_format.__name__ = class_name
specialized_set_format.__qualname__ = class_name
specialized_set_format.__doc__ = (
f"{description}\n\n{specialized_set_format.__doc__}"
)

specialized_set_format.metadata = Metadata(name, description)

return specialized_set_format


################################
# Insee Geo format #############
################################


@total_ordering
class Millesime(Enum):
    """Version identifiers for versioned datasets, ordered by string value.

    Millesime implements the `Version` protocol methods. Members are compared
    through their string value, so `LATEST` ("latest") sorts after the year
    members ("2023", "2024"). `total_ordering` derives the remaining rich
    comparisons from `__eq__` and `__lt__`.
    """

    M2023 = "2023"
    M2024 = "2024"
    LATEST = "latest"

    def __eq__(self, other: object) -> bool:
        """Return True when `other` exposes the same `value` as this member.

        Operands without a `value` attribute yield `NotImplemented`, so that
        `==` evaluates to False instead of raising `AttributeError`.
        """
        if not hasattr(other, "value"):
            return NotImplemented
        return self.value == other.value  # type: ignore[attr-defined]

    def __hash__(self) -> int:
        """Hash the member by its value, consistently with `__eq__`."""
        # Overriding `__eq__` alone sets the implicit `__hash__` to None, which
        # would make members unusable in sets and as dict keys.
        return hash(self.value)

    def __lt__(self, other: object) -> bool:
        """Return True when this member's value sorts before `other`'s value.

        Operands without a `value` attribute yield `NotImplemented`, so that
        ordering comparisons raise the conventional `TypeError`.
        """
        if not hasattr(other, "value"):
            return NotImplemented
        return self.value < other.value  # type: ignore[attr-defined]

    def get_id(self) -> str:
        """Return the string identifier of this version (the member value)."""
        return self.value

    @classmethod
    def is_sorted(cls) -> bool:
        """Report that these versions are sorted; always True."""
        return True


def new_geo(
    class_name: str, name: str, description: str, valid_data: VersionedSet[Millesime]
) -> Type:
    """Create a set format for Insee geographical data versioned by Millesime.

    The returned class is the one built by `new`, except that its `__init__`
    names the version parameter "cog", which stands for "Code officiel
    géographique" (Official Geographical Code).
    """
    geo_format = new(class_name, name, description, valid_data)

    # Keep a handle on the initializer built by `new` so the replacement below
    # can delegate to it.
    base_init = geo_format.__init__

    def new_init(self, cog: Union[Millesime, str], options=Options()):
        base_init(self, cog, options)  # type: ignore

    setattr(geo_format, "__init__", new_init)

    return geo_format
Loading

0 comments on commit f792bfa

Please sign in to comment.