From 296a0ec856b22da847b2ccd0e55e146e071cb8fd Mon Sep 17 00:00:00 2001 From: Shyue Ping Ong Date: Fri, 24 Jan 2025 09:20:48 -0800 Subject: [PATCH 1/7] Improve changelog generation. --- docs/CHANGES.md | 61 ++++++++++++++++++++++--------------------------- tasks.py | 6 ++--- 2 files changed, 30 insertions(+), 37 deletions(-) diff --git a/docs/CHANGES.md b/docs/CHANGES.md index 4b3281da215..8329c85e677 100644 --- a/docs/CHANGES.md +++ b/docs/CHANGES.md @@ -8,40 +8,33 @@ nav_order: 4 ## v2025.1.24 -**PR #4159** by @DanielYang59 - -- **Objective**: Enhance the reliability and efficiency of float comparison in the codebase. -- **Improvements**: - - Avoid using `==` for float comparisons to fix issue #4158. - - Replace `assert_array_equal` with suitable alternatives like `assert_allclose` for floating-point arrays to accommodate numerical imprecision. - - Improve the `_proj` function implementation, resulting in approximately a threefold speed increase. - - Substitute sequences of float comparisons using `==` in list/tuple/dict structures. - - Conduct various type and comment enhancements. - -**PR #4190** by @benrich37 - -- **Objective**: Introduce a structured and organized approach to represent data from JDFTx output files. -- **Key Features**: - - **Hierarchical Class Structure**: Implemented a hierarchy of classes to represent JDFTx output file data, without inheriting from one another. Key classes include: - - `JDFTXOutputs`, `JDFTXOutfile`, `JDFTXOutfileSlice` - - Sub-structures like `JOutStructures`, `JElSteps`, etc. - - **Modules Introduced**: - - `outputs.py`: Provides a robust Pythonic representation of a JDFTx output file. - - `jdftxoutfileslice.py`: Represents a “slice” of a JDFTx output file. - - `joutstructures.py`: Represents a series of structures within an output file slice. - - `joutstructure.py`: Represents a single structure within an output file. - - `jelstep.py`: Manages SCF steps and convergence data. - - `jminsettings.py`: Abstract class for managing input minimization settings, with subclasses for various settings types. - -**PR #4189** by @benrich37 - -- **Objective**: Develop a Pythonic representation for inputs used in JDFTx calculations. -- **Key Features**: - - `inputs.py` module introducing `JDFTXInfile` class. - - Helper modules: - - `generic_tags.py`: Includes "Tag" objects to define structures expected by JDFTx inputs. - - `jdftxinfile_master_format.py`: Facilitates the creation of appropriate "Tag" objects. - - `jdftxinfile_ref_options.py`: Contains lists of valid inputs for various tags, such as XC functionals for the "elec-ex-corr" tag. +1. **PR #4159 by @DanielYang59** + - Avoid using full equality `==` to compare float values to address issue #4158. + - Recommend using `assert_allclose` over `assert_array_equal` for float arrays due to numerical imprecision. + - Implement a ~3x speedup tweak to the `_proj` implementation. + - Partially replace sequence of float comparison using `==` for list/tuple/dict as referenced [here](https://github.com/materialsproject/pymatgen/blob/bd9fba9ec62437b5b62fbd0b2c2c723216cc5a2c/tests/core/test_bonds.py#L56). + - Introduce other type and comment tweaks. + +2. **PR #4190 by @benrich37** + - **Feature 0:** Hierarchical structure using class objects to represent data within a JDFTx out file. + - Main hierarchy classes: + - `JDFTXOutputs` + - `JDFTXOutputs.outfile` + - `JDFTXOutfile` + - `JDFTXOutfile.slices[i]` + - `JDFTXOutfileSlice`, etc. + - **Feature 1:** `outputs.py` module with `JDFTXOutfile` for representing a JDFTx out file. + - **Feature 2:** `jdftxoutfileslice.py` module with `JDFTXOutfileSlice` for file slices of a single JDFTx call. + - **Feature 3:** `joutstructures.py` with `JOutStructures` for representing structures from an out file slice. + - **Feature 4:** `joutstructure.py` with `JOutStructure` for each single structure within an out file. + - **Feature 5:** `jelstep.py` with `JElStep` and `JElSteps` for SCF steps and convergences. + - **Feature 6:** `jminsettings.py` with `JMinSettings` for minimization settings representations. + +3. **PR #4189 by @benrich37** + - **Feature 1:** `inputs.py` module containing `JDFTXInfile` for Pythonic representation of JDFTx calculation inputs. + - **Feature 2:** `generic_tags.py` module with "Tag" objects (`AbstractTag` and its inheritors) for JDFTx input structure representation. + - **Feature 3:** `jdftxinfile_master_format.py` for creating proper "Tag" objects for inputs. + - **Feature 4:** `jdftxinfile_ref_options.py` for holding lists of acceptable strings for input tags. ## v2025.1.23 diff --git a/tasks.py b/tasks.py index ae8f2a69d75..cc234cfeecb 100644 --- a/tasks.py +++ b/tasks.py @@ -182,12 +182,12 @@ def update_changelog(ctx: Context, version: str | None = None, dry_run: bool = F client = OpenAI(api_key=os.environ["OPENAPI_KEY"]) - messages = [{"role": "user", "content": f"summarize, include authors: '{body}'"}] + messages = [{"role": "user", "content": f"summarize as a markdown numbered list, include authors: '{body}'"}] chat = client.chat.completions.create(model="gpt-4o", messages=messages) reply = chat.choices[0].message.content - body = "\n".join(reply.split("\n")[1:]) - body = body.strip() + body = "\n".join(reply.split("\n")[1:-1]) + body = body.strip().strip("`") print(f"ChatGPT Summary of Changes:\n{body}") except BaseException as ex: From f82ce1f476313478b4658037e1cf737ab59fefdb Mon Sep 17 00:00:00 2001 From: Janosh Riebesell Date: Tue, 28 Jan 2025 15:10:06 -0500 Subject: [PATCH 2/7] Add `seed: int = 0` parameter to `Structure.perturb()` method (#4270) * Add seed parameter to Structure.perturb() method - Update tests to verify reproducible random perturbations with same seed - Add test cases for different seed values and min_distance parameter * fix test_perturb --- src/pymatgen/core/structure.py | 10 +++---- tests/core/test_structure.py | 52 +++++++++++++++++++++++----------- 2 files changed, 41 insertions(+), 21 deletions(-) diff --git a/src/pymatgen/core/structure.py b/src/pymatgen/core/structure.py index 9cde191afa7..9863bd9427e 100644 --- a/src/pymatgen/core/structure.py +++ b/src/pymatgen/core/structure.py @@ -4635,16 +4635,16 @@ def rotate_sites( return self - def perturb(self, distance: float, min_distance: float | None = None) -> Self: + def perturb(self, distance: float, min_distance: float | None = None, seed: int = 0) -> Self: """Perform a random perturbation of the sites in a structure to break symmetries. Modifies the structure in place. Args: distance (float): Distance in angstroms by which to perturb each site. min_distance (None, int, or float): if None, all displacements will - be equal amplitude. If int or float, perturb each site a - distance drawn from the uniform distribution between - 'min_distance' and 'distance'. + be equal amplitude. If int or float, perturb each site a distance drawn + from the uniform distribution between 'min_distance' and 'distance'. + seed (int): Seed for the random number generator. Defaults to 0. Returns: Structure: self with perturbed sites. @@ -4652,7 +4652,7 @@ def perturb(self, distance: float, min_distance: float | None = None) -> Self: def get_rand_vec(): # Deal with zero vectors - rng = np.random.default_rng() + rng = np.random.default_rng(seed=seed) vector = rng.standard_normal(3) vnorm = np.linalg.norm(vector) dist = distance diff --git a/tests/core/test_structure.py b/tests/core/test_structure.py index b26db3edcb9..c400709415c 100644 --- a/tests/core/test_structure.py +++ b/tests/core/test_structure.py @@ -1213,22 +1213,42 @@ def test_propertied_structure(self): assert dct == struct.as_dict() def test_perturb(self): - dist = 0.1 - pre_perturbation_sites = self.struct.copy() - returned = self.struct.perturb(distance=dist) - assert returned is self.struct - post_perturbation_sites = self.struct.sites - - for idx, site in enumerate(pre_perturbation_sites): - assert site.distance(post_perturbation_sites[idx]) == approx(dist), "Bad perturbation distance" - - structure2 = pre_perturbation_sites.copy() - structure2.perturb(distance=dist, min_distance=0) - post_perturbation_sites2 = structure2.sites - - for idx, site in enumerate(pre_perturbation_sites): - assert site.distance(post_perturbation_sites2[idx]) <= dist - assert site.distance(post_perturbation_sites2[idx]) >= 0 + struct = self.get_structure("Li2O") + struct_orig = struct.copy() + struct.perturb(0.1) + # Ensure all sites were perturbed by a distance of at most 0.1 Angstroms + for site, site_orig in zip(struct, struct_orig, strict=True): + cart_dist = site.distance(site_orig) + # allow 1e-6 to account for numerical precision + assert cart_dist <= 0.1 + 1e-6, f"Distance {cart_dist} > 0.1" + + # Test that same seed gives same perturbation + s1 = self.get_structure("Li2O") + s2 = self.get_structure("Li2O") + s1.perturb(0.1, seed=42) + s2.perturb(0.1, seed=42) + for site1, site2 in zip(s1, s2, strict=True): + assert site1.distance(site2) < 1e-7 # should be exactly equal up to numerical precision + + # Test that different seeds give different perturbations + s3 = self.get_structure("Li2O") + s3.perturb(0.1, seed=100) + any_different = False + for site1, site3 in zip(s1, s3, strict=True): + if site1.distance(site3) > 1e-7: + any_different = True + break + assert any_different, "Different seeds should give different perturbations" + + # Test min_distance + s4 = self.get_structure("Li2O") + s4.perturb(0.1, min_distance=0.05, seed=42) + any_different = False + for site1, site4 in zip(s1, s4, strict=True): + if site1.distance(site4) > 1e-7: + any_different = True + break + assert any_different, "Using min_distance should give different perturbations" def test_add_oxidation_state_by_element(self): oxidation_states = {"Si": -4} From 9656ff9ebf753fff89631a5c87ab3c8811a344ab Mon Sep 17 00:00:00 2001 From: Shyue Ping Ong Date: Tue, 28 Jan 2025 12:27:09 -0800 Subject: [PATCH 3/7] Do not support plotly 6.0 for now. Someone is welcome to submit a PR to fix. #4272 --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 2609e6a3d95..669b1e27e2c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -60,7 +60,7 @@ dependencies = [ "networkx>=2.7", # PR4116 "palettable>=3.3.3", "pandas>=2", - "plotly>=4.5.0", + "plotly>=4.5.0,<6.0.0", "pybtex>=0.24.0", "requests>=2.32", "ruamel.yaml>=0.17.0", From ed80c873ba1a8c7ce750735d73f5206242cb7304 Mon Sep 17 00:00:00 2001 From: "Haoyu (Daniel) YANG" Date: Wed, 29 Jan 2025 04:28:38 +0800 Subject: [PATCH 4/7] Add missing parenthesis to `electronic_structure.boltztrap.BoltztrapAnalyzer.get_extreme.is_isotropic` (#4271) * add missing parenthesis * minor cleanup of type, comment and var name --------- Co-authored-by: Shyue Ping Ong --- .../electronic_structure/boltztrap.py | 96 ++++++++++--------- tests/electronic_structure/test_boltztrap.py | 2 +- 2 files changed, 50 insertions(+), 48 deletions(-) diff --git a/src/pymatgen/electronic_structure/boltztrap.py b/src/pymatgen/electronic_structure/boltztrap.py index 9fd3c8c0ce7..73dc0ed40da 100644 --- a/src/pymatgen/electronic_structure/boltztrap.py +++ b/src/pymatgen/electronic_structure/boltztrap.py @@ -41,7 +41,7 @@ from pymatgen.symmetry.bandstructure import HighSymmKpath if TYPE_CHECKING: - from typing import Literal + from typing import Any, Literal from numpy.typing import ArrayLike from typing_extensions import Self @@ -1426,96 +1426,98 @@ def get_complexity_factor( def get_extreme( self, - target_prop, - maximize=True, - min_temp=None, - max_temp=None, - min_doping=None, - max_doping=None, - isotropy_tolerance=0.05, - use_average=True, - ): + target_prop: Literal["seebeck", "power factor", "conductivity", "kappa", "zt"], + maximize: bool = True, + min_temp: float | None = None, + max_temp: float | None = None, + min_doping: float | None = None, + max_doping: float | None = None, + isotropy_tolerance: float = 0.05, + use_average: bool = True, + ) -> dict[Literal["p", "n", "best"], dict[str, Any]]: """Use eigenvalues over a range of carriers, temperatures, and doping levels, to estimate the "best" value that can be achieved for the given target_property. Note that this method searches the doping dict only, not the full mu dict. Args: - target_prop: target property, i.e. "seebeck", "power factor", "conductivity", "kappa", or "zt" - maximize: True to maximize, False to minimize (e.g. kappa) - min_temp: minimum temperature allowed - max_temp: maximum temperature allowed - min_doping: minimum doping allowed (e.g., 1E18) - max_doping: maximum doping allowed (e.g., 1E20) - isotropy_tolerance: tolerance for isotropic (0.05 = 5%) - use_average: True for avg of eigenval, False for max eigenval + target_prop ("seebeck", "power factor", "conductivity", "kappa", "zt"): target property. + maximize (bool): True to maximize, False to minimize (e.g. kappa) + min_temp (float): minimum temperature allowed + max_temp (float): maximum temperature allowed + min_doping (float): minimum doping allowed (e.g., 1E18) + max_doping (float): maximum doping allowed (e.g., 1E20) + isotropy_tolerance (float): tolerance for isotropic (0.05 = 5%) + use_average (bool): True for average of eigenval, False for max eigenval. Returns: - A dictionary with keys {"p", "n", "best"} with sub-keys: - {"value", "temperature", "doping", "isotropic"} + A dictionary with the following keys: {"p", "n", "best"}. + Each key maps to a sub-dictionary with the following keys: + {"value", "temperature", "doping", "isotropic", "carrier_type"}. """ - def is_isotropic(x, isotropy_tolerance) -> bool: - """Internal method to tell you if 3-vector "x" is isotropic. + def is_isotropic(x, isotropy_tolerance: float) -> bool: + """Helper function to determine if 3D vector is isotropic. Args: x: the vector to determine isotropy for - isotropy_tolerance: tolerance, e.g. 0.05 is 5% + isotropy_tolerance (float): tolerance, e.g. 0.05 is 5% """ if len(x) != 3: - raise ValueError("Invalid input to is_isotropic!") + raise ValueError("Invalid vector length to is_isotropic!") st = sorted(x) + return bool( all([st[0], st[1], st[2]]) and (abs((st[1] - st[0]) / st[1]) <= isotropy_tolerance) - and (abs(st[2] - st[0]) / st[2] <= isotropy_tolerance) + and (abs((st[2] - st[0]) / st[2]) <= isotropy_tolerance) and (abs((st[2] - st[1]) / st[2]) <= isotropy_tolerance) ) if target_prop.lower() == "seebeck": - d = self.get_seebeck(output="eigs", doping_levels=True) + dct = self.get_seebeck(output="eigs", doping_levels=True) elif target_prop.lower() == "power factor": - d = self.get_power_factor(output="eigs", doping_levels=True) + dct = self.get_power_factor(output="eigs", doping_levels=True) elif target_prop.lower() == "conductivity": - d = self.get_conductivity(output="eigs", doping_levels=True) + dct = self.get_conductivity(output="eigs", doping_levels=True) elif target_prop.lower() == "kappa": - d = self.get_thermal_conductivity(output="eigs", doping_levels=True) + dct = self.get_thermal_conductivity(output="eigs", doping_levels=True) elif target_prop.lower() == "zt": - d = self.get_zt(output="eigs", doping_levels=True) + dct = self.get_zt(output="eigs", doping_levels=True) else: raise ValueError(f"Unrecognized {target_prop=}") - abs_val = True # take the absolute value of properties + abs_val: bool = True # take the absolute value of properties x_val = x_temp = x_doping = x_isotropic = None - output = {} + output: dict[Literal["p", "n", "best"], dict[str, Any]] = {} min_temp = min_temp or 0 max_temp = max_temp or float("inf") min_doping = min_doping or 0 max_doping = max_doping or float("inf") - for pn in ("p", "n"): - for t in d[pn]: # temperatures - if min_temp <= float(t) <= max_temp: - for didx, evs in enumerate(d[pn][t]): - doping_lvl = self.doping[pn][didx] + for pn_type in ("p", "n"): + for temperature in dct[pn_type]: + if min_temp <= float(temperature) <= max_temp: + for idx, eig_vals in enumerate(dct[pn_type][temperature]): + doping_lvl = self.doping[pn_type][idx] if min_doping <= doping_lvl <= max_doping: - isotropic = is_isotropic(evs, isotropy_tolerance) + isotropic: bool = is_isotropic(eig_vals, isotropy_tolerance) if abs_val: - evs = [abs(x) for x in evs] - val = float(sum(evs)) / len(evs) if use_average else max(evs) + eig_vals = [abs(x) for x in eig_vals] + val = float(sum(eig_vals)) / len(eig_vals) if use_average else max(eig_vals) if x_val is None or (val > x_val and maximize) or (val < x_val and not maximize): x_val = val - x_temp = t + x_temp = temperature x_doping = doping_lvl x_isotropic = isotropic - output[pn] = { + output[pn_type] = { "value": x_val, "temperature": x_temp, "doping": x_doping, @@ -1524,7 +1526,7 @@ def is_isotropic(x, isotropy_tolerance) -> bool: x_val = None if maximize: - max_type = "p" if output["p"]["value"] >= output["n"]["value"] else "n" + max_type: Literal["p", "n"] = "p" if output["p"]["value"] >= output["n"]["value"] else "n" else: max_type = "p" if output["p"]["value"] <= output["n"]["value"] else "n" @@ -1589,10 +1591,10 @@ def get_complete_dos(self, structure: Structure, analyzer_for_second_spin=None): Example of use in case of spin polarized case: BoltztrapRunner(bs=bs,nelec=10,run_type="DOS",spin=1).run(path_dir='dos_up/') - an_up=BoltztrapAnalyzer.from_files("dos_up/boltztrap/",dos_spin=1) + an_up=BoltztrapAnalyzer.from_files("dos_up/boltztrap/", dos_spin=1) BoltztrapRunner(bs=bs,nelec=10,run_type="DOS",spin=-1).run(path_dir='dos_dw/') - an_dw=BoltztrapAnalyzer.from_files("dos_dw/boltztrap/",dos_spin=-1) + an_dw=BoltztrapAnalyzer.from_files("dos_dw/boltztrap/", dos_spin=-1) cdos=an_up.get_complete_dos(bs.structure,an_dw) """ @@ -1602,10 +1604,10 @@ def get_complete_dos(self, structure: Structure, analyzer_for_second_spin=None): if analyzer_for_second_spin: if not np.all(self.dos.energies == analyzer_for_second_spin.dos.energies): - raise BoltztrapError("Dos merging error: energies of the two dos are different") + raise BoltztrapError("DOS merging error: energies of the two DOS are different") if spin_1 == spin_2: - raise BoltztrapError("Dos merging error: spin component are the same") + raise BoltztrapError("DOS merging error: spin component are the same") for s in self._dos_partial: idx = int(s) diff --git a/tests/electronic_structure/test_boltztrap.py b/tests/electronic_structure/test_boltztrap.py index 0de31759def..765cfc29455 100644 --- a/tests/electronic_structure/test_boltztrap.py +++ b/tests/electronic_structure/test_boltztrap.py @@ -242,7 +242,7 @@ def test_extreme(self): extreme = self.bz.get_extreme("seebeck") assert extreme["best"]["carrier_type"] == "n" assert extreme["p"]["value"] == approx(1255.365, abs=1e-2) - assert extreme["n"]["isotropic"] + assert extreme["n"]["isotropic"] is False assert extreme["n"]["temperature"] == 600 extreme = self.bz.get_extreme("kappa", maximize=False, min_temp=400, min_doping=1e20) From 4375e342f31a10fc023c9b07df5db6f316c738d4 Mon Sep 17 00:00:00 2001 From: Janosh Riebesell Date: Tue, 28 Jan 2025 15:29:30 -0500 Subject: [PATCH 5/7] Add `Structure.get_symmetry_dataset` convenience method (#4268) * add Structure.get_moyo_dataset -> moyopy.MoyoDataset convenience method * Add Structure.get_symmetry_dataset() method with multiple backend support, currently moyopy and spglib - remove get_moyo_dataset method - implement method overloading for type hints - update tests to cover both backends and error cases * add @due.dcite decorator for citation tracking with DOI for Spglib - also include citation details in method doc string * fix duecredit import from pymatgen.util.due module - update pyproject.toml to install symmetry optional deps in CI * fix ImportError on ase: Try installing dependencies with `pip install moyopy[interface]` --- pyproject.toml | 6 ++-- src/pymatgen/core/structure.py | 60 +++++++++++++++++++++++++++++++++- tests/core/test_structure.py | 27 +++++++++++++++ 3 files changed, 90 insertions(+), 3 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 669b1e27e2c..4bef5c2d6be 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -68,7 +68,7 @@ dependencies = [ # scipy<1.14.1 is incompatible with NumPy 2.0 on Windows # https://github.com/scipy/scipy/issues/21052 "scipy>=1.14.1; platform_system == 'Windows'", - "spglib>=2.5.0", + "spglib>=2.5", "sympy>=1.3", # PR #4116 "tabulate>=0.9", "tqdm>=4.60", @@ -89,7 +89,7 @@ Pypi = "https://pypi.org/project/pymatgen" [project.optional-dependencies] abinit = ["netcdf4>=1.7.2"] ase = ["ase>=3.23.0"] -ci = ["pytest-cov>=4", "pytest-split>=0.8", "pytest>=8"] +ci = ["pytest-cov>=4", "pytest-split>=0.8", "pytest>=8", "pymatgen[symmetry]"] docs = ["invoke", "sphinx", "sphinx_markdown_builder", "sphinx_rtd_theme"] electronic_structure = ["fdint>=2.0.2"] mlp = ["chgnet>=0.3.8", "matgl>=1.1.3"] @@ -110,6 +110,8 @@ optional = [ "phonopy>=2.33.3", "seekpath>=2.0.1", ] +# moyopy[interface] includes ase +symmetry = ["moyopy[interface]>=0.3", "spglib>=2.5"] # tblite only support Python 3.12+ through conda-forge # https://github.com/tblite/tblite/issues/175 tblite = [ "tblite[ase]>=0.3.0; platform_system=='Linux' and python_version<'3.12'"] diff --git a/src/pymatgen/core/structure.py b/src/pymatgen/core/structure.py index 9863bd9427e..95e341cb1fe 100644 --- a/src/pymatgen/core/structure.py +++ b/src/pymatgen/core/structure.py @@ -20,7 +20,7 @@ from abc import ABC, abstractmethod from collections import defaultdict from fnmatch import fnmatch -from typing import TYPE_CHECKING, Literal, cast, get_args +from typing import TYPE_CHECKING, Literal, cast, get_args, overload import numpy as np from monty.dev import deprecated @@ -43,12 +43,15 @@ from pymatgen.electronic_structure.core import Magmom from pymatgen.symmetry.maggroups import MagneticSpaceGroup from pymatgen.util.coord import all_distances, get_angle, lattice_points_in_supercell +from pymatgen.util.due import Doi, due if TYPE_CHECKING: from collections.abc import Callable, Iterable, Iterator, Sequence from typing import Any, ClassVar, SupportsIndex, TypeAlias + import moyopy import pandas as pd + import spglib from ase import Atoms from ase.calculators.calculator import Calculator from ase.io.trajectory import Trajectory @@ -3338,6 +3341,61 @@ def to_conventional(self, **kwargs) -> Structure: """ return self.to_cell("conventional", **kwargs) + @overload + def get_symmetry_dataset(self, backend: Literal["moyopy"], **kwargs) -> moyopy.MoyoDataset: ... + + @due.dcite( + Doi("10.1080/27660400.2024.2384822"), + description="Spglib: a software library for crystal symmetry search", + ) + @overload + def get_symmetry_dataset(self, backend: Literal["spglib"], **kwargs) -> spglib.SpglibDataset: ... + + def get_symmetry_dataset( + self, backend: Literal["moyopy", "spglib"] = "spglib", **kwargs + ) -> moyopy.MoyoDataset | spglib.SpglibDataset: + """Get a symmetry dataset from the structure using either moyopy or spglib backend. + + If using the spglib backend (default), please cite: + + Togo, A., Shinohara, K., & Tanaka, I. (2024). Spglib: a software library for crystal + symmetry search. Science and Technology of Advanced Materials: Methods, 4(1), 2384822-2384836. + https://doi.org/10.1080/27660400.2024.2384822 + + Args: + backend ("moyopy" | "spglib"): Which symmetry analysis backend to use. + Defaults to "spglib". + **kwargs: Additional arguments passed to the respective backend's constructor. + For spglib, these are passed to SpacegroupAnalyzer (e.g. symprec, angle_tolerance). + For moyopy, these are passed to MoyoDataset constructor. + + Returns: + MoyoDataset | SpglibDataset: Symmetry dataset from the chosen backend. + + Raises: + ImportError: If the requested backend is not installed. + ValueError: If an invalid backend is specified. + """ + if backend == "moyopy": + try: + import moyopy + import moyopy.interface + except ImportError: + raise ImportError("moyopy is not installed. Run pip install moyopy.") + + # Convert structure to MoyoDataset format + moyo_cell = moyopy.interface.MoyoAdapter.from_structure(self) + return moyopy.MoyoDataset(cell=moyo_cell, **kwargs) + + if backend == "spglib": + from pymatgen.symmetry.analyzer import SpacegroupAnalyzer + + sga = SpacegroupAnalyzer(self, **kwargs) + return sga.get_symmetry_dataset() + + valid_backends = ("moyopy", "spglib") + raise ValueError(f"Invalid {backend=}, must be one of {valid_backends}") + class IMolecule(SiteCollection, MSONable): """Basic immutable Molecule object without periodicity. Essentially a diff --git a/tests/core/test_structure.py b/tests/core/test_structure.py index c400709415c..d8bb9f365f1 100644 --- a/tests/core/test_structure.py +++ b/tests/core/test_structure.py @@ -6,6 +6,7 @@ from fractions import Fraction from pathlib import Path from shutil import which +from unittest import mock import numpy as np import pytest @@ -965,6 +966,32 @@ def test_sites_setter(self): struct.sites = new_sites assert struct.sites == new_sites + def test_get_symmetry_dataset(self): + """Test getting symmetry dataset from structure using different backends.""" + # Test spglib backend + dataset = self.struct.get_symmetry_dataset(backend="spglib") + assert dataset.number == 227 # Fd-3m space group + assert dataset.international == "Fd-3m" + assert len(dataset.rotations) > 0 + assert len(dataset.translations) > 0 + + # Test moyopy backend if available + moyopy = pytest.importorskip("moyopy") + dataset = self.struct.get_symmetry_dataset(backend="moyopy") + assert isinstance(dataset, moyopy.MoyoDataset) + assert dataset.prim_std_cell.numbers == [14, 14] # Si atomic number is 14 + + # Test import error + with ( + mock.patch.dict("sys.modules", {"moyopy": None}), + pytest.raises(ImportError, match="moyopy is not installed. Run pip install moyopy."), + ): + self.struct.get_symmetry_dataset(backend="moyopy") + + # Test invalid backend + with pytest.raises(ValueError, match="Invalid backend='42'"): + self.struct.get_symmetry_dataset(backend="42") + class TestStructure(PymatgenTest): def setUp(self): From 3b6a0574bafc644adf6df3562b5625f75433035b Mon Sep 17 00:00:00 2001 From: "Haoyu (Daniel) YANG" Date: Wed, 29 Jan 2025 04:32:18 +0800 Subject: [PATCH 6/7] Clarify return float/int type for `core.Composition.reduced_composition` and siblings, minor type clean up for `core.Composition` (#4265) * add some types * more types * fix factor type * fix IUPAC writing * fix another factor type * fix factor * fix type * add test for composition error * add test for reduce formula --- src/pymatgen/core/composition.py | 97 +++++++++++++++++--------------- src/pymatgen/core/ion.py | 2 +- tests/core/test_composition.py | 18 +++++- 3 files changed, 69 insertions(+), 48 deletions(-) diff --git a/src/pymatgen/core/composition.py b/src/pymatgen/core/composition.py index 9411f06deac..e400a42c9b0 100644 --- a/src/pymatgen/core/composition.py +++ b/src/pymatgen/core/composition.py @@ -24,7 +24,7 @@ from pymatgen.util.string import Stringify, formula_double_format if TYPE_CHECKING: - from collections.abc import Generator, ItemsView, Iterator + from collections.abc import Generator, ItemsView, Iterator, Mapping from typing import Any, ClassVar, Literal from typing_extensions import Self @@ -115,11 +115,11 @@ class Composition(collections.abc.Hashable, collections.abc.Mapping, MSONable, S # Tolerance in distinguishing different composition amounts. # 1e-8 is fairly tight, but should cut out most floating point arithmetic # errors. - amount_tolerance = 1e-8 - charge_balanced_tolerance = 1e-8 + amount_tolerance: ClassVar[float] = 1e-8 + charge_balanced_tolerance: ClassVar[float] = 1e-8 # Special formula handling for peroxides and certain elements. This is so - # that formula output does not write LiO instead of Li2O2 for example. + # that formula output does not write "LiO" instead of "Li2O2" for example. special_formulas: ClassVar[dict[str, str]] = { "LiO": "Li2O2", "NaO": "Na2O2", @@ -134,7 +134,8 @@ class Composition(collections.abc.Hashable, collections.abc.Mapping, MSONable, S "H": "H2", } - oxi_prob = None # prior probability of oxidation used by oxi_state_guesses + # Prior probability of oxidation used by oxi_state_guesses + oxi_prob: ClassVar[dict | None] = None def __init__(self, *args, strict: bool = False, **kwargs) -> None: """Very flexible Composition construction, similar to the built-in Python @@ -417,10 +418,10 @@ def get_reduced_composition_and_factor(self) -> tuple[Self, float]: """Calculate a reduced composition and factor. Returns: - A normalized composition and a multiplicative factor, i.e., - Li4Fe4P4O16 returns (Composition("LiFePO4"), 4). + tuple[Composition, float]: Normalized Composition and multiplicative factor, + i.e. "Li4Fe4P4O16" returns (Composition("LiFePO4"), 4). """ - factor = self.get_reduced_formula_and_factor()[1] + factor: float = self.get_reduced_formula_and_factor()[1] return self / factor, factor def get_reduced_formula_and_factor(self, iupac_ordering: bool = False) -> tuple[str, float]: @@ -428,24 +429,27 @@ def get_reduced_formula_and_factor(self, iupac_ordering: bool = False) -> tuple[ Args: iupac_ordering (bool, optional): Whether to order the - formula by the iupac "electronegativity" series, defined in + formula by the IUPAC "electronegativity" series, defined in Table VI of "Nomenclature of Inorganic Chemistry (IUPAC Recommendations 2005)". This ordering effectively follows - the groups and rows of the periodic table, except the + the groups and rows of the periodic table, except for Lanthanides, Actinides and hydrogen. Note that polyanions will still be determined based on the true electronegativity of the elements. Returns: - A pretty normalized formula and a multiplicative factor, i.e., - Li4Fe4P4O16 returns (LiFePO4, 4). + tuple[str, float]: Normalized formula and multiplicative factor, + i.e., "Li4Fe4P4O16" returns (LiFePO4, 4). """ - all_int = all(abs(val - round(val)) < type(self).amount_tolerance for val in self.values()) + all_int: bool = all(abs(val - round(val)) < type(self).amount_tolerance for val in self.values()) if not all_int: return self.formula.replace(" ", ""), 1 - el_amt_dict = {key: round(val) for key, val in self.get_el_amt_dict().items()} + + el_amt_dict: dict[str, int] = {key: round(val) for key, val in self.get_el_amt_dict().items()} + factor: float formula, factor = reduce_formula(el_amt_dict, iupac_ordering=iupac_ordering) + # Do not "completely reduce" certain formulas if formula in type(self).special_formulas: formula = type(self).special_formulas[formula] factor /= 2 @@ -458,10 +462,10 @@ def get_integer_formula_and_factor( """Calculate an integer formula and factor. Args: - max_denominator (int): all amounts in the el:amt dict are - first converted to a Fraction with this maximum denominator + max_denominator (int): all amounts in the el_amt dict are + first converted to a Fraction with this maximum denominator. iupac_ordering (bool, optional): Whether to order the - formula by the iupac "electronegativity" series, defined in + formula by the IUPAC "electronegativity" series, defined in Table VI of "Nomenclature of Inorganic Chemistry (IUPAC Recommendations 2005)". This ordering effectively follows the groups and rows of the periodic table, except the @@ -470,13 +474,14 @@ def get_integer_formula_and_factor( the elements. Returns: - A pretty normalized formula and a multiplicative factor, i.e., - Li0.5O0.25 returns (Li2O, 0.25). O0.25 returns (O2, 0.125) + A normalized formula and a multiplicative factor, i.e., + "Li0.5O0.25" returns (Li2O, 0.25). "O0.25" returns (O2, 0.125) """ - el_amt = self.get_el_amt_dict() - _gcd = gcd_float(list(el_amt.values()), 1 / max_denominator) + el_amt: dict[str, float] = self.get_el_amt_dict() + _gcd: float = gcd_float(list(el_amt.values()), 1 / max_denominator) - dct = {key: round(val / _gcd) for key, val in el_amt.items()} + dct: dict[str, int] = {key: round(val / _gcd) for key, val in el_amt.items()} + factor: float formula, factor = reduce_formula(dct, iupac_ordering=iupac_ordering) if formula in type(self).special_formulas: formula = type(self).special_formulas[formula] @@ -485,8 +490,8 @@ def get_integer_formula_and_factor( @property def reduced_formula(self) -> str: - """A pretty normalized formula, i.e., LiFePO4 instead of - Li4Fe4P4O16. + """A normalized formula, i.e., "LiFePO4" instead of + "Li4Fe4P4O16". """ return self.get_reduced_formula_and_factor()[0] @@ -1055,7 +1060,7 @@ def _get_oxi_state_guesses( raise ValueError(f"Composition {comp} cannot accommodate max_sites setting!") # Load prior probabilities of oxidation states, used to rank solutions - if not type(self).oxi_prob: + if type(self).oxi_prob is None: all_data = loadfn(f"{MODULE_DIR}/../analysis/icsd_bv.yaml") type(self).oxi_prob = {Species.from_str(sp): data for sp, data in all_data["occurrence"].items()} oxi_states_override = oxi_states_override or {} @@ -1319,38 +1324,38 @@ def _parse_chomp_and_rank(match, formula: str, m_dict: dict[str, float], m_point def reduce_formula( - sym_amt: dict[str, float] | dict[str, int], + sym_amt: Mapping[str, float], iupac_ordering: bool = False, -) -> tuple[str, float]: - """Helper function to reduce a sym_amt dict to a reduced formula and factor. +) -> tuple[str, int]: + """Helper function to reduce a symbol-amount mapping. Args: - sym_amt (dict): {symbol: amount}. + sym_amt (dict[str, float]): Symbol to amount mapping. iupac_ordering (bool, optional): Whether to order the - formula by the iupac "electronegativity" series, defined in + formula by the IUPAC "electronegativity" series, defined in Table VI of "Nomenclature of Inorganic Chemistry (IUPAC Recommendations 2005)". This ordering effectively follows - the groups and rows of the periodic table, except the + the groups and rows of the periodic table, except for Lanthanides, Actinides and hydrogen. Note that polyanions will still be determined based on the true electronegativity of the elements. Returns: - tuple[str, float]: reduced formula and factor. + tuple[str, int]: reduced formula and factor. """ - syms = sorted(sym_amt, key=lambda x: [get_el_sp(x).X, x]) + syms: list[str] = sorted(sym_amt, key=lambda x: [get_el_sp(x).X, x]) syms = list(filter(lambda x: abs(sym_amt[x]) > Composition.amount_tolerance, syms)) - factor = 1 - # Enforce integers for doing gcd. + # Enforce integer for calculating greatest common divisor + factor: int = 1 if all(int(i) == i for i in sym_amt.values()): factor = abs(gcd(*(int(i) for i in sym_amt.values()))) - poly_anions = [] - # if the composition contains a poly anion + # If the composition contains polyanion + poly_anions: list[str] = [] if len(syms) >= 3 and get_el_sp(syms[-1]).X - get_el_sp(syms[-2]).X < 1.65: - poly_sym_amt = {syms[i]: sym_amt[syms[i]] / factor for i in [-2, -1]} + poly_sym_amt: dict[str, float] = {syms[i]: sym_amt[syms[i]] / factor for i in (-2, -1)} poly_form, poly_factor = reduce_formula(poly_sym_amt, iupac_ordering=iupac_ordering) if poly_factor != 1: @@ -1369,9 +1374,17 @@ def reduce_formula( return "".join([*reduced_form, *poly_anions]), factor +class CompositionError(Exception): + """Composition exceptions.""" + + class ChemicalPotential(dict, MSONable): - """Represent set of chemical potentials. Can be: multiplied/divided by a Number - multiplied by a Composition (returns an energy) added/subtracted with other ChemicalPotentials. + """Represent set of chemical potentials. + + Can be: + - multiplied/divided by a Number + - multiplied by a Composition (returns an energy) + - added/subtracted with other ChemicalPotentials """ def __init__(self, *args, **kwargs) -> None: @@ -1424,7 +1437,3 @@ def get_energy(self, composition: Composition, strict: bool = True) -> float: if strict and (missing := set(composition) - set(self)): raise ValueError(f"Potentials not specified for {missing}") return sum(self.get(key, 0) * val for key, val in composition.items()) - - -class CompositionError(Exception): - """Exception class for composition errors.""" diff --git a/src/pymatgen/core/ion.py b/src/pymatgen/core/ion.py index 6286cbf1217..99d010829ff 100644 --- a/src/pymatgen/core/ion.py +++ b/src/pymatgen/core/ion.py @@ -141,7 +141,7 @@ def get_reduced_formula_and_factor( Args: iupac_ordering (bool, optional): Whether to order the - formula by the iupac "electronegativity" series, defined in + formula by the IUPAC "electronegativity" series, defined in Table VI of "Nomenclature of Inorganic Chemistry (IUPAC Recommendations 2005)". This ordering effectively follows the groups and rows of the periodic table, except the diff --git a/tests/core/test_composition.py b/tests/core/test_composition.py index 37fd52d4d4b..ab8a7c8666f 100644 --- a/tests/core/test_composition.py +++ b/tests/core/test_composition.py @@ -12,7 +12,7 @@ from pytest import approx from pymatgen.core import Composition, DummySpecies, Element, Species -from pymatgen.core.composition import ChemicalPotential +from pymatgen.core.composition import ChemicalPotential, CompositionError, reduce_formula from pymatgen.util.testing import PymatgenTest @@ -316,7 +316,7 @@ def test_reduced_formula(self): all_formulas = [c.reduced_formula for c in self.comps] assert all_formulas == correct_reduced_formulas - # test iupac reduced formula (polyanions should still appear at the end) + # test IUPAC reduced formula (polyanions should still appear at the end) all_formulas = [c.get_reduced_formula_and_factor(iupac_ordering=True)[0] for c in self.comps] assert all_formulas == correct_reduced_formulas assert Composition("H6CN").get_integer_formula_and_factor(iupac_ordering=True)[0] == "CNH6" @@ -347,7 +347,7 @@ def test_integer_formula(self): assert formula == "Li(BH)6" assert factor == approx(1 / 6) - # test iupac reduced formula (polyanions should still appear at the end) + # test IUPAC reduced formula (polyanions should still appear at the end) all_formulas = [c.get_integer_formula_and_factor(iupac_ordering=True)[0] for c in self.comps] assert all_formulas == correct_reduced_formulas assert Composition("H6CN0.5").get_integer_formula_and_factor(iupac_ordering=True) == ("C2NH12", 0.5) @@ -860,6 +860,18 @@ def test_isotopes(self): assert "Deuterium" in [elem.long_name for elem in composition.elements] +def test_reduce_formula(): + assert reduce_formula({"Li": 2, "Mn": 4, "O": 8}) == ("LiMn2O4", 2) + assert reduce_formula({"Li": 4, "O": 4}) == ("LiO", 4) + assert reduce_formula({"Zn": 2, "O": 2, "H": 2}) == ("ZnHO", 2) + + +def test_composition_error(): + error = CompositionError("Composition error") + assert isinstance(error, CompositionError) + assert str(error) == "Composition error" + + class TestChemicalPotential: def test_init(self): dct = {"Fe": 1, Element("Fe"): 1} From bf6c1086cc909de2eec429d118a503e3f9d6fdcc Mon Sep 17 00:00:00 2001 From: Shyue Ping Ong Date: Tue, 28 Jan 2025 15:41:49 -0800 Subject: [PATCH 7/7] Return dict output. --- src/pymatgen/core/structure.py | 35 +++++++++++++++++++++++++++------- tests/core/test_structure.py | 13 +++++++++++-- 2 files changed, 39 insertions(+), 9 deletions(-) diff --git a/src/pymatgen/core/structure.py b/src/pymatgen/core/structure.py index 95e341cb1fe..af6a5e3c3d7 100644 --- a/src/pymatgen/core/structure.py +++ b/src/pymatgen/core/structure.py @@ -3352,8 +3352,8 @@ def get_symmetry_dataset(self, backend: Literal["moyopy"], **kwargs) -> moyopy.M def get_symmetry_dataset(self, backend: Literal["spglib"], **kwargs) -> spglib.SpglibDataset: ... def get_symmetry_dataset( - self, backend: Literal["moyopy", "spglib"] = "spglib", **kwargs - ) -> moyopy.MoyoDataset | spglib.SpglibDataset: + self, backend: Literal["moyopy", "spglib"] = "spglib", return_raw_dataset=False, **kwargs + ) -> dict | moyopy.MoyoDataset | spglib.SpglibDataset: """Get a symmetry dataset from the structure using either moyopy or spglib backend. If using the spglib backend (default), please cite: @@ -3365,6 +3365,9 @@ def get_symmetry_dataset( Args: backend ("moyopy" | "spglib"): Which symmetry analysis backend to use. Defaults to "spglib". + return_raw_dataset (bool): Whether to return the raw Dataset object from the backend. The default is + False, which returns a dict with a common subset of the data present in both datasets. If you use the + raw Dataset object, we do not guarantee that the format of the output is not going to change. **kwargs: Additional arguments passed to the respective backend's constructor. For spglib, these are passed to SpacegroupAnalyzer (e.g. symprec, angle_tolerance). For moyopy, these are passed to MoyoDataset constructor. @@ -3376,6 +3379,9 @@ def get_symmetry_dataset( ImportError: If the requested backend is not installed. ValueError: If an invalid backend is specified. """ + if backend not in ("moyopy", "spglib"): + raise ValueError(f"Invalid {backend=}, must be one of moyopy or spglib.") + if backend == "moyopy": try: import moyopy @@ -3385,16 +3391,31 @@ def get_symmetry_dataset( # Convert structure to MoyoDataset format moyo_cell = moyopy.interface.MoyoAdapter.from_structure(self) - return moyopy.MoyoDataset(cell=moyo_cell, **kwargs) + dataset = moyopy.MoyoDataset(cell=moyo_cell, **kwargs) - if backend == "spglib": + else: from pymatgen.symmetry.analyzer import SpacegroupAnalyzer sga = SpacegroupAnalyzer(self, **kwargs) - return sga.get_symmetry_dataset() + dataset = sga.get_symmetry_dataset() + + if return_raw_dataset: + return dataset + + dictdata = {k: getattr(dataset, k) for k in ("hall_number", "number", "site_symmetry_symbols", "wyckoffs")} + + if backend == "spglib": + dictdata["international"] = dataset.international + dictdata["orbits"] = dataset.crystallographic_orbits + dictdata["std_origin_shift"] = dataset.origin_shift + else: + from pymatgen.symmetry.groups import SpaceGroup + + dictdata["international"] = SpaceGroup.from_int_number(dataset.number).symbol + dictdata["orbits"] = dataset.orbits + dictdata["std_origin_shift"] = dataset.std_origin_shift - valid_backends = ("moyopy", "spglib") - raise ValueError(f"Invalid {backend=}, must be one of {valid_backends}") + return dictdata class IMolecule(SiteCollection, MSONable): diff --git a/tests/core/test_structure.py b/tests/core/test_structure.py index d8bb9f365f1..e5ececa44e6 100644 --- a/tests/core/test_structure.py +++ b/tests/core/test_structure.py @@ -969,7 +969,16 @@ def test_sites_setter(self): def test_get_symmetry_dataset(self): """Test getting symmetry dataset from structure using different backends.""" # Test spglib backend - dataset = self.struct.get_symmetry_dataset(backend="spglib") + for backend in ("spglib", "moyopy"): + dataset = self.struct.get_symmetry_dataset(backend=backend) + assert isinstance(dataset, dict) + assert dataset["number"] == 227 + assert dataset["international"] == "Fd-3m" + assert len(dataset["orbits"]) == 2 + pytest.approx(dataset["std_origin_shift"], [0, 0, 0]) + + dataset = self.struct.get_symmetry_dataset(backend="spglib", return_raw_dataset=True) + assert dataset.number == 227 # Fd-3m space group assert dataset.international == "Fd-3m" assert len(dataset.rotations) > 0 @@ -977,7 +986,7 @@ def test_get_symmetry_dataset(self): # Test moyopy backend if available moyopy = pytest.importorskip("moyopy") - dataset = self.struct.get_symmetry_dataset(backend="moyopy") + dataset = self.struct.get_symmetry_dataset(backend="moyopy", return_raw_dataset=True) assert isinstance(dataset, moyopy.MoyoDataset) assert dataset.prim_std_cell.numbers == [14, 14] # Si atomic number is 14