diff --git a/python/.gitignore b/python/.gitignore new file mode 100644 index 0000000..68bc17f --- /dev/null +++ b/python/.gitignore @@ -0,0 +1,160 @@ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ +cover/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +.pybuilder/ +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +# For a library or package, you might want to ignore these files since the code is +# intended to run in multiple environments; otherwise, check them in: +# .python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# poetry +# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 
+# This is especially recommended for binary packages to ensure reproducibility, and is more +# commonly ignored for libraries. +# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control +#poetry.lock + +# pdm +# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. +#pdm.lock +# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it +# in version control. +# https://pdm.fming.dev/#use-with-ide +.pdm.toml + +# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# pytype static type analyzer +.pytype/ + +# Cython debug symbols +cython_debug/ + +# PyCharm +# JetBrains specific template is maintained in a separate JetBrains.gitignore that can +# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore +# and can be added to the global gitignore or merged into this file. For a more nuclear +# option (not recommended) you can uncomment the following to ignore the entire idea folder. +#.idea/ diff --git a/python/CHANGELOG.md b/python/CHANGELOG.md new file mode 100644 index 0000000..098a38c --- /dev/null +++ b/python/CHANGELOG.md @@ -0,0 +1,5 @@ +# Changelog + +## [0.1.0] - YYYY-MM-DD + +- Initial public release. 
diff --git a/python/Cargo.toml b/python/Cargo.toml new file mode 100644 index 0000000..d77cba6 --- /dev/null +++ b/python/Cargo.toml @@ -0,0 +1,24 @@ +[package] +name = "geo-index" +version = "0.1.0" +authors = ["Kyle Barron "] +edition = "2021" +description = "Fast, memory-efficient 2D spatial indexes for Python." +readme = "README.md" +repository = "https://github.com/kylebarron/geo-index" +license = "MIT OR Apache-2.0" +keywords = ["python", "geospatial"] +categories = ["science::geo"] +rust-version = "1.75" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html +[lib] +name = "_rust" +crate-type = ["cdylib"] + +[dependencies] +bytes = "1" +geo-index = { path = "../", features = ["rayon"] } +numpy = "0.20" +pyo3 = { version = "0.20", features = ["abi3-py38"] } +thiserror = "1" diff --git a/python/README.md b/python/README.md new file mode 100644 index 0000000..16430f9 --- /dev/null +++ b/python/README.md @@ -0,0 +1,3 @@ +# geo-index + +Fast, memory-efficient 2D spatial indexes for Python. 
diff --git a/python/pyproject.toml b/python/pyproject.toml
new file mode 100644
index 0000000..295d650
--- /dev/null
+++ b/python/pyproject.toml
@@ -0,0 +1,21 @@
+[build-system]
+requires = ["maturin>=1.4.0,<2.0"]
+build-backend = "maturin"
+
+[project]
+name = "geo-index"
+requires-python = ">=3.8"
+# No static version is given here, so it must be declared dynamic (maturin fills it in).
+dynamic = ["version"]
+# The extension module and the type stubs both rely on NumPy being installed.
+dependencies = ["numpy"]
+classifiers = [
+    "Programming Language :: Rust",
+    "Programming Language :: Python :: Implementation :: CPython",
+    "Programming Language :: Python :: Implementation :: PyPy",
+]
+
+[tool.maturin]
+features = ["pyo3/extension-module"]
+module-name = "geo_index._rust"
+python-source = "python"
diff --git a/python/python/geo_index/__init__.py b/python/python/geo_index/__init__.py
new file mode 100644
index 0000000..69bf76d
--- /dev/null
+++ b/python/python/geo_index/__init__.py
@@ -0,0 +1,4 @@
+from ._rust import *
+from ._rust import ___version
+
+__version__: str = ___version()
diff --git a/python/python/geo_index/_rust.pyi b/python/python/geo_index/_rust.pyi
new file mode 100644
index 0000000..7c866fc
--- /dev/null
+++ b/python/python/geo_index/_rust.pyi
@@ -0,0 +1,64 @@
+from typing import Literal, Optional, Self, Union
+
+import numpy as np
+from numpy.typing import NDArray
+
+from .enums import RTreeMethod
+
+# Scalar type accepted for query coordinates and radii.
+IntFloat = Union[int, float]
+# String spellings accepted wherever an RTreeMethod is expected.
+RTreeMethodT = Literal["hilbert", "str"]
+
+class KDTree:
+    """2D spatial index over points, built from NumPy coordinate arrays."""
+
+    @classmethod
+    def from_interleaved(
+        cls,
+        coords: NDArray[np.float64],
+        *,
+        node_size: Optional[int] = None,
+    ) -> Self:
+        """Build the index from a 2D array whose rows hold each point's x and y."""
+    @classmethod
+    def from_separated(
+        cls,
+        x: NDArray[np.float64],
+        y: NDArray[np.float64],
+        *,
+        node_size: Optional[int] = None,
+    ) -> Self:
+        """Build the index from equally long 1D arrays of x and y coordinates."""
+    # NOTE(review): confirm that np.uintc matches the dtype of the index arrays the
+    # extension returns; pointer-sized indices would be np.uintp instead.
+    def range(
+        self, min_x: IntFloat, min_y: IntFloat, max_x: IntFloat, max_y: IntFloat
+    ) -> NDArray[np.uintc]:
+        """Return the indices of the points found inside the given bounding box."""
+    def within(self, qx: IntFloat, qy: IntFloat, r: IntFloat) -> NDArray[np.uintc]:
+        """Return the indices of the points found within radius ``r`` of ``(qx, qy)``."""
+
+class RTree:
+    @classmethod
+    def from_interleaved(
+        cls,
+        boxes: NDArray[np.float64],
+        *,
+        method: RTreeMethod | RTreeMethodT = RTreeMethod.Hilbert,
+        node_size: Optional[int] = None,
+    ) -> Self: ...
+    @classmethod
+    def from_separated(
+        cls,
+        min_x: NDArray[np.float64],
+        min_y: NDArray[np.float64],
+        max_x: NDArray[np.float64],
+        max_y: NDArray[np.float64],
+        *,
+        method: RTreeMethod | RTreeMethodT = RTreeMethod.Hilbert,
+        node_size: Optional[int] = None,
+    ) -> Self: ...
+    def search(
+        self, min_x: IntFloat, min_y: IntFloat, max_x: IntFloat, max_y: IntFloat
+    ) -> NDArray[np.uintc]: ...
diff --git a/python/python/geo_index/enums.py b/python/python/geo_index/enums.py
new file mode 100644
index 0000000..c9180fa
--- /dev/null
+++ b/python/python/geo_index/enums.py
@@ -0,0 +1,30 @@
+from enum import Enum, auto
+
+
+class StrEnum(str, Enum):
+    """Enum whose members are strings; ``auto()`` yields the lower-cased member name."""
+
+    def __new__(cls, value, *args, **kwargs):
+        if not isinstance(value, (str, auto)):
+            raise TypeError(
+                f"Values of StrEnums must be strings: {value!r} is a {type(value)}"
+            )
+        return super().__new__(cls, value, *args, **kwargs)
+
+    def __str__(self):
+        return str(self.value)
+
+    def _generate_next_value_(name, *_):
+        return name.lower()
+
+
+class RTreeMethod(StrEnum):
+    """Sorting method used when building an RTree."""
+
+    Hilbert = auto()
+    """Use hilbert curves for sorting the RTree
+    """
+
+    STR = auto()
+    """Use the Sort-Tile-Recursive algorithm for sorting the RTree
+    """
diff --git a/python/python/geo_index/py.typed b/python/python/geo_index/py.typed
new file mode 100644
index 0000000..e69de29
diff --git a/python/src/kdtree.rs b/python/src/kdtree.rs
new file mode 100644
index 0000000..ac04825
--- /dev/null
+++ b/python/src/kdtree.rs
@@ -0,0 +1,128 @@
+use geo_index::kdtree::{KDTreeBuilder, KDTreeIndex, OwnedKDTree};
+use numpy::{PyArray1, PyReadonlyArray1, PyReadonlyArray2};
+use pyo3::exceptions::PyValueError;
+use pyo3::prelude::*;
+use pyo3::types::PyType;
+
+/// KD-tree index over 2D points, exposed to Python as `KDTree`.
+#[pyclass]
+pub struct KDTree(OwnedKDTree<f64>);
+
+#[pymethods]
+impl KDTree {
+    /// Build the index from a 2D array holding one `(x, y)` point per row.
+    ///
+    /// Raises `ValueError` if `coords` does not have exactly two columns.
+    #[classmethod]
+    #[pyo3(
+        signature = (coords, *, node_size = None),
+        text_signature = "(coords, *, node_size = None)")
+    ]
+    pub fn from_interleaved(
+        _cls: &PyType,
+        coords: PyReadonlyArray2<f64>,
+        node_size: Option<usize>,
+    ) -> PyResult<Self> {
+        // `PyReadonlyArray2` is always two-dimensional, so only the row width is checked.
+        // Points have two coordinates; the loop below reads columns 0 and 1 only.
+        let shape = coords.shape();
+        let (num_items, num_cols) = (shape[0], shape[1]);
+        if num_cols != 2 {
+            return Err(PyValueError::new_err(format!(
+                "coords must have shape (n, 2), got (n, {num_cols})"
+            )));
+        }
+
+        let coords = coords.as_array();
+
+        let mut builder = match node_size {
+            Some(node_size) => KDTreeBuilder::new_with_node_size(num_items, node_size),
+            None => KDTreeBuilder::new(num_items),
+        };
+
+        for i in 0..num_items {
+            builder.add(coords[[i, 0]], coords[[i, 1]]);
+        }
+
+        Ok(Self(builder.finish()))
+    }
+
+    /// Build the index from separate 1D arrays of x and y coordinates.
+    ///
+    /// Raises `ValueError` if `x` and `y` differ in length.
+    #[classmethod]
+    #[pyo3(
+        signature = (x, y, *, node_size = None),
+        text_signature = "(x, y, *, node_size = None)")
+    ]
+    pub fn from_separated(
+        _cls: &PyType,
+        x: PyReadonlyArray1<f64>,
+        y: PyReadonlyArray1<f64>,
+        node_size: Option<usize>,
+    ) -> PyResult<Self> {
+        if x.len() != y.len() {
+            return Err(PyValueError::new_err(format!(
+                "x and y must have the same length, got {} and {}",
+                x.len(),
+                y.len()
+            )));
+        }
+
+        let num_items = x.len();
+
+        let x = x.as_array();
+        let y = y.as_array();
+
+        let mut builder = match node_size {
+            Some(node_size) => KDTreeBuilder::new_with_node_size(num_items, node_size),
+            None => KDTreeBuilder::new(num_items),
+        };
+
+        for (&x, &y) in x.iter().zip(y.iter()) {
+            builder.add(x, y);
+        }
+
+        Ok(Self(builder.finish()))
+    }
+
+    /// Search the index for items within a given bounding box.
+    ///
+    /// Args:
+    ///     min_x
+    ///     min_y
+    ///     max_x
+    ///     max_y
+    ///
+    /// Returns indices of found items
+    pub fn range<'py>(
+        &'py self,
+        py: Python<'py>,
+        min_x: f64,
+        min_y: f64,
+        max_x: f64,
+        max_y: f64,
+    ) -> &'py PyArray1<usize> {
+        // Release the GIL while the tree is queried.
+        let result = py.allow_threads(move || self.0.as_ref().range(min_x, min_y, max_x, max_y));
+        PyArray1::from_vec(py, result)
+    }
+
+    /// Search the index for items within a given radius.
+    ///
+    /// - qx: x value of query point
+    /// - qy: y value of query point
+    /// - r: radius
+    ///
+    /// Returns indices of found items
+    pub fn within<'py>(
+        &'py self,
+        py: Python<'py>,
+        qx: f64,
+        qy: f64,
+        r: f64,
+    ) -> &'py PyArray1<usize> {
+        let result = py.allow_threads(move || self.0.as_ref().within(qx, qy, r));
+        PyArray1::from_vec(py, result)
+    }
+}
diff --git a/python/src/lib.rs b/python/src/lib.rs
new file mode 100644
index 0000000..8abdd0c
--- /dev/null
+++ b/python/src/lib.rs
@@ -0,0 +1,23 @@
+pub mod kdtree;
+pub mod rtree;
+
+use pyo3::prelude::*;
+
+const VERSION: &str = env!("CARGO_PKG_VERSION");
+
+/// Return the crate version recorded at compile time.
+#[pyfunction]
+fn ___version() -> &'static str {
+    VERSION
+}
+
+/// Extension module: registers the version helper and the two index classes.
+#[pymodule]
+fn _rust(_py: Python, m: &PyModule) -> PyResult<()> {
+    m.add_wrapped(wrap_pyfunction!(___version))?;
+
+    m.add_class::<kdtree::KDTree>()?;
+    m.add_class::<rtree::RTree>()?;
+
+    Ok(())
+}
diff --git a/python/src/rtree.rs b/python/src/rtree.rs
new file mode 100644
index 0000000..c8ec3ad
--- /dev/null
+++ b/python/src/rtree.rs
@@ -0,0 +1,146 @@
+use geo_index::rtree::sort::{HilbertSort, STRSort};
+use geo_index::rtree::{OwnedRTree, RTreeBuilder, RTreeIndex};
+use numpy::{PyArray1, PyReadonlyArray1, PyReadonlyArray2};
+use pyo3::exceptions::PyValueError;
+use pyo3::prelude::*;
+use pyo3::types::PyType;
+
+/// Sorting method applied to the boxes when the tree is built.
+pub enum RTreeMethod {
+    Hilbert,
+    STR,
+}
+
+impl<'a> FromPyObject<'a> for RTreeMethod {
+    /// Parse the method from a Python string, ignoring case.
+    fn extract(ob: &'a PyAny) -> PyResult<Self> {
+        let s: String = ob.extract()?;
+        match s.to_lowercase().as_str() {
+            "hilbert" => Ok(Self::Hilbert),
+            "str" => Ok(Self::STR),
+            _ => Err(PyValueError::new_err(
+                "Unexpected method. Should be one of 'hilbert' or 'str'.",
+            )),
+        }
+    }
+}
+
+/// R-tree index over 2D bounding boxes, exposed to Python as `RTree`.
+#[pyclass]
+pub struct RTree(OwnedRTree<f64>);
+
+// TODO: add support for constructing from a buffer. Need to be able to construct (and validate) an
+// OwnedRTree
+// impl<'a> FromPyObject<'a> for RTree {
+//     fn extract(ob: &'a PyAny) -> PyResult<Self> {
+//         let s: Vec<u8> = ob.extract()?;
+//         OwnedRTree::from(value)
+//     }
+// }
+
+#[pymethods]
+impl RTree {
+    /// Build the index from a 2D array holding one `(min_x, min_y, max_x, max_y)` box per row.
+    ///
+    /// Raises `ValueError` if `boxes` does not have exactly four columns.
+    #[classmethod]
+    #[pyo3(
+        signature = (boxes, *, method = RTreeMethod::Hilbert, node_size = None),
+        text_signature = "(boxes, *, method = 'hilbert', node_size = None)")
+    ]
+    pub fn from_interleaved(
+        _cls: &PyType,
+        boxes: PyReadonlyArray2<f64>,
+        method: RTreeMethod,
+        node_size: Option<usize>,
+    ) -> PyResult<Self> {
+        // `PyReadonlyArray2` is always two-dimensional, so only the row width is checked.
+        let shape = boxes.shape();
+        let (num_items, num_cols) = (shape[0], shape[1]);
+        if num_cols != 4 {
+            return Err(PyValueError::new_err(format!(
+                "boxes must have shape (n, 4), got (n, {num_cols})"
+            )));
+        }
+
+        let boxes = boxes.as_array();
+
+        let mut builder = match node_size {
+            Some(node_size) => RTreeBuilder::new_with_node_size(num_items, node_size),
+            None => RTreeBuilder::new(num_items),
+        };
+
+        for i in 0..num_items {
+            builder.add(
+                boxes[[i, 0]],
+                boxes[[i, 1]],
+                boxes[[i, 2]],
+                boxes[[i, 3]],
+            );
+        }
+
+        Ok(match method {
+            RTreeMethod::Hilbert => Self(builder.finish::<HilbertSort>()),
+            RTreeMethod::STR => Self(builder.finish::<STRSort>()),
+        })
+    }
+
+    /// Build the index from four separate 1D arrays of box bounds.
+    ///
+    /// Raises `ValueError` if the arrays differ in length.
+    #[classmethod]
+    #[pyo3(
+        signature = (min_x, min_y, max_x, max_y, *, method = RTreeMethod::Hilbert, node_size = None),
+        text_signature = "(min_x, min_y, max_x, max_y, *, method = 'hilbert', node_size = None)")
+    ]
+    pub fn from_separated(
+        _cls: &PyType,
+        min_x: PyReadonlyArray1<f64>,
+        min_y: PyReadonlyArray1<f64>,
+        max_x: PyReadonlyArray1<f64>,
+        max_y: PyReadonlyArray1<f64>,
+        method: RTreeMethod,
+        node_size: Option<usize>,
+    ) -> PyResult<Self> {
+        let num_items = min_x.len();
+        if min_y.len() != num_items || max_x.len() != num_items || max_y.len() != num_items {
+            return Err(PyValueError::new_err(
+                "min_x, min_y, max_x and max_y must all have the same length",
+            ));
+        }
+
+        let min_x = min_x.as_array();
+        let min_y = min_y.as_array();
+        let max_x = max_x.as_array();
+        let max_y = max_y.as_array();
+
+        let mut builder = match node_size {
+            Some(node_size) => RTreeBuilder::new_with_node_size(num_items, node_size),
+            None => RTreeBuilder::new(num_items),
+        };
+
+        for i in 0..num_items {
+            builder.add(min_x[i], min_y[i], max_x[i], max_y[i]);
+        }
+
+        Ok(match method {
+            RTreeMethod::Hilbert => Self(builder.finish::<HilbertSort>()),
+            RTreeMethod::STR => Self(builder.finish::<STRSort>()),
+        })
+    }
+
+    /// Search the index with a bounding box.
+    ///
+    /// Returns indices of found items
+    pub fn search<'py>(
+        &'py self,
+        py: Python<'py>,
+        min_x: f64,
+        min_y: f64,
+        max_x: f64,
+        max_y: f64,
+    ) -> &'py PyArray1<usize> {
+        let result = py.allow_threads(move || self.0.search(min_x, min_y, max_x, max_y));
+        PyArray1::from_vec(py, result)
+    }
+}