diff --git a/CMakeLists.txt b/CMakeLists.txt index a9d1390b..31edee9b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -23,16 +23,17 @@ if(EMSCRIPTEN set(SPATIAL_USE_NETWORK OFF) endif() -include_directories(spatial/include) -add_subdirectory(spatial/src) +add_subdirectory(src/spatial) +add_subdirectory(src/sgl) +include_directories(src) -include_directories(spatial/third_party/yyjson/include) -add_subdirectory(spatial/third_party/yyjson) +include_directories(src/third_party/yyjson/include) +add_subdirectory(src/third_party/yyjson) -include_directories(spatial/third_party/protozero/include) +include_directories(src/third_party/protozero/include) -include_directories(spatial/third_party/shapelib) -add_subdirectory(spatial/third_party/shapelib) +include_directories(src/third_party/shapelib) +add_subdirectory(src/third_party/shapelib) add_library(${EXTENSION_NAME} STATIC ${EXTENSION_SOURCES}) @@ -47,8 +48,7 @@ if(NOT EXISTS ${CMAKE_BINARY_DIR}/deps) COMMAND ${CMAKE_COMMAND} -G ${CMAKE_GENERATOR} -DCMAKE_CXX_COMPILER='${CMAKE_CXX_COMPILER}' - -DCMAKE_C_COMPILER='${CMAKE_C_COMPILER}' - -DDUCKDB_ENABLE_DEPRECATED_API=1 + -DCMAKE_C_COMPILER='${CMAKE_C_COMPILER}' -DDUCKDB_ENABLE_DEPRECATED_API=1 -DWASM_LOADABLE_EXTENSIONS=1 -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} -DOSX_BUILD_ARCH=${OSX_BUILD_ARCH} -DSPATIAL_USE_NETWORK=${SPATIAL_USE_NETWORK} @@ -96,7 +96,6 @@ find_package(SQLite3 REQUIRED) find_package(GEOS REQUIRED) find_package(GDAL REQUIRED) find_package(EXPAT REQUIRED) -find_package(GeographicLib REQUIRED) # Important: The link order matters, its the reverse order of dependency set(EXTENSION_DEPENDENCIES @@ -106,8 +105,7 @@ set(EXTENSION_DEPENDENCIES EXPAT::EXPAT SQLite::SQLite3 ZLIB::ZLIB - ${SQLITE3_MEMVFS} - ${GeographicLib_LIBRARIES}) + ${SQLITE3_MEMVFS}) if(SPATIAL_USE_NETWORK) message(STATUS "Building with network functionality") @@ -130,9 +128,6 @@ if((NOT EMSCRIPTEN) AND (NOT IOS)) endif() endif() -# Geographiclib is special 
-include_directories(${GeographicLib_INCLUDE_DIRS}) - # Add dependencies to extension target_link_libraries(${EXTENSION_NAME} PUBLIC ${EXTENSION_DEPENDENCIES}) diff --git a/Makefile b/Makefile index 903eb020..28ef599a 100644 --- a/Makefile +++ b/Makefile @@ -4,11 +4,12 @@ PROJ_DIR := $(dir $(abspath $(lastword $(MAKEFILE_LIST)))) EXT_NAME=excel EXT_CONFIG=${PROJ_DIR}extension_config.cmake +CORE_EXTENSIONS='httpfs' + # Include the Makefile from extension-ci-tools include extension-ci-tools/makefiles/duckdb_extension.Makefile #### Override the included format target because we have different source tree layout format: - find spatial/src -iname *.hpp -o -iname *.cpp | xargs clang-format --sort-includes=0 -style=file -i - find spatial/include -iname *.hpp -o -iname *.cpp | xargs clang-format --sort-includes=0 -style=file -i + find src/spatial -iname *.hpp -o -iname *.cpp | xargs clang-format --sort-includes=0 -style=file -i cmake-format -i CMakeLists.txt \ No newline at end of file diff --git a/duckdb b/duckdb index 0959644c..83d07cb6 160000 --- a/duckdb +++ b/duckdb @@ -1 +1 @@ -Subproject commit 0959644c1d57409e78d2fae0262f792921a54c55 +Subproject commit 83d07cb6838a6a56e951425b82ea3457cd2dd9e5 diff --git a/extension_config.cmake b/extension_config.cmake index 5f026a73..54f19133 100644 --- a/extension_config.cmake +++ b/extension_config.cmake @@ -11,7 +11,7 @@ endif() duckdb_extension_load(spatial SOURCE_DIR ${CMAKE_CURRENT_LIST_DIR} - INCLUDE_DIR ${CMAKE_CURRENT_LIST_DIR}/spatial/include + INCLUDE_DIR ${CMAKE_CURRENT_LIST_DIR}/src/spatial ${DO_TESTS} LINKED_LIBS "../../deps/local/lib/*.a" ) \ No newline at end of file diff --git a/spatial/.gitignore b/spatial/.gitignore deleted file mode 100644 index 978f2bb2..00000000 --- a/spatial/.gitignore +++ /dev/null @@ -1 +0,0 @@ -duckdb_unittest_tempdir \ No newline at end of file diff --git a/spatial/include/spatial/common.hpp b/spatial/include/spatial/common.hpp deleted file mode 100644 index 8017383a..00000000 --- 
a/spatial/include/spatial/common.hpp +++ /dev/null @@ -1,8 +0,0 @@ -#pragma once - -#include "duckdb.hpp" -#include "duckdb/common/typedefs.hpp" -#include "duckdb/common/helper.hpp" -#include "duckdb/main/extension_util.hpp" -#include "spatial/doc_util.hpp" -using namespace duckdb; \ No newline at end of file diff --git a/spatial/include/spatial/core/functions/aggregate.hpp b/spatial/include/spatial/core/functions/aggregate.hpp deleted file mode 100644 index 9c11f817..00000000 --- a/spatial/include/spatial/core/functions/aggregate.hpp +++ /dev/null @@ -1,20 +0,0 @@ -#pragma once -#include "spatial/common.hpp" - -namespace spatial { - -namespace core { - -struct CoreAggregateFunctions { -public: - static void Register(DatabaseInstance &db) { - RegisterStExtentAgg(db); - } - -private: - static void RegisterStExtentAgg(DatabaseInstance &db); -}; - -} // namespace core - -} // namespace spatial \ No newline at end of file diff --git a/spatial/include/spatial/core/functions/cast.hpp b/spatial/include/spatial/core/functions/cast.hpp deleted file mode 100644 index f311e890..00000000 --- a/spatial/include/spatial/core/functions/cast.hpp +++ /dev/null @@ -1,37 +0,0 @@ -#pragma once -#include "spatial/common.hpp" - -namespace spatial { - -namespace core { - -struct GeometryFactory; - -struct CoreVectorOperations { -public: - static void Point2DToVarchar(Vector &source, Vector &result, idx_t count); - static void LineString2DToVarchar(Vector &source, Vector &result, idx_t count); - static void Polygon2DToVarchar(Vector &source, Vector &result, idx_t count); - static void Box2DToVarchar(Vector &source, Vector &result, idx_t count); - static void GeometryToVarchar(Vector &source, Vector &result, idx_t count); -}; - -struct CoreCastFunctions { -public: - static void Register(DatabaseInstance &db) { - RegisterVarcharCasts(db); - RegisterDimensionalCasts(db); - RegisterGeometryCasts(db); - RegisterWKBCasts(db); - } - -private: - static void RegisterVarcharCasts(DatabaseInstance 
&db); - static void RegisterDimensionalCasts(DatabaseInstance &db); - static void RegisterGeometryCasts(DatabaseInstance &db); - static void RegisterWKBCasts(DatabaseInstance &db); -}; - -} // namespace core - -} // namespace spatial \ No newline at end of file diff --git a/spatial/include/spatial/core/functions/common.hpp b/spatial/include/spatial/core/functions/common.hpp deleted file mode 100644 index 5c749c50..00000000 --- a/spatial/include/spatial/core/functions/common.hpp +++ /dev/null @@ -1,23 +0,0 @@ -#pragma once -#include "spatial/common.hpp" - -namespace spatial { - -namespace core { - -struct GeometryFunctionLocalState : FunctionLocalState { -public: - ArenaAllocator arena; - -public: - explicit GeometryFunctionLocalState(ClientContext &context); - static unique_ptr Init(ExpressionState &state, const BoundFunctionExpression &expr, - FunctionData *bind_data); - static unique_ptr InitCast(CastLocalStateParameters &context); - static GeometryFunctionLocalState &ResetAndGet(ExpressionState &state); - static GeometryFunctionLocalState &ResetAndGet(CastParameters ¶meters); -}; - -} // namespace core - -} // namespace spatial \ No newline at end of file diff --git a/spatial/include/spatial/core/functions/macros.hpp b/spatial/include/spatial/core/functions/macros.hpp deleted file mode 100644 index 9c967e9e..00000000 --- a/spatial/include/spatial/core/functions/macros.hpp +++ /dev/null @@ -1,16 +0,0 @@ -#pragma once -#include "spatial/common.hpp" -#include "spatial/core/geometry/geometry.hpp" - -namespace spatial { - -namespace core { - -struct CoreScalarMacros { -public: - static void Register(DatabaseInstance &db); -}; - -} // namespace core - -} // namespace spatial \ No newline at end of file diff --git a/spatial/include/spatial/core/functions/scalar.hpp b/spatial/include/spatial/core/functions/scalar.hpp deleted file mode 100644 index eda7eb02..00000000 --- a/spatial/include/spatial/core/functions/scalar.hpp +++ /dev/null @@ -1,241 +0,0 @@ -#pragma once 
-#include "spatial/common.hpp" - -namespace spatial { - -namespace core { - -struct CoreScalarFunctions { -public: - static void Register(DatabaseInstance &db) { - RegisterStArea(db); - RegisterStAsGeoJSON(db); - RegisterStAsText(db); - RegisterStAsWKB(db); - RegisterStAsHEXWKB(db); - RegisterStAsSVG(db); - RegisterStCentroid(db); - RegisterStCollect(db); - RegisterStCollectionExtract(db); - RegisterStContains(db); - RegisterStDimension(db); - RegisterStDistance(db); - RegisterStDump(db); - RegisterStEndPoint(db); - RegisterStExtent(db); - RegisterStExteriorRing(db); - RegisterStFlipCoordinates(db); - RegisterStForce(db); - RegisterStGeometryType(db); - RegisterStGeomFromHEXWKB(db); - RegisterStGeomFromText(db); - RegisterStGeomFromWKB(db); - RegisterStHas(db); - RegisterStHaversine(db); - RegisterStHilbert(db); - RegisterStIntersects(db); - RegisterStIntersectsExtent(db); - RegisterStIsClosed(db); - RegisterStIsEmpty(db); - RegisterStLength(db); - RegisterStMakeEnvelope(db); - RegisterStMakeLine(db); - RegisterStMakePolygon(db); - RegisterStMulti(db); - RegisterStNGeometries(db); - RegisterStNInteriorRings(db); - RegisterStNPoints(db); - RegisterStPerimeter(db); - RegisterStPoint(db); - RegisterStPointN(db); - RegisterStPoints(db); - RegisterStQuadKey(db); - RegisterStRemoveRepeatedPoints(db); - RegisterStStartPoint(db); - RegisterStX(db); - RegisterStXMax(db); - RegisterStXMin(db); - RegisterStY(db); - RegisterStYMax(db); - RegisterStYMin(db); - RegisterStZ(db); - RegisterStZMax(db); - RegisterStZMin(db); - RegisterStM(db); - RegisterStMMax(db); - RegisterStMMin(db); - } - -private: - // ST_Area - static void RegisterStArea(DatabaseInstance &db); - - // ST_AsGeoJSON - static void RegisterStAsGeoJSON(DatabaseInstance &db); - - // ST_AsText - static void RegisterStAsText(DatabaseInstance &db); - - // ST_AsHextWKB - static void RegisterStAsHEXWKB(DatabaseInstance &db); - - // ST_AsSVG - static void RegisterStAsSVG(DatabaseInstance &db); - - // ST_AsWKB - static void 
RegisterStAsWKB(DatabaseInstance &db); - - // ST_Centroid - static void RegisterStCentroid(DatabaseInstance &db); - - // ST_Collect - static void RegisterStCollect(DatabaseInstance &db); - - // ST_CollectionExtract - static void RegisterStCollectionExtract(DatabaseInstance &db); - - // ST_Contains - static void RegisterStContains(DatabaseInstance &db); - - // ST_Dimension - static void RegisterStDimension(DatabaseInstance &db); - - // ST_Distance - static void RegisterStDistance(DatabaseInstance &db); - - // ST_Dump - static void RegisterStDump(DatabaseInstance &db); - - // ST_EndPoint - static void RegisterStEndPoint(DatabaseInstance &db); - - // ST_Extent - static void RegisterStExtent(DatabaseInstance &db); - - // ST_ExteriorRing - static void RegisterStExteriorRing(DatabaseInstance &db); - - // ST_FlipCoordinates - static void RegisterStFlipCoordinates(DatabaseInstance &db); - - // ST_Force(2D/3D) - static void RegisterStForce(DatabaseInstance &db); - - // ST_GeometryType - static void RegisterStGeometryType(DatabaseInstance &db); - - // ST_GeomFromHEXWKB - static void RegisterStGeomFromHEXWKB(DatabaseInstance &db); - - // ST_GeomFromText - static void RegisterStGeomFromText(DatabaseInstance &db); - - // ST_GeomFromWKB - static void RegisterStGeomFromWKB(DatabaseInstance &db); - - // ST_Has(M/Z) + ST_ZMFlag - static void RegisterStHas(DatabaseInstance &db); - - // ST_Haversine - static void RegisterStHaversine(DatabaseInstance &db); - - // ST_Hilbert - static void RegisterStHilbert(DatabaseInstance &db); - - // ST_Intersects - static void RegisterStIntersects(DatabaseInstance &db); - - // ST_IntersectsExtent (&&) - static void RegisterStIntersectsExtent(DatabaseInstance &db); - - // ST_IsClosed - static void RegisterStIsClosed(DatabaseInstance &db); - - // ST_IsEmpty - static void RegisterStIsEmpty(DatabaseInstance &db); - - // ST_Length - static void RegisterStLength(DatabaseInstance &db); - - // ST_MakeEnvelope - static void 
RegisterStMakeEnvelope(DatabaseInstance &db); - - // ST_MakeLine - static void RegisterStMakeLine(DatabaseInstance &db); - - // ST_MakePolygon - static void RegisterStMakePolygon(DatabaseInstance &db); - - // ST_Multi - static void RegisterStMulti(DatabaseInstance &db); - - // ST_NGeometries - static void RegisterStNGeometries(DatabaseInstance &db); - - // ST_NInteriorRings - static void RegisterStNInteriorRings(DatabaseInstance &db); - - // ST_NPoints - static void RegisterStNPoints(DatabaseInstance &db); - - // ST_Perimeter - static void RegisterStPerimeter(DatabaseInstance &db); - - // ST_Point - static void RegisterStPoint(DatabaseInstance &db); - - // ST_PointN - static void RegisterStPointN(DatabaseInstance &db); - - // ST_Points - static void RegisterStPoints(DatabaseInstance &db); - - // ST_RemoveRepeatedPoints - static void RegisterStRemoveRepeatedPoints(DatabaseInstance &db); - - // ST_QuadKey - static void RegisterStQuadKey(DatabaseInstance &db); - - // ST_StartPoint - static void RegisterStStartPoint(DatabaseInstance &db); - - // ST_X - static void RegisterStX(DatabaseInstance &db); - - // ST_XMax - static void RegisterStXMax(DatabaseInstance &db); - - // ST_XMin - static void RegisterStXMin(DatabaseInstance &db); - - // ST_Y - static void RegisterStY(DatabaseInstance &db); - - // ST_YMax - static void RegisterStYMax(DatabaseInstance &db); - - // ST_YMin - static void RegisterStYMin(DatabaseInstance &db); - - // ST_Z - static void RegisterStZ(DatabaseInstance &db); - - // ST_ZMax - static void RegisterStZMax(DatabaseInstance &db); - - // ST_ZMin - static void RegisterStZMin(DatabaseInstance &db); - - // ST_M - static void RegisterStM(DatabaseInstance &db); - - // ST_MMax - static void RegisterStMMax(DatabaseInstance &db); - - // ST_MMin - static void RegisterStMMin(DatabaseInstance &db); -}; - -} // namespace core - -} // namespace spatial \ No newline at end of file diff --git a/spatial/include/spatial/core/functions/table.hpp 
b/spatial/include/spatial/core/functions/table.hpp deleted file mode 100644 index a8629575..00000000 --- a/spatial/include/spatial/core/functions/table.hpp +++ /dev/null @@ -1,28 +0,0 @@ -#pragma once -#include "spatial/common.hpp" - -namespace spatial { - -namespace core { - -struct CoreTableFunctions { -public: - static void Register(DatabaseInstance &db) { - RegisterOsmTableFunction(db); - - // TODO: Move these - RegisterShapefileTableFunction(db); - RegisterShapefileMetaTableFunction(db); - RegisterGeneratePointsTableFunction(db); - } - -private: - static void RegisterOsmTableFunction(DatabaseInstance &db); - static void RegisterShapefileTableFunction(DatabaseInstance &db); - static void RegisterShapefileMetaTableFunction(DatabaseInstance &db); - static void RegisterGeneratePointsTableFunction(DatabaseInstance &db); -}; - -} // namespace core - -} // namespace spatial \ No newline at end of file diff --git a/spatial/include/spatial/core/geoarrow.hpp b/spatial/include/spatial/core/geoarrow.hpp deleted file mode 100644 index 123cd329..00000000 --- a/spatial/include/spatial/core/geoarrow.hpp +++ /dev/null @@ -1,14 +0,0 @@ -#pragma once -#include "spatial/common.hpp" - -namespace spatial { - -namespace core { - -struct GeoArrow { - static void Register(DatabaseInstance &db); -}; - -} // namespace core - -} // namespace spatial diff --git a/spatial/include/spatial/core/geometry/geometry.hpp b/spatial/include/spatial/core/geometry/geometry.hpp deleted file mode 100644 index efc60928..00000000 --- a/spatial/include/spatial/core/geometry/geometry.hpp +++ /dev/null @@ -1,957 +0,0 @@ -#pragma once - -#include "spatial/common.hpp" -#include "spatial/core/geometry/geometry_properties.hpp" -#include "spatial/core/util/cursor.hpp" -#include "spatial/core/geometry/geometry_type.hpp" -#include "spatial/core/geometry/vertex.hpp" - -namespace spatial { - -namespace core { - -class Geometry; - -//------------------------------------------------------------------------------ -// 
Geometry -//------------------------------------------------------------------------------ - -class Geometry { - friend struct SinglePartGeometry; - friend struct MultiPartGeometry; - friend struct CollectionGeometry; - -private: - GeometryType type; - GeometryProperties properties; - bool is_readonly; - uint32_t data_count; - data_ptr_t data_ptr; - - Geometry(GeometryType type, GeometryProperties props, bool is_readonly, data_ptr_t data, uint32_t count) - : type(type), properties(props), is_readonly(is_readonly), data_count(count), data_ptr(data) { - } - - // TODO: Maybe make these public... - Geometry &operator[](uint32_t index) { - D_ASSERT(index < data_count); - return reinterpret_cast(data_ptr)[index]; - } - Geometry *begin() { - return reinterpret_cast(data_ptr); - } - Geometry *end() { - return reinterpret_cast(data_ptr) + data_count; - } - - const Geometry &operator[](uint32_t index) const { - D_ASSERT(index < data_count); - return reinterpret_cast(data_ptr)[index]; - } - const Geometry *begin() const { - return reinterpret_cast(data_ptr); - } - const Geometry *end() const { - return reinterpret_cast(data_ptr) + data_count; - } - -public: - // By default, create a read-only empty point - Geometry() - : type(GeometryType::POINT), properties(false, false), is_readonly(true), data_count(0), data_ptr(nullptr) { - } - - Geometry(GeometryType type, bool has_z, bool has_m) - : type(type), properties(has_z, has_m), is_readonly(true), data_count(0), data_ptr(nullptr) { - } - - // Copy Constructor - Geometry(const Geometry &other) - : type(other.type), properties(other.properties), is_readonly(true), data_count(other.data_count), - data_ptr(other.data_ptr) { - } - - // Copy Assignment - Geometry &operator=(const Geometry &other) { - type = other.type; - properties = other.properties; - is_readonly = true; - data_count = other.data_count; - data_ptr = other.data_ptr; - return *this; - } - - // Move Constructor - Geometry(Geometry &&other) noexcept - : 
type(other.type), properties(other.properties), is_readonly(other.is_readonly), data_count(other.data_count), - data_ptr(other.data_ptr) { - if (!other.is_readonly) { - // Take ownership of the data, and make the other object read-only - other.is_readonly = true; - } - } - - // Move Assignment - Geometry &operator=(Geometry &&other) noexcept { - type = other.type; - properties = other.properties; - is_readonly = other.is_readonly; - data_count = other.data_count; - data_ptr = other.data_ptr; - if (!other.is_readonly) { - // Take ownership of the data, and make the other object read-only - other.is_readonly = true; - } - return *this; - } - -public: - GeometryType GetType() const { - return type; - } - GeometryProperties &GetProperties() { - return properties; - } - const GeometryProperties &GetProperties() const { - return properties; - } - const_data_ptr_t GetData() const { - return data_ptr; - } - data_ptr_t GetData() { - return data_ptr; - } - bool IsReadOnly() const { - return is_readonly; - } - uint32_t Count() const { - return data_count; - } - - bool IsCollection() const { - return GeometryTypes::IsCollection(type); - } - bool IsMultiPart() const { - return GeometryTypes::IsMultiPart(type); - } - bool IsSinglePart() const { - return GeometryTypes::IsSinglePart(type); - } - -public: - // Used for tag dispatching - struct Tags { - // Base types - struct AnyGeometry {}; - struct SinglePartGeometry : public AnyGeometry {}; - struct MultiPartGeometry : public AnyGeometry {}; - struct CollectionGeometry : public MultiPartGeometry {}; - // Concrete types - struct Point : public SinglePartGeometry {}; - struct LineString : public SinglePartGeometry {}; - struct Polygon : public MultiPartGeometry {}; - struct MultiPoint : public CollectionGeometry {}; - struct MultiLineString : public CollectionGeometry {}; - struct MultiPolygon : public CollectionGeometry {}; - struct GeometryCollection : public CollectionGeometry {}; - }; - - template - static auto Match(Geometry 
&geom, ARGS &&...args) - -> decltype(T::Case(std::declval(), std::declval(), std::declval()...)) { - switch (geom.type) { - case GeometryType::POINT: - return T::Case(Tags::Point {}, geom, std::forward(args)...); - case GeometryType::LINESTRING: - return T::Case(Tags::LineString {}, geom, std::forward(args)...); - case GeometryType::POLYGON: - return T::Case(Tags::Polygon {}, geom, std::forward(args)...); - case GeometryType::MULTIPOINT: - return T::Case(Tags::MultiPoint {}, geom, std::forward(args)...); - case GeometryType::MULTILINESTRING: - return T::Case(Tags::MultiLineString {}, geom, std::forward(args)...); - case GeometryType::MULTIPOLYGON: - return T::Case(Tags::MultiPolygon {}, geom, std::forward(args)...); - case GeometryType::GEOMETRYCOLLECTION: - return T::Case(Tags::GeometryCollection {}, geom, std::forward(args)...); - default: - throw NotImplementedException("Geometry::Match"); - } - } - - template - static auto Match(const Geometry &geom, ARGS &&...args) - -> decltype(T::Case(std::declval(), std::declval(), std::declval()...)) { - switch (geom.type) { - case GeometryType::POINT: - return T::Case(Tags::Point {}, geom, std::forward(args)...); - case GeometryType::LINESTRING: - return T::Case(Tags::LineString {}, geom, std::forward(args)...); - case GeometryType::POLYGON: - return T::Case(Tags::Polygon {}, geom, std::forward(args)...); - case GeometryType::MULTIPOINT: - return T::Case(Tags::MultiPoint {}, geom, std::forward(args)...); - case GeometryType::MULTILINESTRING: - return T::Case(Tags::MultiLineString {}, geom, std::forward(args)...); - case GeometryType::MULTIPOLYGON: - return T::Case(Tags::MultiPolygon {}, geom, std::forward(args)...); - case GeometryType::GEOMETRYCOLLECTION: - return T::Case(Tags::GeometryCollection {}, geom, std::forward(args)...); - default: - throw NotImplementedException("Geometry::Match"); - } - } - - // TODO: Swap this to only have two create methods, - // and use mutating methods for Reference/Copy - static Geometry 
Create(ArenaAllocator &alloc, GeometryType type, uint32_t count, bool has_z, bool has_m); - static Geometry CreateEmpty(GeometryType type, bool has_z, bool has_m); - - static geometry_t Serialize(const Geometry &geom, Vector &result); - static Geometry Deserialize(ArenaAllocator &arena, const geometry_t &data); - - static bool IsEmpty(const Geometry &geom); - static uint32_t GetDimension(const Geometry &geom, bool recurse); - void SetVertexType(ArenaAllocator &alloc, bool has_z, bool has_m, double default_z = 0, double default_m = 0); - - // Iterate over all points in the geometry, recursing into collections - template - static void ExtractPoints(const Geometry &geom, FUNC &&func); - - // Iterate over all lines in the geometry, recursing into collections - template - static void ExtractLines(const Geometry &geom, FUNC &&func); - - // Iterate over all polygons in the geometry, recursing into collections - template - static void ExtractPolygons(const Geometry &geom, FUNC &&func); -}; - -inline Geometry Geometry::Create(ArenaAllocator &alloc, GeometryType type, uint32_t count, bool has_z, bool has_m) { - GeometryProperties props(has_z, has_m); - auto single_part = GeometryTypes::IsSinglePart(type); - auto elem_size = single_part ? 
props.VertexSize() : sizeof(Geometry); - auto geom = Geometry(type, props, false, alloc.AllocateAligned(count * elem_size), count); - return geom; -} - -inline Geometry Geometry::CreateEmpty(GeometryType type, bool has_z, bool has_m) { - GeometryProperties props(has_z, has_m); - return Geometry(type, props, false, nullptr, 0); -} - -//------------------------------------------------------------------------------ -// Inlined Geometry Functions -//------------------------------------------------------------------------------ -template -inline void Geometry::ExtractPoints(const Geometry &geom, FUNC &&func) { - struct op { - static void Case(Geometry::Tags::Point, const Geometry &geom, FUNC &&func) { - func(geom); - } - static void Case(Geometry::Tags::MultiPoint, const Geometry &geom, FUNC &&func) { - for (auto &part : geom) { - func(part); - } - } - static void Case(Geometry::Tags::GeometryCollection, const Geometry &geom, FUNC &&func) { - for (auto &part : geom) { - Match(part, std::forward(func)); - } - } - static void Case(Geometry::Tags::AnyGeometry, const Geometry &, FUNC &&) { - } - }; - Match(geom, std::forward(func)); -} - -template -inline void Geometry::ExtractLines(const Geometry &geom, FUNC &&func) { - struct op { - static void Case(Geometry::Tags::LineString, const Geometry &geom, FUNC &&func) { - func(geom); - } - static void Case(Geometry::Tags::MultiLineString, const Geometry &geom, FUNC &&func) { - for (auto &part : geom) { - func(part); - } - } - static void Case(Geometry::Tags::GeometryCollection, const Geometry &geom, FUNC &&func) { - for (auto &part : geom) { - Match(part, std::forward(func)); - } - } - static void Case(Geometry::Tags::AnyGeometry, const Geometry &, FUNC &&) { - } - }; - Match(geom, std::forward(func)); -} - -template -inline void Geometry::ExtractPolygons(const Geometry &geom, FUNC &&func) { - struct op { - static void Case(Geometry::Tags::Polygon, const Geometry &geom, FUNC &&func) { - func(geom); - } - static void 
Case(Geometry::Tags::MultiPolygon, const Geometry &geom, FUNC &&func) { - for (auto &part : geom) { - func(part); - } - } - static void Case(Geometry::Tags::GeometryCollection, const Geometry &geom, FUNC &&func) { - for (auto &part : geom) { - Match(part, std::forward(func)); - } - } - static void Case(Geometry::Tags::AnyGeometry, const Geometry &, FUNC &&) { - } - }; - Match(geom, std::forward(func)); -} - -inline bool Geometry::IsEmpty(const Geometry &geom) { - struct op { - static bool Case(Geometry::Tags::SinglePartGeometry, const Geometry &geom) { - return geom.data_count == 0; - } - static bool Case(Geometry::Tags::MultiPartGeometry, const Geometry &geom) { - for (const auto &part : geom) { - if (!Geometry::Match(part)) { - return false; - } - } - return true; - } - }; - return Geometry::Match(geom); -} - -inline uint32_t Geometry::GetDimension(const Geometry &geom, bool ignore_empty) { - if (ignore_empty && Geometry::IsEmpty(geom)) { - return 0; - } - struct op { - static uint32_t Case(Geometry::Tags::Point, const Geometry &, bool) { - return 0; - } - static uint32_t Case(Geometry::Tags::LineString, const Geometry &, bool) { - return 1; - } - static uint32_t Case(Geometry::Tags::Polygon, const Geometry &, bool) { - return 2; - } - static uint32_t Case(Geometry::Tags::MultiPoint, const Geometry &, bool) { - return 0; - } - static uint32_t Case(Geometry::Tags::MultiLineString, const Geometry &, bool) { - return 1; - } - static uint32_t Case(Geometry::Tags::MultiPolygon, const Geometry &, bool) { - return 2; - } - static uint32_t Case(Geometry::Tags::GeometryCollection, const Geometry &geom, bool ignore_empty) { - uint32_t max_dimension = 0; - for (const auto &p : geom) { - max_dimension = std::max(max_dimension, Geometry::GetDimension(p, ignore_empty)); - } - return max_dimension; - } - }; - return Geometry::Match(geom, ignore_empty); -} - -//------------------------------------------------------------------------------ -// Iterators 
-//------------------------------------------------------------------------------ -class PartView { -private: - Geometry *beg_ptr; - Geometry *end_ptr; - -public: - PartView(Geometry *beg, Geometry *end) : beg_ptr(beg), end_ptr(end) { - } - Geometry *begin() { - return beg_ptr; - } - Geometry *end() { - return end_ptr; - } - Geometry &operator[](uint32_t index) { - return beg_ptr[index]; - } -}; - -class ConstPartView { -private: - const Geometry *beg_ptr; - const Geometry *end_ptr; - -public: - ConstPartView(const Geometry *beg, const Geometry *end) : beg_ptr(beg), end_ptr(end) { - } - const Geometry *begin() { - return beg_ptr; - } - const Geometry *end() { - return end_ptr; - } - const Geometry &operator[](uint32_t index) { - return beg_ptr[index]; - } -}; - -//------------------------------------------------------------------------------ -// Accessors -//------------------------------------------------------------------------------ - -//------------------------------------------------------------------------------ -// SinglePartGeometry -//------------------------------------------------------------------------------ -struct SinglePartGeometry { - - // Turn this geometry into a read-only reference to raw data - static void ReferenceData(Geometry &geom, const_data_ptr_t data, uint32_t count, bool has_z, bool has_m) { - geom.data_count = count; - geom.data_ptr = const_cast(data); - geom.is_readonly = true; - geom.properties.SetZ(has_z); - geom.properties.SetM(has_m); - } - - // Turn this geometry into a read-only reference to another geometry, starting at the specified index - static void ReferenceData(Geometry &geom, const Geometry &other, uint32_t offset, uint32_t count) { - D_ASSERT(GeometryTypes::IsSinglePart(other.GetType())); - D_ASSERT(offset + count <= other.data_count); - auto vertex_size = other.properties.VertexSize(); - auto has_z = other.properties.HasZ(); - auto has_m = other.properties.HasM(); - ReferenceData(geom, other.data_ptr + offset * 
vertex_size, count, has_z, has_m); - } - - static void ReferenceData(Geometry &geom, const_data_ptr_t data, uint32_t count) { - ReferenceData(geom, data, count, geom.properties.HasZ(), geom.properties.HasM()); - } - - // Turn this geometry into a owning copy of raw data - static void CopyData(Geometry &geom, ArenaAllocator &alloc, const_data_ptr_t data, uint32_t count, bool has_z, - bool has_m) { - auto old_vertex_size = geom.properties.VertexSize(); - geom.properties.SetZ(has_z); - geom.properties.SetM(has_m); - auto new_vertex_size = geom.properties.VertexSize(); - if (geom.is_readonly) { - geom.data_ptr = alloc.AllocateAligned(count * new_vertex_size); - } else if (geom.data_count != count) { - geom.data_ptr = - alloc.ReallocateAligned(geom.data_ptr, geom.data_count * old_vertex_size, count * new_vertex_size); - } - memcpy(geom.data_ptr, data, count * new_vertex_size); - geom.data_count = count; - geom.is_readonly = false; - } - - // Turn this geometry into a owning copy of another geometry, starting at the specified index - static void CopyData(Geometry &geom, ArenaAllocator &alloc, const Geometry &other, uint32_t offset, - uint32_t count) { - D_ASSERT(GeometryTypes::IsSinglePart(other.GetType())); - D_ASSERT(offset + count <= other.data_count); - auto vertex_size = geom.properties.VertexSize(); - auto has_z = other.properties.HasZ(); - auto has_m = other.properties.HasM(); - CopyData(geom, alloc, other.data_ptr + offset * vertex_size, count, has_z, has_m); - } - - static void CopyData(Geometry &geom, ArenaAllocator &alloc, const_data_ptr_t data, uint32_t count) { - CopyData(geom, alloc, data, count, geom.properties.HasZ(), geom.properties.HasM()); - } - - // Resize the geometry, truncating or extending with zeroed vertices as needed - static void Resize(Geometry &geom, ArenaAllocator &alloc, uint32_t new_count); - - // Append the data from another geometry - static void Append(Geometry &geom, ArenaAllocator &alloc, const Geometry &other); - - // Append the 
data from multiple other geometries - static void Append(Geometry &geom, ArenaAllocator &alloc, const Geometry *others, uint32_t others_count); - - // Force the geometry to have a specific vertex type, resizing or shrinking the data as needed - static void SetVertexType(Geometry &geom, ArenaAllocator &alloc, bool has_z, bool has_m, double default_z = 0, - double default_m = 0); - - // If this geometry is read-only, make it mutable by copying the data - static void MakeMutable(Geometry &geom, ArenaAllocator &alloc); - - // Print this geometry as a string, starting at the specified index and printing the specified number of vertices - // (useful for debugging) - static string ToString(const Geometry &geom, uint32_t start = 0, uint32_t count = 0); - - // Check if the geometry is closed (first and last vertex are the same) - // A geometry with 1 vertex is considered closed, 0 vertices are considered open - static bool IsClosed(const Geometry &geom); - static bool IsEmpty(const Geometry &geom); - - // Return the planar length of the geometry - static double Length(const Geometry &geom); - - static VertexXY GetVertex(const Geometry &geom, uint32_t index); - static void SetVertex(Geometry &geom, uint32_t index, const VertexXY &vertex); - - template - static V GetVertex(const Geometry &geom, uint32_t index); - - template - static void SetVertex(Geometry &geom, uint32_t index, const V &vertex); - - static uint32_t VertexCount(const Geometry &geom); - static uint32_t VertexSize(const Geometry &geom); - static uint32_t ByteSize(const Geometry &geom); -}; - -inline VertexXY SinglePartGeometry::GetVertex(const Geometry &geom, uint32_t index) { - D_ASSERT(GeometryTypes::IsSinglePart(geom.GetType())); - D_ASSERT(index < geom.data_count); - return Load(geom.GetData() + index * geom.GetProperties().VertexSize()); -} - -inline void SinglePartGeometry::SetVertex(Geometry &geom, uint32_t index, const VertexXY &vertex) { - D_ASSERT(GeometryTypes::IsSinglePart(geom.GetType())); - 
D_ASSERT(index < geom.data_count); - Store(vertex, geom.GetData() + index * geom.GetProperties().VertexSize()); -} - -template -inline V SinglePartGeometry::GetVertex(const Geometry &geom, uint32_t index) { - D_ASSERT(GeometryTypes::IsSinglePart(geom.GetType())); - D_ASSERT(V::HAS_Z == geom.GetProperties().HasZ()); - D_ASSERT(V::HAS_M == geom.GetProperties().HasM()); - D_ASSERT(index < geom.data_count); - return Load(geom.GetData() + index * sizeof(V)); -} - -template -inline void SinglePartGeometry::SetVertex(Geometry &geom, uint32_t index, const V &vertex) { - D_ASSERT(GeometryTypes::IsSinglePart(geom.GetType())); - D_ASSERT(V::HAS_Z == geom.GetProperties().HasZ()); - D_ASSERT(V::HAS_M == geom.GetProperties().HasM()); - D_ASSERT(index < geom.data_count); - Store(vertex, geom.GetData() + index * sizeof(V)); -} - -inline uint32_t SinglePartGeometry::VertexCount(const Geometry &geom) { - D_ASSERT(GeometryTypes::IsSinglePart(geom.GetType())); - return geom.data_count; -} - -inline uint32_t SinglePartGeometry::VertexSize(const Geometry &geom) { - D_ASSERT(GeometryTypes::IsSinglePart(geom.GetType())); - return geom.GetProperties().VertexSize(); -} - -inline uint32_t SinglePartGeometry::ByteSize(const Geometry &geom) { - D_ASSERT(GeometryTypes::IsSinglePart(geom.GetType())); - return geom.data_count * geom.GetProperties().VertexSize(); -} - -inline bool SinglePartGeometry::IsEmpty(const Geometry &geom) { - D_ASSERT(GeometryTypes::IsSinglePart(geom.GetType())); - return geom.data_count == 0; -} - -//------------------------------------------------------------------------------ -// MultiPartGeometry -//------------------------------------------------------------------------------ -struct MultiPartGeometry { - - // static void Resize(Geometry &geom, ArenaAllocator &alloc, uint32_t new_count); - - static uint32_t PartCount(const Geometry &geom); - static Geometry &Part(Geometry &geom, uint32_t index); - static const Geometry &Part(const Geometry &geom, uint32_t index); - 
static PartView Parts(Geometry &geom); - static ConstPartView Parts(const Geometry &geom); - - static bool IsEmpty(const Geometry &geom) { - D_ASSERT(GeometryTypes::IsMultiPart(geom.GetType())); - for (uint32_t i = 0; i < geom.data_count; i++) { - if (!Geometry::IsEmpty(Part(geom, i))) { - return false; - } - } - return true; - } -}; - -inline uint32_t MultiPartGeometry::PartCount(const Geometry &geom) { - D_ASSERT(GeometryTypes::IsMultiPart(geom.GetType())); - return geom.data_count; -} - -inline Geometry &MultiPartGeometry::Part(Geometry &geom, uint32_t index) { - D_ASSERT(GeometryTypes::IsMultiPart(geom.GetType())); - D_ASSERT(index < geom.data_count); - return *reinterpret_cast(geom.GetData() + index * sizeof(Geometry)); -} - -inline const Geometry &MultiPartGeometry::Part(const Geometry &geom, uint32_t index) { - D_ASSERT(GeometryTypes::IsMultiPart(geom.GetType())); - D_ASSERT(index < geom.data_count); - return *reinterpret_cast(geom.GetData() + index * sizeof(Geometry)); -} - -inline PartView MultiPartGeometry::Parts(Geometry &geom) { - D_ASSERT(GeometryTypes::IsMultiPart(geom.GetType())); - auto ptr = reinterpret_cast(geom.GetData()); - return {ptr, ptr + geom.data_count}; -} - -inline ConstPartView MultiPartGeometry::Parts(const Geometry &geom) { - D_ASSERT(GeometryTypes::IsMultiPart(geom.GetType())); - auto ptr = reinterpret_cast(geom.GetData()); - return {ptr, ptr + geom.data_count}; -} - -//------------------------------------------------------------------------------ -// CollectionGeometry -//------------------------------------------------------------------------------ -struct CollectionGeometry : public MultiPartGeometry { -protected: - static Geometry Create(ArenaAllocator &alloc, GeometryType type, vector &items, bool has_z, bool has_m) { - D_ASSERT(GeometryTypes::IsCollection(type)); - auto collection = Geometry::Create(alloc, type, items.size(), has_z, has_m); - for (uint32_t i = 0; i < items.size(); i++) { - CollectionGeometry::Part(collection, 
i) = std::move(items[i]); - } - return collection; - } -}; - -//------------------------------------------------------------------------------ -// Point -//------------------------------------------------------------------------------ -struct Point : public SinglePartGeometry { - static Geometry Create(ArenaAllocator &alloc, uint32_t count, bool has_z, bool has_m); - static Geometry CreateEmpty(bool has_z, bool has_m); - - template - static Geometry CreateFromVertex(ArenaAllocator &alloc, const V &vertex); - - static Geometry CreateFromCopy(ArenaAllocator &alloc, const_data_ptr_t data, uint32_t count, bool has_z, - bool has_m) { - auto point = Point::Create(alloc, 1, has_z, has_m); - SinglePartGeometry::CopyData(point, alloc, data, count, has_z, has_m); - return point; - } - - // Methods - template - static V GetVertex(const Geometry &geom); - - template - static void SetVertex(Geometry &geom, const V &vertex); - - // Constants - static const constexpr GeometryType TYPE = GeometryType::POINT; -}; - -inline Geometry Point::Create(ArenaAllocator &alloc, uint32_t count, bool has_z, bool has_m) { - return Geometry::Create(alloc, TYPE, count, has_z, has_m); -} - -inline Geometry Point::CreateEmpty(bool has_z, bool has_m) { - return Geometry::CreateEmpty(TYPE, has_z, has_m); -} - -template -inline Geometry Point::CreateFromVertex(ArenaAllocator &alloc, const V &vertex) { - auto point = Create(alloc, 1, V::HAS_Z, V::HAS_M); - Point::SetVertex(point, vertex); - return point; -} - -template -inline V Point::GetVertex(const Geometry &geom) { - D_ASSERT(geom.GetType() == TYPE); - D_ASSERT(geom.Count() == 1); - D_ASSERT(geom.GetProperties().HasZ() == V::HAS_Z); - D_ASSERT(geom.GetProperties().HasM() == V::HAS_M); - return SinglePartGeometry::GetVertex(geom, 0); -} - -template -void Point::SetVertex(Geometry &geom, const V &vertex) { - D_ASSERT(geom.GetType() == TYPE); - D_ASSERT(geom.Count() == 1); - D_ASSERT(geom.GetProperties().HasZ() == V::HAS_Z); - 
D_ASSERT(geom.GetProperties().HasM() == V::HAS_M); - SinglePartGeometry::SetVertex(geom, 0, vertex); -} - -//------------------------------------------------------------------------------ -// LineString -//------------------------------------------------------------------------------ -struct LineString : public SinglePartGeometry { - static Geometry Create(ArenaAllocator &alloc, uint32_t count, bool has_z, bool has_m); - static Geometry CreateEmpty(bool has_z, bool has_m); - - static Geometry CreateFromCopy(ArenaAllocator &alloc, const_data_ptr_t data, uint32_t count, bool has_z, - bool has_m) { - auto line = LineString::Create(alloc, 1, has_z, has_m); - SinglePartGeometry::CopyData(line, alloc, data, count, has_z, has_m); - return line; - } - - // TODO: Wrap - // Create a new LineString referencing a slice of the this linestring - static Geometry GetSliceAsReference(const Geometry &geom, uint32_t start, uint32_t count) { - auto line = LineString::CreateEmpty(geom.GetProperties().HasZ(), geom.GetProperties().HasM()); - SinglePartGeometry::ReferenceData(line, geom, start, count); - return line; - } - - // TODO: Wrap - // Create a new LineString referencing a single point in the this linestring - static Geometry GetPointAsReference(const Geometry &geom, uint32_t index) { - auto count = index >= geom.Count() ? 
0 : 1; - auto point = Point::CreateEmpty(geom.GetProperties().HasZ(), geom.GetProperties().HasM()); - SinglePartGeometry::ReferenceData(point, geom, index, count); - return point; - } - - // Constants - static const constexpr GeometryType TYPE = GeometryType::LINESTRING; -}; - -inline Geometry LineString::Create(ArenaAllocator &alloc, uint32_t count, bool has_z, bool has_m) { - return Geometry::Create(alloc, TYPE, count, has_z, has_m); -} - -inline Geometry LineString::CreateEmpty(bool has_z, bool has_m) { - return Geometry::CreateEmpty(TYPE, has_z, has_m); -} - -//------------------------------------------------------------------------------ -// LinearRing (special case of LineString) -//------------------------------------------------------------------------------ -struct LinearRing : public LineString { - static Geometry Create(ArenaAllocator &alloc, uint32_t count, bool has_z, bool has_m); - static Geometry CreateEmpty(bool has_z, bool has_m); - - // Methods - static bool IsClosed(const Geometry &geom); - - // Constants - // TODO: We dont have a LinearRing type, so we use LineString for now - static const constexpr GeometryType TYPE = GeometryType::LINESTRING; -}; - -inline Geometry LinearRing::Create(ArenaAllocator &alloc, uint32_t count, bool has_z, bool has_m) { - return LineString::Create(alloc, count, has_z, has_m); -} - -inline Geometry LinearRing::CreateEmpty(bool has_z, bool has_m) { - return LineString::CreateEmpty(has_z, has_m); -} - -inline bool LinearRing::IsClosed(const Geometry &geom) { - D_ASSERT(geom.GetType() == TYPE); - // The difference between LineString is that a empty LinearRing is considered closed - if (LinearRing::IsEmpty(geom)) { - return true; - } - return LineString::IsClosed(geom); -} - -//------------------------------------------------------------------------------ -// Polygon -//------------------------------------------------------------------------------ -struct Polygon : public MultiPartGeometry { - // Constructors - static 
Geometry Create(ArenaAllocator &alloc, uint32_t count, bool has_z, bool has_m); - static Geometry CreateEmpty(bool has_z, bool has_m); - static Geometry CreateFromBox(ArenaAllocator &alloc, double minx, double miny, double maxx, double maxy); - - // Methods - static const Geometry &ExteriorRing(const Geometry &geom); - static Geometry &ExteriorRing(Geometry &geom); - - // Constants - static const constexpr GeometryType TYPE = GeometryType::POLYGON; -}; - -inline Geometry Polygon::Create(ArenaAllocator &alloc, uint32_t count, bool has_z, bool has_m) { - auto geom = Geometry::Create(alloc, TYPE, count, has_z, has_m); - for (uint32_t i = 0; i < count; i++) { - // Placement new - new (&Polygon::Part(geom, i)) Geometry(GeometryType::LINESTRING, has_z, has_m); - } - return geom; -} - -inline Geometry Polygon::CreateEmpty(bool has_z, bool has_m) { - return Geometry::CreateEmpty(TYPE, has_z, has_m); -} - -inline Geometry Polygon::CreateFromBox(ArenaAllocator &alloc, double minx, double miny, double maxx, double maxy) { - auto polygon = Polygon::Create(alloc, 1, false, false); - auto &ring = Polygon::Part(polygon, 0); - LineString::Resize(ring, alloc, 5); - LineString::SetVertex(ring, 0, {minx, miny}); - LineString::SetVertex(ring, 1, {minx, maxy}); - LineString::SetVertex(ring, 2, {maxx, maxy}); - LineString::SetVertex(ring, 3, {maxx, miny}); - LineString::SetVertex(ring, 4, {minx, miny}); - return polygon; -} - -inline Geometry &Polygon::ExteriorRing(Geometry &geom) { - D_ASSERT(geom.GetType() == TYPE); - D_ASSERT(Polygon::PartCount(geom) > 0); - return Polygon::Part(geom, 0); -} - -inline const Geometry &Polygon::ExteriorRing(const Geometry &geom) { - D_ASSERT(geom.GetType() == TYPE); - D_ASSERT(Polygon::PartCount(geom) > 0); - return Polygon::Part(geom, 0); -} - -//------------------------------------------------------------------------------ -// MultiPoint -//------------------------------------------------------------------------------ -struct MultiPoint : public 
CollectionGeometry { - static Geometry Create(ArenaAllocator &alloc, uint32_t count, bool has_z, bool has_m); - static Geometry CreateEmpty(bool has_z, bool has_m); - static Geometry Create(ArenaAllocator &alloc, vector &items, bool has_z, bool has_m); - - // Constants - static const constexpr GeometryType TYPE = GeometryType::MULTIPOINT; -}; - -inline Geometry MultiPoint::Create(ArenaAllocator &alloc, uint32_t count, bool has_z, bool has_m) { - auto geom = Geometry::Create(alloc, TYPE, count, has_z, has_m); - for (uint32_t i = 0; i < count; i++) { - // Placement new - new (&MultiPoint::Part(geom, i)) Geometry(GeometryType::POINT, has_z, has_m); - } - return geom; -} - -inline Geometry MultiPoint::CreateEmpty(bool has_z, bool has_m) { - return Geometry::CreateEmpty(TYPE, has_z, has_m); -} - -inline Geometry MultiPoint::Create(ArenaAllocator &alloc, vector &items, bool has_z, bool has_m) { - return CollectionGeometry::Create(alloc, TYPE, items, has_z, has_m); -} - -//------------------------------------------------------------------------------ -// MultiLineString -//------------------------------------------------------------------------------ -struct MultiLineString : public CollectionGeometry { - static Geometry Create(ArenaAllocator &alloc, uint32_t count, bool has_z, bool has_m); - static Geometry CreateEmpty(bool has_z, bool has_m); - static Geometry Create(ArenaAllocator &alloc, vector &items, bool has_z, bool has_m); - - static bool IsClosed(const Geometry &geom); - - // Constants - static const constexpr GeometryType TYPE = GeometryType::MULTILINESTRING; -}; - -inline Geometry MultiLineString::Create(ArenaAllocator &alloc, uint32_t count, bool has_z, bool has_m) { - auto geom = Geometry::Create(alloc, TYPE, count, has_z, has_m); - for (uint32_t i = 0; i < count; i++) { - // Placement new - new (&MultiLineString::Part(geom, i)) Geometry(GeometryType::LINESTRING, has_z, has_m); - } - return geom; -} - -inline Geometry MultiLineString::CreateEmpty(bool has_z, 
bool has_m) { - return Geometry::CreateEmpty(TYPE, has_z, has_m); -} - -inline Geometry MultiLineString::Create(ArenaAllocator &alloc, vector &items, bool has_z, bool has_m) { - return CollectionGeometry::Create(alloc, TYPE, items, has_z, has_m); -} - -inline bool MultiLineString::IsClosed(const Geometry &geom) { - if (MultiLineString::PartCount(geom) == 0) { - return false; - } - for (auto &part : MultiLineString::Parts(geom)) { - if (!LineString::IsClosed(part)) { - return false; - } - } - return true; -} - -//------------------------------------------------------------------------------ -// MultiPolygon -//------------------------------------------------------------------------------ -struct MultiPolygon : public CollectionGeometry { - static Geometry Create(ArenaAllocator &alloc, uint32_t count, bool has_z, bool has_m); - static Geometry CreateEmpty(bool has_z, bool has_m); - static Geometry Create(ArenaAllocator &alloc, vector &items, bool has_z, bool has_m); - - // Constants - static const constexpr GeometryType TYPE = GeometryType::MULTIPOLYGON; -}; - -inline Geometry MultiPolygon::Create(ArenaAllocator &alloc, uint32_t count, bool has_z, bool has_m) { - auto geom = Geometry::Create(alloc, TYPE, count, has_z, has_m); - for (uint32_t i = 0; i < count; i++) { - // Placement new - new (&MultiPolygon::Part(geom, i)) Geometry(GeometryType::POLYGON, has_z, has_m); - } - return geom; -} - -inline Geometry MultiPolygon::CreateEmpty(bool has_z, bool has_m) { - return Geometry::CreateEmpty(TYPE, has_z, has_m); -} - -inline Geometry MultiPolygon::Create(ArenaAllocator &alloc, vector &items, bool has_z, bool has_m) { - return CollectionGeometry::Create(alloc, TYPE, items, has_z, has_m); -} - -//------------------------------------------------------------------------------ -// GeometryCollection -//------------------------------------------------------------------------------ -struct GeometryCollection : public CollectionGeometry { - static Geometry Create(ArenaAllocator 
&alloc, uint32_t count, bool has_z, bool has_m); - static Geometry CreateEmpty(bool has_z, bool has_m); - static Geometry Create(ArenaAllocator &alloc, vector &items, bool has_z, bool has_m); - - // Constants - static const constexpr GeometryType TYPE = GeometryType::GEOMETRYCOLLECTION; -}; - -inline Geometry GeometryCollection::Create(ArenaAllocator &alloc, uint32_t count, bool has_z, bool has_m) { - auto geom = Geometry::Create(alloc, TYPE, count, has_z, has_m); - for (uint32_t i = 0; i < count; i++) { - // Placement new - new (&GeometryCollection::Part(geom, i)) Geometry(GeometryType::GEOMETRYCOLLECTION, has_z, has_m); - } - return geom; -} - -inline Geometry GeometryCollection::CreateEmpty(bool has_z, bool has_m) { - return Geometry::CreateEmpty(TYPE, has_z, has_m); -} - -inline Geometry GeometryCollection::Create(ArenaAllocator &alloc, vector &items, bool has_z, bool has_m) { - return CollectionGeometry::Create(alloc, TYPE, items, has_z, has_m); -} - -//------------------------------------------------------------------------------ -// Assertions -//------------------------------------------------------------------------------ - -static_assert(std::is_standard_layout::value, "Geometry must be standard layout"); - -} // namespace core - -} // namespace spatial \ No newline at end of file diff --git a/spatial/include/spatial/core/geometry/geometry_writer.hpp b/spatial/include/spatial/core/geometry/geometry_writer.hpp deleted file mode 100644 index 2cde435d..00000000 --- a/spatial/include/spatial/core/geometry/geometry_writer.hpp +++ /dev/null @@ -1,228 +0,0 @@ -#pragma once - -#include "spatial/common.hpp" -#include "spatial/core/geometry/geometry.hpp" -#include "spatial/core/util/cursor.hpp" -#include "spatial/core/util/math.hpp" -#include "spatial/core/geometry/geometry_type.hpp" -#include "spatial/core/geometry/geometry_processor.hpp" -namespace spatial { - -namespace core { - -//------------------------------------------------------------------------------ 
-// WriteBuffer -//------------------------------------------------------------------------------ -class WriteBuffer { - ArenaAllocator &allocator; - data_ptr_t start; - uint32_t size; - uint32_t capacity; - -public: - explicit WriteBuffer(ArenaAllocator &allocator_p) : allocator(allocator_p), start(nullptr), size(0), capacity(0) { - } - - // Begin the buffer with an initial capacity - void Begin(bool has_z, bool has_m) { - // The default initial capacity is 32 (+ 16) bytes which is enough to fit a single point geometry - auto initial_capacity = 32 + (has_z ? sizeof(double) : 0) + (has_m ? sizeof(double) : 0); - size = 0; - start = allocator.ReallocateAligned(start, capacity, initial_capacity); - capacity = initial_capacity; - } - - // Shrink the buffer to the current size - void End() { - start = allocator.ReallocateAligned(start, capacity, size); - capacity = size; - } - - void AddCapacity(uint32_t capacity_p) { - start = allocator.ReallocateAligned(start, capacity, capacity + capacity_p); - capacity += capacity_p; - } - - void Write(const void *data, uint32_t write_size) { - if (size + write_size > capacity) { - auto new_capacity = capacity * 2; - start = allocator.ReallocateAligned(start, capacity, new_capacity); - capacity = new_capacity; - } - memcpy(start + size, data, write_size); - size += write_size; - } - - template - void Write(const T &value) { - Write(&value, sizeof(T)); - } - - // Offset has to be less than count - template - void WriteOffset(const T &value, uint32_t offset) { - if (offset + sizeof(T) > size) { - throw SerializationException("Offset out of bounds"); - } - memcpy(start + offset, &value, sizeof(T)); - } - uint32_t Size() const { - return size; - } - uint32_t Capacity() const { - return capacity; - } - data_ptr_t GetPtr() const { - return start; - } -}; - -//------------------------------------------------------------------------------ -// GeometryWriter -//------------------------------------------------------------------------------ 
-struct GeometryStats { - uint32_t vertex_count = 0; - double min_x = std::numeric_limits::max(); - double min_y = std::numeric_limits::max(); - double max_x = std::numeric_limits::lowest(); - double max_y = std::numeric_limits::lowest(); - - void Reset() { - vertex_count = 0; - min_x = std::numeric_limits::max(); - min_y = std::numeric_limits::max(); - max_x = std::numeric_limits::lowest(); - max_y = std::numeric_limits::lowest(); - } - - void Update(double x, double y) { - vertex_count++; - min_x = std::min(min_x, x); - min_y = std::min(min_y, y); - max_x = std::max(max_x, x); - max_y = std::max(max_y, y); - } -}; - -class GeometryWriter { -private: - WriteBuffer buffer; - bool has_z = false; - bool has_m = false; - GeometryType type; - uint32_t ring_count_offset = 0; - GeometryStats stats; - -public: - explicit GeometryWriter(ArenaAllocator &allocator_p) : buffer(allocator_p) { - } - - void Begin(GeometryType geom_type, bool has_z_dim, bool has_m_dim) { - has_z = has_z_dim; - has_m = has_m_dim; - type = geom_type; - buffer.Begin(has_z, has_m); - - buffer.Write(type); - buffer.Write(0); // properties - buffer.Write(0); // Hash - buffer.Write(0); // padding - - // We dont write the bbox yet, we will write it at the end, but we reserve space for it - if (type != GeometryType::POINT) { - buffer.Write(sizeof(float) * 4); - } - } - - string_t End() { - // Shrink the buffer to the actual size - buffer.End(); - - GeometryProperties properties; - properties.SetZ(has_z); - properties.SetM(has_m); - properties.SetBBox(false); - if (stats.vertex_count > 0 && type != GeometryType::POINT) { - properties.SetBBox(true); - } - - // Write the properties at the beginning of the buffer (after geometry type) - buffer.WriteOffset(properties, 1); - - if (properties.HasBBox()) { - // Write the bbox after the first 8 bytes - buffer.WriteOffset(MathUtil::DoubleToFloatDown(stats.min_x), 8); - buffer.WriteOffset(MathUtil::DoubleToFloatDown(stats.min_y), 12); - 
buffer.WriteOffset(MathUtil::DoubleToFloatUp(stats.max_x), 16); - buffer.WriteOffset(MathUtil::DoubleToFloatUp(stats.max_y), 20); - string_t blob = string_t {const_char_ptr_cast(buffer.GetPtr()), buffer.Size()}; - return blob; - } else { - // Move the header forwards by 16 bytes - auto start = buffer.GetPtr(); - std::memmove(start + 16, start, buffer.Size() - 16); - string_t blob = string_t {const_char_ptr_cast(start), buffer.Size()}; - return blob; - } - } - - void AddVertex(double x, double y) { - D_ASSERT(!has_z && !has_m); - buffer.Write(x); - buffer.Write(y); - - stats.Update(x, y); - } - - void AddVertex(double x, double y, double zm) { - D_ASSERT(has_z || has_m); - buffer.Write(x); - buffer.Write(y); - buffer.Write(zm); - - stats.Update(x, y); - } - - void AddVertex(double x, double y, double z, double m) { - D_ASSERT(has_z && has_m); - buffer.Write(x); - buffer.Write(y); - buffer.Write(z); - buffer.Write(m); - - stats.Update(x, y); - } - - void AddPoint(bool is_empty) { - buffer.Write(static_cast(GeometryType::POINT)); - buffer.Write(is_empty ? 
0 : 1); - } - - void AddLineString(uint32_t vertex_count) { - buffer.Write(static_cast(GeometryType::LINESTRING)); - buffer.Write(vertex_count); - } - - void AddPolygon(uint32_t ring_count) { - buffer.Write(static_cast(GeometryType::POLYGON)); - buffer.Write(ring_count); - ring_count_offset = buffer.Size(); - for (auto i = 0; i < ring_count; i++) { - buffer.Write(0); - } - } - - void AddRing(uint32_t vertex_count) { - buffer.WriteOffset(vertex_count, ring_count_offset); - ring_count_offset += sizeof(uint32_t); - } - - void AddCollection(GeometryType collection_type, uint32_t item_count) { - buffer.Write(static_cast(collection_type)); - buffer.Write(item_count); - } -}; - -} // namespace core - -} // namespace spatial \ No newline at end of file diff --git a/spatial/include/spatial/core/geometry/shapefile_common.hpp b/spatial/include/spatial/core/geometry/shapefile_common.hpp deleted file mode 100644 index 8b137891..00000000 --- a/spatial/include/spatial/core/geometry/shapefile_common.hpp +++ /dev/null @@ -1 +0,0 @@ - diff --git a/spatial/include/spatial/core/geometry/vertex_processor.hpp b/spatial/include/spatial/core/geometry/vertex_processor.hpp deleted file mode 100644 index 4d154cd8..00000000 --- a/spatial/include/spatial/core/geometry/vertex_processor.hpp +++ /dev/null @@ -1,21 +0,0 @@ -#pragma once - -#include "spatial/common.hpp" -#include "spatial/core/geometry/geometry.hpp" -#include "spatial/core/util/cursor.hpp" -#include "spatial/core/geometry/geometry_type.hpp" - -namespace spatial { - -namespace core { - -// TODO: Implement this -struct VertexProcessor { - static geometry_t Process(const geometry_t &geom, const VertexVector &vertices) { - throw NotImplementedException("VertexProcessor::Process"); - } -}; - -} // namespace core - -} // namespace spatial \ No newline at end of file diff --git a/spatial/include/spatial/core/geometry/wkb_reader.hpp b/spatial/include/spatial/core/geometry/wkb_reader.hpp deleted file mode 100644 index c50fdfc5..00000000 --- 
a/spatial/include/spatial/core/geometry/wkb_reader.hpp +++ /dev/null @@ -1,52 +0,0 @@ -#pragma once -#include "spatial/common.hpp" -#include "spatial/core/geometry/geometry.hpp" - -namespace spatial { - -namespace core { - -class WKBReader { -private: - ArenaAllocator &arena; - bool has_any_z; - bool has_any_m; - - struct WKBType { - GeometryType type; - bool has_z; - bool has_m; - }; - - // Primitives - uint32_t ReadInt(Cursor &cursor, bool little_endian); - double ReadDouble(Cursor &cursor, bool little_endian); - WKBType ReadType(Cursor &cursor, bool little_endian); - void ReadVertices(Cursor &cursor, bool little_endian, bool has_z, bool has_m, Geometry &geometry); - - // Geometries - Geometry ReadPoint(Cursor &cursor, bool little_endian, bool has_z, bool has_m); - Geometry ReadLineString(Cursor &cursor, bool little_endian, bool has_z, bool has_m); - Geometry ReadPolygon(Cursor &cursor, bool little_endian, bool has_z, bool has_m); - Geometry ReadMultiPoint(Cursor &cursor, bool little_endian, bool has_z, bool has_m); - Geometry ReadMultiLineString(Cursor &cursor, bool little_endian, bool has_z, bool has_m); - Geometry ReadMultiPolygon(Cursor &cursor, bool little_endian, bool has_z, bool has_m); - Geometry ReadGeometryCollection(Cursor &cursor, bool little_endian, bool has_z, bool has_m); - Geometry ReadGeometry(Cursor &cursor); - -public: - explicit WKBReader(ArenaAllocator &arena) : arena(arena) { - } - Geometry Deserialize(const string_t &wkb); - Geometry Deserialize(const_data_ptr_t wkb, uint32_t size); - bool GeomHasZ() const { - return has_any_z; - } - bool GeomHasM() const { - return has_any_m; - }; -}; - -} // namespace core - -} // namespace spatial diff --git a/spatial/include/spatial/core/geometry/wkt_reader.hpp b/spatial/include/spatial/core/geometry/wkt_reader.hpp deleted file mode 100644 index 015ac9b2..00000000 --- a/spatial/include/spatial/core/geometry/wkt_reader.hpp +++ /dev/null @@ -1,54 +0,0 @@ -#pragma once -#include "spatial/common.hpp" 
-#include "spatial/core/geometry/geometry.hpp" - -namespace spatial { - -namespace core { - -class WKTReader { -private: - ArenaAllocator &arena; - const char *cursor; - const char *start; - const char *end; - bool zm_set; - bool has_z; - bool has_m; - - string GetErrorContext(); - bool TryParseDouble(double &data); - double ParseDouble(); - string ParseWord(); - bool Match(char c); - bool MatchCI(const char *str); - void Expect(char c); - void ParseVertex(vector &coords); - pair> ParseVertices(); - - Geometry ParsePoint(); - Geometry ParseLineString(); - Geometry ParsePolygon(); - Geometry ParseMultiPoint(); - Geometry ParseMultiLineString(); - Geometry ParseMultiPolygon(); - Geometry ParseGeometryCollection(); - void CheckZM(); - Geometry ParseGeometry(); - Geometry ParseWKT(); - -public: - explicit WKTReader(ArenaAllocator &arena) : arena(arena), cursor(nullptr) { - } - bool GeomHasZ() const { - return has_z; - } - bool GeomHasM() const { - return has_m; - } - Geometry Parse(const string_t &wkt); -}; - -} // namespace core - -} // namespace spatial diff --git a/spatial/include/spatial/core/io/shapefile.hpp b/spatial/include/spatial/core/io/shapefile.hpp deleted file mode 100644 index e359d485..00000000 --- a/spatial/include/spatial/core/io/shapefile.hpp +++ /dev/null @@ -1,103 +0,0 @@ -#pragma once -#include "shapefil.h" - -namespace spatial { - -namespace core { - -struct SHPHandleDeleter { - void operator()(SHPInfo *info) { - if (info) { - SHPClose(info); - } - } -}; -using SHPHandlePtr = unique_ptr; - -struct DBFHandleDeleter { - void operator()(DBFInfo *info) { - if (info) { - DBFClose(info); - } - } -}; - -using DBFHandlePtr = unique_ptr; - -struct SHPObjectDeleter { - void operator()(SHPObject *obj) { - if (obj) { - SHPDestroyObject(obj); - } - } -}; - -using SHPObjectPtr = unique_ptr; - -DBFHandlePtr OpenDBFFile(FileSystem &fs, const string &filename); -SHPHandlePtr OpenSHPFile(FileSystem &fs, const string &filename); - -enum class AttributeEncoding { - 
UTF8, - LATIN1, - BLOB, -}; - -struct EncodingUtil { - static inline uint8_t GetUTF8ByteLength(data_t first_char) { - if (first_char < 0x80) - return 1; - if (!(first_char & 0x20)) - return 2; - if (!(first_char & 0x10)) - return 3; - if (!(first_char & 0x08)) - return 4; - if (!(first_char & 0x04)) - return 5; - return 6; - } - static inline data_t UTF8ToLatin1Char(const_data_ptr_t ptr) { - auto len = GetUTF8ByteLength(*ptr); - if (len == 1) { - return *ptr; - } - uint32_t res = static_cast(*ptr & (0xff >> (len + 1))) << ((len - 1) * 6); - while (--len) { - res |= (*(++ptr) - 0x80) << ((len - 1) * 6); - } - // TODO: Throw exception instead if character can't be encoded? - return res > 0xff ? '?' : static_cast(res); - } - - // Convert UTF-8 to ISO-8859-1 - // out must be at least the size of in - static void UTF8ToLatin1Buffer(const_data_ptr_t in, data_ptr_t out) { - while (*in) { - *out++ = UTF8ToLatin1Char(in); - } - *out = 0; - } - - // convert ISO-8859-1 to UTF-8 - // mind = blown - // out must be at least 2x the size of in - static idx_t LatinToUTF8Buffer(const_data_ptr_t in, data_ptr_t out) { - idx_t len = 0; - while (*in) { - if (*in < 128) { - *out++ = *in++; - len += 1; - } else { - *out++ = 0xc2 + (*in > 0xbf); - *out++ = (*in++ & 0x3f) + 0x80; - len += 2; - } - } - return len; - } -}; - -} // namespace core - -} // namespace spatial \ No newline at end of file diff --git a/spatial/include/spatial/core/layout_benchmark/test.hpp b/spatial/include/spatial/core/layout_benchmark/test.hpp deleted file mode 100644 index 1138bbb5..00000000 --- a/spatial/include/spatial/core/layout_benchmark/test.hpp +++ /dev/null @@ -1,15 +0,0 @@ -#pragma once - -#include "spatial/common.hpp" - -namespace spatial { - -namespace core { - -struct LayoutBenchmark { - static void Register(ClientContext &context); -}; - -} // namespace core - -} // namespace spatial \ No newline at end of file diff --git a/spatial/include/spatial/core/module.hpp 
b/spatial/include/spatial/core/module.hpp deleted file mode 100644 index c6b69790..00000000 --- a/spatial/include/spatial/core/module.hpp +++ /dev/null @@ -1,16 +0,0 @@ -#pragma once -#include "spatial/common.hpp" -#include "spatial/core/geometry/geometry.hpp" - -namespace spatial { - -namespace core { - -struct CoreModule { -public: - static void Register(DatabaseInstance &db); -}; - -} // namespace core - -} // namespace spatial \ No newline at end of file diff --git a/spatial/include/spatial/core/optimizer_rules.hpp b/spatial/include/spatial/core/optimizer_rules.hpp deleted file mode 100644 index 654d0faa..00000000 --- a/spatial/include/spatial/core/optimizer_rules.hpp +++ /dev/null @@ -1,15 +0,0 @@ -#pragma once -#include "spatial/common.hpp" - -namespace spatial { - -namespace core { - -struct CoreOptimizerRules { -public: - static void Register(DatabaseInstance &db); -}; - -} // namespace core - -} // namespace spatial \ No newline at end of file diff --git a/spatial/include/spatial/doc_util.hpp b/spatial/include/spatial/doc_util.hpp deleted file mode 100644 index 86fca3eb..00000000 --- a/spatial/include/spatial/doc_util.hpp +++ /dev/null @@ -1,30 +0,0 @@ -#pragma once -#include "duckdb.hpp" -#include "duckdb/main/extension_util.hpp" - -namespace spatial { - -struct DocTag { - const char *key; - const char *value; -}; - -struct DocUtil { - static void AddDocumentation(duckdb::DatabaseInstance &db, const char *function_name, const char *description, - const char *example, const duckdb::unordered_map &tags, - duckdb::vector parameter_names = {}); - - // Abuse adding tags as a comment - template - static void AddDocumentation(duckdb::DatabaseInstance &db, const char *function_name, const char *description, - const char *example, const DocTag (&tags)[N], - duckdb::vector parameter_names = {}) { - duckdb::unordered_map tag_map; - for (size_t i = 0; i < N; i++) { - tag_map[tags[i].key] = tags[i].value; - } - AddDocumentation(db, function_name, description, example, 
tag_map, parameter_names); - } -}; - -} // namespace spatial diff --git a/spatial/include/spatial/gdal/file_handler.hpp b/spatial/include/spatial/gdal/file_handler.hpp deleted file mode 100644 index 2143ae93..00000000 --- a/spatial/include/spatial/gdal/file_handler.hpp +++ /dev/null @@ -1,27 +0,0 @@ -#pragma once - -#include "spatial/common.hpp" -#include "duckdb/main/database.hpp" - -namespace spatial { - -namespace gdal { - -class DuckDBFileSystemHandler; - -class GDALClientContextState : public ClientContextState { - ClientContext &context; - string client_prefix; - DuckDBFileSystemHandler *fs_handler; - -public: - explicit GDALClientContextState(ClientContext &context); - ~GDALClientContextState() override; - void QueryEnd() override; - string GetPrefix(const string &value) const; - static GDALClientContextState &GetOrCreate(ClientContext &context); -}; - -} // namespace gdal - -} // namespace spatial diff --git a/spatial/include/spatial/gdal/functions.hpp b/spatial/include/spatial/gdal/functions.hpp deleted file mode 100644 index 0a4bb2a5..00000000 --- a/spatial/include/spatial/gdal/functions.hpp +++ /dev/null @@ -1,66 +0,0 @@ -#pragma once - -#include "duckdb/function/table/arrow.hpp" -#include "duckdb/parser/parsed_data/copy_info.hpp" -#include "duckdb/function/copy_function.hpp" -#include "duckdb/function/replacement_scan.hpp" - -#include "spatial/common.hpp" - -namespace spatial { - -namespace gdal { - -struct GdalTableFunction : ArrowTableFunction { -private: - static unique_ptr Bind(ClientContext &context, TableFunctionBindInput &input, - vector &return_types, vector &names); - static void RenameColumns(vector &names); - - static unique_ptr InitGlobal(ClientContext &context, TableFunctionInitInput &input); - static unique_ptr InitLocal(ExecutionContext &context, TableFunctionInitInput &input, - GlobalTableFunctionState *global_state_p); - - static void Scan(ClientContext &context, TableFunctionInput &input, DataChunk &output); - - static idx_t 
MaxThreads(ClientContext &context, const FunctionData *bind_data_p); - - static unique_ptr Cardinality(ClientContext &context, const FunctionData *data); - - static unique_ptr ReplacementScan(ClientContext &context, ReplacementScanInput &input, - optional_ptr data); - -public: - static void Register(DatabaseInstance &db); -}; - -struct GdalDriversTableFunction { - - struct BindData : public TableFunctionData { - idx_t driver_count; - BindData(idx_t driver_count) : driver_count(driver_count) { - } - }; - - struct State : public GlobalTableFunctionState { - idx_t current_idx; - State() : current_idx(0) { - } - }; - static unique_ptr Init(ClientContext &context, TableFunctionInitInput &input); - static void Execute(ClientContext &context, TableFunctionInput &data_p, DataChunk &output); - static void Register(DatabaseInstance &db); - static unique_ptr Bind(ClientContext &context, TableFunctionBindInput &input, - vector &return_types, vector &names); -}; -struct GdalCopyFunction { - static void Register(DatabaseInstance &db); -}; - -struct GdalMetadataFunction { - static void Register(DatabaseInstance &db); -}; - -} // namespace gdal - -} // namespace spatial \ No newline at end of file diff --git a/spatial/include/spatial/gdal/module.hpp b/spatial/include/spatial/gdal/module.hpp deleted file mode 100644 index 4e7b813f..00000000 --- a/spatial/include/spatial/gdal/module.hpp +++ /dev/null @@ -1,15 +0,0 @@ -#pragma once -#include "spatial/common.hpp" - -namespace spatial { - -namespace gdal { - -struct GdalModule { -public: - static void Register(DatabaseInstance &db); -}; - -} // namespace gdal - -} // namespace spatial \ No newline at end of file diff --git a/spatial/include/spatial/geographiclib/functions.hpp b/spatial/include/spatial/geographiclib/functions.hpp deleted file mode 100644 index 4bc67c28..00000000 --- a/spatial/include/spatial/geographiclib/functions.hpp +++ /dev/null @@ -1,29 +0,0 @@ -#pragma once - -#include "spatial/common.hpp" - -namespace spatial { - 
-namespace geographiclib { - -struct GeographicLibFunctions { -public: - static void Register(DatabaseInstance &db) { - RegisterDistance(db); - RegisterDistanceWithin(db); - RegisterLength(db); - RegisterArea(db); - RegisterPerimeter(db); - } - -private: - static void RegisterDistance(DatabaseInstance &db); - static void RegisterDistanceWithin(DatabaseInstance &db); - static void RegisterLength(DatabaseInstance &db); - static void RegisterArea(DatabaseInstance &db); - static void RegisterPerimeter(DatabaseInstance &db); -}; - -} // namespace geographiclib - -} // namespace spatial \ No newline at end of file diff --git a/spatial/include/spatial/geographiclib/module.hpp b/spatial/include/spatial/geographiclib/module.hpp deleted file mode 100644 index d8bd5bc2..00000000 --- a/spatial/include/spatial/geographiclib/module.hpp +++ /dev/null @@ -1,15 +0,0 @@ -#pragma once -#include "spatial/common.hpp" - -namespace spatial { - -namespace geographiclib { - -struct GeographicLibModule { -public: - static void Register(DatabaseInstance &db); -}; - -} // namespace geographiclib - -} // namespace spatial \ No newline at end of file diff --git a/spatial/include/spatial/geos/functions/aggregate.hpp b/spatial/include/spatial/geos/functions/aggregate.hpp deleted file mode 100644 index 200d11cf..00000000 --- a/spatial/include/spatial/geos/functions/aggregate.hpp +++ /dev/null @@ -1,14 +0,0 @@ -#pragma once -#include "spatial/common.hpp" - -namespace spatial { - -namespace geos { - -struct GeosAggregateFunctions { - static void Register(DatabaseInstance &db); -}; - -} // namespace geos - -} // namespace spatial \ No newline at end of file diff --git a/spatial/include/spatial/geos/functions/cast.hpp b/spatial/include/spatial/geos/functions/cast.hpp deleted file mode 100644 index 778926d9..00000000 --- a/spatial/include/spatial/geos/functions/cast.hpp +++ /dev/null @@ -1,14 +0,0 @@ -#pragma once -#include "spatial/common.hpp" - -namespace spatial { - -namespace geos { - -struct 
GeosCastFunctions { - static void Register(DatabaseInstance &db); -}; - -} // namespace geos - -} // namespace spatial \ No newline at end of file diff --git a/spatial/include/spatial/geos/functions/common.hpp b/spatial/include/spatial/geos/functions/common.hpp deleted file mode 100644 index dd1be748..00000000 --- a/spatial/include/spatial/geos/functions/common.hpp +++ /dev/null @@ -1,25 +0,0 @@ -#pragma once -#include "spatial/common.hpp" -#include "spatial/geos/geos_wrappers.hpp" - -namespace spatial { - -namespace geos { - -struct GEOSFunctionLocalState : FunctionLocalState { -public: - GeosContextWrapper ctx; - ArenaAllocator arena; - -public: - explicit GEOSFunctionLocalState(ClientContext &context); - static unique_ptr Init(ExpressionState &state, const BoundFunctionExpression &expr, - FunctionData *bind_data); - static unique_ptr InitCast(CastLocalStateParameters ¶meters); - static GEOSFunctionLocalState &ResetAndGet(ExpressionState &state); - static GEOSFunctionLocalState &ResetAndGet(CastParameters ¶meters); -}; - -} // namespace geos - -} // namespace spatial \ No newline at end of file diff --git a/spatial/include/spatial/geos/functions/scalar.hpp b/spatial/include/spatial/geos/functions/scalar.hpp deleted file mode 100644 index 62aff4eb..00000000 --- a/spatial/include/spatial/geos/functions/scalar.hpp +++ /dev/null @@ -1,86 +0,0 @@ -#pragma once -#include "spatial/common.hpp" - -namespace spatial { - -namespace geos { - -struct GEOSScalarFunctions { -public: - static void Register(DatabaseInstance &db) { - RegisterStBoundary(db); - RegisterStBuffer(db); - RegisterStCentroid(db); - RegisterStContains(db); - RegisterStContainsProperly(db); - RegisterStConvexHull(db); - RegisterStCoveredBy(db); - RegisterStCovers(db); - RegisterStCrosses(db); - RegisterStDifference(db); - RegisterStDisjoint(db); - RegisterStDistance(db); - RegisterStDistanceWithin(db); - RegisterStEquals(db); - RegisterStEnvelope(db); - RegisterStIntersection(db); - 
RegisterStIntersects(db); - RegisterStIsRing(db); - RegisterStIsSimple(db); - RegisterStIsValid(db); - RegisterStLineMerge(db); - RegisterStMakeValid(db); - RegisterStNormalize(db); - RegisterStOverlaps(db); - RegisterStPointOnSurface(db); - RegisterStReducePrecision(db); - RegisterStRemoveRepeatedPoints(db); - RegisterStReverse(db); - RegisterStShortestLine(db); - RegisterStSimplifyPreserveTopology(db); - RegisterStSimplify(db); - RegisterStTouches(db); - RegisterStUnion(db); - RegisterStWithin(db); - } - -private: - static void RegisterStBoundary(DatabaseInstance &db); - static void RegisterStBuffer(DatabaseInstance &db); - static void RegisterStCentroid(DatabaseInstance &db); - static void RegisterStContains(DatabaseInstance &db); - static void RegisterStContainsProperly(DatabaseInstance &db); - static void RegisterStConvexHull(DatabaseInstance &db); - static void RegisterStCoveredBy(DatabaseInstance &db); - static void RegisterStCovers(DatabaseInstance &db); - static void RegisterStCrosses(DatabaseInstance &db); - static void RegisterStDifference(DatabaseInstance &db); - static void RegisterStDisjoint(DatabaseInstance &db); - static void RegisterStDistance(DatabaseInstance &db); - static void RegisterStDistanceWithin(DatabaseInstance &db); - static void RegisterStEquals(DatabaseInstance &db); - static void RegisterStEnvelope(DatabaseInstance &db); - static void RegisterStIntersection(DatabaseInstance &db); - static void RegisterStIntersects(DatabaseInstance &db); - static void RegisterStIsRing(DatabaseInstance &db); - static void RegisterStIsSimple(DatabaseInstance &db); - static void RegisterStIsValid(DatabaseInstance &db); - static void RegisterStNormalize(DatabaseInstance &db); - static void RegisterStOverlaps(DatabaseInstance &db); - static void RegisterStPointOnSurface(DatabaseInstance &db); - static void RegisterStReducePrecision(DatabaseInstance &db); - static void RegisterStRemoveRepeatedPoints(DatabaseInstance &db); - static void 
RegisterStReverse(DatabaseInstance &db); - static void RegisterStLineMerge(DatabaseInstance &db); - static void RegisterStMakeValid(DatabaseInstance &db); - static void RegisterStShortestLine(DatabaseInstance &db); - static void RegisterStSimplifyPreserveTopology(DatabaseInstance &db); - static void RegisterStSimplify(DatabaseInstance &db); - static void RegisterStTouches(DatabaseInstance &db); - static void RegisterStUnion(DatabaseInstance &db); - static void RegisterStWithin(DatabaseInstance &db); -}; - -} // namespace geos - -} // namespace spatial \ No newline at end of file diff --git a/spatial/include/spatial/geos/geos_executor.hpp b/spatial/include/spatial/geos/geos_executor.hpp deleted file mode 100644 index 89350691..00000000 --- a/spatial/include/spatial/geos/geos_executor.hpp +++ /dev/null @@ -1,102 +0,0 @@ -#pragma once - -#include "spatial/common.hpp" -#include "spatial/core/types.hpp" -#include "spatial/geos/functions/scalar.hpp" -#include "spatial/geos/functions/common.hpp" -#include "spatial/geos/geos_wrappers.hpp" - -#include "duckdb/parser/parsed_data/create_scalar_function_info.hpp" -#include "duckdb/common/vector_operations/unary_executor.hpp" -#include "duckdb/common/vector_operations/binary_executor.hpp" - -namespace spatial { - -namespace geos { - -// Reflexive: Equals, Contains, Covers, CoveredBy, Intersects, Within -// Anti-reflexive: Disjoint -// Symmetric: Equals, Intersects, Crosses, Touches, Overlaps (and Disjoint? since Disjoint != Intersects) -// Transitive: Equals, Contains, Covers, CoveredBy, Within - -// Optimize binary predicate helper which use prepared geometry when one of the arguments is a constant -// This is much more common than you would think, e.g. joins produce a lot of constant vectors. 
-typedef char (*GEOSBinaryPredicate)(GEOSContextHandle_t ctx, const GEOSGeometry *left, const GEOSGeometry *right); -typedef char (*GEOSPreparedBinaryPredicate)(GEOSContextHandle_t ctx, const GEOSPreparedGeometry *left, - const GEOSGeometry *right); - -struct GEOSExecutor { - // Symmetric: left and right can be swapped - // So we prepare either if one is constant - static void ExecuteSymmetricPreparedBinary(GEOSFunctionLocalState &lstate, Vector &left, Vector &right, idx_t count, - Vector &result, GEOSBinaryPredicate normal, - GEOSPreparedBinaryPredicate prepared) { - auto &ctx = lstate.ctx.GetCtx(); - - if (left.GetVectorType() == VectorType::CONSTANT_VECTOR && - right.GetVectorType() != VectorType::CONSTANT_VECTOR && !ConstantVector::IsNull(left)) { - auto &left_blob = ConstantVector::GetData(left)[0]; - auto left_geom = lstate.ctx.Deserialize(left_blob); - auto left_prepared = make_uniq_geos(ctx, GEOSPrepare_r(ctx, left_geom.get())); - - UnaryExecutor::Execute(right, result, count, [&](geometry_t &right_blob) { - auto right_geometry = lstate.ctx.Deserialize(right_blob); - auto ok = prepared(ctx, left_prepared.get(), right_geometry.get()); - return ok == 1; - }); - } else if (right.GetVectorType() == VectorType::CONSTANT_VECTOR && - left.GetVectorType() != VectorType::CONSTANT_VECTOR && !ConstantVector::IsNull(right)) { - auto &right_blob = ConstantVector::GetData(right)[0]; - auto right_geom = lstate.ctx.Deserialize(right_blob); - auto right_prepared = make_uniq_geos(ctx, GEOSPrepare_r(ctx, right_geom.get())); - - UnaryExecutor::Execute(left, result, count, [&](geometry_t &left_blob) { - auto left_geometry = lstate.ctx.Deserialize(left_blob); - auto ok = prepared(ctx, right_prepared.get(), left_geometry.get()); - return ok == 1; - }); - } else { - BinaryExecutor::Execute( - left, right, result, count, [&](geometry_t &left_blob, geometry_t &right_blob) { - auto left_geometry = lstate.ctx.Deserialize(left_blob); - auto right_geometry = 
lstate.ctx.Deserialize(right_blob); - auto ok = normal(ctx, left_geometry.get(), right_geometry.get()); - return ok == 1; - }); - } - } - - // Non symmetric: left and right cannot be swapped - // So we only prepare left if left is constant - static void ExecuteNonSymmetricPreparedBinary(GEOSFunctionLocalState &lstate, Vector &left, Vector &right, - idx_t count, Vector &result, GEOSBinaryPredicate normal, - GEOSPreparedBinaryPredicate prepared) { - auto &ctx = lstate.ctx.GetCtx(); - - // Optimize: if one of the arguments is a constant, we can prepare it once and reuse it - if (left.GetVectorType() == VectorType::CONSTANT_VECTOR && - right.GetVectorType() != VectorType::CONSTANT_VECTOR && !ConstantVector::IsNull(left)) { - auto &left_blob = ConstantVector::GetData(left)[0]; - auto left_geom = lstate.ctx.Deserialize(left_blob); - auto left_prepared = make_uniq_geos(ctx, GEOSPrepare_r(ctx, left_geom.get())); - - UnaryExecutor::Execute(right, result, count, [&](geometry_t &right_blob) { - auto right_geometry = lstate.ctx.Deserialize(right_blob); - auto ok = prepared(ctx, left_prepared.get(), right_geometry.get()); - return ok == 1; - }); - } else { - BinaryExecutor::Execute( - left, right, result, count, [&](geometry_t &left_blob, geometry_t &right_blob) { - auto left_geometry = lstate.ctx.Deserialize(left_blob); - auto right_geometry = lstate.ctx.Deserialize(right_blob); - auto ok = normal(ctx, left_geometry.get(), right_geometry.get()); - return ok == 1; - }); - } - } -}; - -} // namespace geos - -} // namespace spatial \ No newline at end of file diff --git a/spatial/include/spatial/geos/geos_wrappers.hpp b/spatial/include/spatial/geos/geos_wrappers.hpp deleted file mode 100644 index c946a650..00000000 --- a/spatial/include/spatial/geos/geos_wrappers.hpp +++ /dev/null @@ -1,215 +0,0 @@ -#pragma once -#include "spatial/common.hpp" -#include "spatial/core/geometry/geometry.hpp" -#include "geos_c.h" - -namespace spatial { - -namespace geos { - -using namespace core; - 
-template -struct GeosDeleter { - GEOSContextHandle_t ctx; - void operator()(T *ptr) const = delete; -}; - -template <> -struct GeosDeleter { - GEOSContextHandle_t ctx; - void operator()(GEOSGeometry *ptr) const { - GEOSGeom_destroy_r(ctx, ptr); - } -}; - -template <> -struct GeosDeleter { - GEOSContextHandle_t ctx; - void operator()(GEOSPreparedGeometry *ptr) const { - GEOSPreparedGeom_destroy_r(ctx, ptr); - } -}; - -template <> -struct GeosDeleter { - GEOSContextHandle_t ctx; - void operator()(GEOSWKBReader_t *ptr) const { - GEOSWKBReader_destroy_r(ctx, ptr); - } -}; - -template <> -struct GeosDeleter { - GEOSContextHandle_t ctx; - void operator()(const GEOSPreparedGeometry *ptr) const { - GEOSPreparedGeom_destroy_r(ctx, ptr); - } -}; - -template -unique_ptr> make_uniq_geos(GEOSContextHandle_t ctx, T *ptr) { - return unique_ptr>(ptr, GeosDeleter {ctx}); -} - -using GeometryPtr = unique_ptr>; - -struct WKBReader { - GEOSContextHandle_t ctx; - GEOSWKBReader_t *reader; - - explicit WKBReader(GEOSContextHandle_t ctx) : ctx(ctx) { - reader = GEOSWKBReader_create_r(ctx); - } - - GeometryPtr Read(const unsigned char *wkb, size_t size) const { - auto geom = GEOSWKBReader_read_r(ctx, reader, wkb, size); - if (!geom) { - throw InvalidInputException("Could not read WKB"); - } - return make_uniq_geos(ctx, geom); - } - - GeometryPtr Read(string_t &wkb) const { - return Read((const unsigned char *)wkb.GetDataUnsafe(), wkb.GetSize()); - } - - ~WKBReader() { - GEOSWKBReader_destroy_r(ctx, reader); - } -}; - -struct WKBWriter { - GEOSContextHandle_t ctx; - GEOSWKBWriter_t *writer; - - explicit WKBWriter(GEOSContextHandle_t ctx) : ctx(ctx) { - writer = GEOSWKBWriter_create_r(ctx); - } - - void Write(const GeometryPtr &geom, std::ostream &stream) const { - size_t size = 0; - auto wkb = GEOSWKBWriter_write_r(ctx, writer, geom.get(), &size); - if (!wkb) { - throw InvalidInputException("Could not write WKB"); - } - stream.write((const char *)wkb, (long)size); - GEOSFree_r(ctx, wkb); - 
} - - string_t Write(const GeometryPtr &geom, Vector &vec) const { - std::stringstream buf; - Write(geom, buf); - return StringVector::AddStringOrBlob(vec, buf.str()); - } - - ~WKBWriter() { - GEOSWKBWriter_destroy_r(ctx, writer); - } -}; - -struct WKTReader { - GEOSContextHandle_t ctx; - GEOSWKTReader_t *reader; - - explicit WKTReader(GEOSContextHandle_t ctx) : ctx(ctx) { - reader = GEOSWKTReader_create_r(ctx); - } - - ~WKTReader() { - GEOSWKTReader_destroy_r(ctx, reader); - } - - GeometryPtr Read(string_t &wkt) const { - auto str = wkt.GetString(); - auto geom = GEOSWKTReader_read_r(ctx, reader, str.c_str()); - if (!geom) { - return nullptr; - } - return make_uniq_geos(ctx, geom); - } -}; - -struct WKTWriter { - GEOSContextHandle_t ctx; - GEOSWKTWriter_t *writer; - - explicit WKTWriter(GEOSContextHandle_t ctx) : ctx(ctx) { - writer = GEOSWKTWriter_create_r(ctx); - } - - ~WKTWriter() { - GEOSWKTWriter_destroy_r(ctx, writer); - } - - void SetTrim(bool trim) const { - GEOSWKTWriter_setTrim_r(ctx, writer, trim ? 
1 : 0); - } - - void Write(const GeometryPtr &geom, std::ostream &stream) const { - auto wkt = GEOSWKTWriter_write_r(ctx, writer, geom.get()); - if (!wkt) { - throw InvalidInputException("Could not write WKT"); - } - stream << wkt; - GEOSFree_r(ctx, wkt); - } - - string_t Write(const GeometryPtr &geom, Vector &vec) const { - auto wkt = GEOSWKTWriter_write_r(ctx, writer, geom.get()); - if (!wkt) { - throw InvalidInputException("Could not write WKT"); - } - auto str = StringVector::AddStringOrBlob(vec, wkt); - GEOSFree_r(ctx, wkt); - return str; - } -}; - -struct GeosContextWrapper { -private: - GEOSContextHandle_t ctx; - -public: - GeosContextWrapper() { - ctx = GEOS_init_r(); - GEOSContext_setErrorMessageHandler_r(ctx, ErrorHandler, (void *)nullptr); - } - ~GeosContextWrapper() { - GEOS_finish_r(ctx); - } - - static void ErrorHandler(const char *message, void *userdata) { - throw InvalidInputException(message); - } - - inline const GEOSContextHandle_t &GetCtx() { - return ctx; - } - - WKBReader CreateWKBReader() const { - return WKBReader(ctx); - } - - WKBWriter CreateWKBWriter() const { - return WKBWriter(ctx); - } - - WKTWriter CreateWKTWriter() const { - return WKTWriter(ctx); - } - - WKTReader CreateWKTReader() const { - return WKTReader(ctx); - } - - unique_ptr> Deserialize(const geometry_t &blob); - geometry_t Serialize(Vector &result, const unique_ptr> &geom); -}; - -GEOSGeometry *DeserializeGEOSGeometry(const geometry_t &blob, GEOSContextHandle_t ctx); -geometry_t SerializeGEOSGeometry(Vector &result, const GEOSGeometry *geom, GEOSContextHandle_t ctx); - -} // namespace geos - -} // namespace spatial \ No newline at end of file diff --git a/spatial/include/spatial/geos/module.hpp b/spatial/include/spatial/geos/module.hpp deleted file mode 100644 index b9240030..00000000 --- a/spatial/include/spatial/geos/module.hpp +++ /dev/null @@ -1,15 +0,0 @@ -#pragma once -#include "spatial/common.hpp" - -namespace spatial { - -namespace geos { - -struct GeosModule { 
-public: - static void Register(DatabaseInstance &db); -}; - -} // namespace geos - -} // namespace spatial \ No newline at end of file diff --git a/spatial/include/spatial/proj/functions.hpp b/spatial/include/spatial/proj/functions.hpp deleted file mode 100644 index b3fd00ea..00000000 --- a/spatial/include/spatial/proj/functions.hpp +++ /dev/null @@ -1,17 +0,0 @@ -#pragma once - -#include "spatial/common.hpp" - -namespace spatial { - -namespace proj { - -struct ProjFunctions { - -public: - static void Register(DatabaseInstance &db); -}; - -} // namespace proj - -} // namespace spatial \ No newline at end of file diff --git a/spatial/include/spatial/proj/module.hpp b/spatial/include/spatial/proj/module.hpp deleted file mode 100644 index d86c965e..00000000 --- a/spatial/include/spatial/proj/module.hpp +++ /dev/null @@ -1,19 +0,0 @@ -#pragma once - -#include "spatial/common.hpp" - -#include "proj.h" - -namespace spatial { - -namespace proj { - -struct ProjModule { -public: - static PJ_CONTEXT *GetThreadProjContext(); - static void Register(DatabaseInstance &db); -}; - -} // namespace proj - -} // namespace spatial \ No newline at end of file diff --git a/spatial/src/CMakeLists.txt b/spatial/src/CMakeLists.txt deleted file mode 100644 index 75d1024e..00000000 --- a/spatial/src/CMakeLists.txt +++ /dev/null @@ -1,5 +0,0 @@ -add_subdirectory(spatial) - -set(EXTENSION_SOURCES - ${EXTENSION_SOURCES} ${CMAKE_CURRENT_SOURCE_DIR}/spatial_extension.cpp - PARENT_SCOPE) diff --git a/spatial/src/spatial/CMakeLists.txt b/spatial/src/spatial/CMakeLists.txt deleted file mode 100644 index 25ba1e69..00000000 --- a/spatial/src/spatial/CMakeLists.txt +++ /dev/null @@ -1,10 +0,0 @@ -add_subdirectory(core) -add_subdirectory(gdal) -add_subdirectory(geos) -add_subdirectory(proj) -add_subdirectory(geographiclib) - -set(EXTENSION_SOURCES - ${EXTENSION_SOURCES} - PARENT_SCOPE -) \ No newline at end of file diff --git a/spatial/src/spatial/core/CMakeLists.txt 
b/spatial/src/spatial/core/CMakeLists.txt deleted file mode 100644 index 35aa5719..00000000 --- a/spatial/src/spatial/core/CMakeLists.txt +++ /dev/null @@ -1,15 +0,0 @@ -add_subdirectory(geometry) -add_subdirectory(index) -add_subdirectory(functions) -add_subdirectory(io) -add_subdirectory(util) - -set(EXTENSION_SOURCES - ${EXTENSION_SOURCES} - ${CMAKE_CURRENT_SOURCE_DIR}/function_builder.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/geoarrow.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/module.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/types.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/optimizer_rules.cpp - PARENT_SCOPE -) \ No newline at end of file diff --git a/spatial/src/spatial/core/function_builder.cpp b/spatial/src/spatial/core/function_builder.cpp deleted file mode 100644 index 7367287b..00000000 --- a/spatial/src/spatial/core/function_builder.cpp +++ /dev/null @@ -1,78 +0,0 @@ -#include "spatial/core/function_builder.hpp" -#include "duckdb/catalog/catalog_entry/function_entry.hpp" -#include "duckdb/main/extension_util.hpp" - -namespace spatial { - -namespace core { - -static string RemoveIndentAndTrailingWhitespace(const char *text) { - string result; - // Skip any empty first newlines if present - while (*text == '\n') { - text++; - } - - // Track indent length - auto indent_start = text; - while (isspace(*text) && *text != '\n') { - text++; - } - auto indent_len = text - indent_start; - while (*text) { - result += *text; - if (*text++ == '\n') { - // Remove all indentation, but only if it matches the first line's indentation - bool matched_indent = true; - for (auto i = 0; i < indent_len; i++) { - if (*text != indent_start[i]) { - matched_indent = false; - break; - } - } - if (matched_indent) { - text += indent_len; - } - } - } - - // Also remove any trailing whitespace - result.erase(result.find_last_not_of(" \n\r\t") + 1); - return result; -} - -void FunctionBuilder::Register(DatabaseInstance &db, const char *name, ScalarFunctionBuilder &builder) { - // Register the function - 
ExtensionUtil::RegisterFunction(db, std::move(builder.set)); - - // Also add the parameter names. We need to access the catalog entry for this. - auto &catalog = Catalog::GetSystemCatalog(db); - auto transaction = CatalogTransaction::GetSystemTransaction(db); - auto &schema = catalog.GetSchema(transaction, DEFAULT_SCHEMA); - auto catalog_entry = schema.GetEntry(transaction, CatalogType::SCALAR_FUNCTION_ENTRY, name); - if (!catalog_entry) { - // This should not happen, we just registered the function - throw InternalException("Function with name \"%s\" not found in FunctionBuilder::AddScalar", name); - } - - auto &func_entry = catalog_entry->Cast(); - - // Insert all descriptions - for (auto &desc : builder.descriptions) { - - desc.description = RemoveIndentAndTrailingWhitespace(desc.description.c_str()); - for (auto &ex : desc.examples) { - ex = RemoveIndentAndTrailingWhitespace(ex.c_str()); - } - - func_entry.descriptions.push_back(desc); - } - - if (!builder.tags.empty()) { - func_entry.tags = std::move(builder.tags); - } -} - -} // namespace core - -} // namespace spatial \ No newline at end of file diff --git a/spatial/src/spatial/core/functions/CMakeLists.txt b/spatial/src/spatial/core/functions/CMakeLists.txt deleted file mode 100644 index d7b1692c..00000000 --- a/spatial/src/spatial/core/functions/CMakeLists.txt +++ /dev/null @@ -1,11 +0,0 @@ -add_subdirectory(aggregate) -add_subdirectory(scalar) -add_subdirectory(cast) -add_subdirectory(table) - -set(EXTENSION_SOURCES - ${CMAKE_CURRENT_SOURCE_DIR}/common.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/macros.cpp - ${EXTENSION_SOURCES} - PARENT_SCOPE -) \ No newline at end of file diff --git a/spatial/src/spatial/core/functions/aggregate/CMakeLists.txt b/spatial/src/spatial/core/functions/aggregate/CMakeLists.txt deleted file mode 100644 index ce00dcb2..00000000 --- a/spatial/src/spatial/core/functions/aggregate/CMakeLists.txt +++ /dev/null @@ -1,5 +0,0 @@ -set(EXTENSION_SOURCES - ${EXTENSION_SOURCES} - 
${CMAKE_CURRENT_SOURCE_DIR}/st_extent_agg.cpp - PARENT_SCOPE -) \ No newline at end of file diff --git a/spatial/src/spatial/core/functions/cast/CMakeLists.txt b/spatial/src/spatial/core/functions/cast/CMakeLists.txt deleted file mode 100644 index 6848cecf..00000000 --- a/spatial/src/spatial/core/functions/cast/CMakeLists.txt +++ /dev/null @@ -1,8 +0,0 @@ -set(EXTENSION_SOURCES - ${EXTENSION_SOURCES} - ${CMAKE_CURRENT_SOURCE_DIR}/dimensional_cast.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/geometry_cast.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/varchar_cast.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/wkb_cast.cpp - PARENT_SCOPE -) \ No newline at end of file diff --git a/spatial/src/spatial/core/functions/cast/dimensional_cast.cpp b/spatial/src/spatial/core/functions/cast/dimensional_cast.cpp deleted file mode 100644 index 896d689a..00000000 --- a/spatial/src/spatial/core/functions/cast/dimensional_cast.cpp +++ /dev/null @@ -1,41 +0,0 @@ -#include "spatial/common.hpp" -#include "spatial/core/functions/cast.hpp" -#include "spatial/core/types.hpp" - -#include "duckdb/function/cast/cast_function_set.hpp" - -namespace spatial { - -namespace core { - -//------------------------------------------------------------------------------ -// POINT(N) -> POINT_2D -//------------------------------------------------------------------------------ -static bool ToPoint2DCast(Vector &source, Vector &result, idx_t count, CastParameters ¶meters) { - auto &children = StructVector::GetEntries(source); - auto &x_child = children[0]; - auto &y_child = children[1]; - - auto &result_children = StructVector::GetEntries(result); - auto &result_x_child = result_children[0]; - auto &result_y_child = result_children[1]; - result_x_child->Reference(*x_child); - result_y_child->Reference(*y_child); - - if (count == 1) { - result.SetVectorType(VectorType::CONSTANT_VECTOR); - } - return true; -} - -//------------------------------------------------------------------------------ -// Register functions 
-//------------------------------------------------------------------------------ -void CoreCastFunctions::RegisterDimensionalCasts(DatabaseInstance &db) { - ExtensionUtil::RegisterCastFunction(db, GeoTypes::POINT_4D(), GeoTypes::POINT_2D(), ToPoint2DCast, 1); - ExtensionUtil::RegisterCastFunction(db, GeoTypes::POINT_3D(), GeoTypes::POINT_2D(), ToPoint2DCast, 1); -} - -} // namespace core - -} // namespace spatial \ No newline at end of file diff --git a/spatial/src/spatial/core/functions/cast/geometry_cast.cpp b/spatial/src/spatial/core/functions/cast/geometry_cast.cpp deleted file mode 100644 index d31caf3e..00000000 --- a/spatial/src/spatial/core/functions/cast/geometry_cast.cpp +++ /dev/null @@ -1,276 +0,0 @@ -#include "spatial/common.hpp" -#include "spatial/core/types.hpp" -#include "spatial/core/functions/cast.hpp" -#include "spatial/core/geometry/geometry.hpp" -#include "spatial/core/functions/common.hpp" -#include "duckdb/common/exception/conversion_exception.hpp" -#include "duckdb/function/cast/cast_function_set.hpp" -#include "duckdb/common/vector_operations/generic_executor.hpp" - -namespace spatial { - -namespace core { - -//------------------------------------------------------------------------------ -// Point2D -> Geometry -//------------------------------------------------------------------------------ -static bool Point2DToGeometryCast(Vector &source, Vector &result, idx_t count, CastParameters ¶meters) { - using POINT_TYPE = StructTypeBinary; - using GEOMETRY_TYPE = PrimitiveType; - - auto &lstate = GeometryFunctionLocalState::ResetAndGet(parameters); - auto &arena = lstate.arena; - - GenericExecutor::ExecuteUnary(source, result, count, [&](POINT_TYPE &point) { - auto geom = Point::CreateFromVertex(arena, VertexXY {point.a_val, point.b_val}); - return Geometry::Serialize(geom, result); - }); - return true; -} - -//------------------------------------------------------------------------------ -// Geometry -> Point2D 
-//------------------------------------------------------------------------------ -static bool GeometryToPoint2DCast(Vector &source, Vector &result, idx_t count, CastParameters ¶meters) { - using POINT_TYPE = StructTypeBinary; - using GEOMETRY_TYPE = PrimitiveType; - - auto &lstate = GeometryFunctionLocalState::ResetAndGet(parameters); - auto &arena = lstate.arena; - - GenericExecutor::ExecuteUnary(source, result, count, [&](GEOMETRY_TYPE &geometry) { - auto geom = Geometry::Deserialize(arena, geometry.val); - if (geom.GetType() != GeometryType::POINT) { - throw ConversionException("Cannot cast non-point GEOMETRY to POINT_2D"); - } - if (Point::IsEmpty(geom)) { - // TODO: Maybe make this return NULL instead - throw ConversionException("Cannot cast empty point GEOMETRY to POINT_2D"); - } - auto vertex = Point::GetVertex(geom); - return POINT_TYPE {vertex.x, vertex.y}; - }); - return true; -} - -//------------------------------------------------------------------------------ -// LineString2D -> Geometry -//------------------------------------------------------------------------------ -static bool LineString2DToGeometryCast(Vector &source, Vector &result, idx_t count, CastParameters ¶meters) { - - auto &lstate = GeometryFunctionLocalState::ResetAndGet(parameters); - auto &arena = lstate.arena; - - auto &coord_vec = ListVector::GetEntry(source); - auto &coord_vec_children = StructVector::GetEntries(coord_vec); - auto x_data = FlatVector::GetData(*coord_vec_children[0]); - auto y_data = FlatVector::GetData(*coord_vec_children[1]); - - UnaryExecutor::Execute(source, result, count, [&](list_entry_t &line) { - auto geom = LineString::Create(arena, line.length, false, false); - for (idx_t i = 0; i < line.length; i++) { - auto x = x_data[line.offset + i]; - auto y = y_data[line.offset + i]; - LineString::SetVertex(geom, i, VertexXY {x, y}); - } - return Geometry::Serialize(geom, result); - }); - return true; -} - 
-//------------------------------------------------------------------------------ -// Geometry -> LineString2D -//------------------------------------------------------------------------------ -static bool GeometryToLineString2DCast(Vector &source, Vector &result, idx_t count, CastParameters ¶meters) { - auto &lstate = GeometryFunctionLocalState::ResetAndGet(parameters); - auto &arena = lstate.arena; - - auto &coord_vec = ListVector::GetEntry(result); - auto &coord_vec_children = StructVector::GetEntries(coord_vec); - auto x_data = FlatVector::GetData(*coord_vec_children[0]); - auto y_data = FlatVector::GetData(*coord_vec_children[1]); - - idx_t total_coords = 0; - UnaryExecutor::Execute(source, result, count, [&](geometry_t &geom) { - if (geom.GetType() != GeometryType::LINESTRING) { - throw ConversionException("Cannot cast non-linestring GEOMETRY to LINESTRING_2D"); - } - - auto line = Geometry::Deserialize(arena, geom); - auto line_size = LineString::VertexCount(line); - - auto entry = list_entry_t(total_coords, line_size); - total_coords += line_size; - ListVector::Reserve(result, total_coords); - - for (idx_t i = 0; i < line_size; i++) { - auto vertex = LineString::GetVertex(line, i); - x_data[entry.offset + i] = vertex.x; - y_data[entry.offset + i] = vertex.y; - } - return entry; - }); - ListVector::SetListSize(result, total_coords); - return true; -} - -//------------------------------------------------------------------------------ -// Polygon2D -> Geometry -//------------------------------------------------------------------------------ -static bool Polygon2DToGeometryCast(Vector &source, Vector &result, idx_t count, CastParameters ¶meters) { - auto &lstate = GeometryFunctionLocalState::ResetAndGet(parameters); - auto &arena = lstate.arena; - - auto &ring_vec = ListVector::GetEntry(source); - auto ring_entries = ListVector::GetData(ring_vec); - auto &coord_vec = ListVector::GetEntry(ring_vec); - auto &coord_vec_children = 
StructVector::GetEntries(coord_vec); - auto x_data = FlatVector::GetData(*coord_vec_children[0]); - auto y_data = FlatVector::GetData(*coord_vec_children[1]); - - UnaryExecutor::Execute(source, result, count, [&](list_entry_t &poly) { - auto geom = Polygon::Create(arena, poly.length, false, false); - - for (idx_t i = 0; i < poly.length; i++) { - auto ring = ring_entries[poly.offset + i]; - auto &ring_array = Polygon::Part(geom, i); - LineString::Resize(ring_array, arena, ring.length); - for (idx_t j = 0; j < ring.length; j++) { - auto x = x_data[ring.offset + j]; - auto y = y_data[ring.offset + j]; - LineString::SetVertex(ring_array, j, VertexXY {x, y}); - } - } - return Geometry::Serialize(geom, result); - }); - return true; -} - -//------------------------------------------------------------------------------ -// Geometry -> Polygon2D -//------------------------------------------------------------------------------ -static bool GeometryToPolygon2DCast(Vector &source, Vector &result, idx_t count, CastParameters ¶meters) { - auto &lstate = GeometryFunctionLocalState::ResetAndGet(parameters); - auto &arena = lstate.arena; - - auto &ring_vec = ListVector::GetEntry(result); - - idx_t total_rings = 0; - idx_t total_coords = 0; - - UnaryExecutor::Execute(source, result, count, [&](geometry_t &geom) { - if (geom.GetType() != GeometryType::POLYGON) { - throw ConversionException("Cannot cast non-polygon GEOMETRY to POLYGON_2D"); - } - auto poly = Geometry::Deserialize(arena, geom); - auto poly_size = Polygon::PartCount(poly); - auto poly_entry = list_entry_t(total_rings, poly_size); - - ListVector::Reserve(result, total_rings + poly_size); - - for (idx_t ring_idx = 0; ring_idx < poly_size; ring_idx++) { - auto ring = Polygon::Part(poly, ring_idx); - auto ring_size = ring.Count(); - auto ring_entry = list_entry_t(total_coords, ring_size); - - ListVector::Reserve(ring_vec, total_coords + ring_size); - - auto ring_entries = ListVector::GetData(ring_vec); - auto &coord_vec = 
ListVector::GetEntry(ring_vec); - auto &coord_vec_children = StructVector::GetEntries(coord_vec); - auto x_data = FlatVector::GetData(*coord_vec_children[0]); - auto y_data = FlatVector::GetData(*coord_vec_children[1]); - - ring_entries[total_rings + ring_idx] = ring_entry; - - for (idx_t j = 0; j < ring_size; j++) { - auto vert = LineString::GetVertex(ring, j); - x_data[ring_entry.offset + j] = vert.x; - y_data[ring_entry.offset + j] = vert.y; - } - total_coords += ring_size; - } - total_rings += poly_size; - - return poly_entry; - }); - - ListVector::SetListSize(result, total_rings); - ListVector::SetListSize(ring_vec, total_coords); - - return true; -} - -//------------------------------------------------------------------------------ -// BOX_2D -> Geometry -//------------------------------------------------------------------------------ -// Since BOX is a non-standard geometry type, we serialize it as a polygon -static bool Box2DToGeometryCast(Vector &source, Vector &result, idx_t count, CastParameters ¶meters) { - auto &lstate = GeometryFunctionLocalState::ResetAndGet(parameters); - auto &arena = lstate.arena; - using BOX_TYPE = StructTypeQuaternary; - using GEOMETRY_TYPE = PrimitiveType; - GenericExecutor::ExecuteUnary(source, result, count, [&](BOX_TYPE &box) { - auto minx = box.a_val; - auto miny = box.b_val; - auto maxx = box.c_val; - auto maxy = box.d_val; - auto polygon = Polygon::CreateFromBox(arena, minx, miny, maxx, maxy); - return Geometry::Serialize(polygon, result); - }); - return true; -} - -static bool Box2DFToGeometryCast(Vector &source, Vector &result, idx_t count, CastParameters ¶meters) { - auto &lstate = GeometryFunctionLocalState::ResetAndGet(parameters); - auto &arena = lstate.arena; - using BOX_TYPE = StructTypeQuaternary; - using GEOMETRY_TYPE = PrimitiveType; - GenericExecutor::ExecuteUnary(source, result, count, [&](BOX_TYPE &box) { - auto minx = box.a_val; - auto miny = box.b_val; - auto maxx = box.c_val; - auto maxy = box.d_val; - 
auto polygon = Polygon::CreateFromBox(arena, minx, miny, maxx, maxy); - return Geometry::Serialize(polygon, result); - }); - return true; -} - -//------------------------------------------------------------------------------ -// Register functions -//------------------------------------------------------------------------------ -void CoreCastFunctions::RegisterGeometryCasts(DatabaseInstance &db) { - ExtensionUtil::RegisterCastFunction( - db, GeoTypes::GEOMETRY(), GeoTypes::LINESTRING_2D(), - BoundCastInfo(GeometryToLineString2DCast, nullptr, GeometryFunctionLocalState::InitCast), 1); - ExtensionUtil::RegisterCastFunction( - db, GeoTypes::LINESTRING_2D(), GeoTypes::GEOMETRY(), - BoundCastInfo(LineString2DToGeometryCast, nullptr, GeometryFunctionLocalState::InitCast), 1); - - ExtensionUtil::RegisterCastFunction( - db, GeoTypes::GEOMETRY(), GeoTypes::POINT_2D(), - BoundCastInfo(GeometryToPoint2DCast, nullptr, GeometryFunctionLocalState::InitCast), 1); - ExtensionUtil::RegisterCastFunction( - db, GeoTypes::POINT_2D(), GeoTypes::GEOMETRY(), - BoundCastInfo(Point2DToGeometryCast, nullptr, GeometryFunctionLocalState::InitCast), 1); - - ExtensionUtil::RegisterCastFunction( - db, GeoTypes::GEOMETRY(), GeoTypes::POLYGON_2D(), - BoundCastInfo(GeometryToPolygon2DCast, nullptr, GeometryFunctionLocalState::InitCast), 1); - ExtensionUtil::RegisterCastFunction( - db, GeoTypes::POLYGON_2D(), GeoTypes::GEOMETRY(), - BoundCastInfo(Polygon2DToGeometryCast, nullptr, GeometryFunctionLocalState::InitCast), 1); - - ExtensionUtil::RegisterCastFunction( - db, GeoTypes::BOX_2D(), GeoTypes::GEOMETRY(), - BoundCastInfo(Box2DToGeometryCast, nullptr, GeometryFunctionLocalState::InitCast), 1); - - ExtensionUtil::RegisterCastFunction( - db, GeoTypes::BOX_2DF(), GeoTypes::GEOMETRY(), - BoundCastInfo(Box2DFToGeometryCast, nullptr, GeometryFunctionLocalState::InitCast), 1); -} - -} // namespace core - -} // namespace spatial \ No newline at end of file diff --git 
a/spatial/src/spatial/core/functions/cast/varchar_cast.cpp b/spatial/src/spatial/core/functions/cast/varchar_cast.cpp deleted file mode 100644 index c11a6f7a..00000000 --- a/spatial/src/spatial/core/functions/cast/varchar_cast.cpp +++ /dev/null @@ -1,394 +0,0 @@ -#include "spatial/common.hpp" -#include "spatial/core/types.hpp" -#include "spatial/core/functions/cast.hpp" -#include "spatial/core/geometry/geometry.hpp" -#include "spatial/core/functions/common.hpp" -#include "spatial/core/geometry/geometry_processor.hpp" -#include "spatial/core/geometry/wkt_reader.hpp" -#include "spatial/core/util/math.hpp" -#include "duckdb/function/cast/cast_function_set.hpp" -#include "duckdb/common/operator/cast_operators.hpp" -#include "duckdb/common/vector_operations/generic_executor.hpp" -#include "duckdb/common/error_data.hpp" - -namespace spatial { - -namespace core { - -//------------------------------------------------------------------------------ -// POINT_2D -> VARCHAR -//------------------------------------------------------------------------------ -void CoreVectorOperations::Point2DToVarchar(Vector &source, Vector &result, idx_t count) { - using POINT_TYPE = StructTypeBinary; - using VARCHAR_TYPE = PrimitiveType; - - GenericExecutor::ExecuteUnary(source, result, count, [&](POINT_TYPE &point) { - auto x = point.a_val; - auto y = point.b_val; - - if (std::isnan(x) || std::isnan(y)) { - return StringVector::AddString(result, "POINT EMPTY"); - } - - return StringVector::AddString(result, StringUtil::Format("POINT (%s)", MathUtil::format_coord(x, y))); - }); -} - -//------------------------------------------------------------------------------ -// LINESTRING_2D -> VARCHAR -//------------------------------------------------------------------------------ -void CoreVectorOperations::LineString2DToVarchar(Vector &source, Vector &result, idx_t count) { - auto &inner = ListVector::GetEntry(source); - auto &children = StructVector::GetEntries(inner); - auto x_data = 
FlatVector::GetData(*children[0]); - auto y_data = FlatVector::GetData(*children[1]); - - UnaryExecutor::Execute(source, result, count, [&](list_entry_t &line) { - auto offset = line.offset; - auto length = line.length; - - if (length == 0) { - return StringVector::AddString(result, "LINESTRING EMPTY"); - } - - string result_str = "LINESTRING ("; - for (idx_t i = offset; i < offset + length; i++) { - result_str += MathUtil::format_coord(x_data[i], y_data[i]); - if (i < offset + length - 1) { - result_str += ", "; - } - } - result_str += ")"; - return StringVector::AddString(result, result_str); - }); -} - -//------------------------------------------------------------------------------ -// POLYGON_2D -> VARCHAR -//------------------------------------------------------------------------------ -void CoreVectorOperations::Polygon2DToVarchar(Vector &source, Vector &result, idx_t count) { - auto &poly_vector = source; - auto &ring_vector = ListVector::GetEntry(poly_vector); - auto ring_entries = ListVector::GetData(ring_vector); - auto &point_vector = ListVector::GetEntry(ring_vector); - auto &point_children = StructVector::GetEntries(point_vector); - auto x_data = FlatVector::GetData(*point_children[0]); - auto y_data = FlatVector::GetData(*point_children[1]); - - UnaryExecutor::Execute(poly_vector, result, count, [&](list_entry_t polygon_entry) { - auto offset = polygon_entry.offset; - auto length = polygon_entry.length; - - if (length == 0) { - return StringVector::AddString(result, "POLYGON EMPTY"); - } - - string result_str = "POLYGON ("; - for (idx_t i = offset; i < offset + length; i++) { - auto ring_entry = ring_entries[i]; - auto ring_offset = ring_entry.offset; - auto ring_length = ring_entry.length; - result_str += "("; - for (idx_t j = ring_offset; j < ring_offset + ring_length; j++) { - result_str += MathUtil::format_coord(x_data[j], y_data[j]); - if (j < ring_offset + ring_length - 1) { - result_str += ", "; - } - } - result_str += ")"; - if (i < offset + 
length - 1) { - result_str += ", "; - } - } - result_str += ")"; - return StringVector::AddString(result, result_str); - }); -} - -//------------------------------------------------------------------------------ -// BOX_2D -> VARCHAR -//------------------------------------------------------------------------------ -void CoreVectorOperations::Box2DToVarchar(Vector &source, Vector &result, idx_t count) { - using BOX_TYPE = StructTypeQuaternary; - using VARCHAR_TYPE = PrimitiveType; - GenericExecutor::ExecuteUnary(source, result, count, [&](BOX_TYPE &box) { - return StringVector::AddString(result, - StringUtil::Format("BOX(%s, %s)", MathUtil::format_coord(box.a_val, box.b_val), - MathUtil::format_coord(box.c_val, box.d_val))); - }); -} - -//------------------------------------------------------------------------------ -// GEOMETRY -> VARCHAR -//------------------------------------------------------------------------------ -class GeometryTextProcessor final : GeometryProcessor { -private: - string text; - -public: - void OnVertexData(const VertexData &data) { - auto &dims = data.data; - auto &strides = data.stride; - auto count = data.count; - - if (HasZ() && HasM()) { - for (uint32_t i = 0; i < count; i++) { - auto x = Load(dims[0] + i * strides[0]); - auto y = Load(dims[1] + i * strides[1]); - auto z = Load(dims[2] + i * strides[2]); - auto m = Load(dims[3] + i * strides[3]); - text += MathUtil::format_coord(x, y, z, m); - if (i < count - 1) { - text += ", "; - } - } - } else if (HasZ()) { - for (uint32_t i = 0; i < count; i++) { - auto x = Load(dims[0] + i * strides[0]); - auto y = Load(dims[1] + i * strides[1]); - auto zm = Load(dims[2] + i * strides[2]); - text += MathUtil::format_coord(x, y, zm); - if (i < count - 1) { - text += ", "; - } - } - } else if (HasM()) { - for (uint32_t i = 0; i < count; i++) { - auto x = Load(dims[0] + i * strides[0]); - auto y = Load(dims[1] + i * strides[1]); - auto m = Load(dims[3] + i * strides[3]); - text += 
MathUtil::format_coord(x, y, m); - if (i < count - 1) { - text += ", "; - } - } - } else { - for (uint32_t i = 0; i < count; i++) { - auto x = Load(dims[0] + i * strides[0]); - auto y = Load(dims[1] + i * strides[1]); - text += MathUtil::format_coord(x, y); - - if (i < count - 1) { - text += ", "; - } - } - } - } - - void ProcessPoint(const VertexData &data, bool in_typed_collection) override { - if (!in_typed_collection) { - text += "POINT"; - if (HasZ() && HasM()) { - text += " ZM"; - } else if (HasZ()) { - text += " Z"; - } else if (HasM()) { - text += " M"; - } - text += " "; - } - - if (data.count == 0) { - text += "EMPTY"; - } else if (in_typed_collection) { - OnVertexData(data); - } else { - text += "("; - OnVertexData(data); - text += ")"; - } - } - - void ProcessLineString(const VertexData &data, bool in_typed_collection) override { - if (!in_typed_collection) { - text += "LINESTRING"; - if (HasZ() && HasM()) { - text += " ZM"; - } else if (HasZ()) { - text += " Z"; - } else if (HasM()) { - text += " M"; - } - text += " "; - } - - if (data.count == 0) { - text += "EMPTY"; - } else { - text += "("; - OnVertexData(data); - text += ")"; - } - } - - void ProcessPolygon(PolygonState &state, bool in_typed_collection) override { - if (!in_typed_collection) { - text += "POLYGON"; - if (HasZ() && HasM()) { - text += " ZM"; - } else if (HasZ()) { - text += " Z"; - } else if (HasM()) { - text += " M"; - } - text += " "; - } - - if (state.RingCount() == 0) { - text += "EMPTY"; - } else { - text += "("; - bool first = true; - while (!state.IsDone()) { - if (!first) { - text += ", "; - } - first = false; - text += "("; - auto vertices = state.Next(); - OnVertexData(vertices); - text += ")"; - } - text += ")"; - } - } - - void ProcessCollection(CollectionState &state, bool) override { - bool collection_is_typed = false; - switch (CurrentType()) { - case GeometryType::MULTIPOINT: - text += "MULTIPOINT"; - collection_is_typed = true; - break; - case 
GeometryType::MULTILINESTRING: - text += "MULTILINESTRING"; - collection_is_typed = true; - break; - case GeometryType::MULTIPOLYGON: - text += "MULTIPOLYGON"; - collection_is_typed = true; - break; - case GeometryType::GEOMETRYCOLLECTION: - text += "GEOMETRYCOLLECTION"; - collection_is_typed = false; - break; - default: - throw InvalidInputException("Invalid geometry type"); - } - - if (HasZ() && HasM()) { - text += " ZM"; - } else if (HasZ()) { - text += " Z"; - } else if (HasM()) { - text += " M"; - } - - if (state.ItemCount() == 0) { - text += " EMPTY"; - } else { - text += " ("; - bool first = true; - while (!state.IsDone()) { - if (!first) { - text += ", "; - } - first = false; - state.Next(collection_is_typed); - } - text += ")"; - } - } - - const string &Execute(const geometry_t &geom) { - text.clear(); - Process(geom, false); - return text; - } -}; - -void CoreVectorOperations::GeometryToVarchar(Vector &source, Vector &result, idx_t count) { - GeometryTextProcessor processor; - UnaryExecutor::Execute(source, result, count, [&](geometry_t &input) { - auto text = processor.Execute(input); - return StringVector::AddString(result, text); - }); -} - -static bool TextToGeometryCast(Vector &source, Vector &result, idx_t count, CastParameters ¶meters) { - - auto &lstate = GeometryFunctionLocalState::ResetAndGet(parameters); - WKTReader reader(lstate.arena); - - bool success = true; - UnaryExecutor::ExecuteWithNulls( - source, result, count, [&](string_t &wkt, ValidityMask &mask, idx_t idx) { - try { - auto geom = reader.Parse(wkt); - return Geometry::Serialize(geom, result); - } catch (InvalidInputException &e) { - if (success) { - success = false; - ErrorData error(e); - HandleCastError::AssignError(error.RawMessage(), parameters.error_message); - } - mask.SetInvalid(idx); - return geometry_t {}; - } - }); - return success; -} - -//------------------------------------------------------------------------------ -// CASTS 
-//------------------------------------------------------------------------------ -static bool Point2DToVarcharCast(Vector &source, Vector &result, idx_t count, CastParameters ¶meters) { - CoreVectorOperations::Point2DToVarchar(source, result, count); - return true; -} - -static bool LineString2DToVarcharCast(Vector &source, Vector &result, idx_t count, CastParameters ¶meters) { - CoreVectorOperations::LineString2DToVarchar(source, result, count); - return true; -} - -static bool Polygon2DToVarcharCast(Vector &source, Vector &result, idx_t count, CastParameters ¶meters) { - CoreVectorOperations::Polygon2DToVarchar(source, result, count); - return true; -} - -static bool Box2DToVarcharCast(Vector &source, Vector &result, idx_t count, CastParameters ¶meters) { - CoreVectorOperations::Box2DToVarchar(source, result, count); - return true; -} - -static bool GeometryToVarcharCast(Vector &source, Vector &result, idx_t count, CastParameters ¶meters) { - CoreVectorOperations::GeometryToVarchar(source, result, count); - return true; -} - -void CoreCastFunctions::RegisterVarcharCasts(DatabaseInstance &db) { - - ExtensionUtil::RegisterCastFunction(db, GeoTypes::POINT_2D(), LogicalType::VARCHAR, - BoundCastInfo(Point2DToVarcharCast), 1); - - ExtensionUtil::RegisterCastFunction(db, GeoTypes::LINESTRING_2D(), LogicalType::VARCHAR, - BoundCastInfo(LineString2DToVarcharCast), 1); - - ExtensionUtil::RegisterCastFunction(db, GeoTypes::POLYGON_2D(), LogicalType::VARCHAR, - BoundCastInfo(Polygon2DToVarcharCast), 1); - - ExtensionUtil::RegisterCastFunction(db, GeoTypes::BOX_2D(), LogicalType::VARCHAR, BoundCastInfo(Box2DToVarcharCast), - 1); - - ExtensionUtil::RegisterCastFunction(db, GeoTypes::GEOMETRY(), LogicalType::VARCHAR, - BoundCastInfo(GeometryToVarcharCast), 1); - - ExtensionUtil::RegisterCastFunction( - db, LogicalType::VARCHAR, core::GeoTypes::GEOMETRY(), - BoundCastInfo(TextToGeometryCast, nullptr, GeometryFunctionLocalState::InitCast)); -} - -} // namespace core - -} // 
namespace spatial diff --git a/spatial/src/spatial/core/functions/cast/wkb_cast.cpp b/spatial/src/spatial/core/functions/cast/wkb_cast.cpp deleted file mode 100644 index c1a41b99..00000000 --- a/spatial/src/spatial/core/functions/cast/wkb_cast.cpp +++ /dev/null @@ -1,75 +0,0 @@ -#include "spatial/common.hpp" -#include "spatial/core/types.hpp" -#include "spatial/core/functions/cast.hpp" -#include "spatial/core/functions/common.hpp" -#include "spatial/core/geometry/wkb_writer.hpp" -#include "spatial/core/geometry/wkb_reader.hpp" - -#include "duckdb/function/cast/cast_function_set.hpp" -#include "duckdb/common/vector_operations/generic_executor.hpp" -#include "duckdb/common/operator/cast_operators.hpp" - -namespace spatial { - -namespace core { - -//------------------------------------------------------------------------------ -// WKB -> GEOMETRY -//------------------------------------------------------------------------------ -static bool WKBToGeometryCast(Vector &source, Vector &result, idx_t count, CastParameters ¶meters) { - - auto &lstate = GeometryFunctionLocalState::ResetAndGet(parameters); - WKBReader reader(lstate.arena); - - bool success = true; - UnaryExecutor::ExecuteWithNulls( - source, result, count, [&](string_t input, ValidityMask &mask, idx_t idx) { - try { - auto geom = reader.Deserialize(input); - return Geometry::Serialize(geom, result); - } catch (SerializationException &e) { - if (success) { - success = false; - ErrorData error(e); - HandleCastError::AssignError(error.RawMessage(), parameters.error_message); - } - mask.SetInvalid(idx); - return geometry_t {}; - } - }); - return success; -} - -//------------------------------------------------------------------------------ -// GEOMETRY -> WKB -//------------------------------------------------------------------------------ -static bool GeometryToWKBCast(Vector &source, Vector &result, idx_t count, CastParameters ¶meters) { - - UnaryExecutor::Execute(source, result, count, - [&](geometry_t input) { 
return WKBWriter::Write(input, result); }); - - return true; -} - -//------------------------------------------------------------------------------ -// Register functions -//------------------------------------------------------------------------------ -void CoreCastFunctions::RegisterWKBCasts(DatabaseInstance &db) { - // Geometry <-> WKB is explicitly castable - ExtensionUtil::RegisterCastFunction(db, GeoTypes::GEOMETRY(), GeoTypes::WKB_BLOB(), - BoundCastInfo(GeometryToWKBCast)); - - ExtensionUtil::RegisterCastFunction( - db, GeoTypes::WKB_BLOB(), GeoTypes::GEOMETRY(), - BoundCastInfo(WKBToGeometryCast, nullptr, GeometryFunctionLocalState::InitCast)); - - // WKB -> BLOB is implicitly castable - ExtensionUtil::RegisterCastFunction(db, GeoTypes::WKB_BLOB(), LogicalType::BLOB, DefaultCasts::ReinterpretCast, 1); - - // Geometry -> BLOB is explicitly castable - ExtensionUtil::RegisterCastFunction(db, GeoTypes::GEOMETRY(), LogicalType::BLOB, DefaultCasts::ReinterpretCast); -} - -} // namespace core - -} // namespace spatial \ No newline at end of file diff --git a/spatial/src/spatial/core/functions/common.cpp b/spatial/src/spatial/core/functions/common.cpp deleted file mode 100644 index 9e0ceee9..00000000 --- a/spatial/src/spatial/core/functions/common.cpp +++ /dev/null @@ -1,34 +0,0 @@ -#include "spatial/common.hpp" -#include "spatial/core/functions/common.hpp" - -namespace spatial { - -namespace core { - -GeometryFunctionLocalState::GeometryFunctionLocalState(ClientContext &context) : arena(BufferAllocator::Get(context)) { -} - -unique_ptr -GeometryFunctionLocalState::Init(ExpressionState &state, const BoundFunctionExpression &expr, FunctionData *bind_data) { - return make_uniq(state.GetContext()); -} - -unique_ptr GeometryFunctionLocalState::InitCast(CastLocalStateParameters ¶meters) { - return make_uniq(*parameters.context.get()); -} - -GeometryFunctionLocalState &GeometryFunctionLocalState::ResetAndGet(CastParameters ¶meters) { - auto &local_state = 
parameters.local_state->Cast(); - local_state.arena.Reset(); - return local_state; -} - -GeometryFunctionLocalState &GeometryFunctionLocalState::ResetAndGet(ExpressionState &state) { - auto &local_state = ExecuteFunctionState::GetFunctionState(state)->Cast(); - local_state.arena.Reset(); - return local_state; -} - -} // namespace core - -} // namespace spatial diff --git a/spatial/src/spatial/core/functions/macros.cpp b/spatial/src/spatial/core/functions/macros.cpp deleted file mode 100644 index 6dc1f563..00000000 --- a/spatial/src/spatial/core/functions/macros.cpp +++ /dev/null @@ -1,71 +0,0 @@ -#include "spatial/common.hpp" -#include "spatial/core/functions/macros.hpp" - -#include "duckdb/catalog/default/default_functions.hpp" - -namespace spatial { - -namespace core { - -//------------------------------------------------------------------------------ -// Register functions -//------------------------------------------------------------------------------ -void CoreScalarMacros::Register(DatabaseInstance &db) { - - // Sadly this doesnt work since we cant overload macros :( - /* - // Macros - DefaultMacro macros[] = { - // Accessors - {DEFAULT_SCHEMA, "x", {"geom", nullptr}, "ST_X(geom)"}, - {DEFAULT_SCHEMA, "y", {"geom", nullptr}, "ST_Y(geom)"}, - {DEFAULT_SCHEMA, "z", {"geom", nullptr}, "ST_Z(geom)"}, - {DEFAULT_SCHEMA, "m", {"geom", nullptr}, "ST_M(geom)"}, - {DEFAULT_SCHEMA, "xmin", {"geom", nullptr}, "ST_XMin(geom)"}, - {DEFAULT_SCHEMA, "ymin", {"geom", nullptr}, "ST_YMin(geom)"}, - {DEFAULT_SCHEMA, "zmin", {"geom", nullptr}, "ST_ZMin(geom)"}, - {DEFAULT_SCHEMA, "mmin", {"geom", nullptr}, "ST_MMin(geom)"}, - {DEFAULT_SCHEMA, "xmax", {"geom", nullptr}, "ST_XMax(geom)"}, - {DEFAULT_SCHEMA, "ymax", {"geom", nullptr}, "ST_YMax(geom)"}, - {DEFAULT_SCHEMA, "zmax", {"geom", nullptr}, "ST_ZMax(geom)"}, - {DEFAULT_SCHEMA, "mmax", {"geom", nullptr}, "ST_MMax(geom)"}, - // Predicates - {DEFAULT_SCHEMA, "overlaps", {"a", "b", nullptr}, "ST_Overlaps(a, b)"}, - 
{DEFAULT_SCHEMA, "contains", {"a", "b", nullptr}, "ST_Contains(a, b)"}, - {DEFAULT_SCHEMA, "intersects", {"a", "b", nullptr}, "ST_Intersects(a, b)"}, - {DEFAULT_SCHEMA, "within", {"a", "b", nullptr}, "ST_Within(a, b)"}, - {DEFAULT_SCHEMA, "covers", {"a", "b", nullptr}, "ST_Covers(a, b)"}, - {DEFAULT_SCHEMA, "crosses", {"a", "b", nullptr}, "ST_Crosses(a, b)"}, - {DEFAULT_SCHEMA, "touches", {"a", "b", nullptr}, "ST_Touches(a, b)"}, - - // Properties - {DEFAULT_SCHEMA, "area", {"geom", nullptr}, "ST_Area(geom)"}, - {DEFAULT_SCHEMA, "length", {"geom", nullptr}, "ST_Length(geom)"}, - {DEFAULT_SCHEMA, "centroid", {"geom", nullptr}, "ST_Centroid(geom)"}, - {DEFAULT_SCHEMA, "is_empty", {"geom", nullptr}, "ST_IsEmpty(geom)"}, - {DEFAULT_SCHEMA, "is_simple", {"geom", nullptr}, "ST_IsSimple(geom)"}, - {DEFAULT_SCHEMA, "is_valid", {"geom", nullptr}, "ST_IsValid(geom)"}, - {DEFAULT_SCHEMA, "is_closed", {"geom", nullptr}, "ST_IsClosed(geom)"}, - // Conversion - {DEFAULT_SCHEMA, "as_wkb", {"geom", nullptr}, "ST_AsWKB(geom)"}, - {DEFAULT_SCHEMA, "as_text", {"geom", nullptr}, "ST_AsText(geom)"}, - {DEFAULT_SCHEMA, "as_json", {"geom", nullptr}, "ST_AsGeoJSON(geom)"}, - // Misc - {DEFAULT_SCHEMA, "flip", {"geom", nullptr}, "ST_FlipCoordinates(geom)"}, - {DEFAULT_SCHEMA, "reverse", {"geom", nullptr}, "ST_Reverse(geom)"}, - {DEFAULT_SCHEMA, "extent", {"geom", nullptr}, "ST_Extent(geom)"}, - {DEFAULT_SCHEMA, "buffer", {"geom", "radius", nullptr}, "ST_Buffer(geom, radius)"}, - {DEFAULT_SCHEMA, "simplify", {"geom", "tolerance", nullptr}, "ST_Simplify(geom, tolerance)"} - }; - - for(auto ¯o : macros) { - auto info = DefaultFunctionGenerator::CreateInternalMacroInfo(macro); - info->on_conflict = OnCreateConflict::ALTER_ON_CONFLICT; - ExtensionUtil::RegisterFunction(db, *info); - } - */ -} - -} // namespace core - -} // namespace spatial diff --git a/spatial/src/spatial/core/functions/scalar/CMakeLists.txt b/spatial/src/spatial/core/functions/scalar/CMakeLists.txt deleted file mode 100644 
index eb5256a2..00000000 --- a/spatial/src/spatial/core/functions/scalar/CMakeLists.txt +++ /dev/null @@ -1,49 +0,0 @@ -set(EXTENSION_SOURCES - ${EXTENSION_SOURCES} - ${CMAKE_CURRENT_SOURCE_DIR}/st_area.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/st_asgeojson.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/st_ashexwkb.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/st_assvg.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/st_astext.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/st_aswkb.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/st_centroid.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/st_collect.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/st_collectionextract.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/st_contains.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/st_dimension.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/st_distance.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/st_distance_sphere.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/st_dump.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/st_endpoint.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/st_extent.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/st_exteriorring.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/st_flipcoordinates.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/st_force.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/st_geometrytype.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/st_geomfromhexwkb.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/st_geomfromtext.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/st_geomfromwkb.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/st_has.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/st_hilbert.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/st_intersects.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/st_intersects_extent.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/st_is_closed.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/st_length.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/st_makeenvelope.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/st_makeline.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/st_makepolygon.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/st_multi.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/st_ngeometries.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/st_ninteriorrings.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/st_npoints.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/st_perimeter.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/st_point.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/st_pointn.cpp - 
${CMAKE_CURRENT_SOURCE_DIR}/st_points.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/st_quadkey.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/st_removerepeatedpoints.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/st_startpoint.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/st_xyzm.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/st_isempty.cpp - PARENT_SCOPE -) \ No newline at end of file diff --git a/spatial/src/spatial/core/functions/scalar/st_area.cpp b/spatial/src/spatial/core/functions/scalar/st_area.cpp deleted file mode 100644 index 440cdbe4..00000000 --- a/spatial/src/spatial/core/functions/scalar/st_area.cpp +++ /dev/null @@ -1,215 +0,0 @@ -#include "duckdb/parser/parsed_data/create_scalar_function_info.hpp" -#include "duckdb/common/vector_operations/generic_executor.hpp" -#include "spatial/common.hpp" -#include "spatial/core/functions/scalar.hpp" -#include "spatial/core/functions/common.hpp" -#include "spatial/core/geometry/geometry.hpp" -#include "spatial/core/types.hpp" -#include "spatial/core/geometry/geometry_processor.hpp" - -namespace spatial { - -namespace core { - -//------------------------------------------------------------------------------ -// POLYGON_2D -//------------------------------------------------------------------------------ -static void PolygonAreaFunction(DataChunk &args, ExpressionState &state, Vector &result) { - D_ASSERT(args.data.size() == 1); - - auto &input = args.data[0]; - auto count = args.size(); - - auto &ring_vec = ListVector::GetEntry(input); - auto ring_entries = ListVector::GetData(ring_vec); - auto &coord_vec = ListVector::GetEntry(ring_vec); - auto &coord_vec_children = StructVector::GetEntries(coord_vec); - auto x_data = FlatVector::GetData(*coord_vec_children[0]); - auto y_data = FlatVector::GetData(*coord_vec_children[1]); - - UnaryExecutor::Execute(input, result, count, [&](list_entry_t polygon) { - auto polygon_offset = polygon.offset; - auto polygon_length = polygon.length; - - bool first = true; - double area = 0; - for (idx_t ring_idx = polygon_offset; ring_idx < 
polygon_offset + polygon_length; ring_idx++) { - auto ring = ring_entries[ring_idx]; - auto ring_offset = ring.offset; - auto ring_length = ring.length; - - double sum = 0; - for (idx_t coord_idx = ring_offset; coord_idx < ring_offset + ring_length - 1; coord_idx++) { - sum += (x_data[coord_idx] * y_data[coord_idx + 1]) - (x_data[coord_idx + 1] * y_data[coord_idx]); - } - sum = std::abs(sum); - if (first) { - // Add outer ring - area = sum * 0.5; - first = false; - } else { - // Subtract holes - area -= sum * 0.5; - } - } - return area; - }); - - if (count == 1) { - result.SetVectorType(VectorType::CONSTANT_VECTOR); - } -} - -//------------------------------------------------------------------------------ -// LINESTRING_2D -//------------------------------------------------------------------------------ -static void LineStringAreaFunction(DataChunk &args, ExpressionState &state, Vector &result) { - auto input = args.data[0]; - UnaryExecutor::Execute(input, result, args.size(), [](list_entry_t) { return 0; }); -} - -//------------------------------------------------------------------------------ -// POINT_2D -//------------------------------------------------------------------------------ -static void PointAreaFunction(DataChunk &args, ExpressionState &state, Vector &result) { - using POINT_TYPE = StructTypeBinary; - using AREA_TYPE = PrimitiveType; - GenericExecutor::ExecuteUnary(args.data[0], result, args.size(), - [](POINT_TYPE) { return 0; }); -} - -//------------------------------------------------------------------------------ -// BOX_2D -//------------------------------------------------------------------------------ -static void BoxAreaFunction(DataChunk &args, ExpressionState &state, Vector &result) { - - using BOX_TYPE = StructTypeQuaternary; - using AREA_TYPE = PrimitiveType; - - GenericExecutor::ExecuteUnary(args.data[0], result, args.size(), [&](BOX_TYPE &box) { - auto minx = box.a_val; - auto miny = box.b_val; - auto maxx = box.c_val; - auto maxy = 
box.d_val; - return AREA_TYPE {(maxx - minx) * (maxy - miny)}; - }); -} - -//------------------------------------------------------------------------------ -// GEOMETRY -//------------------------------------------------------------------------------ -class AreaProcessor final : GeometryProcessor { - static double ProcessVertices(const VertexData &vertices) { - if (vertices.count < 3) { - return 0.0; - } - - const auto count = vertices.count; - const auto x_data = vertices.data[0]; - const auto y_data = vertices.data[1]; - const auto x_stride = vertices.stride[0]; - const auto y_stride = vertices.stride[1]; - - double signed_area = 0.0; - - auto x0 = Load(x_data); - - for (uint32_t i = 1; i < count - 1; ++i) { - auto x1 = Load(x_data + i * x_stride); - auto y1 = Load(y_data + (i + 1) * y_stride); - auto y2 = Load(y_data + (i - 1) * y_stride); - signed_area += (x1 - x0) * (y2 - y1); - } - - signed_area *= 0.5; - - return std::abs(signed_area); - } - - double ProcessPoint(const VertexData &vertices) override { - return 0.0; - } - - double ProcessLineString(const VertexData &vertices) override { - return 0.0; - } - - double ProcessPolygon(PolygonState &state) override { - double sum = 0.0; - if (!state.IsDone()) { - sum += ProcessVertices(state.Next()); - } - while (!state.IsDone()) { - sum -= ProcessVertices(state.Next()); - } - return std::abs(sum); - } - - double ProcessCollection(CollectionState &state) override { - switch (CurrentType()) { - case GeometryType::MULTIPOLYGON: - case GeometryType::GEOMETRYCOLLECTION: { - double sum = 0; - while (!state.IsDone()) { - sum += state.Next(); - } - return sum; - } - default: - return 0.0; - } - } - -public: - double Execute(const geometry_t &geometry) { - return Process(geometry); - } -}; - -static void GeometryAreaFunction(DataChunk &args, ExpressionState &state, Vector &result) { - auto &input = args.data[0]; - auto count = args.size(); - AreaProcessor processor; - UnaryExecutor::Execute(input, result, count, - 
[&](const geometry_t &input) { return processor.Execute(input); }); -} - -//------------------------------------------------------------------------------ -// Documentation -//------------------------------------------------------------------------------ - -static constexpr const char *DOC_DESCRIPTION = R"( - Compute the area of a geometry. - - Returns `0.0` for any geometry that is not a `POLYGON`, `MULTIPOLYGON` or `GEOMETRYCOLLECTION` containing polygon geometries. - The area is in the same units as the spatial reference system of the geometry. - - The `POINT_2D` and `LINESTRING_2D` overloads of this function always return `0.0` but are included for completeness. -)"; - -static constexpr const char *DOC_EXAMPLE = R"( - select ST_Area('POLYGON((0 0, 0 1, 1 1, 1 0, 0 0))'::geometry); - -- 1.0 -)"; - -static constexpr DocTag DOC_TAGS[] = {{"ext", "spatial"}, {"category", "property"}}; - -//------------------------------------------------------------------------------ -// Register functions -//------------------------------------------------------------------------------ -void CoreScalarFunctions::RegisterStArea(DatabaseInstance &db) { - - ScalarFunctionSet set("ST_Area"); - set.AddFunction(ScalarFunction({GeoTypes::POINT_2D()}, LogicalType::DOUBLE, PointAreaFunction)); - set.AddFunction(ScalarFunction({GeoTypes::LINESTRING_2D()}, LogicalType::DOUBLE, LineStringAreaFunction)); - set.AddFunction(ScalarFunction({GeoTypes::POLYGON_2D()}, LogicalType::DOUBLE, PolygonAreaFunction)); - set.AddFunction(ScalarFunction({GeoTypes::GEOMETRY()}, LogicalType::DOUBLE, GeometryAreaFunction)); - set.AddFunction(ScalarFunction({GeoTypes::BOX_2D()}, LogicalType::DOUBLE, BoxAreaFunction)); - - ExtensionUtil::RegisterFunction(db, set); - DocUtil::AddDocumentation(db, "ST_Area", DOC_DESCRIPTION, DOC_EXAMPLE, DOC_TAGS); -} - -} // namespace core - -} // namespace spatial diff --git a/spatial/src/spatial/core/functions/scalar/st_asgeojson.cpp 
b/spatial/src/spatial/core/functions/scalar/st_asgeojson.cpp deleted file mode 100644 index 64489ccd..00000000 --- a/spatial/src/spatial/core/functions/scalar/st_asgeojson.cpp +++ /dev/null @@ -1,622 +0,0 @@ -#include "duckdb/common/vector_operations/generic_executor.hpp" -#include "duckdb/common/vector_operations/unary_executor.hpp" -#include "duckdb/parser/parsed_data/create_scalar_function_info.hpp" -#include "duckdb/planner/expression/bound_function_expression.hpp" -#include "duckdb/common/types/cast_helpers.hpp" - -#include "spatial/common.hpp" -#include "spatial/core/functions/scalar.hpp" -#include "spatial/core/functions/common.hpp" -#include "spatial/core/types.hpp" - -#include "yyjson.h" - -#include "spatial/core/geometry/geometry_processor.hpp" - -namespace spatial { - -namespace core { - -using namespace duckdb_yyjson_spatial; - -class JSONAllocator { - // Stolen from the JSON extension :) -public: - explicit JSONAllocator(ArenaAllocator &allocator) - : allocator(allocator), yyjson_allocator({Allocate, Reallocate, Free, &allocator}) { - } - - inline yyjson_alc *GetYYJSONAllocator() { - return &yyjson_allocator; - } - - void Reset() { - allocator.Reset(); - } - -private: - static inline void *Allocate(void *ctx, size_t size) { - auto alloc = (ArenaAllocator *)ctx; - return alloc->AllocateAligned(size); - } - - static inline void *Reallocate(void *ctx, void *ptr, size_t old_size, size_t size) { - auto alloc = (ArenaAllocator *)ctx; - return alloc->ReallocateAligned((data_ptr_t)ptr, old_size, size); - } - - static inline void Free(void *ctx, void *ptr) { - // NOP because ArenaAllocator can't free - } - -private: - ArenaAllocator &allocator; - yyjson_alc yyjson_allocator; -}; - -//------------------------------------------------------------------------------ -// GEOMETRY -> GEOJSON Fragment -//------------------------------------------------------------------------------ - -static void VerticesToGeoJSON(const Geometry &vertices, yyjson_mut_doc *doc, 
yyjson_mut_val *arr) { - // TODO: If the vertexvector is empty, do we null, add an empty array or a pair of NaN? - auto haz_z = vertices.GetProperties().HasZ(); - auto has_m = vertices.GetProperties().HasM(); - // GeoJSON does not support M values, so we ignore them - if (haz_z && has_m) { - for (uint32_t i = 0; i < vertices.Count(); i++) { - auto coord = yyjson_mut_arr(doc); - auto vert = SinglePartGeometry::GetVertex(vertices, i); - yyjson_mut_arr_add_real(doc, coord, vert.x); - yyjson_mut_arr_add_real(doc, coord, vert.y); - yyjson_mut_arr_add_real(doc, coord, vert.z); - yyjson_mut_arr_append(arr, coord); - } - } else if (haz_z) { - for (uint32_t i = 0; i < vertices.Count(); i++) { - auto coord = yyjson_mut_arr(doc); - auto vert = SinglePartGeometry::GetVertex(vertices, i); - yyjson_mut_arr_add_real(doc, coord, vert.x); - yyjson_mut_arr_add_real(doc, coord, vert.y); - yyjson_mut_arr_add_real(doc, coord, vert.z); - yyjson_mut_arr_append(arr, coord); - } - } else if (has_m) { - for (uint32_t i = 0; i < vertices.Count(); i++) { - auto coord = yyjson_mut_arr(doc); - auto vert = SinglePartGeometry::GetVertex(vertices, i); - yyjson_mut_arr_add_real(doc, coord, vert.x); - yyjson_mut_arr_add_real(doc, coord, vert.y); - yyjson_mut_arr_append(arr, coord); - } - } else { - for (uint32_t i = 0; i < vertices.Count(); i++) { - auto coord = yyjson_mut_arr(doc); - auto vert = SinglePartGeometry::GetVertex(vertices, i); - yyjson_mut_arr_add_real(doc, coord, vert.x); - yyjson_mut_arr_add_real(doc, coord, vert.y); - yyjson_mut_arr_append(arr, coord); - } - } -} - -struct ToGeoJSONFunctor { - - // Point - static void Case(Geometry::Tags::Point, const Geometry &point, yyjson_mut_doc *doc, yyjson_mut_val *obj) { - yyjson_mut_obj_add_str(doc, obj, "type", "Point"); - - auto coords = yyjson_mut_arr(doc); - yyjson_mut_obj_add_val(doc, obj, "coordinates", coords); - if (!Point::IsEmpty(point)) { - auto has_z = point.GetProperties().HasZ(); - auto has_m = point.GetProperties().HasM(); - if 
(has_z && has_m) { - auto vert = Point::GetVertex(point); - yyjson_mut_arr_add_real(doc, coords, vert.x); - yyjson_mut_arr_add_real(doc, coords, vert.y); - yyjson_mut_arr_add_real(doc, coords, vert.z); - } else if (has_z) { - auto vert = Point::GetVertex(point); - yyjson_mut_arr_add_real(doc, coords, vert.x); - yyjson_mut_arr_add_real(doc, coords, vert.y); - yyjson_mut_arr_add_real(doc, coords, vert.z); - } else if (has_m) { - auto vert = Point::GetVertex(point); - yyjson_mut_arr_add_real(doc, coords, vert.x); - yyjson_mut_arr_add_real(doc, coords, vert.y); - } else { - auto vert = Point::GetVertex(point); - yyjson_mut_arr_add_real(doc, coords, vert.x); - yyjson_mut_arr_add_real(doc, coords, vert.y); - } - } - } - - // LineString - static void Case(Geometry::Tags::LineString, const Geometry &line, yyjson_mut_doc *doc, yyjson_mut_val *obj) { - yyjson_mut_obj_add_str(doc, obj, "type", "LineString"); - - auto coords = yyjson_mut_arr(doc); - yyjson_mut_obj_add_val(doc, obj, "coordinates", coords); - VerticesToGeoJSON(line, doc, coords); - } - - // Polygon - static void Case(Geometry::Tags::Polygon, const Geometry &poly, yyjson_mut_doc *doc, yyjson_mut_val *obj) { - yyjson_mut_obj_add_str(doc, obj, "type", "Polygon"); - - auto coords = yyjson_mut_arr(doc); - yyjson_mut_obj_add_val(doc, obj, "coordinates", coords); - for (uint32_t i = 0; i < Polygon::PartCount(poly); i++) { - auto &ring = Polygon::Part(poly, i); - auto ring_coords = yyjson_mut_arr(doc); - VerticesToGeoJSON(ring, doc, ring_coords); - yyjson_mut_arr_append(coords, ring_coords); - } - } - - // MultiPoint - static void Case(Geometry::Tags::MultiPoint, const Geometry &mpoint, yyjson_mut_doc *doc, yyjson_mut_val *obj) { - yyjson_mut_obj_add_str(doc, obj, "type", "MultiPoint"); - - auto coords = yyjson_mut_arr(doc); - yyjson_mut_obj_add_val(doc, obj, "coordinates", coords); - for (uint32_t i = 0; i < MultiPoint::PartCount(mpoint); i++) { - auto &point = MultiPoint::Part(mpoint, i); - VerticesToGeoJSON(point, 
doc, coords); - } - } - - // MultiLineString - static void Case(Geometry::Tags::MultiLineString, const Geometry &mline, yyjson_mut_doc *doc, yyjson_mut_val *obj) { - yyjson_mut_obj_add_str(doc, obj, "type", "MultiLineString"); - - auto coords = yyjson_mut_arr(doc); - yyjson_mut_obj_add_val(doc, obj, "coordinates", coords); - - for (uint32_t i = 0; i < MultiLineString::PartCount(mline); i++) { - auto &line = MultiLineString::Part(mline, i); - auto line_coords = yyjson_mut_arr(doc); - VerticesToGeoJSON(line, doc, line_coords); - yyjson_mut_arr_append(coords, line_coords); - } - } - - // MultiPolygon - static void Case(Geometry::Tags::MultiPolygon, const Geometry &mpoly, yyjson_mut_doc *doc, yyjson_mut_val *obj) { - yyjson_mut_obj_add_str(doc, obj, "type", "MultiPolygon"); - - auto coords = yyjson_mut_arr(doc); - yyjson_mut_obj_add_val(doc, obj, "coordinates", coords); - - for (uint32_t i = 0; i < MultiPolygon::PartCount(mpoly); i++) { - auto &poly = MultiPolygon::Part(mpoly, i); - auto poly_coords = yyjson_mut_arr(doc); - for (uint32_t j = 0; j < Polygon::PartCount(poly); j++) { - auto &ring = Polygon::Part(poly, j); - auto ring_coords = yyjson_mut_arr(doc); - VerticesToGeoJSON(ring, doc, ring_coords); - yyjson_mut_arr_append(poly_coords, ring_coords); - } - yyjson_mut_arr_append(coords, poly_coords); - } - } - - // GeometryCollection - static void Case(Geometry::Tags::GeometryCollection, const Geometry &collection, yyjson_mut_doc *doc, - yyjson_mut_val *obj) { - yyjson_mut_obj_add_str(doc, obj, "type", "GeometryCollection"); - auto arr = yyjson_mut_arr(doc); - yyjson_mut_obj_add_val(doc, obj, "geometries", arr); - - for (uint32_t i = 0; i < GeometryCollection::PartCount(collection); i++) { - auto &geom = GeometryCollection::Part(collection, i); - auto geom_obj = yyjson_mut_obj(doc); - Geometry::Match(geom, doc, geom_obj); - yyjson_mut_arr_append(arr, geom_obj); - } - } -}; - -static void GeometryToGeoJSONFragmentFunction(DataChunk &args, ExpressionState &state, 
Vector &result) { - D_ASSERT(args.data.size() == 1); - auto &input = args.data[0]; - auto count = args.size(); - - auto &lstate = GeometryFunctionLocalState::ResetAndGet(state); - - JSONAllocator json_allocator(lstate.arena); - - UnaryExecutor::Execute(input, result, count, [&](geometry_t input) { - auto geom = Geometry::Deserialize(lstate.arena, input); - - auto doc = yyjson_mut_doc_new(json_allocator.GetYYJSONAllocator()); - auto obj = yyjson_mut_obj(doc); - yyjson_mut_doc_set_root(doc, obj); - - Geometry::Match(geom, doc, obj); - - size_t json_size = 0; - char *json_data = yyjson_mut_write_opts(doc, 0, json_allocator.GetYYJSONAllocator(), &json_size, nullptr); - // Because the arena allocator only resets after each pipeline invocation, we can safely just point into the - // arena here without needing to copy the data to the string heap with StringVector::AddString - return string_t(json_data, json_size); - }); -} - -//------------------------------------------------------------------------------ -// GEOJSON Fragment -> GEOMETRY -//------------------------------------------------------------------------------ - -static Geometry PointFromGeoJSON(yyjson_val *coord_array, ArenaAllocator &arena, const string_t &raw, bool &has_z) { - auto len = yyjson_arr_size(coord_array); - if (len == 0) { - // empty point - return Point::CreateEmpty(has_z, false); - } - if (len < 2) { - throw InvalidInputException("GeoJSON input coordinates field is not an array of at least length 2: %s", - raw.GetString()); - } - auto x_val = yyjson_arr_get_first(coord_array); - if (!yyjson_is_num(x_val)) { - throw InvalidInputException("GeoJSON input coordinates field is not an array of numbers: %s", raw.GetString()); - } - auto y_val = yyjson_arr_get(coord_array, 1); - if (!yyjson_is_num(y_val)) { - throw InvalidInputException("GeoJSON input coordinates field is not an array of numbers: %s", raw.GetString()); - } - - auto x = yyjson_get_num(x_val); - auto y = yyjson_get_num(y_val); - - auto 
geom_has_z = len > 2; - if (geom_has_z) { - has_z = true; - auto z_val = yyjson_arr_get(coord_array, 2); - if (!yyjson_is_num(z_val)) { - throw InvalidInputException("GeoJSON input coordinates field is not an array of numbers: %s", - raw.GetString()); - } - auto z = yyjson_get_num(z_val); - return Point::CreateFromVertex(arena, VertexXYZ {x, y, z}); - } else { - return Point::CreateFromVertex(arena, VertexXY {x, y}); - } -} - -static Geometry VerticesFromGeoJSON(yyjson_val *coord_array, ArenaAllocator &arena, const string_t &raw, bool &has_z) { - auto len = yyjson_arr_size(coord_array); - if (len == 0) { - // Empty - return LineString::CreateEmpty(false, false); - } else { - // Sniff the coordinates to see if we have Z - bool has_any_z = false; - size_t idx, max; - yyjson_val *coord; - yyjson_arr_foreach(coord_array, idx, max, coord) { - if (!yyjson_is_arr(coord)) { - throw InvalidInputException("GeoJSON input coordinates field is not an array of arrays: %s", - raw.GetString()); - } - auto coord_len = yyjson_arr_size(coord); - if (coord_len > 2) { - has_any_z = true; - } else if (coord_len < 2) { - throw InvalidInputException( - "GeoJSON input coordinates field is not an array of arrays of length >= 2: %s", raw.GetString()); - } - } - - if (has_any_z) { - has_z = true; - } - - auto vertices = LineString::Create(arena, len, has_any_z, false); - - yyjson_arr_foreach(coord_array, idx, max, coord) { - auto coord_len = yyjson_arr_size(coord); - auto x_val = yyjson_arr_get_first(coord); - if (!yyjson_is_num(x_val)) { - throw InvalidInputException("GeoJSON input coordinates field is not an array of arrays of numbers: %s", - raw.GetString()); - } - auto y_val = yyjson_arr_get(coord, 1); - if (!yyjson_is_num(y_val)) { - throw InvalidInputException("GeoJSON input coordinates field is not an array of arrays of numbers: %s", - raw.GetString()); - } - auto x = yyjson_get_num(x_val); - auto y = yyjson_get_num(y_val); - auto z = 0.0; - - if (coord_len > 2) { - auto z_val = 
yyjson_arr_get(coord, 2); - if (!yyjson_is_num(z_val)) { - throw InvalidInputException( - "GeoJSON input coordinates field is not an array of arrays of numbers: %s", raw.GetString()); - } - z = yyjson_get_num(z_val); - } - if (has_any_z) { - LineString::SetVertex(vertices, idx, {x, y, z}); - } else { - LineString::SetVertex(vertices, idx, {x, y}); - } - } - return vertices; - } -} - -static Geometry LineStringFromGeoJSON(yyjson_val *coord_array, ArenaAllocator &arena, const string_t &raw, - bool &has_z) { - return VerticesFromGeoJSON(coord_array, arena, raw, has_z); -} - -static Geometry PolygonFromGeoJSON(yyjson_val *coord_array, ArenaAllocator &arena, const string_t &raw, bool &has_z) { - auto num_rings = yyjson_arr_size(coord_array); - if (num_rings == 0) { - // Empty - return Polygon::CreateEmpty(has_z, false); - } else { - // Polygon - auto polygon = Polygon::Create(arena, num_rings, has_z, false); - size_t idx, max; - yyjson_val *ring_val; - yyjson_arr_foreach(coord_array, idx, max, ring_val) { - if (!yyjson_is_arr(ring_val)) { - throw InvalidInputException("GeoJSON input coordinates field is not an array of arrays: %s", - raw.GetString()); - } - Polygon::Part(polygon, idx) = VerticesFromGeoJSON(ring_val, arena, raw, has_z); - } - - return polygon; - } -} - -static Geometry MultiPointFromGeoJSON(yyjson_val *coord_array, ArenaAllocator &arena, const string_t &raw, - bool &has_z) { - auto num_points = yyjson_arr_size(coord_array); - if (num_points == 0) { - // Empty - return MultiPoint::CreateEmpty(has_z, false); - } else { - // MultiPoint - auto multi_point = MultiPoint::Create(arena, num_points, has_z, false); - size_t idx, max; - yyjson_val *point_val; - yyjson_arr_foreach(coord_array, idx, max, point_val) { - if (!yyjson_is_arr(point_val)) { - throw InvalidInputException("GeoJSON input coordinates field is not an array of arrays: %s", - raw.GetString()); - } - if (yyjson_arr_size(point_val) < 2) { - throw InvalidInputException( - "GeoJSON input coordinates 
field is not an array of arrays of length >= 2: %s", raw.GetString()); - } - MultiPoint::Part(multi_point, idx) = PointFromGeoJSON(point_val, arena, raw, has_z); - } - return multi_point; - } -} - -static Geometry MultiLineStringFromGeoJSON(yyjson_val *coord_array, ArenaAllocator &arena, const string_t &raw, - bool &has_z) { - auto num_linestrings = yyjson_arr_size(coord_array); - if (num_linestrings == 0) { - // Empty - return MultiLineString::CreateEmpty(has_z, false); - } else { - // MultiLineString - auto multi_linestring = MultiLineString::Create(arena, num_linestrings, has_z, false); - size_t idx, max; - yyjson_val *linestring_val; - yyjson_arr_foreach(coord_array, idx, max, linestring_val) { - if (!yyjson_is_arr(linestring_val)) { - throw InvalidInputException("GeoJSON input coordinates field is not an array of arrays: %s", - raw.GetString()); - } - MultiLineString::Part(multi_linestring, idx) = LineStringFromGeoJSON(linestring_val, arena, raw, has_z); - } - - return multi_linestring; - } -} - -static Geometry MultiPolygonFromGeoJSON(yyjson_val *coord_array, ArenaAllocator &arena, const string_t &raw, - bool &has_z) { - auto num_polygons = yyjson_arr_size(coord_array); - if (num_polygons == 0) { - // Empty - return MultiPolygon::CreateEmpty(has_z, false); - } else { - // MultiPolygon - auto multi_polygon = MultiPolygon::Create(arena, num_polygons, has_z, false); - size_t idx, max; - yyjson_val *polygon_val; - yyjson_arr_foreach(coord_array, idx, max, polygon_val) { - if (!yyjson_is_arr(polygon_val)) { - throw InvalidInputException("GeoJSON input coordinates field is not an array of arrays: %s", - raw.GetString()); - } - MultiPolygon::Part(multi_polygon, idx) = PolygonFromGeoJSON(polygon_val, arena, raw, has_z); - } - - return multi_polygon; - } -} - -static Geometry FromGeoJSON(yyjson_val *root, ArenaAllocator &arena, const string_t &raw, bool &has_z); - -static Geometry GeometryCollectionFromGeoJSON(yyjson_val *root, ArenaAllocator &arena, const string_t 
&raw, - bool &has_z) { - auto geometries_val = yyjson_obj_get(root, "geometries"); - if (!geometries_val) { - throw InvalidInputException("GeoJSON input does not have a geometries field: %s", raw.GetString()); - } - if (!yyjson_is_arr(geometries_val)) { - throw InvalidInputException("GeoJSON input geometries field is not an array: %s", raw.GetString()); - } - auto num_geometries = yyjson_arr_size(geometries_val); - if (num_geometries == 0) { - // Empty - return GeometryCollection::CreateEmpty(has_z, false); - } else { - // GeometryCollection - auto geometry_collection = GeometryCollection::Create(arena, num_geometries, has_z, false); - size_t idx, max; - yyjson_val *geometry_val; - yyjson_arr_foreach(geometries_val, idx, max, geometry_val) { - GeometryCollection::Part(geometry_collection, idx) = FromGeoJSON(geometry_val, arena, raw, has_z); - } - - return geometry_collection; - } -} - -static Geometry FromGeoJSON(yyjson_val *root, ArenaAllocator &arena, const string_t &raw, bool &has_z) { - auto type_val = yyjson_obj_get(root, "type"); - if (!type_val) { - throw InvalidInputException("GeoJSON input does not have a type field: %s", raw.GetString()); - } - auto type_str = yyjson_get_str(type_val); - if (!type_str) { - throw InvalidInputException("GeoJSON input type field is not a string: %s", raw.GetString()); - } - - if (StringUtil::Equals(type_str, "GeometryCollection")) { - return GeometryCollectionFromGeoJSON(root, arena, raw, has_z); - } - - // Get the coordinates - auto coord_array = yyjson_obj_get(root, "coordinates"); - if (!coord_array) { - throw InvalidInputException("GeoJSON input does not have a coordinates field: %s", raw.GetString()); - } - if (!yyjson_is_arr(coord_array)) { - throw InvalidInputException("GeoJSON input coordinates field is not an array: %s", raw.GetString()); - } - - if (StringUtil::Equals(type_str, "Point")) { - return PointFromGeoJSON(coord_array, arena, raw, has_z); - } else if (StringUtil::Equals(type_str, "LineString")) { - return 
LineStringFromGeoJSON(coord_array, arena, raw, has_z); - } else if (StringUtil::Equals(type_str, "Polygon")) { - return PolygonFromGeoJSON(coord_array, arena, raw, has_z); - } else if (StringUtil::Equals(type_str, "MultiPoint")) { - return MultiPointFromGeoJSON(coord_array, arena, raw, has_z); - } else if (StringUtil::Equals(type_str, "MultiLineString")) { - return MultiLineStringFromGeoJSON(coord_array, arena, raw, has_z); - } else if (StringUtil::Equals(type_str, "MultiPolygon")) { - return MultiPolygonFromGeoJSON(coord_array, arena, raw, has_z); - } else { - throw InvalidInputException("GeoJSON input has invalid type field: %s", raw.GetString()); - } -} - -static void GeoJSONFragmentToGeometryFunction(DataChunk &args, ExpressionState &state, Vector &result) { - D_ASSERT(args.data.size() == 1); - auto &input = args.data[0]; - auto count = args.size(); - - auto &lstate = GeometryFunctionLocalState::ResetAndGet(state); - - JSONAllocator json_allocator(lstate.arena); - - UnaryExecutor::Execute(input, result, count, [&](string_t input) { - yyjson_read_err err; - auto doc = yyjson_read_opts(const_cast(input.GetDataUnsafe()), input.GetSize(), - YYJSON_READ_ALLOW_TRAILING_COMMAS | YYJSON_READ_ALLOW_COMMENTS, - json_allocator.GetYYJSONAllocator(), &err); - - if (err.code) { - throw InvalidInputException("Could not parse GeoJSON input: %s, (%s)", err.msg, input.GetString()); - } - - auto root = yyjson_doc_get_root(doc); - if (!yyjson_is_obj(root)) { - throw InvalidInputException("Could not parse GeoJSON input: %s, (%s)", err.msg, input.GetString()); - } else { - bool has_z = false; - auto geom = FromGeoJSON(root, lstate.arena, input, has_z); - if (has_z) { - // Ensure the geometries has consistent Z values - geom.SetVertexType(lstate.arena, has_z, false); - } - return Geometry::Serialize(geom, result); - } - }); -} - -//------------------------------------------------------------------------------ -// Documentation 
-//------------------------------------------------------------------------------ -static constexpr DocTag DOC_TAGS[] = {{"ext", "spatial"}, {"category", "conversion"}}; - -// AsGeoJSON -static constexpr const char *AS_DOC_DESCRIPTION = R"( - Returns the geometry as a GeoJSON fragment - - This does not return a complete GeoJSON document, only the geometry fragment. To construct a complete GeoJSON document or feature, look into using the DuckDB JSON extension in conjunction with this function. - This function supports geometries with Z values, but not M values. -)"; - -static constexpr const char *AS_DOC_EXAMPLE = R"( -select ST_AsGeoJSON('POLYGON((0 0, 0 1, 1 1, 1 0, 0 0))'::geometry); ----- -{"type":"Polygon","coordinates":[[[0.0,0.0],[0.0,1.0],[1.0,1.0],[1.0,0.0],[0.0,0.0]]]} - --- Convert a geometry into a full GeoJSON feature (requires the JSON extension to be loaded) -SELECT CAST({ - type: 'Feature', - geometry: ST_AsGeoJSON(ST_Point(1,2)), - properties: { - name: 'my_point' - } -} AS JSON); ----- -{"type":"Feature","geometry":{"type":"Point","coordinates":[1.0,2.0]},"properties":{"name":"my_point"}} -)"; - -// FromGeoJSON -static constexpr const char *FROM_DOC_DESCRIPTION = R"( - Deserializes a GEOMETRY from a GeoJSON fragment. 
-)"; - -static constexpr const char *FROM_DOC_EXAMPLE = R"( -SELECT ST_GeomFromGeoJSON('{"type":"Point","coordinates":[1.0,2.0]}'); ----- -POINT (1 2) -)"; - -//------------------------------------------------------------------------------ -// Register functions -//------------------------------------------------------------------------------ -void CoreScalarFunctions::RegisterStAsGeoJSON(DatabaseInstance &db) { - ScalarFunctionSet to_geojson("ST_AsGeoJSON"); - to_geojson.AddFunction(ScalarFunction({GeoTypes::GEOMETRY()}, LogicalType::JSON(), - GeometryToGeoJSONFragmentFunction, nullptr, nullptr, nullptr, - GeometryFunctionLocalState::Init)); - - ExtensionUtil::RegisterFunction(db, to_geojson); - DocUtil::AddDocumentation(db, "ST_AsGeoJSON", AS_DOC_DESCRIPTION, AS_DOC_EXAMPLE, DOC_TAGS); - - ScalarFunctionSet from_geojson("ST_GeomFromGeoJSON"); - from_geojson.AddFunction(ScalarFunction({LogicalType::VARCHAR}, GeoTypes::GEOMETRY(), - GeoJSONFragmentToGeometryFunction, nullptr, nullptr, nullptr, - GeometryFunctionLocalState::Init)); - - from_geojson.AddFunction(ScalarFunction({LogicalType::JSON()}, GeoTypes::GEOMETRY(), - GeoJSONFragmentToGeometryFunction, nullptr, nullptr, nullptr, - GeometryFunctionLocalState::Init)); - - ExtensionUtil::RegisterFunction(db, from_geojson); - DocUtil::AddDocumentation(db, "ST_GeomFromGeoJSON", FROM_DOC_DESCRIPTION, FROM_DOC_EXAMPLE, DOC_TAGS); -} - -} // namespace core - -} // namespace spatial \ No newline at end of file diff --git a/spatial/src/spatial/core/functions/scalar/st_ashexwkb.cpp b/spatial/src/spatial/core/functions/scalar/st_ashexwkb.cpp deleted file mode 100644 index c6d5c50f..00000000 --- a/spatial/src/spatial/core/functions/scalar/st_ashexwkb.cpp +++ /dev/null @@ -1,73 +0,0 @@ -#include "duckdb/common/vector_operations/generic_executor.hpp" -#include "duckdb/common/vector_operations/unary_executor.hpp" -#include "duckdb/parser/parsed_data/create_scalar_function_info.hpp" -#include "duckdb/common/types/blob.hpp" - 
-#include "spatial/common.hpp" -#include "spatial/core/functions/scalar.hpp" -#include "spatial/core/types.hpp" -#include "spatial/core/geometry/wkb_writer.hpp" - -namespace spatial { - -namespace core { - -//------------------------------------------------------------------------------ -// GEOMETRY -> HEX WKB -//------------------------------------------------------------------------------ - -void GeometryAsHEXWKBFunction(DataChunk &args, ExpressionState &state, Vector &result) { - D_ASSERT(args.data.size() == 1); - auto &input = args.data[0]; - auto count = args.size(); - - vector buffer; - UnaryExecutor::Execute(input, result, count, [&](geometry_t input) { - buffer.clear(); - WKBWriter::Write(input, buffer); - - auto blob_size = buffer.size() * 2; // every byte is rendered as two characters - auto blob_str = StringVector::EmptyString(result, blob_size); - auto blob_ptr = blob_str.GetDataWriteable(); - - idx_t str_idx = 0; - for (auto byte : buffer) { - auto byte_a = byte >> 4; - auto byte_b = byte & 0x0F; - blob_ptr[str_idx++] = Blob::HEX_TABLE[byte_a]; - blob_ptr[str_idx++] = Blob::HEX_TABLE[byte_b]; - } - - blob_str.Finalize(); - return blob_str; - }); -} - -//------------------------------------------------------------------------------ -// Documentation -//------------------------------------------------------------------------------ - -static constexpr const char *DOC_DESCRIPTION = R"( - Returns the geometry as a HEXWKB string -)"; - -static constexpr const char *DOC_EXAMPLE = R"( -SELECT ST_AsHexWKB('POLYGON((0 0, 0 1, 1 1, 1 0, 0 0))'::geometry); ----- -01030000000100000005000000000000000000000000000... 
-)"; - -static constexpr DocTag DOC_TAGS[] = {{"ext", "spatial"}, {"category", "conversion"}}; - -//------------------------------------------------------------------------------ -// Register functions -//------------------------------------------------------------------------------ -void CoreScalarFunctions::RegisterStAsHEXWKB(DatabaseInstance &db) { - ScalarFunction func("ST_AsHEXWKB", {GeoTypes::GEOMETRY()}, LogicalType::VARCHAR, GeometryAsHEXWKBFunction); - ExtensionUtil::RegisterFunction(db, func); - DocUtil::AddDocumentation(db, "ST_AsHEXWKB", DOC_DESCRIPTION, DOC_EXAMPLE, DOC_TAGS); -} - -} // namespace core - -} // namespace spatial \ No newline at end of file diff --git a/spatial/src/spatial/core/functions/scalar/st_assvg.cpp b/spatial/src/spatial/core/functions/scalar/st_assvg.cpp deleted file mode 100644 index d27ac099..00000000 --- a/spatial/src/spatial/core/functions/scalar/st_assvg.cpp +++ /dev/null @@ -1,185 +0,0 @@ -#include "duckdb/common/vector_operations/generic_executor.hpp" -#include "duckdb/parser/parsed_data/create_scalar_function_info.hpp" -#include "spatial/common.hpp" -#include "spatial/core/functions/common.hpp" -#include "spatial/core/functions/scalar.hpp" -#include "spatial/core/geometry/geometry.hpp" -#include "spatial/core/geometry/geometry_processor.hpp" -#include "spatial/core/types.hpp" -#include "spatial/core/util/math.hpp" - -namespace spatial { - -namespace core { - -static void GeometrySVGFunction(DataChunk &args, ExpressionState &state, Vector &result) { - auto &lstate = GeometryFunctionLocalState::ResetAndGet(state); - auto &arena = lstate.arena; - - struct op { - - static void PrintVertices(const Geometry &geom, const bool rel, const int32_t max_digits, bool close, - vector &buffer) { - const auto vertex_count = LineString::VertexCount(geom); - if (vertex_count == 0) { - return; - } - - VertexXY last_vert = LineString::GetVertex(geom, 0); - buffer.push_back('M'); - buffer.push_back(' '); - MathUtil::format_coord(last_vert.x, 
-last_vert.y, buffer, max_digits); - - if (vertex_count == 1) { - return; - } - - buffer.push_back(' '); - buffer.push_back(rel ? 'l' : 'L'); - - if (rel) { - for (uint32_t i = 1; i < vertex_count; i++) { - if (i == vertex_count - 1 && close) { - buffer.push_back(' '); - buffer.push_back('z'); - } else { - const auto vert = LineString::GetVertex(geom, i); - const auto relative_vert = vert - last_vert; - last_vert = vert; - buffer.push_back(' '); - MathUtil::format_coord(relative_vert.x, -relative_vert.y, buffer, max_digits); - } - } - } else { - for (uint32_t i = 1; i < vertex_count; i++) { - if (i == vertex_count - 1 && close) { - buffer.push_back(' '); - buffer.push_back('Z'); - } else { - const auto vert = LineString::GetVertex(geom, i); - buffer.push_back(' '); - MathUtil::format_coord(vert.x, -vert.y, buffer, max_digits); - } - } - } - } - - static void Case(Geometry::Tags::Point, const Geometry &geom, const bool rel, const int32_t max_digits, - vector &buffer) { - if (!Point::IsEmpty(geom)) { - const auto vert = Point::GetVertex(geom); - if (!rel) { - constexpr auto cx = "cx=\""; - constexpr auto cy = "cy=\""; - buffer.insert(buffer.end(), cx, cx + 4); - MathUtil::format_coord(vert.x, buffer, max_digits); - buffer.push_back('"'); - buffer.push_back(' '); - buffer.insert(buffer.end(), cy, cy + 4); - MathUtil::format_coord(-vert.y, buffer, max_digits); - buffer.push_back('"'); - } else { - constexpr auto x = "x=\""; - constexpr auto y = "y=\""; - buffer.insert(buffer.end(), x, x + 3); - MathUtil::format_coord(vert.x, buffer, max_digits); - buffer.push_back('"'); - buffer.push_back(' '); - buffer.insert(buffer.end(), y, y + 3); - MathUtil::format_coord(-vert.y, buffer, max_digits); - buffer.push_back('"'); - } - } - } - static void Case(Geometry::Tags::LineString, const Geometry &geom, const bool rel, const int32_t max_digits, - vector &buffer) { - PrintVertices(geom, rel, max_digits, false, buffer); - } - static void Case(Geometry::Tags::Polygon, const Geometry 
&geom, const bool rel, const int32_t max_digits, - vector &buffer) { - const auto ring_count = Polygon::PartCount(geom); - for (uint32_t i = 0; i < ring_count; i++) { - const auto &ring = Polygon::Part(geom, i); - PrintVertices(ring, rel, max_digits, true, buffer); - } - } - static void Case(Geometry::Tags::MultiPartGeometry, const Geometry &geom, const bool rel, - const int32_t max_digits, vector &buffer) { - // Special delimiter for multipoint and geometry collections - char delimiter = ' '; - if (geom.GetType() == GeometryType::MULTIPOINT) { - delimiter = ','; - } else if (geom.GetType() == GeometryType::GEOMETRYCOLLECTION) { - delimiter = ';'; - } - - const auto part_count = MultiPartGeometry::PartCount(geom); - for (uint32_t i = 0; i < part_count; i++) { - if (i > 0) { - buffer.push_back(delimiter); - } - auto &part = MultiPartGeometry::Part(geom, i); - Geometry::Match(part, rel, max_digits, buffer); - } - } - }; - - // Buffer holding the SVG fragment - vector buffer; - - TernaryExecutor::Execute( - args.data[0], args.data[1], args.data[2], result, args.size(), - [&](const geometry_t &blob, bool rel, int32_t max_digits) { - if (max_digits < 0 || max_digits > 15) { - throw InvalidInputException("max_digits must be between 0 and 15"); - } - - buffer.clear(); - - auto geom = Geometry::Deserialize(arena, blob); - - Geometry::Match(geom, rel, max_digits, buffer); - - return StringVector::AddString(result, buffer.data(), buffer.size()); - }); -} - -//------------------------------------------------------------------------------ -// Documentation -//------------------------------------------------------------------------------ -static constexpr const char *DOC_DESCRIPTION = R"( - Convert the geometry into a SVG fragment or path - - Convert the geometry into a SVG fragment or path - The SVG fragment is returned as a string. The fragment is a path element that can be used in an SVG document. 
- The second boolean argument specifies whether the path should be relative or absolute. - The third argument specifies the maximum number of digits to use for the coordinates. - - Points are formatted as cx/cy using absolute coordinates or x/y using relative coordinates. -)"; - -static constexpr const char *DOC_EXAMPLE = R"( -SELECT ST_AsSVG('POLYGON((0 0, 0 1, 1 1, 1 0, 0 0))'::GEOMETRY, false, 15); ----- -M 0 0 L 0 -1 1 -1 1 0 Z -)"; - -static constexpr DocTag DOC_TAGS[] = {{"ext", "spatial"}, {"category", "conversion"}}; - -//------------------------------------------------------------------------------ -// Register functions -//------------------------------------------------------------------------------ -void CoreScalarFunctions::RegisterStAsSVG(DatabaseInstance &db) { - - ScalarFunctionSet set("ST_AsSVG"); - set.AddFunction(ScalarFunction({GeoTypes::GEOMETRY(), LogicalType::BOOLEAN, LogicalType::INTEGER}, - LogicalType::VARCHAR, GeometrySVGFunction, nullptr, nullptr, nullptr, - GeometryFunctionLocalState::Init)); - - ExtensionUtil::RegisterFunction(db, set); - DocUtil::AddDocumentation(db, "ST_AsSVG", DOC_DESCRIPTION, DOC_EXAMPLE, DOC_TAGS); -} - -} // namespace core - -} // namespace spatial diff --git a/spatial/src/spatial/core/functions/scalar/st_astext.cpp b/spatial/src/spatial/core/functions/scalar/st_astext.cpp deleted file mode 100644 index 5012993d..00000000 --- a/spatial/src/spatial/core/functions/scalar/st_astext.cpp +++ /dev/null @@ -1,106 +0,0 @@ -#include "duckdb/common/vector_operations/generic_executor.hpp" -#include "duckdb/parser/parsed_data/create_scalar_function_info.hpp" -#include "spatial/common.hpp" -#include "spatial/core/functions/scalar.hpp" -#include "spatial/core/types.hpp" - -#include "spatial/core/functions/cast.hpp" - -namespace spatial { - -namespace core { - -//------------------------------------------------------------------------------ -// POINT_2D 
-//------------------------------------------------------------------------------ - -static void Point2DAsTextFunction(DataChunk &args, ExpressionState &state, Vector &result) { - D_ASSERT(args.data.size() == 1); - auto &input = args.data[0]; - auto count = args.size(); - CoreVectorOperations::Point2DToVarchar(input, result, count); -} - -//------------------------------------------------------------------------------ -// LINESTRING_2D -//------------------------------------------------------------------------------ - -// TODO: We want to format these to trim trailing zeros -static void LineString2DAsTextFunction(DataChunk &args, ExpressionState &state, Vector &result) { - D_ASSERT(args.data.size() == 1); - auto &input = args.data[0]; - auto count = args.size(); - CoreVectorOperations::LineString2DToVarchar(input, result, count); -} - -//------------------------------------------------------------------------------ -// POLYGON_2D -//------------------------------------------------------------------------------ - -// TODO: We want to format these to trim trailing zeros -static void Polygon2DAsTextFunction(DataChunk &args, ExpressionState &state, Vector &result) { - D_ASSERT(args.data.size() == 1); - auto count = args.size(); - auto &input = args.data[0]; - CoreVectorOperations::Polygon2DToVarchar(input, result, count); -} - -//------------------------------------------------------------------------------ -// BOX_2D -//------------------------------------------------------------------------------ -static void Box2DAsTextFunction(DataChunk &args, ExpressionState &state, Vector &result) { - D_ASSERT(args.data.size() == 1); - auto count = args.size(); - auto &input = args.data[0]; - CoreVectorOperations::Box2DToVarchar(input, result, count); -} - -//------------------------------------------------------------------------------ -// GEOMETRY -//------------------------------------------------------------------------------ -static void GeometryAsTextFunction(DataChunk 
&args, ExpressionState &state, Vector &result) { - D_ASSERT(args.data.size() == 1); - auto count = args.size(); - auto &input = args.data[0]; - CoreVectorOperations::GeometryToVarchar(input, result, count); -} - -//------------------------------------------------------------------------------ -// Documentation -//------------------------------------------------------------------------------ - -static constexpr const char *DOC_DESCRIPTION = R"( - Returns the geometry as a WKT string -)"; - -static constexpr const char *DOC_EXAMPLE = R"( -SELECT ST_AsText(ST_MakeEnvelope(0,0,1,1)); ----- -POLYGON ((0 0, 0 1, 1 1, 1 0, 0 0)) -)"; - -static constexpr DocTag DOC_TAGS[] = {{"ext", "spatial"}, {"category", "conversion"}}; - -//------------------------------------------------------------------------------ -// Register functions -//------------------------------------------------------------------------------ -void CoreScalarFunctions::RegisterStAsText(DatabaseInstance &db) { - ScalarFunctionSet as_text_function_set("ST_AsText"); - - as_text_function_set.AddFunction( - ScalarFunction({GeoTypes::POINT_2D()}, LogicalType::VARCHAR, Point2DAsTextFunction)); - as_text_function_set.AddFunction( - ScalarFunction({GeoTypes::LINESTRING_2D()}, LogicalType::VARCHAR, LineString2DAsTextFunction)); - as_text_function_set.AddFunction( - ScalarFunction({GeoTypes::POLYGON_2D()}, LogicalType::VARCHAR, Polygon2DAsTextFunction)); - as_text_function_set.AddFunction(ScalarFunction({GeoTypes::BOX_2D()}, LogicalType::VARCHAR, Box2DAsTextFunction)); - as_text_function_set.AddFunction( - ScalarFunction({GeoTypes::GEOMETRY()}, LogicalType::VARCHAR, GeometryAsTextFunction)); - - ExtensionUtil::RegisterFunction(db, as_text_function_set); - DocUtil::AddDocumentation(db, "ST_AsText", DOC_DESCRIPTION, DOC_EXAMPLE, DOC_TAGS); -} - -} // namespace core - -} // namespace spatial \ No newline at end of file diff --git a/spatial/src/spatial/core/functions/scalar/st_aswkb.cpp 
b/spatial/src/spatial/core/functions/scalar/st_aswkb.cpp deleted file mode 100644 index 9adec5e7..00000000 --- a/spatial/src/spatial/core/functions/scalar/st_aswkb.cpp +++ /dev/null @@ -1,56 +0,0 @@ -#include "duckdb/common/vector_operations/generic_executor.hpp" -#include "duckdb/common/vector_operations/unary_executor.hpp" -#include "duckdb/parser/parsed_data/create_scalar_function_info.hpp" -#include "spatial/common.hpp" -#include "spatial/core/functions/scalar.hpp" -#include "spatial/core/types.hpp" -#include "spatial/core/geometry/wkb_writer.hpp" - -namespace spatial { - -namespace core { - -//------------------------------------------------------------------------------ -// GEOMETRY -//------------------------------------------------------------------------------ -void GeometryAsWBKFunction(DataChunk &args, ExpressionState &state, Vector &result) { - D_ASSERT(args.data.size() == 1); - auto &input = args.data[0]; - auto count = args.size(); - - UnaryExecutor::Execute(input, result, count, - [&](geometry_t input) { return WKBWriter::Write(input, result); }); -} - -//------------------------------------------------------------------------------ -// Documentation -//------------------------------------------------------------------------------ - -static constexpr const char *DOC_DESCRIPTION = R"( - Returns the geometry as a WKB blob -)"; - -static constexpr const char *DOC_EXAMPLE = R"( -SELECT ST_AsWKB('POLYGON((0 0, 0 1, 1 1, 1 0, 0 0))'::GEOMETRY)::BLOB; ----- -\x01\x03\x00\x00\x00\x01\x00\x00\x00\x05... 
-)"; - -static constexpr DocTag DOC_TAGS[] = {{"ext", "spatial"}, {"category", "conversion"}}; - -//------------------------------------------------------------------------------ -// Register functions -//------------------------------------------------------------------------------ -void CoreScalarFunctions::RegisterStAsWKB(DatabaseInstance &db) { - ScalarFunctionSet as_wkb_function_set("ST_AsWKB"); - - as_wkb_function_set.AddFunction( - ScalarFunction({GeoTypes::GEOMETRY()}, GeoTypes::WKB_BLOB(), GeometryAsWBKFunction)); - - ExtensionUtil::RegisterFunction(db, as_wkb_function_set); - DocUtil::AddDocumentation(db, "ST_AsWKB", DOC_DESCRIPTION, DOC_EXAMPLE, DOC_TAGS); -} - -} // namespace core - -} // namespace spatial \ No newline at end of file diff --git a/spatial/src/spatial/core/functions/scalar/st_centroid.cpp b/spatial/src/spatial/core/functions/scalar/st_centroid.cpp deleted file mode 100644 index 8299355e..00000000 --- a/spatial/src/spatial/core/functions/scalar/st_centroid.cpp +++ /dev/null @@ -1,217 +0,0 @@ -#include "duckdb/parser/parsed_data/create_scalar_function_info.hpp" -#include "duckdb/common/vector_operations/generic_executor.hpp" -#include "spatial/common.hpp" -#include "spatial/core/functions/scalar.hpp" -#include "spatial/core/functions/common.hpp" -#include "spatial/core/geometry/geometry.hpp" -#include "spatial/core/types.hpp" - -namespace spatial { - -namespace core { - -//------------------------------------------------------------------------------ -// POINT_2D -//------------------------------------------------------------------------------ -static void PointCentroidFunction(DataChunk &args, ExpressionState &state, Vector &result) { - // The centroid of a point is the point itself - auto input = args.data[0]; - result.Reference(input); -} - -//------------------------------------------------------------------------------ -// LINESTRING_2D -//------------------------------------------------------------------------------ -static void 
LineStringCentroidFunction(DataChunk &args, ExpressionState &state, Vector &result) { - auto input = args.data[0]; - auto count = args.size(); - UnifiedVectorFormat format; - input.ToUnifiedFormat(count, format); - - auto line_vertex_entries = ListVector::GetData(input); - auto &line_vertex_vec = ListVector::GetEntry(input); - auto &line_vertex_vec_children = StructVector::GetEntries(line_vertex_vec); - auto line_x_data = FlatVector::GetData(*line_vertex_vec_children[0]); - auto line_y_vec = FlatVector::GetData(*line_vertex_vec_children[1]); - - auto &point_vertex_children = StructVector::GetEntries(result); - auto point_x_data = FlatVector::GetData(*point_vertex_children[0]); - auto point_y_data = FlatVector::GetData(*point_vertex_children[1]); - for (idx_t out_row_idx = 0; out_row_idx < count; out_row_idx++) { - - auto in_row_idx = format.sel->get_index(out_row_idx); - if (format.validity.RowIsValid(in_row_idx)) { - auto line = line_vertex_entries[in_row_idx]; - auto line_offset = line.offset; - auto line_length = line.length; - - double total_x = 0; - double total_y = 0; - double total_length = 0; - - // To calculate the centroid of a line, we calculate the centroid of each segment - // and then weight the segment centroids by the length of the segment. - // The final centroid is the sum of the weighted segment centroids divided by the total length. 
- for (idx_t coord_idx = line_offset; coord_idx < line_offset + line_length - 1; coord_idx++) { - auto x1 = line_x_data[coord_idx]; - auto y1 = line_y_vec[coord_idx]; - auto x2 = line_x_data[coord_idx + 1]; - auto y2 = line_y_vec[coord_idx + 1]; - - auto segment_length = sqrt((x2 - x1) * (x2 - x1) + (y2 - y1) * (y2 - y1)); - total_length += segment_length; - total_x += (x1 + x2) * 0.5 * segment_length; - total_y += (y1 + y2) * 0.5 * segment_length; - } - - point_x_data[out_row_idx] = total_x / total_length; - point_y_data[out_row_idx] = total_y / total_length; - - } else { - FlatVector::SetNull(result, out_row_idx, true); - } - } - if (count == 1) { - result.SetVectorType(VectorType::CONSTANT_VECTOR); - } -} - -//------------------------------------------------------------------------------ -// POLYGON_2D -//------------------------------------------------------------------------------ -static void PolygonCentroidFunction(DataChunk &args, ExpressionState &state, Vector &result) { - auto input = args.data[0]; - auto count = args.size(); - UnifiedVectorFormat format; - input.ToUnifiedFormat(count, format); - - auto poly_entries = ListVector::GetData(input); - auto &ring_vec = ListVector::GetEntry(input); - auto ring_entries = ListVector::GetData(ring_vec); - auto &vertex_vec = ListVector::GetEntry(ring_vec); - auto &vertex_vec_children = StructVector::GetEntries(vertex_vec); - auto x_data = FlatVector::GetData(*vertex_vec_children[0]); - auto y_data = FlatVector::GetData(*vertex_vec_children[1]); - - auto ¢roid_children = StructVector::GetEntries(result); - auto centroid_x_data = FlatVector::GetData(*centroid_children[0]); - auto centroid_y_data = FlatVector::GetData(*centroid_children[1]); - - for (idx_t in_row_idx = 0; in_row_idx < count; in_row_idx++) { - if (format.validity.RowIsValid(in_row_idx)) { - auto poly = poly_entries[in_row_idx]; - auto poly_offset = poly.offset; - auto poly_length = poly.length; - - double poly_centroid_x = 0; - double poly_centroid_y = 
0; - double poly_area = 0; - - // To calculate the centroid of a polygon, we calculate the centroid of each ring - // and then weight the ring centroids by the area of the ring. - // The final centroid is the sum of the weighted ring centroids divided by the total area. - for (idx_t ring_idx = poly_offset; ring_idx < poly_offset + poly_length; ring_idx++) { - auto ring = ring_entries[ring_idx]; - auto ring_offset = ring.offset; - auto ring_length = ring.length; - - double ring_centroid_x = 0; - double ring_centroid_y = 0; - double ring_area = 0; - - // To calculate the centroid of a ring, we calculate the centroid of each triangle - // and then weight the triangle centroids by the area of the triangle. - // The final centroid is the sum of the weighted triangle centroids divided by the ring area. - for (idx_t coord_idx = ring_offset; coord_idx < ring_offset + ring_length - 1; coord_idx++) { - auto x1 = x_data[coord_idx]; - auto y1 = y_data[coord_idx]; - auto x2 = x_data[coord_idx + 1]; - auto y2 = y_data[coord_idx + 1]; - - auto tri_area = (x1 * y2) - (x2 * y1); - ring_centroid_x += (x1 + x2) * tri_area; - ring_centroid_y += (y1 + y2) * tri_area; - ring_area += tri_area; - } - ring_area *= 0.5; - - ring_centroid_x /= (ring_area * 6); - ring_centroid_y /= (ring_area * 6); - - if (ring_idx == poly_offset) { - // The first ring is the outer ring, and the remaining rings are holes. - // For the outer ring, we add the area and centroid to the total area and centroid. - poly_area += ring_area; - poly_centroid_x += ring_centroid_x * ring_area; - poly_centroid_y += ring_centroid_y * ring_area; - } else { - // For holes, we subtract the area and centroid from the total area and centroid. 
- poly_area -= ring_area; - poly_centroid_x -= ring_centroid_x * ring_area; - poly_centroid_y -= ring_centroid_y * ring_area; - } - } - centroid_x_data[in_row_idx] = poly_centroid_x / poly_area; - centroid_y_data[in_row_idx] = poly_centroid_y / poly_area; - } else { - FlatVector::SetNull(result, in_row_idx, true); - } - } - if (count == 1) { - result.SetVectorType(VectorType::CONSTANT_VECTOR); - } -} - -//------------------------------------------------------------------------------ -// BOX_2D -//------------------------------------------------------------------------------ -template -static void BoxCentroidFunction(DataChunk &args, ExpressionState &state, Vector &result) { - // using BOX_TYPE = StructTypeQuaternary; - // using POINT_TYPE = StructTypeBinary; - - auto input = args.data[0]; - auto count = args.size(); - UnifiedVectorFormat format; - input.ToUnifiedFormat(count, format); - auto &box_children = StructVector::GetEntries(input); - auto minx_data = FlatVector::GetData(*box_children[0]); - auto miny_data = FlatVector::GetData(*box_children[1]); - auto maxx_data = FlatVector::GetData(*box_children[2]); - auto maxy_data = FlatVector::GetData(*box_children[3]); - - auto ¢roid_children = StructVector::GetEntries(result); - auto centroid_x_data = FlatVector::GetData(*centroid_children[0]); - auto centroid_y_data = FlatVector::GetData(*centroid_children[1]); - - for (idx_t out_row_idx = 0; out_row_idx < count; out_row_idx++) { - auto in_row_idx = format.sel->get_index(out_row_idx); - if (format.validity.RowIsValid(in_row_idx)) { - centroid_x_data[out_row_idx] = (minx_data[in_row_idx] + maxx_data[in_row_idx]) * 0.5; - centroid_y_data[out_row_idx] = (miny_data[in_row_idx] + maxy_data[in_row_idx]) * 0.5; - } else { - FlatVector::SetNull(result, out_row_idx, true); - } - } - if (count == 1) { - result.SetVectorType(VectorType::CONSTANT_VECTOR); - } -} - -//------------------------------------------------------------------------------ -// Register functions 
-//------------------------------------------------------------------------------ -void CoreScalarFunctions::RegisterStCentroid(DatabaseInstance &db) { - ScalarFunctionSet set("ST_Centroid"); - set.AddFunction(ScalarFunction({GeoTypes::POINT_2D()}, GeoTypes::POINT_2D(), PointCentroidFunction)); - set.AddFunction(ScalarFunction({GeoTypes::LINESTRING_2D()}, GeoTypes::POINT_2D(), LineStringCentroidFunction)); - set.AddFunction(ScalarFunction({GeoTypes::POLYGON_2D()}, GeoTypes::POINT_2D(), PolygonCentroidFunction)); - set.AddFunction(ScalarFunction({GeoTypes::BOX_2D()}, GeoTypes::POINT_2D(), BoxCentroidFunction)); - set.AddFunction(ScalarFunction({GeoTypes::BOX_2DF()}, GeoTypes::POINT_2D(), BoxCentroidFunction)); - - ExtensionUtil::RegisterFunction(db, set); -} - -} // namespace core - -} // namespace spatial \ No newline at end of file diff --git a/spatial/src/spatial/core/functions/scalar/st_collect.cpp b/spatial/src/spatial/core/functions/scalar/st_collect.cpp deleted file mode 100644 index e60c737c..00000000 --- a/spatial/src/spatial/core/functions/scalar/st_collect.cpp +++ /dev/null @@ -1,144 +0,0 @@ -#include "spatial/common.hpp" -#include "spatial/core/types.hpp" -#include "spatial/core/functions/scalar.hpp" -#include "spatial/core/functions/common.hpp" -#include "spatial/core/geometry/geometry.hpp" - -#include "duckdb/parser/parsed_data/create_scalar_function_info.hpp" -#include "duckdb/common/vector_operations/unary_executor.hpp" -#include "duckdb/common/vector_operations/binary_executor.hpp" - -namespace spatial { - -namespace core { - -static void CollectFunction(DataChunk &args, ExpressionState &state, Vector &result) { - auto &lstate = GeometryFunctionLocalState::ResetAndGet(state); - auto &arena = lstate.arena; - auto count = args.size(); - auto &child_vec = ListVector::GetEntry(args.data[0]); - UnifiedVectorFormat format; - child_vec.ToUnifiedFormat(count, format); - - UnaryExecutor::Execute(args.data[0], result, count, [&](list_entry_t &geometry_list) { 
- auto offset = geometry_list.offset; - auto length = geometry_list.length; - - // First figure out if we have Z or M - bool has_z = false; - bool has_m = false; - bool all_points = true; - bool all_lines = true; - bool all_polygons = true; - - for (idx_t i = offset; i < offset + length; i++) { - auto mapped_idx = format.sel->get_index(i); - if (format.validity.RowIsValid(mapped_idx)) { - auto geometry_blob = ((geometry_t *)format.data)[mapped_idx]; - auto props = geometry_blob.GetProperties(); - has_z = has_z || props.HasZ(); - has_m = has_m || props.HasM(); - } - } - - vector geometries; - for (idx_t i = offset; i < offset + length; i++) { - auto mapped_idx = format.sel->get_index(i); - if (format.validity.RowIsValid(mapped_idx)) { - auto geometry_blob = ((geometry_t *)format.data)[mapped_idx]; - auto geometry = Geometry::Deserialize(arena, geometry_blob); - // Dont add empty geometries - if (!Geometry::IsEmpty(geometry)) { - all_points = all_points && geometry_blob.GetType() == GeometryType::POINT; - all_lines = all_lines && geometry_blob.GetType() == GeometryType::LINESTRING; - all_polygons = all_polygons && geometry_blob.GetType() == GeometryType::POLYGON; - - // Ensure all geometries have the same Z and M - geometry.SetVertexType(arena, has_z, has_m); - geometries.push_back(std::move(geometry)); - } - } - } - - if (geometries.empty()) { - return Geometry::Serialize(GeometryCollection::CreateEmpty(has_z, has_m), result); - } - - // TODO: Dont upcast the children, just append them. 
- if (all_points) { - return Geometry::Serialize(MultiPoint::Create(arena, geometries, has_z, has_m), result); - } else if (all_lines) { - return Geometry::Serialize(MultiLineString::Create(arena, geometries, has_z, has_m), result); - } else if (all_polygons) { - return Geometry::Serialize(MultiPolygon::Create(arena, geometries, has_z, has_m), result); - } else { - return Geometry::Serialize(GeometryCollection::Create(arena, geometries, has_z, has_m), result); - } - }); -} - -//------------------------------------------------------------------------------ -// Documentation -//------------------------------------------------------------------------------ - -static constexpr const char *DOC_DESCRIPTION = R"( - -Collects a list of geometries into a collection geometry. -- If all geometries are `POINT`'s, a `MULTIPOINT` is returned. -- If all geometries are `LINESTRING`'s, a `MULTILINESTRING` is returned. -- If all geometries are `POLYGON`'s, a `MULTIPOLYGON` is returned. -- Otherwise if the input collection contains a mix of geometry types, a `GEOMETRYCOLLECTION` is returned. - -Empty and `NULL` geometries are ignored. If all geometries are empty or `NULL`, a `GEOMETRYCOLLECTION EMPTY` is returned. 
-)"; - -static constexpr const char *DOC_EXAMPLE = R"( --- With all POINT's, a MULTIPOINT is returned -SELECT ST_Collect([ST_Point(1, 2), ST_Point(3, 4)]); ----- -MULTIPOINT (1 2, 3 4) - --- With mixed geometry types, a GEOMETRYCOLLECTION is returned -SELECT ST_Collect([ST_Point(1, 2), ST_GeomFromText('LINESTRING(3 4, 5 6)')]); ----- -GEOMETRYCOLLECTION (POINT (1 2), LINESTRING (3 4, 5 6)) - --- Note that the empty geometry is ignored, so the result is a MULTIPOINT -SELECT ST_Collect([ST_Point(1, 2), NULL, ST_GeomFromText('GEOMETRYCOLLECTION EMPTY')]); ----- -MULTIPOINT (1 2) - --- If all geometries are empty or NULL, a GEOMETRYCOLLECTION EMPTY is returned -SELECT ST_Collect([NULL, ST_GeomFromText('GEOMETRYCOLLECTION EMPTY')]); ----- -GEOMETRYCOLLECTION EMPTY - --- Tip: You can use the `ST_Collect` function together with the `list()` aggregate function to collect multiple rows of geometries into a single geometry collection: - -CREATE TABLE points (geom GEOMETRY); - -INSERT INTO points VALUES (ST_Point(1, 2)), (ST_Point(3, 4)); - -SELECT ST_Collect(list(geom)) FROM points; ----- -MULTIPOINT (1 2, 3 4) -)"; - -static constexpr DocTag DOC_TAGS[] = {{"ext", "spatial"}, {"category", "construction"}}; -//------------------------------------------------------------------------------ -// Register functions -//------------------------------------------------------------------------------ - -void CoreScalarFunctions::RegisterStCollect(DatabaseInstance &db) { - ScalarFunctionSet set("ST_Collect"); - - set.AddFunction(ScalarFunction({LogicalType::LIST(GeoTypes::GEOMETRY())}, GeoTypes::GEOMETRY(), CollectFunction, - nullptr, nullptr, nullptr, GeometryFunctionLocalState::Init)); - - ExtensionUtil::RegisterFunction(db, set); - DocUtil::AddDocumentation(db, "ST_Collect", DOC_DESCRIPTION, DOC_EXAMPLE, DOC_TAGS); -} - -} // namespace core - -} // namespace spatial diff --git a/spatial/src/spatial/core/functions/scalar/st_collectionextract.cpp 
b/spatial/src/spatial/core/functions/scalar/st_collectionextract.cpp deleted file mode 100644 index 82ad4598..00000000 --- a/spatial/src/spatial/core/functions/scalar/st_collectionextract.cpp +++ /dev/null @@ -1,204 +0,0 @@ -#include "spatial/common.hpp" -#include "spatial/core/types.hpp" -#include "spatial/core/functions/scalar.hpp" -#include "spatial/core/functions/common.hpp" -#include "spatial/core/geometry/geometry.hpp" - -#include "duckdb/parser/parsed_data/create_scalar_function_info.hpp" -#include "duckdb/common/vector_operations/unary_executor.hpp" -#include "duckdb/common/vector_operations/binary_executor.hpp" - -namespace spatial { - -namespace core { - -//------------------------------------------------------------------------------ -// GEOMETRY -//------------------------------------------------------------------------------ -// Collection extract with a specific dimension -static void CollectionExtractTypeFunction(DataChunk &args, ExpressionState &state, Vector &result) { - auto &lstate = GeometryFunctionLocalState::ResetAndGet(state); - auto &arena = lstate.arena; - auto count = args.size(); - auto &input = args.data[0]; - auto &dim = args.data[1]; - - // Items vector - vector items; - - BinaryExecutor::Execute(input, dim, result, count, [&](geometry_t input, int32_t requested_type) { - // Reset the items vector - items.clear(); - - // Deserialize the input geometry - auto props = input.GetProperties(); - auto geometry = Geometry::Deserialize(arena, input); - - // Switch on the requested type - switch (requested_type) { - case 1: { - if (geometry.GetType() == GeometryType::MULTIPOINT || geometry.GetType() == GeometryType::POINT) { - return input; - } else if (geometry.IsCollection()) { - // if it is a geometry collection, we need to collect all points - if (geometry.GetType() == GeometryType::GEOMETRYCOLLECTION && !GeometryCollection::IsEmpty(geometry)) { - Geometry::ExtractPoints(geometry, [&](const Geometry &point) { items.push_back(point); }); - 
auto mpoint = MultiPoint::Create(arena, items, props.HasZ(), props.HasM()); - return Geometry::Serialize(mpoint, result); - } - // otherwise, we return an empty multipoint - auto empty = MultiPoint::CreateEmpty(props.HasZ(), props.HasM()); - return Geometry::Serialize(empty, result); - } else { - // otherwise if its not a collection, we return an empty point - auto empty = Point::CreateEmpty(props.HasZ(), props.HasM()); - return Geometry::Serialize(empty, result); - } - } - case 2: { - if (geometry.GetType() == GeometryType::MULTILINESTRING || geometry.GetType() == GeometryType::LINESTRING) { - return input; - } else if (geometry.IsCollection()) { - // if it is a geometry collection, we need to collect all lines - if (geometry.GetType() == GeometryType::GEOMETRYCOLLECTION && !GeometryCollection::IsEmpty(geometry)) { - Geometry::ExtractLines(geometry, [&](const Geometry &line) { items.push_back(line); }); - auto mline = MultiLineString::Create(arena, items, props.HasZ(), props.HasM()); - return Geometry::Serialize(mline, result); - } - // otherwise, we return an empty multilinestring - auto empty = MultiLineString::CreateEmpty(props.HasZ(), props.HasM()); - return Geometry::Serialize(empty, result); - } else { - // otherwise if its not a collection, we return an empty linestring - auto empty = LineString::CreateEmpty(props.HasZ(), props.HasM()); - return Geometry::Serialize(empty, result); - } - } - case 3: { - if (geometry.GetType() == GeometryType::MULTIPOLYGON || geometry.GetType() == GeometryType::POLYGON) { - return input; - } else if (geometry.IsCollection()) { - // if it is a geometry collection, we need to collect all polygons - if (geometry.GetType() == GeometryType::GEOMETRYCOLLECTION && !GeometryCollection::IsEmpty(geometry)) { - Geometry::ExtractPolygons(geometry, [&](const Geometry &poly) { items.push_back(poly); }); - auto mpoly = MultiPolygon::Create(arena, items, props.HasZ(), props.HasM()); - return Geometry::Serialize(mpoly, result); - } - // 
otherwise, we return an empty multipolygon - auto empty = MultiPolygon::CreateEmpty(props.HasZ(), props.HasM()); - return Geometry::Serialize(empty, result); - } else { - // otherwise if its not a collection, we return an empty polygon - auto empty = Polygon::CreateEmpty(props.HasZ(), props.HasM()); - return Geometry::Serialize(empty, result); - } - } - default: - throw InvalidInputException("Invalid requested type parameter for collection extract, must be 1 " - "(POINT), 2 (LINESTRING) or 3 (POLYGON)"); - } - }); -} - -// Note: We're being smart here and reusing the memory from the input geometry -static void CollectionExtractAutoFunction(DataChunk &args, ExpressionState &state, Vector &result) { - auto &lstate = GeometryFunctionLocalState::ResetAndGet(state); - auto &arena = lstate.arena; - - auto count = args.size(); - auto &input = args.data[0]; - - vector items; - - UnaryExecutor::Execute(input, result, count, [&](geometry_t input) { - if (input.GetType() == GeometryType::GEOMETRYCOLLECTION) { - // Reset the items vector - items.clear(); - - auto props = input.GetProperties(); - auto collection = Geometry::Deserialize(arena, input); - if (GeometryCollection::IsEmpty(collection)) { - return input; - } - // Find the highest dimension of the geometries in the collection - // Empty geometries are ignored - auto dim = Geometry::GetDimension(collection, true); - - switch (dim) { - // Point case - case 0: { - Geometry::ExtractPoints(collection, [&](const Geometry &point) { items.push_back(point); }); - auto mpoint = MultiPoint::Create(arena, items, props.HasZ(), props.HasM()); - return Geometry::Serialize(mpoint, result); - } - // LineString case - case 1: { - Geometry::ExtractLines(collection, [&](const Geometry &line) { items.push_back(line); }); - auto mline = MultiLineString::Create(arena, items, props.HasZ(), props.HasM()); - return Geometry::Serialize(mline, result); - } - // Polygon case - case 2: { - Geometry::ExtractPolygons(collection, [&](const Geometry 
&poly) { items.push_back(poly); }); - auto mpoly = MultiPolygon::Create(arena, items, props.HasZ(), props.HasM()); - return Geometry::Serialize(mpoly, result); - } - default: { - throw InternalException("Invalid dimension in collection extract"); - } - } - } else { - return input; - } - }); -} -//------------------------------------------------------------------------------ -// Documentation -//------------------------------------------------------------------------------ - -static constexpr const char *DOC_DESCRIPTION = R"( -Extracts geometries from a GeometryCollection into a typed multi geometry. - -If the input geometry is a GeometryCollection, the function will return a multi geometry, determined by the `type` parameter. -- if `type` = 1, returns a MultiPoint containg all the Points in the collection -- if `type` = 2, returns a MultiLineString containg all the LineStrings in the collection -- if `type` = 3, returns a MultiPolygon containg all the Polygons in the collection - -If no `type` parameters is provided, the function will return a multi geometry matching the highest "surface dimension" -of the contained geometries. E.g. if the collection contains only Points, a MultiPoint will be returned. But if the -collection contains both Points and LineStrings, a MultiLineString will be returned. Similarly, if the collection -contains Polygons, a MultiPolygon will be returned. Contained geometries of a lower surface dimension will be ignored. - -If the input geometry contains nested GeometryCollections, their geometries will be extracted recursively and included -into the final multi geometry as well. - -If the input geometry is not a GeometryCollection, the function will return the input geometry as is. 
-)"; - -static constexpr const char *DOC_EXAMPLE = R"( -select st_collectionextract('MULTIPOINT(1 2,3 4)'::geometry, 1); --- MULTIPOINT (1 2, 3 4) -)"; - -static constexpr DocTag DOC_TAGS[] = {{"ext", "spatial"}, {"category", "construction"}}; - -//------------------------------------------------------------------------------ -// Register -//------------------------------------------------------------------------------ -void CoreScalarFunctions::RegisterStCollectionExtract(DatabaseInstance &db) { - ScalarFunctionSet set("ST_CollectionExtract"); - - set.AddFunction(ScalarFunction({GeoTypes::GEOMETRY()}, GeoTypes::GEOMETRY(), CollectionExtractAutoFunction, nullptr, - nullptr, nullptr, GeometryFunctionLocalState::Init)); - set.AddFunction(ScalarFunction({GeoTypes::GEOMETRY(), LogicalType::INTEGER}, GeoTypes::GEOMETRY(), - CollectionExtractTypeFunction, nullptr, nullptr, nullptr, - GeometryFunctionLocalState::Init)); - - ExtensionUtil::RegisterFunction(db, set); - DocUtil::AddDocumentation(db, "ST_CollectionExtract", DOC_DESCRIPTION, DOC_EXAMPLE, DOC_TAGS, {"geom", "type"}); -} - -} // namespace core - -} // namespace spatial diff --git a/spatial/src/spatial/core/functions/scalar/st_contains.cpp b/spatial/src/spatial/core/functions/scalar/st_contains.cpp deleted file mode 100644 index 1978e85e..00000000 --- a/spatial/src/spatial/core/functions/scalar/st_contains.cpp +++ /dev/null @@ -1,173 +0,0 @@ -#include "spatial/common.hpp" -#include "spatial/core/functions/scalar.hpp" -#include "spatial/core/types.hpp" -#include "spatial/core/function_builder.hpp" - -namespace spatial { - -namespace core { - -//------------------------------------------------------------------------------ -// POLYGON_2D - POINT_2D -//------------------------------------------------------------------------------ - -enum class Side { LEFT, RIGHT, ON }; - -static void PointInPolygonOperation(Vector &in_point, Vector &in_polygon, Vector &result, idx_t count) { - - in_polygon.Flatten(count); - 
in_point.Flatten(count); - - // Setup point vectors - auto &p_children = StructVector::GetEntries(in_point); - auto p_x_data = FlatVector::GetData(*p_children[0]); - auto p_y_data = FlatVector::GetData(*p_children[1]); - - // Setup polygon vectors - auto polygon_entries = ListVector::GetData(in_polygon); - auto &ring_vec = ListVector::GetEntry(in_polygon); - auto ring_entries = ListVector::GetData(ring_vec); - auto &coord_vec = ListVector::GetEntry(ring_vec); - auto &coord_children = StructVector::GetEntries(coord_vec); - auto x_data = FlatVector::GetData(*coord_children[0]); - auto y_data = FlatVector::GetData(*coord_children[1]); - - auto result_data = FlatVector::GetData(result); - - for (idx_t polygon_idx = 0; polygon_idx < count; polygon_idx++) { - auto polygon = polygon_entries[polygon_idx]; - auto polygon_offset = polygon.offset; - auto polygon_length = polygon.length; - bool first = true; - - // does the point lie inside the polygon? - bool contains = false; - - auto x = p_x_data[polygon_idx]; - auto y = p_y_data[polygon_idx]; - - for (idx_t ring_idx = polygon_offset; ring_idx < polygon_offset + polygon_length; ring_idx++) { - auto ring = ring_entries[ring_idx]; - auto ring_offset = ring.offset; - auto ring_length = ring.length; - - auto x1 = x_data[ring_offset]; - auto y1 = y_data[ring_offset]; - int winding_number = 0; - - for (idx_t coord_idx = ring_offset + 1; coord_idx < ring_offset + ring_length; coord_idx++) { - // foo foo foo - auto x2 = x_data[coord_idx]; - auto y2 = y_data[coord_idx]; - - if (x1 == x2 && y1 == y2) { - x1 = x2; - y1 = y2; - continue; - } - - auto y_min = std::min(y1, y2); - auto y_max = std::max(y1, y2); - - if (y > y_max || y < y_min) { - x1 = x2; - y1 = y2; - continue; - } - - auto side = Side::ON; - double side_v = ((x - x1) * (y2 - y1) - (x2 - x1) * (y - y1)); - if (side_v == 0) { - side = Side::ON; - } else if (side_v < 0) { - side = Side::LEFT; - } else { - side = Side::RIGHT; - } - - if (side == Side::ON && - (((x1 <= x && x 
< x2) || (x1 >= x && x > x2)) || ((y1 <= y && y < y2) || (y1 >= y && y > y2)))) { - - // return Contains::ON_EDGE; - contains = false; - break; - } else if (side == Side::LEFT && (y1 < y && y <= y2)) { - winding_number++; - } else if (side == Side::RIGHT && (y2 <= y && y < y1)) { - winding_number--; - } - - x1 = x2; - y1 = y2; - } - bool in_ring = winding_number != 0; - if (first) { - if (!in_ring) { - // if the first ring is not inside, then the point is not inside the polygon - contains = false; - break; - } else { - // if the first ring is inside, then the point is inside the polygon - // but might be inside a hole, so we continue - contains = true; - } - } else { - if (in_ring) { - // if the hole is inside, then the point is not inside the polygon - contains = false; - break; - } // else continue - } - first = false; - } - result_data[polygon_idx] = contains; - } - if (count == 1) { - result.SetVectorType(VectorType::CONSTANT_VECTOR); - } -} - -static void PolygonContainsPointFunction(DataChunk &args, ExpressionState &state, Vector &result) { - D_ASSERT(args.data.size() == 2); - auto count = args.size(); - auto &in_polygon = args.data[0]; - auto &in_point = args.data[1]; - PointInPolygonOperation(in_point, in_polygon, result, count); -} - -static void PointWithinPolygonFunction(DataChunk &args, ExpressionState &state, Vector &result) { - D_ASSERT(args.data.size() == 2); - auto count = args.size(); - auto &in_point = args.data[0]; - auto &in_polygon = args.data[1]; - PointInPolygonOperation(in_point, in_polygon, result, count); -} - -//------------------------------------------------------------------------------ -// Register functions -//------------------------------------------------------------------------------ -void CoreScalarFunctions::RegisterStContains(DatabaseInstance &db) { - - FunctionBuilder::RegisterScalar(db, "ST_Contains", [](ScalarFunctionBuilder &func) { - func.AddVariant([](ScalarFunctionVariantBuilder &variant) { - 
variant.AddParameter("geom1", GeoTypes::POLYGON_2D()); - variant.AddParameter("geom2", GeoTypes::POINT_2D()); - variant.SetReturnType(LogicalType::BOOLEAN); - variant.SetFunction(PolygonContainsPointFunction); - }); - }); - - // ST_Within is the inverse of ST_Contains - FunctionBuilder::RegisterScalar(db, "ST_Within", [](ScalarFunctionBuilder &func) { - func.AddVariant([](ScalarFunctionVariantBuilder &variant) { - variant.AddParameter("geom1", GeoTypes::POINT_2D()); - variant.AddParameter("geom2", GeoTypes::POLYGON_2D()); - variant.SetReturnType(LogicalType::BOOLEAN); - variant.SetFunction(PointWithinPolygonFunction); - }); - }); -} - -} // namespace core - -} // namespace spatial \ No newline at end of file diff --git a/spatial/src/spatial/core/functions/scalar/st_dimension.cpp b/spatial/src/spatial/core/functions/scalar/st_dimension.cpp deleted file mode 100644 index 1edcae24..00000000 --- a/spatial/src/spatial/core/functions/scalar/st_dimension.cpp +++ /dev/null @@ -1,59 +0,0 @@ -#include "spatial/common.hpp" -#include "spatial/core/types.hpp" -#include "spatial/core/functions/scalar.hpp" -#include "spatial/core/functions/common.hpp" -#include "spatial/core/geometry/geometry.hpp" - -#include "duckdb/parser/parsed_data/create_scalar_function_info.hpp" -#include "duckdb/common/vector_operations/unary_executor.hpp" -#include "duckdb/common/vector_operations/binary_executor.hpp" - -namespace spatial { - -namespace core { - -//------------------------------------------------------------------------------ -// GEOMETRY -//------------------------------------------------------------------------------ -static void DimensionFunction(DataChunk &args, ExpressionState &state, Vector &result) { - auto &lstate = GeometryFunctionLocalState::ResetAndGet(state); - - auto count = args.size(); - auto &input = args.data[0]; - - UnaryExecutor::Execute(input, result, count, [&](geometry_t input) { - auto geometry = Geometry::Deserialize(lstate.arena, input); - return 
Geometry::GetDimension(geometry, false); - }); -} - -//------------------------------------------------------------------------------ -// Documentation -//------------------------------------------------------------------------------ -static constexpr const char *DOC_DESCRIPTION = R"( -Returns the dimension of a geometry. -)"; - -static constexpr const char *DOC_EXAMPLE = R"( -select st_dimension('POLYGON((0 0, 0 1, 1 1, 1 0, 0 0))'::geometry); ----- -2 -)"; - -static constexpr DocTag DOC_TAGS[] = {{"ext", "spatial"}, {"category", "property"}}; -//------------------------------------------------------------------------------ -// Register functions -//------------------------------------------------------------------------------ -void CoreScalarFunctions::RegisterStDimension(DatabaseInstance &db) { - ScalarFunctionSet set("ST_Dimension"); - - set.AddFunction(ScalarFunction({GeoTypes::GEOMETRY()}, LogicalType::INTEGER, DimensionFunction, nullptr, nullptr, - nullptr, GeometryFunctionLocalState::Init)); - - ExtensionUtil::RegisterFunction(db, set); - DocUtil::AddDocumentation(db, "ST_Dimension", DOC_DESCRIPTION, DOC_EXAMPLE, DOC_TAGS); -} - -} // namespace core - -} // namespace spatial \ No newline at end of file diff --git a/spatial/src/spatial/core/functions/scalar/st_distance.cpp b/spatial/src/spatial/core/functions/scalar/st_distance.cpp deleted file mode 100644 index 0ec7c179..00000000 --- a/spatial/src/spatial/core/functions/scalar/st_distance.cpp +++ /dev/null @@ -1,163 +0,0 @@ -#include "duckdb/parser/parsed_data/create_scalar_function_info.hpp" -#include "spatial/common.hpp" -#include "spatial/core/functions/scalar.hpp" -#include "spatial/core/functions/common.hpp" -#include "spatial/core/geometry/geometry.hpp" -#include "spatial/core/types.hpp" -#include "spatial/core/geometry/vertex.hpp" - -namespace spatial { - -namespace core { - -//----------------------------------------------------------------------------- -// Helpers 
-//----------------------------------------------------------------------------- -template -static PointXY ClosestPointOnSegment(const PointXY &p, const PointXY &p1, const PointXY &p2) { - // If the segment is a Vertex, then return that Vertex - if (p1.ApproxEqualTo(p2)) { - return p1; - } - auto n1 = ((p.x - p1.x) * (p2.x - p1.x) + (p.y - p1.y) * (p2.y - p1.y)); - auto n2 = ((p2.x - p1.x) * (p2.x - p1.x) + (p2.y - p1.y) * (p2.y - p1.y)); - auto r = n1 / n2; - // If r is less than 0, then the Point is outside the segment in the p1 direction - if (r <= 0) { - return p1; - } - // If r is greater than 1, then the Point is outside the segment in the p2 direction - if (r >= 1) { - return p2; - } - // Interpolate between p1 and p2 - return PointXY(p1.x + r * (p2.x - p1.x), p1.y + r * (p2.y - p1.y)); -} - -template -static double DistanceToSegmentSquared(const PointXY &px, const PointXY &ax, const PointXY &bx) { - auto point = ClosestPointOnSegment(px, ax, bx); - auto dx = px.x - point.x; - auto dy = px.y - point.y; - return dx * dx + dy * dy; -} - -//------------------------------------------------------------------------------ -// POINT_2D - POINT_2D -//------------------------------------------------------------------------------ -static void PointToPointDistanceFunction(DataChunk &args, ExpressionState &state, Vector &result) { - D_ASSERT(args.data.size() == 2); - auto &left = args.data[0]; - auto &right = args.data[1]; - auto count = args.size(); - - left.Flatten(count); - right.Flatten(count); - - auto &left_entries = StructVector::GetEntries(left); - auto &right_entries = StructVector::GetEntries(right); - - auto left_x = FlatVector::GetData(*left_entries[0]); - auto left_y = FlatVector::GetData(*left_entries[1]); - auto right_x = FlatVector::GetData(*right_entries[0]); - auto right_y = FlatVector::GetData(*right_entries[1]); - - auto out_data = FlatVector::GetData(result); - for (idx_t i = 0; i < count; i++) { - out_data[i] = std::sqrt(std::pow(left_x[i] - 
right_x[i], 2) + std::pow(left_y[i] - right_y[i], 2)); - } - - if (count == 1) { - result.SetVectorType(VectorType::CONSTANT_VECTOR); - } -} - -//------------------------------------------------------------------------------ -// POINT_2D - LINESTRING_2D -//------------------------------------------------------------------------------ - -static void PointToLineStringDistanceOperation(Vector &in_point, Vector &in_line, Vector &result, idx_t count) { - - // Set up the point vectors - in_point.Flatten(count); - auto &p_children = StructVector::GetEntries(in_point); - auto &p_x = p_children[0]; - auto &p_y = p_children[1]; - auto p_x_data = FlatVector::GetData(*p_x); - auto p_y_data = FlatVector::GetData(*p_y); - - // Set up the line vectors - in_line.Flatten(count); - - auto &inner = ListVector::GetEntry(in_line); - auto &children = StructVector::GetEntries(inner); - auto &x = children[0]; - auto &y = children[1]; - auto x_data = FlatVector::GetData(*x); - auto y_data = FlatVector::GetData(*y); - auto lines = ListVector::GetData(in_line); - - auto result_data = FlatVector::GetData(result); - for (idx_t i = 0; i < count; i++) { - auto offset = lines[i].offset; - auto length = lines[i].length; - - double min_distance = std::numeric_limits::max(); - auto p = PointXY(p_x_data[i], p_y_data[i]); - - // Loop over the segments and find the closes one to the point - for (idx_t j = 0; j < length - 1; j++) { - auto a = PointXY(x_data[offset + j], y_data[offset + j]); - auto b = PointXY(x_data[offset + j + 1], y_data[offset + j + 1]); - - auto distance = DistanceToSegmentSquared(p, a, b); - if (distance < min_distance) { - min_distance = distance; - - if (min_distance == 0) { - break; - } - } - } - result_data[i] = std::sqrt(min_distance); - } - if (count == 1) { - result.SetVectorType(VectorType::CONSTANT_VECTOR); - } -} - -static void PointToLineStringDistanceFunction(DataChunk &args, ExpressionState &state, Vector &result) { - D_ASSERT(args.data.size() == 2); - auto &in_point = 
args.data[0]; - auto &in_line = args.data[1]; - auto count = args.size(); - PointToLineStringDistanceOperation(in_point, in_line, result, count); -} - -static void LineStringToPointDistanceFunction(DataChunk &args, ExpressionState &state, Vector &result) { - D_ASSERT(args.data.size() == 2); - auto &in_line = args.data[0]; - auto &in_point = args.data[1]; - auto count = args.size(); - PointToLineStringDistanceOperation(in_point, in_line, result, count); -} - -//------------------------------------------------------------------------------ -// Register functions -//------------------------------------------------------------------------------ -void CoreScalarFunctions::RegisterStDistance(DatabaseInstance &db) { - ScalarFunctionSet distance_function_set("ST_Distance"); - - distance_function_set.AddFunction(ScalarFunction({GeoTypes::POINT_2D(), GeoTypes::POINT_2D()}, LogicalType::DOUBLE, - PointToPointDistanceFunction)); - distance_function_set.AddFunction(ScalarFunction({GeoTypes::POINT_2D(), GeoTypes::LINESTRING_2D()}, - LogicalType::DOUBLE, PointToLineStringDistanceFunction)); - distance_function_set.AddFunction(ScalarFunction({GeoTypes::LINESTRING_2D(), GeoTypes::POINT_2D()}, - LogicalType::DOUBLE, LineStringToPointDistanceFunction)); - - ExtensionUtil::RegisterFunction(db, distance_function_set); -} - -} // namespace core - -} // namespace spatial \ No newline at end of file diff --git a/spatial/src/spatial/core/functions/scalar/st_distance_sphere.cpp b/spatial/src/spatial/core/functions/scalar/st_distance_sphere.cpp deleted file mode 100644 index 21f08fd6..00000000 --- a/spatial/src/spatial/core/functions/scalar/st_distance_sphere.cpp +++ /dev/null @@ -1,115 +0,0 @@ -#include "duckdb/parser/parsed_data/create_scalar_function_info.hpp" -#include "spatial/common.hpp" -#include "spatial/core/functions/scalar.hpp" -#include "spatial/core/functions/common.hpp" -#include "spatial/core/geometry/geometry.hpp" -#include "spatial/core/types.hpp" - -#include 
"duckdb/common/vector_operations/generic_executor.hpp" -#include "duckdb/common/constants.hpp" - -namespace spatial { - -namespace core { - -//------------------------------------------------------------------------------ -// Helper -//------------------------------------------------------------------------------ -static inline double HaversineFunction(double lat1_p, double lon1_p, double lat2_p, double lon2_p) { - // Radius of the earth in km - auto R = 6371000.0; - - // Convert to radians - auto lat1 = lat1_p * PI / 180.0; - auto lon1 = lon1_p * PI / 180.0; - auto lat2 = lat2_p * PI / 180.0; - auto lon2 = lon2_p * PI / 180.0; - - auto dlat = lat2 - lat1; - auto dlon = lon2 - lon1; - - auto a = - std::pow(std::sin(dlat / 2.0), 2.0) + std::cos(lat1) * std::cos(lat2) * std::pow(std::sin(dlon / 2.0), 2.0); - auto c = 2.0 * std::atan2(std::sqrt(a), std::sqrt(1.0 - a)); - return R * c; -} - -//------------------------------------------------------------------------------ -// POINT_2D - POINT_2D -//------------------------------------------------------------------------------ -static void PointHaversineFunction(DataChunk &args, ExpressionState &state, Vector &result) { - D_ASSERT(args.data.size() == 2); - auto &left = args.data[0]; - auto &right = args.data[1]; - auto count = args.size(); - - using POINT_TYPE = StructTypeBinary; - using DISTANCE_TYPE = PrimitiveType; - - GenericExecutor::ExecuteBinary( - left, right, result, count, [&](POINT_TYPE left, POINT_TYPE right) { - return HaversineFunction(left.a_val, left.b_val, right.a_val, right.b_val); - }); -} - -//------------------------------------------------------------------------------ -// GEOMETRY - GEOMETRY -//------------------------------------------------------------------------------ -static void GeometryHaversineFunction(DataChunk &args, ExpressionState &state, Vector &result) { - auto &lstate = GeometryFunctionLocalState::ResetAndGet(state); - auto &left = args.data[0]; - auto &right = args.data[1]; - auto 
count = args.size(); - - BinaryExecutor::Execute( - left, right, result, count, [&](geometry_t left, geometry_t right) { - if (left.GetType() != GeometryType::POINT || right.GetType() != GeometryType::POINT) { - throw InvalidInputException("ST_Distance_Sphere only supports POINT geometries (for now!)"); - } - auto left_geom = Geometry::Deserialize(lstate.arena, left); - auto right_geom = Geometry::Deserialize(lstate.arena, right); - if (Point::IsEmpty(left_geom) || Point::IsEmpty(right_geom)) { - throw InvalidInputException("ST_Distance_Sphere does not support EMPTY geometries"); - } - auto v1 = Point::GetVertex(left_geom); - auto v2 = Point::GetVertex(right_geom); - return HaversineFunction(v1.x, v1.y, v2.x, v2.y); - }); -} - -//------------------------------------------------------------------------------ -// Documentation -//------------------------------------------------------------------------------ -static constexpr const char *DOC_DESCRIPTION = R"( - Returns the haversine distance between two geometries. - - - Only supports POINT geometries. - - Returns the distance in meters. - - The input is expected to be in WGS84 (EPSG:4326) coordinates, using a [latitude, longitude] axis order. 
-)"; - -static constexpr const char *DOC_EXAMPLE = R"( -)"; - -static constexpr DocTag DOC_TAGS[] = {{"ext", "spatial"}, {"category", "property"}}; - -//------------------------------------------------------------------------------ -// Register functions -//------------------------------------------------------------------------------ -void CoreScalarFunctions::RegisterStHaversine(DatabaseInstance &db) { - ScalarFunctionSet distance_function_set("ST_Distance_Sphere"); - - distance_function_set.AddFunction( - ScalarFunction({GeoTypes::POINT_2D(), GeoTypes::POINT_2D()}, LogicalType::DOUBLE, PointHaversineFunction)); - - distance_function_set.AddFunction(ScalarFunction({GeoTypes::GEOMETRY(), GeoTypes::GEOMETRY()}, LogicalType::DOUBLE, - GeometryHaversineFunction, nullptr, nullptr, nullptr, - GeometryFunctionLocalState::Init)); - - ExtensionUtil::RegisterFunction(db, distance_function_set); - DocUtil::AddDocumentation(db, "ST_Distance_Sphere", DOC_DESCRIPTION, DOC_EXAMPLE, DOC_TAGS); -} - -} // namespace core - -} // namespace spatial \ No newline at end of file diff --git a/spatial/src/spatial/core/functions/scalar/st_dump.cpp b/spatial/src/spatial/core/functions/scalar/st_dump.cpp deleted file mode 100644 index a199c7c5..00000000 --- a/spatial/src/spatial/core/functions/scalar/st_dump.cpp +++ /dev/null @@ -1,149 +0,0 @@ -#include "spatial/common.hpp" -#include "spatial/core/types.hpp" -#include "spatial/core/functions/scalar.hpp" -#include "spatial/core/functions/common.hpp" -#include "spatial/core/geometry/geometry.hpp" - -#include "duckdb/parser/parsed_data/create_scalar_function_info.hpp" - -namespace spatial { - -namespace core { - -static void DumpFunction(DataChunk &args, ExpressionState &state, Vector &result) { - auto &lstate = GeometryFunctionLocalState::ResetAndGet(state); - auto &arena = lstate.arena; - auto count = args.size(); - - auto &geom_vec = args.data[0]; - UnifiedVectorFormat geom_format; - geom_vec.ToUnifiedFormat(count, geom_format); - - idx_t 
total_geom_count = 0; - idx_t total_path_count = 0; - - for (idx_t out_row_idx = 0; out_row_idx < count; out_row_idx++) { - auto in_row_idx = geom_format.sel->get_index(out_row_idx); - - if (!geom_format.validity.RowIsValid(in_row_idx)) { - FlatVector::SetNull(result, out_row_idx, true); - continue; - } - - auto geometry_blob = UnifiedVectorFormat::GetData(geom_format)[in_row_idx]; - auto geometry = Geometry::Deserialize(arena, geometry_blob); - - vector>> stack; - vector>> items; - - stack.emplace_back(geometry, vector()); - - while (!stack.empty()) { - auto current = stack.back(); - auto current_geom = std::get<0>(current); - auto current_path = std::get<1>(current); - - stack.pop_back(); - if (current_geom.IsCollection()) { - for (int32_t i = 0; i < CollectionGeometry::PartCount(current_geom); i++) { - auto path = current_path; - path.push_back(i + 1); // path is 1-indexed - stack.emplace_back(CollectionGeometry::Part(current_geom, i), path); - } - } else { - items.push_back(current); - } - } - - // Finally reverse the results - std::reverse(items.begin(), items.end()); - - // Push to the result vector - auto result_entries = ListVector::GetData(result); - - auto geom_offset = total_geom_count; - auto geom_length = items.size(); - - result_entries[out_row_idx].length = geom_length; - result_entries[out_row_idx].offset = geom_offset; - - total_geom_count += geom_length; - - ListVector::Reserve(result, total_geom_count); - ListVector::SetListSize(result, total_geom_count); - - auto &result_list = ListVector::GetEntry(result); - auto &result_list_children = StructVector::GetEntries(result_list); - auto &result_geom_vec = result_list_children[0]; - auto &result_path_vec = result_list_children[1]; - - // The child geometries must share the same properties as the parent geometry - auto geom_data = FlatVector::GetData(*result_geom_vec); - for (idx_t i = 0; i < geom_length; i++) { - // Write the geometry - auto &item_blob = std::get<0>(items[i]); - geom_data[geom_offset 
+ i] = Geometry::Serialize(item_blob, *result_geom_vec); - - // Now write the paths - auto &path = std::get<1>(items[i]); - auto path_offset = total_path_count; - auto path_length = path.size(); - - total_path_count += path_length; - - ListVector::Reserve(*result_path_vec, total_path_count); - ListVector::SetListSize(*result_path_vec, total_path_count); - - auto path_entries = ListVector::GetData(*result_path_vec); - - path_entries[geom_offset + i].offset = path_offset; - path_entries[geom_offset + i].length = path_length; - - auto &path_data_vec = ListVector::GetEntry(*result_path_vec); - auto path_data = FlatVector::GetData(path_data_vec); - - for (idx_t j = 0; j < path_length; j++) { - path_data[path_offset + j] = path[j]; - } - } - } - - if (count == 1) { - result.SetVectorType(VectorType::CONSTANT_VECTOR); - } -} - -//------------------------------------------------------------------------------ -// Documentation -//------------------------------------------------------------------------------ -static constexpr const char *DOC_DESCRIPTION = R"( -Dumps a geometry into a list of sub-geometries and their "path" in the original geometry. 
-)"; - -static constexpr const char *DOC_EXAMPLE = R"( -select st_dump('MULTIPOINT(1 2,3 4)'::geometry); ----- -[{'geom': 'POINT(1 2)', 'path': [0]}, {'geom': 'POINT(3 4)', 'path': [1]}] -)"; - -static constexpr DocTag DOC_TAGS[] = {{"ext", "spatial"}, {"category", "construction"}}; - -//------------------------------------------------------------------------------ -// Register functions -//------------------------------------------------------------------------------ -void CoreScalarFunctions::RegisterStDump(DatabaseInstance &db) { - ScalarFunctionSet set("ST_Dump"); - - set.AddFunction( - ScalarFunction({GeoTypes::GEOMETRY()}, - LogicalType::LIST(LogicalType::STRUCT( - {{"geom", GeoTypes::GEOMETRY()}, {"path", LogicalType::LIST(LogicalType::INTEGER)}})), - DumpFunction, nullptr, nullptr, nullptr, GeometryFunctionLocalState::Init)); - - ExtensionUtil::RegisterFunction(db, set); - DocUtil::AddDocumentation(db, "ST_Dump", DOC_DESCRIPTION, DOC_EXAMPLE, DOC_TAGS); -} - -} // namespace core - -} // namespace spatial diff --git a/spatial/src/spatial/core/functions/scalar/st_endpoint.cpp b/spatial/src/spatial/core/functions/scalar/st_endpoint.cpp deleted file mode 100644 index cf6f3f43..00000000 --- a/spatial/src/spatial/core/functions/scalar/st_endpoint.cpp +++ /dev/null @@ -1,117 +0,0 @@ -#include "spatial/common.hpp" -#include "spatial/core/types.hpp" -#include "spatial/core/functions/scalar.hpp" -#include "spatial/core/functions/common.hpp" -#include "spatial/core/geometry/geometry.hpp" - -#include "duckdb/parser/parsed_data/create_scalar_function_info.hpp" -#include "duckdb/common/vector_operations/unary_executor.hpp" -#include "duckdb/common/vector_operations/binary_executor.hpp" - -namespace spatial { - -namespace core { - -//------------------------------------------------------------------------------ -// LINESTRING_2D -//------------------------------------------------------------------------------ -static void LineStringEndPointFunction(DataChunk &args, 
ExpressionState &state, Vector &result) { - auto geom_vec = args.data[0]; - auto count = args.size(); - - UnifiedVectorFormat geom_format; - geom_vec.ToUnifiedFormat(count, geom_format); - - auto line_vertex_entries = ListVector::GetData(geom_vec); - auto &line_vertex_vec = ListVector::GetEntry(geom_vec); - auto &line_vertex_vec_children = StructVector::GetEntries(line_vertex_vec); - auto line_x_data = FlatVector::GetData(*line_vertex_vec_children[0]); - auto line_y_data = FlatVector::GetData(*line_vertex_vec_children[1]); - - auto &point_vertex_children = StructVector::GetEntries(result); - auto point_x_data = FlatVector::GetData(*point_vertex_children[0]); - auto point_y_data = FlatVector::GetData(*point_vertex_children[1]); - - for (idx_t out_row_idx = 0; out_row_idx < count; out_row_idx++) { - auto in_row_idx = geom_format.sel->get_index(out_row_idx); - - if (!geom_format.validity.RowIsValid(in_row_idx)) { - FlatVector::SetNull(result, out_row_idx, true); - continue; - } - - auto line = line_vertex_entries[in_row_idx]; - auto line_offset = line.offset; - auto line_length = line.length; - - if (line_length == 0) { - FlatVector::SetNull(result, out_row_idx, true); - continue; - } - - point_x_data[out_row_idx] = line_x_data[line_offset + line_length - 1]; - point_y_data[out_row_idx] = line_y_data[line_offset + line_length - 1]; - } - if (count == 1) { - result.SetVectorType(VectorType::CONSTANT_VECTOR); - } -} -//------------------------------------------------------------------------------ -// GEOMETRY -//------------------------------------------------------------------------------ -static void GeometryEndPointFunction(DataChunk &args, ExpressionState &state, Vector &result) { - auto &lstate = GeometryFunctionLocalState::ResetAndGet(state); - auto &geom_vec = args.data[0]; - auto count = args.size(); - - UnaryExecutor::ExecuteWithNulls( - geom_vec, result, count, [&](geometry_t input, ValidityMask &mask, idx_t row_idx) { - if (input.GetType() != 
GeometryType::LINESTRING) { - mask.SetInvalid(row_idx); - return geometry_t {}; - } - - auto line = Geometry::Deserialize(lstate.arena, input); - auto point_count = LineString::VertexCount(line); - - if (point_count == 0) { - mask.SetInvalid(row_idx); - return geometry_t {}; - } - - auto point = LineString::GetPointAsReference(line, point_count - 1); - return Geometry::Serialize(point, result); - }); -} -//------------------------------------------------------------------------------ -// Documentation -//------------------------------------------------------------------------------ -static constexpr const char *DOC_DESCRIPTION = R"( -Returns the last point of a line. -)"; - -static constexpr const char *DOC_EXAMPLE = R"( -select ST_EndPoint('LINESTRING(0 0, 1 1)'::geometry); --- POINT(1 1) -)"; - -static constexpr DocTag DOC_TAGS[] = {{"ext", "spatial"}, {"category", "property"}}; - -//------------------------------------------------------------------------------ -// Register functions -//------------------------------------------------------------------------------ -void CoreScalarFunctions::RegisterStEndPoint(DatabaseInstance &db) { - ScalarFunctionSet set("ST_EndPoint"); - - set.AddFunction(ScalarFunction({GeoTypes::GEOMETRY()}, GeoTypes::GEOMETRY(), GeometryEndPointFunction, nullptr, - nullptr, nullptr, GeometryFunctionLocalState::Init)); - - set.AddFunction(ScalarFunction({GeoTypes::LINESTRING_2D()}, GeoTypes::POINT_2D(), LineStringEndPointFunction)); - - ExtensionUtil::RegisterFunction(db, set); - DocUtil::AddDocumentation(db, "ST_EndPoint", DOC_DESCRIPTION, DOC_EXAMPLE, DOC_TAGS); -} - -} // namespace core - -} // namespace spatial diff --git a/spatial/src/spatial/core/functions/scalar/st_extent.cpp b/spatial/src/spatial/core/functions/scalar/st_extent.cpp deleted file mode 100644 index 60d4730a..00000000 --- a/spatial/src/spatial/core/functions/scalar/st_extent.cpp +++ /dev/null @@ -1,243 +0,0 @@ -#include 
"duckdb/common/vector_operations/unary_executor.hpp" -#include "duckdb/common/vector_operations/generic_executor.hpp" -#include "duckdb/parser/parsed_data/create_scalar_function_info.hpp" -#include "spatial/common.hpp" -#include "spatial/core/functions/common.hpp" -#include "spatial/core/functions/scalar.hpp" -#include "spatial/core/geometry/geometry.hpp" -#include "spatial/core/types.hpp" -#include "spatial/core/util/math.hpp" -#include "spatial/core/util/cursor.hpp" - -namespace spatial { - -namespace core { - -//------------------------------------------------------------------------------ -// WKB -//------------------------------------------------------------------------------ -static uint8_t ReadByte(Cursor &cursor) { - return cursor.Read(); -} -static double ReadDouble(const bool le, Cursor &cursor) { - return le ? cursor.Read() : cursor.ReadBigEndian(); -} -static uint32_t ReadInt(const bool le, Cursor &cursor) { - return le ? cursor.Read() : cursor.ReadBigEndian(); -} - -static void ReadWKB(Cursor &cursor, Box2D &bbox); - -static void ReadWKB(const bool le, const uint32_t type, const bool has_z, const bool has_m, Cursor &cursor, - Box2D &bbox) { - switch (type) { - case 1: { // POINT - // Points are special in that they can be all-nan (empty) - bool all_nan = true; - double coords[4]; - for (auto i = 0; i < (2 + has_z + has_m); i++) { - coords[i] = ReadDouble(le, cursor); - if (!std::isnan(coords[i])) { - all_nan = false; - } - } - if (!all_nan) { - bbox.Stretch(PointXY(coords[0], coords[1])); - } - } break; - case 2: { // LINESTRING - const auto num_verts = ReadInt(le, cursor); - for (uint32_t i = 0; i < num_verts; i++) { - const auto x = ReadDouble(le, cursor); - const auto y = ReadDouble(le, cursor); - if (has_z) { - ReadDouble(le, cursor); - } - if (has_m) { - ReadDouble(le, cursor); - } - bbox.Stretch(PointXY(x, y)); - } - } break; - case 3: { // POLYGON - const auto num_rings = ReadInt(le, cursor); - for (uint32_t i = 0; i < num_rings; i++) { - const 
auto num_verts = ReadInt(le, cursor); - for (uint32_t j = 0; j < num_verts; j++) { - const auto x = ReadDouble(le, cursor); - const auto y = ReadDouble(le, cursor); - if (has_z) { - ReadDouble(le, cursor); - } - if (has_m) { - ReadDouble(le, cursor); - } - bbox.Stretch(PointXY(x, y)); - } - } - } break; - case 4: // MULTIPOINT - case 5: // MULTILINESTRING - case 6: // MULTIPOLYGON - case 7: { // GEOMETRYCOLLECTION - const auto num_items = ReadInt(le, cursor); - for (uint32_t i = 0; i < num_items; i++) { - ReadWKB(cursor, bbox); - } - } break; - default: - throw NotImplementedException("WKB Reader: Geometry type %u not supported", type); - } -} - -static void ReadWKB(Cursor &cursor, Box2D &bbox) { - const auto le = ReadByte(cursor); - const auto type = ReadInt(le, cursor); - - // Check for ISO WKB and EWKB Z and M flags - const uint32_t iso_wkb_props = (type & 0xffff) / 1000; - const auto has_z = (iso_wkb_props == 1) || (iso_wkb_props == 3) || ((type & 0x80000000) != 0); - const auto has_m = (iso_wkb_props == 2) || (iso_wkb_props == 3) || ((type & 0x40000000) != 0); - - // Skip SRID if present - const auto has_srid = (type & 0x20000000) != 0; - if (has_srid) { - cursor.Skip(sizeof(uint32_t)); - } - - ReadWKB(le, ((type & 0xffff) % 1000), has_z, has_m, cursor, bbox); -} - -static void WKBExtFunction(DataChunk &args, ExpressionState &state, Vector &result) { - const auto count = args.size(); - - using BOX_TYPE = StructTypeQuaternary; - using WKB_TYPE = PrimitiveType; - - GenericExecutor::ExecuteUnary(args.data[0], result, count, [&](const WKB_TYPE &wkb) { - Box2D bbox; - Cursor cursor(wkb.val); - ReadWKB(cursor, bbox); - return BOX_TYPE {bbox.min.x, bbox.min.y, bbox.max.x, bbox.max.y}; - }); -} - -//------------------------------------------------------------------------------ -// GEOMETRY -//------------------------------------------------------------------------------ -static void ExtentFunction(DataChunk &args, ExpressionState &state, Vector &result) { - - auto 
count = args.size(); - auto &input = args.data[0]; - auto &struct_vec = StructVector::GetEntries(result); - auto min_x_data = FlatVector::GetData(*struct_vec[0]); - auto min_y_data = FlatVector::GetData(*struct_vec[1]); - auto max_x_data = FlatVector::GetData(*struct_vec[2]); - auto max_y_data = FlatVector::GetData(*struct_vec[3]); - - UnifiedVectorFormat input_vdata; - input.ToUnifiedFormat(count, input_vdata); - auto input_data = UnifiedVectorFormat::GetData(input_vdata); - - for (idx_t i = 0; i < count; i++) { - auto row_idx = input_vdata.sel->get_index(i); - if (input_vdata.validity.RowIsValid(row_idx)) { - auto &blob = input_data[row_idx]; - - // Try to get the cached bounding box from the blob - Box2D bbox; - if (blob.TryGetCachedBounds(bbox)) { - min_x_data[i] = bbox.min.x; - min_y_data[i] = bbox.min.y; - max_x_data[i] = bbox.max.x; - max_y_data[i] = bbox.max.y; - } else { - // No bounding box, return null - FlatVector::SetNull(result, i, true); - } - } else { - // Null input, return null - FlatVector::SetNull(result, i, true); - } - } - - if (input.GetVectorType() == VectorType::CONSTANT_VECTOR) { - result.SetVectorType(VectorType::CONSTANT_VECTOR); - } -} - -static void ExtentCachedFunction(DataChunk &args, ExpressionState &state, Vector &result) { - - auto count = args.size(); - auto &input = args.data[0]; - auto &struct_vec = StructVector::GetEntries(result); - auto min_x_data = FlatVector::GetData(*struct_vec[0]); - auto min_y_data = FlatVector::GetData(*struct_vec[1]); - auto max_x_data = FlatVector::GetData(*struct_vec[2]); - auto max_y_data = FlatVector::GetData(*struct_vec[3]); - - UnifiedVectorFormat input_vdata; - input.ToUnifiedFormat(count, input_vdata); - const auto input_data = UnifiedVectorFormat::GetData(input_vdata); - - for (idx_t i = 0; i < count; i++) { - const auto row_idx = input_vdata.sel->get_index(i); - if (input_vdata.validity.RowIsValid(row_idx)) { - auto &blob = input_data[row_idx]; - - // Try to get the cached bounding box from 
the blob - Box2D bbox; - if (blob.TryGetCachedBounds(bbox)) { - min_x_data[i] = MathUtil::DoubleToFloatDown(bbox.min.x); - min_y_data[i] = MathUtil::DoubleToFloatDown(bbox.min.y); - max_x_data[i] = MathUtil::DoubleToFloatUp(bbox.max.x); - max_y_data[i] = MathUtil::DoubleToFloatUp(bbox.max.y); - } else { - // No bounding box, return null - FlatVector::SetNull(result, i, true); - } - } else { - // Null input, return null - FlatVector::SetNull(result, i, true); - } - } - - if (input.GetVectorType() == VectorType::CONSTANT_VECTOR) { - result.SetVectorType(VectorType::CONSTANT_VECTOR); - } -} - -//------------------------------------------------------------------------------ -// Documentation -//------------------------------------------------------------------------------ -static constexpr const char *DOC_DESCRIPTION = R"( - Returns the minimal bounding box enclosing the input geometry -)"; - -static constexpr const char *DOC_EXAMPLE = R"( - -)"; - -static constexpr DocTag DOC_TAGS[] = {{"ext", "spatial"}}; - -//------------------------------------------------------------------------------ -// Register Functions -//------------------------------------------------------------------------------ -void CoreScalarFunctions::RegisterStExtent(DatabaseInstance &db) { - ScalarFunctionSet set("ST_Extent"); - - set.AddFunction(ScalarFunction({GeoTypes::GEOMETRY()}, GeoTypes::BOX_2D(), ExtentFunction)); - set.AddFunction(ScalarFunction({GeoTypes::WKB_BLOB()}, GeoTypes::BOX_2D(), WKBExtFunction)); - - ExtensionUtil::RegisterFunction(db, set); - DocUtil::AddDocumentation(db, "ST_Extent", DOC_DESCRIPTION, DOC_EXAMPLE, DOC_TAGS); - - ScalarFunctionSet approx_set("ST_Extent_Approx"); - approx_set.AddFunction( - ScalarFunction({GeoTypes::GEOMETRY()}, GeoTypes::BOX_2DF(), ExtentCachedFunction, nullptr, nullptr, nullptr)); - ExtensionUtil::RegisterFunction(db, approx_set); -} - -} // namespace core - -} // namespace spatial \ No newline at end of file diff --git 
a/spatial/src/spatial/core/functions/scalar/st_exteriorring.cpp b/spatial/src/spatial/core/functions/scalar/st_exteriorring.cpp deleted file mode 100644 index 31445045..00000000 --- a/spatial/src/spatial/core/functions/scalar/st_exteriorring.cpp +++ /dev/null @@ -1,145 +0,0 @@ -#include "duckdb/parser/parsed_data/create_scalar_function_info.hpp" -#include "duckdb/common/vector_operations/generic_executor.hpp" -#include "spatial/common.hpp" -#include "spatial/core/functions/scalar.hpp" -#include "spatial/core/functions/common.hpp" -#include "spatial/core/geometry/geometry.hpp" -#include "spatial/core/types.hpp" - -namespace spatial { - -namespace core { - -//------------------------------------------------------------------------------ -// POLYGON_2D -//------------------------------------------------------------------------------ -static void PolygonExteriorRingFunction(DataChunk &args, ExpressionState &state, Vector &result) { - D_ASSERT(args.data.size() == 1); - auto &poly_vec = args.data[0]; - auto poly_entries = ListVector::GetData(poly_vec); - auto &ring_vec = ListVector::GetEntry(poly_vec); - auto ring_entries = ListVector::GetData(ring_vec); - auto &vertex_vec = ListVector::GetEntry(ring_vec); - auto &vertex_vec_children = StructVector::GetEntries(vertex_vec); - auto poly_x_data = FlatVector::GetData(*vertex_vec_children[0]); - auto poly_y_data = FlatVector::GetData(*vertex_vec_children[1]); - - auto count = args.size(); - UnifiedVectorFormat poly_format; - poly_vec.ToUnifiedFormat(count, poly_format); - - // First figure out how many vertices we need - idx_t total_vertex_count = 0; - for (idx_t i = 0; i < count; i++) { - auto row_idx = poly_format.sel->get_index(i); - if (poly_format.validity.RowIsValid(row_idx)) { - auto poly = poly_entries[row_idx]; - if (poly.length != 0) { - // We only care about the exterior ring (first entry) - auto &ring = ring_entries[poly.offset]; - total_vertex_count += ring.length; - } - } - } - - // Now we can allocate the 
result vector - auto &line_vec = result; - ListVector::Reserve(line_vec, total_vertex_count); - ListVector::SetListSize(line_vec, total_vertex_count); - - auto line_entries = ListVector::GetData(line_vec); - auto &line_coord_vec = StructVector::GetEntries(ListVector::GetEntry(line_vec)); - auto line_data_x = FlatVector::GetData(*line_coord_vec[0]); - auto line_data_y = FlatVector::GetData(*line_coord_vec[1]); - - // Now we can fill the result vector - idx_t line_data_offset = 0; - for (idx_t i = 0; i < count; i++) { - auto row_idx = poly_format.sel->get_index(i); - if (poly_format.validity.RowIsValid(row_idx)) { - auto poly = poly_entries[row_idx]; - - if (poly.length == 0) { - line_entries[i].offset = 0; - line_entries[i].length = 0; - continue; - } - - // We only care about the exterior ring (first entry) - auto &ring = ring_entries[poly.offset]; - - auto &line_entry = line_entries[i]; - line_entry.offset = line_data_offset; - line_entry.length = ring.length; - - for (idx_t coord_idx = 0; coord_idx < ring.length; coord_idx++) { - line_data_x[line_entry.offset + coord_idx] = poly_x_data[ring.offset + coord_idx]; - line_data_y[line_entry.offset + coord_idx] = poly_y_data[ring.offset + coord_idx]; - } - - line_data_offset += ring.length; - } else { - FlatVector::SetNull(line_vec, i, true); - } - } - if (count == 1) { - result.SetVectorType(VectorType::CONSTANT_VECTOR); - } -} - -//------------------------------------------------------------------------------ -// GEOMETRY -//------------------------------------------------------------------------------ -static void GeometryExteriorRingFunction(DataChunk &args, ExpressionState &state, Vector &result) { - auto &lstate = GeometryFunctionLocalState::ResetAndGet(state); - auto &arena = lstate.arena; - auto &input = args.data[0]; - auto count = args.size(); - - UnaryExecutor::ExecuteWithNulls( - input, result, count, [&](geometry_t input, ValidityMask &validity, idx_t idx) { - if (input.GetType() != GeometryType::POLYGON) 
{ - validity.SetInvalid(idx); - return geometry_t {}; - } - auto polygon = Geometry::Deserialize(arena, input); - if (Polygon::IsEmpty(polygon)) { - auto empty = LineString::CreateEmpty(polygon.GetProperties().HasZ(), polygon.GetProperties().HasM()); - return Geometry::Serialize(empty, result); - } - auto &shell = Polygon::ExteriorRing(polygon); - return Geometry::Serialize(shell, result); - }); -} - -//------------------------------------------------------------------------------ -// Documentation -//------------------------------------------------------------------------------ -static constexpr const char *DOC_DESCRIPTION = R"( - Returns the exterior ring (shell) of a polygon geometry. -)"; - -static constexpr const char *DOC_EXAMPLE = R"( - -)"; - -static constexpr DocTag DOC_TAGS[] = {{"ext", "spatial"}, {"category", "construction"}}; - -//------------------------------------------------------------------------------ -// Register functions -//------------------------------------------------------------------------------ -void CoreScalarFunctions::RegisterStExteriorRing(DatabaseInstance &db) { - - ScalarFunctionSet set("ST_ExteriorRing"); - set.AddFunction(ScalarFunction({GeoTypes::POLYGON_2D()}, GeoTypes::LINESTRING_2D(), PolygonExteriorRingFunction)); - - set.AddFunction(ScalarFunction({GeoTypes::GEOMETRY()}, GeoTypes::GEOMETRY(), GeometryExteriorRingFunction, nullptr, - nullptr, nullptr, GeometryFunctionLocalState::Init)); - - ExtensionUtil::RegisterFunction(db, set); - DocUtil::AddDocumentation(db, "ST_ExteriorRing", DOC_DESCRIPTION, DOC_EXAMPLE, DOC_TAGS); -} - -} // namespace core - -} // namespace spatial \ No newline at end of file diff --git a/spatial/src/spatial/core/functions/scalar/st_flipcoordinates.cpp b/spatial/src/spatial/core/functions/scalar/st_flipcoordinates.cpp deleted file mode 100644 index af6839a3..00000000 --- a/spatial/src/spatial/core/functions/scalar/st_flipcoordinates.cpp +++ /dev/null @@ -1,220 +0,0 @@ -#include 
"duckdb/parser/parsed_data/create_scalar_function_info.hpp" -#include "duckdb/common/vector_operations/generic_executor.hpp" -#include "spatial/common.hpp" -#include "spatial/core/functions/scalar.hpp" -#include "spatial/core/functions/common.hpp" -#include "spatial/core/geometry/geometry.hpp" -#include "spatial/core/types.hpp" - -namespace spatial { - -namespace core { - -// TODO: We should be able to optimize these and avoid the flatten - -//------------------------------------------------------------------------------ -// POINT_2D -//------------------------------------------------------------------------------ -static void PointFlipCoordinatesFunction(DataChunk &args, ExpressionState &state, Vector &result) { - auto input = args.data[0]; - auto count = args.size(); - - // TODO: Avoid flatten - input.Flatten(count); - - auto &coords_in = StructVector::GetEntries(input); - auto x_data_in = FlatVector::GetData(*coords_in[0]); - auto y_data_in = FlatVector::GetData(*coords_in[1]); - - auto &coords_out = StructVector::GetEntries(result); - auto x_data_out = FlatVector::GetData(*coords_out[0]); - auto y_data_out = FlatVector::GetData(*coords_out[1]); - - memcpy(x_data_out, y_data_in, count * sizeof(double)); - memcpy(y_data_out, x_data_in, count * sizeof(double)); - - if (count == 1) { - result.SetVectorType(VectorType::CONSTANT_VECTOR); - } -} - -//------------------------------------------------------------------------------ -// LINESTRING_2D -//------------------------------------------------------------------------------ -static void LineStringFlipCoordinatesFunction(DataChunk &args, ExpressionState &state, Vector &result) { - auto input = args.data[0]; - auto count = args.size(); - - // TODO: Avoid flatten - input.Flatten(count); - - auto coord_vec_in = ListVector::GetEntry(input); - auto &coords_in = StructVector::GetEntries(coord_vec_in); - auto x_data_in = FlatVector::GetData(*coords_in[0]); - auto y_data_in = FlatVector::GetData(*coords_in[1]); - - auto 
coord_count = ListVector::GetListSize(input); - ListVector::Reserve(result, coord_count); - ListVector::SetListSize(result, coord_count); - - auto line_entries_in = ListVector::GetData(input); - auto line_entries_out = ListVector::GetData(result); - memcpy(line_entries_out, line_entries_in, count * sizeof(list_entry_t)); - - auto coord_vec_out = ListVector::GetEntry(result); - auto &coords_out = StructVector::GetEntries(coord_vec_out); - auto x_data_out = FlatVector::GetData(*coords_out[0]); - auto y_data_out = FlatVector::GetData(*coords_out[1]); - - memcpy(x_data_out, y_data_in, coord_count * sizeof(double)); - memcpy(y_data_out, x_data_in, coord_count * sizeof(double)); - - if (count == 1) { - result.SetVectorType(VectorType::CONSTANT_VECTOR); - } -} - -//------------------------------------------------------------------------------ -// POLYGON_2D -//------------------------------------------------------------------------------ -static void PolygonFlipCoordinatesFunction(DataChunk &args, ExpressionState &state, Vector &result) { - auto input = args.data[0]; - auto count = args.size(); - - // TODO: Avoid flatten - input.Flatten(count); - - auto ring_vec_in = ListVector::GetEntry(input); - auto ring_count = ListVector::GetListSize(input); - - auto coord_vec_in = ListVector::GetEntry(ring_vec_in); - auto &coords_in = StructVector::GetEntries(coord_vec_in); - auto x_data_in = FlatVector::GetData(*coords_in[0]); - auto y_data_in = FlatVector::GetData(*coords_in[1]); - - auto coord_count = ListVector::GetListSize(ring_vec_in); - - ListVector::Reserve(result, ring_count); - ListVector::SetListSize(result, ring_count); - auto ring_vec_out = ListVector::GetEntry(result); - ListVector::Reserve(ring_vec_out, coord_count); - ListVector::SetListSize(ring_vec_out, coord_count); - - auto ring_entries_in = ListVector::GetData(input); - auto ring_entries_out = ListVector::GetData(result); - memcpy(ring_entries_out, ring_entries_in, count * sizeof(list_entry_t)); - - auto 
coord_entries_in = ListVector::GetData(ring_vec_in); - auto coord_entries_out = ListVector::GetData(ring_vec_out); - memcpy(coord_entries_out, coord_entries_in, ring_count * sizeof(list_entry_t)); - - auto coord_vec_out = ListVector::GetEntry(ring_vec_out); - auto &coords_out = StructVector::GetEntries(coord_vec_out); - auto x_data_out = FlatVector::GetData(*coords_out[0]); - auto y_data_out = FlatVector::GetData(*coords_out[1]); - - memcpy(x_data_out, y_data_in, coord_count * sizeof(double)); - memcpy(y_data_out, x_data_in, coord_count * sizeof(double)); - - if (count == 1) { - result.SetVectorType(VectorType::CONSTANT_VECTOR); - } -} - -//------------------------------------------------------------------------------ -// BOX_2D -//------------------------------------------------------------------------------ -static void BoxFlipCoordinatesFunction(DataChunk &args, ExpressionState &state, Vector &result) { - - auto input = args.data[0]; - auto count = args.size(); - - // TODO: Avoid flatten - input.Flatten(count); - - auto &children_in = StructVector::GetEntries(input); - auto min_x_in = FlatVector::GetData(*children_in[0]); - auto min_y_in = FlatVector::GetData(*children_in[1]); - auto max_x_in = FlatVector::GetData(*children_in[2]); - auto max_y_in = FlatVector::GetData(*children_in[3]); - - auto &children_out = StructVector::GetEntries(result); - auto min_x_out = FlatVector::GetData(*children_out[0]); - auto min_y_out = FlatVector::GetData(*children_out[1]); - auto max_x_out = FlatVector::GetData(*children_out[2]); - auto max_y_out = FlatVector::GetData(*children_out[3]); - - memcpy(min_x_out, min_y_in, count * sizeof(double)); - memcpy(min_y_out, min_x_in, count * sizeof(double)); - memcpy(max_x_out, max_y_in, count * sizeof(double)); - memcpy(max_y_out, max_x_in, count * sizeof(double)); -} - -//------------------------------------------------------------------------------ -// GEOMETRY 
-//------------------------------------------------------------------------------ - -static void GeometryFlipCoordinatesFunction(DataChunk &args, ExpressionState &state, Vector &result) { - - auto &lstate = GeometryFunctionLocalState::ResetAndGet(state); - - auto input = args.data[0]; - auto count = args.size(); - - struct op { - static void Case(Geometry::Tags::SinglePartGeometry, Geometry &geom, ArenaAllocator &arena) { - SinglePartGeometry::MakeMutable(geom, arena); - for (idx_t i = 0; i < SinglePartGeometry::VertexCount(geom); i++) { - auto vertex = SinglePartGeometry::GetVertex(geom, i); - std::swap(vertex.x, vertex.y); - SinglePartGeometry::SetVertex(geom, i, vertex); - } - } - static void Case(Geometry::Tags::MultiPartGeometry, Geometry &geom, ArenaAllocator &arena) { - for (uint32_t i = 0; i < MultiPartGeometry::PartCount(geom); i++) { - auto &part = MultiPartGeometry::Part(geom, i); - Geometry::Match(part, arena); - } - } - }; - - UnaryExecutor::Execute(input, result, count, [&](geometry_t input) { - auto geom = Geometry::Deserialize(lstate.arena, input); - Geometry::Match(geom, lstate.arena); - return Geometry::Serialize(geom, result); - }); -} - -//------------------------------------------------------------------------------ -// Documentation -//------------------------------------------------------------------------------ -static constexpr const char *DOC_DESCRIPTION = R"( - Returns a new geometry with the coordinates of the input geometry "flipped" so that x = y and y = x. 
-)"; - -static constexpr const char *DOC_EXAMPLE = R"()"; - -static constexpr DocTag DOC_TAGS[] = {{"ext", "spatial"}, {"category", "construction"}}; -//------------------------------------------------------------------------------ -// Register functions -//------------------------------------------------------------------------------ -void CoreScalarFunctions::RegisterStFlipCoordinates(DatabaseInstance &db) { - ScalarFunctionSet flip_function_set("ST_FlipCoordinates"); - flip_function_set.AddFunction( - ScalarFunction({GeoTypes::POINT_2D()}, GeoTypes::POINT_2D(), PointFlipCoordinatesFunction)); - flip_function_set.AddFunction( - ScalarFunction({GeoTypes::LINESTRING_2D()}, GeoTypes::LINESTRING_2D(), LineStringFlipCoordinatesFunction)); - flip_function_set.AddFunction( - ScalarFunction({GeoTypes::POLYGON_2D()}, GeoTypes::POLYGON_2D(), PolygonFlipCoordinatesFunction)); - flip_function_set.AddFunction(ScalarFunction({GeoTypes::BOX_2D()}, GeoTypes::BOX_2D(), BoxFlipCoordinatesFunction)); - flip_function_set.AddFunction(ScalarFunction({GeoTypes::GEOMETRY()}, GeoTypes::GEOMETRY(), - GeometryFlipCoordinatesFunction, nullptr, nullptr, nullptr, - GeometryFunctionLocalState::Init)); - - ExtensionUtil::RegisterFunction(db, flip_function_set); - DocUtil::AddDocumentation(db, "ST_FlipCoordinates", DOC_DESCRIPTION, DOC_EXAMPLE, DOC_TAGS); -} - -} // namespace core - -} // namespace spatial \ No newline at end of file diff --git a/spatial/src/spatial/core/functions/scalar/st_force.cpp b/spatial/src/spatial/core/functions/scalar/st_force.cpp deleted file mode 100644 index 07e73a50..00000000 --- a/spatial/src/spatial/core/functions/scalar/st_force.cpp +++ /dev/null @@ -1,144 +0,0 @@ -#include "duckdb/parser/parsed_data/create_scalar_function_info.hpp" - -#include "spatial/common.hpp" -#include "spatial/core/functions/scalar.hpp" -#include "spatial/core/functions/common.hpp" -#include "spatial/core/geometry/geometry.hpp" -#include "spatial/core/types.hpp" - -namespace spatial { - 
-namespace core { - -//------------------------------------------------------------------------------ -// GEOMETRY -//------------------------------------------------------------------------------ -template -static void GeometryFunction(DataChunk &args, ExpressionState &state, Vector &result) { - auto &lstate = GeometryFunctionLocalState::ResetAndGet(state); - auto &arena = lstate.arena; - auto count = args.size(); - auto &input = args.data[0]; - - if (HAS_Z && HAS_M) { - auto &z_values = args.data[1]; - auto &m_values = args.data[2]; - TernaryExecutor::Execute( - input, z_values, m_values, result, count, [&](const geometry_t &blob, double default_z, double default_m) { - auto geom = Geometry::Deserialize(arena, blob); - geom.SetVertexType(arena, HAS_Z, HAS_M, default_z, default_m); - return Geometry::Serialize(geom, result); - }); - - } else if (HAS_Z || HAS_M) { - auto &z_values = args.data[1]; - BinaryExecutor::Execute( - input, z_values, result, count, [&](const geometry_t &blob, double default_value) { - auto def_z = HAS_Z ? default_value : 0; - auto def_m = HAS_M ? 
default_value : 0; - - auto geom = Geometry::Deserialize(arena, blob); - geom.SetVertexType(arena, HAS_Z, HAS_M, def_z, def_m); - return Geometry::Serialize(geom, result); - }); - } else { - UnaryExecutor::Execute(input, result, count, [&](const geometry_t &blob) { - auto geom = Geometry::Deserialize(arena, blob); - geom.SetVertexType(arena, HAS_Z, HAS_M); - return Geometry::Serialize(geom, result); - }); - } -} - -//------------------------------------------------------------------------------ -// Documentation -//------------------------------------------------------------------------------ -static constexpr const DocTag DOC_TAGS[] = {{"ext", "spatial"}, {"category", "construction"}}; - -// FORCE_2D -static constexpr const char *FORCE2D_DOC_DESCRIPTION = R"( -Forces the vertices of a geometry to have X and Y components - -This function will drop any Z and M values from the input geometry, if present. If the input geometry is already 2D, it will be returned as is. -)"; - -static constexpr const char *FORCE2D_DOC_EXAMPLE = R"( - -)"; - -// FORCE_3DZ -static constexpr const char *FORCE3DZ_DOC_DESCRIPTION = R"( -Forces the vertices of a geometry to have X, Y and Z components - -The following cases apply: -- If the input geometry has a M component but no Z component, the M component will be replaced with the new Z value. -- If the input geometry has a Z component but no M component, it will be returned as is. -- If the input geometry has both a Z component and a M component, the M component will be removed. -- Otherwise, if the input geometry has neither a Z or M component, the new Z value will be added to the vertices of the input geometry. 
-)"; - -static constexpr const char *FORCE3DZ_DOC_EXAMPLE = R"( - -)"; - -// FORCE_3DM -static constexpr const char *FORCE3DM_DOC_DESCRIPTION = R"( -Forces the vertices of a geometry to have X, Y and M components - -The following cases apply: -- If the input geometry has a Z component but no M component, the Z component will be replaced with the new M value. -- If the input geometry has a M component but no Z component, it will be returned as is. -- If the input geometry has both a Z component and a M component, the Z component will be removed. -- Otherwise, if the input geometry has neither a Z or M component, the new M value will be added to the vertices of the input geometry. -)"; - -static constexpr const char *FORCE3DM_DOC_EXAMPLE = R"( - -)"; - -// FORCE_4D -static constexpr const char *FORCE4D_DOC_DESCRIPTION = R"( -Forces the vertices of a geometry to have X, Y, Z and M components - -The following cases apply: -- If the input geometry has a Z component but no M component, the new M value will be added to the vertices of the input geometry. -- If the input geometry has a M component but no Z component, the new Z value will be added to the vertices of the input geometry. -- If the input geometry has both a Z component and a M component, the geometry will be returned as is. -- Otherwise, if the input geometry has neither a Z or M component, the new Z and M values will be added to the vertices of the input geometry. 
-)"; - -static constexpr const char *FORCE4D_DOC_EXAMPLE = R"( - -)"; - -//------------------------------------------------------------------------------ -// Register Functions -//------------------------------------------------------------------------------ -void CoreScalarFunctions::RegisterStForce(DatabaseInstance &db) { - ScalarFunction st_force2d("ST_Force2D", {GeoTypes::GEOMETRY()}, GeoTypes::GEOMETRY(), - GeometryFunction, nullptr, nullptr, nullptr, - GeometryFunctionLocalState::Init); - ScalarFunction st_force3dz("ST_Force3DZ", {GeoTypes::GEOMETRY(), LogicalType::DOUBLE}, GeoTypes::GEOMETRY(), - GeometryFunction, nullptr, nullptr, nullptr, - GeometryFunctionLocalState::Init); - ScalarFunction st_force3dm("ST_Force3DM", {GeoTypes::GEOMETRY(), LogicalType::DOUBLE}, GeoTypes::GEOMETRY(), - GeometryFunction, nullptr, nullptr, nullptr, - GeometryFunctionLocalState::Init); - ScalarFunction st_force4d("ST_Force4D", {GeoTypes::GEOMETRY(), LogicalType::DOUBLE, LogicalType::DOUBLE}, - GeoTypes::GEOMETRY(), GeometryFunction, nullptr, nullptr, nullptr, - GeometryFunctionLocalState::Init); - - ExtensionUtil::RegisterFunction(db, st_force2d); - ExtensionUtil::RegisterFunction(db, st_force3dz); - ExtensionUtil::RegisterFunction(db, st_force3dm); - ExtensionUtil::RegisterFunction(db, st_force4d); - - DocUtil::AddDocumentation(db, "ST_Force2D", FORCE2D_DOC_DESCRIPTION, FORCE2D_DOC_EXAMPLE, DOC_TAGS); - DocUtil::AddDocumentation(db, "ST_Force3DM", FORCE3DM_DOC_DESCRIPTION, FORCE3DM_DOC_EXAMPLE, DOC_TAGS); - DocUtil::AddDocumentation(db, "ST_Force3DZ", FORCE3DZ_DOC_DESCRIPTION, FORCE3DZ_DOC_EXAMPLE, DOC_TAGS); - DocUtil::AddDocumentation(db, "ST_Force4D", FORCE4D_DOC_DESCRIPTION, FORCE4D_DOC_EXAMPLE, DOC_TAGS); -} - -} // namespace core - -} // namespace spatial diff --git a/spatial/src/spatial/core/functions/scalar/st_geometrytype.cpp b/spatial/src/spatial/core/functions/scalar/st_geometrytype.cpp deleted file mode 100644 index 2acfe200..00000000 --- 
a/spatial/src/spatial/core/functions/scalar/st_geometrytype.cpp +++ /dev/null @@ -1,121 +0,0 @@ -#include "duckdb/parser/parsed_data/create_scalar_function_info.hpp" -#include "spatial/common.hpp" -#include "spatial/core/functions/scalar.hpp" -#include "spatial/core/geometry/geometry.hpp" -#include "spatial/core/functions/common.hpp" -#include "spatial/core/types.hpp" - -namespace spatial { - -namespace core { - -static unique_ptr GeometryTypeFunctionBind(ClientContext &context, ScalarFunction &bound_function, - vector> &arguments) { - // Create an enum type for all geometry types - // Ensure that these are in the same order as the GeometryType enum - vector enum_values = {"POINT", "LINESTRING", "POLYGON", "MULTIPOINT", "MULTILINESTRING", "MULTIPOLYGON", - "GEOMETRYCOLLECTION", - // or... - "UNKNOWN"}; - - bound_function.return_type = GeoTypes::CreateEnumType("GEOMETRY_TYPE", enum_values); - - return nullptr; -} - -//------------------------------------------------------------------------------ -// Point2D -//------------------------------------------------------------------------------ -static void Point2DTypeFunction(DataChunk &args, ExpressionState &state, Vector &result) { - result.SetVectorType(VectorType::CONSTANT_VECTOR); - *ConstantVector::GetData(result) = static_cast(GeometryType::POINT); -} - -//------------------------------------------------------------------------------ -// LineString2D -//------------------------------------------------------------------------------ -static void Linestring2DTypeFunction(DataChunk &args, ExpressionState &state, Vector &result) { - result.SetVectorType(VectorType::CONSTANT_VECTOR); - *ConstantVector::GetData(result) = static_cast(GeometryType::LINESTRING); -} - -//------------------------------------------------------------------------------ -// Polygon2D -//------------------------------------------------------------------------------ -static void Polygon2DTypeFunction(DataChunk &args, ExpressionState &state, Vector 
&result) { - result.SetVectorType(VectorType::CONSTANT_VECTOR); - *ConstantVector::GetData(result) = static_cast(GeometryType::POLYGON); -} - -//------------------------------------------------------------------------------ -// GEOMETRY -//------------------------------------------------------------------------------ -static void GeometryTypeFunction(DataChunk &args, ExpressionState &state, Vector &result) { - const auto count = args.size(); - auto &input = args.data[0]; - - UnaryExecutor::Execute( - input, result, count, [&](const geometry_t &geom) { return static_cast(geom.GetType()); }); -} - -//------------------------------------------------------------------------------ -// WKB -//------------------------------------------------------------------------------ -static void WKBTypeFunction(DataChunk &args, ExpressionState &state, Vector &result) { - const auto count = args.size(); - auto &input = args.data[0]; - - UnaryExecutor::Execute(input, result, count, [&](const string_t &blob) { - Cursor cursor(blob); - const auto le = cursor.Read(); - const auto type = le ? cursor.Read() : cursor.ReadBigEndian(); - const auto normalized_type = (type & 0xffff) % 1000; - if (normalized_type == 0 || normalized_type > 7) { - throw InvalidInputException("WKB type '%d' is not a supported geometry type", type); - } - - // Return the geometry type - // Subtract 1 since the WKB type is 1-indexed - return static_cast(normalized_type - 1); - }); -} - -//------------------------------------------------------------------------------ -// Documentation -//------------------------------------------------------------------------------ -static constexpr const char *DOC_DESCRIPTION = R"( - Returns a 'GEOMETRY_TYPE' enum identifying the input geometry type. Possible enum return types are: `POINT`, `LINESTRING`, `POLYGON`, `MULTIPOINT`, `MULTILINESTRING`, `MULTIPOLYGON`, and `GEOMETRYCOLLECTION`. 
-)"; - -static constexpr const char *DOC_EXAMPLE = R"( -SELECT DISTINCT ST_GeometryType(ST_GeomFromText('POINT(1 1)')); ----- -POINT -)"; - -static constexpr DocTag DOC_TAGS[] = {{"ext", "spatial"}, {"category", "property"}}; -//------------------------------------------------------------------------------ -// Register functions -//------------------------------------------------------------------------------ -void CoreScalarFunctions::RegisterStGeometryType(DatabaseInstance &db) { - - ScalarFunctionSet geometry_type_set("ST_GeometryType"); - geometry_type_set.AddFunction( - ScalarFunction({GeoTypes::POINT_2D()}, LogicalType::ANY, Point2DTypeFunction, GeometryTypeFunctionBind)); - geometry_type_set.AddFunction(ScalarFunction({GeoTypes::LINESTRING_2D()}, LogicalType::ANY, - Linestring2DTypeFunction, GeometryTypeFunctionBind)); - geometry_type_set.AddFunction( - ScalarFunction({GeoTypes::POLYGON_2D()}, LogicalType::ANY, Polygon2DTypeFunction, GeometryTypeFunctionBind)); - geometry_type_set.AddFunction( - ScalarFunction({GeoTypes::GEOMETRY()}, LogicalType::ANY, GeometryTypeFunction, GeometryTypeFunctionBind)); - - geometry_type_set.AddFunction( - ScalarFunction({GeoTypes::WKB_BLOB()}, LogicalType::ANY, WKBTypeFunction, GeometryTypeFunctionBind)); - - ExtensionUtil::RegisterFunction(db, geometry_type_set); - DocUtil::AddDocumentation(db, "ST_GeometryType", DOC_DESCRIPTION, DOC_EXAMPLE, DOC_TAGS); -} - -} // namespace core - -} // namespace spatial \ No newline at end of file diff --git a/spatial/src/spatial/core/functions/scalar/st_geomfromhexwkb.cpp b/spatial/src/spatial/core/functions/scalar/st_geomfromhexwkb.cpp deleted file mode 100644 index 3192d5c7..00000000 --- a/spatial/src/spatial/core/functions/scalar/st_geomfromhexwkb.cpp +++ /dev/null @@ -1,100 +0,0 @@ -#include "duckdb/common/vector_operations/generic_executor.hpp" -#include "duckdb/common/vector_operations/unary_executor.hpp" -#include "duckdb/parser/parsed_data/create_scalar_function_info.hpp" -#include 
"duckdb/common/types/blob.hpp" - -#include "spatial/common.hpp" -#include "spatial/core/functions/scalar.hpp" -#include "spatial/core/functions/common.hpp" -#include "spatial/core/types.hpp" -#include "spatial/core/geometry/wkb_writer.hpp" -#include "spatial/core/geometry/wkb_reader.hpp" - -namespace spatial { - -namespace core { - -//------------------------------------------------------------------------------ -// HEX WKB -> GEOMETRY -//------------------------------------------------------------------------------ - -void GeometryFromHEXWKB(DataChunk &args, ExpressionState &state, Vector &result) { - D_ASSERT(args.data.size() == 1); - auto &input = args.data[0]; - auto count = args.size(); - - auto &lstate = GeometryFunctionLocalState::ResetAndGet(state); - WKBReader reader(lstate.arena); - - UnaryExecutor::Execute(input, result, count, [&](string_t input_hex) { - auto hex_size = input_hex.GetSize(); - auto hex_ptr = const_data_ptr_cast(input_hex.GetData()); - - if (hex_size % 2 == 1) { - throw InvalidInputException("Invalid HEX WKB string, length must be even."); - } - - auto blob_size = hex_size / 2; - - unique_ptr wkb_blob(new data_t[blob_size]); - auto blob_ptr = wkb_blob.get(); - auto blob_idx = 0; - for (idx_t hex_idx = 0; hex_idx < hex_size; hex_idx += 2) { - auto byte_a = Blob::HEX_MAP[hex_ptr[hex_idx]]; - auto byte_b = Blob::HEX_MAP[hex_ptr[hex_idx + 1]]; - D_ASSERT(byte_a != -1); - D_ASSERT(byte_b != -1); - - blob_ptr[blob_idx++] = (byte_a << 4) + byte_b; - } - - auto geom = reader.Deserialize(blob_ptr, blob_size); - return Geometry::Serialize(geom, result); - }); -} - -//------------------------------------------------------------------------------ -// Documentation -//------------------------------------------------------------------------------ -static constexpr const DocTag DOC_TAGS[] = {{"ext", "spatial"}, {"category", "conversion"}}; - -// HexWKB -static constexpr const char *DOC_DESCRIPTION = R"( -Creates a GEOMETRY from a HEXWKB string -)"; - 
-static constexpr const char *DOC_EXAMPLE = R"( - -)"; - -// HexEWKB -static constexpr const char *EXTENDED_DOC_DESCRIPTION = R"( - Deserialize a GEOMETRY from a HEXEWKB encoded string -)"; - -static constexpr const char *EXTENDED_DOC_EXAMPLE = R"( - -)"; - -//------------------------------------------------------------------------------ -// Register functions -//------------------------------------------------------------------------------ -void CoreScalarFunctions::RegisterStGeomFromHEXWKB(DatabaseInstance &db) { - ScalarFunction hexwkb("ST_GeomFromHEXWKB", {LogicalType::VARCHAR}, GeoTypes::GEOMETRY(), GeometryFromHEXWKB, - nullptr, nullptr, nullptr, GeometryFunctionLocalState::Init); - ExtensionUtil::RegisterFunction(db, hexwkb); - DocUtil::AddDocumentation(db, "ST_GeomFromHEXWKB", DOC_DESCRIPTION, DOC_EXAMPLE, DOC_TAGS); - - // Our WKB reader also parses EWKB, even though it will just ignore SRID's. - // so we'll just add an alias for now. In the future, once we actually handle - // EWKB and store SRID's, these functions should differentiate between - // the two formats. 
- ScalarFunction ewkb("ST_GeomFromHEXEWKB", {LogicalType::VARCHAR}, GeoTypes::GEOMETRY(), GeometryFromHEXWKB, nullptr, - nullptr, nullptr, GeometryFunctionLocalState::Init); - ExtensionUtil::RegisterFunction(db, ewkb); - DocUtil::AddDocumentation(db, "ST_GeomFromHEXEWKB", EXTENDED_DOC_DESCRIPTION, EXTENDED_DOC_EXAMPLE, DOC_TAGS); -} - -} // namespace core - -} // namespace spatial \ No newline at end of file diff --git a/spatial/src/spatial/core/functions/scalar/st_geomfromtext.cpp b/spatial/src/spatial/core/functions/scalar/st_geomfromtext.cpp deleted file mode 100644 index 50962670..00000000 --- a/spatial/src/spatial/core/functions/scalar/st_geomfromtext.cpp +++ /dev/null @@ -1,120 +0,0 @@ -#include "spatial/common.hpp" -#include "spatial/core/types.hpp" -#include "spatial/core/functions/scalar.hpp" -#include "spatial/core/functions/common.hpp" -#include "spatial/core/geometry/wkt_reader.hpp" -#include "duckdb/parser/parsed_data/create_scalar_function_info.hpp" -#include "duckdb/planner/expression/bound_function_expression.hpp" -#include "duckdb/common/vector_operations/unary_executor.hpp" -#include "duckdb/common/vector_operations/binary_executor.hpp" -#include "duckdb/execution/expression_executor.hpp" - -namespace spatial { - -namespace core { - -struct GeometryFromWKTBindData : public FunctionData { - bool ignore_invalid = false; - - explicit GeometryFromWKTBindData(bool ignore_invalid) : ignore_invalid(ignore_invalid) { - } - -public: - unique_ptr Copy() const override { - return make_uniq(ignore_invalid); - } - bool Equals(const FunctionData &other_p) const override { - return true; - } -}; - -// TODO: we should implement our own WKT parser asap. This is a temporary and really inefficient solution. -// TODO: Ignore_invalid doesnt make sense here, we should just use a try_cast instead. 
-static void GeometryFromWKTFunction(DataChunk &args, ExpressionState &state, Vector &result) { - auto count = args.size(); - auto input = args.data[0]; - - auto &func_expr = state.expr.Cast(); - const auto &info = func_expr.bind_info->Cast(); - - auto &lstate = GeometryFunctionLocalState::ResetAndGet(state); - auto &arena = lstate.arena; - - WKTReader reader(arena); - UnaryExecutor::ExecuteWithNulls(input, result, count, - [&](string_t &wkt, ValidityMask &mask, idx_t idx) { - try { - auto geom = reader.Parse(wkt); - return Geometry::Serialize(geom, result); - } catch (InvalidInputException &error) { - if (!info.ignore_invalid) { - throw; - } - mask.SetInvalid(idx); - return geometry_t {}; - } - }); -} - -static unique_ptr GeometryFromWKTBind(ClientContext &context, ScalarFunction &bound_function, - vector> &arguments) { - if (arguments.empty()) { - throw InvalidInputException("ST_GeomFromText requires at least one argument"); - } - auto &input_type = arguments[0]->return_type; - if (input_type.id() != LogicalTypeId::VARCHAR) { - throw InvalidInputException("ST_GeomFromText requires a string argument"); - } - - bool ignore_invalid = false; - for (idx_t i = 1; i < arguments.size(); i++) { - auto &arg = arguments[i]; - if (arg->HasParameter()) { - throw InvalidInputException("Parameters are not supported in ST_GeomFromText optional arguments"); - } - if (!arg->IsFoldable()) { - throw InvalidInputException( - "Non-constant arguments are not supported in ST_GeomFromText optional arguments"); - } - if (arg->alias == "ignore_invalid") { - if (arg->return_type.id() != LogicalTypeId::BOOLEAN) { - throw InvalidInputException("ST_GeomFromText optional argument 'ignore_invalid' must be a boolean"); - } - ignore_invalid = BooleanValue::Get(ExpressionExecutor::EvaluateScalar(context, *arg)); - } - } - return make_uniq(ignore_invalid); -} - -//------------------------------------------------------------------------------ -// Documentation 
-//------------------------------------------------------------------------------ -static constexpr const char *DOC_DESCRIPTION = R"( - Deserializes a GEOMETRY from a WKT string, optionally ignoring invalid geometries -)"; - -static constexpr const char *DOC_EXAMPLE = R"( - -)"; - -static constexpr const DocTag DOC_TAGS[] = {{"ext", "spatial"}, {"category", "conversion"}}; - -//------------------------------------------------------------------------------ -// Register Functions -//------------------------------------------------------------------------------ - -void CoreScalarFunctions::RegisterStGeomFromText(DatabaseInstance &db) { - - ScalarFunctionSet set("ST_GeomFromText"); - set.AddFunction(ScalarFunction({LogicalType::VARCHAR}, core::GeoTypes::GEOMETRY(), GeometryFromWKTFunction, - GeometryFromWKTBind, nullptr, nullptr, GeometryFunctionLocalState::Init)); - set.AddFunction(ScalarFunction({LogicalType::VARCHAR, LogicalType::BOOLEAN}, core::GeoTypes::GEOMETRY(), - GeometryFromWKTFunction, GeometryFromWKTBind, nullptr, nullptr, - GeometryFunctionLocalState::Init)); - ExtensionUtil::RegisterFunction(db, set); - DocUtil::AddDocumentation(db, "ST_GeomFromText", DOC_DESCRIPTION, DOC_EXAMPLE, DOC_TAGS); -} - -} // namespace core - -} // namespace spatial diff --git a/spatial/src/spatial/core/functions/scalar/st_geomfromwkb.cpp b/spatial/src/spatial/core/functions/scalar/st_geomfromwkb.cpp deleted file mode 100644 index 326fcc79..00000000 --- a/spatial/src/spatial/core/functions/scalar/st_geomfromwkb.cpp +++ /dev/null @@ -1,325 +0,0 @@ -#include "duckdb/parser/parsed_data/create_scalar_function_info.hpp" -#include "spatial/common.hpp" -#include "spatial/core/functions/scalar.hpp" -#include "spatial/core/functions/common.hpp" -#include "spatial/core/geometry/geometry.hpp" -#include "spatial/core/geometry/wkb_reader.hpp" -#include "spatial/core/types.hpp" - -namespace spatial { - -namespace core { - -struct SimpleWKBReader { - const char *data = nullptr; - uint32_t 
cursor = 0; - uint32_t length = 0; - - SimpleWKBReader(const char *data, uint32_t length) : data(data), length(length) { - } - - vector ReadLine() { - auto byte_order = ReadByte(); - D_ASSERT(byte_order == 1); // Little endian - (void)byte_order; - auto type = ReadInt(); - D_ASSERT(type == 2); // LineString - (void)type; - auto num_points = ReadInt(); - D_ASSERT(num_points > 0); - D_ASSERT(cursor + num_points * 2 * sizeof(double) <= length); - vector result; - for (uint32_t i = 0; i < num_points; i++) { - auto x = ReadDouble(); - auto y = ReadDouble(); - result.emplace_back(x, y); - } - return result; - } - - VertexXY ReadPoint() { - auto byte_order = ReadByte(); - D_ASSERT(byte_order == 1); // Little endian - (void)byte_order; - auto type = ReadInt(); - D_ASSERT(type == 1); // Point - (void)type; - auto x = ReadDouble(); - auto y = ReadDouble(); - return VertexXY(x, y); - } - - vector> ReadPolygon() { - auto byte_order = ReadByte(); - D_ASSERT(byte_order == 1); // Little endian - (void)byte_order; - auto type = ReadInt(); - D_ASSERT(type == 3); // Polygon - (void)type; - auto num_rings = ReadInt(); - D_ASSERT(num_rings > 0); - vector> result; - for (uint32_t i = 0; i < num_rings; i++) { - auto num_points = ReadInt(); - D_ASSERT(num_points > 0); - D_ASSERT(cursor + num_points * 2 * sizeof(double) <= length); - vector ring; - for (uint32_t j = 0; j < num_points; j++) { - auto x = ReadDouble(); - auto y = ReadDouble(); - ring.emplace_back(x, y); - } - result.push_back(ring); - } - return result; - } - - uint8_t ReadByte() { - D_ASSERT(cursor + sizeof(uint8_t) <= length); - uint8_t result = data[cursor]; - cursor += sizeof(uint8_t); - return result; - } - - uint32_t ReadInt() { - D_ASSERT(cursor + sizeof(uint32_t) <= length); - // Read uint32_t in little endian - uint32_t result = 0; - result |= (uint32_t)data[cursor + 0] << 0 & 0x000000FF; - result |= (uint32_t)data[cursor + 1] << 8 & 0x0000FF00; - result |= (uint32_t)data[cursor + 2] << 16 & 0x00FF0000; - result |= 
(uint32_t)data[cursor + 3] << 24 & 0xFF000000; - cursor += sizeof(uint32_t); - return result; - } - - double ReadDouble() { - D_ASSERT(cursor + sizeof(double) <= length); - // Read double in little endian - uint64_t result = 0; - result |= (uint64_t)data[cursor + 0] << 0 & 0x00000000000000FF; - result |= (uint64_t)data[cursor + 1] << 8 & 0x000000000000FF00; - result |= (uint64_t)data[cursor + 2] << 16 & 0x0000000000FF0000; - result |= (uint64_t)data[cursor + 3] << 24 & 0x00000000FF000000; - result |= (uint64_t)data[cursor + 4] << 32 & 0x000000FF00000000; - result |= (uint64_t)data[cursor + 5] << 40 & 0x0000FF0000000000; - result |= (uint64_t)data[cursor + 6] << 48 & 0x00FF000000000000; - result |= (uint64_t)data[cursor + 7] << 56 & 0xFF00000000000000; - cursor += sizeof(uint64_t); - double result_double; - memcpy(&result_double, &result, sizeof(double)); - return result_double; - } -}; - -//------------------------------------------------------------------------------ -// POINT_2D -//------------------------------------------------------------------------------ -static void Point2DFromWKBFunction(DataChunk &args, ExpressionState &state, Vector &result) { - D_ASSERT(args.data.size() == 1); - auto count = args.size(); - auto &wkb_blobs = args.data[0]; - wkb_blobs.Flatten(count); - - auto &point_children = StructVector::GetEntries(result); - auto x_data = FlatVector::GetData(*point_children[0]); - auto y_data = FlatVector::GetData(*point_children[1]); - - auto wkb_data = FlatVector::GetData(wkb_blobs); - - for (idx_t i = 0; i < count; i++) { - auto wkb = wkb_data[i]; - auto wkb_ptr = wkb.GetDataUnsafe(); - auto wkb_size = wkb.GetSize(); - - SimpleWKBReader reader(wkb_ptr, wkb_size); - auto point = reader.ReadPoint(); - x_data[i] = point.x; - y_data[i] = point.y; - } - - if (count == 1) { - result.SetVectorType(VectorType::CONSTANT_VECTOR); - } -} - -//------------------------------------------------------------------------------ -// LINESTRING_2D 
-//------------------------------------------------------------------------------ -static void LineString2DFromWKBFunction(DataChunk &args, ExpressionState &state, Vector &result) { - D_ASSERT(args.data.size() == 1); - auto count = args.size(); - auto &wkb_blobs = args.data[0]; - wkb_blobs.Flatten(count); - - auto &inner = ListVector::GetEntry(result); - auto lines = ListVector::GetData(result); - - auto wkb_data = FlatVector::GetData(wkb_blobs); - - idx_t total_size = 0; - for (idx_t i = 0; i < count; i++) { - auto wkb = wkb_data[i]; - auto wkb_ptr = wkb.GetDataUnsafe(); - auto wkb_size = wkb.GetSize(); - - SimpleWKBReader reader(wkb_ptr, wkb_size); - auto line = reader.ReadLine(); - auto line_size = line.size(); - - lines[i].offset = total_size; - lines[i].length = line_size; - - ListVector::Reserve(result, total_size + line_size); - - // Since ListVector::Reserve potentially reallocates, we need to re-fetch the inner vector pointers - auto &children = StructVector::GetEntries(inner); - auto &x_child = children[0]; - auto &y_child = children[1]; - auto x_data = FlatVector::GetData(*x_child); - auto y_data = FlatVector::GetData(*y_child); - - for (idx_t j = 0; j < line_size; j++) { - x_data[total_size + j] = line[j].x; - y_data[total_size + j] = line[j].y; - } - - total_size += line_size; - } - - ListVector::SetListSize(result, total_size); - - if (count == 1) { - result.SetVectorType(VectorType::CONSTANT_VECTOR); - } -} - -//------------------------------------------------------------------------------ -// POLYGON_2D -//------------------------------------------------------------------------------ -static void Polygon2DFromWKBFunction(DataChunk &args, ExpressionState &state, Vector &result) { - D_ASSERT(args.data.size() == 1); - auto count = args.size(); - - // Set up input data - auto &wkb_blobs = args.data[0]; - wkb_blobs.Flatten(count); - auto wkb_data = FlatVector::GetData(wkb_blobs); - - // Set up output data - auto &ring_vec = ListVector::GetEntry(result); 
- auto polygons = ListVector::GetData(result); - - idx_t total_ring_count = 0; - idx_t total_point_count = 0; - - for (idx_t i = 0; i < count; i++) { - auto wkb = wkb_data[i]; - auto wkb_ptr = wkb.GetDataUnsafe(); - auto wkb_size = wkb.GetSize(); - - SimpleWKBReader reader(wkb_ptr, wkb_size); - auto polygon = reader.ReadPolygon(); - auto ring_count = polygon.size(); - - polygons[i].offset = total_ring_count; - polygons[i].length = ring_count; - - ListVector::Reserve(result, total_ring_count + ring_count); - // Since ListVector::Reserve potentially reallocates, we need to re-fetch the inner vector pointers - - for (idx_t j = 0; j < ring_count; j++) { - auto ring = polygon[j]; - auto point_count = ring.size(); - - ListVector::Reserve(ring_vec, total_point_count + point_count); - auto ring_entries = ListVector::GetData(ring_vec); - auto &inner = ListVector::GetEntry(ring_vec); - - auto &children = StructVector::GetEntries(inner); - auto &x_child = children[0]; - auto &y_child = children[1]; - auto x_data = FlatVector::GetData(*x_child); - auto y_data = FlatVector::GetData(*y_child); - - for (idx_t k = 0; k < point_count; k++) { - x_data[total_point_count + k] = ring[k].x; - y_data[total_point_count + k] = ring[k].y; - } - - ring_entries[total_ring_count + j].offset = total_point_count; - ring_entries[total_ring_count + j].length = point_count; - - total_point_count += point_count; - } - total_ring_count += ring_count; - } - - ListVector::SetListSize(result, total_ring_count); - ListVector::SetListSize(ring_vec, total_point_count); - - if (count == 1) { - result.SetVectorType(VectorType::CONSTANT_VECTOR); - } -} - -//------------------------------------------------------------------------------ -// GEOMETRY -//------------------------------------------------------------------------------ -static void GeometryFromWKBFunction(DataChunk &args, ExpressionState &state, Vector &result) { - auto &lstate = GeometryFunctionLocalState::ResetAndGet(state); - auto &arena = 
lstate.arena; - auto &input = args.data[0]; - auto count = args.size(); - - WKBReader reader(arena); - UnaryExecutor::Execute(input, result, count, [&](string_t input) { - auto geom = reader.Deserialize(input); - return Geometry::Serialize(geom, result); - }); -} - -//------------------------------------------------------------------------------ -// Documentation -//------------------------------------------------------------------------------ - -static constexpr const char *DOC_DESCRIPTION = R"( - Deserializes a GEOMETRY from a WKB encoded blob -)"; - -static constexpr const char *DOC_EXAMPLE = R"( - -)"; - -static constexpr const DocTag DOC_TAGS[] = {{"ext", "spatial"}, {"category", "conversion"}}; - -//------------------------------------------------------------------------------ -// Register functions -//------------------------------------------------------------------------------ -void CoreScalarFunctions::RegisterStGeomFromWKB(DatabaseInstance &db) { - - ScalarFunction point2d_from_wkb_info("ST_Point2DFromWKB", {GeoTypes::WKB_BLOB()}, GeoTypes::POINT_2D(), - Point2DFromWKBFunction); - ExtensionUtil::RegisterFunction(db, point2d_from_wkb_info); - - ScalarFunction linestring2d_from_wkb_info("ST_LineString2DFromWKB", {GeoTypes::WKB_BLOB()}, - GeoTypes::LINESTRING_2D(), LineString2DFromWKBFunction); - ExtensionUtil::RegisterFunction(db, linestring2d_from_wkb_info); - - ScalarFunction polygon2d_from_wkb("ST_Polygon2DFromWKB", {GeoTypes::WKB_BLOB()}, GeoTypes::POLYGON_2D(), - Polygon2DFromWKBFunction); - ExtensionUtil::RegisterFunction(db, polygon2d_from_wkb); - - ScalarFunctionSet st_geom_from_wkb("ST_GeomFromWKB"); - st_geom_from_wkb.AddFunction(ScalarFunction({GeoTypes::WKB_BLOB()}, GeoTypes::GEOMETRY(), GeometryFromWKBFunction, - nullptr, nullptr, nullptr, GeometryFunctionLocalState::Init)); - st_geom_from_wkb.AddFunction(ScalarFunction({LogicalType::BLOB}, GeoTypes::GEOMETRY(), GeometryFromWKBFunction, - nullptr, nullptr, nullptr, 
GeometryFunctionLocalState::Init)); - - ExtensionUtil::RegisterFunction(db, st_geom_from_wkb); - DocUtil::AddDocumentation(db, "ST_GeomFromWKB", DOC_DESCRIPTION, DOC_EXAMPLE, DOC_TAGS); -} - -} // namespace core - -} // namespace spatial \ No newline at end of file diff --git a/spatial/src/spatial/core/functions/scalar/st_has.cpp b/spatial/src/spatial/core/functions/scalar/st_has.cpp deleted file mode 100644 index c611d4f9..00000000 --- a/spatial/src/spatial/core/functions/scalar/st_has.cpp +++ /dev/null @@ -1,207 +0,0 @@ -#include "duckdb/parser/parsed_data/create_scalar_function_info.hpp" - -#include "spatial/common.hpp" -#include "spatial/core/functions/scalar.hpp" -#include "spatial/core/functions/common.hpp" -#include "spatial/core/geometry/geometry.hpp" -#include "spatial/core/types.hpp" - -namespace spatial { - -namespace core { - -//------------------------------------------------------------------------------ -// GEOMETRY -//------------------------------------------------------------------------------ -template -static void GeometryHasFunction(DataChunk &args, ExpressionState &state, Vector &result) { - const auto count = args.size(); - auto &input = args.data[0]; - UnaryExecutor::Execute(input, result, count, [&](const geometry_t &blob) { - const auto props = blob.GetProperties(); - return HAS_Z_NOT_M ? 
props.HasZ() : props.HasM(); - }); -} - -static void GeometryZMFlagFunction(DataChunk &args, ExpressionState &state, Vector &result) { - const auto count = args.size(); - auto &input = args.data[0]; - UnaryExecutor::Execute(input, result, count, [&](const geometry_t &blob) { - const auto props = blob.GetProperties(); - const auto has_z = props.HasZ(); - const auto has_m = props.HasM(); - - if (has_z && has_m) { - return 3; - } - if (has_z) { - return 2; - } - if (has_m) { - return 1; - } - return 0; - }); -} - -//------------------------------------------------------------------------------ -// WKB -//------------------------------------------------------------------------------ -template -static void WKBHasFunction(DataChunk &args, ExpressionState &state, Vector &result) { - const auto count = args.size(); - auto &input = args.data[0]; - UnaryExecutor::Execute(input, result, count, [&](const string_t &blob) { - Cursor cursor(blob); - const auto le = cursor.Read(); - const auto type = le ? cursor.Read() : cursor.ReadBigEndian(); - // Check for ISO WKB and EWKB Z and M flags - const uint32_t iso_wkb_props = (type & 0xffff) / 1000; - return HAS_Z_NOT_M ? (iso_wkb_props == 1) || (iso_wkb_props == 3) || ((type & 0x80000000) != 0) - : (iso_wkb_props == 2) || (iso_wkb_props == 3) || ((type & 0x40000000) != 0); - }); -} - -static void WKBZMFlagFunction(DataChunk &args, ExpressionState &state, Vector &result) { - const auto count = args.size(); - auto &input = args.data[0]; - UnaryExecutor::Execute(input, result, count, [&](const string_t &blob) { - Cursor cursor(blob); - const auto le = cursor.Read(); - const auto type = le ? 
cursor.Read() : cursor.ReadBigEndian(); - // Check for ISO WKB and EWKB Z and M flags - const uint32_t iso_wkb_props = (type & 0xffff) / 1000; - const auto has_z = (iso_wkb_props == 1) || (iso_wkb_props == 3) || ((type & 0x80000000) != 0); - const auto has_m = (iso_wkb_props == 2) || (iso_wkb_props == 3) || ((type & 0x40000000) != 0); - - if (has_z && has_m) { - return 3; - } - if (has_z) { - return 2; - } - if (has_m) { - return 1; - } - return 0; - }); -} - -//------------------------------------------------------------------------------ -// Documentation -//------------------------------------------------------------------------------ -static constexpr const DocTag DOC_TAGS[] = {{"ext", "spatial"}, {"category", "property"}}; - -// HAS_Z -static constexpr const char *HAS_Z_DESCRIPTION = R"( - Check if the input geometry has Z values. -)"; - -static constexpr const char *HAS_Z_EXAMPLE = R"( - -- HasZ for a 2D geometry - SELECT ST_HasZ(ST_GeomFromText('POINT(1 1)')); - ---- - false - - -- HasZ for a 3DZ geometry - SELECT ST_HasZ(ST_GeomFromText('POINT Z(1 1 1)')); - ---- - true - - -- HasZ for a 3DM geometry - SELECT ST_HasZ(ST_GeomFromText('POINT M(1 1 1)')); - ---- - false - - -- HasZ for a 4D geometry - SELECT ST_HasZ(ST_GeomFromText('POINT ZM(1 1 1 1)')); - ---- - true -)"; - -// HAS_M -static constexpr const char *HAS_M_DESCRIPTION = R"( - Check if the input geometry has M values. 
-)"; - -static constexpr const char *HAS_M_EXAMPLE = R"( - -- HasM for a 2D geometry - SELECT ST_HasM(ST_GeomFromText('POINT(1 1)')); - ---- - false - - -- HasM for a 3DZ geometry - SELECT ST_HasM(ST_GeomFromText('POINT Z(1 1 1)')); - ---- - false - - -- HasM for a 3DM geometry - SELECT ST_HasM(ST_GeomFromText('POINT M(1 1 1)')); - ---- - true - - -- HasM for a 4D geometry - SELECT ST_HasM(ST_GeomFromText('POINT ZM(1 1 1 1)')); - ---- - true -)"; - -// ZMFLAG -static constexpr const char *ZMFLAG_DESCRIPTION = R"( - Returns a flag indicating the presence of Z and M values in the input geometry. - 0 = No Z or M values - 1 = M values only - 2 = Z values only - 3 = Z and M values -)"; - -static constexpr const char *ZMFLAG_EXAMPLE = R"( - -- ZMFlag for a 2D geometry - SELECT ST_ZMFlag(ST_GeomFromText('POINT(1 1)')); - ---- - 0 - - -- ZMFlag for a 3DZ geometry - SELECT ST_ZMFlag(ST_GeomFromText('POINT Z(1 1 1)')); - ---- - 2 - - -- ZMFlag for a 3DM geometry - SELECT ST_ZMFlag(ST_GeomFromText('POINT M(1 1 1)')); - ---- - 1 - - -- ZMFlag for a 4D geometry - SELECT ST_ZMFlag(ST_GeomFromText('POINT ZM(1 1 1 1)')); - ---- - 3 -)"; - -//------------------------------------------------------------------------------ -// Register Functions -//------------------------------------------------------------------------------ -void CoreScalarFunctions::RegisterStHas(DatabaseInstance &db) { - ScalarFunctionSet st_hasz("ST_HasZ"); - st_hasz.AddFunction(ScalarFunction({GeoTypes::GEOMETRY()}, LogicalType::BOOLEAN, GeometryHasFunction)); - st_hasz.AddFunction(ScalarFunction({GeoTypes::WKB_BLOB()}, LogicalType::BOOLEAN, WKBHasFunction)); - - ScalarFunctionSet st_hasm("ST_HasM"); - st_hasm.AddFunction(ScalarFunction({GeoTypes::GEOMETRY()}, LogicalType::BOOLEAN, GeometryHasFunction)); - st_hasm.AddFunction(ScalarFunction({GeoTypes::WKB_BLOB()}, LogicalType::BOOLEAN, WKBHasFunction)); - - ScalarFunctionSet st_zmflag("ST_ZMFlag"); - st_zmflag.AddFunction(ScalarFunction({GeoTypes::GEOMETRY()}, 
LogicalType::UTINYINT, GeometryZMFlagFunction)); - st_zmflag.AddFunction(ScalarFunction({GeoTypes::WKB_BLOB()}, LogicalType::UTINYINT, WKBZMFlagFunction)); - - ExtensionUtil::RegisterFunction(db, st_hasz); - ExtensionUtil::RegisterFunction(db, st_hasm); - ExtensionUtil::RegisterFunction(db, st_zmflag); - - DocUtil::AddDocumentation(db, "ST_HasZ", HAS_Z_DESCRIPTION, HAS_Z_EXAMPLE, DOC_TAGS); - DocUtil::AddDocumentation(db, "ST_HasM", HAS_M_DESCRIPTION, HAS_M_EXAMPLE, DOC_TAGS); - DocUtil::AddDocumentation(db, "ST_ZMFlag", ZMFLAG_DESCRIPTION, ZMFLAG_EXAMPLE, DOC_TAGS); -} - -} // namespace core - -} // namespace spatial diff --git a/spatial/src/spatial/core/functions/scalar/st_hilbert.cpp b/spatial/src/spatial/core/functions/scalar/st_hilbert.cpp deleted file mode 100644 index 4a4eb74d..00000000 --- a/spatial/src/spatial/core/functions/scalar/st_hilbert.cpp +++ /dev/null @@ -1,257 +0,0 @@ -#include "duckdb/common/constants.hpp" -#include "duckdb/common/vector_operations/generic_executor.hpp" -#include "duckdb/parser/parsed_data/create_scalar_function_info.hpp" -#include "spatial/common.hpp" -#include "spatial/core/functions/common.hpp" -#include "spatial/core/functions/scalar.hpp" -#include "spatial/core/geometry/geometry.hpp" -#include "spatial/core/util/math.hpp" -#include "spatial/core/types.hpp" - -#include - -namespace spatial { - -namespace core { - -//------------------------------------------------------------------------------ -// Hilbert Curve Encoding -// From (Public Domain): https://github.com/rawrunprotected/hilbert_curves -//------------------------------------------------------------------------------ -inline uint32_t Interleave(uint32_t x) { - x = (x | (x << 8)) & 0x00FF00FF; - x = (x | (x << 4)) & 0x0F0F0F0F; - x = (x | (x << 2)) & 0x33333333; - x = (x | (x << 1)) & 0x55555555; - return x; -} - -inline uint32_t HilbertEncode(uint32_t n, uint32_t x, uint32_t y) { - x = x << (16 - n); - y = y << (16 - n); - - // Initial prefix scan round, prime with x 
and y - uint32_t a = x ^ y; - uint32_t b = 0xFFFF ^ a; - uint32_t c = 0xFFFF ^ (x | y); - uint32_t d = x & (y ^ 0xFFFF); - uint32_t A = a | (b >> 1); - uint32_t B = (a >> 1) ^ a; - uint32_t C = ((c >> 1) ^ (b & (d >> 1))) ^ c; - uint32_t D = ((a & (c >> 1)) ^ (d >> 1)) ^ d; - - a = A; - b = B; - c = C; - d = D; - A = ((a & (a >> 2)) ^ (b & (b >> 2))); - B = ((a & (b >> 2)) ^ (b & ((a ^ b) >> 2))); - C ^= ((a & (c >> 2)) ^ (b & (d >> 2))); - D ^= ((b & (c >> 2)) ^ ((a ^ b) & (d >> 2))); - - a = A; - b = B; - c = C; - d = D; - A = ((a & (a >> 4)) ^ (b & (b >> 4))); - B = ((a & (b >> 4)) ^ (b & ((a ^ b) >> 4))); - C ^= ((a & (c >> 4)) ^ (b & (d >> 4))); - D ^= ((b & (c >> 4)) ^ ((a ^ b) & (d >> 4))); - - // Final round and projection - a = A; - b = B; - c = C; - d = D; - C ^= ((a & (c >> 8)) ^ (b & (d >> 8))); - D ^= ((b & (c >> 8)) ^ ((a ^ b) & (d >> 8))); - - // Undo transformation prefix scan - a = C ^ (C >> 1); - b = D ^ (D >> 1); - - // Recover index bits - uint32_t i0 = x ^ y; - uint32_t i1 = b | (0xFFFF ^ (i0 | a)); - - return ((Interleave(i1) << 1) | Interleave(i0)) >> (32 - 2 * n); -} - -static uint32_t FloatToUint32(float f) { - if (std::isnan(f)) { - return 0xFFFFFFFF; - } - uint32_t res; - memcpy(&res, &f, sizeof(res)); - if ((res & 0x80000000) != 0) { - res ^= 0xFFFFFFFF; - } else { - res |= 0x80000000; - } - return res; -} - -//------------------------------------------------------------------------------ -// Coordinates -//------------------------------------------------------------------------------ -static void HilbertEncodeCoordsFunction(DataChunk &args, ExpressionState &state, Vector &result) { - auto &x_in = args.data[0]; - auto &y_in = args.data[1]; - auto &box_in = args.data[2]; - - const auto count = args.size(); - - using DOUBLE_TYPE = PrimitiveType; - using UINT32_TYPE = PrimitiveType; - using BOX_TYPE = StructTypeQuaternary; - - auto constexpr max_hilbert = std::numeric_limits::max(); - - GenericExecutor::ExecuteTernary( - x_in, y_in, box_in, 
result, count, [&](DOUBLE_TYPE x, DOUBLE_TYPE y, BOX_TYPE &box) { - const auto hilbert_width = max_hilbert / (box.c_val - box.a_val); - const auto hilbert_height = max_hilbert / (box.d_val - box.b_val); - - // TODO: Check for overflow - const auto hilbert_x = static_cast((x.val - box.a_val) * hilbert_width); - const auto hilbert_y = static_cast((y.val - box.b_val) * hilbert_height); - const auto h = HilbertEncode(16, hilbert_x, hilbert_y); - return UINT32_TYPE {h}; - }); -} - -//------------------------------------------------------------------------------ -// GEOMETRY (points) -//------------------------------------------------------------------------------ -static void HilbertEncodeBoundsFunction(DataChunk &args, ExpressionState &state, Vector &result) { - auto &input_vec = args.data[0]; - auto &bounds_vec = args.data[1]; - - const auto count = args.size(); - - auto constexpr max_hilbert = std::numeric_limits::max(); - - using BOX_TYPE = StructTypeQuaternary; - using GEOM_TYPE = PrimitiveType; - using UINT32_TYPE = PrimitiveType; - - GenericExecutor::ExecuteBinary( - input_vec, bounds_vec, result, count, [&](const GEOM_TYPE &geom_type, const BOX_TYPE &bounds) { - const auto geom = geom_type.val; - - Box2D geom_bounds; - if (!geom.TryGetCachedBounds(geom_bounds)) { - throw InvalidInputException( - "ST_Hilbert(geom, bounds) requires that all geometries have a bounding box"); - } - - const auto dx = geom_bounds.min.x + (geom_bounds.max.x - geom_bounds.min.x) / 2; - const auto dy = geom_bounds.min.y + (geom_bounds.max.y - geom_bounds.min.y) / 2; - - const auto hilbert_width = max_hilbert / (bounds.c_val - bounds.a_val); - const auto hilbert_height = max_hilbert / (bounds.d_val - bounds.b_val); - // TODO: Check for overflow - const auto hilbert_x = static_cast((dx - bounds.a_val) * hilbert_width); - const auto hilbert_y = static_cast((dy - bounds.b_val) * hilbert_height); - - const auto h = HilbertEncode(16, hilbert_x, hilbert_y); - return UINT32_TYPE {h}; - }); -} - 
-//------------------------------------------------------------------------------ -// GEOMETRY -//------------------------------------------------------------------------------ -static void HilbertEncodeGeometryFunction(DataChunk &args, ExpressionState &state, Vector &result) { - const auto count = args.size(); - auto &input_vec = args.data[0]; - - UnaryExecutor::ExecuteWithNulls( - input_vec, result, count, [&](const geometry_t &geom, ValidityMask &mask, idx_t out_idx) -> uint32_t { - Box2D bounds; - if (!geom.TryGetCachedBounds(bounds)) { - mask.SetInvalid(out_idx); - return 0; - } - - Box2D bounds_f; - bounds_f.min.x = MathUtil::DoubleToFloatDown(bounds.min.x); - bounds_f.min.y = MathUtil::DoubleToFloatDown(bounds.min.y); - bounds_f.max.x = MathUtil::DoubleToFloatUp(bounds.max.x); - bounds_f.max.y = MathUtil::DoubleToFloatUp(bounds.max.y); - - const auto dx = bounds_f.min.x + (bounds_f.max.x - bounds_f.min.x) / 2; - const auto dy = bounds_f.min.y + (bounds_f.max.y - bounds_f.min.y) / 2; - - const auto hx = FloatToUint32(dx); - const auto hy = FloatToUint32(dy); - - return HilbertEncode(16, hx, hy); - }); -} - -//------------------------------------------------------------------------------ -// BOX_2D/BOX_2DF -//------------------------------------------------------------------------------ -template -static void HilbertEncodeBoxFunction(DataChunk &args, ExpressionState &state, Vector &result) { - auto &input_vec = args.data[0]; - auto &bounds_vec = args.data[1]; - auto count = args.size(); - - constexpr auto max_hilbert = std::numeric_limits::max(); - - using BOX_TYPE = StructTypeQuaternary; - using UINT32_TYPE = PrimitiveType; - - GenericExecutor::ExecuteBinary( - input_vec, bounds_vec, result, count, [&](BOX_TYPE &box, BOX_TYPE &bounds) { - const auto x = box.a_val + (box.c_val - box.a_val) / static_cast(2); - const auto y = box.b_val + (box.d_val - box.b_val) / static_cast(2); - - const auto hilbert_width = max_hilbert / (bounds.c_val - bounds.a_val); - const 
auto hilbert_height = max_hilbert / (bounds.d_val - bounds.b_val); - - // TODO: Check for overflow - const auto hilbert_x = static_cast((x - bounds.a_val) * hilbert_width); - const auto hilbert_y = static_cast((y - bounds.b_val) * hilbert_height); - const auto h = HilbertEncode(16, hilbert_x, hilbert_y); - return UINT32_TYPE {h}; - }); -} - -//------------------------------------------------------------------------------ -// Documentation -//------------------------------------------------------------------------------ -static constexpr DocTag DOC_TAGS[] = {{"ext", "spatial"}, {"category", "property"}}; -static constexpr const char *DOC_DESCRIPTION = R"( -Encodes the X and Y values as the hilbert curve index for a curve covering the given bounding box. -If a geometry is provided, the center of the approximate bounding box is used as the point to encode. -If no bounding box is provided, the hilbert curve index is mapped to the full range of a single-presicion float. -For the BOX_2D and BOX_2DF variants, the center of the box is used as the point to encode. 
-)"; -static constexpr const char *DOC_EXAMPLE = R"( - -)"; -//------------------------------------------------------------------------------ -// Register functions -//------------------------------------------------------------------------------ -void CoreScalarFunctions::RegisterStHilbert(DatabaseInstance &db) { - ScalarFunctionSet set("ST_Hilbert"); - set.AddFunction(ScalarFunction({LogicalType::DOUBLE, LogicalType::DOUBLE, GeoTypes::BOX_2D()}, - LogicalType::UINTEGER, HilbertEncodeCoordsFunction)); - set.AddFunction( - ScalarFunction({GeoTypes::GEOMETRY(), GeoTypes::BOX_2D()}, LogicalType::UINTEGER, HilbertEncodeBoundsFunction)); - set.AddFunction(ScalarFunction({GeoTypes::BOX_2D(), GeoTypes::BOX_2D()}, LogicalType::UINTEGER, - HilbertEncodeBoxFunction)); - set.AddFunction(ScalarFunction({GeoTypes::BOX_2DF(), GeoTypes::BOX_2DF()}, LogicalType::UINTEGER, - HilbertEncodeBoxFunction)); - set.AddFunction(ScalarFunction({GeoTypes::GEOMETRY()}, LogicalType::UINTEGER, HilbertEncodeGeometryFunction)); - - ExtensionUtil::RegisterFunction(db, set); - DocUtil::AddDocumentation(db, "ST_Hilbert", DOC_DESCRIPTION, DOC_EXAMPLE, DOC_TAGS); -} - -} // namespace core - -} // namespace spatial diff --git a/spatial/src/spatial/core/functions/scalar/st_intersects.cpp b/spatial/src/spatial/core/functions/scalar/st_intersects.cpp deleted file mode 100644 index 554c6962..00000000 --- a/spatial/src/spatial/core/functions/scalar/st_intersects.cpp +++ /dev/null @@ -1,34 +0,0 @@ -#include "spatial/common.hpp" -#include "spatial/core/types.hpp" -#include "spatial/core/functions/scalar.hpp" -#include "duckdb/parser/parsed_data/create_scalar_function_info.hpp" -#include "duckdb/parser/parsed_data/create_macro_info.hpp" -#include "duckdb/common/vector_operations/generic_executor.hpp" -#include "duckdb/function/scalar_macro_function.hpp" -#include "duckdb/parser/expression/function_expression.hpp" - -namespace spatial { - -namespace core { - -static void IntersectsBox2DFunction(DataChunk 
&args, ExpressionState &state, Vector &result) { - using BOX_TYPE = StructTypeQuaternary; - using BOOL_TYPE = PrimitiveType; - - GenericExecutor::ExecuteBinary( - args.data[0], args.data[1], result, args.size(), [&](BOX_TYPE &left, BOX_TYPE &right) { - return !(left.a_val > right.c_val || left.c_val < right.a_val || left.b_val > right.d_val || - left.d_val < right.b_val); - }); -} - -void CoreScalarFunctions::RegisterStIntersects(DatabaseInstance &db) { - ScalarFunction intersects_func("ST_Intersects", {GeoTypes::BOX_2D(), GeoTypes::BOX_2D()}, LogicalType::BOOLEAN, - IntersectsBox2DFunction); - - ExtensionUtil::RegisterFunction(db, intersects_func); -} - -} // namespace core - -} // namespace spatial diff --git a/spatial/src/spatial/core/functions/scalar/st_intersects_extent.cpp b/spatial/src/spatial/core/functions/scalar/st_intersects_extent.cpp deleted file mode 100644 index 0cace53e..00000000 --- a/spatial/src/spatial/core/functions/scalar/st_intersects_extent.cpp +++ /dev/null @@ -1,84 +0,0 @@ -#include "spatial/common.hpp" -#include "spatial/core/types.hpp" -#include "spatial/core/functions/scalar.hpp" -#include "spatial/core/geometry/bbox.hpp" -#include "spatial/core/geometry/geometry_type.hpp" - -#include "duckdb/parser/parsed_data/create_scalar_function_info.hpp" -#include "duckdb/parser/parsed_data/create_macro_info.hpp" -#include "duckdb/common/vector_operations/binary_executor.hpp" -#include "duckdb/function/scalar_macro_function.hpp" -#include "duckdb/parser/expression/function_expression.hpp" - -namespace spatial { - -namespace core { - -static void IntersectsExtentFunction(DataChunk &args, ExpressionState &state, Vector &result) { - auto &left = args.data[0]; - auto &right = args.data[1]; - auto count = args.size(); - - BinaryExecutor::Execute( - left, right, result, count, [&](geometry_t left, geometry_t right) { - Box2D left_bbox; - Box2D right_bbox; - if (left.TryGetCachedBounds(left_bbox) && right.TryGetCachedBounds(right_bbox)) { - return 
left_bbox.Intersects(right_bbox); - } - return false; - }); -} - -//------------------------------------------------------------------------------ -// Documentation -//------------------------------------------------------------------------------ -static constexpr const char *DOC_DESCRIPTION = R"( - Returns true if the extent of two geometries intersects -)"; - -static constexpr const char *DOC_EXAMPLE = R"( - -)"; - -static constexpr DocTag DOC_TAGS[] = {{"ext", "spatial"}, {"category", "relation"}}; - -//------------------------------------------------------------------------------ -// Register Functions -//------------------------------------------------------------------------------ -void CoreScalarFunctions::RegisterStIntersectsExtent(DatabaseInstance &db) { - ScalarFunction intersects_func("ST_Intersects_Extent", {GeoTypes::GEOMETRY(), GeoTypes::GEOMETRY()}, - LogicalType::BOOLEAN, IntersectsExtentFunction); - - ExtensionUtil::RegisterFunction(db, intersects_func); - DocUtil::AddDocumentation(db, "ST_Intersects_Extent", DOC_DESCRIPTION, DOC_EXAMPLE, DOC_TAGS); - - // So because this is a macro, we cant overload it. crap. 
- // Provide a "&&" macro - /* - vector> args; - args.push_back(make_uniq("left")); - args.push_back(make_uniq("right")); - - auto func = make_uniq_base("st_intersects_extent", std::move(args)); - auto macro = make_uniq_base(std::move(func)); - - vector> macro_args; - macro_args.push_back(make_uniq("left")); - macro_args.push_back(make_uniq("right")); - macro->parameters = std::move(macro_args); - - CreateMacroInfo macro_info(CatalogType::MACRO_ENTRY); - macro_info.name = "&&"; - macro_info.function = std::move(macro); - macro_info.schema = DEFAULT_SCHEMA; - macro_info.internal = true; - macro_info.parameter_names = {"left", "right"}; - macro_info.on_conflict = OnCreateConflict::ALTER_ON_CONFLICT; - ExtensionUtil::RegisterFunction(db, macro_info); - */ -} - -} // namespace core - -} // namespace spatial diff --git a/spatial/src/spatial/core/functions/scalar/st_is_closed.cpp b/spatial/src/spatial/core/functions/scalar/st_is_closed.cpp deleted file mode 100644 index 88d24989..00000000 --- a/spatial/src/spatial/core/functions/scalar/st_is_closed.cpp +++ /dev/null @@ -1,66 +0,0 @@ -#include "spatial/common.hpp" -#include "spatial/core/types.hpp" -#include "spatial/core/functions/scalar.hpp" -#include "spatial/core/functions/common.hpp" -#include "spatial/core/geometry/geometry.hpp" - -#include "duckdb/parser/parsed_data/create_scalar_function_info.hpp" -#include "duckdb/common/vector_operations/unary_executor.hpp" -#include "duckdb/common/vector_operations/binary_executor.hpp" - -namespace spatial { - -namespace core { - -static void IsClosedFunction(DataChunk &args, ExpressionState &state, Vector &result) { - auto &lstate = GeometryFunctionLocalState::ResetAndGet(state); - auto &arena = lstate.arena; - - // TODO: We should support more than just LINESTRING and MULTILINESTRING (like PostGIS does) - UnaryExecutor::Execute(args.data[0], result, args.size(), [&](geometry_t input) { - struct op { - static bool Case(Geometry::Tags::LineString, const Geometry &geom) { - 
return LineString::IsClosed(geom); - } - static bool Case(Geometry::Tags::MultiLineString, const Geometry &geom) { - return MultiLineString::IsClosed(geom); - } - static bool Case(Geometry::Tags::AnyGeometry, const Geometry &) { - throw InvalidInputException("ST_IsClosed only accepts LINESTRING and MULTILINESTRING geometries"); - } - }; - auto geom = Geometry::Deserialize(arena, input); - return Geometry::Match(geom); - }); -} - -//------------------------------------------------------------------------------ -// Documentation -//------------------------------------------------------------------------------ -static constexpr const char *DOC_DESCRIPTION = R"( - Returns true if a geometry is "closed" -)"; - -static constexpr const char *DOC_EXAMPLE = R"( - -)"; - -static constexpr DocTag DOC_TAGS[] = {{"ext", "spatial"}, {"category", "property"}}; - -//------------------------------------------------------------------------------ -// Register Functions -//------------------------------------------------------------------------------ -void CoreScalarFunctions::RegisterStIsClosed(DatabaseInstance &db) { - - ScalarFunctionSet set("ST_IsClosed"); - - set.AddFunction(ScalarFunction({GeoTypes::GEOMETRY()}, LogicalType::BOOLEAN, IsClosedFunction, nullptr, nullptr, - nullptr, GeometryFunctionLocalState::Init)); - - ExtensionUtil::RegisterFunction(db, set); - DocUtil::AddDocumentation(db, "ST_IsClosed", DOC_DESCRIPTION, DOC_EXAMPLE, DOC_TAGS); -} - -} // namespace core - -} // namespace spatial diff --git a/spatial/src/spatial/core/functions/scalar/st_isempty.cpp b/spatial/src/spatial/core/functions/scalar/st_isempty.cpp deleted file mode 100644 index 011e3d0b..00000000 --- a/spatial/src/spatial/core/functions/scalar/st_isempty.cpp +++ /dev/null @@ -1,98 +0,0 @@ -#include "duckdb/parser/parsed_data/create_scalar_function_info.hpp" -#include "spatial/common.hpp" -#include "spatial/core/functions/scalar.hpp" -#include "spatial/core/functions/common.hpp" -#include 
"spatial/core/geometry/geometry.hpp" -#include "spatial/core/types.hpp" - -namespace spatial { - -namespace core { - -//------------------------------------------------------------------------------ -// LineString2D -//------------------------------------------------------------------------------ -static void LineIsEmptyFunction(DataChunk &args, ExpressionState &state, Vector &result) { - D_ASSERT(args.data.size() == 1); - - auto &line_vec = args.data[0]; - auto count = args.size(); - - UnaryExecutor::Execute(line_vec, result, count, - [&](list_entry_t line) { return line.length == 0; }); - - if (count == 1) { - result.SetVectorType(VectorType::CONSTANT_VECTOR); - } -} - -//------------------------------------------------------------------------------ -// POLYGON_2D -//------------------------------------------------------------------------------ -static void PolygonIsEmptyFunction(DataChunk &args, ExpressionState &state, Vector &result) { - D_ASSERT(args.data.size() == 1); - - auto &polygon_vec = args.data[0]; - auto count = args.size(); - - UnaryExecutor::Execute(polygon_vec, result, count, - [&](list_entry_t poly) { return poly.length == 0; }); - - if (count == 1) { - result.SetVectorType(VectorType::CONSTANT_VECTOR); - } -} - -//------------------------------------------------------------------------------ -// GEOMETRY -//------------------------------------------------------------------------------ -static void GeometryIsEmptyFunction(DataChunk &args, ExpressionState &state, Vector &result) { - auto &lstate = GeometryFunctionLocalState::ResetAndGet(state); - - auto &input = args.data[0]; - auto count = args.size(); - - UnaryExecutor::Execute(input, result, count, [&](geometry_t input) { - auto geom = Geometry::Deserialize(lstate.arena, input); - return Geometry::IsEmpty(geom); - }); - - if (count == 1) { - result.SetVectorType(VectorType::CONSTANT_VECTOR); - } -} - -//------------------------------------------------------------------------------ -// 
Documentation -//------------------------------------------------------------------------------ -static constexpr const char *DOC_DESCRIPTION = R"( - Returns true if the geometry is "empty" -)"; - -static constexpr const char *DOC_EXAMPLE = R"( - -)"; - -static constexpr DocTag DOC_TAGS[] = {{"ext", "spatial"}, {"category", "property"}}; -//------------------------------------------------------------------------------ -// Register functions -//------------------------------------------------------------------------------ -void CoreScalarFunctions::RegisterStIsEmpty(DatabaseInstance &db) { - - ScalarFunctionSet is_empty_function_set("ST_IsEmpty"); - - is_empty_function_set.AddFunction( - ScalarFunction({GeoTypes::LINESTRING_2D()}, LogicalType::BOOLEAN, LineIsEmptyFunction)); - is_empty_function_set.AddFunction( - ScalarFunction({GeoTypes::POLYGON_2D()}, LogicalType::BOOLEAN, PolygonIsEmptyFunction)); - is_empty_function_set.AddFunction(ScalarFunction({GeoTypes::GEOMETRY()}, LogicalType::BOOLEAN, - GeometryIsEmptyFunction, nullptr, nullptr, nullptr, - GeometryFunctionLocalState::Init)); - - ExtensionUtil::RegisterFunction(db, is_empty_function_set); - DocUtil::AddDocumentation(db, "ST_IsEmpty", DOC_DESCRIPTION, DOC_EXAMPLE, DOC_TAGS); -} - -} // namespace core - -} // namespace spatial \ No newline at end of file diff --git a/spatial/src/spatial/core/functions/scalar/st_length.cpp b/spatial/src/spatial/core/functions/scalar/st_length.cpp deleted file mode 100644 index 52d094f7..00000000 --- a/spatial/src/spatial/core/functions/scalar/st_length.cpp +++ /dev/null @@ -1,99 +0,0 @@ -#include "duckdb/parser/parsed_data/create_scalar_function_info.hpp" -#include "spatial/common.hpp" -#include "spatial/core/functions/scalar.hpp" -#include "spatial/core/functions/common.hpp" -#include "spatial/core/geometry/geometry.hpp" -#include "spatial/core/types.hpp" - -namespace spatial { - -namespace core { - 
-//------------------------------------------------------------------------------ -// LineString2D -//------------------------------------------------------------------------------ -static void LineLengthFunction(DataChunk &args, ExpressionState &state, Vector &result) { - D_ASSERT(args.data.size() == 1); - - auto &line_vec = args.data[0]; - auto count = args.size(); - - auto &coord_vec = ListVector::GetEntry(line_vec); - auto &coord_vec_children = StructVector::GetEntries(coord_vec); - auto x_data = FlatVector::GetData(*coord_vec_children[0]); - auto y_data = FlatVector::GetData(*coord_vec_children[1]); - - UnaryExecutor::Execute(line_vec, result, count, [&](list_entry_t line) { - auto offset = line.offset; - auto length = line.length; - double sum = 0; - // Loop over the segments - for (idx_t j = offset; j < offset + length - 1; j++) { - auto x1 = x_data[j]; - auto y1 = y_data[j]; - auto x2 = x_data[j + 1]; - auto y2 = y_data[j + 1]; - sum += std::sqrt(std::pow(x1 - x2, 2) + std::pow(y1 - y2, 2)); - } - return sum; - }); - - if (count == 1) { - result.SetVectorType(VectorType::CONSTANT_VECTOR); - } -} - -//------------------------------------------------------------------------------ -// GEOMETRY -//------------------------------------------------------------------------------ -static void GeometryLengthFunction(DataChunk &args, ExpressionState &state, Vector &result) { - - auto &lstate = GeometryFunctionLocalState::ResetAndGet(state); - auto &arena = lstate.arena; - - auto &input = args.data[0]; - auto count = args.size(); - - UnaryExecutor::Execute(input, result, count, [&](geometry_t input) { - auto geom = Geometry::Deserialize(arena, input); - double length = 0.0; - Geometry::ExtractLines(geom, [&](const Geometry &line) { length += LineString::Length(line); }); - return length; - }); - - if (count == 1) { - result.SetVectorType(VectorType::CONSTANT_VECTOR); - } -} - -//------------------------------------------------------------------------------ -// 
Documentation -//------------------------------------------------------------------------------ -static constexpr const char *DOC_DESCRIPTION = R"( - Returns the length of the input line geometry -)"; - -static constexpr const char *DOC_EXAMPLE = R"( - -)"; - -static constexpr DocTag DOC_TAGS[] = {{"ext", "spatial"}, {"category", "property"}}; -//------------------------------------------------------------------------------ -// Register functions -//------------------------------------------------------------------------------ -void CoreScalarFunctions::RegisterStLength(DatabaseInstance &db) { - - ScalarFunctionSet length_function_set("ST_Length"); - - length_function_set.AddFunction( - ScalarFunction({GeoTypes::LINESTRING_2D()}, LogicalType::DOUBLE, LineLengthFunction)); - length_function_set.AddFunction(ScalarFunction({GeoTypes::GEOMETRY()}, LogicalType::DOUBLE, GeometryLengthFunction, - nullptr, nullptr, nullptr, GeometryFunctionLocalState::Init)); - - ExtensionUtil::RegisterFunction(db, length_function_set); - DocUtil::AddDocumentation(db, "ST_Length", DOC_DESCRIPTION, DOC_EXAMPLE, DOC_TAGS); -} - -} // namespace core - -} // namespace spatial \ No newline at end of file diff --git a/spatial/src/spatial/core/functions/scalar/st_makeenvelope.cpp b/spatial/src/spatial/core/functions/scalar/st_makeenvelope.cpp deleted file mode 100644 index 48e9c04e..00000000 --- a/spatial/src/spatial/core/functions/scalar/st_makeenvelope.cpp +++ /dev/null @@ -1,69 +0,0 @@ -#include "spatial/common.hpp" -#include "spatial/core/types.hpp" -#include "spatial/core/functions/scalar.hpp" -#include "spatial/core/functions/common.hpp" -#include "spatial/core/geometry/geometry.hpp" -#include "spatial/core/function_builder.hpp" - -#include "duckdb/common/vector_operations/generic_executor.hpp" - -namespace spatial { - -namespace core { - -static void MakeEnvelopeFunction(DataChunk &args, ExpressionState &state, Vector &result) { - auto &lstate = 
GeometryFunctionLocalState::ResetAndGet(state); - auto count = args.size(); - - auto &min_x_vec = args.data[0]; - auto &min_y_vec = args.data[1]; - auto &max_x_vec = args.data[2]; - auto &max_y_vec = args.data[3]; - - using DOUBLE_TYPE = PrimitiveType; - using GEOMETRY_TYPE = PrimitiveType; - - GenericExecutor::ExecuteQuaternary( - min_x_vec, min_y_vec, max_x_vec, max_y_vec, result, count, - [&](DOUBLE_TYPE x_min, DOUBLE_TYPE y_min, DOUBLE_TYPE x_max, DOUBLE_TYPE y_max) { - auto box = Polygon::CreateFromBox(lstate.arena, x_min.val, y_min.val, x_max.val, y_max.val); - return Geometry::Serialize(box, result); - }); -} - -//------------------------------------------------------------------------------ -// Documentation -//------------------------------------------------------------------------------ -static constexpr const char *DOC_DESCRIPTION = R"( - Returns a minimal bounding box polygon enclosing the input geometry -)"; - -static constexpr const char *DOC_EXAMPLE = R"( - -)"; -//------------------------------------------------------------------------------ -// Register Functions -//------------------------------------------------------------------------------ -void CoreScalarFunctions::RegisterStMakeEnvelope(DatabaseInstance &db) { - FunctionBuilder::RegisterScalar(db, "ST_MakeEnvelope", [](ScalarFunctionBuilder &func) { - func.AddVariant([](ScalarFunctionVariantBuilder &variant) { - variant.AddParameter("min_x", LogicalType::DOUBLE); - variant.AddParameter("min_y", LogicalType::DOUBLE); - variant.AddParameter("max_x", LogicalType::DOUBLE); - variant.AddParameter("max_y", LogicalType::DOUBLE); - variant.SetReturnType(GeoTypes::GEOMETRY()); - variant.SetFunction(MakeEnvelopeFunction); - variant.SetInit(GeometryFunctionLocalState::Init); - - variant.SetExample(DOC_EXAMPLE); - variant.SetDescription(DOC_DESCRIPTION); - }); - - func.SetTag("ext", "spatial"); - func.SetTag("category", "construction"); - }); -} - -} // namespace core - -} // namespace spatial diff --git 
a/spatial/src/spatial/core/functions/scalar/st_makeline.cpp b/spatial/src/spatial/core/functions/scalar/st_makeline.cpp deleted file mode 100644 index 76e45305..00000000 --- a/spatial/src/spatial/core/functions/scalar/st_makeline.cpp +++ /dev/null @@ -1,136 +0,0 @@ -#include "spatial/common.hpp" -#include "spatial/core/types.hpp" -#include "spatial/core/functions/scalar.hpp" -#include "spatial/core/functions/common.hpp" -#include "spatial/core/geometry/geometry.hpp" - -#include "duckdb/parser/parsed_data/create_scalar_function_info.hpp" -#include "duckdb/common/vector_operations/unary_executor.hpp" -#include "duckdb/common/vector_operations/binary_executor.hpp" - -namespace spatial { - -namespace core { - -static void MakeLineListFunction(DataChunk &args, ExpressionState &state, Vector &result) { - auto &lstate = GeometryFunctionLocalState::ResetAndGet(state); - auto &arena = lstate.arena; - - auto count = args.size(); - auto &child_vec = ListVector::GetEntry(args.data[0]); - UnifiedVectorFormat format; - child_vec.ToUnifiedFormat(count, format); - - UnaryExecutor::Execute(args.data[0], result, count, [&](list_entry_t &geometry_list) { - auto offset = geometry_list.offset; - auto length = geometry_list.length; - - auto line = LineString::Create(arena, length, false, false); - - uint32_t vertex_idx = 0; - for (idx_t i = offset; i < offset + length; i++) { - - auto mapped_idx = format.sel->get_index(i); - if (!format.validity.RowIsValid(mapped_idx)) { - continue; - } - auto geometry_blob = ((geometry_t *)format.data)[mapped_idx]; - - if (geometry_blob.GetType() != GeometryType::POINT) { - throw InvalidInputException("ST_MakeLine only accepts POINT geometries"); - } - - // TODO: Support Z and M - if (geometry_blob.GetProperties().HasZ() || geometry_blob.GetProperties().HasM()) { - throw InvalidInputException("ST_MakeLine from list does not support Z or M geometries"); - } - auto point = Geometry::Deserialize(arena, geometry_blob); - if (Point::IsEmpty(point)) { - 
continue; - } - LineString::SetVertex(line, vertex_idx++, Point::GetVertex(point)); - } - - // Shrink the vertex array to the actual size - LineString::Resize(line, arena, vertex_idx); - - if (line.Count() == 1) { - throw InvalidInputException("ST_MakeLine requires zero or two or more POINT geometries"); - } - - return Geometry::Serialize(line, result); - }); -} - -static void MakeLineBinaryFunction(DataChunk &args, ExpressionState &state, Vector &result) { - auto &lstate = GeometryFunctionLocalState::ResetAndGet(state); - auto &arena = lstate.arena; - - auto count = args.size(); - - BinaryExecutor::Execute( - args.data[0], args.data[1], result, count, [&](geometry_t &geom_blob_left, geometry_t &geom_blob_right) { - if (geom_blob_left.GetType() != GeometryType::POINT || geom_blob_right.GetType() != GeometryType::POINT) { - throw InvalidInputException("ST_MakeLine only accepts POINT geometries"); - } - - auto geometry_left = Geometry::Deserialize(arena, geom_blob_left); - auto geometry_right = Geometry::Deserialize(arena, geom_blob_right); - - if (Point::IsEmpty(geometry_left) && Point::IsEmpty(geometry_right)) { - // Empty linestring - auto empty = LineString::CreateEmpty(false, false); - return Geometry::Serialize(empty, result); - } - - if (Point::IsEmpty(geometry_left) || Point::IsEmpty(geometry_right)) { - throw InvalidInputException("ST_MakeLine requires zero or two or more POINT geometries"); - } - - auto has_z = geom_blob_left.GetProperties().HasZ() || geom_blob_right.GetProperties().HasZ(); - auto has_m = geom_blob_left.GetProperties().HasM() || geom_blob_right.GetProperties().HasM(); - - // TODO: Dont upcast the child geometries, just append and let the append function handle upcasting of the - // target instead. 
- geometry_left.SetVertexType(arena, has_z, has_m); - geometry_right.SetVertexType(arena, has_z, has_m); - - auto line_geom = LineString::CreateEmpty(has_z, has_m); - LineString::Append(line_geom, arena, geometry_left); - LineString::Append(line_geom, arena, geometry_right); - return Geometry::Serialize(line_geom, result); - }); -} - -//------------------------------------------------------------------------------ -// Documentation -//------------------------------------------------------------------------------ -static constexpr const char *DOC_DESCRIPTION = R"( -Creates a LINESTRING geometry from a pair or list of input points -)"; - -static constexpr const char *DOC_EXAMPLE = R"( - -)"; - -static constexpr DocTag DOC_TAGS[] = {{"ext", "spatial"}, {"category", "construction"}}; -//------------------------------------------------------------------------------ -// Register Functions -//------------------------------------------------------------------------------ -void CoreScalarFunctions::RegisterStMakeLine(DatabaseInstance &db) { - - ScalarFunctionSet set("ST_MakeLine"); - - set.AddFunction(ScalarFunction({LogicalType::LIST(GeoTypes::GEOMETRY())}, GeoTypes::GEOMETRY(), - MakeLineListFunction, nullptr, nullptr, nullptr, GeometryFunctionLocalState::Init)); - set.AddFunction(ScalarFunction({GeoTypes::GEOMETRY(), GeoTypes::GEOMETRY()}, GeoTypes::GEOMETRY(), - MakeLineBinaryFunction, nullptr, nullptr, nullptr, - GeometryFunctionLocalState::Init)); - - ExtensionUtil::RegisterFunction(db, set); - DocUtil::AddDocumentation(db, "ST_MakeLine", DOC_DESCRIPTION, DOC_EXAMPLE, DOC_TAGS); -} - -} // namespace core - -} // namespace spatial diff --git a/spatial/src/spatial/core/functions/scalar/st_makepolygon.cpp b/spatial/src/spatial/core/functions/scalar/st_makepolygon.cpp deleted file mode 100644 index 2300d60b..00000000 --- a/spatial/src/spatial/core/functions/scalar/st_makepolygon.cpp +++ /dev/null @@ -1,152 +0,0 @@ -#include "spatial/common.hpp" -#include 
"spatial/core/types.hpp" -#include "spatial/core/functions/scalar.hpp" -#include "spatial/core/functions/common.hpp" -#include "spatial/core/geometry/geometry.hpp" - -#include "duckdb/parser/parsed_data/create_scalar_function_info.hpp" -#include "duckdb/common/vector_operations/unary_executor.hpp" -#include "duckdb/common/vector_operations/binary_executor.hpp" - -namespace spatial { - -namespace core { - -static void MakePolygonFromRingsFunction(DataChunk &args, ExpressionState &state, Vector &result) { - auto &lstate = GeometryFunctionLocalState::ResetAndGet(state); - auto &arena = lstate.arena; - auto count = args.size(); - - auto &child_vec = ListVector::GetEntry(args.data[1]); - UnifiedVectorFormat format; - child_vec.ToUnifiedFormat(count, format); - - BinaryExecutor::Execute( - args.data[0], args.data[1], result, count, [&](geometry_t line_blob, list_entry_t &rings_list) { - // First, setup the shell - if (line_blob.GetType() != GeometryType::LINESTRING) { - throw InvalidInputException("ST_MakePolygon only accepts LINESTRING geometries"); - } - - // TODO: Support Z and M - if (line_blob.GetProperties().HasM() || line_blob.GetProperties().HasZ()) { - throw InvalidInputException("ST_MakePolygon does not support Z or M geometries"); - } - - auto shell = Geometry::Deserialize(arena, line_blob); - if (LineString::VertexCount(shell) < 4) { - throw InvalidInputException("ST_MakePolygon shell requires at least 4 vertices"); - } - - if (!LineString::IsClosed(shell)) { - throw InvalidInputException( - "ST_MakePolygon shell must be closed (first and last vertex must be equal)"); - } - - // Validate and count the hole ring sizes - auto holes_offset = rings_list.offset; - auto holes_length = rings_list.length; - - vector rings; - rings.push_back(shell); - - for (idx_t hole_idx = 0; hole_idx < holes_length; hole_idx++) { - auto mapped_idx = format.sel->get_index(holes_offset + hole_idx); - if (!format.validity.RowIsValid(mapped_idx)) { - continue; - } - - auto 
geometry_blob = UnifiedVectorFormat::GetData(format)[mapped_idx]; - - // TODO: Support Z and M - if (geometry_blob.GetProperties().HasZ() || geometry_blob.GetProperties().HasM()) { - throw InvalidInputException("ST_MakePolygon does not support Z or M geometries"); - } - if (geometry_blob.GetType() != GeometryType::LINESTRING) { - throw InvalidInputException( - StringUtil::Format("ST_MakePolygon hole #%lu is not a LINESTRING geometry", hole_idx + 1)); - } - auto hole = Geometry::Deserialize(arena, geometry_blob); - if (LineString::VertexCount(hole) < 4) { - throw InvalidInputException( - StringUtil::Format("ST_MakePolygon hole #%lu requires at least 4 vertices", hole_idx + 1)); - } - - if (!LineString::IsClosed(hole)) { - throw InvalidInputException(StringUtil::Format( - "ST_MakePolygon hole #%lu must be closed (first and last vertex must be equal)", hole_idx + 1)); - } - - rings.push_back(hole); - } - // TODO: Add constructor that takes a vector of rings - auto polygon = Polygon::Create(arena, rings.size(), false, false); - for (idx_t ring_idx = 0; ring_idx < rings.size(); ring_idx++) { - Polygon::Part(polygon, ring_idx) = std::move(rings[ring_idx]); - } - return Geometry::Serialize(polygon, result); - }); -} - -static void MakePolygonFromShellFunction(DataChunk &args, ExpressionState &state, Vector &result) { - auto &lstate = GeometryFunctionLocalState::ResetAndGet(state); - auto &arena = lstate.arena; - - auto count = args.size(); - - UnaryExecutor::Execute(args.data[0], result, count, [&](geometry_t &line_blob) { - if (line_blob.GetType() != GeometryType::LINESTRING) { - throw InvalidInputException("ST_MakePolygon only accepts LINESTRING geometries"); - } - - auto line = Geometry::Deserialize(arena, line_blob); - - if (LineString::VertexCount(line) < 4) { - throw InvalidInputException("ST_MakePolygon shell requires at least 4 vertices"); - } - - if (!LineString::IsClosed(line)) { - throw InvalidInputException("ST_MakePolygon shell must be closed (first and last 
vertex must be equal)"); - } - - auto props = line_blob.GetProperties(); - - auto polygon = Polygon::Create(arena, 1, props.HasZ(), props.HasM()); - Polygon::Part(polygon, 0) = std::move(line); - return Geometry::Serialize(polygon, result); - }); -} - -//------------------------------------------------------------------------------ -// Documentation -//------------------------------------------------------------------------------ -static constexpr const char *DOC_DESCRIPTION = R"( - Creates a polygon from a shell geometry and an optional set of holes -)"; - -static constexpr const char *DOC_EXAMPLE = R"( - -)"; - -static constexpr DocTag DOC_TAGS[] = {{"ext", "spatial"}, {"category", "construction"}}; -//------------------------------------------------------------------------------ -// Register Functions -//------------------------------------------------------------------------------ -void CoreScalarFunctions::RegisterStMakePolygon(DatabaseInstance &db) { - - ScalarFunctionSet set("ST_MakePolygon"); - - set.AddFunction(ScalarFunction({GeoTypes::GEOMETRY(), LogicalType::LIST(GeoTypes::GEOMETRY())}, - GeoTypes::GEOMETRY(), MakePolygonFromRingsFunction, nullptr, nullptr, nullptr, - GeometryFunctionLocalState::Init)); - - set.AddFunction(ScalarFunction({GeoTypes::GEOMETRY()}, GeoTypes::GEOMETRY(), MakePolygonFromShellFunction, nullptr, - nullptr, nullptr, GeometryFunctionLocalState::Init)); - - ExtensionUtil::RegisterFunction(db, set); - DocUtil::AddDocumentation(db, "ST_MakePolygon", DOC_DESCRIPTION, DOC_EXAMPLE, DOC_TAGS); -} - -} // namespace core - -} // namespace spatial diff --git a/spatial/src/spatial/core/functions/scalar/st_multi.cpp b/spatial/src/spatial/core/functions/scalar/st_multi.cpp deleted file mode 100644 index e411d728..00000000 --- a/spatial/src/spatial/core/functions/scalar/st_multi.cpp +++ /dev/null @@ -1,82 +0,0 @@ -#include "duckdb/common/vector_operations/generic_executor.hpp" -#include "spatial/common.hpp" -#include 
"spatial/core/functions/scalar.hpp" -#include "spatial/core/functions/common.hpp" -#include "spatial/core/geometry/geometry.hpp" -#include "spatial/core/types.hpp" -#include "spatial/core/geometry/geometry_processor.hpp" - -namespace spatial { - -namespace core { - -//------------------------------------------------------------------------------ -// GEOMETRY -//------------------------------------------------------------------------------ - -static void GeometryMultiFunction(DataChunk &args, ExpressionState &state, Vector &result) { - auto &lstate = GeometryFunctionLocalState::ResetAndGet(state); - auto &arena = lstate.arena; - auto &input = args.data[0]; - - UnaryExecutor::Execute(input, result, args.size(), [&](const geometry_t &geom_blob) { - const bool has_z = geom_blob.GetProperties().HasZ(); - const bool has_m = geom_blob.GetProperties().HasM(); - - switch (geom_blob.GetType()) { - case GeometryType::POINT: { - auto mpoint = MultiPoint::Create(arena, 1, has_z, has_m); - MultiPoint::Part(mpoint, 0) = Geometry::Deserialize(arena, geom_blob); - return Geometry::Serialize(mpoint, result); - } - case GeometryType::LINESTRING: { - auto mline = MultiLineString::Create(arena, 1, has_z, has_m); - MultiLineString::Part(mline, 0) = Geometry::Deserialize(arena, geom_blob); - return Geometry::Serialize(mline, result); - } - case GeometryType::POLYGON: { - auto mpoly = MultiPolygon::Create(arena, 1, has_z, has_m); - MultiPolygon::Part(mpoly, 0) = Geometry::Deserialize(arena, geom_blob); - return Geometry::Serialize(mpoly, result); - } - default: - return geom_blob; - } - }); -} - -//------------------------------------------------------------------------------ -// Documentation -//------------------------------------------------------------------------------ -static constexpr const char *DOC_DESCRIPTION = R"( - Turns a single geometry into a multi geometry. - - If the geometry is already a multi geometry, it is returned as is. 
-)"; - -static constexpr const char *DOC_EXAMPLE = R"( -SELECT ST_Multi(ST_GeomFromText('POINT(1 2)')); --- MULTIPOINT (1 2) - -SELECT ST_Multi(ST_GeomFromText('LINESTRING(1 1, 2 2)')); --- MULTILINESTRING ((1 1, 2 2)) - -SELECT ST_Multi(ST_GeomFromText('POLYGON((0 0, 0 1, 1 1, 1 0, 0 0))')); --- MULTIPOLYGON (((0 0, 0 1, 1 1, 1 0, 0 0))) -)"; - -static constexpr DocTag DOC_TAGS[] = {{"ext", "spatial"}, {"category", "construction"}}; - -//------------------------------------------------------------------------------ -// Register functions -//------------------------------------------------------------------------------ -void CoreScalarFunctions::RegisterStMulti(DatabaseInstance &db) { - ScalarFunction function("ST_Multi", {GeoTypes::GEOMETRY()}, GeoTypes::GEOMETRY(), GeometryMultiFunction); - function.init_local_state = GeometryFunctionLocalState::Init; - ExtensionUtil::RegisterFunction(db, function); - DocUtil::AddDocumentation(db, "ST_Multi", DOC_DESCRIPTION, DOC_EXAMPLE, DOC_TAGS); -} - -} // namespace core - -} // namespace spatial diff --git a/spatial/src/spatial/core/functions/scalar/st_ngeometries.cpp b/spatial/src/spatial/core/functions/scalar/st_ngeometries.cpp deleted file mode 100644 index 6aac4e3d..00000000 --- a/spatial/src/spatial/core/functions/scalar/st_ngeometries.cpp +++ /dev/null @@ -1,71 +0,0 @@ -#include "duckdb/parser/parsed_data/create_scalar_function_info.hpp" -#include "duckdb/common/vector_operations/generic_executor.hpp" -#include "spatial/common.hpp" -#include "spatial/core/functions/scalar.hpp" -#include "spatial/core/functions/common.hpp" -#include "spatial/core/geometry/geometry.hpp" -#include "spatial/core/types.hpp" - -namespace spatial { - -namespace core { - -//------------------------------------------------------------------------------ -// GEOMETRY -//------------------------------------------------------------------------------ -static void GeometryNGeometriesFunction(DataChunk &args, ExpressionState &state, Vector &result) { 
- - auto &ctx = GeometryFunctionLocalState::ResetAndGet(state); - - auto &input = args.data[0]; - auto count = args.size(); - - UnaryExecutor::Execute(input, result, count, [&](geometry_t input) { - struct op { - static int32_t Case(Geometry::Tags::CollectionGeometry, const Geometry &collection) { - return static_cast(CollectionGeometry::PartCount(collection)); - } - static int32_t Case(Geometry::Tags::Polygon, const Geometry &geom) { - return Polygon::IsEmpty(geom) ? 0 : 1; - } - static int32_t Case(Geometry::Tags::SinglePartGeometry, const Geometry &geom) { - return SinglePartGeometry::IsEmpty(geom) ? 0 : 1; - } - }; - auto geom = Geometry::Deserialize(ctx.arena, input); - return Geometry::Match(geom); - }); -} - -//------------------------------------------------------------------------------ -// Documentation -//------------------------------------------------------------------------------ -static constexpr const char *DOC_DESCRIPTION = R"( - Returns the number of component geometries in a collection geometry. - If the input geometry is not a collection, this function returns 0 or 1 depending on if the geometry is empty or not. 
-)"; - -static constexpr const char *DOC_EXAMPLE = R"( - -)"; - -static constexpr DocTag DOC_TAGS[] = {{"ext", "spatial"}, {"category", "property"}}; -//------------------------------------------------------------------------------ -// Register functions -//------------------------------------------------------------------------------ -void CoreScalarFunctions::RegisterStNGeometries(DatabaseInstance &db) { - - const char *aliases[] = {"ST_NGeometries", "ST_NumGeometries"}; - for (auto alias : aliases) { - ScalarFunctionSet set(alias); - set.AddFunction(ScalarFunction({GeoTypes::GEOMETRY()}, LogicalType::INTEGER, GeometryNGeometriesFunction, - nullptr, nullptr, nullptr, GeometryFunctionLocalState::Init)); - - ExtensionUtil::RegisterFunction(db, set); - DocUtil::AddDocumentation(db, alias, DOC_DESCRIPTION, DOC_EXAMPLE, DOC_TAGS); - } -} - -} // namespace core - -} // namespace spatial \ No newline at end of file diff --git a/spatial/src/spatial/core/functions/scalar/st_ninteriorrings.cpp b/spatial/src/spatial/core/functions/scalar/st_ninteriorrings.cpp deleted file mode 100644 index b3a312dc..00000000 --- a/spatial/src/spatial/core/functions/scalar/st_ninteriorrings.cpp +++ /dev/null @@ -1,80 +0,0 @@ -#include "duckdb/parser/parsed_data/create_scalar_function_info.hpp" -#include "duckdb/common/vector_operations/generic_executor.hpp" -#include "spatial/common.hpp" -#include "spatial/core/functions/scalar.hpp" -#include "spatial/core/functions/common.hpp" -#include "spatial/core/geometry/geometry.hpp" -#include "spatial/core/types.hpp" - -namespace spatial { - -namespace core { - -//------------------------------------------------------------------------------ -// POLYGON_2D -//------------------------------------------------------------------------------ -static void PolygonInteriorRingsFunction(DataChunk &args, ExpressionState &state, Vector &result) { - D_ASSERT(args.data.size() == 1); - - auto &input = args.data[0]; - auto count = args.size(); - - 
UnaryExecutor::Execute(input, result, count, [&](list_entry_t polygon) { - auto rings = polygon.length; - return rings == 0 ? rings : static_cast(polygon.length) - 1; // -1 for the exterior ring - }); -} - -//------------------------------------------------------------------------------ -// GEOMETRY -//------------------------------------------------------------------------------ -static void GeometryInteriorRingsFunction(DataChunk &args, ExpressionState &state, Vector &result) { - auto &lstate = GeometryFunctionLocalState::ResetAndGet(state); - auto &arena = lstate.arena; - auto &input = args.data[0]; - auto count = args.size(); - - UnaryExecutor::ExecuteWithNulls( - input, result, count, [&](geometry_t input, ValidityMask &validity, idx_t idx) { - if (input.GetType() != GeometryType::POLYGON) { - validity.SetInvalid(idx); - return 0; - } - auto polygon = Geometry::Deserialize(arena, input); - auto rings = Polygon::PartCount(polygon); - return rings == 0 ? 0 : static_cast(rings - 1); // -1 for the exterior ring - }); -} - -//------------------------------------------------------------------------------ -// Documentation -//------------------------------------------------------------------------------ -static constexpr const char *DOC_DESCRIPTION = R"( - Returns the number if interior rings of a polygon -)"; - -static constexpr const char *DOC_EXAMPLE = R"( - -)"; - -static constexpr DocTag DOC_TAGS[] = {{"ext", "spatial"}, {"category", "property"}}; -//------------------------------------------------------------------------------ -// Register functions -//------------------------------------------------------------------------------ -void CoreScalarFunctions::RegisterStNInteriorRings(DatabaseInstance &db) { - const char *aliases[] = {"ST_NumInteriorRings", "ST_NInteriorRings"}; - for (auto alias : aliases) { - ScalarFunctionSet set(alias); - set.AddFunction(ScalarFunction({GeoTypes::POLYGON_2D()}, LogicalType::INTEGER, PolygonInteriorRingsFunction)); - - 
set.AddFunction(ScalarFunction({GeoTypes::GEOMETRY()}, LogicalType::INTEGER, GeometryInteriorRingsFunction, - nullptr, nullptr, nullptr, GeometryFunctionLocalState::Init)); - - ExtensionUtil::RegisterFunction(db, set); - DocUtil::AddDocumentation(db, alias, DOC_DESCRIPTION, DOC_EXAMPLE, DOC_TAGS); - } -} - -} // namespace core - -} // namespace spatial \ No newline at end of file diff --git a/spatial/src/spatial/core/functions/scalar/st_npoints.cpp b/spatial/src/spatial/core/functions/scalar/st_npoints.cpp deleted file mode 100644 index 0c4d7572..00000000 --- a/spatial/src/spatial/core/functions/scalar/st_npoints.cpp +++ /dev/null @@ -1,136 +0,0 @@ -#include "duckdb/parser/parsed_data/create_scalar_function_info.hpp" -#include "duckdb/common/vector_operations/generic_executor.hpp" -#include "spatial/common.hpp" -#include "spatial/core/functions/scalar.hpp" -#include "spatial/core/functions/common.hpp" -#include "spatial/core/geometry/geometry.hpp" -#include "spatial/core/types.hpp" - -namespace spatial { - -namespace core { - -//------------------------------------------------------------------------------ -// POINT_2D -//------------------------------------------------------------------------------ -static void PointNumPointsFunction(DataChunk &args, ExpressionState &state, Vector &result) { - using POINT_TYPE = StructTypeBinary; - using COUNT_TYPE = PrimitiveType; - - GenericExecutor::ExecuteUnary(args.data[0], result, args.size(), - [](POINT_TYPE) { return 1; }); -} - -//------------------------------------------------------------------------------ -// LINESTRING_2D -//------------------------------------------------------------------------------ -static void LineStringNumPointsFunction(DataChunk &args, ExpressionState &state, Vector &result) { - auto input = args.data[0]; - UnaryExecutor::Execute(input, result, args.size(), - [](list_entry_t input) { return input.length; }); -} - -//------------------------------------------------------------------------------ 
-// POLYGON_2D -//------------------------------------------------------------------------------ -static void PolygonNumPointsFunction(DataChunk &args, ExpressionState &state, Vector &result) { - D_ASSERT(args.data.size() == 1); - - auto &input = args.data[0]; - auto count = args.size(); - auto &ring_vec = ListVector::GetEntry(input); - auto ring_entries = ListVector::GetData(ring_vec); - - UnaryExecutor::Execute(input, result, count, [&](list_entry_t polygon) { - auto polygon_offset = polygon.offset; - auto polygon_length = polygon.length; - idx_t npoints = 0; - for (idx_t ring_idx = polygon_offset; ring_idx < polygon_offset + polygon_length; ring_idx++) { - auto ring = ring_entries[ring_idx]; - npoints += ring.length; - } - return npoints; - }); -} - -//------------------------------------------------------------------------------ -// BOX_2D -//------------------------------------------------------------------------------ -static void BoxNumPointsFunction(DataChunk &args, ExpressionState &state, Vector &result) { - - using BOX_TYPE = StructTypeQuaternary; - using COUNT_TYPE = PrimitiveType; - - GenericExecutor::ExecuteUnary(args.data[0], result, args.size(), [](BOX_TYPE) { return 4; }); -} - -//------------------------------------------------------------------------------ -// GEOMETRY -//------------------------------------------------------------------------------ - -static void GeometryNumPointsFunction(DataChunk &args, ExpressionState &state, Vector &result) { - auto &lstate = GeometryFunctionLocalState::ResetAndGet(state); - auto &arena = lstate.arena; - - auto &input = args.data[0]; - auto count = args.size(); - - struct op { - static uint32_t Case(Geometry::Tags::SinglePartGeometry, const Geometry &geom) { - return geom.Count(); - } - static uint32_t Case(Geometry::Tags::MultiPartGeometry, const Geometry &geom) { - uint32_t count = 0; - for (uint32_t i = 0; i < MultiPartGeometry::PartCount(geom); i++) { - auto part = MultiPartGeometry::Part(geom, i); - 
count += Geometry::Match(part); - } - return count; - } - }; - - UnaryExecutor::Execute(input, result, count, [&](geometry_t input) { - auto geom = Geometry::Deserialize(arena, input); - return Geometry::Match(geom); - }); -} - -//------------------------------------------------------------------------------ -// Documentation -//------------------------------------------------------------------------------ -static constexpr const char *DOC_DESCRIPTION = R"( - Returns the number of vertices within a geometry -)"; - -static constexpr const char *DOC_EXAMPLE = R"( - -)"; - -static constexpr DocTag DOC_TAGS[] = {{"ext", "spatial"}, {"category", "property"}}; - -//------------------------------------------------------------------------------ -// Register functions -//------------------------------------------------------------------------------ -void CoreScalarFunctions::RegisterStNPoints(DatabaseInstance &db) { - const char *aliases[] = {"ST_NPoints", "ST_NumPoints"}; - for (auto alias : aliases) { - ScalarFunctionSet area_function_set(alias); - area_function_set.AddFunction( - ScalarFunction({GeoTypes::POINT_2D()}, LogicalType::UBIGINT, PointNumPointsFunction)); - area_function_set.AddFunction( - ScalarFunction({GeoTypes::LINESTRING_2D()}, LogicalType::UBIGINT, LineStringNumPointsFunction)); - area_function_set.AddFunction( - ScalarFunction({GeoTypes::POLYGON_2D()}, LogicalType::UBIGINT, PolygonNumPointsFunction)); - area_function_set.AddFunction(ScalarFunction({GeoTypes::BOX_2D()}, LogicalType::UBIGINT, BoxNumPointsFunction)); - area_function_set.AddFunction(ScalarFunction({GeoTypes::GEOMETRY()}, LogicalType::UINTEGER, - GeometryNumPointsFunction, nullptr, nullptr, nullptr, - GeometryFunctionLocalState::Init)); - - ExtensionUtil::RegisterFunction(db, area_function_set); - DocUtil::AddDocumentation(db, alias, DOC_DESCRIPTION, DOC_EXAMPLE, DOC_TAGS); - } -} - -} // namespace core - -} // namespace spatial \ No newline at end of file diff --git 
a/spatial/src/spatial/core/functions/scalar/st_perimeter.cpp b/spatial/src/spatial/core/functions/scalar/st_perimeter.cpp deleted file mode 100644 index cdd30c4f..00000000 --- a/spatial/src/spatial/core/functions/scalar/st_perimeter.cpp +++ /dev/null @@ -1,128 +0,0 @@ -#include "duckdb/common/vector_operations/generic_executor.hpp" -#include "duckdb/parser/parsed_data/create_scalar_function_info.hpp" - -#include "spatial/common.hpp" -#include "spatial/core/types.hpp" -#include "spatial/core/geometry/geometry.hpp" -#include "spatial/core/functions/common.hpp" -#include "spatial/core/functions/scalar.hpp" - -namespace spatial { - -namespace core { - -//------------------------------------------------------------------------------ -// POLYGON_2D -//------------------------------------------------------------------------------ -static void Polygon2DPerimeterFunction(DataChunk &args, ExpressionState &state, Vector &result) { - D_ASSERT(args.data.size() == 1); - - auto &input = args.data[0]; - auto count = args.size(); - - auto &ring_vec = ListVector::GetEntry(input); - auto ring_entries = ListVector::GetData(ring_vec); - auto &coord_vec = ListVector::GetEntry(ring_vec); - auto &coord_vec_children = StructVector::GetEntries(coord_vec); - auto x_data = FlatVector::GetData(*coord_vec_children[0]); - auto y_data = FlatVector::GetData(*coord_vec_children[1]); - - UnaryExecutor::Execute(input, result, count, [&](list_entry_t polygon) { - auto polygon_offset = polygon.offset; - auto polygon_length = polygon.length; - double perimeter = 0; - for (idx_t ring_idx = polygon_offset; ring_idx < polygon_offset + polygon_length; ring_idx++) { - auto ring = ring_entries[ring_idx]; - auto ring_offset = ring.offset; - auto ring_length = ring.length; - - for (idx_t coord_idx = ring_offset; coord_idx < ring_offset + ring_length - 1; coord_idx++) { - auto x1 = x_data[coord_idx]; - auto y1 = y_data[coord_idx]; - auto x2 = x_data[coord_idx + 1]; - auto y2 = y_data[coord_idx + 1]; - perimeter 
+= std::sqrt(std::pow(x1 - x2, 2) + std::pow(y1 - y2, 2)); - } - } - return perimeter; - }); - - if (count == 1) { - result.SetVectorType(VectorType::CONSTANT_VECTOR); - } -} - -//------------------------------------------------------------------------------ -// BOX_2D -//------------------------------------------------------------------------------ -static void Box2DPerimeterFunction(DataChunk &args, ExpressionState &state, Vector &result) { - using BOX_TYPE = StructTypeQuaternary; - using PERIMETER_TYPE = PrimitiveType; - - GenericExecutor::ExecuteUnary(args.data[0], result, args.size(), [&](BOX_TYPE &box) { - auto minx = box.a_val; - auto miny = box.b_val; - auto maxx = box.c_val; - auto maxy = box.d_val; - return 2 * (maxx - minx + maxy - miny); - }); -} - -//------------------------------------------------------------------------------ -// GEOMETRY -//------------------------------------------------------------------------------ -static void GeometryPerimeterFunction(DataChunk &args, ExpressionState &state, Vector &result) { - auto &lstate = core::GeometryFunctionLocalState::ResetAndGet(state); - auto &arena = lstate.arena; - - auto &input = args.data[0]; - auto count = args.size(); - - UnaryExecutor::Execute(input, result, count, [&](geometry_t input) { - auto geom = Geometry::Deserialize(arena, input); - double perimeter = 0.0; - Geometry::ExtractPolygons(geom, [&](const Geometry &poly) { - for (auto &p : Polygon::Parts(poly)) { - perimeter += LineString::Length(p); - } - }); - return perimeter; - }); - - if (count == 1) { - result.SetVectorType(VectorType::CONSTANT_VECTOR); - } -} - -//------------------------------------------------------------------------------ -// Documentation -//------------------------------------------------------------------------------ - -static constexpr const char *DOC_DESCRIPTION = R"( - Returns the length of the perimeter of the geometry -)"; - -static constexpr const char *DOC_EXAMPLE = R"( - -)"; - -static constexpr DocTag 
DOC_TAGS[] = {{"ext", "spatial"}, {"category", "property"}}; -//------------------------------------------------------------------------------ -// Register Functions -//------------------------------------------------------------------------------ -void CoreScalarFunctions::RegisterStPerimeter(DatabaseInstance &db) { - - // Perimiter - ScalarFunctionSet set("ST_Perimeter"); - set.AddFunction(ScalarFunction({GeoTypes::BOX_2D()}, LogicalType::DOUBLE, Box2DPerimeterFunction)); - set.AddFunction(ScalarFunction({GeoTypes::POLYGON_2D()}, LogicalType::DOUBLE, Polygon2DPerimeterFunction)); - set.AddFunction(ScalarFunction({GeoTypes::GEOMETRY()}, LogicalType::DOUBLE, GeometryPerimeterFunction, nullptr, - nullptr, nullptr, GeometryFunctionLocalState::Init)); - - ExtensionUtil::RegisterFunction(db, set); - DocUtil::AddDocumentation(db, "ST_Perimeter", DOC_DESCRIPTION, DOC_EXAMPLE, DOC_TAGS); -} - -} // namespace core - -} // namespace spatial \ No newline at end of file diff --git a/spatial/src/spatial/core/functions/scalar/st_point.cpp b/spatial/src/spatial/core/functions/scalar/st_point.cpp deleted file mode 100644 index 10d00a3d..00000000 --- a/spatial/src/spatial/core/functions/scalar/st_point.cpp +++ /dev/null @@ -1,183 +0,0 @@ -#include "duckdb/parser/parsed_data/create_scalar_function_info.hpp" -#include "spatial/common.hpp" -#include "spatial/core/functions/scalar.hpp" -#include "spatial/core/functions/common.hpp" -#include "spatial/core/geometry/geometry.hpp" -#include "spatial/core/types.hpp" -#include "spatial/core/function_builder.hpp" - -namespace spatial { - -namespace core { - -//------------------------------------------------------------------------------ -// POINT_2D -//------------------------------------------------------------------------------ -static void Point2DFunction(DataChunk &args, ExpressionState &state, Vector &result) { - D_ASSERT(args.data.size() == 2); - auto count = args.size(); - - auto &x = args.data[0]; - auto &y = args.data[1]; - - 
x.Flatten(count); - y.Flatten(count); - - auto &children = StructVector::GetEntries(result); - auto &x_child = children[0]; - auto &y_child = children[1]; - - x_child->Reference(x); - y_child->Reference(y); - - if (count == 1) { - result.SetVectorType(VectorType::CONSTANT_VECTOR); - } -} - -//------------------------------------------------------------------------------ -// POINT_3D -//------------------------------------------------------------------------------ -static void Point3DFunction(DataChunk &args, ExpressionState &state, Vector &result) { - D_ASSERT(args.data.size() == 3); - auto count = args.size(); - - auto &x = args.data[0]; - auto &y = args.data[1]; - auto &z = args.data[2]; - - x.Flatten(count); - y.Flatten(count); - z.Flatten(count); - - auto &children = StructVector::GetEntries(result); - auto &x_child = children[0]; - auto &y_child = children[1]; - auto &z_child = children[2]; - - x_child->Reference(x); - y_child->Reference(y); - z_child->Reference(z); - - if (count == 1) { - result.SetVectorType(VectorType::CONSTANT_VECTOR); - } -} - -//------------------------------------------------------------------------------ -// POINT_4D -//------------------------------------------------------------------------------ -static void Point4DFunction(DataChunk &args, ExpressionState &state, Vector &result) { - D_ASSERT(args.data.size() == 4); - auto count = args.size(); - - auto &x = args.data[0]; - auto &y = args.data[1]; - auto &z = args.data[2]; - auto &m = args.data[3]; - - x.Flatten(count); - y.Flatten(count); - z.Flatten(count); - m.Flatten(count); - - auto &children = StructVector::GetEntries(result); - auto &x_child = children[0]; - auto &y_child = children[1]; - auto &z_child = children[2]; - auto &m_child = children[3]; - - x_child->Reference(x); - y_child->Reference(y); - z_child->Reference(z); - m_child->Reference(m); - - if (count == 1) { - result.SetVectorType(VectorType::CONSTANT_VECTOR); - } -} - 
-//------------------------------------------------------------------------------ -// GEOMETRY -//------------------------------------------------------------------------------ -static void PointFunction(DataChunk &args, ExpressionState &state, Vector &result) { - auto &lstate = GeometryFunctionLocalState::ResetAndGet(state); - auto &arena = lstate.arena; - auto &x = args.data[0]; - auto &y = args.data[1]; - auto count = args.size(); - - BinaryExecutor::Execute(x, y, result, count, [&](double x, double y) { - return Geometry::Serialize(Point::CreateFromVertex(arena, VertexXY {x, y}), result); - }); -} - -//------------------------------------------------------------------------------ -// Register functions -//------------------------------------------------------------------------------ -void CoreScalarFunctions::RegisterStPoint(DatabaseInstance &db) { - - FunctionBuilder::RegisterScalar(db, "ST_Point", [](ScalarFunctionBuilder &func) { - func.AddVariant([](ScalarFunctionVariantBuilder &variant) { - variant.AddParameter("x", LogicalType::DOUBLE); - variant.AddParameter("y", LogicalType::DOUBLE); - variant.SetReturnType(GeoTypes::GEOMETRY()); - variant.SetFunction(PointFunction); - variant.SetInit(GeometryFunctionLocalState::Init); - - variant.SetDescription("Creates a GEOMETRY point"); - }); - - func.SetTag("ext", "spatial"); - func.SetTag("category", "construction"); - }); - - FunctionBuilder::RegisterScalar(db, "ST_Point2D", [](ScalarFunctionBuilder &func) { - func.AddVariant([](ScalarFunctionVariantBuilder &variant) { - variant.AddParameter("x", LogicalType::DOUBLE); - variant.AddParameter("y", LogicalType::DOUBLE); - variant.SetReturnType(GeoTypes::POINT_2D()); - variant.SetFunction(Point2DFunction); - - variant.SetDescription("Creates a POINT_2D"); - }); - - func.SetTag("ext", "spatial"); - func.SetTag("category", "construction"); - }); - - FunctionBuilder::RegisterScalar(db, "ST_Point3D", [](ScalarFunctionBuilder &func) { - 
func.AddVariant([](ScalarFunctionVariantBuilder &variant) { - variant.AddParameter("x", LogicalType::DOUBLE); - variant.AddParameter("y", LogicalType::DOUBLE); - variant.AddParameter("z", LogicalType::DOUBLE); - variant.SetReturnType(GeoTypes::POINT_3D()); - variant.SetFunction(Point3DFunction); - - variant.SetDescription("Creates a POINT_3D"); - }); - - func.SetTag("ext", "spatial"); - func.SetTag("category", "construction"); - }); - - FunctionBuilder::RegisterScalar(db, "ST_Point4D", [](ScalarFunctionBuilder &func) { - func.AddVariant([](ScalarFunctionVariantBuilder &variant) { - variant.AddParameter("x", LogicalType::DOUBLE); - variant.AddParameter("y", LogicalType::DOUBLE); - variant.AddParameter("z", LogicalType::DOUBLE); - variant.AddParameter("m", LogicalType::DOUBLE); - variant.SetReturnType(GeoTypes::POINT_4D()); - variant.SetFunction(Point4DFunction); - - variant.SetDescription("Creates a POINT_4D"); - }); - - func.SetTag("ext", "spatial"); - func.SetTag("category", "construction"); - }); -} - -} // namespace core - -} // namespace spatial \ No newline at end of file diff --git a/spatial/src/spatial/core/functions/scalar/st_pointn.cpp b/spatial/src/spatial/core/functions/scalar/st_pointn.cpp deleted file mode 100644 index e4d33d57..00000000 --- a/spatial/src/spatial/core/functions/scalar/st_pointn.cpp +++ /dev/null @@ -1,127 +0,0 @@ -#include "spatial/common.hpp" -#include "spatial/core/types.hpp" -#include "spatial/core/functions/scalar.hpp" -#include "spatial/core/functions/common.hpp" -#include "spatial/core/geometry/geometry.hpp" - -#include "duckdb/parser/parsed_data/create_scalar_function_info.hpp" -#include "duckdb/common/vector_operations/unary_executor.hpp" -#include "duckdb/common/vector_operations/binary_executor.hpp" - -namespace spatial { - -namespace core { - -//------------------------------------------------------------------------------ -// LINESTRING_2D -//------------------------------------------------------------------------------ 
-static void LineStringPointNFunction(DataChunk &args, ExpressionState &state, Vector &result) { - auto geom_vec = args.data[0]; - auto index_vec = args.data[1]; - auto count = args.size(); - UnifiedVectorFormat geom_format; - geom_vec.ToUnifiedFormat(count, geom_format); - UnifiedVectorFormat index_format; - index_vec.ToUnifiedFormat(count, index_format); - - auto line_vertex_entries = ListVector::GetData(geom_vec); - auto &line_vertex_vec = ListVector::GetEntry(geom_vec); - auto &line_vertex_vec_children = StructVector::GetEntries(line_vertex_vec); - auto line_x_data = FlatVector::GetData(*line_vertex_vec_children[0]); - auto line_y_data = FlatVector::GetData(*line_vertex_vec_children[1]); - - auto &point_vertex_children = StructVector::GetEntries(result); - auto point_x_data = FlatVector::GetData(*point_vertex_children[0]); - auto point_y_data = FlatVector::GetData(*point_vertex_children[1]); - - auto index_data = FlatVector::GetData(index_vec); - - for (idx_t out_row_idx = 0; out_row_idx < count; out_row_idx++) { - - auto in_row_idx = geom_format.sel->get_index(out_row_idx); - auto in_idx_idx = index_format.sel->get_index(out_row_idx); - if (geom_format.validity.RowIsValid(in_row_idx) && index_format.validity.RowIsValid(in_idx_idx)) { - auto line = line_vertex_entries[in_row_idx]; - auto line_offset = line.offset; - auto line_length = line.length; - auto index = index_data[in_idx_idx]; - - if (line_length == 0 || index == 0 || index < -static_cast(line_length) || - index > static_cast(line_length)) { - FlatVector::SetNull(result, out_row_idx, true); - continue; - } - auto actual_index = index < 0 ? 
line_length + index : index - 1; - point_x_data[out_row_idx] = line_x_data[line_offset + actual_index]; - point_y_data[out_row_idx] = line_y_data[line_offset + actual_index]; - } else { - FlatVector::SetNull(result, out_row_idx, true); - } - } - if (count == 1) { - result.SetVectorType(VectorType::CONSTANT_VECTOR); - } -} -//------------------------------------------------------------------------------ -// GEOMETRY -//------------------------------------------------------------------------------ -static void GeometryPointNFunction(DataChunk &args, ExpressionState &state, Vector &result) { - auto &lstate = GeometryFunctionLocalState::ResetAndGet(state); - auto &arena = lstate.arena; - auto &geom_vec = args.data[0]; - auto &index_vec = args.data[1]; - - auto count = args.size(); - - BinaryExecutor::ExecuteWithNulls( - geom_vec, index_vec, result, count, [&](geometry_t input, int32_t index, ValidityMask &mask, idx_t row_idx) { - if (input.GetType() != GeometryType::LINESTRING) { - mask.SetInvalid(row_idx); - return geometry_t {}; - } - auto line = Geometry::Deserialize(arena, input); - auto point_count = LineString::VertexCount(line); - - if (point_count == 0 || index == 0 || index < -static_cast(point_count) || - index > static_cast(point_count)) { - mask.SetInvalid(row_idx); - return geometry_t {}; - } - - auto actual_index = index < 0 ? 
point_count + index : index - 1; - auto point = LineString::GetPointAsReference(line, actual_index); - return Geometry::Serialize(point, result); - }); -} - -//------------------------------------------------------------------------------ -// Documentation -//------------------------------------------------------------------------------ -static constexpr const char *DOC_DESCRIPTION = R"( - Returns the n'th vertex from the input geometry as a point geometry -)"; - -static constexpr const char *DOC_EXAMPLE = R"()"; - -static constexpr DocTag DOC_TAGS[] = {{"ext", "spatial"}, {"category", "construction"}}; -//------------------------------------------------------------------------------ -// Register functions -//------------------------------------------------------------------------------ -void CoreScalarFunctions::RegisterStPointN(DatabaseInstance &db) { - - ScalarFunctionSet set("ST_PointN"); - - set.AddFunction(ScalarFunction({GeoTypes::GEOMETRY(), LogicalType::INTEGER}, GeoTypes::GEOMETRY(), - GeometryPointNFunction, nullptr, nullptr, nullptr, - GeometryFunctionLocalState::Init)); - - set.AddFunction(ScalarFunction({GeoTypes::LINESTRING_2D(), LogicalType::INTEGER}, GeoTypes::POINT_2D(), - LineStringPointNFunction)); - - ExtensionUtil::RegisterFunction(db, set); - DocUtil::AddDocumentation(db, "ST_PointN", DOC_DESCRIPTION, DOC_EXAMPLE, DOC_TAGS); -} - -} // namespace core - -} // namespace spatial diff --git a/spatial/src/spatial/core/functions/scalar/st_points.cpp b/spatial/src/spatial/core/functions/scalar/st_points.cpp deleted file mode 100644 index 7d654ae6..00000000 --- a/spatial/src/spatial/core/functions/scalar/st_points.cpp +++ /dev/null @@ -1,110 +0,0 @@ -#include "spatial/common.hpp" -#include "spatial/core/types.hpp" -#include "spatial/core/functions/scalar.hpp" -#include "spatial/core/functions/common.hpp" -#include "spatial/core/geometry/geometry.hpp" - -#include "duckdb/parser/parsed_data/create_scalar_function_info.hpp" -#include 
"duckdb/common/vector_operations/unary_executor.hpp" -#include "duckdb/common/vector_operations/binary_executor.hpp" - -namespace spatial { - -namespace core { - -//------------------------------------------------------------------------------ -// GEOMETRY -//------------------------------------------------------------------------------ -static void GeometryPointsFunction(DataChunk &args, ExpressionState &state, Vector &result) { - - // Collect all vertex data into a buffer - struct op { - static void Case(Geometry::Tags::SinglePartGeometry, const Geometry &geom, vector &buffer) { - const auto vertex_count = SinglePartGeometry::VertexCount(geom); - const auto vertex_size = SinglePartGeometry::VertexSize(geom); - - // Reserve size for the pointers to the vertices - buffer.reserve(buffer.size() + vertex_count); - - const auto vertex_ptr = geom.GetData(); - - for (uint32_t i = 0; i < vertex_count; i++) { - buffer.push_back(vertex_ptr + i * vertex_size); - } - } - static void Case(Geometry::Tags::MultiPartGeometry, const Geometry &geom, vector &buffer) { - for (auto &part : MultiPartGeometry::Parts(geom)) { - Geometry::Match(part, buffer); - } - } - }; - - auto &lstate = GeometryFunctionLocalState::ResetAndGet(state); - auto &arena = lstate.arena; - auto &geom_vec = args.data[0]; - const auto count = args.size(); - - vector vertex_ptr_buffer; - - UnaryExecutor::Execute(geom_vec, result, count, [&](geometry_t input) { - const auto geom = Geometry::Deserialize(arena, input); - const auto has_z = geom.GetProperties().HasZ(); - const auto has_m = geom.GetProperties().HasM(); - - // Reset the vertex pointer buffer - vertex_ptr_buffer.clear(); - - // Collect the vertex pointers - Geometry::Match(geom, vertex_ptr_buffer); - - if (vertex_ptr_buffer.empty()) { - const auto mpoint = MultiPoint::CreateEmpty(has_z, has_m); - return Geometry::Serialize(mpoint, result); - } - - auto mpoint = MultiPoint::Create(arena, vertex_ptr_buffer.size(), has_z, has_m); - for (size_t i = 0; i < 
vertex_ptr_buffer.size(); i++) { - // Get the nth point - auto &point = MultiPoint::Part(mpoint, i); - // Set the point to reference the data pointer to the current vertex - Point::ReferenceData(point, vertex_ptr_buffer[i], 1, has_z, has_m); - } - return Geometry::Serialize(mpoint, result); - }); -} - -//------------------------------------------------------------------------------ -// Documentation -//------------------------------------------------------------------------------ -static constexpr const char *DOC_DESCRIPTION = R"( - Collects all the vertices in the geometry into a multipoint -)"; - -static constexpr const char *DOC_EXAMPLE = R"( - select st_points('LINESTRING(1 1, 2 2)'::geometry); - ---- - MULTIPOINT (1 1, 2 2) - - select st_points('MULTIPOLYGON Z EMPTY'::geometry); - ---- - MULTIPOINT Z EMPTY -)"; - -static constexpr DocTag DOC_TAGS[] = {{"ext", "spatial"}, {"category", "construction"}}; -//------------------------------------------------------------------------------ -// Register functions -//------------------------------------------------------------------------------ -void CoreScalarFunctions::RegisterStPoints(DatabaseInstance &db) { - - ScalarFunctionSet set("ST_Points"); - - set.AddFunction(ScalarFunction({GeoTypes::GEOMETRY()}, GeoTypes::GEOMETRY(), GeometryPointsFunction, nullptr, - nullptr, nullptr, GeometryFunctionLocalState::Init)); - - ExtensionUtil::RegisterFunction(db, set); - DocUtil::AddDocumentation(db, "ST_Points", DOC_DESCRIPTION, DOC_EXAMPLE, DOC_TAGS); -} - -} // namespace core - -} // namespace spatial diff --git a/spatial/src/spatial/core/functions/scalar/st_quadkey.cpp b/spatial/src/spatial/core/functions/scalar/st_quadkey.cpp deleted file mode 100644 index cd14b85b..00000000 --- a/spatial/src/spatial/core/functions/scalar/st_quadkey.cpp +++ /dev/null @@ -1,145 +0,0 @@ -#include "duckdb/parser/parsed_data/create_scalar_function_info.hpp" -#include "duckdb/common/vector_operations/generic_executor.hpp" -#include 
"duckdb/common/constants.hpp" -#include "spatial/common.hpp" -#include "spatial/core/functions/scalar.hpp" -#include "spatial/core/functions/common.hpp" -#include "spatial/core/geometry/geometry.hpp" -#include "spatial/core/types.hpp" -#include "spatial/core/function_builder.hpp" - -#include - -namespace spatial { - -namespace core { - -static void GetQuadKey(double lon, double lat, int32_t level, char *buffer) { - - lat = std::max(-85.05112878, std::min(85.05112878, lat)); - lon = std::max(-180.0, std::min(180.0, lon)); - - double lat_rad = lat * PI / 180.0; - auto x = static_cast((lon + 180.0) / 360.0 * (1 << level)); - auto y = - static_cast((1.0 - std::log(std::tan(lat_rad) + 1.0 / std::cos(lat_rad)) / PI) / 2.0 * (1 << level)); - - for (int i = level; i > 0; --i) { - char digit = '0'; - int32_t mask = 1 << (i - 1); - if ((x & mask) != 0) { - digit += 1; - } - if ((y & mask) != 0) { - digit += 2; - } - - buffer[level - i] = digit; - } -} -//------------------------------------------------------------------------------ -// Coordinates -//------------------------------------------------------------------------------ -static void CoordinateQuadKeyFunction(DataChunk &args, ExpressionState &state, Vector &result) { - auto &lon_in = args.data[0]; - auto &lat_in = args.data[1]; - auto &level = args.data[2]; - auto count = args.size(); - - TernaryExecutor::Execute( - lon_in, lat_in, level, result, count, [&](double lon, double lat, int32_t level) { - if (level < 1 || level > 23) { - throw InvalidInputException("ST_QuadKey: Level must be between 1 and 23"); - } - char buffer[64]; - GetQuadKey(lon, lat, level, buffer); - return StringVector::AddString(result, buffer, level); - }); -} - -//------------------------------------------------------------------------------ -// GEOMETRY -//------------------------------------------------------------------------------ -static void GeometryQuadKeyFunction(DataChunk &args, ExpressionState &state, Vector &result) { - auto &lstate = 
GeometryFunctionLocalState::ResetAndGet(state); - auto &arena = lstate.arena; - - auto &geom = args.data[0]; - auto &level = args.data[1]; - auto count = args.size(); - - BinaryExecutor::Execute( - geom, level, result, count, [&](geometry_t input, int32_t level) { - if (level < 1 || level > 23) { - throw InvalidInputException("ST_QuadKey: Level must be between 1 and 23"); - } - if (input.GetType() != GeometryType::POINT) { - throw InvalidInputException("ST_QuadKey: Only POINT geometries are supported"); - } - auto point = Geometry::Deserialize(arena, input); - if (Point::IsEmpty(point)) { - throw InvalidInputException("ST_QuadKey: Empty geometries are not supported"); - } - auto vertex = Point::GetVertex(point); - char buffer[64]; - GetQuadKey(vertex.x, vertex.y, level, buffer); - return StringVector::AddString(result, buffer, level); - }); -} - -//------------------------------------------------------------------------------ -// Documentation -//------------------------------------------------------------------------------ -static constexpr const char *DOC_DESCRIPTION = R"( - -Compute the [quadkey](https://learn.microsoft.com/en-us/bingmaps/articles/bing-maps-tile-system) for a given lon/lat point at a given level. -Note that the parameter order is __longitude__, __latitude__. - -`level` has to be between 1 and 23, inclusive. - -The input coordinates will be clamped to the lon/lat bounds of the earth (longitude between -180 and 180, latitude between -85.05112878 and 85.05112878). 
- -The geometry overload throws an error if the input geometry is not a `POINT` -)"; - -static constexpr const char *DOC_EXAMPLE = R"( -SELECT ST_QuadKey(st_point(11.08, 49.45), 10); ----- -1333203202 -)"; -//------------------------------------------------------------------------------ -// Register functions -//------------------------------------------------------------------------------ -void CoreScalarFunctions::RegisterStQuadKey(DatabaseInstance &db) { - - FunctionBuilder::RegisterScalar(db, "ST_QuadKey", [](ScalarFunctionBuilder &func) { - func.AddVariant([](ScalarFunctionVariantBuilder &variant) { - variant.AddParameter("longitude", LogicalType::DOUBLE); - variant.AddParameter("latitude", LogicalType::DOUBLE); - variant.AddParameter("level", LogicalType::INTEGER); - variant.SetReturnType(LogicalType::VARCHAR); - variant.SetFunction(CoordinateQuadKeyFunction); - - variant.SetExample(DOC_EXAMPLE); - variant.SetDescription(DOC_DESCRIPTION); - }); - - func.AddVariant([](ScalarFunctionVariantBuilder &variant) { - variant.AddParameter("point", GeoTypes::GEOMETRY()); - variant.AddParameter("level", LogicalType::INTEGER); - variant.SetReturnType(LogicalType::VARCHAR); - variant.SetFunction(GeometryQuadKeyFunction); - variant.SetInit(GeometryFunctionLocalState::Init); - - variant.SetExample(DOC_EXAMPLE); - variant.SetDescription(DOC_DESCRIPTION); - }); - - func.SetTag("ext", "spatial"); - func.SetTag("category", "property"); - }); -} - -} // namespace core - -} // namespace spatial diff --git a/spatial/src/spatial/core/functions/scalar/st_removerepeatedpoints.cpp b/spatial/src/spatial/core/functions/scalar/st_removerepeatedpoints.cpp deleted file mode 100644 index e298f5e7..00000000 --- a/spatial/src/spatial/core/functions/scalar/st_removerepeatedpoints.cpp +++ /dev/null @@ -1,285 +0,0 @@ -#include "spatial/common.hpp" -#include "spatial/core/types.hpp" -#include "spatial/core/functions/scalar.hpp" -#include "spatial/core/functions/common.hpp" -#include 
"spatial/core/geometry/geometry.hpp" - -#include "duckdb/parser/parsed_data/create_scalar_function_info.hpp" -#include "duckdb/common/vector_operations/unary_executor.hpp" -#include "duckdb/common/vector_operations/binary_executor.hpp" - -namespace spatial { - -namespace core { - -//------------------------------------------------------------------------------ -// LINESTRING_2D -//------------------------------------------------------------------------------ -static void LineStringRemoveRepeatedPointsFunctions(DataChunk &args, ExpressionState &state, Vector &result) { - auto input = args.data[0]; - auto count = args.size(); - UnifiedVectorFormat format; - input.ToUnifiedFormat(count, format); - - auto in_line_entries = ListVector::GetData(input); - auto &in_line_vertex_vec = StructVector::GetEntries(ListVector::GetEntry(input)); - auto in_x_data = FlatVector::GetData(*in_line_vertex_vec[0]); - auto in_y_data = FlatVector::GetData(*in_line_vertex_vec[1]); - - auto out_line_entries = ListVector::GetData(result); - auto &out_line_vertex_vec = StructVector::GetEntries(ListVector::GetEntry(result)); - - idx_t out_offset = 0; - for (idx_t out_row_idx = 0; out_row_idx < count; out_row_idx++) { - - auto in_row_idx = format.sel->get_index(out_row_idx); - if (!format.validity.RowIsValid(in_row_idx)) { - FlatVector::SetNull(result, out_row_idx, true); - continue; - } - auto in = in_line_entries[in_row_idx]; - auto in_offset = in.offset; - auto in_length = in.length; - - // Special case: if the line has less than 3 points, we can't remove any points - if (in_length < 3) { - - ListVector::Reserve(result, out_offset + in_length); - auto out_x_data = FlatVector::GetData(*out_line_vertex_vec[0]); - auto out_y_data = FlatVector::GetData(*out_line_vertex_vec[1]); - - // If the line has less than 3 points, we can't remove any points - // so we just copy the line - out_line_entries[out_row_idx] = list_entry_t {out_offset, in_length}; - for (idx_t coord_idx = 0; coord_idx < in_length; 
coord_idx++) { - out_x_data[out_offset + coord_idx] = in_x_data[in_offset + coord_idx]; - out_y_data[out_offset + coord_idx] = in_y_data[in_offset + coord_idx]; - } - out_offset += in_length; - continue; - } - - // First pass, calculate how many points we need to keep - // We always keep the first and last point, so we start at 2 - uint32_t points_to_keep = 0; - - auto last_x = in_x_data[in_offset]; - auto last_y = in_y_data[in_offset]; - points_to_keep++; - - for (idx_t i = 1; i < in_length; i++) { - auto curr_x = in_x_data[in_offset + i]; - auto curr_y = in_y_data[in_offset + i]; - - if (curr_x != last_x || curr_y != last_y) { - points_to_keep++; - last_x = curr_x; - last_y = curr_y; - } - } - - // Special case: there is only 1 unique point in the line, so just keep - // the start and end points - if (points_to_keep == 1) { - out_line_entries[out_row_idx] = list_entry_t {out_offset, 2}; - ListVector::Reserve(result, out_offset + 2); - auto out_x_data = FlatVector::GetData(*out_line_vertex_vec[0]); - auto out_y_data = FlatVector::GetData(*out_line_vertex_vec[1]); - out_x_data[out_offset] = in_x_data[in_offset]; - out_y_data[out_offset] = in_y_data[in_offset]; - out_x_data[out_offset + 1] = in_x_data[in_offset + in_length - 1]; - out_y_data[out_offset + 1] = in_y_data[in_offset + in_length - 1]; - out_offset += 2; - continue; - } - - // Set the list entry - out_line_entries[out_row_idx] = list_entry_t {out_offset, points_to_keep}; - - // Second pass, copy the points we need to keep - ListVector::Reserve(result, out_offset + points_to_keep); - auto out_x_data = FlatVector::GetData(*out_line_vertex_vec[0]); - auto out_y_data = FlatVector::GetData(*out_line_vertex_vec[1]); - - // Copy the first point - out_x_data[out_offset] = in_x_data[in_offset]; - out_y_data[out_offset] = in_y_data[in_offset]; - out_offset++; - - // Copy the middle points (skip the last one, we'll copy it at the end) - last_x = in_x_data[in_offset]; - last_y = in_y_data[in_offset]; - - for (idx_t i 
= 1; i < in_length; i++) { - auto curr_x = in_x_data[in_offset + i]; - auto curr_y = in_y_data[in_offset + i]; - - if (curr_x != last_x || curr_y != last_y) { - out_x_data[out_offset] = curr_x; - out_y_data[out_offset] = curr_y; - last_x = curr_x; - last_y = curr_y; - out_offset++; - } - } - } - ListVector::SetListSize(result, out_offset); - - if (count == 1) { - result.SetVectorType(VectorType::CONSTANT_VECTOR); - } -} - -static void LineStringRemoveRepeatedPointsFunctionsWithTolerance(DataChunk &args, ExpressionState &state, - Vector &result) { - auto input = args.data[0]; - auto tolerance = args.data[1]; - auto count = args.size(); - UnifiedVectorFormat format; - input.ToUnifiedFormat(count, format); - - UnifiedVectorFormat tolerance_format; - tolerance.ToUnifiedFormat(count, tolerance_format); - - auto in_line_entries = ListVector::GetData(input); - auto &in_line_vertex_vec = StructVector::GetEntries(ListVector::GetEntry(input)); - auto in_x_data = FlatVector::GetData(*in_line_vertex_vec[0]); - auto in_y_data = FlatVector::GetData(*in_line_vertex_vec[1]); - - auto out_line_entries = ListVector::GetData(result); - auto &out_line_vertex_vec = StructVector::GetEntries(ListVector::GetEntry(result)); - - idx_t out_offset = 0; - - for (idx_t out_row_idx = 0; out_row_idx < count; out_row_idx++) { - auto in_row_idx = format.sel->get_index(out_row_idx); - auto in_tol_idx = tolerance_format.sel->get_index(out_row_idx); - if (!format.validity.RowIsValid(in_row_idx) || !tolerance_format.validity.RowIsValid(in_tol_idx)) { - FlatVector::SetNull(result, out_row_idx, true); - continue; - } - - auto in = in_line_entries[in_row_idx]; - auto in_offset = in.offset; - auto in_length = in.length; - - auto tolerance = Load(tolerance_format.data + in_tol_idx); - auto tolerance_squared = tolerance * tolerance; - - if (in_length < 3) { - - ListVector::Reserve(result, out_offset + in_length); - auto out_x_data = FlatVector::GetData(*out_line_vertex_vec[0]); - auto out_y_data = 
FlatVector::GetData(*out_line_vertex_vec[1]); - - // If the line has less than 3 points, we can't remove any points - // so we just copy the line - out_line_entries[out_row_idx] = list_entry_t {out_offset, in_length}; - for (idx_t coord_idx = 0; coord_idx < in_length; coord_idx++) { - out_x_data[out_offset + coord_idx] = in_x_data[in_offset + coord_idx]; - out_y_data[out_offset + coord_idx] = in_y_data[in_offset + coord_idx]; - } - out_offset += in_length; - continue; - } - - // First pass, calculate how many points we need to keep - uint32_t points_to_keep = 0; - - auto last_x = in_x_data[in_offset]; - auto last_y = in_y_data[in_offset]; - points_to_keep++; - - for (idx_t i = 1; i < in_length; i++) { - auto curr_x = in_x_data[in_offset + i]; - auto curr_y = in_y_data[in_offset + i]; - - auto dist_squared = (curr_x - last_x) * (curr_x - last_x) + (curr_y - last_y) * (curr_y - last_y); - - if (dist_squared > tolerance_squared) { - last_x = curr_x; - last_y = curr_y; - points_to_keep++; - } - } - - // Special case: there is only 1 unique point in the line, so just keep - // the start and end points - if (points_to_keep == 1) { - out_line_entries[out_row_idx] = list_entry_t {out_offset, 2}; - ListVector::Reserve(result, out_offset + 2); - auto out_x_data = FlatVector::GetData(*out_line_vertex_vec[0]); - auto out_y_data = FlatVector::GetData(*out_line_vertex_vec[1]); - out_x_data[out_offset] = in_x_data[in_offset]; - out_y_data[out_offset] = in_y_data[in_offset]; - out_x_data[out_offset + 1] = in_x_data[in_offset + in_length - 1]; - out_y_data[out_offset + 1] = in_y_data[in_offset + in_length - 1]; - out_offset += 2; - continue; - } - - // Set the list entry - out_line_entries[out_row_idx] = list_entry_t {out_offset, points_to_keep}; - - // Second pass, copy the points we need to keep - ListVector::Reserve(result, out_offset + points_to_keep); - auto out_x_data = FlatVector::GetData(*out_line_vertex_vec[0]); - auto out_y_data = 
FlatVector::GetData(*out_line_vertex_vec[1]); - - // Copy the first point - out_x_data[out_offset] = in_x_data[in_offset]; - out_y_data[out_offset] = in_y_data[in_offset]; - out_offset++; - - // With tolerance its different, we always keep the first and last point - // regardless of distance to the previous point - // Copy the middle points - last_x = in_x_data[in_offset]; - last_y = in_y_data[in_offset]; - - for (idx_t i = 1; i < in_length - 1; i++) { - - auto curr_x = in_x_data[in_offset + i]; - auto curr_y = in_y_data[in_offset + i]; - - auto dist_squared = (curr_x - last_x) * (curr_x - last_x) + (curr_y - last_y) * (curr_y - last_y); - if (dist_squared > tolerance_squared) { - out_x_data[out_offset] = curr_x; - out_y_data[out_offset] = curr_y; - last_x = curr_x; - last_y = curr_y; - out_offset++; - } - } - - // Copy the last point - out_x_data[points_to_keep - 1] = in_x_data[in_offset + in_length - 1]; - out_y_data[points_to_keep - 1] = in_y_data[in_offset + in_length - 1]; - out_offset++; - } - ListVector::SetListSize(result, out_offset); - - if (count == 1) { - result.SetVectorType(VectorType::CONSTANT_VECTOR); - } -} - -//------------------------------------------------------------------------------ -// Register functions -//------------------------------------------------------------------------------ -void CoreScalarFunctions::RegisterStRemoveRepeatedPoints(DatabaseInstance &db) { - - ScalarFunctionSet set("ST_RemoveRepeatedPoints"); - - set.AddFunction(ScalarFunction({GeoTypes::LINESTRING_2D()}, GeoTypes::LINESTRING_2D(), - LineStringRemoveRepeatedPointsFunctions)); - - set.AddFunction(ScalarFunction({GeoTypes::LINESTRING_2D(), LogicalType::DOUBLE}, GeoTypes::LINESTRING_2D(), - LineStringRemoveRepeatedPointsFunctionsWithTolerance)); - - ExtensionUtil::RegisterFunction(db, set); -} - -} // namespace core - -} // namespace spatial diff --git a/spatial/src/spatial/core/functions/scalar/st_startpoint.cpp 
b/spatial/src/spatial/core/functions/scalar/st_startpoint.cpp deleted file mode 100644 index 91cd5156..00000000 --- a/spatial/src/spatial/core/functions/scalar/st_startpoint.cpp +++ /dev/null @@ -1,113 +0,0 @@ -#include "spatial/common.hpp" -#include "spatial/core/types.hpp" -#include "spatial/core/functions/scalar.hpp" -#include "spatial/core/functions/common.hpp" -#include "spatial/core/geometry/geometry.hpp" - -#include "duckdb/parser/parsed_data/create_scalar_function_info.hpp" -#include "duckdb/common/vector_operations/unary_executor.hpp" -#include "duckdb/common/vector_operations/binary_executor.hpp" - -namespace spatial { - -namespace core { - -//------------------------------------------------------------------------------ -// LINESTRING_2D -//------------------------------------------------------------------------------ -static void LineStringStartPointFunction(DataChunk &args, ExpressionState &state, Vector &result) { - auto geom_vec = args.data[0]; - auto count = args.size(); - - UnifiedVectorFormat geom_format; - geom_vec.ToUnifiedFormat(count, geom_format); - - auto line_vertex_entries = ListVector::GetData(geom_vec); - auto &line_vertex_vec = ListVector::GetEntry(geom_vec); - auto &line_vertex_vec_children = StructVector::GetEntries(line_vertex_vec); - auto line_x_data = FlatVector::GetData(*line_vertex_vec_children[0]); - auto line_y_data = FlatVector::GetData(*line_vertex_vec_children[1]); - - auto &point_vertex_children = StructVector::GetEntries(result); - auto point_x_data = FlatVector::GetData(*point_vertex_children[0]); - auto point_y_data = FlatVector::GetData(*point_vertex_children[1]); - - for (idx_t out_row_idx = 0; out_row_idx < count; out_row_idx++) { - auto in_row_idx = geom_format.sel->get_index(out_row_idx); - - if (!geom_format.validity.RowIsValid(in_row_idx)) { - FlatVector::SetNull(result, out_row_idx, true); - continue; - } - - auto line = line_vertex_entries[in_row_idx]; - auto line_offset = line.offset; - auto line_length = 
line.length; - - if (line_length == 0) { - FlatVector::SetNull(result, out_row_idx, true); - continue; - } - - point_x_data[out_row_idx] = line_x_data[line_offset]; - point_y_data[out_row_idx] = line_y_data[line_offset]; - } - if (count == 1) { - result.SetVectorType(VectorType::CONSTANT_VECTOR); - } -} -//------------------------------------------------------------------------------ -// GEOMETRY -//------------------------------------------------------------------------------ -static void GeometryStartPointFunction(DataChunk &args, ExpressionState &state, Vector &result) { - auto &lstate = GeometryFunctionLocalState::ResetAndGet(state); - auto &geom_vec = args.data[0]; - auto count = args.size(); - - UnaryExecutor::ExecuteWithNulls(geom_vec, result, count, - [&](geometry_t input, ValidityMask &mask, idx_t row_idx) { - if (input.GetType() != GeometryType::LINESTRING) { - mask.SetInvalid(row_idx); - return geometry_t {}; - } - - auto line = Geometry::Deserialize(lstate.arena, input); - if (LineString::IsEmpty(line)) { - mask.SetInvalid(row_idx); - return geometry_t {}; - } - auto point = LineString::GetPointAsReference(line, 0); - return Geometry::Serialize(point, result); - }); -} -//------------------------------------------------------------------------------ -// Documentation -//------------------------------------------------------------------------------ -static constexpr const char *DOC_DESCRIPTION = R"( -Returns the first point of a line geometry -)"; - -static constexpr const char *DOC_EXAMPLE = R"( -select ST_StartPoint('LINESTRING(0 0, 1 1)'::geometry); --- POINT(0 0) -)"; - -static constexpr DocTag DOC_TAGS[] = {{"ext", "spatial"}, {"category", "construction"}}; -//------------------------------------------------------------------------------ -// Register functions -//------------------------------------------------------------------------------ -void CoreScalarFunctions::RegisterStStartPoint(DatabaseInstance &db) { - ScalarFunctionSet 
set("ST_StartPoint"); - - set.AddFunction(ScalarFunction({GeoTypes::GEOMETRY()}, GeoTypes::GEOMETRY(), GeometryStartPointFunction, nullptr, - nullptr, nullptr, GeometryFunctionLocalState::Init)); - - set.AddFunction(ScalarFunction({GeoTypes::LINESTRING_2D()}, GeoTypes::POINT_2D(), LineStringStartPointFunction)); - - ExtensionUtil::RegisterFunction(db, set); - DocUtil::AddDocumentation(db, "ST_StartPoint", DOC_DESCRIPTION, DOC_EXAMPLE, DOC_TAGS); -} - -} // namespace core - -} // namespace spatial diff --git a/spatial/src/spatial/core/functions/scalar/st_xyzm.cpp b/spatial/src/spatial/core/functions/scalar/st_xyzm.cpp deleted file mode 100644 index b488604f..00000000 --- a/spatial/src/spatial/core/functions/scalar/st_xyzm.cpp +++ /dev/null @@ -1,413 +0,0 @@ -#include "duckdb/parser/parsed_data/create_scalar_function_info.hpp" - -#include "spatial/common.hpp" -#include "spatial/core/functions/scalar.hpp" -#include "spatial/core/functions/common.hpp" -#include "spatial/core/geometry/geometry.hpp" -#include "spatial/core/geometry/geometry_processor.hpp" -#include "spatial/core/types.hpp" - -namespace spatial { - -namespace core { - -struct MinOp { - static double Default() { - return std::numeric_limits::max(); - } - static double Operation(double left, double right) { - return std::min(left, right); - } -}; - -struct MaxOp { - static double Default() { - return std::numeric_limits::lowest(); - } - static double Operation(double left, double right) { - return std::max(left, right); - } -}; - -struct AnyOp { - static double Default() { - return 0.0; - } - static double Operation(double left, double right) { - return right; - } -}; - -//------------------------------------------------------------------------------ -// POINT_2D -//------------------------------------------------------------------------------ -template -static void Point2DFunction(DataChunk &args, ExpressionState &state, Vector &result) { - D_ASSERT(args.data.size() == 1); - auto &point = args.data[0]; - 
auto &point_children = StructVector::GetEntries(point); - auto &n_child = point_children[N]; - result.Reference(*n_child); -} - -//------------------------------------------------------------------------------ -// BOX_2D -//------------------------------------------------------------------------------ - -template // 0: x_min, 1: y_min, 2: x_max, 3: y_max -static void Box2DFunction(DataChunk &args, ExpressionState &state, Vector &result) { - auto &input = args.data[0]; - auto &box_vec = StructVector::GetEntries(input); - auto &ordinate_vec = box_vec[N]; - result.Reference(*ordinate_vec); -} - -//------------------------------------------------------------------------------ -// LINESTRING_2D -//------------------------------------------------------------------------------ -template -static void LineString2DFunction(DataChunk &args, ExpressionState &state, Vector &result) { - D_ASSERT(args.data.size() == 1); - - auto &line = args.data[0]; - auto &line_coords = ListVector::GetEntry(line); - auto &line_coords_vec = StructVector::GetEntries(line_coords); - auto ordinate_data = FlatVector::GetData(*line_coords_vec[N]); - - UnaryExecutor::ExecuteWithNulls( - line, result, args.size(), [&](list_entry_t &line, ValidityMask &mask, idx_t idx) { - // Empty line, return NULL - if (line.length == 0) { - mask.SetInvalid(idx); - return 0.0; - } - - auto val = OP::Default(); - for (idx_t i = line.offset; i < line.offset + line.length; i++) { - auto ordinate = ordinate_data[i]; - val = OP::Operation(val, ordinate); - } - return val; - }); - - if (line.GetVectorType() == VectorType::CONSTANT_VECTOR) { - result.SetVectorType(VectorType::CONSTANT_VECTOR); - } -} - -//------------------------------------------------------------------------------ -// POLYGON_2D -//------------------------------------------------------------------------------ -template -static void Polygon2DFunction(DataChunk &args, ExpressionState &state, Vector &result) { - D_ASSERT(args.data.size() == 1); - - auto input 
= args.data[0]; - auto count = args.size(); - - UnifiedVectorFormat format; - input.ToUnifiedFormat(count, format); - - auto &ring_vec = ListVector::GetEntry(input); - auto ring_entries = ListVector::GetData(ring_vec); - auto &vertex_vec = ListVector::GetEntry(ring_vec); - auto &vertex_vec_children = StructVector::GetEntries(vertex_vec); - auto ordinate_data = FlatVector::GetData(*vertex_vec_children[N]); - - UnaryExecutor::ExecuteWithNulls( - input, result, count, [&](list_entry_t polygon, ValidityMask &mask, idx_t idx) { - auto polygon_offset = polygon.offset; - - // Empty polygon, return NULL - if (polygon.length == 0) { - mask.SetInvalid(idx); - return 0.0; - } - - // We only have to check the outer shell - auto shell_ring = ring_entries[polygon_offset]; - auto ring_offset = shell_ring.offset; - auto ring_length = shell_ring.length; - - // Polygon is invalid. This should never happen but just in case - if (ring_length == 0) { - mask.SetInvalid(idx); - return 0.0; - } - - auto val = OP::Default(); - for (idx_t coord_idx = ring_offset; coord_idx < ring_offset + ring_length - 1; coord_idx++) { - auto ordinate = ordinate_data[coord_idx]; - val = OP::Operation(val, ordinate); - } - return val; - }); -} - -//------------------------------------------------------------------------------ -// GEOMETRY -//------------------------------------------------------------------------------ - -template -class BoundsProcessor final : GeometryProcessor<> { - - bool is_empty = true; - double result = 0; - - void HandleVertexData(const VertexData &vertices) { - if (!vertices.IsEmpty()) { - is_empty = false; - } - for (uint32_t i = 0; i < vertices.count; i++) { - result = OP::Operation(result, Load(vertices.data[N] + i * vertices.stride[N])); - } - } - - void ProcessPoint(const VertexData &vertices) override { - return HandleVertexData(vertices); - } - - void ProcessLineString(const VertexData &vertices) override { - return HandleVertexData(vertices); - } - - void 
ProcessPolygon(PolygonState &state) override { - while (!state.IsDone()) { - HandleVertexData(state.Next()); - } - } - - void ProcessCollection(CollectionState &state) override { - while (!state.IsDone()) { - state.Next(); - } - } - -public: - double Execute(const geometry_t &geom) { - is_empty = true; - result = OP::Default(); - Process(geom); - return result; - } - - bool ResultIsEmpty() const { - return is_empty; - } -}; - -template -static void GeometryFunction(DataChunk &args, ExpressionState &state, Vector &result) { - static_assert(N < 4, "Invalid ordinate index"); - D_ASSERT(args.data.size() == 1); - - auto count = args.size(); - auto &input = args.data[0]; - - BoundsProcessor processor; - UnaryExecutor::ExecuteWithNulls(input, result, count, - [&](geometry_t blob, ValidityMask &mask, idx_t idx) { - auto res = processor.Execute(blob); - if (processor.ResultIsEmpty()) { - mask.SetInvalid(idx); - return 0.0; - } else { - return res; - } - }); - - if (count == 1) { - result.SetVectorType(VectorType::CONSTANT_VECTOR); - } -} - -template -static void GeometryAccessFunction(DataChunk &args, ExpressionState &state, Vector &result) { - static_assert(N < 4, "Invalid ordinate index"); - D_ASSERT(args.data.size() == 1); - - auto count = args.size(); - auto &input = args.data[0]; - - BoundsProcessor processor; - UnaryExecutor::ExecuteWithNulls( - input, result, count, [&](geometry_t blob, ValidityMask &mask, idx_t idx) { - if (blob.GetType() != GeometryType::POINT) { - throw InvalidInputException("ST_X/ST_Y/ST_Z/ST_M only supports POINT geometries"); - } - auto res = processor.Execute(blob); - if (processor.ResultIsEmpty()) { - mask.SetInvalid(idx); - return 0.0; - } else { - return res; - } - }); - - if (count == 1) { - result.SetVectorType(VectorType::CONSTANT_VECTOR); - } -} - -//------------------------------------------------------------------------------ -// Register Functions -//------------------------------------------------------------------------------ - 
-DocTag DOC_TAGS[] = {{"ext", "spatial"}, {"category", "property"}}; - -void CoreScalarFunctions::RegisterStX(DatabaseInstance &db) { - - ScalarFunctionSet st_x("ST_X"); - st_x.AddFunction(ScalarFunction({GeoTypes::POINT_2D()}, LogicalType::DOUBLE, Point2DFunction<0>)); - st_x.AddFunction(ScalarFunction({GeoTypes::GEOMETRY()}, LogicalType::DOUBLE, GeometryAccessFunction<0>)); - - ExtensionUtil::RegisterFunction(db, st_x); - - auto DOC_DESCRIPTION = "Returns the X value of a point geometry, or NULL if not a point or empty"; - DocUtil::AddDocumentation(db, "ST_X", DOC_DESCRIPTION, nullptr, DOC_TAGS); -} - -void CoreScalarFunctions::RegisterStXMax(DatabaseInstance &db) { - - ScalarFunctionSet st_xmax("ST_XMax"); - st_xmax.AddFunction(ScalarFunction({GeoTypes::BOX_2D()}, LogicalType::DOUBLE, Box2DFunction<2>)); - st_xmax.AddFunction(ScalarFunction({GeoTypes::BOX_2DF()}, LogicalType::FLOAT, Box2DFunction<2>)); - st_xmax.AddFunction(ScalarFunction({GeoTypes::POINT_2D()}, LogicalType::DOUBLE, Point2DFunction<0>)); - st_xmax.AddFunction( - ScalarFunction({GeoTypes::LINESTRING_2D()}, LogicalType::DOUBLE, LineString2DFunction<0, MaxOp>)); - st_xmax.AddFunction(ScalarFunction({GeoTypes::POLYGON_2D()}, LogicalType::DOUBLE, Polygon2DFunction<0, MaxOp>)); - st_xmax.AddFunction(ScalarFunction({GeoTypes::GEOMETRY()}, LogicalType::DOUBLE, GeometryFunction<0, MaxOp>)); - - ExtensionUtil::RegisterFunction(db, st_xmax); - - auto DOC_DESCRIPTION = "Returns the maximum X value of a geometry"; - DocUtil::AddDocumentation(db, "ST_XMax", DOC_DESCRIPTION, nullptr, DOC_TAGS); -} - -void CoreScalarFunctions::RegisterStXMin(DatabaseInstance &db) { - - ScalarFunctionSet st_xmin("ST_XMin"); - st_xmin.AddFunction(ScalarFunction({GeoTypes::BOX_2D()}, LogicalType::DOUBLE, Box2DFunction<0>)); - st_xmin.AddFunction(ScalarFunction({GeoTypes::BOX_2DF()}, LogicalType::FLOAT, Box2DFunction<0>)); - st_xmin.AddFunction(ScalarFunction({GeoTypes::POINT_2D()}, LogicalType::DOUBLE, Point2DFunction<0>)); - 
st_xmin.AddFunction( - ScalarFunction({GeoTypes::LINESTRING_2D()}, LogicalType::DOUBLE, LineString2DFunction<0, MinOp>)); - st_xmin.AddFunction(ScalarFunction({GeoTypes::POLYGON_2D()}, LogicalType::DOUBLE, Polygon2DFunction<0, MinOp>)); - st_xmin.AddFunction(ScalarFunction({GeoTypes::GEOMETRY()}, LogicalType::DOUBLE, GeometryFunction<0, MinOp>)); - - ExtensionUtil::RegisterFunction(db, st_xmin); - - auto DOC_DESCRIPTION = "Returns the minimum X value of a geometry"; - DocUtil::AddDocumentation(db, "ST_XMin", DOC_DESCRIPTION, nullptr, DOC_TAGS); -} - -void CoreScalarFunctions::RegisterStY(DatabaseInstance &db) { - - ScalarFunctionSet st_y("ST_Y"); - st_y.AddFunction(ScalarFunction({GeoTypes::POINT_2D()}, LogicalType::DOUBLE, Point2DFunction<1>)); - st_y.AddFunction(ScalarFunction({GeoTypes::GEOMETRY()}, LogicalType::DOUBLE, GeometryAccessFunction<1>)); - - ExtensionUtil::RegisterFunction(db, st_y); - - auto DOC_DESCRIPTION = "Returns the Y value of a point geometry, or NULL if not a point or empty"; - DocUtil::AddDocumentation(db, "ST_Y", DOC_DESCRIPTION, nullptr, DOC_TAGS); -} - -void CoreScalarFunctions::RegisterStYMax(DatabaseInstance &db) { - - ScalarFunctionSet st_ymax("ST_YMax"); - st_ymax.AddFunction(ScalarFunction({GeoTypes::BOX_2D()}, LogicalType::DOUBLE, Box2DFunction<3>)); - st_ymax.AddFunction(ScalarFunction({GeoTypes::BOX_2DF()}, LogicalType::FLOAT, Box2DFunction<3>)); - st_ymax.AddFunction(ScalarFunction({GeoTypes::POINT_2D()}, LogicalType::DOUBLE, Point2DFunction<1>)); - st_ymax.AddFunction( - ScalarFunction({GeoTypes::LINESTRING_2D()}, LogicalType::DOUBLE, LineString2DFunction<1, MaxOp>)); - st_ymax.AddFunction(ScalarFunction({GeoTypes::POLYGON_2D()}, LogicalType::DOUBLE, Polygon2DFunction<1, MaxOp>)); - st_ymax.AddFunction(ScalarFunction({GeoTypes::GEOMETRY()}, LogicalType::DOUBLE, GeometryFunction<1, MaxOp>)); - - ExtensionUtil::RegisterFunction(db, st_ymax); - - auto DOC_DESCRIPTION = "Returns the maximum Y value of a geometry"; - 
DocUtil::AddDocumentation(db, "ST_YMax", DOC_DESCRIPTION, nullptr, DOC_TAGS); -} - -void CoreScalarFunctions::RegisterStYMin(DatabaseInstance &db) { - - ScalarFunctionSet st_ymin("ST_YMin"); - st_ymin.AddFunction(ScalarFunction({GeoTypes::BOX_2D()}, LogicalType::DOUBLE, Box2DFunction<1>)); - st_ymin.AddFunction(ScalarFunction({GeoTypes::BOX_2DF()}, LogicalType::FLOAT, Box2DFunction<1>)); - st_ymin.AddFunction(ScalarFunction({GeoTypes::POINT_2D()}, LogicalType::DOUBLE, Point2DFunction<1>)); - st_ymin.AddFunction( - ScalarFunction({GeoTypes::LINESTRING_2D()}, LogicalType::DOUBLE, LineString2DFunction<1, MinOp>)); - st_ymin.AddFunction(ScalarFunction({GeoTypes::POLYGON_2D()}, LogicalType::DOUBLE, Polygon2DFunction<1, MinOp>)); - st_ymin.AddFunction(ScalarFunction({GeoTypes::GEOMETRY()}, LogicalType::DOUBLE, GeometryFunction<1, MinOp>)); - - ExtensionUtil::RegisterFunction(db, st_ymin); - - auto DOC_DESCRIPTION = "Returns the minimum Y value of a geometry"; - DocUtil::AddDocumentation(db, "ST_YMin", DOC_DESCRIPTION, nullptr, DOC_TAGS); -} - -void CoreScalarFunctions::RegisterStZ(DatabaseInstance &db) { - - ScalarFunctionSet st_z("ST_Z"); - st_z.AddFunction(ScalarFunction({GeoTypes::GEOMETRY()}, LogicalType::DOUBLE, GeometryAccessFunction<2>)); - - ExtensionUtil::RegisterFunction(db, st_z); - - auto DOC_DESCRIPTION = "Returns the Z value of a point geometry, or NULL if not a point or empty"; - DocUtil::AddDocumentation(db, "ST_Z", DOC_DESCRIPTION, nullptr, DOC_TAGS); -} - -void CoreScalarFunctions::RegisterStZMax(DatabaseInstance &db) { - ScalarFunctionSet st_zmax("ST_ZMax"); - st_zmax.AddFunction(ScalarFunction({GeoTypes::GEOMETRY()}, LogicalType::DOUBLE, GeometryFunction<2, MaxOp>)); - - ExtensionUtil::RegisterFunction(db, st_zmax); - - auto DOC_DESCRIPTION = "Returns the maximum Z value of a geometry"; - DocUtil::AddDocumentation(db, "ST_ZMax", DOC_DESCRIPTION, nullptr, DOC_TAGS); -} - -void CoreScalarFunctions::RegisterStZMin(DatabaseInstance &db) { - 
ScalarFunctionSet st_zmin("ST_ZMin"); - st_zmin.AddFunction(ScalarFunction({GeoTypes::GEOMETRY()}, LogicalType::DOUBLE, GeometryFunction<2, MinOp>)); - - ExtensionUtil::RegisterFunction(db, st_zmin); - - auto DOC_DESCRIPTION = "Returns the minimum Z value of a geometry"; - DocUtil::AddDocumentation(db, "ST_ZMin", DOC_DESCRIPTION, nullptr, DOC_TAGS); -} - -void CoreScalarFunctions::RegisterStM(DatabaseInstance &db) { - ScalarFunctionSet st_m("ST_M"); - st_m.AddFunction(ScalarFunction({GeoTypes::GEOMETRY()}, LogicalType::DOUBLE, GeometryAccessFunction<3>)); - - ExtensionUtil::RegisterFunction(db, st_m); - - auto DOC_DESCRIPTION = "Returns the M value of a point geometry, or NULL if not a point or empty"; - DocUtil::AddDocumentation(db, "ST_M", DOC_DESCRIPTION, nullptr, DOC_TAGS); -} - -void CoreScalarFunctions::RegisterStMMax(DatabaseInstance &db) { - ScalarFunctionSet st_mmax("ST_MMax"); - st_mmax.AddFunction(ScalarFunction({GeoTypes::GEOMETRY()}, LogicalType::DOUBLE, GeometryFunction<3, MaxOp>)); - - ExtensionUtil::RegisterFunction(db, st_mmax); - - auto DOC_DESCRIPTION = "Returns the maximum M value of a geometry"; - DocUtil::AddDocumentation(db, "ST_MMax", DOC_DESCRIPTION, nullptr, DOC_TAGS); -} - -void CoreScalarFunctions::RegisterStMMin(DatabaseInstance &db) { - ScalarFunctionSet st_mmin("ST_MMin"); - st_mmin.AddFunction(ScalarFunction({GeoTypes::GEOMETRY()}, LogicalType::DOUBLE, GeometryFunction<3, MinOp>)); - - ExtensionUtil::RegisterFunction(db, st_mmin); - - auto DOC_DESCRIPTION = "Returns the minimum M value of a geometry"; - DocUtil::AddDocumentation(db, "ST_MMin", DOC_DESCRIPTION, nullptr, DOC_TAGS); -} - -} // namespace core - -} // namespace spatial diff --git a/spatial/src/spatial/core/functions/table/CMakeLists.txt b/spatial/src/spatial/core/functions/table/CMakeLists.txt deleted file mode 100644 index abd674cb..00000000 --- a/spatial/src/spatial/core/functions/table/CMakeLists.txt +++ /dev/null @@ -1,5 +0,0 @@ -set(EXTENSION_SOURCES - 
${EXTENSION_SOURCES} - ${CMAKE_CURRENT_SOURCE_DIR}/st_generatepoints.cpp - PARENT_SCOPE -) \ No newline at end of file diff --git a/spatial/src/spatial/core/functions/table/st_generatepoints.cpp b/spatial/src/spatial/core/functions/table/st_generatepoints.cpp deleted file mode 100644 index 8754619a..00000000 --- a/spatial/src/spatial/core/functions/table/st_generatepoints.cpp +++ /dev/null @@ -1,118 +0,0 @@ -#include "spatial/common.hpp" -#include "spatial/core/functions/table.hpp" -#include "spatial/core/types.hpp" - -#include - -namespace spatial { - -namespace core { - -//------------------------------------------------------------------------------ -// Bind -//------------------------------------------------------------------------------ -struct GeneratePointsBindData final : public TableFunctionData { - idx_t count = 0; - int64_t seed = -1; - Box2D bbox; -}; - -static unique_ptr Bind(ClientContext &context, TableFunctionBindInput &input, - vector &return_types, vector &names) { - auto result = make_uniq(); - - return_types.push_back(GeoTypes::POINT_2D()); - names.push_back("point"); - - // Extract the bounding box - const auto &box_value = input.inputs[0]; - auto &box_components = StructValue::GetChildren(box_value); - result->bbox.min.x = box_components[0].GetValue(); - result->bbox.min.y = box_components[1].GetValue(); - result->bbox.max.x = box_components[2].GetValue(); - result->bbox.max.y = box_components[3].GetValue(); - - // Extract the count - const auto &count_value = input.inputs[1]; - const auto count = count_value.GetValue(); - if (count < 0) { - throw BinderException("Count must be a non-negative integer"); - } - result->count = UnsafeNumericCast(count); - - // Extract the seed (optional) - if (input.inputs.size() == 3) { - result->seed = input.inputs[2].GetValue(); - } - - return std::move(result); -} - -//------------------------------------------------------------------------------ -// Init 
-//------------------------------------------------------------------------------ -struct GeneratePointsState final : public GlobalTableFunctionState { - RandomEngine rng; - idx_t current_idx; - - explicit GeneratePointsState(const int64_t seed) : rng(seed), current_idx(0) { - } -}; - -static unique_ptr Init(ClientContext &context, TableFunctionInitInput &input) { - auto &bind_data = input.bind_data->Cast(); - auto result = make_uniq(bind_data.seed); - return std::move(result); -} - -//------------------------------------------------------------------------------ -// Execute -//------------------------------------------------------------------------------ -static void Execute(ClientContext &context, TableFunctionInput &data_p, DataChunk &output) { - auto &bind_data = data_p.bind_data->Cast(); - auto &state = data_p.global_state->Cast(); - - const auto &point_vec = StructVector::GetEntries(output.data[0]); - const auto &x_data = FlatVector::GetData(*point_vec[0]); - const auto &y_data = FlatVector::GetData(*point_vec[1]); - - const auto chunk_size = MinValue(STANDARD_VECTOR_SIZE, bind_data.count - state.current_idx); - for (idx_t i = 0; i < chunk_size; i++) { - - x_data[i] = state.rng.NextRandom32(bind_data.bbox.min.x, bind_data.bbox.max.x); - y_data[i] = state.rng.NextRandom32(bind_data.bbox.min.y, bind_data.bbox.max.y); - - state.current_idx++; - } - output.SetCardinality(chunk_size); -} - -//------------------------------------------------------------------------------ -// Cardinality -//------------------------------------------------------------------------------ -unique_ptr Cardinality(ClientContext &context, const FunctionData *bind_data_p) { - auto &bind_data = bind_data_p->Cast(); - return make_uniq(bind_data.count, bind_data.count); -} - -//------------------------------------------------------------------------------ -// Register -//------------------------------------------------------------------------------ -void 
CoreTableFunctions::RegisterGeneratePointsTableFunction(DatabaseInstance &db) { - TableFunctionSet set("ST_GeneratePoints"); - - TableFunction generate_points({GeoTypes::BOX_2D(), LogicalType::BIGINT}, Execute, Bind, Init); - generate_points.cardinality = Cardinality; - - // Overload without seed - set.AddFunction(generate_points); - - // Overload with seed - generate_points.arguments = {GeoTypes::BOX_2D(), LogicalType::BIGINT, LogicalType::BIGINT}; - set.AddFunction(generate_points); - ExtensionUtil::RegisterFunction(db, set); -} - -} // namespace core - -} // namespace spatial diff --git a/spatial/src/spatial/core/geometry/geometry.cpp b/spatial/src/spatial/core/geometry/geometry.cpp deleted file mode 100644 index 0358b4a9..00000000 --- a/spatial/src/spatial/core/geometry/geometry.cpp +++ /dev/null @@ -1,509 +0,0 @@ -#include "spatial/common.hpp" -#include "spatial/core/geometry/geometry.hpp" -#include "spatial/core/util/math.hpp" - -namespace spatial { - -namespace core { - -//------------------------------------------------------------------------------ -// Single Part Geometry -//------------------------------------------------------------------------------ -void SinglePartGeometry::Resize(Geometry &geom, ArenaAllocator &alloc, uint32_t new_count) { - D_ASSERT(GeometryTypes::IsSinglePart(geom.type)); - - auto vertex_size = geom.properties.VertexSize(); - if (new_count == geom.data_count) { - return; - } - if (geom.data_ptr == nullptr) { - geom.data_ptr = alloc.AllocateAligned(vertex_size * new_count); - geom.data_count = new_count; - geom.is_readonly = false; - memset(geom.data_ptr, 0, vertex_size * new_count); - return; - } - - if (!geom.is_readonly) { - geom.data_ptr = alloc.ReallocateAligned(geom.data_ptr, geom.data_count * vertex_size, vertex_size * new_count); - geom.data_count = new_count; - } else { - auto new_data = alloc.AllocateAligned(vertex_size * new_count); - memset(new_data, 0, vertex_size * new_count); - auto copy_count = 
std::min(geom.data_count, new_count); - memcpy(new_data, geom.data_ptr, vertex_size * copy_count); - geom.data_ptr = new_data; - geom.data_count = new_count; - geom.is_readonly = false; - } -} - -void SinglePartGeometry::Append(Geometry &geom, ArenaAllocator &alloc, const Geometry &other) { - Append(geom, alloc, &other, 1); -} - -void SinglePartGeometry::Append(Geometry &geom, ArenaAllocator &alloc, const Geometry *others, uint32_t others_count) { - D_ASSERT(GeometryTypes::IsSinglePart(geom.type)); - if (geom.IsReadOnly()) { - MakeMutable(geom, alloc); - } - - auto old_count = geom.data_count; - auto new_count = old_count; - for (uint32_t i = 0; i < others_count; i++) { - new_count += others[i].Count(); - // The other geometries has to be single part - D_ASSERT(GeometryTypes::IsSinglePart(others[i].type)); - // And have the same z and m properties - D_ASSERT(geom.properties.HasZ() == others[i].properties.HasZ()); - D_ASSERT(geom.properties.HasM() == others[i].properties.HasM()); - } - - Resize(geom, alloc, new_count); - - auto vertex_size = geom.properties.VertexSize(); - for (uint32_t i = 0; i < others_count; i++) { - auto other = others[i]; - memcpy(geom.data_ptr + old_count * vertex_size, other.data_ptr, vertex_size * other.data_count); - old_count += other.data_count; - } - geom.data_count = new_count; -} - -void SinglePartGeometry::SetVertexType(Geometry &geom, ArenaAllocator &alloc, bool has_z, bool has_m, double default_z, - double default_m) { - if (geom.properties.HasZ() == has_z && geom.properties.HasM() == has_m) { - return; - } - if (geom.is_readonly) { - MakeMutable(geom, alloc); - } - - const auto used_to_have_z = geom.properties.HasZ(); - const auto used_to_have_m = geom.properties.HasM(); - const auto old_vertex_size = geom.properties.VertexSize(); - - geom.properties.SetZ(has_z); - geom.properties.SetM(has_m); - - const auto new_vertex_size = geom.properties.VertexSize(); - // Case 1: The new vertex size is larger than the old vertex size - if 
(new_vertex_size > old_vertex_size) { - geom.data_ptr = alloc.ReallocateAligned(geom.data_ptr, geom.data_count * old_vertex_size, - geom.data_count * new_vertex_size); - - // There are 5 cases here: - if (used_to_have_m && has_m && !used_to_have_z && has_z) { - // 1. We go from XYM to XYZM - // This is special, because we need to slide the M value to the end of each vertex - for (int64_t i = geom.data_count - 1; i >= 0; i--) { - const auto old_offset = i * old_vertex_size; - const auto new_offset = i * new_vertex_size; - const auto old_m_offset = old_offset + sizeof(double) * 2; - const auto new_z_offset = new_offset + sizeof(double) * 2; - const auto new_m_offset = new_offset + sizeof(double) * 3; - // Move the M value - memcpy(geom.data_ptr + new_m_offset, geom.data_ptr + old_m_offset, sizeof(double)); - // Set the new Z value - memcpy(geom.data_ptr + new_z_offset, &default_z, sizeof(double)); - // Move the X and Y values - memmove(geom.data_ptr + new_offset, geom.data_ptr + old_offset, sizeof(double) * 2); - } - } else if (!used_to_have_z && has_z && !used_to_have_m && has_m) { - // 2. We go from XY to XYZM - // This is special, because we need to add both the default Z and M values to the end of each vertex - for (int64_t i = geom.data_count - 1; i >= 0; i--) { - const auto old_offset = i * old_vertex_size; - const auto new_offset = i * new_vertex_size; - memcpy(geom.data_ptr + new_offset, geom.data_ptr + old_offset, sizeof(double) * 2); - memcpy(geom.data_ptr + new_offset + sizeof(double) * 2, &default_z, sizeof(double)); - memcpy(geom.data_ptr + new_offset + sizeof(double) * 3, &default_m, sizeof(double)); - } - } else { - // Otherwise: - // 3. We go from XY to XYZ - // 4. We go from XY to XYM - // 5. We go from XYZ to XYZM - // These are all really the same, we just add the default to the end - const auto default_value = has_m ? 
default_m : default_z; - for (int64_t i = geom.data_count - 1; i >= 0; i--) { - const auto old_offset = i * old_vertex_size; - const auto new_offset = i * new_vertex_size; - memmove(geom.data_ptr + new_offset, geom.data_ptr + old_offset, old_vertex_size); - memcpy(geom.data_ptr + new_offset + old_vertex_size, &default_value, sizeof(double)); - } - } - } - // Case 2: The new vertex size is equal to the old vertex size - else if (new_vertex_size == old_vertex_size) { - // This only happens when we go from XYZ -> XYM or XYM -> XYZ - // In this case we just need to set the default on the third dimension - const auto default_value = has_m ? default_m : default_z; - for (uint32_t i = 0; i < geom.data_count; i++) { - const auto offset = i * new_vertex_size + sizeof(double) * 2; - memcpy(geom.data_ptr + offset, &default_value, sizeof(double)); - } - } - // Case 3: The new vertex size is smaller than the old vertex size. - // In this case we need to allocate new memory and copy the data over to not lose any data - else { - auto new_data = alloc.AllocateAligned(geom.data_count * new_vertex_size); - memset(new_data, 0, geom.data_count * new_vertex_size); - - // Special case: If we go from XYZM to XYM, we need to slide the M value to the end of each vertex - if (used_to_have_z && used_to_have_m && !has_z && has_m) { - for (uint32_t i = 0; i < geom.data_count; i++) { - const auto old_offset = i * old_vertex_size; - const auto new_offset = i * new_vertex_size; - memcpy(new_data + new_offset, geom.data_ptr + old_offset, sizeof(double) * 2); - const auto m_offset = old_offset + sizeof(double) * 3; - memcpy(new_data + new_offset + sizeof(double) * 2, geom.data_ptr + m_offset, sizeof(double)); - } - } else { - // Otherwise, we just copy the data over - for (uint32_t i = 0; i < geom.data_count; i++) { - const auto old_offset = i * old_vertex_size; - const auto new_offset = i * new_vertex_size; - memcpy(new_data + new_offset, geom.data_ptr + old_offset, new_vertex_size); - } - } - 
geom.data_ptr = new_data; - } -} - -void SinglePartGeometry::MakeMutable(Geometry &geom, ArenaAllocator &alloc) { - if (!geom.is_readonly) { - return; - } - - if (geom.data_count == 0) { - geom.data_ptr = nullptr; - geom.is_readonly = false; - return; - } - - auto data_size = ByteSize(geom); - auto new_data = alloc.AllocateAligned(data_size); - memcpy(new_data, geom.data_ptr, data_size); - geom.data_ptr = new_data; - geom.is_readonly = false; -} - -bool SinglePartGeometry::IsClosed(const Geometry &geom) { - switch (geom.Count()) { - case 0: - return false; - case 1: - return true; - default: - VertexXY first = GetVertex(geom, 0); - VertexXY last = GetVertex(geom, geom.Count() - 1); - // TODO: Approximate comparison? - return first.x == last.x && first.y == last.y; - } -} - -double SinglePartGeometry::Length(const Geometry &geom) { - D_ASSERT(GeometryTypes::IsSinglePart(geom.type)); - double length = 0; - for (uint32_t i = 1; i < geom.data_count; i++) { - auto p1 = GetVertex(geom, i - 1); - auto p2 = GetVertex(geom, i); - length += sqrt((p2.x - p1.x) * (p2.x - p1.x) + (p2.y - p1.y) * (p2.y - p1.y)); - } - return length; -} - -string SinglePartGeometry::ToString(const Geometry &geom, uint32_t start, uint32_t count) { - D_ASSERT(GeometryTypes::IsSinglePart(geom.type)); - auto has_z = geom.properties.HasZ(); - auto has_m = geom.properties.HasM(); - - D_ASSERT(geom.type == GeometryType::POINT || geom.type == GeometryType::LINESTRING); - auto type_name = geom.type == GeometryType::POINT ? 
"POINT" : "LINESTRING"; - - if (has_z && has_m) { - string result = StringUtil::Format("%s XYZM ([%d-%d]/%d) [", type_name, start, start + count, geom.data_count); - for (uint32_t i = start; i < count; i++) { - auto vertex = GetVertex(geom, i); - result += "(" + MathUtil::format_coord(vertex.x, vertex.y, vertex.z, vertex.m) + ")"; - if (i < count - 1) { - result += ", "; - } - } - result += "]"; - return result; - } else if (has_z) { - string result = StringUtil::Format("%s XYZ ([%d-%d]/%d) [", type_name, start, start + count, geom.data_count); - for (uint32_t i = start; i < count; i++) { - auto vertex = GetVertex(geom, i); - result += "(" + MathUtil::format_coord(vertex.x, vertex.y, vertex.z) + ")"; - if (i < count - 1) { - result += ", "; - } - } - result += "]"; - return result; - } else if (has_m) { - string result = StringUtil::Format("%s XYM ([%d-%d]/%d) [", type_name, start, start + count, geom.data_count); - for (uint32_t i = start; i < count; i++) { - auto vertex = GetVertex(geom, i); - result += "(" + MathUtil::format_coord(vertex.x, vertex.y, vertex.m) + ")"; - if (i < count - 1) { - result += ", "; - } - } - result += "]"; - return result; - } else { - string result = StringUtil::Format("%s XY ([%d-%d]/%d) [", type_name, start, start + count, geom.data_count); - for (uint32_t i = start; i < count; i++) { - auto vertex = GetVertex(geom, i); - result += "(" + MathUtil::format_coord(vertex.x, vertex.y) + ")"; - if (i < count - 1) { - result += ", "; - } - } - result += "]"; - return result; - } -} - -//------------------------------------------------------------------------------ -// Geometry -//------------------------------------------------------------------------------ -void Geometry::SetVertexType(ArenaAllocator &alloc, bool has_z, bool has_m, double default_z, double default_m) { - struct op { - static void Case(Geometry::Tags::SinglePartGeometry, Geometry &geom, ArenaAllocator &alloc, bool has_z, - bool has_m, double default_z, double default_m) { - 
SinglePartGeometry::SetVertexType(geom, alloc, has_z, has_m, default_z, default_m); - } - static void Case(Geometry::Tags::MultiPartGeometry, Geometry &geom, ArenaAllocator &alloc, bool has_z, - bool has_m, double default_z, double default_m) { - geom.properties.SetZ(has_z); - geom.properties.SetM(has_m); - for (auto &p : MultiPartGeometry::Parts(geom)) { - p.SetVertexType(alloc, has_z, has_m, default_z, default_m); - } - } - }; - Geometry::Match(*this, alloc, has_z, has_m, default_z, default_m); -} - -//------------------------------------------------------------------------------ -// Multi Part Geometry -//------------------------------------------------------------------------------ -/* -void MultiPartGeometry::Resize(Geometry& geom, ArenaAllocator &alloc, uint32_t new_count) { - D_ASSERT(GeometryTypes::IsMultiPart(geom.type)); - if (new_count == geom.data_count) { - return; - } - if (geom.data_ptr == nullptr) { - geom.data_ptr = alloc.AllocateAligned(sizeof(Geometry) * new_count); - // Need to create a new Geometry for each entry - for (uint32_t i = 0; i < new_count; i++) { - new (geom.data_ptr + i * sizeof(Geometry)) Geometry(); - } - } - else if(geom.IsReadOnly()) { - auto new_data = alloc.AllocateAligned(sizeof(Geometry) * new_count); - for(uint32_t i = 0; i < geom.data_count; i++) { - new (new_data + i * sizeof(Geometry)) Geometry(); - new_data[i] = geom.data_ptr[i]; - } - - - geom.data_ptr = new_data; - } - else { - geom.data_ptr = alloc.ReallocateAligned( - geom.data_ptr, geom.data_count * sizeof(Geometry), new_count * sizeof(Geometry)); - // If we added new entries, we need to create a new Geometry for each entry - for (uint32_t i = geom.data_count; i < new_count; i++) { - new (geom.data_ptr + i * sizeof(Geometry)) Geometry(); - } - } - geom.data_count = new_count; -} - */ - -/* -string Point::ToString() const { - if (IsEmpty()) { - return "POINT EMPTY"; - } - auto vert = vertices.Get(0); - if (std::isnan(vert.x) && std::isnan(vert.y)) { - // This is a 
special case for WKB. WKB does not support empty points, - // and instead writes a point with NaN coordinates. We therefore need to - // check for this case and return POINT EMPTY instead to round-trip safely - return "POINT EMPTY"; - } - return StringUtil::Format("POINT (%s)", Utils::format_coord(vert.x, vert.y)); -} - -string LineString::ToString() const { - auto count = vertices.Count(); - if (count == 0) { - return "LINESTRING EMPTY"; - } - - string result = "LINESTRING ("; - for (uint32_t i = 0; i < vertices.Count(); i++) { - auto x = vertices.Get(i).x; - auto y = vertices.Get(i).y; - result += Utils::format_coord(x, y); - if (i < vertices.Count() - 1) { - result += ", "; - } - } - result += ")"; - return result; -} - -string Polygon::ToString() const { - - // check if the polygon is empty - uint32_t total_verts = 0; - auto num_rings = ring_count; - for (uint32_t i = 0; i < num_rings; i++) { - total_verts += rings[i].Count(); - } - if (total_verts == 0) { - return "POLYGON EMPTY"; - } - - string result = "POLYGON ("; - for (uint32_t i = 0; i < num_rings; i++) { - result += "("; - for (uint32_t j = 0; j < rings[i].Count(); j++) { - auto x = rings[i].Get(j).x; - auto y = rings[i].Get(j).y; - result += Utils::format_coord(x, y); - if (j < rings[i].Count() - 1) { - result += ", "; - } - } - result += ")"; - if (i < num_rings - 1) { - result += ", "; - } - } - result += ")"; - return result; -} - -string MultiPoint::ToString() const { - auto num_points = ItemCount(); - if (num_points == 0) { - return "MULTIPOINT EMPTY"; - } - string str = "MULTIPOINT ("; - auto &points = *this; - for (uint32_t i = 0; i < num_points; i++) { - auto &point = points[i]; - if (point.IsEmpty()) { - str += "EMPTY"; - } else { - auto vert = point.Vertices().Get(0); - str += Utils::format_coord(vert.x, vert.y); - } - if (i < num_points - 1) { - str += ", "; - } - } - return str + ")"; -} - - -string MultiLineString::ToString() const { - auto count = ItemCount(); - if (count == 0) { - return 
"MULTILINESTRING EMPTY"; - } - string str = "MULTILINESTRING ("; - - bool first_line = true; - for (auto &line : *this) { - if (first_line) { - first_line = false; - } else { - str += ", "; - } - str += "("; - bool first_vert = true; - for (uint32_t i = 0; i < line.Vertices().Count(); i++) { - auto vert = line.Vertices().Get(i); - if (first_vert) { - first_vert = false; - } else { - str += ", "; - } - str += Utils::format_coord(vert.x, vert.y); - } - str += ")"; - } - return str + ")"; -} - -string MultiPolygon::ToString() const { - auto count = ItemCount(); - if (count == 0) { - return "MULTIPOLYGON EMPTY"; - } - string str = "MULTIPOLYGON ("; - - bool first_poly = true; - for (auto &poly : *this) { - if (first_poly) { - first_poly = false; - } else { - str += ", "; - } - str += "("; - bool first_ring = true; - for (auto &ring : poly) { - if (first_ring) { - first_ring = false; - } else { - str += ", "; - } - str += "("; - bool first_vert = true; - for (uint32_t v = 0; v < ring.Count(); v++) { - auto vert = ring.Get(v); - if (first_vert) { - first_vert = false; - } else { - str += ", "; - } - str += Utils::format_coord(vert.x, vert.y); - } - str += ")"; - } - str += ")"; - } - - return str + ")"; -} - -string GeometryCollection::ToString() const { - auto count = ItemCount(); - if (count == 0) { - return "GEOMETRYCOLLECTION EMPTY"; - } - string str = "GEOMETRYCOLLECTION ("; - for (uint32_t i = 0; i < count; i++) { - str += (*this)[i].ToString(); - if (i < count - 1) { - str += ", "; - } - } - return str + ")"; -} -*/ - -} // namespace core - -} // namespace spatial diff --git a/spatial/src/spatial/core/geometry/geometry_processor.cpp b/spatial/src/spatial/core/geometry/geometry_processor.cpp deleted file mode 100644 index 1c382652..00000000 --- a/spatial/src/spatial/core/geometry/geometry_processor.cpp +++ /dev/null @@ -1,11 +0,0 @@ -#include "spatial/core/geometry/geometry_processor.hpp" - -namespace spatial { - -namespace core { - -constexpr double 
VertexData::EMPTY_DATA; - -} // namespace core - -} // namespace spatial diff --git a/spatial/src/spatial/core/geometry/geometry_serialization.cpp b/spatial/src/spatial/core/geometry/geometry_serialization.cpp deleted file mode 100644 index a5a619d7..00000000 --- a/spatial/src/spatial/core/geometry/geometry_serialization.cpp +++ /dev/null @@ -1,396 +0,0 @@ -#include "spatial/common.hpp" -#include "spatial/core/util/cursor.hpp" -#include "spatial/core/geometry/geometry.hpp" -#include "spatial/core/geometry/geometry_processor.hpp" -#include "spatial/core/util/math.hpp" - -namespace spatial { - -namespace core { - -//---------------------------------------------------------------------- -// Serialization -//---------------------------------------------------------------------- -// We always want the coordinates to be double aligned (8 bytes) -// layout: -// GeometryHeader (4 bytes) -// Padding (4 bytes) (or SRID?) -// Data (variable length) -// -- Point -// Type ( 4 bytes) -// Count (4 bytes) (count == 0 if empty point, otherwise 1) -// X (8 bytes) -// Y (8 bytes) -// -- LineString -// Type (4 bytes) -// Length (4 bytes) -// Points (variable length) -// -- Polygon -// Type (4 bytes) -// NumRings (4 bytes) -// RingsLengths (variable length) -// padding (4 bytes if num_rings is odd) -// RingsData (variable length) -// --- Multi/Point/LineString/Polygon & GeometryCollection -// Type (4 bytes) -// NumGeometries (4 bytes) -// Geometries (variable length) - -template -struct GetRequiredSizeOp { - static uint32_t Case(Geometry::Tags::SinglePartGeometry, const Geometry &geom) { - // 4 bytes for the type - // 4 bytes for the length - // sizeof(vertex) * count - return 4 + 4 + (geom.Count() * sizeof(VERTEX)); - } - - static uint32_t Case(Geometry::Tags::Polygon, const Geometry &polygon) { - // Polygons are special because they may pad between the rings and the ring data - // 4 bytes for the type - // 4 bytes for the number of rings - // 4 bytes for the number of vertices in 
each ring - // - sizeof(vertex) * count for each ring - // (+ 4 bytes for padding if num_rings is odd) - uint32_t size = 4 + 4; - for (uint32_t i = 0; i < Polygon::PartCount(polygon); i++) { - size += 4; - size += Polygon::Part(polygon, i).Count() * sizeof(VERTEX); - } - if (Polygon::PartCount(polygon) % 2 == 1) { - size += 4; - } - return size; - } - - static uint32_t Case(Geometry::Tags::CollectionGeometry, const Geometry &collection) { - // 4 bytes for the type - // 4 bytes for the number of items - // recursive call for each item - uint32_t size = 4 + 4; - for (uint32_t i = 0; i < CollectionGeometry::PartCount(collection); i++) { - auto &part = CollectionGeometry::Part(collection, i); - size += Geometry::Match>(part); - } - return size; - } -}; - -template -struct SerializeOp { - static constexpr uint32_t MAX_DEPTH = 256; - - static void SerializeVertices(const Geometry &verts, Cursor &cursor, Box &bbox, bool update_bounds) { - // Write the vertex data - auto byte_size = SinglePartGeometry::ByteSize(verts); - memcpy(cursor.GetPtr(), verts.GetData(), byte_size); - // Move the cursor forward - cursor.Skip(byte_size); - // Also update the bounds real quick - if (update_bounds) { - for (uint32_t i = 0; i < verts.Count(); i++) { - auto vertex = SinglePartGeometry::GetVertex(verts, i); - bbox.Stretch(vertex); - } - } - } - - static void Case(Geometry::Tags::Point, const Geometry &point, Cursor &cursor, Box &bbox, uint32_t depth) { - D_ASSERT(point.GetProperties().HasZ() == VERTEX::HAS_Z); - D_ASSERT(point.GetProperties().HasM() == VERTEX::HAS_M); - - // Write type (4 bytes) - cursor.Write(SerializedGeometryType::POINT); - - // Write point count (0 or 1) (4 bytes) - cursor.Write(point.Count()); - - // write data - // We only update the bounds if this is a point part of a larger geometry - SerializeVertices(point, cursor, bbox, depth != 0); - } - - static void Case(Geometry::Tags::LineString, const Geometry &linestring, Cursor &cursor, Box &bbox, - uint32_t) { - 
D_ASSERT(linestring.GetProperties().HasZ() == VERTEX::HAS_Z); - D_ASSERT(linestring.GetProperties().HasM() == VERTEX::HAS_M); - - // Write type (4 bytes) - cursor.Write(SerializedGeometryType::LINESTRING); - - // Write point count (4 bytes) - cursor.Write(linestring.Count()); - - // write data - SerializeVertices(linestring, cursor, bbox, true); - } - - static void Case(Geometry::Tags::Polygon, const Geometry &polygon, Cursor &cursor, Box &bbox, uint32_t) { - D_ASSERT(polygon.GetProperties().HasZ() == VERTEX::HAS_Z); - D_ASSERT(polygon.GetProperties().HasM() == VERTEX::HAS_M); - - // Write type (4 bytes) - cursor.Write(SerializedGeometryType::POLYGON); - - // Write number of rings (4 bytes) - cursor.Write(polygon.Count()); - - // Write ring lengths - for (uint32_t i = 0; i < Polygon::PartCount(polygon); i++) { - cursor.Write(Polygon::Part(polygon, i).Count()); - } - - if (polygon.Count() % 2 == 1) { - // Write padding (4 bytes) - cursor.Write(0); - } - - // Write ring data - for (uint32_t i = 0; i < polygon.Count(); i++) { - // The first ring is always the shell, and must be the only ring contributing to the bounding box - // or the geometry is invalid. 
- SerializeVertices(Polygon::Part(polygon, i), cursor, bbox, i == 0); - } - } - - static void Case(Geometry::Tags::MultiPoint, const Geometry &multipoint, Cursor &cursor, Box &bbox, - uint32_t depth) { - D_ASSERT(multipoint.GetProperties().HasZ() == VERTEX::HAS_Z); - D_ASSERT(multipoint.GetProperties().HasM() == VERTEX::HAS_M); - - // Write type (4 bytes) - cursor.Write(SerializedGeometryType::MULTIPOINT); - - // Write number of points (4 bytes) - cursor.Write(multipoint.Count()); - - // Write point data - for (uint32_t i = 0; i < MultiPoint::PartCount(multipoint); i++) { - Case(Geometry::Tags::Point {}, MultiPoint::Part(multipoint, i), cursor, bbox, depth + 1); - } - } - - static void Case(Geometry::Tags::MultiLineString, const Geometry &multilinestring, Cursor &cursor, - Box &bbox, uint32_t depth) { - D_ASSERT(multilinestring.GetProperties().HasZ() == VERTEX::HAS_Z); - D_ASSERT(multilinestring.GetProperties().HasM() == VERTEX::HAS_M); - - // Write type (4 bytes) - cursor.Write(SerializedGeometryType::MULTILINESTRING); - - // Write number of linestrings (4 bytes) - cursor.Write(multilinestring.Count()); - - // Write linestring data - for (uint32_t i = 0; i < MultiLineString::PartCount(multilinestring); i++) { - Case(Geometry::Tags::LineString {}, MultiLineString::Part(multilinestring, i), cursor, bbox, depth + 1); - } - } - - static void Case(Geometry::Tags::MultiPolygon, const Geometry &multipolygon, Cursor &cursor, Box &bbox, - uint32_t depth) { - D_ASSERT(multipolygon.GetProperties().HasZ() == VERTEX::HAS_Z); - D_ASSERT(multipolygon.GetProperties().HasM() == VERTEX::HAS_M); - - // Write type (4 bytes) - cursor.Write(SerializedGeometryType::MULTIPOLYGON); - - // Write number of polygons (4 bytes) - cursor.Write(multipolygon.Count()); - - // Write polygon data - for (uint32_t i = 0; i < MultiPolygon::PartCount(multipolygon); i++) { - Case(Geometry::Tags::Polygon {}, MultiPolygon::Part(multipolygon, i), cursor, bbox, depth + 1); - } - } - - static void 
Case(Geometry::Tags::GeometryCollection, const Geometry &collection, Cursor &cursor, Box &bbox, - uint32_t depth) { - D_ASSERT(collection.GetProperties().HasZ() == VERTEX::HAS_Z); - D_ASSERT(collection.GetProperties().HasM() == VERTEX::HAS_M); - - // TODO: Maybe make this configurable? - if (depth > MAX_DEPTH) { - throw SerializationException("GeometryCollection depth exceeded 256!"); - } - - // Write type (4 bytes) - cursor.Write(SerializedGeometryType::GEOMETRYCOLLECTION); - - // Write number of geometries (4 bytes) - cursor.Write(collection.Count()); - - // write geometry data - for (uint32_t i = 0; i < GeometryCollection::PartCount(collection); i++) { - auto &geom = GeometryCollection::Part(collection, i); - Geometry::Match>(geom, cursor, bbox, depth + 1); - } - } -}; - -template -void SerializeTemplated(const Geometry &geom, Cursor &cursor, bool has_bbox, uint32_t bbox_size) { - - // All geometries except points have a bounding box - Box bbox; - // skip the bounding box for now - // we will come back and write it later - auto bbox_ptr = cursor.GetPtr(); - cursor.Skip(bbox_size); - - // Serialize the geometry - Geometry::Match>(geom, cursor, bbox, 0); - - // Now write the bounding box - if (has_bbox) { - cursor.SetPtr(bbox_ptr); - // We serialize the bounding box as floats to save space, but ensure that the bounding box is - // still large enough to contain the original double values by rounding up and down - // TODO: If we ever break storage, swap the order of these so that its minx/maxx/miny/maxy - cursor.Write(MathUtil::DoubleToFloatDown(bbox.min.x)); - cursor.Write(MathUtil::DoubleToFloatDown(bbox.min.y)); - cursor.Write(MathUtil::DoubleToFloatUp(bbox.max.x)); - cursor.Write(MathUtil::DoubleToFloatUp(bbox.max.y)); - - // Write the extra M and Z values, if needed - for (idx_t i = 2; i < V::SIZE; i++) { - cursor.Write(MathUtil::DoubleToFloatDown(bbox.min[i])); - cursor.Write(MathUtil::DoubleToFloatUp(bbox.max[i])); - } - } -} - -geometry_t 
Geometry::Serialize(const Geometry &geom, Vector &result) { - auto type = geom.GetType(); - bool has_bbox = type != GeometryType::POINT && !Geometry::IsEmpty(geom); - - auto properties = geom.GetProperties(); - auto has_z = properties.HasZ(); - auto has_m = properties.HasM(); - properties.SetBBox(has_bbox); - - uint32_t geom_size = 0; - if (has_z && has_m) { - geom_size = Geometry::Match>(geom); - } else if (has_z) { - geom_size = Geometry::Match>(geom); - } else if (has_m) { - geom_size = Geometry::Match>(geom); - } else { - geom_size = Geometry::Match>(geom); - } - - auto header_size = 4; - auto dims = 2 + (has_z ? 1 : 0) + (has_m ? 1 : 0); - auto bbox_size = has_bbox ? (sizeof(float) * 2 * dims) : 0; - auto size = header_size + 4 + bbox_size + geom_size; // + 4 for padding, + 16 for bbox - auto blob = StringVector::EmptyString(result, size); - - Cursor cursor(blob); - - // Write the header - cursor.Write(type); - cursor.Write(properties); - cursor.Write(0); - // Pad with 4 bytes (we might want to use this to store SRID in the future) - cursor.Write(0); - - if (has_z && has_m) { - SerializeTemplated(geom, cursor, has_bbox, bbox_size); - } else if (has_z) { - SerializeTemplated(geom, cursor, has_bbox, bbox_size); - } else if (has_m) { - SerializeTemplated(geom, cursor, has_bbox, bbox_size); - } else { - SerializeTemplated(geom, cursor, has_bbox, bbox_size); - } - - blob.Finalize(); - return geometry_t(blob); -} - -//---------------------------------------------------------------------- -// Deserialization -//---------------------------------------------------------------------- -class GeometryDeserializer final : GeometryProcessor { - ArenaAllocator &allocator; - - Geometry ProcessPoint(const VertexData &vertices) override { - auto point = Point::CreateEmpty(HasZ(), HasM()); - if (!vertices.IsEmpty()) { - Point::ReferenceData(point, vertices.data[0], vertices.count); - } - return point; - } - - Geometry ProcessLineString(const VertexData &vertices) override { - 
auto line_string = LineString::Create(allocator, vertices.count, HasZ(), HasM()); - if (!vertices.IsEmpty()) { - LineString::ReferenceData(line_string, vertices.data[0], vertices.count); - } - return line_string; - } - - Geometry ProcessPolygon(PolygonState &state) override { - auto polygon = Polygon::Create(allocator, state.RingCount(), HasZ(), HasM()); - for (auto i = 0; i < state.RingCount(); i++) { - auto vertices = state.Next(); - if (!vertices.IsEmpty()) { - auto &part = Polygon::Part(polygon, i); - LineString::ReferenceData(part, vertices.data[0], vertices.count); - } - } - return polygon; - } - - Geometry ProcessCollection(CollectionState &state) override { - switch (CurrentType()) { - case GeometryType::MULTIPOINT: { - auto multi_point = MultiPoint::Create(allocator, state.ItemCount(), HasZ(), HasM()); - for (auto i = 0; i < state.ItemCount(); i++) { - MultiPoint::Part(multi_point, i) = state.Next(); - } - return multi_point; - } - case GeometryType::MULTILINESTRING: { - auto multi_line_string = MultiLineString::Create(allocator, state.ItemCount(), HasZ(), HasM()); - for (auto i = 0; i < state.ItemCount(); i++) { - MultiLineString::Part(multi_line_string, i) = state.Next(); - } - return multi_line_string; - } - case GeometryType::MULTIPOLYGON: { - auto multi_polygon = MultiPolygon::Create(allocator, state.ItemCount(), HasZ(), HasM()); - for (auto i = 0; i < state.ItemCount(); i++) { - MultiPolygon::Part(multi_polygon, i) = state.Next(); - } - return multi_polygon; - } - case GeometryType::GEOMETRYCOLLECTION: { - auto collection = GeometryCollection::Create(allocator, state.ItemCount(), HasZ(), HasM()); - for (auto i = 0; i < state.ItemCount(); i++) { - GeometryCollection::Part(collection, i) = state.Next(); - } - return collection; - } - default: - throw NotImplementedException("GeometryDeserializer: Unimplemented geometry type: %d", CurrentType()); - } - } - -public: - explicit GeometryDeserializer(ArenaAllocator &allocator) : allocator(allocator) { - } - 
Geometry Execute(const geometry_t &data) { - return Process(data); - } -}; - -Geometry Geometry::Deserialize(ArenaAllocator &arena, const geometry_t &data) { - GeometryDeserializer deserializer(arena); - return deserializer.Execute(data); -} - -} // namespace core - -} // namespace spatial diff --git a/spatial/src/spatial/core/geometry/wkb_reader.cpp b/spatial/src/spatial/core/geometry/wkb_reader.cpp deleted file mode 100644 index 713646fb..00000000 --- a/spatial/src/spatial/core/geometry/wkb_reader.cpp +++ /dev/null @@ -1,201 +0,0 @@ -#include "spatial/common.hpp" -#include "spatial/core/geometry/wkb_reader.hpp" -#include "spatial/core/geometry/geometry.hpp" - -namespace spatial { - -namespace core { - -Geometry WKBReader::Deserialize(const string_t &wkb) { - return Deserialize(const_data_ptr_cast(wkb.GetDataUnsafe()), wkb.GetSize()); -} - -Geometry WKBReader::Deserialize(const_data_ptr_t wkb, uint32_t size) { - Cursor cursor(const_cast(wkb), const_cast(wkb + size)); - - has_any_m = false; - has_any_z = false; - - auto geom = ReadGeometry(cursor); - - // Make sure the geometry has unified vertex type, in case we got some funky nested WKB with mixed dimensions - geom.SetVertexType(arena, has_any_z, has_any_m); - - return geom; -} - -uint32_t WKBReader::ReadInt(Cursor &cursor, bool little_endian) { - if (little_endian) { - return cursor.Read(); - } else { - return cursor.ReadBigEndian(); - } -} - -double WKBReader::ReadDouble(Cursor &cursor, bool little_endian) { - if (little_endian) { - return cursor.Read(); - } else { - return cursor.ReadBigEndian(); - } -} - -WKBReader::WKBType WKBReader::ReadType(Cursor &cursor, bool little_endian) { - auto wkb_type = ReadInt(cursor, little_endian); - // Subtract 1 since the WKB type is 1-indexed - auto geometry_type = static_cast(((wkb_type & 0xffff) % 1000) - 1); - bool has_z = false; - bool has_m = false; - bool has_srid = false; - // Check for ISO WKB Z and M flags - uint32_t iso_wkb_props = (wkb_type & 0xffff) / 1000; - 
has_z = (iso_wkb_props == 1) || (iso_wkb_props == 3); - has_m = (iso_wkb_props == 2) || (iso_wkb_props == 3); - - // Check for EWKB Z and M flags - has_z = has_z | ((wkb_type & 0x80000000) != 0); - has_m = has_m | ((wkb_type & 0x40000000) != 0); - has_srid = (wkb_type & 0x20000000) != 0; - - if (has_srid) { - // We don't support SRID yet, so just skip it if we encounter it - cursor.Skip(sizeof(uint32_t)); - } - - has_any_z |= has_z; - has_any_m |= has_m; - - return {geometry_type, has_z, has_m}; -} - -Geometry WKBReader::ReadPoint(Cursor &cursor, bool little_endian, bool has_z, bool has_m) { - uint32_t dims = 2 + has_z + has_m; - bool all_nan = true; - double coords[4]; - for (uint32_t i = 0; i < dims; i++) { - coords[i] = ReadDouble(cursor, little_endian); - if (!std::isnan(coords[i])) { - all_nan = false; - } - } - if (all_nan) { - return Point::CreateEmpty(has_z, has_m); - } else { - return Point::CreateFromCopy(arena, data_ptr_cast(coords), 1, has_z, has_m); - } -} - -void WKBReader::ReadVertices(Cursor &cursor, bool little_endian, bool has_z, bool has_m, Geometry &geometry) { - for (uint32_t i = 0; i < geometry.Count(); i++) { - if (has_z && has_m) { - auto x = ReadDouble(cursor, little_endian); - auto y = ReadDouble(cursor, little_endian); - auto z = ReadDouble(cursor, little_endian); - auto m = ReadDouble(cursor, little_endian); - SinglePartGeometry::SetVertex(geometry, i, VertexXYZM {x, y, z, m}); - } else if (has_z) { - auto x = ReadDouble(cursor, little_endian); - auto y = ReadDouble(cursor, little_endian); - auto z = ReadDouble(cursor, little_endian); - SinglePartGeometry::SetVertex(geometry, i, VertexXYZ {x, y, z}); - } else if (has_m) { - auto x = ReadDouble(cursor, little_endian); - auto y = ReadDouble(cursor, little_endian); - auto m = ReadDouble(cursor, little_endian); - SinglePartGeometry::SetVertex(geometry, i, VertexXYM {x, y, m}); - } else { - auto x = ReadDouble(cursor, little_endian); - auto y = ReadDouble(cursor, little_endian); - 
SinglePartGeometry::SetVertex(geometry, i, VertexXY {x, y}); - } - } -} - -Geometry WKBReader::ReadLineString(Cursor &cursor, bool little_endian, bool has_z, bool has_m) { - auto count = ReadInt(cursor, little_endian); - auto vertices = LineString::Create(arena, count, has_z, has_m); - ReadVertices(cursor, little_endian, has_z, has_m, vertices); - return vertices; -} - -Geometry WKBReader::ReadPolygon(Cursor &cursor, bool little_endian, bool has_z, bool has_m) { - auto ring_count = ReadInt(cursor, little_endian); - auto polygon = Polygon::Create(arena, ring_count, has_z, has_m); - for (uint32_t i = 0; i < ring_count; i++) { - auto point_count = ReadInt(cursor, little_endian); - Polygon::Part(polygon, i) = LineString::Create(arena, point_count, has_z, has_m); - ReadVertices(cursor, little_endian, has_z, has_m, Polygon::Part(polygon, i)); - } - return polygon; -} - -Geometry WKBReader::ReadMultiPoint(Cursor &cursor, bool little_endian, bool has_z, bool has_m) { - uint32_t count = ReadInt(cursor, little_endian); - auto multi_point = MultiPoint::Create(arena, count, has_z, has_m); - for (uint32_t i = 0; i < count; i++) { - bool point_order = cursor.Read(); - auto point_type = ReadType(cursor, point_order); - MultiPoint::Part(multi_point, i) = ReadPoint(cursor, point_order, point_type.has_z, point_type.has_m); - } - return multi_point; -} - -Geometry WKBReader::ReadMultiLineString(Cursor &cursor, bool little_endian, bool has_z, bool has_m) { - uint32_t count = ReadInt(cursor, little_endian); - auto multi_line_string = MultiLineString::Create(arena, count, has_z, has_m); - for (uint32_t i = 0; i < count; i++) { - bool line_order = cursor.Read(); - auto line_type = ReadType(cursor, line_order); - MultiLineString::Part(multi_line_string, i) = - ReadLineString(cursor, line_order, line_type.has_z, line_type.has_m); - } - return multi_line_string; -} - -Geometry WKBReader::ReadMultiPolygon(Cursor &cursor, bool little_endian, bool has_z, bool has_m) { - uint32_t count = 
ReadInt(cursor, little_endian); - auto multi_polygon = MultiPolygon::Create(arena, count, has_z, has_m); - for (uint32_t i = 0; i < count; i++) { - bool polygon_order = cursor.Read(); - auto polygon_type = ReadType(cursor, polygon_order); - MultiPolygon::Part(multi_polygon, i) = - ReadPolygon(cursor, polygon_order, polygon_type.has_z, polygon_type.has_m); - } - return multi_polygon; -} - -Geometry WKBReader::ReadGeometryCollection(Cursor &cursor, bool little_endian, bool has_z, bool has_m) { - uint32_t count = ReadInt(cursor, little_endian); - auto geometry_collection = GeometryCollection::Create(arena, count, has_z, has_m); - for (uint32_t i = 0; i < count; i++) { - GeometryCollection::Part(geometry_collection, i) = ReadGeometry(cursor); - } - return geometry_collection; -} - -Geometry WKBReader::ReadGeometry(Cursor &cursor) { - bool little_endian = cursor.Read(); - auto type = ReadType(cursor, little_endian); - switch (type.type) { - case GeometryType::POINT: - return ReadPoint(cursor, little_endian, type.has_z, type.has_m); - case GeometryType::LINESTRING: - return ReadLineString(cursor, little_endian, type.has_z, type.has_m); - case GeometryType::POLYGON: - return ReadPolygon(cursor, little_endian, type.has_z, type.has_m); - case GeometryType::MULTIPOINT: - return ReadMultiPoint(cursor, little_endian, type.has_z, type.has_m); - case GeometryType::MULTILINESTRING: - return ReadMultiLineString(cursor, little_endian, type.has_z, type.has_m); - case GeometryType::MULTIPOLYGON: - return ReadMultiPolygon(cursor, little_endian, type.has_z, type.has_m); - case GeometryType::GEOMETRYCOLLECTION: - return ReadGeometryCollection(cursor, little_endian, type.has_z, type.has_m); - default: - throw NotImplementedException("WKB Reader: Geometry type %u not supported", type.type); - } -} - -} // namespace core - -} // namespace spatial diff --git a/spatial/src/spatial/core/geometry/wkt_reader.cpp b/spatial/src/spatial/core/geometry/wkt_reader.cpp deleted file mode 100644 index 
d1d25fd2..00000000 --- a/spatial/src/spatial/core/geometry/wkt_reader.cpp +++ /dev/null @@ -1,344 +0,0 @@ -#include "spatial/common.hpp" -#include "spatial/core/geometry/geometry.hpp" -#include "spatial/core/geometry/wkt_reader.hpp" -#include "fast_float/fast_float.h" - -namespace spatial { - -namespace core { - -// TODO: Support the full EWKT spec (e.g. SRID and other metadata) -// TODO: Support better error messages using DuckDBs new error context -string WKTReader::GetErrorContext() { - // Return a string of the current position in the input string - const auto len = 32; - const auto msg_start = std::max(cursor - len, start); - const auto msg_end = std::min(cursor + 1, end); - auto msg = string(msg_start, msg_end); - if (msg_start != start) { - msg = "..." + msg; - } - // Add an arrow to indicate the position - msg = "at position " + std::to_string(cursor - start) + " near: '" + msg + "'|<---"; - return msg; -} - -bool WKTReader::TryParseDouble(double &data) { - auto result = duckdb_fast_float::from_chars(cursor, end, data); - if (result.ec == std::errc()) { - cursor = result.ptr; - while (cursor < end && std::isspace(*cursor)) { - cursor++; - } - return true; - } else { - return false; - } -} - -double WKTReader::ParseDouble() { - double result; - if (!TryParseDouble(result)) { - auto msg = "WKT Parser: Expected double " + GetErrorContext(); - throw InvalidInputException(msg); - } - return result; -}; - -string WKTReader::ParseWord() { - auto pos = cursor; - while (cursor < end && !std::isspace(*cursor) && std::isalnum(*cursor)) { - cursor++; - } - return string(pos, cursor); -} - -bool WKTReader::Match(char c) { - if (*cursor == c) { - cursor++; - while (cursor < end && std::isspace(*cursor)) { - cursor++; - } - return true; - } else { - return false; - } -} - -bool WKTReader::MatchCI(const char *str) { - auto pos = cursor; - while (*str) { - if (std::tolower(*str) != std::tolower(*cursor)) { - cursor = pos; - return false; - } - str++; - cursor++; - } - while 
(cursor < end && std::isspace(*cursor)) { - cursor++; - } - return true; -} - -void WKTReader::Expect(char c) { - if (!Match(c)) { - auto msg = "WKT Parser: Expected character '" + string(1, c) + "' " + GetErrorContext(); - throw InvalidInputException(msg); - } -} - -void WKTReader::ParseVertex(vector &coords) { - double x, y, z, m; - x = ParseDouble(); - y = ParseDouble(); - if (has_z) { - z = ParseDouble(); - } - if (has_m) { - m = ParseDouble(); - } - coords.push_back(x); - coords.push_back(y); - if (has_z) { - coords.push_back(z); - } - if (has_m) { - coords.push_back(m); - } -} - -pair> WKTReader::ParseVertices() { - vector coords; - if (MatchCI("EMPTY")) { - return {0, coords}; - } - Expect('('); - uint32_t count = 0; - ParseVertex(coords); - count++; - while (Match(',')) { - ParseVertex(coords); - count++; - } - Expect(')'); - return {count, coords}; -} - -Geometry WKTReader::ParsePoint() { - if (MatchCI("EMPTY")) { - return Point::CreateEmpty(has_z, has_m); - } - Expect('('); - vector coords; - ParseVertex(coords); - Expect(')'); - return Point::CreateFromCopy(arena, data_ptr_cast(coords.data()), 1, has_z, has_m); -} - -Geometry WKTReader::ParseLineString() { - auto verts = ParseVertices(); - return LineString::CreateFromCopy(arena, data_ptr_cast(verts.second.data()), verts.first, has_z, has_m); -} - -Geometry WKTReader::ParsePolygon() { - if (MatchCI("EMPTY")) { - return Polygon::CreateEmpty(has_z, has_m); - } - Expect('('); - vector>> rings; - rings.push_back(ParseVertices()); - while (Match(',')) { - rings.push_back(ParseVertices()); - } - Expect(')'); - auto result = Polygon::Create(arena, rings.size(), has_z, has_m); - for (uint32_t i = 0; i < rings.size(); i++) { - auto &ring = Polygon::Part(result, i); - LineString::CopyData(ring, arena, data_ptr_cast(rings[i].second.data()), rings[i].first); - } - return result; -} - -Geometry WKTReader::ParseMultiPoint() { - if (MatchCI("EMPTY")) { - return MultiPoint::CreateEmpty(has_z, has_m); - } - // 
Multipoints are special in that parens around each point is optional. - Expect('('); - vector coords; - vector points; - bool optional_paren = false; - - if (Match('(')) { - optional_paren = true; - } - ParseVertex(coords); - if (optional_paren) { - Expect(')'); - optional_paren = false; - } - points.push_back(Point::CreateFromCopy(arena, data_ptr_cast(coords.data()), 1, has_z, has_m)); - coords.clear(); - while (Match(',')) { - if (Match('(')) { - optional_paren = true; - } - ParseVertex(coords); - if (optional_paren) { - Expect(')'); - optional_paren = false; - } - points.push_back(Point::CreateFromCopy(arena, data_ptr_cast(coords.data()), 1, has_z, has_m)); - coords.clear(); - } - Expect(')'); - auto result = MultiPoint::Create(arena, points.size(), has_z, has_m); - for (uint32_t i = 0; i < points.size(); i++) { - MultiPoint::Part(result, i) = points[i]; - } - return result; -} - -Geometry WKTReader::ParseMultiLineString() { - if (MatchCI("EMPTY")) { - return MultiLineString::CreateEmpty(has_z, has_m); - } - Expect('('); - vector lines; - lines.push_back(ParseLineString()); - while (Match(',')) { - lines.push_back(ParseLineString()); - } - Expect(')'); - auto result = MultiLineString::Create(arena, lines.size(), has_z, has_m); - for (uint32_t i = 0; i < lines.size(); i++) { - MultiLineString::Part(result, i) = lines[i]; - } - return result; -} - -Geometry WKTReader::ParseMultiPolygon() { - if (MatchCI("EMPTY")) { - return MultiPolygon::CreateEmpty(has_z, has_m); - } - Expect('('); - vector polygons; - polygons.push_back(ParsePolygon()); - while (Match(',')) { - polygons.push_back(ParsePolygon()); - } - Expect(')'); - auto result = MultiPolygon::Create(arena, polygons.size(), has_z, has_m); - for (uint32_t i = 0; i < polygons.size(); i++) { - MultiPolygon::Part(result, i) = polygons[i]; - } - return result; -} - -Geometry WKTReader::ParseGeometryCollection() { - if (MatchCI("EMPTY")) { - return GeometryCollection::CreateEmpty(has_z, has_m); - } - Expect('('); - 
vector geometries; - geometries.push_back(ParseGeometry()); - while (Match(',')) { - geometries.push_back(ParseGeometry()); - } - Expect(')'); - auto result = GeometryCollection::Create(arena, geometries.size(), has_z, has_m); - for (uint32_t i = 0; i < geometries.size(); i++) { - GeometryCollection::Part(result, i) = geometries[i]; - } - return result; -} - -void WKTReader::CheckZM() { - bool geom_has_z = false; - bool geom_has_m = false; - if (Match('Z')) { - geom_has_z = true; - if (Match('M')) { - geom_has_m = true; - } - } else if (Match('M')) { - geom_has_m = true; - } - - if (zm_set) { - if (has_z != geom_has_z || has_m != geom_has_m) { - auto msg = "WKT Parser: GeometryCollection with mixed Z and M types are not supported, mismatch " + - GetErrorContext(); - throw InvalidInputException(msg); - } - } else { - has_z = geom_has_z; - has_m = geom_has_m; - zm_set = true; - } -} - -Geometry WKTReader::ParseGeometry() { - if (MatchCI("POINT")) { - CheckZM(); - return ParsePoint(); - } - if (MatchCI("LINESTRING")) { - CheckZM(); - return ParseLineString(); - } - if (MatchCI("POLYGON")) { - CheckZM(); - return ParsePolygon(); - } - if (MatchCI("MULTIPOINT")) { - CheckZM(); - return ParseMultiPoint(); - } - if (MatchCI("MULTILINESTRING")) { - CheckZM(); - return ParseMultiLineString(); - } - if (MatchCI("MULTIPOLYGON")) { - CheckZM(); - return ParseMultiPolygon(); - } - if (MatchCI("GEOMETRYCOLLECTION")) { - CheckZM(); - return ParseGeometryCollection(); - } - auto context = GetErrorContext(); - auto msg = "WKT Parser: Unknown geometry type '" + ParseWord() + "' " + context; - throw InvalidInputException(msg); -} - -Geometry WKTReader::ParseWKT() { - // TODO: Handle EWKT properly. 
This is just a temporary fix to ignore SRID - if (MatchCI("SRID")) { - // Discard everything until the next semicolon - while (cursor < end && *cursor != ';') { - cursor++; - } - Expect(';'); - while (cursor < end && std::isspace(*cursor)) { - cursor++; - } - } - return ParseGeometry(); -} - -Geometry WKTReader::Parse(const string_t &wkt) { - start = wkt.GetDataUnsafe(); - cursor = wkt.GetDataUnsafe(); - end = wkt.GetDataUnsafe() + wkt.GetSize(); - zm_set = false; - has_z = false; - has_m = false; - auto geom = ParseWKT(); - return geom; -} - -} // namespace core - -} // namespace spatial diff --git a/spatial/src/spatial/core/io/shapefile/CMakeLists.txt b/spatial/src/spatial/core/io/shapefile/CMakeLists.txt deleted file mode 100644 index f87fd287..00000000 --- a/spatial/src/spatial/core/io/shapefile/CMakeLists.txt +++ /dev/null @@ -1,7 +0,0 @@ -set(EXTENSION_SOURCES - ${EXTENSION_SOURCES} - ${CMAKE_CURRENT_SOURCE_DIR}/read_shapefile.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/read_shapefile_meta.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/shapefile_common.cpp - PARENT_SCOPE -) \ No newline at end of file diff --git a/spatial/src/spatial/core/io/shapefile/read_shapefile.cpp b/spatial/src/spatial/core/io/shapefile/read_shapefile.cpp deleted file mode 100644 index d8c97082..00000000 --- a/spatial/src/spatial/core/io/shapefile/read_shapefile.cpp +++ /dev/null @@ -1,578 +0,0 @@ -#include "duckdb/parser/parsed_data/create_table_function_info.hpp" -#include "duckdb/storage/buffer_manager.hpp" -#include "duckdb/function/replacement_scan.hpp" -#include "duckdb/parser/expression/constant_expression.hpp" -#include "duckdb/parser/expression/function_expression.hpp" -#include "duckdb/parser/tableref/table_function_ref.hpp" -#include "duckdb/common/multi_file_reader.hpp" - -#include "duckdb/function/copy_function.hpp" -#include "duckdb/parser/parsed_data/copy_info.hpp" -#include "duckdb/parser/parsed_data/create_copy_function_info.hpp" - -#include "spatial/common.hpp" -#include 
"spatial/core/geometry/geometry.hpp" -#include "spatial/core/io/shapefile.hpp" -#include "spatial/core/functions/table.hpp" -#include "spatial/core/types.hpp" - -#include "shapefil.h" -#include "utf8proc_wrapper.hpp" - -namespace spatial { - -namespace core { - -//------------------------------------------------------------------------------ -// Bind -//------------------------------------------------------------------------------ - -struct ShapefileBindData : TableFunctionData { - string file_name; - int shape_count; - int shape_type; - double min_bound[4]; - double max_bound[4]; - AttributeEncoding attribute_encoding; - vector attribute_types; - - explicit ShapefileBindData(string file_name_p) - : file_name(std::move(file_name_p)), shape_count(0), - shape_type(0), min_bound {0, 0, 0, 0}, max_bound {0, 0, 0, 0}, attribute_encoding(AttributeEncoding::LATIN1) { - } -}; - -static unique_ptr Bind(ClientContext &context, TableFunctionBindInput &input, - vector &return_types, vector &names) { - - auto file_name = StringValue::Get(input.inputs[0]); - auto result = make_uniq(file_name); - - auto &fs = FileSystem::GetFileSystem(context); - auto shp_handle = OpenSHPFile(fs, file_name); - - // Get info about the geometry - SHPGetInfo(shp_handle.get(), &result->shape_count, &result->shape_type, result->min_bound, result->max_bound); - - // Ensure we have a supported shape type - auto valid_types = {SHPT_NULL, SHPT_POINT, SHPT_ARC, SHPT_POLYGON, SHPT_MULTIPOINT}; - bool is_valid_type = false; - for (auto type : valid_types) { - if (result->shape_type == type) { - is_valid_type = true; - break; - } - } - if (!is_valid_type) { - throw InvalidInputException("Invalid shape type %d", result->shape_type); - } - - auto base_name = file_name.substr(0, file_name.find_last_of('.')); - - // A standards compliant shapefile should use ISO-8859-1 encoding for attributes, but it can be overridden - // by a .cpg file. 
So check if there is a .cpg file, if so use that to determine the encoding - auto cpg_file = base_name + ".cpg"; - if (fs.FileExists(cpg_file)) { - auto cpg_handle = fs.OpenFile(cpg_file, FileFlags::FILE_FLAGS_READ); - auto cpg_type = StringUtil::Lower(cpg_handle->ReadLine()); - if (cpg_type == "utf-8") { - result->attribute_encoding = AttributeEncoding::UTF8; - } else if (cpg_type == "iso-8859-1") { - result->attribute_encoding = AttributeEncoding::LATIN1; - } else { - // Otherwise, parse as blob - result->attribute_encoding = AttributeEncoding::BLOB; - } - } - - for (auto &kv : input.named_parameters) { - if (kv.first == "encoding") { - auto encoding = StringUtil::Lower(StringValue::Get(kv.second)); - if (encoding == "utf-8") { - result->attribute_encoding = AttributeEncoding::UTF8; - } else if (encoding == "iso-8859-1") { - result->attribute_encoding = AttributeEncoding::LATIN1; - } else if (encoding == "blob") { - // Otherwise, parse as blob - result->attribute_encoding = AttributeEncoding::BLOB; - } else { - vector candidates = {"utf-8", "iso-8859-1", "blob"}; - auto msg = StringUtil::CandidatesErrorMessage(candidates, encoding, "encoding"); - throw InvalidInputException("Invalid encoding %s", encoding.c_str()); - } - } - if (kv.first == "spatial_filter_box") { - auto filter_box = StructValue::GetChildren(kv.second); - } - } - - // Get info about the attributes - // Remove file extension and replace with .dbf - auto dbf_handle = OpenDBFFile(fs, base_name + ".dbf"); - - // TODO: Try to get the encoding from the dbf if there is no .cpg file - // auto code_page = DBFGetCodePage(dbf_handle.get()); - // if(!has_cpg_file && code_page != 0) { } - - // Then return the attributes - auto field_count = DBFGetFieldCount(dbf_handle.get()); - char field_name[12]; // Max field name length is 11 + null terminator - int field_width = 0; - int field_precision = 0; - memset(field_name, 0, sizeof(field_name)); - - for (int i = 0; i < field_count; i++) { - auto field_type = 
DBFGetFieldInfo(dbf_handle.get(), i, field_name, &field_width, &field_precision); - - LogicalType type; - switch (field_type) { - case FTString: - type = result->attribute_encoding == AttributeEncoding::BLOB ? LogicalType::BLOB : LogicalType::VARCHAR; - break; - case FTInteger: - type = LogicalType::INTEGER; - break; - case FTDouble: - if (field_precision == 0 && field_width < 19) { - type = LogicalType::BIGINT; - } else { - type = LogicalType::DOUBLE; - } - break; - case FTDate: - // Dates are stored as 8-char strings - // YYYYMMDD - type = LogicalType::DATE; - break; - case FTLogical: - type = LogicalType::BOOLEAN; - break; - default: - throw InvalidInputException("DBF field type %d not supported", field_type); - } - names.emplace_back(field_name); - return_types.push_back(type); - result->attribute_types.push_back(type); - } - - // Always return geometry last - return_types.push_back(GeoTypes::GEOMETRY()); - names.push_back("geom"); - - // Deduplicate field names if necessary - for (size_t i = 0; i < names.size(); i++) { - idx_t count = 1; - for (size_t j = i + 1; j < names.size(); j++) { - if (names[i] == names[j]) { - names[j] += "_" + std::to_string(count++); - } - } - } - - return std::move(result); -} - -//------------------------------------------------------------------------------ -// Init Global -//------------------------------------------------------------------------------ - -struct ShapefileGlobalState : public GlobalTableFunctionState { - int shape_idx; - SHPHandlePtr shp_handle; - DBFHandlePtr dbf_handle; - ArenaAllocator arena; - vector column_ids; - - explicit ShapefileGlobalState(ClientContext &context, const string &file_name, vector column_ids_p) - : shape_idx(0), arena(BufferAllocator::Get(context)), column_ids(std::move(column_ids_p)) { - auto &fs = FileSystem::GetFileSystem(context); - - shp_handle = OpenSHPFile(fs, file_name); - - // Remove file extension and replace with .dbf - auto dot_idx = file_name.find_last_of('.'); - auto base_name 
= file_name.substr(0, dot_idx); - dbf_handle = OpenDBFFile(fs, base_name + ".dbf"); - } -}; - -static unique_ptr InitGlobal(ClientContext &context, TableFunctionInitInput &input) { - auto &bind_data = input.bind_data->Cast(); - auto result = make_uniq(context, bind_data.file_name, input.column_ids); - return std::move(result); -} - -//------------------------------------------------------------------------------ -// Geometry Conversion -//------------------------------------------------------------------------------ - -struct ConvertPoint { - static Geometry Convert(SHPObjectPtr &shape, ArenaAllocator &arena) { - return Point::CreateFromVertex(arena, VertexXY {shape->padfX[0], shape->padfY[0]}); - } -}; - -struct ConvertLineString { - static Geometry Convert(SHPObjectPtr &shape, ArenaAllocator &arena) { - if (shape->nParts == 1) { - // Single LineString - auto line = LineString::Create(arena, shape->nVertices, false, false); - for (int i = 0; i < shape->nVertices; i++) { - LineString::SetVertex(line, i, {shape->padfX[i], shape->padfY[i]}); - } - return line; - } else { - // MultiLineString - auto multi_line_string = MultiLineString::Create(arena, shape->nParts, false, false); - auto start = shape->panPartStart[0]; - for (int i = 0; i < shape->nParts; i++) { - auto end = i == shape->nParts - 1 ? shape->nVertices : shape->panPartStart[i + 1]; - auto line_size = end - start; - auto &line = MultiLineString::Part(multi_line_string, i); - LineString::Resize(line, arena, line_size); - for (int j = 0; j < line_size; j++) { - auto offset = start + j; - LineString::SetVertex(line, j, {shape->padfX[offset], shape->padfY[offset]}); - } - start = end; - } - return multi_line_string; - } - } -}; - -struct ConvertPolygon { - static Geometry Convert(SHPObjectPtr &shape, ArenaAllocator &arena) { - // First off, check if there are more than one polygon. 
- // Each polygon is identified by a part with clockwise winding order - // we calculate the winding order by checking the sign of the area - vector polygon_part_starts; - for (int i = 0; i < shape->nParts; i++) { - auto start = shape->panPartStart[i]; - auto end = i == shape->nParts - 1 ? shape->nVertices : shape->panPartStart[i + 1]; - double area = 0; - for (int j = start; j < end - 1; j++) { - area += (shape->padfX[j] * shape->padfY[j + 1]) - (shape->padfX[j + 1] * shape->padfY[j]); - } - if (area < 0) { - polygon_part_starts.push_back(i); - } - } - if (polygon_part_starts.size() < 2) { - // Single polygon, every part is an interior ring - // Even if the polygon is counter-clockwise (which should not happen for shapefiles). - // we still fall back and convert it to a single polygon. - auto polygon = Polygon::Create(arena, shape->nParts, false, false); - auto start = shape->panPartStart[0]; - for (int i = 0; i < shape->nParts; i++) { - auto end = i == shape->nParts - 1 ? shape->nVertices : shape->panPartStart[i + 1]; - auto &ring = Polygon::Part(polygon, i); - auto ring_size = end - start; - LineString::Resize(ring, arena, ring_size); - for (int j = 0; j < ring_size; j++) { - auto offset = start + j; - LineString::SetVertex(ring, j, {shape->padfX[offset], shape->padfY[offset]}); - } - start = end; - } - return polygon; - } else { - // MultiPolygon - auto multi_polygon = MultiPolygon::Create(arena, polygon_part_starts.size(), false, false); - for (size_t polygon_idx = 0; polygon_idx < polygon_part_starts.size(); polygon_idx++) { - auto part_start = polygon_part_starts[polygon_idx]; - auto part_end = polygon_idx == polygon_part_starts.size() - 1 ? shape->nParts - : polygon_part_starts[polygon_idx + 1]; - - auto polygon = Polygon::Create(arena, part_end - part_start, false, false); - - for (auto ring_idx = part_start; ring_idx < part_end; ring_idx++) { - auto start = shape->panPartStart[ring_idx]; - auto end = ring_idx == shape->nParts - 1 ? 
shape->nVertices : shape->panPartStart[ring_idx + 1]; - auto &ring = Polygon::Part(polygon, ring_idx - part_start); - auto ring_size = end - start; - LineString::Resize(ring, arena, ring_size); - for (int j = 0; j < ring_size; j++) { - auto offset = start + j; - LineString::SetVertex(ring, j, {shape->padfX[offset], shape->padfY[offset]}); - } - } - MultiPolygon::Part(multi_polygon, polygon_idx) = std::move(polygon); - } - return multi_polygon; - } - } -}; - -struct ConvertMultiPoint { - static Geometry Convert(SHPObjectPtr &shape, ArenaAllocator &arena) { - auto multi_point = MultiPoint::Create(arena, shape->nVertices, false, false); - for (int i = 0; i < shape->nVertices; i++) { - auto point = Point::CreateFromVertex(arena, VertexXY {shape->padfX[i], shape->padfY[i]}); - MultiPoint::Part(multi_point, i) = std::move(point); - } - return multi_point; - } -}; - -template -static void ConvertGeomLoop(Vector &result, int record_start, idx_t count, SHPHandle &shp_handle, - ArenaAllocator &arena) { - for (idx_t result_idx = 0; result_idx < count; result_idx++) { - auto shape = SHPObjectPtr(SHPReadObject(shp_handle, record_start++)); - if (shape->nSHPType == SHPT_NULL) { - FlatVector::SetNull(result, result_idx, true); - } else { - // TODO: Handle Z and M - FlatVector::GetData(result)[result_idx] = Geometry::Serialize(OP::Convert(shape, arena), result); - } - } -} - -static void ConvertGeometryVector(Vector &result, int record_start, idx_t count, SHPHandle shp_handle, - ArenaAllocator &arena, int geom_type) { - switch (geom_type) { - case SHPT_NULL: - FlatVector::Validity(result).SetAllInvalid(count); - break; - case SHPT_POINT: - ConvertGeomLoop(result, record_start, count, shp_handle, arena); - break; - case SHPT_ARC: - ConvertGeomLoop(result, record_start, count, shp_handle, arena); - break; - case SHPT_POLYGON: - ConvertGeomLoop(result, record_start, count, shp_handle, arena); - break; - case SHPT_MULTIPOINT: - ConvertGeomLoop(result, record_start, count, shp_handle, 
arena); - break; - default: - throw InvalidInputException("Shape type %d not supported", geom_type); - } -} - -//------------------------------------------------------------------------------ -// Attribute Conversion -//------------------------------------------------------------------------------ - -struct ConvertBlobAttribute { - using TYPE = string_t; - static string_t Convert(Vector &result, DBFHandle dbf_handle, int record_idx, int field_idx) { - auto value = DBFReadStringAttribute(dbf_handle, record_idx, field_idx); - return StringVector::AddString(result, const_char_ptr_cast(value)); - } -}; - -struct ConvertIntegerAttribute { - using TYPE = int32_t; - static int32_t Convert(Vector &, DBFHandle dbf_handle, int record_idx, int field_idx) { - return DBFReadIntegerAttribute(dbf_handle, record_idx, field_idx); - } -}; - -struct ConvertBigIntAttribute { - using TYPE = int64_t; - static int64_t Convert(Vector &, DBFHandle dbf_handle, int record_idx, int field_idx) { - return static_cast(DBFReadDoubleAttribute(dbf_handle, record_idx, field_idx)); - } -}; - -struct ConvertDoubleAttribute { - using TYPE = double; - static double Convert(Vector &, DBFHandle dbf_handle, int record_idx, int field_idx) { - return DBFReadDoubleAttribute(dbf_handle, record_idx, field_idx); - } -}; - -struct ConvertDateAttribute { - using TYPE = date_t; - static date_t Convert(Vector &, DBFHandle dbf_handle, int record_idx, int field_idx) { - // XBase stores dates as 8-char strings (without separators) - // but DuckDB expects a date string with separators. 
- auto value = DBFReadStringAttribute(dbf_handle, record_idx, field_idx); - char date_with_separator[11]; - memcpy(date_with_separator, value, 4); - date_with_separator[4] = '-'; - memcpy(date_with_separator + 5, value + 4, 2); - date_with_separator[7] = '-'; - memcpy(date_with_separator + 8, value + 6, 2); - date_with_separator[10] = '\0'; - return Date::FromString(date_with_separator); - } -}; - -struct ConvertBooleanAttribute { - using TYPE = bool; - static bool Convert(Vector &result, DBFHandle dbf_handle, int record_idx, int field_idx) { - return *DBFReadLogicalAttribute(dbf_handle, record_idx, field_idx) == 'T'; - } -}; - -template -static void ConvertAttributeLoop(Vector &result, int record_start, idx_t count, DBFHandle dbf_handle, int field_idx) { - int record_idx = record_start; - for (idx_t row_idx = 0; row_idx < count; row_idx++) { - if (DBFIsAttributeNULL(dbf_handle, record_idx, field_idx)) { - FlatVector::SetNull(result, row_idx, true); - } else { - FlatVector::GetData(result)[row_idx] = - OP::Convert(result, dbf_handle, record_idx, field_idx); - } - record_idx++; - } -} - -static void ConvertStringAttributeLoop(Vector &result, int record_start, idx_t count, DBFHandle dbf_handle, - int field_idx, AttributeEncoding attribute_encoding) { - int record_idx = record_start; - vector conversion_buffer; - for (idx_t row_idx = 0; row_idx < count; row_idx++) { - if (DBFIsAttributeNULL(dbf_handle, record_idx, field_idx)) { - FlatVector::SetNull(result, row_idx, true); - } else { - auto string_bytes = DBFReadStringAttribute(dbf_handle, record_idx, field_idx); - string_t result_str; - if (attribute_encoding == AttributeEncoding::LATIN1) { - conversion_buffer.resize(strlen(string_bytes) * 2 + 1); // worst case (all non-ascii chars) - auto out_len = - EncodingUtil::LatinToUTF8Buffer(const_data_ptr_cast(string_bytes), conversion_buffer.data()); - result_str = StringVector::AddString(result, const_char_ptr_cast(conversion_buffer.data()), out_len); - } else { - 
result_str = StringVector::AddString(result, const_char_ptr_cast(string_bytes)); - } - if (!Utf8Proc::IsValid(result_str.GetDataUnsafe(), result_str.GetSize())) { - throw InvalidInputException("Could not decode VARCHAR field as valid UTF-8, try passing " - "encoding='blob' to skip decoding of string attributes"); - } - FlatVector::GetData(result)[row_idx] = result_str; - } - record_idx++; - } -} - -static void ConvertAttributeVector(Vector &result, int record_start, idx_t count, DBFHandle dbf_handle, int field_idx, - AttributeEncoding attribute_encoding) { - switch (result.GetType().id()) { - case LogicalTypeId::BLOB: - ConvertAttributeLoop(result, record_start, count, dbf_handle, field_idx); - break; - case LogicalTypeId::VARCHAR: - ConvertStringAttributeLoop(result, record_start, count, dbf_handle, field_idx, attribute_encoding); - break; - case LogicalTypeId::INTEGER: - ConvertAttributeLoop(result, record_start, count, dbf_handle, field_idx); - break; - case LogicalTypeId::BIGINT: - ConvertAttributeLoop(result, record_start, count, dbf_handle, field_idx); - break; - case LogicalTypeId::DOUBLE: - ConvertAttributeLoop(result, record_start, count, dbf_handle, field_idx); - break; - case LogicalTypeId::DATE: - ConvertAttributeLoop(result, record_start, count, dbf_handle, field_idx); - break; - case LogicalTypeId::BOOLEAN: - ConvertAttributeLoop(result, record_start, count, dbf_handle, field_idx); - break; - default: - throw InvalidInputException("Attribute type %s not supported", result.GetType().ToString()); - } -} - -//------------------------------------------------------------------------------ -// Execute -//------------------------------------------------------------------------------ - -static void Execute(ClientContext &context, TableFunctionInput &input, DataChunk &output) { - auto &bind_data = input.bind_data->Cast(); - auto &gstate = input.global_state->Cast(); - - // Reset the buffer allocator - gstate.arena.Reset(); - - // Calculate how many record we 
can fit in the output - auto output_size = std::min(STANDARD_VECTOR_SIZE, bind_data.shape_count - gstate.shape_idx); - int record_start = gstate.shape_idx; - for (auto col_idx = 0; col_idx < output.ColumnCount(); col_idx++) { - - // Projected column indices - auto projected_col_idx = gstate.column_ids[col_idx]; - - auto &col_vec = output.data[col_idx]; - if (col_vec.GetType() == GeoTypes::GEOMETRY()) { - ConvertGeometryVector(col_vec, record_start, output_size, gstate.shp_handle.get(), gstate.arena, - bind_data.shape_type); - } else { - // The geometry is always last, so we can use the projected column index directly - auto field_idx = projected_col_idx; - ConvertAttributeVector(col_vec, record_start, output_size, gstate.dbf_handle.get(), (int)field_idx, - bind_data.attribute_encoding); - } - } - // Update the shape index - gstate.shape_idx += output_size; - - // Set the cardinality of the output - output.SetCardinality(output_size); -} - -//------------------------------------------------------------------------------ -// Progress, Cardinality and Replacement Scans -//------------------------------------------------------------------------------ - -static double GetProgress(ClientContext &context, const FunctionData *bind_data_p, - const GlobalTableFunctionState *global_state) { - - auto &gstate = global_state->Cast(); - auto &bind_data = bind_data_p->Cast(); - - return (double)gstate.shape_idx / (double)bind_data.shape_count; -} - -static unique_ptr GetCardinality(ClientContext &context, const FunctionData *data) { - auto &bind_data = data->Cast(); - auto result = make_uniq(); - - // This is the maximum number of shapes in a single file - result->has_max_cardinality = true; - result->max_cardinality = bind_data.shape_count; - - return result; -} - -static unique_ptr GetReplacementScan(ClientContext &context, ReplacementScanInput &input, - optional_ptr data) { - auto &table_name = input.table_name; - // Check if the table name ends with .shp - if 
(!StringUtil::EndsWith(StringUtil::Lower(table_name), ".shp")) { - return nullptr; - } - - auto table_function = make_uniq(); - vector> children; - children.push_back(make_uniq(Value(table_name))); - table_function->function = make_uniq("ST_ReadSHP", std::move(children)); - return std::move(table_function); -} - -//------------------------------------------------------------------------------ -// Register table function -//------------------------------------------------------------------------------ -void CoreTableFunctions::RegisterShapefileTableFunction(DatabaseInstance &db) { - TableFunction read_func("ST_ReadSHP", {LogicalType::VARCHAR}, Execute, Bind, InitGlobal); - - read_func.named_parameters["encoding"] = LogicalType::VARCHAR; - read_func.table_scan_progress = GetProgress; - read_func.cardinality = GetCardinality; - read_func.projection_pushdown = true; - ExtensionUtil::RegisterFunction(db, read_func); - - // Replacement scan - auto &config = DBConfig::GetConfig(db); - config.replacement_scans.emplace_back(GetReplacementScan); -} - -} // namespace core - -} // namespace spatial diff --git a/spatial/src/spatial/core/io/shapefile/read_shapefile_meta.cpp b/spatial/src/spatial/core/io/shapefile/read_shapefile_meta.cpp deleted file mode 100644 index d42342a9..00000000 --- a/spatial/src/spatial/core/io/shapefile/read_shapefile_meta.cpp +++ /dev/null @@ -1,177 +0,0 @@ -#include "duckdb/parser/parsed_data/create_table_function_info.hpp" -#include "duckdb/storage/buffer_manager.hpp" -#include "duckdb/function/replacement_scan.hpp" -#include "duckdb/parser/expression/constant_expression.hpp" -#include "duckdb/parser/expression/function_expression.hpp" -#include "duckdb/parser/tableref/table_function_ref.hpp" -#include "duckdb/common/multi_file_reader.hpp" - -#include "spatial/common.hpp" -#include "spatial/core/io/shapefile.hpp" -#include "spatial/core/functions/table.hpp" -#include "spatial/core/types.hpp" - -#include "shapefil.h" -#include "utf8proc_wrapper.hpp" - 
-namespace spatial { - -namespace core { - -struct ShapeFileMetaBindData : public TableFunctionData { - vector files; -}; - -struct ShapeTypeEntry { - int shp_type; - const char *shp_name; -}; - -static ShapeTypeEntry shape_type_map[] = { - {SHPT_NULL, "NULL"}, - {SHPT_POINT, "POINT"}, - {SHPT_ARC, "LINESTRING"}, - {SHPT_POLYGON, "POLYGON"}, - {SHPT_MULTIPOINT, "MULTIPOINT"}, - {SHPT_POINTZ, "POINTZ"}, - {SHPT_ARCZ, "LINESTRINGZ"}, - {SHPT_POLYGONZ, "POLYGONZ"}, - {SHPT_MULTIPOINTZ, "MULTIPOINTZ"}, - {SHPT_POINTM, "POINTM"}, - {SHPT_ARCM, "LINESTRINGM"}, - {SHPT_POLYGONM, "POLYGONM"}, - {SHPT_MULTIPOINTM, "MULTIPOINTM"}, - {SHPT_MULTIPATCH, "MULTIPATCH"}, -}; - -static unique_ptr ShapeFileMetaBind(ClientContext &context, TableFunctionBindInput &input, - vector &return_types, vector &names) { - auto result = make_uniq(); - - auto multi_file_reader = MultiFileReader::Create(input.table_function); - auto file_list = multi_file_reader->CreateFileList(context, input.inputs[0], FileGlobOptions::ALLOW_EMPTY); - - for (auto &file : file_list->Files()) { - if (StringUtil::EndsWith(StringUtil::Lower(file), ".shp")) { - result->files.push_back(file); - } - } - - auto shape_type_count = sizeof(shape_type_map) / sizeof(ShapeTypeEntry); - auto varchar_vector = Vector(LogicalType::VARCHAR, shape_type_count); - auto varchar_data = FlatVector::GetData(varchar_vector); - for (idx_t i = 0; i < shape_type_count; i++) { - auto str = string_t(shape_type_map[i].shp_name); - varchar_data[i] = str.IsInlined() ? 
str : StringVector::AddString(varchar_vector, str); - } - auto shape_type_enum = LogicalType::ENUM("SHAPE_TYPE", varchar_vector, shape_type_count); - shape_type_enum.SetAlias("SHAPE_TYPE"); - - return_types.push_back(LogicalType::VARCHAR); - return_types.push_back(shape_type_enum); - return_types.push_back(GeoTypes::BOX_2D()); - return_types.push_back(LogicalType::INTEGER); - names.push_back("name"); - names.push_back("shape_type"); - names.push_back("bounds"); - names.push_back("count"); - return std::move(result); -} - -struct ShapeFileMetaGlobalState : public GlobalTableFunctionState { - ShapeFileMetaGlobalState() : current_file_idx(0) { - } - idx_t current_file_idx; - vector files; -}; - -static unique_ptr ShapeFileMetaInitGlobal(ClientContext &context, - TableFunctionInitInput &input) { - auto &bind_data = input.bind_data->Cast(); - auto result = make_uniq(); - - result->files = bind_data.files; - result->current_file_idx = 0; - - return std::move(result); -} - -static void ShapeFileMetaExecute(ClientContext &context, TableFunctionInput &input, DataChunk &output) { - auto &bind_data = input.bind_data->Cast(); - auto &state = input.global_state->Cast(); - auto &fs = FileSystem::GetFileSystem(context); - - auto &file_name_vector = output.data[0]; - auto file_name_data = FlatVector::GetData(file_name_vector); - auto &shape_type_vector = output.data[1]; - auto shape_type_data = FlatVector::GetData(shape_type_vector); - auto &bounds_vector = output.data[2]; - auto &bounds_vector_children = StructVector::GetEntries(bounds_vector); - auto minx_data = FlatVector::GetData(*bounds_vector_children[0]); - auto miny_data = FlatVector::GetData(*bounds_vector_children[1]); - auto maxx_data = FlatVector::GetData(*bounds_vector_children[2]); - auto maxy_data = FlatVector::GetData(*bounds_vector_children[3]); - auto record_count_vector = output.data[3]; - auto record_count_data = FlatVector::GetData(record_count_vector); - - auto output_count = MinValue(STANDARD_VECTOR_SIZE, 
bind_data.files.size() - state.current_file_idx); - - for (idx_t out_idx = 0; out_idx < output_count; out_idx++) { - auto &file_name = bind_data.files[state.current_file_idx + out_idx]; - - auto file_handle = fs.OpenFile(file_name, FileFlags::FILE_FLAGS_READ); - auto shp_handle = OpenSHPFile(fs, file_name.c_str()); - - double min_bound[4]; - double max_bound[4]; - int shape_type; - int record_count; - SHPGetInfo(shp_handle.get(), &record_count, &shape_type, min_bound, max_bound); - file_name_data[out_idx] = StringVector::AddString(file_name_vector, file_name); - shape_type_data[out_idx] = 0; - for (auto shape_type_idx = 0; shape_type_idx < sizeof(shape_type_map) / sizeof(ShapeTypeEntry); - shape_type_idx++) { - if (shape_type_map[shape_type_idx].shp_type == shape_type) { - shape_type_data[out_idx] = shape_type_idx; - break; - } - } - minx_data[out_idx] = min_bound[0]; - miny_data[out_idx] = min_bound[1]; - maxx_data[out_idx] = max_bound[0]; - maxy_data[out_idx] = max_bound[1]; - record_count_data[out_idx] = record_count; - } - - state.current_file_idx += output_count; - output.SetCardinality(output_count); -} - -static double ShapeFileMetaProgress(ClientContext &context, const FunctionData *bind_data, - const GlobalTableFunctionState *gstate) { - auto &state = gstate->Cast(); - return static_cast(state.current_file_idx) / static_cast(state.files.size()); -} - -static unique_ptr ShapeFileMetaCardinality(ClientContext &context, const FunctionData *bind_data_p) { - auto &bind_data = bind_data_p->Cast(); - auto result = make_uniq(); - result->has_max_cardinality = true; - result->max_cardinality = bind_data.files.size(); - result->has_estimated_cardinality = true; - result->estimated_cardinality = bind_data.files.size(); - return result; -} - -void CoreTableFunctions::RegisterShapefileMetaTableFunction(DatabaseInstance &db) { - - TableFunction meta_func("shapefile_meta", {LogicalType::VARCHAR}, ShapeFileMetaExecute, ShapeFileMetaBind, - ShapeFileMetaInitGlobal); - 
meta_func.table_scan_progress = ShapeFileMetaProgress; - meta_func.cardinality = ShapeFileMetaCardinality; - ExtensionUtil::RegisterFunction(db, MultiFileReader::CreateFunctionSet(meta_func)); -} - -} // namespace core - -} // namespace spatial \ No newline at end of file diff --git a/spatial/src/spatial/core/io/shapefile/shapefile_common.cpp b/spatial/src/spatial/core/io/shapefile/shapefile_common.cpp deleted file mode 100644 index 75676992..00000000 --- a/spatial/src/spatial/core/io/shapefile/shapefile_common.cpp +++ /dev/null @@ -1,160 +0,0 @@ -#include "duckdb/common/file_system.hpp" - -#include "spatial/common.hpp" -#include "spatial/core/io/shapefile.hpp" - -void SASetupDefaultHooks(SAHooks *hooks) { - // Should never be called, use OpenLL and pass in the hooks - throw duckdb::InternalException("SASetupDefaultHooks"); -} - -namespace spatial { - -namespace core { - -//------------------------------------------------------------------------------ -// Shapefile filesystem abstractions -//------------------------------------------------------------------------------ -static SAFile DuckDBShapefileOpen(void *userData, const char *filename, const char *access_mode) { - try { - auto &fs = *reinterpret_cast(userData); - auto file_handle = fs.OpenFile(filename, FileFlags::FILE_FLAGS_READ | FileFlags::FILE_FLAGS_NULL_IF_NOT_EXISTS); - if (!file_handle) { - return nullptr; - } - return reinterpret_cast(file_handle.release()); - } catch (...) 
{ - return nullptr; - } -} - -static SAOffset DuckDBShapefileRead(void *p, SAOffset size, SAOffset nmemb, SAFile file) { - auto handle = reinterpret_cast(file); - auto read_bytes = handle->Read(p, size * nmemb); - return read_bytes / size; -} - -static SAOffset DuckDBShapefileWrite(const void *p, SAOffset size, SAOffset nmemb, SAFile file) { - auto handle = reinterpret_cast(file); - auto written_bytes = handle->Write(const_cast(p), size * nmemb); - return written_bytes / size; -} - -static SAOffset DuckDBShapefileSeek(SAFile file, SAOffset offset, int whence) { - auto file_handle = reinterpret_cast(file); - switch (whence) { - case SEEK_SET: - file_handle->Seek(offset); - break; - case SEEK_CUR: - file_handle->Seek(file_handle->SeekPosition() + offset); - break; - case SEEK_END: - file_handle->Seek(file_handle->GetFileSize() + offset); - break; - default: - throw InternalException("Unknown seek type"); - } - return 0; -} - -static SAOffset DuckDBShapefileTell(SAFile file) { - auto handle = reinterpret_cast(file); - return handle->SeekPosition(); -} - -static int DuckDBShapefileFlush(SAFile file) { - try { - auto handle = reinterpret_cast(file); - handle->Sync(); - return 0; - } catch (...) { - return -1; - } -} - -static int DuckDBShapefileClose(SAFile file) { - try { - auto handle = reinterpret_cast(file); - handle->Close(); - delete handle; - return 0; - } catch (...) { - return -1; - } -} - -static int DuckDBShapefileRemove(void *userData, const char *filename) { - try { - auto &fs = *reinterpret_cast(userData); - auto file = fs.OpenFile(filename, FileFlags::FILE_FLAGS_WRITE | FileFlags::FILE_FLAGS_NULL_IF_NOT_EXISTS); - if (!file) { - return -1; - } - auto file_type = fs.GetFileType(*file); - if (file_type == FileType::FILE_TYPE_DIR) { - fs.RemoveDirectory(filename); - } else { - fs.RemoveFile(filename); - } - return 0; - } catch (...) 
{ - return -1; - } -} - -static void DuckDBShapefileError(const char *message) { - // TODO - // We cant throw an exception here because the shapefile library is not - // exception safe. Instead we should store it somewhere... - // Maybe another client context cache? - - // Note that we need to copy the message - - fprintf(stderr, "%s\n", message); -} - -//------------------------------------------------------------------------------ -// RAII Wrappers -//------------------------------------------------------------------------------ - -static SAHooks GetDuckDBHooks(FileSystem &fs) { - SAHooks hooks; - hooks.FOpen = DuckDBShapefileOpen; - hooks.FRead = DuckDBShapefileRead; - hooks.FWrite = DuckDBShapefileWrite; - hooks.FSeek = DuckDBShapefileSeek; - hooks.FTell = DuckDBShapefileTell; - hooks.FFlush = DuckDBShapefileFlush; - hooks.FClose = DuckDBShapefileClose; - hooks.Remove = DuckDBShapefileRemove; - - hooks.Error = DuckDBShapefileError; - hooks.Atof = std::atof; - hooks.userData = &fs; - return hooks; -} - -DBFHandlePtr OpenDBFFile(FileSystem &fs, const string &filename) { - auto hooks = GetDuckDBHooks(fs); - auto handle = DBFOpenLL(filename.c_str(), "rb", &hooks); - - if (!handle) { - throw IOException("Failed to open DBF file %s", filename.c_str()); - } - - return DBFHandlePtr(handle); -} - -SHPHandlePtr OpenSHPFile(FileSystem &fs, const string &filename) { - auto hooks = GetDuckDBHooks(fs); - auto handle = SHPOpenLL(filename.c_str(), "rb", &hooks); - if (!handle) { - throw IOException("Failed to open SHP file %s", filename); - } - return SHPHandlePtr(handle); -} - -} // namespace core - -} // namespace spatial \ No newline at end of file diff --git a/spatial/src/spatial/core/layout_benchmark/CMakeLists.txt b/spatial/src/spatial/core/layout_benchmark/CMakeLists.txt deleted file mode 100644 index 21295f17..00000000 --- a/spatial/src/spatial/core/layout_benchmark/CMakeLists.txt +++ /dev/null @@ -1,5 +0,0 @@ -set(EXTENSION_SOURCES - ${EXTENSION_SOURCES} - 
${CMAKE_CURRENT_SOURCE_DIR}/test.cpp - PARENT_SCOPE -) \ No newline at end of file diff --git a/spatial/src/spatial/core/layout_benchmark/benchmark/line_length/2d_length_col.benchmark b/spatial/src/spatial/core/layout_benchmark/benchmark/line_length/2d_length_col.benchmark deleted file mode 100644 index f1bfda32..00000000 --- a/spatial/src/spatial/core/layout_benchmark/benchmark/line_length/2d_length_col.benchmark +++ /dev/null @@ -1,10 +0,0 @@ -name columnar 2d point distance -group point_distance - -require spatial - -load -CREATE TABLE lines AS SELECT geo_create_line2d_c(wkb_geometry) as line FROM st_read('../../../../../spatial/test/data/germany_roads.fgb'); - -run -SELECT geo_length2d_c(line) FROM lines; diff --git a/spatial/src/spatial/core/layout_benchmark/benchmark/line_length/2d_length_row.benchmark b/spatial/src/spatial/core/layout_benchmark/benchmark/line_length/2d_length_row.benchmark deleted file mode 100644 index 3029cd71..00000000 --- a/spatial/src/spatial/core/layout_benchmark/benchmark/line_length/2d_length_row.benchmark +++ /dev/null @@ -1,10 +0,0 @@ -name columnar 2d point distance -group point_distance - -require spatial - -load -CREATE TABLE lines AS SELECT geo_create_line2d_r(wkb_geometry) as line FROM st_read('../../../../../spatial/test/data/germany_roads.fgb'); - -run -SELECT geo_length2d_r(line) FROM lines; diff --git a/spatial/src/spatial/core/layout_benchmark/benchmark/point_distance/2d_point_col.benchmark b/spatial/src/spatial/core/layout_benchmark/benchmark/point_distance/2d_point_col.benchmark deleted file mode 100644 index 94b0d7df..00000000 --- a/spatial/src/spatial/core/layout_benchmark/benchmark/point_distance/2d_point_col.benchmark +++ /dev/null @@ -1,11 +0,0 @@ -name columnar 2d point distance -group point_distance - -require spatial - -load -CREATE TABLE points AS SELECT geo_create_point2d_c(st_x(c), st_y(c)) as p FROM (SELECT st_centroid(wkb_geometry) -FROM st_read('../../../../../spatial/test/data/amsterdam_roads.fgb') LIMIT 
100000) as s(c); - -run -SELECT geo_distance2d_c(a.p, b.p) FROM points as a CROSS JOIN points as b; diff --git a/spatial/src/spatial/core/layout_benchmark/benchmark/point_distance/2d_point_row.benchmark b/spatial/src/spatial/core/layout_benchmark/benchmark/point_distance/2d_point_row.benchmark deleted file mode 100644 index d4e8baad..00000000 --- a/spatial/src/spatial/core/layout_benchmark/benchmark/point_distance/2d_point_row.benchmark +++ /dev/null @@ -1,11 +0,0 @@ -name columnar 2d point distance -group point_distance - -require spatial - -load -CREATE TABLE points AS SELECT geo_create_point2d_r(st_x(c), st_y(c)) as p FROM (SELECT st_centroid(wkb_geometry) -FROM st_read('../../../../../spatial/test/data/amsterdam_roads.fgb') LIMIT 100000) as s(c); - -run -SELECT geo_distance2d_r(a.p, b.p) FROM points as a CROSS JOIN points as b; diff --git a/spatial/src/spatial/core/layout_benchmark/benchmark/point_distance/3d_point_col.benchmark b/spatial/src/spatial/core/layout_benchmark/benchmark/point_distance/3d_point_col.benchmark deleted file mode 100644 index da9da819..00000000 --- a/spatial/src/spatial/core/layout_benchmark/benchmark/point_distance/3d_point_col.benchmark +++ /dev/null @@ -1,11 +0,0 @@ -name columnar 3d point 2d distance -group point_distance - -require spatial - -load -CREATE TABLE points AS SELECT geo_create_point3d_c(st_x(c), st_y(c), (st_x(c) + st_y(c)) / 2.0) as p FROM (SELECT st_centroid(wkb_geometry) -FROM st_read('../../../../../spatial/test/data/amsterdam_roads.fgb') LIMIT 100000) as s(c); - -run -SELECT geo_distance2d_c(a.p, b.p) FROM points as a CROSS JOIN points as b; diff --git a/spatial/src/spatial/core/layout_benchmark/benchmark/point_distance/3d_point_row.benchmark b/spatial/src/spatial/core/layout_benchmark/benchmark/point_distance/3d_point_row.benchmark deleted file mode 100644 index 386f0c79..00000000 --- a/spatial/src/spatial/core/layout_benchmark/benchmark/point_distance/3d_point_row.benchmark +++ /dev/null @@ -1,11 +0,0 @@ -name row 
3d point 2d distance -group point_distance - -require spatial - -load -CREATE TABLE points AS SELECT geo_create_point3d_r(st_x(c), st_y(c), (st_x(c) + st_y(c)) / 2.0) as p FROM (SELECT st_centroid(wkb_geometry) -FROM st_read('../../../../../spatial/test/data/amsterdam_roads.fgb') LIMIT 100000) as s(c); - -run -SELECT geo_distance2d_r(a.p, b.p) FROM points as a CROSS JOIN points as b; diff --git a/spatial/src/spatial/core/layout_benchmark/benchmark/point_distance/4d_point_col.benchmark b/spatial/src/spatial/core/layout_benchmark/benchmark/point_distance/4d_point_col.benchmark deleted file mode 100644 index 19101c78..00000000 --- a/spatial/src/spatial/core/layout_benchmark/benchmark/point_distance/4d_point_col.benchmark +++ /dev/null @@ -1,11 +0,0 @@ -name columnar 4d point 2d distance -group point_distance - -require spatial - -load -CREATE TABLE points AS SELECT geo_create_point4d_c(st_x(c), st_y(c), (st_x(c) + st_y(c)) / 2.0, (st_x(c) + st_y(c)) / 4.0) as p FROM (SELECT st_centroid(wkb_geometry) -FROM st_read('../../../../../spatial/test/data/amsterdam_roads.fgb') LIMIT 100000) as s(c); - -run -SELECT geo_distance2d_c(a.p, b.p) FROM points as a CROSS JOIN points as b; diff --git a/spatial/src/spatial/core/layout_benchmark/benchmark/point_distance/4d_point_row.benchmark b/spatial/src/spatial/core/layout_benchmark/benchmark/point_distance/4d_point_row.benchmark deleted file mode 100644 index 347495c2..00000000 --- a/spatial/src/spatial/core/layout_benchmark/benchmark/point_distance/4d_point_row.benchmark +++ /dev/null @@ -1,11 +0,0 @@ -name columnar 4d point 2d distance -group point_distance - -require spatial - -load -CREATE TABLE points AS SELECT geo_create_point4d_r(st_x(c), st_y(c), (st_x(c) + st_y(c)) / 2.0, (st_x(c) + st_y(c)) / 4.0) as p FROM (SELECT st_centroid(wkb_geometry) -FROM st_read('../../../../../spatial/test/data/amsterdam_roads.fgb') LIMIT 100000) as s(c); - -run -SELECT geo_distance2d_r(a.p, b.p) FROM points as a CROSS JOIN points as b; diff --git 
a/spatial/src/spatial/core/layout_benchmark/benchmark/point_in_polygon/2d_point_in_polygon_col.benchmark b/spatial/src/spatial/core/layout_benchmark/benchmark/point_in_polygon/2d_point_in_polygon_col.benchmark deleted file mode 100644 index c27b464c..00000000 --- a/spatial/src/spatial/core/layout_benchmark/benchmark/point_in_polygon/2d_point_in_polygon_col.benchmark +++ /dev/null @@ -1,17 +0,0 @@ -name columnar 2d point in polygon -group point_distance - -require spatial - -load -CREATE TABLE t1 as SELECT - geo_create_polygon2d_c(wkb_geometry) as geom, - geo_create_point2d_c(st_x(st_centroid(wkb_geometry)), st_y(st_centroid(wkb_geometry))) as centroid -FROM st_read('../../../../../spatial/test/data/germany/forest/forest.fgb'); - - -run -SELECT geo_point_in_polygon2d_c(centroid, geom) as x FROM t1; - - - diff --git a/spatial/src/spatial/core/layout_benchmark/benchmark/point_in_polygon/2d_point_in_polygon_row.benchmark b/spatial/src/spatial/core/layout_benchmark/benchmark/point_in_polygon/2d_point_in_polygon_row.benchmark deleted file mode 100644 index cb4ac490..00000000 --- a/spatial/src/spatial/core/layout_benchmark/benchmark/point_in_polygon/2d_point_in_polygon_row.benchmark +++ /dev/null @@ -1,17 +0,0 @@ -name columnar 2d point in polygon -group point_distance - -require spatial - -load -CREATE TABLE t1 as SELECT - geo_create_polygon2d_r(wkb_geometry) as geom, - geo_create_point2d_r(st_x(st_centroid(wkb_geometry)), st_y(st_centroid(wkb_geometry))) as centroid -FROM st_read('../../../../../spatial/test/data/germany/forest/forest.fgb'); - - -run -SELECT geo_point_in_polygon2d_r(centroid, geom) as x FROM t1; - - - diff --git a/spatial/src/spatial/core/layout_benchmark/benchmark/point_line_distance/2d_point_line_distance_col.benchmark b/spatial/src/spatial/core/layout_benchmark/benchmark/point_line_distance/2d_point_line_distance_col.benchmark deleted file mode 100644 index b4ebac6c..00000000 --- 
a/spatial/src/spatial/core/layout_benchmark/benchmark/point_line_distance/2d_point_line_distance_col.benchmark +++ /dev/null @@ -1,18 +0,0 @@ -name columnar 2d point distance -group point_distance - -require spatial - -load -CREATE TABLE pairs as SELECT - geo_create_point2d_c(st_x(st_centroid(l.wkb_geometry)), st_y(st_centroid(l.wkb_geometry))) as point, - geo_create_line2d_c(r.wkb_geometry) as line - FROM - st_read('../../../../../spatial/test/data/amsterdam_roads.fgb') as l - CROSS JOIN - st_read('../../../../../spatial/test/data/amsterdam_roads.fgb') as r - LIMIT 10000000; -checkpoint; - -run -SELECT geo_line_point_distance2d_c(point, line) FROM pairs; \ No newline at end of file diff --git a/spatial/src/spatial/core/layout_benchmark/benchmark/point_line_distance/2d_point_line_distance_row.benchmark b/spatial/src/spatial/core/layout_benchmark/benchmark/point_line_distance/2d_point_line_distance_row.benchmark deleted file mode 100644 index 98d9d8c8..00000000 --- a/spatial/src/spatial/core/layout_benchmark/benchmark/point_line_distance/2d_point_line_distance_row.benchmark +++ /dev/null @@ -1,18 +0,0 @@ -name row 2d point distance -group point_distance - -require spatial - -load -CREATE TABLE pairs as SELECT - geo_create_point2d_r(st_x(st_centroid(l.wkb_geometry)), st_y(st_centroid(l.wkb_geometry))) as point, - geo_create_line2d_r(r.wkb_geometry) as line - FROM - st_read('../../../../../spatial/test/data/amsterdam_roads.fgb') as l - CROSS JOIN - st_read('../../../../../spatial/test/data/amsterdam_roads.fgb') as r - LIMIT 10000000; -checkpoint; - -run -SELECT geo_line_point_distance2d_r(point, line) FROM pairs; \ No newline at end of file diff --git a/spatial/src/spatial/core/layout_benchmark/test.cpp b/spatial/src/spatial/core/layout_benchmark/test.cpp deleted file mode 100644 index 990d17c6..00000000 --- a/spatial/src/spatial/core/layout_benchmark/test.cpp +++ /dev/null @@ -1,1282 +0,0 @@ -#pragma once - -#include "spatial/common.hpp" -#include 
"spatial/core/layout_benchmark/test.hpp" -#include "spatial/core/types.hpp" - -#include "duckdb/parser/parsed_data/create_type_info.hpp" -#include "duckdb/function/cast/cast_function_set.hpp" -#include "duckdb/common/vector_operations/generic_executor.hpp" -#include "duckdb/parser/parsed_data/create_scalar_function_info.hpp" -#include "duckdb/function/cast/cast_function_set.hpp" - -namespace spatial { - -namespace core { - -static Point ClosestPointOnSegment(const Point &p, const Point &p1, const Point &p2) { - // If the segment is a Vertex, then return that Vertex - if (p1 == p2) { - return p1; - } - double r = ((p.x - p1.x) * (p2.x - p1.x) + (p.y - p1.y) * (p2.y - p1.y)) / - ((p2.x - p1.x) * (p2.x - p1.x) + (p2.y - p1.y) * (p2.y - p1.y)); - // If r is less than 0, then the Vertex is outside the segment in the p1 direction - if (r <= 0) { - return p1; - } - // If r is greater than 1, then the Vertex is outside the segment in the p2 direction - if (r >= 1) { - return p2; - } - // Interpolate between p1 and p2 - return Point(p1.x + r * (p2.x - p1.x), p1.y + r * (p2.y - p1.y)); -} -static double DistanceToSegmentSquared(const Point &px, const Point &ax, const Point &bx) { - auto point = ClosestPointOnSegment(px, ax, bx); - auto dx = px.x - point.x; - auto dy = px.y - point.y; - return dx * dx + dy * dy; -} - -//---------------------------------------------------------------------- -// WKB (WELL KNOWN BINARY) READER -//---------------------------------------------------------------------- - -// Super simple WKB reader that only supports reading known types in little endian - -struct SimpleWKBReader { - const char *data = nullptr; - uint32_t cursor = 0; - uint32_t length = 0; - - SimpleWKBReader(const char *data, uint32_t length) : data(data), length(length) { - } - - vector ReadLine() { - auto byte_order = ReadByte(); - D_ASSERT(byte_order == 1); // Little endian - auto type = ReadInt(); - D_ASSERT(type == 2); // LineString - auto num_points = ReadInt(); - 
D_ASSERT(num_points > 0); - D_ASSERT(cursor + num_points * 2 * sizeof(double) <= length); - vector result; - for (uint32_t i = 0; i < num_points; i++) { - auto x = ReadDouble(); - auto y = ReadDouble(); - result.emplace_back(x, y); - } - return result; - } - - Point ReadPoint() { - auto byte_order = ReadByte(); - D_ASSERT(byte_order == 1); // Little endian - auto type = ReadInt(); - D_ASSERT(type == 1); // Point - auto x = ReadDouble(); - auto y = ReadDouble(); - return Point(x, y); - } - - vector> ReadPolygon() { - auto byte_order = ReadByte(); - D_ASSERT(byte_order == 1); // Little endian - auto type = ReadInt(); - D_ASSERT(type == 3); // Polygon - auto num_rings = ReadInt(); - D_ASSERT(num_rings > 0); - vector> result; - for (uint32_t i = 0; i < num_rings; i++) { - auto num_points = ReadInt(); - D_ASSERT(num_points > 0); - D_ASSERT(cursor + num_points * 2 * sizeof(double) <= length); - vector ring; - for (uint32_t j = 0; j < num_points; j++) { - auto x = ReadDouble(); - auto y = ReadDouble(); - ring.emplace_back(x, y); - } - result.push_back(ring); - } - return result; - } - - uint8_t ReadByte() { - D_ASSERT(cursor + sizeof(uint8_t) <= length); - uint8_t result = data[cursor]; - cursor += sizeof(uint8_t); - return result; - } - - uint32_t ReadInt() { - D_ASSERT(cursor + sizeof(uint32_t) <= length); - // Read uint32_t in little endian - uint32_t result = 0; - result |= (uint32_t)data[cursor + 0] << 0 & 0x000000FF; - result |= (uint32_t)data[cursor + 1] << 8 & 0x0000FF00; - result |= (uint32_t)data[cursor + 2] << 16 & 0x00FF0000; - result |= (uint32_t)data[cursor + 3] << 24 & 0xFF000000; - cursor += sizeof(uint32_t); - return result; - } - - double ReadDouble() { - D_ASSERT(cursor + sizeof(double) <= length); - // Read double in little endian - uint64_t result = 0; - result |= (uint64_t)data[cursor + 0] << 0 & 0x00000000000000FF; - result |= (uint64_t)data[cursor + 1] << 8 & 0x000000000000FF00; - result |= (uint64_t)data[cursor + 2] << 16 & 0x0000000000FF0000; - 
result |= (uint64_t)data[cursor + 3] << 24 & 0x00000000FF000000; - result |= (uint64_t)data[cursor + 4] << 32 & 0x000000FF00000000; - result |= (uint64_t)data[cursor + 5] << 40 & 0x0000FF0000000000; - result |= (uint64_t)data[cursor + 6] << 48 & 0x00FF000000000000; - result |= (uint64_t)data[cursor + 7] << 56 & 0xFF00000000000000; - cursor += sizeof(double); - return *reinterpret_cast(&result); - } -}; - -//---------------------------------------------------------------------- -// TYPES (COLUMNAR) -//---------------------------------------------------------------------- - -static LogicalType GEO_POINT_2D_C = LogicalType::STRUCT({{"x", LogicalType::DOUBLE}, {"y", LogicalType::DOUBLE}}); - -static LogicalType GEO_POINT_3D_C = - LogicalType::STRUCT({{"x", LogicalType::DOUBLE}, {"y", LogicalType::DOUBLE}, {"z", LogicalType::DOUBLE}}); - -static LogicalType GEO_POINT_4D_C = LogicalType::STRUCT( - {{"x", LogicalType::DOUBLE}, {"y", LogicalType::DOUBLE}, {"z", LogicalType::DOUBLE}, {"m", LogicalType::DOUBLE}}); - -static LogicalType GEO_LINE_2D_C = LogicalType::LIST(GEO_POINT_2D_C); - -static LogicalType GEO_POLYGON_2D_C = LogicalType::LIST(GEO_LINE_2D_C); - -static void CreatePoint2D_C(DataChunk &args, ExpressionState &state, Vector &result) { - D_ASSERT(args.data.size() == 2); - auto count = args.size(); - - auto &x = args.data[0]; - auto &y = args.data[1]; - - x.Flatten(count); - y.Flatten(count); - - auto &children = StructVector::GetEntries(result); - auto &x_child = children[0]; - auto &y_child = children[1]; - - x_child->Reference(x); - y_child->Reference(y); - - if (count == 1) { - result.SetVectorType(VectorType::CONSTANT_VECTOR); - } -} - -static void CreatePoint3D_C(DataChunk &args, ExpressionState &state, Vector &result) { - D_ASSERT(args.data.size() == 3); - auto count = args.size(); - - auto &x = args.data[0]; - auto &y = args.data[1]; - auto &z = args.data[2]; - - x.Flatten(count); - y.Flatten(count); - z.Flatten(count); - - auto &children = 
StructVector::GetEntries(result); - auto &x_child = children[0]; - auto &y_child = children[1]; - auto &z_child = children[2]; - - x_child->Reference(x); - y_child->Reference(y); - z_child->Reference(z); - - if (count == 1) { - result.SetVectorType(VectorType::CONSTANT_VECTOR); - } -} - -static void CreatePoint4D_C(DataChunk &args, ExpressionState &state, Vector &result) { - D_ASSERT(args.data.size() == 4); - auto count = args.size(); - - auto &x = args.data[0]; - auto &y = args.data[1]; - auto &z = args.data[2]; - auto &m = args.data[3]; - - x.Flatten(count); - y.Flatten(count); - z.Flatten(count); - m.Flatten(count); - - auto &children = StructVector::GetEntries(result); - auto &x_child = children[0]; - auto &y_child = children[1]; - auto &z_child = children[2]; - auto &m_child = children[3]; - - x_child->Reference(x); - y_child->Reference(y); - z_child->Reference(z); - m_child->Reference(m); - - if (count == 1) { - result.SetVectorType(VectorType::CONSTANT_VECTOR); - } -} - -/// point casts -static bool CastPointTo2D_C(Vector &source, Vector &result, idx_t count, CastParameters ¶meters) { - auto &children = StructVector::GetEntries(source); - auto &x_child = children[0]; - auto &y_child = children[1]; - - auto &result_children = StructVector::GetEntries(result); - auto &result_x_child = result_children[0]; - auto &result_y_child = result_children[1]; - result_x_child->Reference(*x_child); - result_y_child->Reference(*y_child); - - if (count == 1) { - result.SetVectorType(VectorType::CONSTANT_VECTOR); - } - return true; -} - -static void CreateLine_2D_C(DataChunk &args, ExpressionState &state, Vector &result) { - D_ASSERT(args.data.size() == 1); - auto count = args.size(); - auto &wkb_blobs = args.data[0]; - wkb_blobs.Flatten(count); - - auto &inner = ListVector::GetEntry(result); - auto lines = ListVector::GetData(result); - - auto wkb_data = FlatVector::GetData(wkb_blobs); - - idx_t total_size = 0; - for (idx_t i = 0; i < count; i++) { - auto wkb = wkb_data[i]; 
- auto wkb_ptr = wkb.GetDataUnsafe(); - auto wkb_size = wkb.GetSize(); - - SimpleWKBReader reader(wkb_ptr, wkb_size); - auto line = reader.ReadLine(); - auto line_size = line.size(); - - lines[i].offset = total_size; - lines[i].length = line_size; - - ListVector::Reserve(result, total_size + line_size); - - // Since ListVector::Reserve potentially reallocates, we need to re-fetch the inner vector pointers - auto &children = StructVector::GetEntries(inner); - auto &x_child = children[0]; - auto &y_child = children[1]; - auto x_data = FlatVector::GetData(*x_child); - auto y_data = FlatVector::GetData(*y_child); - - for (idx_t j = 0; j < line_size; j++) { - x_data[total_size + j] = line[j].x; - y_data[total_size + j] = line[j].y; - } - - total_size += line_size; - } - - ListVector::SetListSize(result, total_size); - - if (count == 1) { - result.SetVectorType(VectorType::CONSTANT_VECTOR); - } -} - -static void CreatePolygon_2D_C(DataChunk &args, ExpressionState &state, Vector &result) { - D_ASSERT(args.data.size() == 1); - auto count = args.size(); - - // Set up input data - auto &wkb_blobs = args.data[0]; - wkb_blobs.Flatten(count); - auto wkb_data = FlatVector::GetData(wkb_blobs); - - // Set up output data - auto &ring_vec = ListVector::GetEntry(result); - auto polygons = ListVector::GetData(result); - - idx_t total_ring_count = 0; - idx_t total_point_count = 0; - - for (idx_t i = 0; i < count; i++) { - auto wkb = wkb_data[i]; - auto wkb_ptr = wkb.GetDataUnsafe(); - auto wkb_size = wkb.GetSize(); - - SimpleWKBReader reader(wkb_ptr, wkb_size); - auto polygon = reader.ReadPolygon(); - auto ring_count = polygon.size(); - - polygons[i].offset = total_ring_count; - polygons[i].length = ring_count; - - ListVector::Reserve(result, total_ring_count + ring_count); - // Since ListVector::Reserve potentially reallocates, we need to re-fetch the inner vector pointers - - for (idx_t j = 0; j < ring_count; j++) { - auto ring = polygon[j]; - auto point_count = ring.size(); - - 
ListVector::Reserve(ring_vec, total_point_count + point_count); - auto ring_entries = ListVector::GetData(ring_vec); - auto &inner = ListVector::GetEntry(ring_vec); - - auto &children = StructVector::GetEntries(inner); - auto &x_child = children[0]; - auto &y_child = children[1]; - auto x_data = FlatVector::GetData(*x_child); - auto y_data = FlatVector::GetData(*y_child); - - for (idx_t k = 0; k < point_count; k++) { - x_data[total_point_count + k] = ring[k].x; - y_data[total_point_count + k] = ring[k].y; - } - - ring_entries[total_ring_count + j].offset = total_point_count; - ring_entries[total_ring_count + j].length = point_count; - - total_point_count += point_count; - } - total_ring_count += ring_count; - } - - ListVector::SetListSize(result, total_ring_count); - ListVector::SetListSize(ring_vec, total_point_count); - - if (count == 1) { - result.SetVectorType(VectorType::CONSTANT_VECTOR); - } -} - -//---------------------------------------------------------------------- -// TYPES (ROW) -//---------------------------------------------------------------------- - -static LogicalType GEO_POINT_2D_R = LogicalType::LIST(LogicalType::DOUBLE); - -static LogicalType GEO_POINT_3D_R = LogicalType::LIST(LogicalType::DOUBLE); - -static LogicalType GEO_POINT_4D_R = LogicalType::LIST(LogicalType::DOUBLE); - -static LogicalType GEO_LINE_2D_R = LogicalType::LIST(GEO_POINT_2D_R); - -static LogicalType GEO_POLYGON_2D_R = LogicalType::LIST(GEO_LINE_2D_R); - -static void CreatePoint2D_R(DataChunk &args, ExpressionState &state, Vector &result) { - D_ASSERT(args.data.size() == 2); - auto count = args.size(); - - auto &x = args.data[0]; - auto &y = args.data[1]; - - x.Flatten(count); - y.Flatten(count); - - auto x_data = FlatVector::GetData(x); - auto y_data = FlatVector::GetData(y); - - ListVector::Reserve(result, count * 2); - - auto entries = ListVector::GetData(result); - auto &inner = ListVector::GetEntry(result); - auto inner_Data = FlatVector::GetData(inner); - - for (idx_t i 
= 0; i < count; i++) { - entries[i].offset = i * 2; - entries[i].length = 2; - inner_Data[i * 2] = x_data[i]; - inner_Data[i * 2 + 1] = y_data[i]; - } - ListVector::SetListSize(result, count * 2); - - if (count == 1) { - result.SetVectorType(VectorType::CONSTANT_VECTOR); - } -} - -static void CreatePoint3D_R(DataChunk &args, ExpressionState &state, Vector &result) { - D_ASSERT(args.data.size() == 3); - auto count = args.size(); - - auto &x = args.data[0]; - auto &y = args.data[1]; - auto &z = args.data[2]; - - x.Flatten(count); - y.Flatten(count); - z.Flatten(count); - - auto x_data = FlatVector::GetData(x); - auto y_data = FlatVector::GetData(y); - auto z_data = FlatVector::GetData(z); - - ListVector::Reserve(result, count * 3); - auto entries = ListVector::GetData(result); - auto &inner = ListVector::GetEntry(result); - auto inner_Data = FlatVector::GetData(inner); - for (idx_t i = 0; i < count; i++) { - entries[i].offset = i * 3; - entries[i].length = 3; - inner_Data[i * 3] = x_data[i]; - inner_Data[i * 3 + 1] = y_data[i]; - inner_Data[i * 3 + 2] = z_data[i]; - } - - ListVector::SetListSize(result, count * 3); - - if (count == 1) { - result.SetVectorType(VectorType::CONSTANT_VECTOR); - } -} - -static void CreatePoint4D_R(DataChunk &args, ExpressionState &state, Vector &result) { - D_ASSERT(args.data.size() == 4); - auto count = args.size(); - - auto &x = args.data[0]; - auto &y = args.data[1]; - auto &z = args.data[2]; - auto &m = args.data[3]; - - x.Flatten(count); - y.Flatten(count); - z.Flatten(count); - m.Flatten(count); - - auto x_data = FlatVector::GetData(x); - auto y_data = FlatVector::GetData(y); - auto z_data = FlatVector::GetData(z); - auto m_data = FlatVector::GetData(m); - - ListVector::Reserve(result, count * 4); - auto entries = ListVector::GetData(result); - auto &inner = ListVector::GetEntry(result); - auto inner_Data = FlatVector::GetData(inner); - for (idx_t i = 0; i < count; i++) { - entries[i].offset = i * 4; - entries[i].length = 4; - 
inner_Data[i * 4] = x_data[i]; - inner_Data[i * 4 + 1] = y_data[i]; - inner_Data[i * 4 + 2] = z_data[i]; - inner_Data[i * 4 + 3] = m_data[i]; - } - - ListVector::SetListSize(result, count * 4); - - if (count == 1) { - result.SetVectorType(VectorType::CONSTANT_VECTOR); - } -} - -static void CreateLine_2D_R(DataChunk &args, ExpressionState &state, Vector &result) { - D_ASSERT(args.data.size() == 1); - auto count = args.size(); - auto &wkb_blobs = args.data[0]; - wkb_blobs.Flatten(count); - - auto line_entries = ListVector::GetData(result); - auto &coord_vec = ListVector::GetEntry(result); - - auto wkb_data = FlatVector::GetData(wkb_blobs); - - idx_t total_coords_size = 0; - idx_t total_coords_data_size = 0; - - for (idx_t i = 0; i < count; i++) { - auto wkb = wkb_data[i]; - auto wkb_ptr = wkb.GetDataUnsafe(); - auto wkb_size = wkb.GetSize(); - SimpleWKBReader reader(wkb_ptr, wkb_size); - auto line_geom = reader.ReadLine(); - - auto offset = total_coords_size; - auto length = line_geom.size(); - - line_entries[i].offset = offset; - line_entries[i].length = length; - - total_coords_size += length; - - ListVector::Reserve(result, total_coords_size); - ListVector::Reserve(coord_vec, total_coords_data_size + length * 2); - - auto coord_entries = ListVector::GetData(coord_vec); - auto &coord_inner = ListVector::GetEntry(coord_vec); - auto coord_inner_data = FlatVector::GetData(coord_inner); - - for (idx_t j = 0; j < length; j++) { - auto coord = line_geom[j]; - coord_entries[offset + j].offset = total_coords_data_size; - coord_entries[offset + j].length = 2; - coord_inner_data[total_coords_data_size] = coord.x; - coord_inner_data[total_coords_data_size + 1] = coord.y; - total_coords_data_size += 2; - } - } - - ListVector::SetListSize(result, total_coords_size); - ListVector::SetListSize(coord_vec, total_coords_data_size); - - if (count == 1) { - result.SetVectorType(VectorType::CONSTANT_VECTOR); - } -} - -static void CreatePolygon_2D_R(DataChunk &args, ExpressionState 
&state, Vector &result) { - D_ASSERT(args.data.size() == 1); - auto count = args.size(); - - // Set up input data - auto &wkb_blobs = args.data[0]; - wkb_blobs.Flatten(count); - auto wkb_data = FlatVector::GetData(wkb_blobs); - - // Set up output data - auto &ring_vec = ListVector::GetEntry(result); - auto &coords_vec = ListVector::GetEntry(ring_vec); - auto polygons = ListVector::GetData(result); - - idx_t total_ring_count = 0; - idx_t total_point_count = 0; - idx_t total_coord_count = 0; - - for (idx_t i = 0; i < count; i++) { - auto wkb = wkb_data[i]; - auto wkb_ptr = wkb.GetDataUnsafe(); - auto wkb_size = wkb.GetSize(); - - SimpleWKBReader reader(wkb_ptr, wkb_size); - auto polygon = reader.ReadPolygon(); - auto ring_count = polygon.size(); - - polygons[i].offset = total_ring_count; - polygons[i].length = ring_count; - - ListVector::Reserve(result, total_ring_count + ring_count); - // Since ListVector::Reserve potentially reallocates, we need to re-fetch the inner vector pointers - - for (idx_t j = 0; j < ring_count; j++) { - auto ring = polygon[j]; - auto point_count = ring.size(); - - ListVector::Reserve(ring_vec, total_point_count + point_count); - auto ring_entries = ListVector::GetData(ring_vec); - ListVector::Reserve(coords_vec, total_coord_count + point_count * 2); - auto coords_entries = ListVector::GetData(coords_vec); - auto &coord_inner = ListVector::GetEntry(coords_vec); - auto coord_inner_data = FlatVector::GetData(coord_inner); - - for (idx_t k = 0; k < point_count; k++) { - auto point = ring[k]; - coords_entries[total_point_count + k].offset = total_coord_count; - coords_entries[total_point_count + k].length = 2; - coord_inner_data[total_coord_count] = point.x; - coord_inner_data[total_coord_count + 1] = point.y; - total_coord_count += 2; - } - - ring_entries[total_ring_count + j].offset = total_point_count; - ring_entries[total_ring_count + j].length = point_count; - - total_point_count += point_count; - } - total_ring_count += ring_count; - } - 
ListVector::SetListSize(result, total_ring_count); - ListVector::SetListSize(ring_vec, total_point_count); - ListVector::SetListSize(coords_vec, total_coord_count); - - if (count == 1) { - result.SetVectorType(VectorType::CONSTANT_VECTOR); - } -} - -//---------------------------------------------------------------------- -// Distance2D COLUMNAR -//---------------------------------------------------------------------- -static void Distance2D_C(Vector &left, Vector &right, Vector &out, idx_t count) { - /* - using POINT_2D_TYPE = StructTypeBinary; - GenericExecutor::ExecuteBinary>( - left, right, out, count, [](POINT_2D_TYPE &left, POINT_2D_TYPE &right) { - return sqrt(pow(left.a_val - right.a_val, 2) + pow(left.b_val - right.b_val, 2)); - }); - */ - left.Flatten(count); - right.Flatten(count); - - auto &left_entries = StructVector::GetEntries(left); - auto &right_entries = StructVector::GetEntries(right); - - auto left_x = FlatVector::GetData(*left_entries[0]); - auto left_y = FlatVector::GetData(*left_entries[1]); - auto right_x = FlatVector::GetData(*right_entries[0]); - auto right_y = FlatVector::GetData(*right_entries[1]); - - auto out_data = FlatVector::GetData(out); - for (idx_t i = 0; i < count; i++) { - out_data[i] = sqrt(pow(left_x[i] - right_x[i], 2) + pow(left_y[i] - right_y[i], 2)); - } -} - -static void Distance2DFunction_C(DataChunk &args, ExpressionState &state, Vector &result) { - D_ASSERT(args.data.size() == 2); - Distance2D_C(args.data[0], args.data[1], result, args.size()); -} - -//---------------------------------------------------------------------- -// Distance2D ROW -//---------------------------------------------------------------------- -static void Distance2D_R(Vector &left, Vector &right, Vector &out, idx_t count) { - - // TODO: - right.Flatten(count); - left.Flatten(count); - - auto &l_inner = ListVector::GetEntry(left); - auto &r_inner = ListVector::GetEntry(right); - - auto l_data = FlatVector::GetData(l_inner); - auto r_data = 
FlatVector::GetData(r_inner); - - auto l_entries = ListVector::GetData(left); - auto r_entries = ListVector::GetData(right); - - auto result_data = FlatVector::GetData(out); - - for (idx_t i = 0; i < count; i++) { - auto l_offset = l_entries[i].offset; - auto r_offset = r_entries[i].offset; - - auto lx = l_data[l_offset]; - auto ly = l_data[l_offset + 1]; - auto rx = r_data[r_offset]; - auto ry = r_data[r_offset + 1]; - - result_data[i] = sqrt(pow(lx - rx, 2) + pow(ly - ry, 2)); - } - - if (count == 1) { - out.SetVectorType(VectorType::CONSTANT_VECTOR); - } -} - -static void Distance2DFunction_R(DataChunk &args, ExpressionState &state, Vector &result) { - D_ASSERT(args.data.size() == 2); - Distance2D_R(args.data[0], args.data[1], result, args.size()); -} - -static unique_ptr BindDistance2D_R(ClientContext &context, ScalarFunction &bound_function, - vector> &arguments) { - bool left_ok = false; - if (arguments[0]->return_type == GEO_POINT_2D_R || arguments[0]->return_type == GEO_POINT_3D_R || - arguments[0]->return_type == GEO_POINT_4D_R) { - left_ok = true; - } - - bool right_ok = false; - if (arguments[1]->return_type == GEO_POINT_2D_R || arguments[1]->return_type == GEO_POINT_3D_R || - arguments[1]->return_type == GEO_POINT_4D_R) { - right_ok = true; - } - - if (!left_ok || !right_ok) { - throw BinderException("Invalid arguments for Distance2D_R: %s, %s", arguments[0]->return_type.ToString(), - arguments[1]->return_type.ToString()); - } - return nullptr; -} - -//---------------------------------------------------------------------- -// Length2D COLUMN -//---------------------------------------------------------------------- -static void LengthFunction_2D_C(DataChunk &args, ExpressionState &state, Vector &result) { - D_ASSERT(args.data.size() == 1); - - auto &input = args.data[0]; - auto count = args.size(); - input.Flatten(count); - - auto &inner = ListVector::GetEntry(input); - auto &children = StructVector::GetEntries(inner); - auto &x = children[0]; - auto &y 
= children[1]; - auto x_data = FlatVector::GetData(*x); - auto y_data = FlatVector::GetData(*y); - auto lines = ListVector::GetData(input); - - auto result_data = FlatVector::GetData(result); - for (idx_t i = 0; i < count; i++) { - auto offset = lines[i].offset; - auto length = lines[i].length; - double sum = 0; - - // Loop over the segments - for (idx_t j = 0; j < length - 1; j++) { - auto x1 = x_data[offset + j]; - auto y1 = y_data[offset + j]; - auto x2 = x_data[offset + j + 1]; - auto y2 = y_data[offset + j + 1]; - sum += sqrt(pow(x1 - x2, 2) + pow(y1 - y2, 2)); - } - - result_data[i] = sum; - } - if (count == 1) { - result.SetVectorType(VectorType::CONSTANT_VECTOR); - } -} - -//---------------------------------------------------------------------- -// Line To Point Distance COLUMN -//---------------------------------------------------------------------- - -static void LinePointDistance_2D_C(DataChunk &args, ExpressionState &state, Vector &result) { - D_ASSERT(args.data.size() == 2); - auto count = args.size(); - - // Set up the point vectors - auto &point_input = args.data[0]; - point_input.Flatten(count); - auto &p_children = StructVector::GetEntries(point_input); - auto &p_x = p_children[0]; - auto &p_y = p_children[1]; - auto p_x_data = FlatVector::GetData(*p_x); - auto p_y_data = FlatVector::GetData(*p_y); - - // Set up the line vectors - auto &line_input = args.data[1]; - line_input.Flatten(count); - - auto &inner = ListVector::GetEntry(line_input); - auto &children = StructVector::GetEntries(inner); - auto &x = children[0]; - auto &y = children[1]; - auto x_data = FlatVector::GetData(*x); - auto y_data = FlatVector::GetData(*y); - auto lines = ListVector::GetData(line_input); - - auto result_data = FlatVector::GetData(result); - for (idx_t i = 0; i < count; i++) { - auto offset = lines[i].offset; - auto length = lines[i].length; - - double min_distance = std::numeric_limits::max(); - auto p = Point(p_x_data[i], p_y_data[i]); - - // Loop over the segments 
and find the closes one to the point - for (idx_t j = 0; j < length - 1; j++) { - auto a = Point(x_data[offset + j], y_data[offset + j]); - auto b = Point(x_data[offset + j + 1], y_data[offset + j + 1]); - - auto distance = DistanceToSegmentSquared(p, a, b); - if (distance < min_distance) { - min_distance = distance; - - if (min_distance == 0) { - break; - } - } - } - result_data[i] = std::sqrt(min_distance); - } - if (count == 1) { - result.SetVectorType(VectorType::CONSTANT_VECTOR); - } -} - -//---------------------------------------------------------------------- -// Line To Point Distance ROW -//---------------------------------------------------------------------- -static void LinePointDistance_2D_R(DataChunk &args, ExpressionState &state, Vector &result) { - D_ASSERT(args.data.size() == 2); - auto count = args.size(); - - // Set up the point vectors - auto &point_input = args.data[0]; - point_input.Flatten(count); - auto point_entries = ListVector::GetData(point_input); - auto point_data_vec = ListVector::GetEntry(point_input); - auto point_data = FlatVector::GetData(point_data_vec); - - // Set up line vectors - auto &line_input = args.data[1]; - line_input.Flatten(count); - - auto &coord_vec = ListVector::GetEntry(line_input); - auto line_entries = ListVector::GetData(line_input); - auto coord_entries = ListVector::GetData(coord_vec); - auto coord_data_vec = ListVector::GetEntry(coord_vec); - auto coord_data = FlatVector::GetData(coord_data_vec); - - auto result_data = FlatVector::GetData(result); - for (idx_t i = 0; i < count; i++) { - auto offset = line_entries[i].offset; - auto length = line_entries[i].length; - double sum = 0; - - auto p = Point(point_data[point_entries[i].offset], point_data[point_entries[i].offset + 1]); - - double min_distance = std::numeric_limits::max(); - // Loop over the segments - for (idx_t j = 0; j < length - 1; j++) { - auto x1 = coord_data[coord_entries[offset + j].offset]; - auto y1 = coord_data[coord_entries[offset + 
j].offset + 1]; - auto x2 = coord_data[coord_entries[offset + j + 1].offset]; - auto y2 = coord_data[coord_entries[offset + j + 1].offset + 1]; - - auto a = Point(x1, y1); - auto b = Point(x2, y2); - - auto distance = DistanceToSegmentSquared(p, a, b); - if (distance < min_distance) { - min_distance = distance; - - if (min_distance == 0) { - break; - } - } - } - result_data[i] = std::sqrt(min_distance); - } - if (count == 1) { - result.SetVectorType(VectorType::CONSTANT_VECTOR); - } -} - -//---------------------------------------------------------------------- -// Length2D ROW -//---------------------------------------------------------------------- -static void LengthFunction_2D_R(DataChunk &args, ExpressionState &state, Vector &result) { - D_ASSERT(args.data.size() == 1); - - auto &input = args.data[0]; - auto count = args.size(); - input.Flatten(count); - - auto &coord_vec = ListVector::GetEntry(input); - auto line_entries = ListVector::GetData(input); - auto coord_entries = ListVector::GetData(coord_vec); - auto coord_data_vec = ListVector::GetEntry(coord_vec); - auto coord_data = FlatVector::GetData(coord_data_vec); - - auto result_data = FlatVector::GetData(result); - for (idx_t i = 0; i < count; i++) { - auto offset = line_entries[i].offset; - auto length = line_entries[i].length; - double sum = 0; - - // Loop over the segments - for (idx_t j = 0; j < length - 1; j++) { - auto x1 = coord_data[coord_entries[offset + j].offset]; - auto y1 = coord_data[coord_entries[offset + j].offset + 1]; - auto x2 = coord_data[coord_entries[offset + j + 1].offset]; - auto y2 = coord_data[coord_entries[offset + j + 1].offset + 1]; - sum += sqrt(pow(x1 - x2, 2) + pow(y1 - y2, 2)); - } - - result_data[i] = sum; - } - if (count == 1) { - result.SetVectorType(VectorType::CONSTANT_VECTOR); - } -} - -//---------------------------------------------------------------------- -// POINT IN POLYGON (COLUMN) -//---------------------------------------------------------------------- - 
-//---------------------------------------------------------------------- -// POLYGON AREA (ROW) -//---------------------------------------------------------------------- -static void PointInPolygon_R(DataChunk &args, ExpressionState &state, Vector &result) { - D_ASSERT(args.data.size() == 2); - - auto count = args.size(); - auto &in_point = args.data[0]; - auto &in_polygon = args.data[1]; - - in_polygon.Flatten(count); - in_point.Flatten(count); - - // Setup point vectors - auto point_entries = ListVector::GetData(in_point); - auto point_data = FlatVector::GetData(ListVector::GetEntry(in_point)); - - // Setup polygon vectors - auto polygon_entries = ListVector::GetData(in_polygon); - auto &ring_vec = ListVector::GetEntry(in_polygon); - auto ring_entries = ListVector::GetData(ring_vec); - auto &coord_vec = ListVector::GetEntry(ring_vec); - auto coord_entries = ListVector::GetData(coord_vec); - auto coord_data = FlatVector::GetData(ListVector::GetEntry(coord_vec)); - - auto result_data = FlatVector::GetData(result); - - for (idx_t polygon_idx = 0; polygon_idx < count; polygon_idx++) { - auto polygon = polygon_entries[polygon_idx]; - auto polygon_offset = polygon.offset; - auto polygon_length = polygon.length; - bool first = true; - - // does the point lie inside the polygon? 
- bool contains = false; - - auto x = point_data[point_entries[polygon_idx].offset]; - auto y = point_data[point_entries[polygon_idx].offset + 1]; - - for (idx_t ring_idx = polygon_offset; ring_idx < polygon_offset + polygon_length; ring_idx++) { - auto ring = ring_entries[ring_idx]; - auto ring_offset = ring.offset; - auto ring_length = ring.length; - - auto x1 = coord_data[coord_entries[ring_offset].offset]; - auto y1 = coord_data[coord_entries[ring_offset].offset + 1]; - - int winding_number = 0; - - for (idx_t coord_idx = ring_offset + 1; coord_idx < ring_offset + ring_length; coord_idx++) { - // foo foo foo - auto x2 = coord_data[coord_entries[coord_idx].offset]; - auto y2 = coord_data[coord_entries[coord_idx].offset + 1]; - - if (x1 == x2 && y1 == y2) { - x1 = x2; - y1 = y2; - continue; - } - - auto y_min = std::min(y1, y2); - auto y_max = std::max(y1, y2); - - if (y > y_max || y < y_min) { - x1 = x2; - y1 = y2; - continue; - } - - auto side = Side::ON; - double side_v = ((x - x1) * (y2 - y1) - (x2 - x1) * (y - y1)); - if (side_v == 0) { - side = Side::ON; - } else if (side_v < 0) { - side = Side::LEFT; - } else { - side = Side::RIGHT; - } - - if (side == Side::ON && - (((x1 <= x && x < x2) || (x1 >= x && x > x2)) || ((y1 <= y && y < y2) || (y1 >= y && y > y2)))) { - - // return Contains::ON_EDGE; - contains = false; - break; - } else if (side == Side::LEFT && (y1 < y && y <= y2)) { - winding_number++; - } else if (side == Side::RIGHT && (y2 <= y && y < y1)) { - winding_number--; - } - - x1 = x2; - y1 = y2; - } - bool in_ring = winding_number != 0; - if (first) { - if (!in_ring) { - // if the first ring is not inside, then the point is not inside the polygon - contains = false; - break; - } else { - // if the first ring is inside, then the point is inside the polygon - // but might be inside a hole, so we continue - contains = true; - } - } else { - if (in_ring) { - // if the hole is inside, then the point is not inside the polygon - contains = false; - 
break; - } // else continue - } - first = false; - } - result_data[polygon_idx] = contains; - } - if (count == 1) { - result.SetVectorType(VectorType::CONSTANT_VECTOR); - } -} - -//---------------------------------------------------------------------- -// POINT IN POLYGON (ROW) -//---------------------------------------------------------------------- -static void AreaFunction_2D_R(DataChunk &args, ExpressionState &state, Vector &result) { - D_ASSERT(args.data.size() == 1); - - auto count = args.size(); - auto &in_polygon = args.data[0]; - - in_polygon.Flatten(count); - - // Setup polygon vectors - auto polygon_entries = ListVector::GetData(in_polygon); - auto &ring_vec = ListVector::GetEntry(in_polygon); - auto ring_entries = ListVector::GetData(ring_vec); - auto &coord_vec = ListVector::GetEntry(ring_vec); - auto coord_entries = ListVector::GetData(coord_vec); - auto coord_data = FlatVector::GetData(ListVector::GetEntry(coord_vec)); - - auto result_data = FlatVector::GetData(result); - - for (idx_t polygon_idx = 0; polygon_idx < count; polygon_idx++) { - auto polygon = polygon_entries[polygon_idx]; - auto polygon_offset = polygon.offset; - auto polygon_length = polygon.length; - bool first = true; - auto area = 0.0; - - for (idx_t ring_idx = polygon_offset; ring_idx < polygon_offset + polygon_length; ring_idx++) { - auto ring = ring_entries[ring_idx]; - auto ring_offset = ring.offset; - auto ring_length = ring.length; - - auto sum = 0.0; - for (idx_t coord_idx = ring_offset; coord_idx < ring_offset + ring_length - 1; coord_idx++) { - auto x1 = coord_data[coord_entries[ring_offset].offset]; - auto y1 = coord_data[coord_entries[ring_offset].offset + 1]; - auto x2 = coord_data[coord_entries[ring_offset + 1].offset]; - auto y2 = coord_data[coord_entries[ring_offset + 1].offset + 1]; - sum += (x2 - x1) * (y2 + y1); - } - if (first) { - // Add outer ring - area = sum * 0.5; - first = false; - } else { - // Subtract holes - area -= sum * 0.5; - } - } - 
result_data[polygon_idx] = area; - } - if (count == 1) { - result.SetVectorType(VectorType::CONSTANT_VECTOR); - } -} - -//---------------------------------------------------------------------- -// REGISTER -//---------------------------------------------------------------------- -void LayoutBenchmark::Register(ClientContext &context) { - auto &catalog = Catalog::GetSystemCatalog(context); - auto &config = DBConfig::GetConfig(context); - auto &casts = config.GetCastFunctions(); - - // GEO_POINT_2D_C - auto geo_point_2d_c = CreateTypeInfo("GEO_POINT_2D_C", GEO_POINT_2D_C); - geo_point_2d_c.temporary = true; - geo_point_2d_c.internal = true; - GEO_POINT_2D_C.SetAlias("GEO_POINT_2D_C"); - auto geo_point_2d_c_entry = (TypeCatalogEntry *)catalog.CreateType(context, &geo_point_2d_c); - LogicalType::SetCatalog(GEO_POINT_2D_C, geo_point_2d_c_entry); - - // GEO_POINT_3D_C - auto geo_point_3d_c = CreateTypeInfo("GEO_POINT_3D_C", GEO_POINT_3D_C); - geo_point_3d_c.temporary = true; - geo_point_3d_c.internal = true; - GEO_POINT_3D_C.SetAlias("GEO_POINT_3D_C"); - auto geo_point_3d_c_entry = (TypeCatalogEntry *)catalog.CreateType(context, &geo_point_3d_c); - LogicalType::SetCatalog(GEO_POINT_3D_C, geo_point_3d_c_entry); - - // GEO_POINT_4D_C - auto geo_point_4d_c = CreateTypeInfo("GEO_POINT_4D_C", GEO_POINT_4D_C); - geo_point_4d_c.temporary = true; - geo_point_4d_c.internal = true; - GEO_POINT_4D_C.SetAlias("GEO_POINT_4D_C"); - auto geo_point_4d_c_entry = (TypeCatalogEntry *)catalog.CreateType(context, &geo_point_4d_c); - LogicalType::SetCatalog(GEO_POINT_4D_C, geo_point_4d_c_entry); - - auto geo_line_2d_c = CreateTypeInfo("GEO_LINE_2D_C", GEO_LINE_2D_C); - geo_line_2d_c.temporary = true; - geo_line_2d_c.internal = true; - GEO_LINE_2D_C.SetAlias("GEO_LINE_2D_C"); - auto geo_line_2d_c_entry = (TypeCatalogEntry *)catalog.CreateType(context, &geo_line_2d_c); - LogicalType::SetCatalog(GEO_LINE_2D_C, geo_line_2d_c_entry); - - // GEO_POINT_2D_R - auto geo_point_2d_r = 
CreateTypeInfo("GEO_POINT_2D_R", GEO_POINT_2D_R); - geo_point_2d_r.temporary = true; - geo_point_2d_r.internal = true; - GEO_POINT_2D_R.SetAlias("GEO_POINT_2D_R"); - auto geo_point_2d_r_entry = (TypeCatalogEntry *)catalog.CreateType(context, &geo_point_2d_r); - LogicalType::SetCatalog(GEO_POINT_2D_R, geo_point_2d_r_entry); - - // GEO_POINT_3D_R - auto geo_point_3d_r = CreateTypeInfo("GEO_POINT_3D_R", GEO_POINT_3D_R); - geo_point_3d_r.temporary = true; - geo_point_3d_r.internal = true; - GEO_POINT_3D_R.SetAlias("GEO_POINT_3D_R"); - auto geo_point_3d_r_entry = (TypeCatalogEntry *)catalog.CreateType(context, &geo_point_3d_r); - LogicalType::SetCatalog(GEO_POINT_3D_R, geo_point_3d_r_entry); - - // GEO_POINT_4D_R - auto geo_point_4d_r = CreateTypeInfo("GEO_POINT_4D_R", GEO_POINT_4D_R); - geo_point_4d_r.temporary = true; - geo_point_4d_r.internal = true; - GEO_POINT_4D_R.SetAlias("GEO_POINT_4D_R"); - auto geo_point_4d_r_entry = (TypeCatalogEntry *)catalog.CreateType(context, &geo_point_4d_r); - LogicalType::SetCatalog(GEO_POINT_4D_R, geo_point_4d_r_entry); - - // GEO_POLYGON_2D_C - auto geo_polygon_2d_c = CreateTypeInfo("GEO_POLYGON_2D_C", GEO_POLYGON_2D_C); - geo_polygon_2d_c.temporary = true; - geo_polygon_2d_c.internal = true; - GEO_POLYGON_2D_C.SetAlias("GEO_POLYGON_2D_C"); - auto geo_polygon_2d_c_entry = (TypeCatalogEntry *)catalog.CreateType(context, &geo_polygon_2d_c); - LogicalType::SetCatalog(GEO_POLYGON_2D_C, geo_polygon_2d_c_entry); - - // GEO_POLYGON_2D_R - auto geo_polygon_2d_r = CreateTypeInfo("GEO_POLYGON_2D_R", GEO_POLYGON_2D_R); - geo_polygon_2d_r.temporary = true; - geo_polygon_2d_r.internal = true; - GEO_POLYGON_2D_R.SetAlias("GEO_POLYGON_2D_R"); - auto geo_polygon_2d_r_entry = (TypeCatalogEntry *)catalog.CreateType(context, &geo_polygon_2d_r); - LogicalType::SetCatalog(GEO_POLYGON_2D_R, geo_polygon_2d_r_entry); - - // functions - - /// POINTS (COLUMNS) - - // Create - CreateScalarFunctionInfo create_point_2d_c_info(ScalarFunction( - 
"geo_create_point2d_c", {LogicalType::DOUBLE, LogicalType::DOUBLE}, GEO_POINT_2D_C, CreatePoint2D_C)); - catalog.CreateFunction(context, create_point_2d_c_info); - - CreateScalarFunctionInfo create_point_3d_c_info( - ScalarFunction("geo_create_point3d_c", {LogicalType::DOUBLE, LogicalType::DOUBLE, LogicalType::DOUBLE}, - GEO_POINT_3D_C, CreatePoint3D_C)); - catalog.CreateFunction(context, create_point_3d_c_info); - - CreateScalarFunctionInfo create_point_4d_c_info(ScalarFunction( - "geo_create_point4d_c", {LogicalType::DOUBLE, LogicalType::DOUBLE, LogicalType::DOUBLE, LogicalType::DOUBLE}, - GEO_POINT_4D_C, CreatePoint4D_C)); - catalog.CreateFunction(context, create_point_4d_c_info); - - CreateScalarFunctionInfo create_polygon_2d_c_info( - ScalarFunction("geo_create_polygon2d_c", {GeoTypes::WKB_BLOB}, GEO_POLYGON_2D_C, CreatePolygon_2D_C)); - catalog.CreateFunction(context, create_polygon_2d_c_info); - - // this is the nice thing about columnar data, we can cast effortlessly - casts.RegisterCastFunction(GEO_POINT_3D_C, GEO_POINT_2D_C, CastPointTo2D_C, 0); - casts.RegisterCastFunction(GEO_POINT_4D_C, GEO_POINT_2D_C, CastPointTo2D_C, 0); - - // Distance 2D - CreateScalarFunctionInfo distance_2d_c_info(ScalarFunction("geo_distance2d_c", {GEO_POINT_2D_C, GEO_POINT_2D_C}, - LogicalType::DOUBLE, Distance2DFunction_C)); - catalog.CreateFunction(context, distance_2d_c_info); - - CreateScalarFunctionInfo line_point_distance_2d_c_info(ScalarFunction( - "geo_line_point_distance2d_c", {GEO_POINT_2D_C, GEO_LINE_2D_C}, LogicalType::DOUBLE, LinePointDistance_2D_C)); - catalog.CreateFunction(context, line_point_distance_2d_c_info); - - CreateScalarFunctionInfo polygon_area_2d_c_info( - ScalarFunction("geo_polygon_area2d_c", {GEO_POLYGON_2D_C}, LogicalType::DOUBLE, AreaFunction_2D_C)); - catalog.CreateFunction(context, polygon_area_2d_c_info); - - CreateScalarFunctionInfo point_in_polygon_2d_c_info(ScalarFunction( - "geo_point_in_polygon2d_c", {GEO_POINT_2D_C, GEO_POLYGON_2D_C}, 
LogicalType::BOOLEAN, PointInPolygon_C)); - catalog.CreateFunction(context, point_in_polygon_2d_c_info); - - /// LINES (COLUMNS) - CreateScalarFunctionInfo create_line_2d_c_info( - ScalarFunction("geo_create_line2d_c", {GeoTypes::WKB_BLOB}, GEO_LINE_2D_C, CreateLine_2D_C)); - catalog.CreateFunction(context, create_line_2d_c_info); - - /// Length2d - CreateScalarFunctionInfo length_2d_c_info( - ScalarFunction("geo_length2d_c", {LogicalType::ANY}, LogicalType::DOUBLE, LengthFunction_2D_C)); - catalog.CreateFunction(context, length_2d_c_info); - - /// POINT (ROWS) - - // Create - CreateScalarFunctionInfo create_point_2d_r_info(ScalarFunction( - "geo_create_point2d_r", {LogicalType::DOUBLE, LogicalType::DOUBLE}, GEO_POINT_2D_R, CreatePoint2D_R)); - catalog.CreateFunction(context, create_point_2d_r_info); - - CreateScalarFunctionInfo create_point_3d_r_info( - ScalarFunction("geo_create_point3d_r", {LogicalType::DOUBLE, LogicalType::DOUBLE, LogicalType::DOUBLE}, - GEO_POINT_3D_R, CreatePoint3D_R)); - catalog.CreateFunction(context, create_point_3d_r_info); - - CreateScalarFunctionInfo create_point_4d_r_info(ScalarFunction( - "geo_create_point4d_r", {LogicalType::DOUBLE, LogicalType::DOUBLE, LogicalType::DOUBLE, LogicalType::DOUBLE}, - GEO_POINT_4D_R, CreatePoint4D_R)); - catalog.CreateFunction(context, create_point_4d_r_info); - - // Distance2d - CreateScalarFunctionInfo distance_2d_r_info(ScalarFunction("geo_distance2d_r", {LogicalType::ANY, LogicalType::ANY}, - LogicalType::DOUBLE, Distance2DFunction_R, - BindDistance2D_R)); - catalog.CreateFunction(context, distance_2d_r_info); - - CreateScalarFunctionInfo line_point_distance_2d_r_info(ScalarFunction( - "geo_line_point_distance2d_r", {GEO_POINT_2D_R, GEO_LINE_2D_R}, LogicalType::DOUBLE, LinePointDistance_2D_R)); - catalog.CreateFunction(context, line_point_distance_2d_r_info); - - CreateScalarFunctionInfo create_polygon_2d_r_info( - ScalarFunction("geo_create_polygon2d_r", {GeoTypes::WKB_BLOB}, GEO_POLYGON_2D_R, 
CreatePolygon_2D_R)); - catalog.CreateFunction(context, create_polygon_2d_r_info); - - /// LINES (ROWS) - CreateScalarFunctionInfo create_line_2d_r_info( - ScalarFunction("geo_create_line2d_r", {GeoTypes::WKB_BLOB}, GEO_LINE_2D_R, CreateLine_2D_R)); - catalog.CreateFunction(context, create_line_2d_r_info); - - /// Length2d - CreateScalarFunctionInfo length_2d_r_info( - ScalarFunction("geo_length2d_r", {LogicalType::ANY}, LogicalType::DOUBLE, LengthFunction_2D_R)); - catalog.CreateFunction(context, length_2d_r_info); - - /// POLYGONS (ROWS) - CreateScalarFunctionInfo polygon_area_2d_r_info( - ScalarFunction("geo_polygon_area2d_r", {GEO_POLYGON_2D_R}, LogicalType::DOUBLE, AreaFunction_2D_R)); - catalog.CreateFunction(context, polygon_area_2d_r_info); - - CreateScalarFunctionInfo point_in_polygon_2d_r_info(ScalarFunction( - "geo_point_in_polygon2d_r", {GEO_POINT_2D_R, GEO_POLYGON_2D_R}, LogicalType::BOOLEAN, PointInPolygon_R)); - catalog.CreateFunction(context, point_in_polygon_2d_r_info); - - /* - GeoTypes::WKB_BLOB.SetAlias("WKB_BLOB"); - auto wkb = CreateTypeInfo("WKB_BLOB", GeoTypes::WKB_BLOB); - wkb.internal = true; - wkb.temporary = true; - catalog.CreateType(context, &wkb); - */ -} - -} // namespace core - -} // namespace spatial - -// D CREATE TABLE t1 as SELECT geo_create_polygon2d_c(wkb_geometry) as geom, -// geo_create_point2d_c(st_x(st_centroid(wkb_geometry)), st_y(st_centroid(wkb_geometry))) as centroid FROM -// st_read('./spatial/test/data/germany/forest/forest.fgb') LIMIT 1000; \ No newline at end of file diff --git a/spatial/src/spatial/core/module.cpp b/spatial/src/spatial/core/module.cpp deleted file mode 100644 index 983eaed5..00000000 --- a/spatial/src/spatial/core/module.cpp +++ /dev/null @@ -1,43 +0,0 @@ - -#include "spatial/core/module.hpp" - -#include "spatial/common.hpp" -#include "spatial/core/functions/aggregate.hpp" -#include "spatial/core/functions/cast.hpp" -#include "spatial/core/functions/macros.hpp" -#include 
"spatial/core/functions/scalar.hpp" -#include "spatial/core/functions/table.hpp" -#include "spatial/core/geoarrow.hpp" -#include "spatial/core/index/rtree/rtree_module.hpp" -#include "spatial/core/optimizer_rules.hpp" -#include "spatial/core/types.hpp" - -namespace spatial { - -namespace core { - -void CoreModule::Register(DatabaseInstance &db) { - GeoTypes::Register(db); - CoreScalarFunctions::Register(db); - CoreCastFunctions::Register(db); - CoreTableFunctions::Register(db); - CoreAggregateFunctions::Register(db); - CoreScalarMacros::Register(db); - - // RTree index - RTreeModule::RegisterIndex(db); - RTreeModule::RegisterIndexScan(db); - RTreeModule::RegisterIndexPlanCreate(db); - RTreeModule::RegisterIndexPlanScan(db); - RTreeModule::RegisterIndexPragmas(db); - - // Register the optimizer extensions - CoreOptimizerRules::Register(db); - - // Arrow extensions - GeoArrow::Register(db); -} - -} // namespace core - -} // namespace spatial \ No newline at end of file diff --git a/spatial/src/spatial/core/optimizer_rules.cpp b/spatial/src/spatial/core/optimizer_rules.cpp deleted file mode 100644 index 21472b84..00000000 --- a/spatial/src/spatial/core/optimizer_rules.cpp +++ /dev/null @@ -1,299 +0,0 @@ -#include "duckdb/catalog/catalog_entry/scalar_function_catalog_entry.hpp" -#include "duckdb/execution/expression_executor.hpp" -#include "duckdb/optimizer/optimizer_extension.hpp" -#include "duckdb/planner/expression/bound_comparison_expression.hpp" -#include "duckdb/planner/expression/bound_conjunction_expression.hpp" -#include "duckdb/planner/expression/bound_function_expression.hpp" -#include "duckdb/planner/logical_operator.hpp" -#include "duckdb/planner/operator/logical_any_join.hpp" -#include "duckdb/planner/operator/logical_comparison_join.hpp" -#include "duckdb/planner/operator/logical_filter.hpp" -#include "duckdb/planner/operator/logical_get.hpp" -#include "duckdb/planner/operator/logical_join.hpp" -#include "spatial/common.hpp" -#include 
"spatial/core/types.hpp" -#include "spatial/core/optimizer_rules.hpp" - -namespace spatial { - -namespace core { - -//------------------------------------------------------------------------------ -// Range Join Spatial Predicate Rewriter -//------------------------------------------------------------------------------ -// -// Rewrites joins on spatial predicates to range joins on their bounding boxes -// combined with a spatial predicate filter. This turns the joins from a -// blockwise-nested loop join into a inequality join + filter, which is much -// faster. -// -// All spatial predicates (except st_disjoint) imply an intersection of the -// bounding boxes of the two geometries. -// -class RangeJoinSpatialPredicateRewriter : public OptimizerExtension { -public: - RangeJoinSpatialPredicateRewriter() { - optimize_function = RangeJoinSpatialPredicateRewriter::Optimize; - } - - static void AddComparison(unique_ptr &join, unique_ptr left, - unique_ptr right, ExpressionType type) { - JoinCondition cmp; - cmp.comparison = type; - cmp.left = std::move(left); - cmp.right = std::move(right); - join->conditions.push_back(std::move(cmp)); - } - - static bool IsTableRefsDisjoint(unordered_set &left_table_indexes, unordered_set &right_table_indexes, - unordered_set &left_bindings, unordered_set &right_bindings) { - - // Check that all the left-side bindings reference the left-side tables of the join, - // as well as that all the right-side bindings reference the right-side tables of the join. - // and that the left and right side bindings are disjoint. - - for (auto &left_binding : left_bindings) { - if (right_bindings.find(left_binding) != right_bindings.end()) { - // The left side bindings reference the right side tables of the join. - return false; - } - // Also check that the left side bindings are on the left side of the join - if (left_table_indexes.find(left_binding) == left_table_indexes.end()) { - // The left side bindings are not on the left side of the join. 
- return false; - } - } - - for (auto &right_binding : right_bindings) { - if (left_bindings.find(right_binding) != left_bindings.end()) { - // The right side bindings reference the left side tables of the join. - return false; - } - // Also check that the right side bindings are on the right side of the join - if (right_table_indexes.find(right_binding) == right_table_indexes.end()) { - // The right side bindings are not on the right side of the join. - return false; - } - } - - return true; - } - - static void TryOptimize(ClientContext &context, OptimizerExtensionInfo *info, unique_ptr &plan) { - - auto &op = *plan; - - // Look for ANY_JOIN operators - if (op.type == LogicalOperatorType::LOGICAL_ANY_JOIN) { - auto &any_join = op.Cast(); - - // Check if the join condition is a spatial predicate and the join type is INNER - if (any_join.condition->type == ExpressionType::BOUND_FUNCTION && any_join.join_type == JoinType::INNER) { - auto bound_func_expr = any_join.condition->Copy(); - auto &bound_function = bound_func_expr->Cast(); - - // Note that we cant perform this optimization for st_disjoint as all comparisons have to be AND'd - case_insensitive_set_t predicates = {"st_equals", "st_intersects", "st_touches", "st_crosses", - "st_within", "st_contains", "st_overlaps", "st_covers", - "st_coveredby", "st_containsproperly"}; - - if (predicates.find(bound_function.function.name) != predicates.end()) { - // Found a spatial predicate we can optimize - - // Convert this into a comparison join on st_xmin, st_xmax, st_ymin, st_ymax of the two input - // geometries - auto left_pred_expr = std::move(bound_function.children[0]); - auto right_pred_expr = std::move(bound_function.children[1]); - - // We need to place the left side of the predicate on the left side of the join - // and the right side of the predicate on the right side of the join - // So look at the table indexes of the left and right side of the predicate - unordered_set left_table_indexes; - 
LogicalJoin::GetTableReferences(*any_join.children[0], left_table_indexes); - - unordered_set right_table_indexes; - LogicalJoin::GetTableReferences(*any_join.children[1], right_table_indexes); - - unordered_set left_pred_bindings; - LogicalJoin::GetExpressionBindings(*left_pred_expr, left_pred_bindings); - - unordered_set right_pred_bindings; - LogicalJoin::GetExpressionBindings(*right_pred_expr, right_pred_bindings); - - // Check if we can optimize this join - // We need to make sure that the left and right side of the predicate are disjoint - // e.g. - // a JOIN b ON st_intersects(a.geom, b.geom) => OK - // a JOIN b ON st_intersects(b.geom, a.geom) => OK - // a JOIN b ON st_intersects(a.geom, st_union(a.geom, b.geom)) => NOT OK - auto can_split = IsTableRefsDisjoint(left_table_indexes, right_table_indexes, left_pred_bindings, - right_pred_bindings); - if (!can_split) { - // Try again with the left and right side of the predicate swapped - // We can safely swap because the intersection operation we encode with the comparison join - // is symmetric, so the order of the arguments wont matter in the "new" join condition we're - // about to create. 
- can_split = IsTableRefsDisjoint(left_table_indexes, right_table_indexes, right_pred_bindings, - left_pred_bindings); - if (!can_split) { - // We cant optimize this join - return; - } - // Swap the left and right side of the predicate - std::swap(left_pred_expr, right_pred_expr); - } - - // Lookup the st_xmin, st_xmax, st_ymin, st_ymax functions in the catalog - auto &catalog = Catalog::GetSystemCatalog(context); - - auto &extent_func_set = - catalog.GetEntry(context, CatalogType::SCALAR_FUNCTION_ENTRY, DEFAULT_SCHEMA, "st_extent") - .Cast(); - - auto &xmin_func_set = - catalog.GetEntry(context, CatalogType::SCALAR_FUNCTION_ENTRY, DEFAULT_SCHEMA, "st_xmin") - .Cast(); - auto &xmax_func_set = - catalog.GetEntry(context, CatalogType::SCALAR_FUNCTION_ENTRY, DEFAULT_SCHEMA, "st_xmax") - .Cast(); - auto &ymin_func_set = - catalog.GetEntry(context, CatalogType::SCALAR_FUNCTION_ENTRY, DEFAULT_SCHEMA, "st_ymin") - .Cast(); - auto &ymax_func_set = - catalog.GetEntry(context, CatalogType::SCALAR_FUNCTION_ENTRY, DEFAULT_SCHEMA, "st_ymax") - .Cast(); - - auto &left_arg_type = left_pred_expr->return_type; - auto &right_arg_type = right_pred_expr->return_type; - - auto extent_func_left = extent_func_set.functions.GetFunctionByArguments(context, {left_arg_type}); - auto extent_func_right = - extent_func_set.functions.GetFunctionByArguments(context, {right_arg_type}); - - auto xmin_func_left = - xmin_func_set.functions.GetFunctionByArguments(context, {extent_func_left.return_type}); - auto xmax_func_left = - xmax_func_set.functions.GetFunctionByArguments(context, {extent_func_left.return_type}); - auto ymin_func_left = - ymin_func_set.functions.GetFunctionByArguments(context, {extent_func_left.return_type}); - auto ymax_func_left = - ymax_func_set.functions.GetFunctionByArguments(context, {extent_func_left.return_type}); - - auto xmin_func_right = - xmin_func_set.functions.GetFunctionByArguments(context, {extent_func_right.return_type}); - auto xmax_func_right = - 
xmax_func_set.functions.GetFunctionByArguments(context, {extent_func_right.return_type}); - auto ymin_func_right = - ymin_func_set.functions.GetFunctionByArguments(context, {extent_func_right.return_type}); - auto ymax_func_right = - ymax_func_set.functions.GetFunctionByArguments(context, {extent_func_right.return_type}); - - // Create the new join condition - vector> left_extent_args; - left_extent_args.push_back(left_pred_expr->Copy()); - auto left_extent = make_uniq( - GeoTypes::BOX_2D(), std::move(extent_func_left), std::move(left_extent_args), nullptr); - - vector> right_extent_args; - right_extent_args.push_back(right_pred_expr->Copy()); - auto right_extent = make_uniq( - GeoTypes::BOX_2D(), std::move(extent_func_right), std::move(right_extent_args), nullptr); - - // Left - vector> left_xmin_args; - left_xmin_args.push_back(left_extent->Copy()); - auto a_x_min = make_uniq(LogicalType::DOUBLE, std::move(xmin_func_left), - std::move(left_xmin_args), nullptr); - - vector> left_xmax_args; - left_xmax_args.push_back(left_extent->Copy()); - auto a_x_max = make_uniq(LogicalType::DOUBLE, std::move(xmax_func_left), - std::move(left_xmax_args), nullptr); - - vector> left_ymin_args; - left_ymin_args.push_back(left_extent->Copy()); - auto a_y_min = make_uniq(LogicalType::DOUBLE, std::move(ymin_func_left), - std::move(left_ymin_args), nullptr); - - vector> left_ymax_args; - left_ymax_args.push_back(left_extent->Copy()); - auto a_y_max = make_uniq(LogicalType::DOUBLE, std::move(ymax_func_left), - std::move(left_ymax_args), nullptr); - - // Right - vector> right_xmin_args; - right_xmin_args.push_back(right_extent->Copy()); - auto b_x_min = make_uniq(LogicalType::DOUBLE, std::move(xmin_func_right), - std::move(right_xmin_args), nullptr); - - vector> right_xmax_args; - right_xmax_args.push_back(right_extent->Copy()); - auto b_x_max = make_uniq(LogicalType::DOUBLE, std::move(xmax_func_right), - std::move(right_xmax_args), nullptr); - - vector> right_ymin_args; - 
right_ymin_args.push_back(right_extent->Copy()); - auto b_y_min = make_uniq(LogicalType::DOUBLE, std::move(ymin_func_right), - std::move(right_ymin_args), nullptr); - - vector> right_ymax_args; - right_ymax_args.push_back(right_extent->Copy()); - auto b_y_max = make_uniq(LogicalType::DOUBLE, std::move(ymax_func_right), - std::move(right_ymax_args), nullptr); - - // Now create the new join operator - auto new_join = make_uniq(JoinType::INNER); - AddComparison(new_join, std::move(a_x_min), std::move(b_x_max), - ExpressionType::COMPARE_LESSTHANOREQUALTO); - AddComparison(new_join, std::move(a_x_max), std::move(b_x_min), - ExpressionType::COMPARE_GREATERTHANOREQUALTO); - AddComparison(new_join, std::move(a_y_min), std::move(b_y_max), - ExpressionType::COMPARE_LESSTHANOREQUALTO); - AddComparison(new_join, std::move(a_y_max), std::move(b_y_min), - ExpressionType::COMPARE_GREATERTHANOREQUALTO); - - new_join->children = std::move(any_join.children); - if (any_join.has_estimated_cardinality) { - new_join->estimated_cardinality = any_join.estimated_cardinality; - new_join->has_estimated_cardinality = true; - } - - auto filter = make_uniq(std::move(any_join.condition)); - filter->children.push_back(std::move(new_join)); - - plan = std::move(filter); - } - } - } - } - - static void Optimize(OptimizerExtensionInput &input, unique_ptr &plan) { - - TryOptimize(input.context, input.info.get(), plan); - - // Recursively optimize the children - for (auto &child : plan->children) { - Optimize(input, child); - } - } -}; - -//------------------------------------------------------------------------------ -// Register optimizers -//------------------------------------------------------------------------------ -void CoreOptimizerRules::Register(DatabaseInstance &db) { - Connection con(db); - auto &context = *con.context; - - con.BeginTransaction(); - auto &config = DBConfig::GetConfig(context); - - // Register the optimizer rules - 
config.optimizer_extensions.push_back(RangeJoinSpatialPredicateRewriter()); - - con.Commit(); -} - -} // namespace core - -} // namespace spatial \ No newline at end of file diff --git a/spatial/src/spatial/core/util/CMakeLists.txt b/spatial/src/spatial/core/util/CMakeLists.txt deleted file mode 100644 index fcbaa0c3..00000000 --- a/spatial/src/spatial/core/util/CMakeLists.txt +++ /dev/null @@ -1,5 +0,0 @@ -set(EXTENSION_SOURCES - ${EXTENSION_SOURCES} - ${CMAKE_CURRENT_SOURCE_DIR}/math.cpp - PARENT_SCOPE -) \ No newline at end of file diff --git a/spatial/src/spatial/gdal/CMakeLists.txt b/spatial/src/spatial/gdal/CMakeLists.txt deleted file mode 100644 index 8dce5a4f..00000000 --- a/spatial/src/spatial/gdal/CMakeLists.txt +++ /dev/null @@ -1,7 +0,0 @@ -add_subdirectory(functions) -set(EXTENSION_SOURCES - ${EXTENSION_SOURCES} - ${CMAKE_CURRENT_SOURCE_DIR}/module.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/file_handler.cpp - PARENT_SCOPE -) \ No newline at end of file diff --git a/spatial/src/spatial/gdal/file_handler.cpp b/spatial/src/spatial/gdal/file_handler.cpp deleted file mode 100644 index 6f04630b..00000000 --- a/spatial/src/spatial/gdal/file_handler.cpp +++ /dev/null @@ -1,433 +0,0 @@ -#include "spatial/gdal/file_handler.hpp" - -#include "duckdb/common/mutex.hpp" -#include "duckdb/main/client_context.hpp" -#include "duckdb/common/types/uuid.hpp" -#include "duckdb/main/client_data.hpp" - -#include "cpl_vsi.h" -#include "cpl_vsi_virtual.h" -#include "cpl_vsi_error.h" -#include "cpl_string.h" - -namespace spatial { - -namespace gdal { - -//-------------------------------------------------------------------------- -// GDAL DuckDB File handle wrapper -//-------------------------------------------------------------------------- - -class DuckDBFileHandle : public VSIVirtualHandle { -private: - unique_ptr file_handle; - bool is_eof; - -public: - explicit DuckDBFileHandle(unique_ptr file_handle_p) - : file_handle(std::move(file_handle_p)), is_eof(false) { - } - - vsi_l_offset 
Tell() override { - return static_cast(file_handle->SeekPosition()); - } - int Seek(vsi_l_offset nOffset, int nWhence) override { - is_eof = false; - - if (nWhence == SEEK_SET && nOffset == 0) { - // Use the reset function instead to allow compressed file handles to rewind - // even if they don't support seeking - file_handle->Reset(); - return 0; - } - switch (nWhence) { - case SEEK_SET: - file_handle->Seek(nOffset); - break; - case SEEK_CUR: - file_handle->Seek(file_handle->SeekPosition() + nOffset); - break; - case SEEK_END: - file_handle->Seek(file_handle->GetFileSize() + nOffset); - break; - default: - throw InternalException("Unknown seek type"); - } - return 0; - } - - size_t Read(void *pBuffer, size_t nSize, size_t nCount) override { - auto remaining_bytes = nSize * nCount; - try { - while (remaining_bytes > 0) { - auto read_bytes = file_handle->Read(pBuffer, remaining_bytes); - if (read_bytes == 0) { - break; - } - remaining_bytes -= read_bytes; - // Note we performed a cast back to void* - pBuffer = static_cast(pBuffer) + read_bytes; - } - } catch (...) { - } - - if (remaining_bytes != 0) { - if (file_handle->SeekPosition() == file_handle->GetFileSize()) { - // Is at EOF! - is_eof = true; - } - // else, error! - // unfortunately, this version of GDAL cant distinguish between errors and reading less bytes - // its avaiable in 3.9.2, but we're stuck on 3.8.5 for now. - } - - return nCount - (remaining_bytes / nSize); - } - - int Eof() override { - return is_eof ? TRUE : FALSE; - } - - size_t Write(const void *pBuffer, size_t nSize, size_t nCount) override { - size_t written_bytes = 0; - try { - written_bytes = file_handle->Write(const_cast(pBuffer), nSize * nCount); - } catch (...) 
{ - } - // Return the number of items written - return static_cast(written_bytes / nSize); - } - - int Flush() override { - file_handle->Sync(); - return 0; - } - int Truncate(vsi_l_offset nNewSize) override { - file_handle->Truncate(static_cast(nNewSize)); - return 0; - } - int Close() override { - file_handle->Close(); - return 0; - } - - // int ReadMultiRange(int nRanges, void **ppData, const vsi_l_offset *panOffsets, const size_t *panSizes) override; - // void AdviseRead(int nRanges, const vsi_l_offset *panOffsets, const size_t *panSizes) override; - // VSIRangeStatus GetRangeStatus(vsi_l_offset nOffset, vsi_l_offset nLength) override; -}; - -//-------------------------------------------------------------------------- -// GDAL DuckDB File system wrapper -//-------------------------------------------------------------------------- -static bool IsStdCharDev(const char *file_name) { - return !strcmp(file_name, "/dev/stdin") || !strcmp(file_name, "/dev/stdout") || !strcmp(file_name, "/dev/stderr") || - !strcmp(file_name, "/dev/null") || !strcmp(file_name, "/dev/zero"); -} - -class DuckDBFileSystemHandler : public VSIFilesystemHandler { -private: - string client_prefix; - ClientContext &context; - -public: - DuckDBFileSystemHandler(string client_prefix, ClientContext &context) - : client_prefix(std::move(client_prefix)), context(context) {}; - - const char *StripPrefix(const char *pszFilename) { - return pszFilename + client_prefix.size(); - } - - string AddPrefix(const string &value) { - return client_prefix + value; - } - - VSIVirtualHandle *Open(const char *prefixed_file_name, const char *access, bool bSetError, - CSLConstList /* papszOptions */) override { - auto file_name = StripPrefix(prefixed_file_name); - auto file_name_str = string(file_name); - auto &fs = FileSystem::GetFileSystem(context); - - // TODO: Double check that this is correct - FileOpenFlags flags; - auto len = strlen(access); - if (access[0] == 'r') { - flags = FileFlags::FILE_FLAGS_READ; - if 
(len > 1 && access[1] == '+') { - flags |= FileFlags::FILE_FLAGS_WRITE; - } - if (len > 2 && access[2] == '+') { - // might be "rb+" - flags |= FileFlags::FILE_FLAGS_WRITE; - } - } else if (access[0] == 'w') { - flags = FileFlags::FILE_FLAGS_WRITE; - if (!IsStdCharDev(file_name)) { - flags |= FileFlags::FILE_FLAGS_FILE_CREATE_NEW; - } - if (len > 1 && access[1] == '+') { - flags |= FileFlags::FILE_FLAGS_READ; - } - if (len > 2 && access[2] == '+') { - // might be "wb+" - flags |= FileFlags::FILE_FLAGS_READ; - } - } else if (access[0] == 'a') { - flags = FileFlags::FILE_FLAGS_APPEND; - if (len > 1 && access[1] == '+') { - flags |= FileFlags::FILE_FLAGS_READ; - } - if (len > 2 && access[2] == '+') { - // might be "ab+" - flags |= FileFlags::FILE_FLAGS_READ; - } - } else { - throw InternalException("Unknown file access type"); - } - - try { - // Check if the file is a directory - -#ifdef _WIN32 - if (!FileSystem::IsRemoteFile(file_name) && fs.DirectoryExists(file_name_str) && (flags.OpenForReading())) { - // We can't open a directory for reading on windows without special flags - // so just open nul instead, gdal will reject it when it tries to read - auto file = fs.OpenFile("nul", flags); - return new DuckDBFileHandle(std::move(file)); - } -#endif - - // If the file is remote and NOT in write mode, we can cache it. - if (FileSystem::IsRemoteFile(file_name_str) && !flags.OpenForWriting() && !flags.OpenForAppending()) { - - // Pass the direct IO flag to the file system since we use GDAL's caching instead - flags |= FileFlags::FILE_FLAGS_DIRECT_IO; - - auto file = fs.OpenFile(file_name, flags | FileCompressionType::AUTO_DETECT); - return VSICreateCachedFile(new DuckDBFileHandle(std::move(file))); - } else { - auto file = fs.OpenFile(file_name, flags | FileCompressionType::AUTO_DETECT); - return new DuckDBFileHandle(std::move(file)); - } - } catch (std::exception &ex) { - // Failed to open file via DuckDB File System. 
If this doesnt have a VSI prefix we can return an error here. - if (strncmp(file_name, "/vsi", 4) != 0 && !IsStdCharDev(file_name)) { - if (bSetError) { - VSIError(VSIE_FileError, "Failed to open file %s: %s", file_name, ex.what()); - } - return nullptr; - } - - // Fall back to GDAL instead (if external access is enabled) - if (!context.db->config.options.enable_external_access) { - if (bSetError) { - VSIError(VSIE_FileError, "Failed to open file %s with GDAL: External access is disabled", - file_name); - } - return nullptr; - } - - const auto handler = VSIFileManager::GetHandler(file_name); - if (!handler) { - if (bSetError) { - VSIError(VSIE_FileError, "Failed to open file %s: %s", file_name, ex.what()); - } - return nullptr; - } - - return handler->Open(file_name, access); - } - } - - int Stat(const char *prefixed_file_name, VSIStatBufL *pstatbuf, int n_flags) override { - auto file_name = StripPrefix(prefixed_file_name); - auto &fs = FileSystem::GetFileSystem(context); - - memset(pstatbuf, 0, sizeof(VSIStatBufL)); - - if (IsStdCharDev(file_name)) { - pstatbuf->st_mode = S_IFCHR; - return 0; - } - - if (!(fs.FileExists(file_name) || (!FileSystem::IsRemoteFile(file_name) && fs.DirectoryExists(file_name)))) { - return -1; - } - -#ifdef _WIN32 - if (!FileSystem::IsRemoteFile(file_name) && fs.DirectoryExists(file_name)) { - pstatbuf->st_mode = S_IFDIR; - return 0; - } -#endif - - unique_ptr file; - try { - file = fs.OpenFile(file_name, FileFlags::FILE_FLAGS_READ | FileCompressionType::AUTO_DETECT | - FileFlags::FILE_FLAGS_NULL_IF_NOT_EXISTS); - } catch (std::exception &ex) { - return -1; - } - if (!file) { - return -1; - } - - pstatbuf->st_size = static_cast(fs.GetFileSize(*file)); - pstatbuf->st_mtime = fs.GetLastModifiedTime(*file); - - auto type = file->GetType(); - switch (type) { - // These are the only three types present on all platforms - case FileType::FILE_TYPE_REGULAR: - pstatbuf->st_mode = S_IFREG; - break; - case FileType::FILE_TYPE_DIR: - 
pstatbuf->st_mode = S_IFDIR; - break; - case FileType::FILE_TYPE_CHARDEV: - pstatbuf->st_mode = S_IFCHR; - break; - default: - // HTTPFS returns invalid type for everything basically. - if (FileSystem::IsRemoteFile(file_name)) { - pstatbuf->st_mode = S_IFREG; - } else { - return -1; - } - } - - return 0; - } - - bool IsLocal(const char *prefixed_file_name) override { - auto file_name = StripPrefix(prefixed_file_name); - return !FileSystem::IsRemoteFile(file_name); - } - - int Mkdir(const char *prefixed_dir_name, long mode) override { - auto dir_name = StripPrefix(prefixed_dir_name); - auto &fs = FileSystem::GetFileSystem(context); - - fs.CreateDirectory(dir_name); - return 0; - } - - int Rmdir(const char *prefixed_dir_name) override { - auto dir_name = StripPrefix(prefixed_dir_name); - auto &fs = FileSystem::GetFileSystem(context); - - fs.RemoveDirectory(dir_name); - return 0; - } - - int RmdirRecursive(const char *prefixed_dir_name) override { - auto dir_name = StripPrefix(prefixed_dir_name); - auto &fs = FileSystem::GetFileSystem(context); - - fs.RemoveDirectory(dir_name); - return 0; - } - - char **ReadDirEx(const char *prefixed_dir_name, int max_files) override { - auto dir_name = StripPrefix(prefixed_dir_name); - auto &fs = FileSystem::GetFileSystem(context); - - CPLStringList files; - auto files_count = 0; - fs.ListFiles(dir_name, [&](const string &file_name, bool is_dir) { - if (files_count >= max_files) { - return; - } - const auto tmp = AddPrefix(file_name); - files.AddString(tmp.c_str()); - files_count++; - }); - return files.StealList(); - } - - char **SiblingFiles(const char *prefixed_file_name) override { - auto file_name = StripPrefix(prefixed_file_name); - - auto &fs = FileSystem::GetFileSystem(context); - CPLStringList files; - - auto file_name_without_ext = - fs.JoinPath(StringUtil::GetFilePath(file_name), StringUtil::GetFileStem(file_name)); - auto file_glob = file_name_without_ext + ".*"; - - auto file_vector = fs.Glob(file_glob); - for (auto 
&file : file_vector) { - auto tmp = AddPrefix(file); - files.AddString(tmp.c_str()); - } - return files.StealList(); - } - - int HasOptimizedReadMultiRange(const char *pszPath) override { - return 0; - } - - int Unlink(const char *prefixed_file_name) override { - auto file_name = StripPrefix(prefixed_file_name); - auto &fs = FileSystem::GetFileSystem(context); - try { - fs.RemoveFile(file_name); - return 0; - } catch (std::exception &ex) { - return -1; - } - } -}; - -//-------------------------------------------------------------------------- -// GDALClientContextState -//-------------------------------------------------------------------------- -// -// We give every client a unique prefix so that multiple connections can -// use their own attached file systems. This is necessary because GDAL is -// not otherwise aware of the connection context. -// -GDALClientContextState::GDALClientContextState(ClientContext &context) : context(context) { - - // Create a new random prefix for this client - client_prefix = StringUtil::Format("/vsiduckdb-%s/", UUID::ToString(UUID::GenerateRandomUUID())); - - // Create a new file handler responding to this prefix - fs_handler = new DuckDBFileSystemHandler(client_prefix, context); - - // Register the file handler - VSIFileManager::InstallHandler(client_prefix, fs_handler); - - // Also pass a reference to the client context -} - -GDALClientContextState::~GDALClientContextState() { - // Uninstall the file handler for this prefix - VSIFileManager::RemoveHandler(client_prefix); - - // Delete the file handler - delete fs_handler; -} - -void GDALClientContextState::QueryEnd() { - -}; - -string GDALClientContextState::GetPrefix(const string &value) const { - // If the user explicitly asked for a VSI prefix, we don't add our own - if (StringUtil::StartsWith(value, "/vsi")) { - if (!context.db->config.options.enable_external_access) { - throw PermissionException("Cannot open file '%s' with VSI prefix: External access is disabled", value); - } 
- return value; - } - return client_prefix + value; -} - -GDALClientContextState &GDALClientContextState::GetOrCreate(ClientContext &context) { - auto gdal_state = context.registered_state->GetOrCreate("gdal", context); - return *gdal_state; -} - -} // namespace gdal - -} // namespace spatial diff --git a/spatial/src/spatial/gdal/functions/CMakeLists.txt b/spatial/src/spatial/gdal/functions/CMakeLists.txt deleted file mode 100644 index 72a0eaab..00000000 --- a/spatial/src/spatial/gdal/functions/CMakeLists.txt +++ /dev/null @@ -1,8 +0,0 @@ -set(EXTENSION_SOURCES - ${EXTENSION_SOURCES} - ${CMAKE_CURRENT_SOURCE_DIR}/st_drivers.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/st_read.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/st_read_meta.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/st_write.cpp - PARENT_SCOPE - ) \ No newline at end of file diff --git a/spatial/src/spatial/gdal/functions/st_drivers.cpp b/spatial/src/spatial/gdal/functions/st_drivers.cpp deleted file mode 100644 index 3c66be1e..00000000 --- a/spatial/src/spatial/gdal/functions/st_drivers.cpp +++ /dev/null @@ -1,108 +0,0 @@ -#include "duckdb/parser/parsed_data/create_table_function_info.hpp" - -#include "spatial/common.hpp" -#include "spatial/gdal/functions.hpp" - -#include "ogrsf_frmts.h" - -namespace spatial { - -namespace gdal { - -// Simple table function to list all the drivers available -unique_ptr GdalDriversTableFunction::Bind(ClientContext &context, TableFunctionBindInput &input, - vector &return_types, vector &names) { - return_types.emplace_back(LogicalType::VARCHAR); - return_types.emplace_back(LogicalType::VARCHAR); - return_types.emplace_back(LogicalType::BOOLEAN); - return_types.emplace_back(LogicalType::BOOLEAN); - return_types.emplace_back(LogicalType::BOOLEAN); - return_types.emplace_back(LogicalType::VARCHAR); - names.emplace_back("short_name"); - names.emplace_back("long_name"); - names.emplace_back("can_create"); - names.emplace_back("can_copy"); - names.emplace_back("can_open"); - names.emplace_back("help_url"); - - 
auto driver_count = GDALGetDriverCount(); - auto result = make_uniq(driver_count); - return std::move(result); -} - -unique_ptr GdalDriversTableFunction::Init(ClientContext &context, - TableFunctionInitInput &input) { - auto result = make_uniq(); - return std::move(result); -} - -void GdalDriversTableFunction::Execute(ClientContext &context, TableFunctionInput &input, DataChunk &output) { - auto &state = (State &)*input.global_state; - auto &bind_data = (BindData &)*input.bind_data; - - idx_t count = 0; - auto next_idx = MinValue(state.current_idx + STANDARD_VECTOR_SIZE, bind_data.driver_count); - - for (; state.current_idx < next_idx; state.current_idx++) { - auto driver = GDALGetDriver((int)state.current_idx); - - // Check if the driver is a vector driver - if (GDALGetMetadataItem(driver, GDAL_DCAP_VECTOR, nullptr) == nullptr) { - continue; - } - - auto short_name = Value::CreateValue(GDALGetDriverShortName(driver)); - auto long_name = Value::CreateValue(GDALGetDriverLongName(driver)); - - const char *create_flag = GDALGetMetadataItem(driver, GDAL_DCAP_CREATE, nullptr); - auto create_value = Value::CreateValue(create_flag != nullptr); - - const char *copy_flag = GDALGetMetadataItem(driver, GDAL_DCAP_CREATECOPY, nullptr); - auto copy_value = Value::CreateValue(copy_flag != nullptr); - const char *open_flag = GDALGetMetadataItem(driver, GDAL_DCAP_OPEN, nullptr); - auto open_value = Value::CreateValue(open_flag != nullptr); - - auto help_topic_flag = GDALGetDriverHelpTopic(driver); - auto help_topic_value = help_topic_flag == nullptr - ? 
Value(LogicalType::VARCHAR) - : Value(StringUtil::Format("https://gdal.org/%s", help_topic_flag)); - - output.data[0].SetValue(count, short_name); - output.data[1].SetValue(count, long_name); - output.data[2].SetValue(count, create_value); - output.data[3].SetValue(count, copy_value); - output.data[4].SetValue(count, open_value); - output.data[5].SetValue(count, help_topic_value); - count++; - } - output.SetCardinality(count); -} - -//------------------------------------------------------------------------------ -// Documentation -//------------------------------------------------------------------------------ -static constexpr DocTag DOC_TAGS[] = {{"ext", "spatial"}}; - -static constexpr const char *DOC_DESCRIPTION = R"( - Returns the list of supported GDAL drivers and file formats - - Note that far from all of these drivers have been tested properly, and some may require additional options to be passed to work as expected. If you run into any issues please first consult the [consult the GDAL docs](https://gdal.org/drivers/vector/index.html). 
-)"; - -static constexpr const char *DOC_EXAMPLE = R"( - SELECT * FROM ST_Drivers(); -)"; - -//------------------------------------------------------------------------------ -// Register -//------------------------------------------------------------------------------ -void GdalDriversTableFunction::Register(DatabaseInstance &db) { - TableFunction func("ST_Drivers", {}, Execute, Bind, Init); - - ExtensionUtil::RegisterFunction(db, func); - DocUtil::AddDocumentation(db, "ST_Drivers", DOC_DESCRIPTION, DOC_EXAMPLE, DOC_TAGS); -} - -} // namespace gdal - -} // namespace spatial \ No newline at end of file diff --git a/spatial/src/spatial/gdal/functions/st_read.cpp b/spatial/src/spatial/gdal/functions/st_read.cpp deleted file mode 100644 index ab66331a..00000000 --- a/spatial/src/spatial/gdal/functions/st_read.cpp +++ /dev/null @@ -1,644 +0,0 @@ -#include "duckdb/parser/parsed_data/create_table_function_info.hpp" -#include "duckdb/parser/expression/constant_expression.hpp" -#include "duckdb/parser/expression/function_expression.hpp" -#include "duckdb/parser/tableref/table_function_ref.hpp" -#include "duckdb/planner/filter/conjunction_filter.hpp" -#include "duckdb/planner/filter/constant_filter.hpp" -#include "duckdb/planner/table_filter.hpp" -#include "duckdb/parser/tableref.hpp" -#include "duckdb/function/function.hpp" -#include "duckdb/function/replacement_scan.hpp" - -#include "spatial/common.hpp" -#include "spatial/core/types.hpp" -#include "spatial/gdal/functions.hpp" -#include "spatial/gdal/file_handler.hpp" -#include "spatial/core/geometry/geometry_writer.hpp" -#include "spatial/core/geometry/wkb_reader.hpp" - -#include "ogrsf_frmts.h" - -namespace spatial { - -namespace gdal { - -enum SpatialFilterType { Wkb, Rectangle }; - -struct SpatialFilter { - SpatialFilterType type; - explicit SpatialFilter(SpatialFilterType type_p) : type(type_p) {}; -}; - -struct RectangleSpatialFilter : SpatialFilter { - double min_x, min_y, max_x, max_y; - 
RectangleSpatialFilter(double min_x_p, double min_y_p, double max_x_p, double max_y_p) - : SpatialFilter(SpatialFilterType::Rectangle), min_x(min_x_p), min_y(min_y_p), max_x(max_x_p), max_y(max_y_p) { - } -}; - -struct WKBSpatialFilter : SpatialFilter { - OGRGeometryH geom; - explicit WKBSpatialFilter(const string &wkb_p) : SpatialFilter(SpatialFilterType::Wkb), geom(nullptr) { - auto ok = OGR_G_CreateFromWkb(wkb_p.c_str(), nullptr, &geom, (int)wkb_p.size()); - if (ok != OGRERR_NONE) { - throw InvalidInputException("WKBSpatialFilter: could not create geometry from WKB"); - } - } - ~WKBSpatialFilter() { - OGR_G_DestroyGeometry(geom); - } -}; - -static void TryApplySpatialFilter(OGRLayer *layer, SpatialFilter *spatial_filter) { - if (spatial_filter != nullptr) { - if (spatial_filter->type == SpatialFilterType::Rectangle) { - auto &rect = (RectangleSpatialFilter &)*spatial_filter; - layer->SetSpatialFilterRect(rect.min_x, rect.min_y, rect.max_x, rect.max_y); - } else if (spatial_filter->type == SpatialFilterType::Wkb) { - auto &filter = (WKBSpatialFilter &)*spatial_filter; - layer->SetSpatialFilter(OGRGeometry::FromHandle(filter.geom)); - } - } -} - -struct GdalScanFunctionData : public TableFunctionData { - int layer_idx; - bool sequential_layer_scan = false; - bool keep_wkb = false; - unordered_set geometry_column_ids; - unique_ptr spatial_filter; - idx_t max_threads; - // before they are renamed - vector all_names; - vector all_types; - ArrowTableType arrow_table; - - bool has_approximate_feature_count; - idx_t approximate_feature_count; - string raw_file_name; - string prefixed_file_name; - CPLStringList dataset_open_options; - CPLStringList dataset_allowed_drivers; - CPLStringList dataset_sibling_files; - CPLStringList layer_creation_options; -}; - -struct GdalScanLocalState : ArrowScanLocalState { - ArenaAllocator arena; - // We trust GDAL to produce valid WKB - core::WKBReader wkb_reader; - explicit GdalScanLocalState(unique_ptr current_chunk, ClientContext 
&context) - : ArrowScanLocalState(std::move(current_chunk), context), arena(BufferAllocator::Get(context)), wkb_reader(arena) { - } -}; - -struct GdalScanGlobalState : ArrowScanGlobalState { - GDALDatasetUniquePtr dataset; - atomic lines_read; - explicit GdalScanGlobalState(GDALDatasetUniquePtr dataset) : dataset(std::move(dataset)), lines_read(0) { - } -}; - -//------------------------------------------------------------------------------ -// Bind -//------------------------------------------------------------------------------ -unique_ptr GdalTableFunction::Bind(ClientContext &context, TableFunctionBindInput &input, - vector &return_types, vector &names) { - - auto result = make_uniq(); - - // First scan for "options" parameter - // auto gdal_open_options = vector(); - auto options_param = input.named_parameters.find("open_options"); - if (options_param != input.named_parameters.end()) { - for (auto ¶m : ListValue::GetChildren(options_param->second)) { - result->dataset_open_options.AddString(StringValue::Get(param).c_str()); - } - } - - auto drivers_param = input.named_parameters.find("allowed_drivers"); - if (drivers_param != input.named_parameters.end()) { - for (auto ¶m : ListValue::GetChildren(drivers_param->second)) { - result->dataset_allowed_drivers.AddString(StringValue::Get(param).c_str()); - } - } - - // Now we can open the dataset - auto &ctx_state = GDALClientContextState::GetOrCreate(context); - - auto siblings_params = input.named_parameters.find("sibling_files"); - if (siblings_params != input.named_parameters.end()) { - for (auto ¶m : ListValue::GetChildren(siblings_params->second)) { - result->dataset_sibling_files.AddString(ctx_state.GetPrefix(StringValue::Get(param)).c_str()); - } - } - - result->raw_file_name = input.inputs[0].GetValue(); - result->prefixed_file_name = ctx_state.GetPrefix(result->raw_file_name); - - auto dataset = GDALDatasetUniquePtr(GDALDataset::Open( - result->prefixed_file_name.c_str(), GDAL_OF_VECTOR | 
GDAL_OF_VERBOSE_ERROR, result->dataset_allowed_drivers, - result->dataset_open_options, result->dataset_sibling_files)); - - if (dataset == nullptr) { - auto error = string(CPLGetLastErrorMsg()); - throw IOException("Could not open file: " + result->raw_file_name + " (" + error + ")"); - } - - // Double check that the dataset have any layers - if (dataset->GetLayerCount() <= 0) { - throw IOException("Dataset does not contain any layers"); - } - - // Now we can bind the additonal options - bool max_batch_size_set = false; - for (auto &kv : input.named_parameters) { - auto loption = StringUtil::Lower(kv.first); - if (loption == "layer") { - - // Find layer by index - if (kv.second.type() == LogicalType::INTEGER) { - auto layer_idx = IntegerValue::Get(kv.second); - if (layer_idx < 0) { - throw BinderException("Layer index must be positive"); - } - if (layer_idx > dataset->GetLayerCount()) { - throw BinderException( - StringUtil::Format("Layer index too large (%s > %s)", layer_idx, dataset->GetLayerCount())); - } - result->layer_idx = layer_idx; - } - - // Find layer by name - if (kv.second.type() == LogicalTypeId::VARCHAR) { - auto name = StringValue::Get(kv.second).c_str(); - bool found = false; - for (auto layer_idx = 0; layer_idx < dataset->GetLayerCount(); layer_idx++) { - if (strcmp(dataset->GetLayer(layer_idx)->GetName(), name) == 0) { - result->layer_idx = layer_idx; - found = true; - break; - } - } - if (!found) { - throw BinderException(StringUtil::Format("Layer '%s' could not be found in dataset", name)); - } - } - } - - if (loption == "spatial_filter_box" && kv.second.type() == core::GeoTypes::BOX_2D()) { - if (result->spatial_filter) { - throw BinderException("Only one spatial filter can be specified"); - } - auto &children = StructValue::GetChildren(kv.second); - auto minx = DoubleValue::Get(children[0]); - auto miny = DoubleValue::Get(children[1]); - auto maxx = DoubleValue::Get(children[2]); - auto maxy = DoubleValue::Get(children[3]); - 
result->spatial_filter = make_uniq(minx, miny, maxx, maxy); - } - - if (loption == "spatial_filter" && kv.second.type() == core::GeoTypes::WKB_BLOB()) { - if (result->spatial_filter) { - throw BinderException("Only one spatial filter can be specified"); - } - auto wkb = StringValue::Get(kv.second); - result->spatial_filter = make_uniq(wkb); - } - - if (loption == "max_threads") { - auto max_threads = IntegerValue::Get(kv.second); - if (max_threads <= 0) { - throw BinderException("'max_threads' parameter must be positive"); - } - result->max_threads = (idx_t)max_threads; - } - - if (loption == "sequential_layer_scan") { - result->sequential_layer_scan = BooleanValue::Get(kv.second); - } - - if (loption == "max_batch_size") { - auto max_batch_size = IntegerValue::Get(kv.second); - if (max_batch_size <= 0) { - throw BinderException("'max_batch_size' parameter must be positive"); - } - auto str = StringUtil::Format("MAX_FEATURES_IN_BATCH=%d", max_batch_size); - result->layer_creation_options.AddString(str.c_str()); - max_batch_size_set = true; - } - - if (loption == "keep_wkb") { - result->keep_wkb = BooleanValue::Get(kv.second); - } - } - - // set default max_threads - if (result->max_threads == 0) { - result->max_threads = context.db->NumberOfThreads(); - } - - // Defaults - result->layer_creation_options.AddString("INCLUDE_FID=NO"); - if (!max_batch_size_set) { - // Set default max batch size to standard vector size - auto str = StringUtil::Format("MAX_FEATURES_IN_BATCH=%d", STANDARD_VECTOR_SIZE); - result->layer_creation_options.AddString(str.c_str()); - } - - // Get the schema for the selected layer - auto layer = dataset->GetLayer(result->layer_idx); - - TryApplySpatialFilter(layer, result->spatial_filter.get()); - - // Check if we can get an approximate feature count - result->approximate_feature_count = 0; - result->has_approximate_feature_count = false; - if (!result->sequential_layer_scan) { - // Dont force compute the count if its expensive - auto count = 
layer->GetFeatureCount(false); - if (count > -1) { - result->approximate_feature_count = count; - result->has_approximate_feature_count = true; - } - } - - struct ArrowArrayStream stream; - if (!layer->GetArrowStream(&stream, result->layer_creation_options)) { - // layer is owned by GDAL, we do not need to destory it - throw IOException("Could not get arrow stream from layer"); - } - - struct ArrowSchema schema; - if (stream.get_schema(&stream, &schema) != 0) { - if (stream.release) { - stream.release(&stream); - } - throw IOException("Could not get arrow schema from layer"); - } - - // The Arrow API will return attributes in this order - // 1. FID column - // 2. all ogr field attributes - // 3. all geometry columns - - auto attribute_count = schema.n_children; - auto attributes = schema.children; - - result->all_names.reserve(attribute_count + 1); - names.reserve(attribute_count + 1); - - for (idx_t col_idx = 0; col_idx < (idx_t)attribute_count; col_idx++) { - auto &attribute = *attributes[col_idx]; - - const char ogc_flag[] = {'\x01', '\0', '\0', '\0', '\x14', '\0', '\0', '\0', 'A', 'R', 'R', 'O', 'W', - ':', 'e', 'x', 't', 'e', 'n', 's', 'i', 'o', 'n', ':', 'n', 'a', - 'm', 'e', '\a', '\0', '\0', '\0', 'o', 'g', 'c', '.', 'w', 'k', 'b'}; - - auto arrow_type = ArrowType::GetArrowLogicalType(DBConfig::GetConfig(context), attribute); - - auto column_name = string(attribute.name); - auto duckdb_type = arrow_type->GetDuckType(); - - if (duckdb_type.id() == LogicalTypeId::BLOB && attribute.metadata != nullptr && - strncmp(attribute.metadata, ogc_flag, sizeof(ogc_flag)) == 0) { - // This is a WKB geometry blob - result->arrow_table.AddColumn(col_idx, std::move(arrow_type)); - - if (result->keep_wkb) { - return_types.emplace_back(core::GeoTypes::WKB_BLOB()); - } else { - return_types.emplace_back(core::GeoTypes::GEOMETRY()); - if (column_name == "wkb_geometry") { - column_name = "geom"; - } - } - result->geometry_column_ids.insert(col_idx); - - } else if 
(attribute.dictionary) { - auto dictionary_type = ArrowType::GetArrowLogicalType(DBConfig::GetConfig(context), attribute); - return_types.emplace_back(dictionary_type->GetDuckType()); - arrow_type->SetDictionary(std::move(dictionary_type)); - result->arrow_table.AddColumn(col_idx, std::move(arrow_type)); - } else { - return_types.emplace_back(arrow_type->GetDuckType()); - result->arrow_table.AddColumn(col_idx, std::move(arrow_type)); - } - - // keep these around for projection/filter pushdown later - // does GDAL even allow duplicate/missing names? - result->all_names.push_back(column_name); - - if (column_name.empty()) { - names.push_back("v" + to_string(col_idx)); - } else { - names.push_back(column_name); - } - } - - schema.release(&schema); - stream.release(&stream); - - GdalTableFunction::RenameColumns(names); - - result->all_types = return_types; - - return std::move(result); -} - -void GdalTableFunction::RenameColumns(vector &names) { - unordered_map name_map; - for (auto &column_name : names) { - // put it all lower_case - auto low_column_name = StringUtil::Lower(column_name); - if (name_map.find(low_column_name) == name_map.end()) { - // Name does not exist yet - name_map[low_column_name]++; - } else { - // Name already exists, we add _x where x is the repetition number - string new_column_name = column_name + "_" + std::to_string(name_map[low_column_name]); - auto new_column_name_low = StringUtil::Lower(new_column_name); - while (name_map.find(new_column_name_low) != name_map.end()) { - // This name is already here due to a previous definition - name_map[low_column_name]++; - new_column_name = column_name + "_" + std::to_string(name_map[low_column_name]); - new_column_name_low = StringUtil::Lower(new_column_name); - } - column_name = new_column_name; - name_map[new_column_name_low]++; - } - } -} - -idx_t GdalTableFunction::MaxThreads(ClientContext &context, const FunctionData *bind_data_p) { - auto &data = bind_data_p->Cast(); - return data.max_threads; 
-} - -//----------------------------------------------------------------------------- -// Init global -//----------------------------------------------------------------------------- -unique_ptr GdalTableFunction::InitGlobal(ClientContext &context, - TableFunctionInitInput &input) { - auto &data = input.bind_data->Cast(); - - auto dataset = GDALDatasetUniquePtr( - GDALDataset::Open(data.prefixed_file_name.c_str(), GDAL_OF_VECTOR | GDAL_OF_VERBOSE_ERROR | GDAL_OF_READONLY, - data.dataset_allowed_drivers, data.dataset_open_options, data.dataset_sibling_files)); - if (dataset == nullptr) { - auto error = string(CPLGetLastErrorMsg()); - throw IOException("Could not open file: " + data.raw_file_name + " (" + error + ")"); - } - - auto global_state = make_uniq(std::move(dataset)); - auto &gstate = *global_state; - - // Open the layer - OGRLayer *layer = nullptr; - if (data.sequential_layer_scan) { - // Get the layer from the dataset by scanning through the layers - for (int i = 0; i < gstate.dataset->GetLayerCount(); i++) { - layer = gstate.dataset->GetLayer(i); - if (i == data.layer_idx) { - // desired layer found - break; - } - // else scan through and empty the layer - OGRFeature *feature; - while ((feature = layer->GetNextFeature()) != nullptr) { - OGRFeature::DestroyFeature(feature); - } - } - } else { - // Otherwise get the layer directly - layer = gstate.dataset->GetLayer(data.layer_idx); - } - - // Apply spatial filter (if we got one) - TryApplySpatialFilter(layer, data.spatial_filter.get()); - // TODO: Apply projection pushdown - - // Create arrow stream from layer - - gstate.stream = make_uniq(); - - // set layer options - if (!layer->GetArrowStream(&gstate.stream->arrow_array_stream, data.layer_creation_options)) { - throw IOException("Could not get arrow stream"); - } - - gstate.max_threads = GdalTableFunction::MaxThreads(context, input.bind_data.get()); - - if (input.CanRemoveFilterColumns()) { - gstate.projection_ids = input.projection_ids; - for (const 
auto &col_idx : input.column_ids) { - if (col_idx == COLUMN_IDENTIFIER_ROW_ID) { - gstate.scanned_types.emplace_back(LogicalType::ROW_TYPE); - } else { - gstate.scanned_types.push_back(data.all_types[col_idx]); - } - } - } - - return std::move(global_state); -} - -//----------------------------------------------------------------------------- -// Init Local -//----------------------------------------------------------------------------- -unique_ptr GdalTableFunction::InitLocal(ExecutionContext &context, - TableFunctionInitInput &input, - GlobalTableFunctionState *global_state_p) { - - auto &global_state = global_state_p->Cast(); - auto current_chunk = make_uniq(); - auto result = make_uniq(std::move(current_chunk), context.client); - result->column_ids = input.column_ids; - result->filters = input.filters.get(); - if (input.CanRemoveFilterColumns()) { - result->all_columns.Initialize(context.client, global_state.scanned_types); - } - - if (!ArrowScanParallelStateNext(context.client, input.bind_data.get(), *result, global_state)) { - return nullptr; - } - - return std::move(result); -} - -//----------------------------------------------------------------------------- -// Scan -//----------------------------------------------------------------------------- -void GdalTableFunction::Scan(ClientContext &context, TableFunctionInput &input, DataChunk &output) { - if (!input.local_state) { - return; - } - auto &data = input.bind_data->Cast(); - auto &state = input.local_state->Cast(); - auto &gstate = input.global_state->Cast(); - - //! 
Out of tuples in this chunk - if (state.chunk_offset >= (idx_t)state.chunk->arrow_array.length) { - if (!ArrowScanParallelStateNext(context, input.bind_data.get(), state, gstate)) { - return; - } - } - auto output_size = MinValue(STANDARD_VECTOR_SIZE, state.chunk->arrow_array.length - state.chunk_offset); - gstate.lines_read += output_size; - - if (gstate.CanRemoveFilterColumns()) { - state.all_columns.Reset(); - state.all_columns.SetCardinality(output_size); - ArrowToDuckDB(state, data.arrow_table.GetColumns(), state.all_columns, gstate.lines_read - output_size, false); - output.ReferenceColumns(state.all_columns, gstate.projection_ids); - } else { - output.SetCardinality(output_size); - ArrowToDuckDB(state, data.arrow_table.GetColumns(), output, gstate.lines_read - output_size, false); - } - - if (!data.keep_wkb) { - // Find the geometry columns - for (idx_t col_idx = 0; col_idx < state.column_ids.size(); col_idx++) { - auto mapped_idx = state.column_ids[col_idx]; - if (data.geometry_column_ids.find(mapped_idx) != data.geometry_column_ids.end()) { - // Found a geometry column - // Convert the WKB columns to a geometry column - state.arena.Reset(); - auto &wkb_vec = output.data[col_idx]; - Vector geom_vec(core::GeoTypes::GEOMETRY(), output_size); - UnaryExecutor::ExecuteWithNulls( - wkb_vec, geom_vec, output_size, [&](string_t input, ValidityMask &validity, idx_t out_idx) { - if (input.Empty()) { - validity.SetInvalid(out_idx); - return core::geometry_t {}; - } - auto geom = state.wkb_reader.Deserialize(input); - return core::Geometry::Serialize(geom, geom_vec); - }); - output.data[col_idx].ReferenceAndSetType(geom_vec); - } - } - } - - output.Verify(); - state.chunk_offset += output.size(); -} - -unique_ptr GdalTableFunction::Cardinality(ClientContext &context, const FunctionData *data) { - auto &gdal_data = data->Cast(); - auto result = make_uniq(); - - if (gdal_data.has_approximate_feature_count) { - result->has_estimated_cardinality = true; - 
result->estimated_cardinality = gdal_data.approximate_feature_count; - } - return result; -} - -unique_ptr GdalTableFunction::ReplacementScan(ClientContext &, ReplacementScanInput &input, - optional_ptr) { - auto &table_name = input.table_name; - auto lower_name = StringUtil::Lower(table_name); - // Check if the table name ends with some common geospatial file extensions - if (StringUtil::EndsWith(lower_name, ".gpkg") || StringUtil::EndsWith(lower_name, ".fgb")) { - - auto table_function = make_uniq(); - vector> children; - children.push_back(make_uniq(Value(table_name))); - table_function->function = make_uniq("ST_Read", std::move(children)); - return std::move(table_function); - } - // else not something we can replace - return nullptr; -} - -//------------------------------------------------------------------------------ -// Documentation -//------------------------------------------------------------------------------ -static constexpr DocTag DOC_TAGS[] = {{"ext", "spatial"}}; - -static constexpr const char *DOC_DESCRIPTION = R"( - Read and import a variety of geospatial file formats using the GDAL library. - - The `ST_Read` table function is based on the [GDAL](https://gdal.org/index.html) translator library and enables reading spatial data from a variety of geospatial vector file formats as if they were DuckDB tables. - - > See [ST_Drivers](#st_drivers) for a list of supported file formats and drivers. - - Except for the `path` parameter, all parameters are optional. - - | Parameter | Type | Description | - | --------- | -----| ----------- | - | `path` | VARCHAR | The path to the file to read. Mandatory | - | `sequential_layer_scan` | BOOLEAN | If set to true, the table function will scan through all layers sequentially and return the first layer that matches the given layer name. This is required for some drivers to work properly, e.g., the OSM driver. 
| - | `spatial_filter` | WKB_BLOB | If set to a WKB blob, the table function will only return rows that intersect with the given WKB geometry. Some drivers may support efficient spatial filtering natively, in which case it will be pushed down. Otherwise the filtering is done by GDAL which may be much slower. | - | `open_options` | VARCHAR[] | A list of key-value pairs that are passed to the GDAL driver to control the opening of the file. E.g., the GeoJSON driver supports a FLATTEN_NESTED_ATTRIBUTES=YES option to flatten nested attributes. | - | `layer` | VARCHAR | The name of the layer to read from the file. If NULL, the first layer is returned. Can also be a layer index (starting at 0). | - | `allowed_drivers` | VARCHAR[] | A list of GDAL driver names that are allowed to be used to open the file. If empty, all drivers are allowed. | - | `sibling_files` | VARCHAR[] | A list of sibling files that are required to open the file. E.g., the ESRI Shapefile driver requires a .shx file to be present. Although most of the time these can be discovered automatically. | - | `spatial_filter_box` | BOX_2D | If set to a BOX_2D, the table function will only return rows that intersect with the given bounding box. Similar to spatial_filter. | - | `keep_wkb` | BOOLEAN | If set, the table function will return geometries in a wkb_geometry column with the type WKB_BLOB (which can be cast to BLOB) instead of GEOMETRY. This is useful if you want to use DuckDB with more exotic geometry subtypes that DuckDB spatial doesnt support representing in the GEOMETRY type yet. | - - Note that GDAL is single-threaded, so this table function will not be able to make full use of parallelism. - - By using `ST_Read`, the spatial extension also provides “replacement scans” for common geospatial file formats, allowing you to query files of these formats as if they were tables directly. 
- - ```sql - SELECT * FROM './path/to/some/shapefile/dataset.shp'; - ``` - - In practice this is just syntax-sugar for calling ST_Read, so there is no difference in performance. If you want to pass additional options, you should use the ST_Read table function directly. - - The following formats are currently recognized by their file extension: - - | Format | Extension | - | ------ | --------- | - | ESRI ShapeFile | .shp | - | GeoPackage | .gpkg | - | FlatGeoBuf | .fgb | -)"; - -static constexpr const char *DOC_EXAMPLE = R"( - -- Read a Shapefile - SELECT * FROM ST_Read('some/file/path/filename.shp'); - - -- Read a GeoJSON file - CREATE TABLE my_geojson_table AS SELECT * FROM ST_Read('some/file/path/filename.json'); -)"; - -//------------------------------------------------------------------------------ -// Register -//------------------------------------------------------------------------------ - -void GdalTableFunction::Register(DatabaseInstance &db) { - - TableFunctionSet set("ST_Read"); - TableFunction scan({LogicalType::VARCHAR}, GdalTableFunction::Scan, GdalTableFunction::Bind, - GdalTableFunction::InitGlobal, GdalTableFunction::InitLocal); - - scan.cardinality = GdalTableFunction::Cardinality; - scan.get_partition_data = ArrowTableFunction::ArrowGetPartitionData; - - scan.projection_pushdown = true; - - scan.named_parameters["open_options"] = LogicalType::LIST(LogicalType::VARCHAR); - scan.named_parameters["allowed_drivers"] = LogicalType::LIST(LogicalType::VARCHAR); - scan.named_parameters["sibling_files"] = LogicalType::LIST(LogicalType::VARCHAR); - scan.named_parameters["spatial_filter_box"] = core::GeoTypes::BOX_2D(); - scan.named_parameters["spatial_filter"] = core::GeoTypes::WKB_BLOB(); - scan.named_parameters["layer"] = LogicalType::VARCHAR; - scan.named_parameters["sequential_layer_scan"] = LogicalType::BOOLEAN; - scan.named_parameters["max_batch_size"] = LogicalType::INTEGER; - scan.named_parameters["keep_wkb"] = LogicalType::BOOLEAN; - 
set.AddFunction(scan); - - ExtensionUtil::RegisterFunction(db, set); - DocUtil::AddDocumentation(db, "ST_Read", DOC_DESCRIPTION, DOC_EXAMPLE, DOC_TAGS); - - // Replacement scan - auto &config = DBConfig::GetConfig(db); - config.replacement_scans.emplace_back(GdalTableFunction::ReplacementScan); -} - -} // namespace gdal - -} // namespace spatial \ No newline at end of file diff --git a/spatial/src/spatial/gdal/functions/st_read_meta.cpp b/spatial/src/spatial/gdal/functions/st_read_meta.cpp deleted file mode 100644 index 595c7fec..00000000 --- a/spatial/src/spatial/gdal/functions/st_read_meta.cpp +++ /dev/null @@ -1,239 +0,0 @@ -#include "duckdb/parser/parsed_data/create_table_function_info.hpp" -#include "duckdb/parser/expression/constant_expression.hpp" -#include "duckdb/parser/expression/function_expression.hpp" -#include "duckdb/parser/tableref/table_function_ref.hpp" -#include "duckdb/common/multi_file_reader.hpp" - -#include "spatial/common.hpp" -#include "spatial/gdal/functions.hpp" -#include "spatial/gdal/file_handler.hpp" - -#include "ogrsf_frmts.h" -#include - -namespace spatial { - -namespace gdal { - -//------------------------------------------------------------------------------ -// Bind -//------------------------------------------------------------------------------ - -struct GDALMetadataBindData : public TableFunctionData { - vector file_names; -}; - -static LogicalType GEOMETRY_FIELD_TYPE = LogicalType::STRUCT({ - {"name", LogicalType::VARCHAR}, - {"type", LogicalType::VARCHAR}, - {"nullable", LogicalType::BOOLEAN}, - {"crs", LogicalType::STRUCT({ - {"name", LogicalType::VARCHAR}, - {"auth_name", LogicalType::VARCHAR}, - {"auth_code", LogicalType::VARCHAR}, - {"wkt", LogicalType::VARCHAR}, - {"proj4", LogicalType::VARCHAR}, - {"projjson", LogicalType::VARCHAR}, - })}, -}); - -static LogicalType STANDARD_FIELD_TYPE = LogicalType::STRUCT({ - {"name", LogicalType::VARCHAR}, - {"type", LogicalType::VARCHAR}, - {"subtype", LogicalType::VARCHAR}, - 
{"nullable", LogicalType::BOOLEAN}, - {"unique", LogicalType::BOOLEAN}, - {"width", LogicalType::BIGINT}, - {"precision", LogicalType::BIGINT}, -}); - -static LogicalType LAYER_TYPE = LogicalType::STRUCT({ - {"name", LogicalType::VARCHAR}, - {"feature_count", LogicalType::BIGINT}, - {"geometry_fields", LogicalType::LIST(GEOMETRY_FIELD_TYPE)}, - {"fields", LogicalType::LIST(STANDARD_FIELD_TYPE)}, -}); - -static unique_ptr Bind(ClientContext &context, TableFunctionBindInput &input, - vector &return_types, vector &names) { - auto result = make_uniq(); - - auto multi_file_reader = MultiFileReader::Create(input.table_function); - result->file_names = - multi_file_reader->CreateFileList(context, input.inputs[0], FileGlobOptions::ALLOW_EMPTY)->GetAllFiles(); - - names.push_back("file_name"); - return_types.push_back(LogicalType::VARCHAR); - - names.push_back("driver_short_name"); - return_types.push_back(LogicalType::VARCHAR); - - names.push_back("driver_long_name"); - return_types.push_back(LogicalType::VARCHAR); - - names.push_back("layers"); - return_types.push_back(LogicalType::LIST(LAYER_TYPE)); - - // TODO: Add metadata, domains, relationships - /* - names.push_back("metadata"); - return_types.push_back(LogicalType::VARCHAR); - - names.push_back("domains"); - return_types.push_back(LogicalType::VARCHAR); - - names.push_back("relationships"); - return_types.push_back(LogicalType::VARCHAR); - */ - - return std::move(result); -} - -//------------------------------------------------------------------------------ -// Init -//------------------------------------------------------------------------------ -struct GDALMetadataState : public GlobalTableFunctionState { - idx_t current_file_idx = 0; -}; - -static unique_ptr Init(ClientContext &context, TableFunctionInitInput &input) { - auto result = make_uniq(); - return std::move(result); -} - -//------------------------------------------------------------------------------ -// Scan 
-//------------------------------------------------------------------------------ - -static Value GetLayerData(GDALDatasetUniquePtr &dataset) { - vector layer_values; - - for (const auto &layer : dataset->GetLayers()) { - child_list_t layer_value_fields; - - layer_value_fields.emplace_back("name", Value(layer->GetName())); - layer_value_fields.emplace_back("feature_count", Value(static_cast(layer->GetFeatureCount()))); - - vector geometry_fields; - for (const auto &field : layer->GetLayerDefn()->GetGeomFields()) { - child_list_t geometry_field_value_fields; - auto field_name = field->GetNameRef(); - if (std::strlen(field_name) == 0) { - field_name = "geom"; - } - geometry_field_value_fields.emplace_back("name", Value(field_name)); - geometry_field_value_fields.emplace_back("type", Value(OGRGeometryTypeToName(field->GetType()))); - geometry_field_value_fields.emplace_back("nullable", Value(static_cast(field->IsNullable()))); - - auto crs = field->GetSpatialRef(); - if (crs != nullptr) { - child_list_t crs_value_fields; - crs_value_fields.emplace_back("name", Value(crs->GetName())); - crs_value_fields.emplace_back("auth_name", Value(crs->GetAuthorityName(nullptr))); - crs_value_fields.emplace_back("auth_code", Value(crs->GetAuthorityCode(nullptr))); - - char *wkt_ptr = nullptr; - crs->exportToWkt(&wkt_ptr); - crs_value_fields.emplace_back("wkt", wkt_ptr ? Value(wkt_ptr) : Value()); - CPLFree(wkt_ptr); - - char *proj4_ptr = nullptr; - crs->exportToProj4(&proj4_ptr); - crs_value_fields.emplace_back("proj4", proj4_ptr ? Value(proj4_ptr) : Value()); - CPLFree(proj4_ptr); - - char *projjson_ptr = nullptr; - crs->exportToPROJJSON(&projjson_ptr, nullptr); - crs_value_fields.emplace_back("projjson", projjson_ptr ? 
Value(projjson_ptr) : Value()); - CPLFree(projjson_ptr); - - geometry_field_value_fields.emplace_back("crs", Value::STRUCT(crs_value_fields)); - } - - geometry_fields.push_back(Value::STRUCT(geometry_field_value_fields)); - } - layer_value_fields.emplace_back("geometry_fields", - Value::LIST(GEOMETRY_FIELD_TYPE, std::move(geometry_fields))); - - vector standard_fields; - for (const auto &field : layer->GetLayerDefn()->GetFields()) { - child_list_t standard_field_value_fields; - standard_field_value_fields.emplace_back("name", Value(field->GetNameRef())); - standard_field_value_fields.emplace_back("type", Value(OGR_GetFieldTypeName(field->GetType()))); - standard_field_value_fields.emplace_back("subtype", Value(OGR_GetFieldSubTypeName(field->GetSubType()))); - standard_field_value_fields.emplace_back("nullable", Value(field->IsNullable())); - standard_field_value_fields.emplace_back("unique", Value(field->IsUnique())); - standard_field_value_fields.emplace_back("width", Value(field->GetWidth())); - standard_field_value_fields.emplace_back("precision", Value(field->GetPrecision())); - standard_fields.push_back(Value::STRUCT(standard_field_value_fields)); - } - layer_value_fields.emplace_back("fields", Value::LIST(STANDARD_FIELD_TYPE, std::move(standard_fields))); - - layer_values.push_back(Value::STRUCT(layer_value_fields)); - } - - return Value::LIST(LAYER_TYPE, std::move(layer_values)); -} - -static void Scan(ClientContext &context, TableFunctionInput &input, DataChunk &output) { - auto &bind_data = input.bind_data->Cast(); - auto &state = input.global_state->Cast(); - - auto out_size = MinValue(STANDARD_VECTOR_SIZE, bind_data.file_names.size() - state.current_file_idx); - - for (idx_t out_idx = 0; out_idx < out_size; out_idx++, state.current_file_idx++) { - auto file_name = bind_data.file_names[state.current_file_idx]; - auto prefixed_file_name = GDALClientContextState::GetOrCreate(context).GetPrefix(file_name); - - GDALDatasetUniquePtr dataset; - try { - dataset 
= GDALDatasetUniquePtr( - GDALDataset::Open(prefixed_file_name.c_str(), GDAL_OF_VECTOR | GDAL_OF_VERBOSE_ERROR)); - } catch (...) { - // Just skip anything we cant open - out_idx--; - out_size--; - continue; - } - - output.data[0].SetValue(out_idx, file_name); - output.data[1].SetValue(out_idx, dataset->GetDriver()->GetDescription()); - output.data[2].SetValue(out_idx, dataset->GetDriver()->GetMetadataItem(GDAL_DMD_LONGNAME)); - output.data[3].SetValue(out_idx, GetLayerData(dataset)); - } - - output.SetCardinality(out_size); -} - -//------------------------------------------------------------------------------ -// Documentation -//------------------------------------------------------------------------------ -static constexpr DocTag DOC_TAGS[] = {{"ext", "spatial"}}; - -static constexpr const char *DOC_DESCRIPTION = R"( - Read the metadata from a variety of geospatial file formats using the GDAL library. - - The `ST_Read_Meta` table function accompanies the `ST_Read` table function, but instead of reading the contents of a file, this function scans the metadata instead. - Since the data model of the underlying GDAL library is quite flexible, most of the interesting metadata is within the returned `layers` column, which is a somewhat complex nested structure of DuckDB `STRUCT` and `LIST` types. 
-)"; - -static constexpr const char *DOC_EXAMPLE = R"( - -- Find the coordinate reference system authority name and code for the first layers first geometry column in the file - SELECT - layers[1].geometry_fields[1].crs.auth_name as name, - layers[1].geometry_fields[1].crs.auth_code as code - FROM st_read_meta('../../tmp/data/amsterdam_roads.fgb'); -)"; - -//------------------------------------------------------------------------------ -// Register -//------------------------------------------------------------------------------ -void GdalMetadataFunction::Register(DatabaseInstance &db) { - TableFunction func("ST_Read_Meta", {LogicalType::VARCHAR}, Scan, Bind, Init); - ExtensionUtil::RegisterFunction(db, MultiFileReader::CreateFunctionSet(func)); - DocUtil::AddDocumentation(db, "ST_Read_Meta", DOC_DESCRIPTION, DOC_EXAMPLE, DOC_TAGS); -} - -} // namespace gdal - -} // namespace spatial \ No newline at end of file diff --git a/spatial/src/spatial/gdal/functions/st_write.cpp b/spatial/src/spatial/gdal/functions/st_write.cpp deleted file mode 100644 index 083c1be7..00000000 --- a/spatial/src/spatial/gdal/functions/st_write.cpp +++ /dev/null @@ -1,575 +0,0 @@ -#include "duckdb/catalog/catalog.hpp" -#include "duckdb/common/types/value.hpp" -#include "duckdb/function/copy_function.hpp" -#include "duckdb/function/table_function.hpp" -#include "duckdb/main/client_context.hpp" -#include "duckdb/main/config.hpp" -#include "duckdb/parser/parsed_data/copy_info.hpp" -#include "duckdb/parser/parsed_data/create_copy_function_info.hpp" -#include "duckdb/parser/parsed_data/create_table_function_info.hpp" -#include "spatial/core/types.hpp" -#include "spatial/core/geometry/geometry_type.hpp" -#include "spatial/core/geometry/wkb_writer.hpp" -#include "spatial/gdal/functions.hpp" -#include "spatial/gdal/file_handler.hpp" - -#include "ogrsf_frmts.h" - -namespace spatial { - -namespace gdal { - -struct BindData : public TableFunctionData { - - string file_path; - vector field_sql_types; - 
vector field_names; - string driver_name; - string layer_name; - CPLStringList dataset_creation_options; - CPLStringList layer_creation_options; - string target_srs; - OGRwkbGeometryType geometry_type = wkbUnknown; - - BindData(string file_path, vector field_sql_types, vector field_names) - : file_path(std::move(file_path)), field_sql_types(std::move(field_sql_types)), - field_names(std::move(field_names)) { - } -}; - -struct LocalState : public LocalFunctionData { - ArenaAllocator arena; - explicit LocalState(ClientContext &context) : arena(BufferAllocator::Get(context)) { - } -}; - -struct GlobalState : public GlobalFunctionData { - mutex lock; - GDALDatasetUniquePtr dataset; - OGRLayer *layer; - vector> field_defs; - - GlobalState(GDALDatasetUniquePtr dataset, OGRLayer *layer, vector> field_defs) - : dataset(std::move(dataset)), layer(layer), field_defs(std::move(field_defs)) { - } -}; - -//===--------------------------------------------------------------------===// -// Bind -//===--------------------------------------------------------------------===// -static unique_ptr Bind(ClientContext &context, CopyFunctionBindInput &input, const vector &names, - const vector &sql_types) { - - auto bind_data = make_uniq(input.info.file_path, sql_types, names); - - // check all the options in the copy info - // and set - for (auto &option : input.info.options) { - if (StringUtil::Upper(option.first) == "DRIVER") { - auto set = option.second.front(); - if (set.type().id() == LogicalTypeId::VARCHAR) { - bind_data->driver_name = set.GetValue(); - } else { - throw BinderException("Driver name must be a string"); - } - } else if (StringUtil::Upper(option.first) == "LAYER_NAME") { - auto set = option.second.front(); - if (set.type().id() == LogicalTypeId::VARCHAR) { - bind_data->layer_name = set.GetValue(); - } else { - throw BinderException("Layer name must be a string"); - } - } else if (StringUtil::Upper(option.first) == "LAYER_CREATION_OPTIONS") { - auto set = option.second; 
- for (auto &s : set) { - if (s.type().id() != LogicalTypeId::VARCHAR) { - throw BinderException("Layer creation options must be strings"); - } - auto str = s.GetValue(); - bind_data->layer_creation_options.AddString(str.c_str()); - } - } else if (StringUtil::Upper(option.first) == "DATASET_CREATION_OPTIONS") { - auto set = option.second; - for (auto &s : set) { - if (s.type().id() != LogicalTypeId::VARCHAR) { - throw BinderException("Dataset creation options must be strings"); - } - auto str = s.GetValue(); - bind_data->dataset_creation_options.AddString(str.c_str()); - } - } else if (StringUtil::Upper(option.first) == "GEOMETRY_TYPE") { - auto &set = option.second.front(); - if (set.type().id() == LogicalTypeId::VARCHAR) { - auto type = set.GetValue(); - if (StringUtil::CIEquals(type, "POINT")) { - bind_data->geometry_type = wkbPoint; - } else if (StringUtil::CIEquals(type, "LINESTRING")) { - bind_data->geometry_type = wkbLineString; - } else if (StringUtil::CIEquals(type, "POLYGON")) { - bind_data->geometry_type = wkbPolygon; - } else if (StringUtil::CIEquals(type, "MULTIPOINT")) { - bind_data->geometry_type = wkbMultiPoint; - } else if (StringUtil::CIEquals(type, "MULTILINESTRING")) { - bind_data->geometry_type = wkbMultiLineString; - } else if (StringUtil::CIEquals(type, "MULTIPOLYGON")) { - bind_data->geometry_type = wkbMultiPolygon; - } else if (StringUtil::CIEquals(type, "GEOMETRYCOLLECTION")) { - bind_data->geometry_type = wkbGeometryCollection; - } else { - throw BinderException( - "Unknown geometry type '%s', expected one of 'POINT', 'LINESTRING', 'POLYGON', 'MULTIPOINT', " - "'MULTILINESTRING', 'MULTIPOLYGON', 'GEOMETRYCOLLECTION'", - type); - } - } else { - throw BinderException("Geometry type must be a string"); - } - } else if (StringUtil::Upper(option.first) == "SRS") { - auto &set = option.second.front(); - if (set.type().id() == LogicalTypeId::VARCHAR) { - bind_data->target_srs = set.GetValue(); - } else { - throw BinderException("SRS must be a 
string"); - } - } else { - throw BinderException("Unknown option '%s'", option.first); - } - // save dataset open options.. i guess? - } - - if (bind_data->driver_name.empty()) { - throw BinderException("Driver name must be specified"); - } - - if (bind_data->layer_name.empty()) { - // Default to the base name of the file - auto &fs = FileSystem::GetFileSystem(context); - bind_data->layer_name = fs.ExtractBaseName(bind_data->file_path); - } - - auto driver = GetGDALDriverManager()->GetDriverByName(bind_data->driver_name.c_str()); - if (!driver) { - throw BinderException("Unknown driver '%s'", bind_data->driver_name); - } - - // Try get the file extension from the driver - auto file_ext = driver->GetMetadataItem(GDAL_DMD_EXTENSION); - if (file_ext) { - input.file_extension = file_ext; - } else { - // Space separated list of file extensions - auto file_exts = driver->GetMetadataItem(GDAL_DMD_EXTENSIONS); - if (file_exts) { - auto exts = StringUtil::Split(file_exts, ' '); - if (!exts.empty()) { - input.file_extension = exts[0]; - } - } - } - - // Driver specific checks - if (bind_data->driver_name == "OpenFileGDB" && bind_data->geometry_type == wkbUnknown) { - throw BinderException("OpenFileGDB requires 'GEOMETRY_TYPE' parameter to be set when writing!"); - } - - return std::move(bind_data); -} - -//===--------------------------------------------------------------------===// -// Init Local -//===--------------------------------------------------------------------===// -static unique_ptr InitLocal(ExecutionContext &context, FunctionData &bind_data) { - auto local_data = make_uniq(context.client); - return std::move(local_data); -} - -//===--------------------------------------------------------------------===// -// Init Global -//===--------------------------------------------------------------------===// -static bool IsGeometryType(const LogicalType &type) { - return type == core::GeoTypes::WKB_BLOB() || type == core::GeoTypes::POINT_2D() || - type == 
core::GeoTypes::GEOMETRY(); -} - -static unique_ptr OGRFieldTypeFromLogicalType(const string &name, const LogicalType &type) { - // TODO: Set OGRFieldSubType for integers and integer lists - // TODO: Set string width? - - switch (type.id()) { - case LogicalTypeId::BOOLEAN: { - auto field = make_uniq(name.c_str(), OFTInteger); - field->SetSubType(OFSTBoolean); - return field; - } - case LogicalTypeId::TINYINT: { - // There is no subtype for byte? - return make_uniq(name.c_str(), OFTInteger); - } - case LogicalTypeId::SMALLINT: { - auto field = make_uniq(name.c_str(), OFTInteger); - field->SetSubType(OFSTInt16); - return field; - } - case LogicalTypeId::INTEGER: { - return make_uniq(name.c_str(), OFTInteger); - } - case LogicalTypeId::BIGINT: - return make_uniq(name.c_str(), OFTInteger64); - case LogicalTypeId::FLOAT: { - auto field = make_uniq(name.c_str(), OFTReal); - field->SetSubType(OFSTFloat32); - return field; - } - case LogicalTypeId::DOUBLE: - return make_uniq(name.c_str(), OFTReal); - case LogicalTypeId::VARCHAR: - return make_uniq(name.c_str(), OFTString); - case LogicalTypeId::BLOB: - return make_uniq(name.c_str(), OFTBinary); - case LogicalTypeId::DATE: - return make_uniq(name.c_str(), OFTDate); - case LogicalTypeId::TIME: - return make_uniq(name.c_str(), OFTTime); - case LogicalTypeId::TIMESTAMP: - case LogicalTypeId::TIMESTAMP_NS: - case LogicalTypeId::TIMESTAMP_MS: - case LogicalTypeId::TIMESTAMP_SEC: - case LogicalTypeId::TIMESTAMP_TZ: - return make_uniq(name.c_str(), OFTDateTime); - case LogicalTypeId::LIST: { - auto child_type = ListType::GetChildType(type); - switch (child_type.id()) { - case LogicalTypeId::BOOLEAN: { - auto field = make_uniq(name.c_str(), OFTIntegerList); - field->SetSubType(OFSTBoolean); - return field; - } - case LogicalTypeId::TINYINT: { - // There is no subtype for byte? 
- return make_uniq(name.c_str(), OFTIntegerList); - } - case LogicalTypeId::SMALLINT: { - auto field = make_uniq(name.c_str(), OFTIntegerList); - field->SetSubType(OFSTInt16); - return field; - } - case LogicalTypeId::INTEGER: - return make_uniq(name.c_str(), OFTIntegerList); - case LogicalTypeId::BIGINT: - return make_uniq(name.c_str(), OFTInteger64List); - case LogicalTypeId::FLOAT: { - auto field = make_uniq(name.c_str(), OFTRealList); - field->SetSubType(OFSTFloat32); - return field; - } - case LogicalTypeId::DOUBLE: - return make_uniq(name.c_str(), OFTRealList); - case LogicalTypeId::VARCHAR: - return make_uniq(name.c_str(), OFTStringList); - default: - throw NotImplementedException("Unsupported type for OGR: %s", type.ToString()); - } - } - default: - throw NotImplementedException("Unsupported type for OGR: %s", type.ToString()); - } -} -static unique_ptr InitGlobal(ClientContext &context, FunctionData &bind_data, - const string &file_path) { - - auto &gdal_data = bind_data.Cast(); - GDALDriver *driver = GetGDALDriverManager()->GetDriverByName(gdal_data.driver_name.c_str()); - if (!driver) { - throw IOException("Could not open driver"); - } - - // Create the dataset - auto &client_ctx = GDALClientContextState::GetOrCreate(context); - auto prefixed_path = client_ctx.GetPrefix(file_path); - auto dataset = GDALDatasetUniquePtr( - driver->Create(prefixed_path.c_str(), 0, 0, 0, GDT_Unknown, gdal_data.dataset_creation_options)); - if (!dataset) { - throw IOException("Could not open dataset"); - } - - // Set the SRS if provided - OGRSpatialReference srs; - if (!gdal_data.target_srs.empty()) { - srs.SetFromUserInput(gdal_data.target_srs.c_str()); - } - // Not all GDAL drivers check if the SRS is empty (cough cough GeoJSONSeq) - // so we have to pass nullptr if we want the default behavior. - OGRSpatialReference *srs_ptr = gdal_data.target_srs.empty() ? 
nullptr : &srs; - - auto layer = dataset->CreateLayer(gdal_data.layer_name.c_str(), srs_ptr, gdal_data.geometry_type, - gdal_data.layer_creation_options); - if (!layer) { - throw IOException("Could not create layer"); - } - - // Create the layer field definitions - idx_t geometry_field_count = 0; - vector> field_defs; - for (idx_t i = 0; i < gdal_data.field_names.size(); i++) { - auto &name = gdal_data.field_names[i]; - auto &type = gdal_data.field_sql_types[i]; - - if (IsGeometryType(type)) { - geometry_field_count++; - if (geometry_field_count > 1) { - throw NotImplementedException("Multiple geometry fields not supported yet"); - } - } else { - auto field = OGRFieldTypeFromLogicalType(name, type); - if (layer->CreateField(field.get()) != OGRERR_NONE) { - throw IOException("Could not create attribute field"); - } - // TODO: ^ Like we do here vvv - field_defs.push_back(std::move(field)); - } - } - auto global_data = make_uniq(std::move(dataset), layer, std::move(field_defs)); - - return std::move(global_data); -} - -//===--------------------------------------------------------------------===// -// Sink -//===--------------------------------------------------------------------===// - -static OGRGeometryUniquePtr OGRGeometryFromValue(const LogicalType &type, const Value &value, ArenaAllocator &arena) { - if (value.IsNull()) { - return nullptr; - } - - if (type == core::GeoTypes::WKB_BLOB()) { - auto str = value.GetValueUnsafe(); - - OGRGeometry *ptr; - size_t consumed; - auto ok = OGRGeometryFactory::createFromWkb(str.GetDataUnsafe(), nullptr, &ptr, str.GetSize(), wkbVariantIso, - consumed); - - if (ok != OGRERR_NONE) { - throw IOException("Could not parse WKB"); - } - return OGRGeometryUniquePtr(ptr); - } else if (type == core::GeoTypes::GEOMETRY()) { - auto blob = value.GetValueUnsafe(); - uint32_t size; - auto wkb = core::WKBWriter::Write(core::geometry_t(blob), &size, arena); - OGRGeometry *ptr; - auto ok = OGRGeometryFactory::createFromWkb(wkb, nullptr, &ptr, 
size, wkbVariantIso); - if (ok != OGRERR_NONE) { - throw IOException("Could not parse WKB"); - } - return OGRGeometryUniquePtr(ptr); - } else if (type == core::GeoTypes::POINT_2D()) { - auto children = StructValue::GetChildren(value); - auto x = children[0].GetValue(); - auto y = children[1].GetValue(); - auto ogr_point = new OGRPoint(x, y); - return OGRGeometryUniquePtr(ogr_point); - } else { - throw NotImplementedException("Unsupported geometry type"); - } -} - -static void SetOgrFieldFromValue(OGRFeature *feature, int field_idx, const LogicalType &type, const Value &value) { - // TODO: Set field by index always instead of by name for performance. - if (value.IsNull()) { - feature->SetFieldNull(field_idx); - return; - } - switch (type.id()) { - case LogicalTypeId::BOOLEAN: - feature->SetField(field_idx, value.GetValue()); - break; - case LogicalTypeId::TINYINT: - feature->SetField(field_idx, value.GetValue()); - break; - case LogicalTypeId::SMALLINT: - feature->SetField(field_idx, value.GetValue()); - break; - case LogicalTypeId::INTEGER: - feature->SetField(field_idx, value.GetValue()); - break; - case LogicalTypeId::BIGINT: - feature->SetField(field_idx, (GIntBig)value.GetValue()); - break; - case LogicalTypeId::FLOAT: - feature->SetField(field_idx, value.GetValue()); - break; - case LogicalTypeId::DOUBLE: - feature->SetField(field_idx, value.GetValue()); - break; - case LogicalTypeId::VARCHAR: - case LogicalTypeId::BLOB: { - auto str = value.GetValueUnsafe(); - feature->SetField(field_idx, (int)str.GetSize(), str.GetDataUnsafe()); - } break; - case LogicalTypeId::DATE: { - auto date = value.GetValueUnsafe(); - auto year = Date::ExtractYear(date); - auto month = Date::ExtractMonth(date); - auto day = Date::ExtractDay(date); - feature->SetField(field_idx, year, month, day, 0, 0, 0, 0); - } break; - case LogicalTypeId::TIME: { - auto time = value.GetValueUnsafe(); - auto hour = static_cast(time.micros / Interval::MICROS_PER_HOUR); - auto minute = 
static_cast((time.micros % Interval::MICROS_PER_HOUR) / Interval::MICROS_PER_MINUTE); - auto second = static_cast(static_cast(time.micros % Interval::MICROS_PER_MINUTE) / - static_cast(Interval::MICROS_PER_SEC)); - feature->SetField(field_idx, 0, 0, 0, hour, minute, second, 0); - } break; - case LogicalTypeId::TIMESTAMP: { - auto timestamp = value.GetValueUnsafe(); - auto date = Timestamp::GetDate(timestamp); - auto time = Timestamp::GetTime(timestamp); - auto year = Date::ExtractYear(date); - auto month = Date::ExtractMonth(date); - auto day = Date::ExtractDay(date); - auto hour = static_cast((time.micros % Interval::MICROS_PER_DAY) / Interval::MICROS_PER_HOUR); - auto minute = static_cast((time.micros % Interval::MICROS_PER_HOUR) / Interval::MICROS_PER_MINUTE); - auto second = static_cast(static_cast(time.micros % Interval::MICROS_PER_MINUTE) / - static_cast(Interval::MICROS_PER_SEC)); - feature->SetField(field_idx, year, month, day, hour, minute, second, 0); - } break; - case LogicalTypeId::TIMESTAMP_NS: { - auto timestamp = value.GetValueUnsafe(); - timestamp = Timestamp::FromEpochNanoSeconds(timestamp.value); - auto date = Timestamp::GetDate(timestamp); - auto time = Timestamp::GetTime(timestamp); - auto year = Date::ExtractYear(date); - auto month = Date::ExtractMonth(date); - auto day = Date::ExtractDay(date); - auto hour = static_cast((time.micros % Interval::MICROS_PER_DAY) / Interval::MICROS_PER_HOUR); - auto minute = static_cast((time.micros % Interval::MICROS_PER_HOUR) / Interval::MICROS_PER_MINUTE); - auto second = static_cast(static_cast(time.micros % Interval::MICROS_PER_MINUTE) / - static_cast(Interval::MICROS_PER_SEC)); - feature->SetField(field_idx, year, month, day, hour, minute, second, 0); - } break; - case LogicalTypeId::TIMESTAMP_MS: { - auto timestamp = value.GetValueUnsafe(); - timestamp = Timestamp::FromEpochMs(timestamp.value); - auto date = Timestamp::GetDate(timestamp); - auto time = Timestamp::GetTime(timestamp); - auto year = 
Date::ExtractYear(date); - auto month = Date::ExtractMonth(date); - auto day = Date::ExtractDay(date); - auto hour = static_cast((time.micros % Interval::MICROS_PER_DAY) / Interval::MICROS_PER_HOUR); - auto minute = static_cast((time.micros % Interval::MICROS_PER_HOUR) / Interval::MICROS_PER_MINUTE); - auto second = static_cast(static_cast(time.micros % Interval::MICROS_PER_MINUTE) / - static_cast(Interval::MICROS_PER_SEC)); - feature->SetField(field_idx, year, month, day, hour, minute, second, 0); - } break; - case LogicalTypeId::TIMESTAMP_SEC: { - auto timestamp = value.GetValueUnsafe(); - timestamp = Timestamp::FromEpochSeconds(timestamp.value); - auto date = Timestamp::GetDate(timestamp); - auto time = Timestamp::GetTime(timestamp); - auto year = Date::ExtractYear(date); - auto month = Date::ExtractMonth(date); - auto day = Date::ExtractDay(date); - auto hour = static_cast((time.micros % Interval::MICROS_PER_DAY) / Interval::MICROS_PER_HOUR); - auto minute = static_cast((time.micros % Interval::MICROS_PER_HOUR) / Interval::MICROS_PER_MINUTE); - auto second = static_cast(static_cast(time.micros % Interval::MICROS_PER_MINUTE) / - static_cast(Interval::MICROS_PER_SEC)); - feature->SetField(field_idx, year, month, day, hour, minute, second, 0); - } break; - case LogicalTypeId::TIMESTAMP_TZ: { - // Not sure what to with the timezone, just let GDAL parse it? 
- auto timestamp = value.GetValueUnsafe(); - auto time_str = Timestamp::ToString(timestamp); - feature->SetField(field_idx, time_str.c_str()); - } break; - default: - // TODO: Handle list types - throw NotImplementedException("Unsupported field type"); - } -} - -static void Sink(ExecutionContext &context, FunctionData &bdata, GlobalFunctionData &gstate, LocalFunctionData &lstate, - DataChunk &input) { - auto &bind_data = bdata.Cast(); - auto &global_state = gstate.Cast(); - auto &local_state = lstate.Cast(); - local_state.arena.Reset(); - - lock_guard d_lock(global_state.lock); - auto layer = global_state.layer; - - // Create the feature - input.Flatten(); - for (idx_t row_idx = 0; row_idx < input.size(); row_idx++) { - - auto feature = OGRFeatureUniquePtr(OGRFeature::CreateFeature(layer->GetLayerDefn())); - - // Geometry fields do not count towards the field index, so we need to keep track of them separately. - idx_t field_idx = 0; - for (idx_t col_idx = 0; col_idx < input.ColumnCount(); col_idx++) { - auto &type = bind_data.field_sql_types[col_idx]; - auto value = input.GetValue(col_idx, row_idx); - - if (IsGeometryType(type)) { - // TODO: check how many geometry fields there are and use the correct one. 
- auto geom = OGRGeometryFromValue(type, value, local_state.arena); - if (geom && bind_data.geometry_type != wkbUnknown && - geom->getGeometryType() != bind_data.geometry_type) { - auto got_name = - StringUtil::Replace(StringUtil::Upper(OGRGeometryTypeToName(geom->getGeometryType())), " ", ""); - auto expected_name = - StringUtil::Replace(StringUtil::Upper(OGRGeometryTypeToName(bind_data.geometry_type)), " ", ""); - throw InvalidInputException("Expected all geometries to be of type '%s', but got one of type '%s'", - expected_name, got_name); - } - - if (feature->SetGeometry(geom.get()) != OGRERR_NONE) { - throw IOException("Could not set geometry"); - } - } else { - SetOgrFieldFromValue(feature.get(), (int)field_idx, type, value); - field_idx++; - } - } - if (layer->CreateFeature(feature.get()) != OGRERR_NONE) { - throw IOException("Could not create feature"); - } - } -} - -//===--------------------------------------------------------------------===// -// Combine -//===--------------------------------------------------------------------===// - -static void Combine(ExecutionContext &context, FunctionData &bind_data, GlobalFunctionData &gstate, - LocalFunctionData &lstate) { -} - -//===--------------------------------------------------------------------===// -// Finalize -//===--------------------------------------------------------------------===// -static void Finalize(ClientContext &context, FunctionData &bind_data, GlobalFunctionData &gstate) { - auto &global_state = (GlobalState &)gstate; - global_state.dataset->FlushCache(); - global_state.dataset->Close(); -} - -void GdalCopyFunction::Register(DatabaseInstance &db) { - // register the copy function - CopyFunction info("GDAL"); - info.copy_to_bind = Bind; - info.copy_to_initialize_local = InitLocal; - info.copy_to_initialize_global = InitGlobal; - info.copy_to_sink = Sink; - info.copy_to_combine = Combine; - info.copy_to_finalize = Finalize; - info.extension = "gdal"; - - ExtensionUtil::RegisterFunction(db, 
info); -} - -} // namespace gdal - -} // namespace spatial diff --git a/spatial/src/spatial/gdal/module.cpp b/spatial/src/spatial/gdal/module.cpp deleted file mode 100644 index e0cee7ed..00000000 --- a/spatial/src/spatial/gdal/module.cpp +++ /dev/null @@ -1,72 +0,0 @@ -#include "spatial/gdal/module.hpp" -#include "spatial/gdal/functions.hpp" -#include "spatial/gdal/file_handler.hpp" -#include "spatial/common.hpp" - -#include "ogrsf_frmts.h" - -#include - -namespace spatial { - -namespace gdal { - -void GdalModule::Register(DatabaseInstance &db) { - - // Load GDAL (once) - static std::once_flag loaded; - std::call_once(loaded, [&]() { - // Register all embedded drivers (dont go looking for plugins) - OGRRegisterAllInternal(); - - // Set GDAL error handler - - CPLSetErrorHandler([](CPLErr e, int code, const char *raw_msg) { - // DuckDB doesnt do warnings, so we only throw on errors - if (e != CE_Failure && e != CE_Fatal) { - return; - } - - // If the error contains a /vsiduckdb-/ prefix, - // try to strip it off to make the errors more readable - auto msg = string(raw_msg); - auto path_pos = msg.find("/vsiduckdb-"); - if (path_pos != string::npos) { - // We found a path, strip it off - msg.erase(path_pos, 48); - } - - switch (code) { - case CPLE_NoWriteAccess: - throw PermissionException("GDAL Error (%d): %s", code, msg); - case CPLE_UserInterrupt: - throw InterruptException(); - case CPLE_OutOfMemory: - throw OutOfMemoryException("GDAL Error (%d): %s", code, msg); - case CPLE_NotSupported: - throw NotImplementedException("GDAL Error (%d): %s", code, msg); - case CPLE_AssertionFailed: - case CPLE_ObjectNull: - throw InternalException("GDAL Error (%d): %s", code, msg); - case CPLE_IllegalArg: - throw InvalidInputException("GDAL Error (%d): %s", code, msg); - case CPLE_AppDefined: - case CPLE_HttpResponse: - case CPLE_FileIO: - case CPLE_OpenFailed: - default: - throw IOException("GDAL Error (%d): %s", code, msg); - } - }); - }); - - // Register functions - 
GdalTableFunction::Register(db); - GdalDriversTableFunction::Register(db); - GdalCopyFunction::Register(db); - GdalMetadataFunction::Register(db); -} - -} // namespace gdal - -} // namespace spatial diff --git a/spatial/src/spatial/geographiclib/CMakeLists.txt b/spatial/src/spatial/geographiclib/CMakeLists.txt deleted file mode 100644 index 71f0b128..00000000 --- a/spatial/src/spatial/geographiclib/CMakeLists.txt +++ /dev/null @@ -1,7 +0,0 @@ -add_subdirectory(functions) - -set(EXTENSION_SOURCES - ${EXTENSION_SOURCES} - ${CMAKE_CURRENT_SOURCE_DIR}/module.cpp - PARENT_SCOPE -) \ No newline at end of file diff --git a/spatial/src/spatial/geographiclib/functions/CMakeLists.txt b/spatial/src/spatial/geographiclib/functions/CMakeLists.txt deleted file mode 100644 index 8d22d992..00000000 --- a/spatial/src/spatial/geographiclib/functions/CMakeLists.txt +++ /dev/null @@ -1,9 +0,0 @@ -set(EXTENSION_SOURCES - ${EXTENSION_SOURCES} - ${CMAKE_CURRENT_SOURCE_DIR}/st_distance_spheroid.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/st_distance_within_spheroid.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/st_length_spheroid.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/st_area_spheroid.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/st_perimeter_spheroid.cpp - PARENT_SCOPE -) \ No newline at end of file diff --git a/spatial/src/spatial/geographiclib/functions/st_area_spheroid.cpp b/spatial/src/spatial/geographiclib/functions/st_area_spheroid.cpp deleted file mode 100644 index ec1c25a7..00000000 --- a/spatial/src/spatial/geographiclib/functions/st_area_spheroid.cpp +++ /dev/null @@ -1,162 +0,0 @@ -#include "duckdb/common/vector_operations/generic_executor.hpp" -#include "duckdb/parser/parsed_data/create_scalar_function_info.hpp" - -#include "spatial/common.hpp" -#include "spatial/core/types.hpp" -#include "spatial/core/geometry/geometry.hpp" -#include "spatial/core/functions/common.hpp" - -#include "spatial/geographiclib/functions.hpp" -#include "spatial/geographiclib/module.hpp" - -#include "GeographicLib/Geodesic.hpp" -#include 
"GeographicLib/PolygonArea.hpp" - -#include "cmath" - -namespace spatial { - -namespace geographiclib { - -using namespace core; - -//------------------------------------------------------------------------------ -// POLYGON_2D -//------------------------------------------------------------------------------ - -static void GeodesicPolygon2DFunction(DataChunk &args, ExpressionState &state, Vector &result) { - D_ASSERT(args.data.size() == 1); - - auto &input = args.data[0]; - auto count = args.size(); - - auto &ring_vec = ListVector::GetEntry(input); - auto ring_entries = ListVector::GetData(ring_vec); - auto &coord_vec = ListVector::GetEntry(ring_vec); - auto &coord_vec_children = StructVector::GetEntries(coord_vec); - auto x_data = FlatVector::GetData(*coord_vec_children[0]); - auto y_data = FlatVector::GetData(*coord_vec_children[1]); - - const GeographicLib::Geodesic &geod = GeographicLib::Geodesic::WGS84(); - auto polygon_area = GeographicLib::PolygonArea(geod, false); - - UnaryExecutor::Execute(input, result, count, [&](list_entry_t polygon) { - auto polygon_offset = polygon.offset; - auto polygon_length = polygon.length; - - bool first = true; - double area = 0; - for (idx_t ring_idx = polygon_offset; ring_idx < polygon_offset + polygon_length; ring_idx++) { - auto ring = ring_entries[ring_idx]; - auto ring_offset = ring.offset; - auto ring_length = ring.length; - - polygon_area.Clear(); - // Note: the last point is the same as the first point, but geographiclib doesn't know that, - // so skip it. 
- for (idx_t coord_idx = ring_offset; coord_idx < ring_offset + ring_length - 1; coord_idx++) { - polygon_area.AddPoint(x_data[coord_idx], y_data[coord_idx]); - } - double ring_area; - double _perimeter; - polygon_area.Compute(false, true, _perimeter, ring_area); - if (first) { - // Add outer ring - area += std::abs(ring_area); - first = false; - } else { - // Subtract holes - area -= std::abs(ring_area); - } - } - return std::abs(area); - }); - - if (count == 1) { - result.SetVectorType(VectorType::CONSTANT_VECTOR); - } -} - -//------------------------------------------------------------------------------ -// GEOMETRY -//------------------------------------------------------------------------------ -static double PolygonArea(const Geometry &poly, GeographicLib::PolygonArea &comp) { - double total_area = 0; - for (uint32_t ring_idx = 0; ring_idx < poly.Count(); ring_idx++) { - comp.Clear(); - auto &ring = Polygon::Part(poly, ring_idx); - // Note: the last point is the same as the first point, but geographiclib doesn't know that, - for (uint32_t coord_idx = 0; coord_idx < ring.Count() - 1; coord_idx++) { - auto coord = LineString::GetVertex(ring, coord_idx); - comp.AddPoint(coord.x, coord.y); - } - double ring_area; - double _perimeter; - // We use the absolute value here so that the actual winding order of the polygon rings dont matter. 
- comp.Compute(false, true, _perimeter, ring_area); - if (ring_idx == 0) { - // Add outer ring - total_area += std::abs(ring_area); - } else { - // Subtract holes - total_area -= std::abs(ring_area); - } - } - return std::abs(total_area); -} - -static void GeodesicGeometryFunction(DataChunk &args, ExpressionState &state, Vector &result) { - auto &lstate = GeometryFunctionLocalState::ResetAndGet(state); - auto &arena = lstate.arena; - - auto &input = args.data[0]; - auto count = args.size(); - - const GeographicLib::Geodesic &geod = GeographicLib::Geodesic::WGS84(); - auto comp = GeographicLib::PolygonArea(geod, false); - - UnaryExecutor::Execute(input, result, count, [&](geometry_t input) { - auto geom = Geometry::Deserialize(arena, input); - double area = 0; - Geometry::ExtractPolygons(geom, [&](const Geometry &geom) { area += PolygonArea(geom, comp); }); - return area; - }); - - if (count == 1) { - result.SetVectorType(VectorType::CONSTANT_VECTOR); - } -} - -//------------------------------------------------------------------------------ -// Documentation -//------------------------------------------------------------------------------ - -static constexpr const char *DOC_DESCRIPTION = R"( - Returns the area of a geometry in meters, using an ellipsoidal model of the earth - - The input geometry is assumed to be in the [EPSG:4326](https://en.wikipedia.org/wiki/World_Geodetic_System) coordinate system (WGS84), with [latitude, longitude] axis order and the area is returned in square meters. This function uses the [GeographicLib](https://geographiclib.sourceforge.io/) library, calculating the area using an ellipsoidal model of the earth. This is a highly accurate method for calculating the area of a polygon taking the curvature of the earth into account, but is also the slowest. - - Returns `0.0` for any geometry that is not a `POLYGON`, `MULTIPOLYGON` or `GEOMETRYCOLLECTION` containing polygon geometries. 
-)"; - -static constexpr const char *DOC_EXAMPLE = R"( - -)"; - -static constexpr DocTag DOC_TAGS[] = {{"ext", "spatial"}, {"category", "property"}, {"category", "spheroid"}}; - -void GeographicLibFunctions::RegisterArea(DatabaseInstance &db) { - - // Area - ScalarFunctionSet set("ST_Area_Spheroid"); - set.AddFunction(ScalarFunction({GeoTypes::POLYGON_2D()}, LogicalType::DOUBLE, GeodesicPolygon2DFunction)); - set.AddFunction(ScalarFunction({GeoTypes::GEOMETRY()}, LogicalType::DOUBLE, GeodesicGeometryFunction, nullptr, - nullptr, nullptr, GeometryFunctionLocalState::Init)); - - ExtensionUtil::RegisterFunction(db, set); - DocUtil::AddDocumentation(db, "ST_Area_Spheroid", DOC_DESCRIPTION, DOC_EXAMPLE, DOC_TAGS); -} - -} // namespace geographiclib - -} // namespace spatial \ No newline at end of file diff --git a/spatial/src/spatial/geographiclib/functions/st_distance_spheroid.cpp b/spatial/src/spatial/geographiclib/functions/st_distance_spheroid.cpp deleted file mode 100644 index 9a8ed41a..00000000 --- a/spatial/src/spatial/geographiclib/functions/st_distance_spheroid.cpp +++ /dev/null @@ -1,74 +0,0 @@ -#include "duckdb/common/vector_operations/generic_executor.hpp" -#include "duckdb/parser/parsed_data/create_scalar_function_info.hpp" - -#include "spatial/common.hpp" -#include "spatial/core/types.hpp" -#include "spatial/geographiclib/functions.hpp" -#include "spatial/geographiclib/module.hpp" - -#include "GeographicLib/Geodesic.hpp" - -namespace spatial { - -namespace geographiclib { - -//------------------------------------------------------------------------------ -// POINT_2D -//------------------------------------------------------------------------------ -static void GeodesicPoint2DFunction(DataChunk &args, ExpressionState &state, Vector &result) { - using POINT_TYPE = StructTypeBinary; - using DISTANCE_TYPE = PrimitiveType; - auto count = args.size(); - auto &p1 = args.data[0]; - auto &p2 = args.data[1]; - - const GeographicLib::Geodesic &geod = 
GeographicLib::Geodesic::WGS84(); - - GenericExecutor::ExecuteBinary( - p1, p2, result, count, [&](POINT_TYPE p1, POINT_TYPE p2) { - double distance; - geod.Inverse(p1.a_val, p1.b_val, p2.a_val, p2.b_val, distance); - return distance; - }); -} - -//------------------------------------------------------------------------------ -// Documentation -//------------------------------------------------------------------------------ -static constexpr const char *DOC_DESCRIPTION = R"( -Returns the distance between two geometries in meters using a ellipsoidal model of the earths surface - -The input geometry is assumed to be in the [EPSG:4326](https://en.wikipedia.org/wiki/World_Geodetic_System) coordinate system (WGS84), with [latitude, longitude] axis order and the distance limit is expected to be in meters. This function uses the [GeographicLib](https://geographiclib.sourceforge.io/) library to solve the [inverse geodesic problem](https://en.wikipedia.org/wiki/Geodesics_on_an_ellipsoid#Solution_of_the_direct_and_inverse_problems), calculating the distance between two points using an ellipsoidal model of the earth. This is a highly accurate method for calculating the distance between two arbitrary points taking the curvature of the earths surface into account, but is also the slowest. -)"; - -static constexpr const char *DOC_EXAMPLE = R"( --- Note: the coordinates are in WGS84 and [latitude, longitude] axis order --- Whats the distance between New York and Amsterdam (JFK and AMS airport)? -SELECT st_distance_spheroid( -st_point(40.6446, -73.7797), -st_point(52.3130, 4.7725) -); ----- -5863418.7459356235 --- Roughly 5863km! 
-)"; - -static constexpr DocTag DOC_TAGS[] = {{"ext", "spatial"}, {"category", "relation"}, {"category", "spheroid"}}; - -//------------------------------------------------------------------------------ -// Register functions -//------------------------------------------------------------------------------ -void GeographicLibFunctions::RegisterDistance(DatabaseInstance &db) { - - // Distance - ScalarFunctionSet set("ST_Distance_Spheroid"); - set.AddFunction(ScalarFunction({spatial::core::GeoTypes::POINT_2D(), spatial::core::GeoTypes::POINT_2D()}, - LogicalType::DOUBLE, GeodesicPoint2DFunction)); - - ExtensionUtil::RegisterFunction(db, set); - DocUtil::AddDocumentation(db, "ST_Distance_Spheroid", DOC_DESCRIPTION, DOC_EXAMPLE, DOC_TAGS); -} - -} // namespace geographiclib - -} // namespace spatial \ No newline at end of file diff --git a/spatial/src/spatial/geographiclib/functions/st_distance_within_spheroid.cpp b/spatial/src/spatial/geographiclib/functions/st_distance_within_spheroid.cpp deleted file mode 100644 index e01648ff..00000000 --- a/spatial/src/spatial/geographiclib/functions/st_distance_within_spheroid.cpp +++ /dev/null @@ -1,70 +0,0 @@ -#include "duckdb/common/vector_operations/generic_executor.hpp" -#include "duckdb/parser/parsed_data/create_scalar_function_info.hpp" - -#include "spatial/common.hpp" -#include "spatial/core/types.hpp" -#include "spatial/geographiclib/functions.hpp" -#include "spatial/geographiclib/module.hpp" - -#include "GeographicLib/Geodesic.hpp" - -namespace spatial { - -namespace geographiclib { - -//------------------------------------------------------------------------------ -// POINT_2D -//------------------------------------------------------------------------------ -static void GeodesicPoint2DFunction(DataChunk &args, ExpressionState &state, Vector &result) { - using POINT_TYPE = StructTypeBinary; - using DISTANCE_TYPE = PrimitiveType; - using BOOL_TYPE = PrimitiveType; - auto count = args.size(); - auto &p1_vec = 
args.data[0]; - auto &p2_vec = args.data[1]; - auto &limit_vec = args.data[2]; - - const GeographicLib::Geodesic &geod = GeographicLib::Geodesic::WGS84(); - - GenericExecutor::ExecuteTernary( - p1_vec, p2_vec, limit_vec, result, count, [&](POINT_TYPE p1, POINT_TYPE p2, DISTANCE_TYPE limit) { - double distance; - geod.Inverse(p1.a_val, p1.b_val, p2.a_val, p2.b_val, distance); - return distance <= limit.val; - }); -} - -//------------------------------------------------------------------------------ -// Documentation -//------------------------------------------------------------------------------ - -static constexpr const char *DOC_DESCRIPTION = R"( - Returns if two POINT_2D's are within a target distance in meters, using an ellipsoidal model of the earths surface - - The input geometry is assumed to be in the [EPSG:4326](https://en.wikipedia.org/wiki/World_Geodetic_System) coordinate system (WGS84), with [latitude, longitude] axis order and the distance is returned in meters. This function uses the [GeographicLib](https://geographiclib.sourceforge.io/) library to solve the [inverse geodesic problem](https://en.wikipedia.org/wiki/Geodesics_on_an_ellipsoid#Solution_of_the_direct_and_inverse_problems), calculating the distance between two points using an ellipsoidal model of the earth. This is a highly accurate method for calculating the distance between two arbitrary points taking the curvature of the earths surface into account, but is also the slowest. 
-)"; - -static constexpr const char *DOC_EXAMPLE = R"( - -)"; - -static constexpr DocTag DOC_TAGS[] = {{"ext", "spatial"}, {"category", "relation"}, {"category", "spheroid"}}; - -//------------------------------------------------------------------------------ -// Register Functions -//------------------------------------------------------------------------------ -void GeographicLibFunctions::RegisterDistanceWithin(DatabaseInstance &db) { - - // Distance - ScalarFunctionSet set("ST_DWithin_Spheroid"); - set.AddFunction( - ScalarFunction({spatial::core::GeoTypes::POINT_2D(), spatial::core::GeoTypes::POINT_2D(), LogicalType::DOUBLE}, - LogicalType::BOOLEAN, GeodesicPoint2DFunction)); - - ExtensionUtil::RegisterFunction(db, set); - DocUtil::AddDocumentation(db, "ST_DWithin_Spheroid", DOC_DESCRIPTION, DOC_EXAMPLE, DOC_TAGS); -} - -} // namespace geographiclib - -} // namespace spatial \ No newline at end of file diff --git a/spatial/src/spatial/geographiclib/functions/st_length_spheroid.cpp b/spatial/src/spatial/geographiclib/functions/st_length_spheroid.cpp deleted file mode 100644 index 32bb0979..00000000 --- a/spatial/src/spatial/geographiclib/functions/st_length_spheroid.cpp +++ /dev/null @@ -1,128 +0,0 @@ -#include "duckdb/common/vector_operations/generic_executor.hpp" -#include "duckdb/parser/parsed_data/create_scalar_function_info.hpp" - -#include "spatial/common.hpp" -#include "spatial/core/types.hpp" -#include "spatial/core/geometry/geometry.hpp" -#include "spatial/core/functions/common.hpp" -#include "spatial/geographiclib/functions.hpp" -#include "spatial/geographiclib/module.hpp" - -#include "GeographicLib/Geodesic.hpp" -#include "GeographicLib/PolygonArea.hpp" - -namespace spatial { - -namespace geographiclib { - -using namespace core; - -//------------------------------------------------------------------------------ -// LINESTRING_2D -//------------------------------------------------------------------------------ -static void 
GeodesicLineString2DFunction(DataChunk &args, ExpressionState &state, Vector &result) { - D_ASSERT(args.data.size() == 1); - - auto &line_vec = args.data[0]; - auto count = args.size(); - - auto &coord_vec = ListVector::GetEntry(line_vec); - auto &coord_vec_children = StructVector::GetEntries(coord_vec); - auto x_data = FlatVector::GetData(*coord_vec_children[0]); - auto y_data = FlatVector::GetData(*coord_vec_children[1]); - - const GeographicLib::Geodesic &geod = GeographicLib::Geodesic::WGS84(); - auto polygon_area = GeographicLib::PolygonArea(geod, true); - - UnaryExecutor::Execute(line_vec, result, count, [&](list_entry_t line) { - polygon_area.Clear(); - auto offset = line.offset; - auto length = line.length; - // Loop over the segments - for (idx_t j = offset; j < offset + length; j++) { - auto x = x_data[j]; - auto y = y_data[j]; - polygon_area.AddPoint(x, y); - } - double _area; - double linestring_length; - polygon_area.Compute(false, true, linestring_length, _area); - return linestring_length; - }); - - if (count == 1) { - result.SetVectorType(VectorType::CONSTANT_VECTOR); - } -} - -//------------------------------------------------------------------------------ -// GEOMETRY -//------------------------------------------------------------------------------ -static double LineLength(const Geometry &line, GeographicLib::PolygonArea &comp) { - comp.Clear(); - for (uint32_t i = 0; i < LineString::VertexCount(line); i++) { - auto vert = LineString::GetVertex(line, i); - comp.AddPoint(vert.x, vert.y); - } - double _area; - double linestring_length; - comp.Compute(false, true, linestring_length, _area); - return linestring_length; -} - -static void GeodesicGeometryFunction(DataChunk &args, ExpressionState &state, Vector &result) { - auto &lstate = GeometryFunctionLocalState::ResetAndGet(state); - auto &arena = lstate.arena; - - auto &input = args.data[0]; - auto count = args.size(); - - const GeographicLib::Geodesic &geod = GeographicLib::Geodesic::WGS84(); - 
auto comp = GeographicLib::PolygonArea(geod, true); - - UnaryExecutor::Execute(input, result, count, [&](geometry_t input) { - auto geom = Geometry::Deserialize(arena, input); - double length = 0.0; - Geometry::ExtractLines(geom, [&](const Geometry &line) { length += LineLength(line, comp); }); - return length; - }); - - if (count == 1) { - result.SetVectorType(VectorType::CONSTANT_VECTOR); - } -} - -//------------------------------------------------------------------------------ -// Documentation -//------------------------------------------------------------------------------ -static constexpr const char *DOC_DESCRIPTION = R"( -Returns the length of the input geometry in meters, using a ellipsoidal model of the earth - -The input geometry is assumed to be in the [EPSG:4326](https://en.wikipedia.org/wiki/World_Geodetic_System) coordinate system (WGS84), with [latitude, longitude] axis order and the length is returned in square meters. This function uses the [GeographicLib](https://geographiclib.sourceforge.io/) library, calculating the length using an ellipsoidal model of the earth. This is a highly accurate method for calculating the length of a line geometry taking the curvature of the earth into account, but is also the slowest. - -Returns `0.0` for any geometry that is not a `LINESTRING`, `MULTILINESTRING` or `GEOMETRYCOLLECTION` containing line geometries. 
-)"; - -static constexpr const char *DOC_EXAMPLE = R"( - -)"; - -static constexpr DocTag DOC_TAGS[] = {{"ext", "spatial"}, {"category", "property"}, {"category", "spheroid"}}; -//------------------------------------------------------------------------------ -// Register Functions -//------------------------------------------------------------------------------ -void GeographicLibFunctions::RegisterLength(DatabaseInstance &db) { - - // Length - ScalarFunctionSet set("ST_Length_Spheroid"); - set.AddFunction(ScalarFunction({GeoTypes::LINESTRING_2D()}, LogicalType::DOUBLE, GeodesicLineString2DFunction)); - set.AddFunction(ScalarFunction({GeoTypes::GEOMETRY()}, LogicalType::DOUBLE, GeodesicGeometryFunction, nullptr, - nullptr, nullptr, GeometryFunctionLocalState::Init)); - - ExtensionUtil::RegisterFunction(db, set); - DocUtil::AddDocumentation(db, "ST_Length_Spheroid", DOC_DESCRIPTION, DOC_EXAMPLE, DOC_TAGS); -} - -} // namespace geographiclib - -} // namespace spatial \ No newline at end of file diff --git a/spatial/src/spatial/geographiclib/functions/st_perimeter_spheroid.cpp b/spatial/src/spatial/geographiclib/functions/st_perimeter_spheroid.cpp deleted file mode 100644 index e9e219bc..00000000 --- a/spatial/src/spatial/geographiclib/functions/st_perimeter_spheroid.cpp +++ /dev/null @@ -1,144 +0,0 @@ -#include "duckdb/common/vector_operations/generic_executor.hpp" -#include "duckdb/parser/parsed_data/create_scalar_function_info.hpp" - -#include "spatial/common.hpp" -#include "spatial/core/types.hpp" -#include "spatial/core/geometry/geometry.hpp" -#include "spatial/core/functions/common.hpp" -#include "spatial/geographiclib/functions.hpp" -#include "spatial/geographiclib/module.hpp" - -#include "spatial/geographiclib/functions.hpp" -#include "spatial/geographiclib/module.hpp" - -#include "GeographicLib/Geodesic.hpp" -#include "GeographicLib/PolygonArea.hpp" - -namespace spatial { - -namespace geographiclib { - -using namespace core; - 
-//------------------------------------------------------------------------------ -// POLYGON_2D -//------------------------------------------------------------------------------ -static void GeodesicPolygon2DFunction(DataChunk &args, ExpressionState &state, Vector &result) { - D_ASSERT(args.data.size() == 1); - - auto &input = args.data[0]; - auto count = args.size(); - - auto &ring_vec = ListVector::GetEntry(input); - auto ring_entries = ListVector::GetData(ring_vec); - auto &coord_vec = ListVector::GetEntry(ring_vec); - auto &coord_vec_children = StructVector::GetEntries(coord_vec); - auto x_data = FlatVector::GetData(*coord_vec_children[0]); - auto y_data = FlatVector::GetData(*coord_vec_children[1]); - - const GeographicLib::Geodesic &geod = GeographicLib::Geodesic::WGS84(); - auto polygon_area = GeographicLib::PolygonArea(geod, false); - - UnaryExecutor::Execute(input, result, count, [&](list_entry_t polygon) { - auto polygon_offset = polygon.offset; - auto polygon_length = polygon.length; - double perimeter = 0; - for (idx_t ring_idx = polygon_offset; ring_idx < polygon_offset + polygon_length; ring_idx++) { - auto ring = ring_entries[ring_idx]; - auto ring_offset = ring.offset; - auto ring_length = ring.length; - - polygon_area.Clear(); - // Note: the last point is the same as the first point, but geographiclib doesn't know that, - // so skip it. 
- for (idx_t coord_idx = ring_offset; coord_idx < ring_offset + ring_length - 1; coord_idx++) { - polygon_area.AddPoint(x_data[coord_idx], y_data[coord_idx]); - } - double _ring_area; - double ring_perimeter; - polygon_area.Compute(false, true, ring_perimeter, _ring_area); - perimeter += ring_perimeter; - } - return perimeter; - }); - - if (count == 1) { - result.SetVectorType(VectorType::CONSTANT_VECTOR); - } -} - -//------------------------------------------------------------------------------ -// GEOMETRY -//------------------------------------------------------------------------------ -static double PolygonPerimeter(const Geometry &poly, GeographicLib::PolygonArea &comp) { - - double total_perimeter = 0; - for (auto &ring : Polygon::Parts(poly)) { - comp.Clear(); - // Note: the last point is the same as the first point, but geographiclib doesn't know that, - // so skip it. - for (uint32_t coord_idx = 0; coord_idx < ring.Count() - 1; coord_idx++) { - auto coord = LineString::GetVertex(ring, coord_idx); - comp.AddPoint(coord.x, coord.y); - } - double _ring_area; - double perimeter; - comp.Compute(false, true, perimeter, _ring_area); - total_perimeter += perimeter; - } - return total_perimeter; -} - -static void GeodesicGeometryFunction(DataChunk &args, ExpressionState &state, Vector &result) { - auto &lstate = GeometryFunctionLocalState::ResetAndGet(state); - auto &arena = lstate.arena; - - auto &input = args.data[0]; - auto count = args.size(); - - const GeographicLib::Geodesic &geod = GeographicLib::Geodesic::WGS84(); - auto comp = GeographicLib::PolygonArea(geod, false); - - UnaryExecutor::Execute(input, result, count, [&](geometry_t input) { - auto geom = Geometry::Deserialize(arena, input); - auto length = 0.0; - Geometry::ExtractPolygons(geom, [&](const Geometry &poly) { length += PolygonPerimeter(poly, comp); }); - return length; - }); - - if (count == 1) { - result.SetVectorType(VectorType::CONSTANT_VECTOR); - } -} - 
-//------------------------------------------------------------------------------ -// Documentation -//------------------------------------------------------------------------------ -static constexpr const char *DOC_DESCRIPTION = R"( - Returns the length of the perimeter in meters using an ellipsoidal model of the earths surface - - The input geometry is assumed to be in the [EPSG:4326](https://en.wikipedia.org/wiki/World_Geodetic_System) coordinate system (WGS84), with [latitude, longitude] axis order and the length is returned in meters. This function uses the [GeographicLib](https://geographiclib.sourceforge.io/) library, calculating the perimeter using an ellipsoidal model of the earth. This is a highly accurate method for calculating the perimeter of a polygon taking the curvature of the earth into account, but is also the slowest. - - Returns `0.0` for any geometry that is not a `POLYGON`, `MULTIPOLYGON` or `GEOMETRYCOLLECTION` containing polygon geometries. -)"; - -static constexpr const char *DOC_EXAMPLE = R"()"; - -static constexpr DocTag DOC_TAGS[] = {{"ext", "spatial"}, {"category", "property"}, {"category", "spheroid"}}; -//------------------------------------------------------------------------------ -// Register Functions -//------------------------------------------------------------------------------ -void GeographicLibFunctions::RegisterPerimeter(DatabaseInstance &db) { - - // Perimiter - ScalarFunctionSet set("ST_Perimeter_Spheroid"); - set.AddFunction(ScalarFunction({GeoTypes::POLYGON_2D()}, LogicalType::DOUBLE, GeodesicPolygon2DFunction)); - set.AddFunction(ScalarFunction({GeoTypes::GEOMETRY()}, LogicalType::DOUBLE, GeodesicGeometryFunction, nullptr, - nullptr, nullptr, GeometryFunctionLocalState::Init)); - ExtensionUtil::RegisterFunction(db, set); - DocUtil::AddDocumentation(db, "ST_Perimeter_Spheroid", DOC_DESCRIPTION, DOC_EXAMPLE, DOC_TAGS); -} - -} // namespace geographiclib - -} // namespace spatial \ No newline at end of file diff --git 
a/spatial/src/spatial/geographiclib/module.cpp b/spatial/src/spatial/geographiclib/module.cpp deleted file mode 100644 index 3b204d31..00000000 --- a/spatial/src/spatial/geographiclib/module.cpp +++ /dev/null @@ -1,18 +0,0 @@ -#include "spatial/geographiclib/module.hpp" -#include "spatial/geographiclib/functions.hpp" - -#include "spatial/common.hpp" - -namespace spatial { - -namespace geographiclib { - -void GeographicLibModule::Register(DatabaseInstance &db) { - - // Register functions - GeographicLibFunctions::Register(db); -} - -} // namespace geographiclib - -} // namespace spatial diff --git a/spatial/src/spatial/geos/CMakeLists.txt b/spatial/src/spatial/geos/CMakeLists.txt deleted file mode 100644 index bcae619e..00000000 --- a/spatial/src/spatial/geos/CMakeLists.txt +++ /dev/null @@ -1,7 +0,0 @@ -add_subdirectory(functions) -set(EXTENSION_SOURCES - ${EXTENSION_SOURCES} - ${CMAKE_CURRENT_SOURCE_DIR}/module.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/geos_wrappers.cpp - PARENT_SCOPE - ) \ No newline at end of file diff --git a/spatial/src/spatial/geos/functions/CMakeLists.txt b/spatial/src/spatial/geos/functions/CMakeLists.txt deleted file mode 100644 index 7ae78c43..00000000 --- a/spatial/src/spatial/geos/functions/CMakeLists.txt +++ /dev/null @@ -1,9 +0,0 @@ -add_subdirectory(scalar) - -set(EXTENSION_SOURCES - ${EXTENSION_SOURCES} - ${CMAKE_CURRENT_SOURCE_DIR}/aggregate.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/cast.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/common.cpp - PARENT_SCOPE - ) \ No newline at end of file diff --git a/spatial/src/spatial/geos/functions/aggregate.cpp b/spatial/src/spatial/geos/functions/aggregate.cpp deleted file mode 100644 index d4aec18b..00000000 --- a/spatial/src/spatial/geos/functions/aggregate.cpp +++ /dev/null @@ -1,220 +0,0 @@ -#include "duckdb/parser/parsed_data/create_scalar_function_info.hpp" -#include "duckdb/parser/parsed_data/create_aggregate_function_info.hpp" - -#include "spatial/common.hpp" -#include "spatial/core/types.hpp" -#include 
"spatial/geos/functions/aggregate.hpp" -#include "spatial/geos/geos_wrappers.hpp" - -#include "geos_c.h" - -namespace spatial { - -namespace geos { - -struct GEOSAggState { - GEOSGeometry *geom = nullptr; - GEOSContextHandle_t context = nullptr; - - ~GEOSAggState() { - if (geom) { - GEOSGeom_destroy_r(context, geom); - geom = nullptr; - } - if (context) { - GEOS_finish_r(context); - context = nullptr; - } - } -}; - -//------------------------------------------------------------------------ -// INTERSECTION -//------------------------------------------------------------------------ -struct IntersectionAggFunction { - template - static void Initialize(STATE &state) { - state.geom = nullptr; - state.context = GEOS_init_r(); - } - - template - static void Combine(const STATE &source, STATE &target, AggregateInputData &data) { - if (!source.geom) { - return; - } - if (!target.geom) { - target.geom = GEOSGeom_clone_r(target.context, source.geom); - return; - } - auto curr = target.geom; - target.geom = GEOSIntersection_r(target.context, curr, source.geom); - GEOSGeom_destroy_r(target.context, curr); - } - - template - static void Operation(STATE &state, const INPUT_TYPE &input, AggregateUnaryInput &) { - if (!state.geom) { - state.geom = DeserializeGEOSGeometry(input, state.context); - } else { - auto next = DeserializeGEOSGeometry(input, state.context); - auto curr = state.geom; - state.geom = GEOSIntersection_r(state.context, curr, next); - GEOSGeom_destroy_r(state.context, next); - GEOSGeom_destroy_r(state.context, curr); - } - } - - template - static void ConstantOperation(STATE &state, const INPUT_TYPE &input, AggregateUnaryInput &, idx_t count) { - // There is no point in doing anything else, intersection is idempotent - if (!state.geom) { - state.geom = DeserializeGEOSGeometry(input, state.context); - } - } - - template - static void Finalize(STATE &state, T &target, AggregateFinalizeData &finalize_data) { - if (!state.geom) { - finalize_data.ReturnNull(); - } 
else { - target = SerializeGEOSGeometry(finalize_data.result, state.geom, state.context); - } - } - - template - static void Destroy(STATE &state, AggregateInputData &) { - if (state.geom) { - GEOSGeom_destroy_r(state.context, state.geom); - state.geom = nullptr; - } - if (state.context) { - GEOS_finish_r(state.context); - state.context = nullptr; - } - } - - static bool IgnoreNull() { - return true; - } -}; - -//------------------------------------------------------------------------ -// UNION -//------------------------------------------------------------------------ - -struct UnionAggFunction { - template - static void Initialize(STATE &state) { - state.geom = nullptr; - state.context = GEOS_init_r(); - } - - template - static void Combine(const STATE &source, STATE &target, AggregateInputData &data) { - if (!source.geom) { - return; - } - if (!target.geom) { - target.geom = GEOSGeom_clone_r(target.context, source.geom); - return; - } - auto curr = target.geom; - target.geom = GEOSUnion_r(target.context, curr, source.geom); - GEOSGeom_destroy_r(target.context, curr); - } - - template - static void Operation(STATE &state, const INPUT_TYPE &input, AggregateUnaryInput &) { - if (!state.geom) { - state.geom = DeserializeGEOSGeometry(input, state.context); - } else { - auto next = DeserializeGEOSGeometry(input, state.context); - auto curr = state.geom; - state.geom = GEOSUnion_r(state.context, curr, next); - GEOSGeom_destroy_r(state.context, next); - GEOSGeom_destroy_r(state.context, curr); - } - } - - template - static void ConstantOperation(STATE &state, const INPUT_TYPE &input, AggregateUnaryInput &, idx_t count) { - // There is no point in doing anything else, union is idempotent - if (!state.geom) { - state.geom = DeserializeGEOSGeometry(input, state.context); - } - } - - template - static void Finalize(STATE &state, T &target, AggregateFinalizeData &finalize_data) { - if (!state.geom) { - finalize_data.ReturnNull(); - } else { - target = 
SerializeGEOSGeometry(finalize_data.result, state.geom, state.context); - } - } - - template - static void Destroy(STATE &state, AggregateInputData &) { - if (state.geom) { - GEOSGeom_destroy_r(state.context, state.geom); - state.geom = nullptr; - } - if (state.context) { - GEOS_finish_r(state.context); - state.context = nullptr; - } - } - - static bool IgnoreNull() { - return true; - } -}; - -//------------------------------------------------------------------------------ -// Documentation -//------------------------------------------------------------------------------ -static constexpr DocTag DOC_TAGS[] = {{"ext", "spatial"}, {"category", "construction"}}; -static constexpr const char *INTERSECTION_DOC_DESCRIPTION = R"( - Computes the intersection of a set of geometries -)"; -static constexpr const char *INTERSECTION_DOC_EXAMPLE = R"( - -)"; - -static constexpr const char *UNION_DOC_DESCRIPTION = R"( - Computes the union of a set of input geometries -)"; - -static constexpr const char *UNION_DOC_EXAMPLE = R"( - -)"; - -//------------------------------------------------------------------------ -// Register -//------------------------------------------------------------------------ -void GeosAggregateFunctions::Register(DatabaseInstance &db) { - - AggregateFunctionSet st_intersection_agg("ST_Intersection_Agg"); - st_intersection_agg.AddFunction( - AggregateFunction::UnaryAggregateDestructor(core::GeoTypes::GEOMETRY(), - core::GeoTypes::GEOMETRY())); - - ExtensionUtil::RegisterFunction(db, st_intersection_agg); - DocUtil::AddDocumentation(db, "ST_Intersection_Agg", INTERSECTION_DOC_DESCRIPTION, INTERSECTION_DOC_EXAMPLE, - DOC_TAGS); - - AggregateFunctionSet st_union_agg("ST_Union_Agg"); - st_union_agg.AddFunction( - AggregateFunction::UnaryAggregateDestructor(core::GeoTypes::GEOMETRY(), - core::GeoTypes::GEOMETRY())); - - ExtensionUtil::RegisterFunction(db, st_union_agg); - DocUtil::AddDocumentation(db, "ST_Union_Agg", UNION_DOC_DESCRIPTION, UNION_DOC_EXAMPLE, 
DOC_TAGS); -} - -} // namespace geos - -} // namespace spatial \ No newline at end of file diff --git a/spatial/src/spatial/geos/functions/cast.cpp b/spatial/src/spatial/geos/functions/cast.cpp deleted file mode 100644 index 6f6d0013..00000000 --- a/spatial/src/spatial/geos/functions/cast.cpp +++ /dev/null @@ -1,33 +0,0 @@ -#include "spatial/common.hpp" -#include "spatial/core/types.hpp" -#include "spatial/geos/functions/cast.hpp" -#include "spatial/geos/geos_wrappers.hpp" - -#include "duckdb/function/cast/cast_function_set.hpp" -#include "duckdb/common/operator/cast_operators.hpp" - -namespace spatial { - -namespace geos { - -static bool WKBToWKTCast(Vector &source, Vector &result, idx_t count, CastParameters ¶meters) { - auto ctx = GeosContextWrapper(); - auto reader = ctx.CreateWKBReader(); - auto writer = ctx.CreateWKTWriter(); - writer.SetTrim(true); - - UnaryExecutor::Execute(source, result, count, [&](string_t input) { - auto geom = reader.Read(input); - return writer.Write(geom, result); - }); - - return true; -} - -void GeosCastFunctions::Register(DatabaseInstance &db) { - ExtensionUtil::RegisterCastFunction(db, core::GeoTypes::WKB_BLOB(), LogicalType::VARCHAR, WKBToWKTCast); -} - -} // namespace geos - -} // namespace spatial \ No newline at end of file diff --git a/spatial/src/spatial/geos/functions/common.cpp b/spatial/src/spatial/geos/functions/common.cpp deleted file mode 100644 index dde8977f..00000000 --- a/spatial/src/spatial/geos/functions/common.cpp +++ /dev/null @@ -1,38 +0,0 @@ -#include "spatial/common.hpp" -#include "spatial/geos/functions/common.hpp" - -namespace spatial { - -namespace geos { - -using namespace spatial::core; - -GEOSFunctionLocalState::GEOSFunctionLocalState(ClientContext &context) : ctx(), arena(BufferAllocator::Get(context)) { - // TODO: Set GEOS error handler - // GEOSContext_setErrorMessageHandler_r() -} - -unique_ptr GEOSFunctionLocalState::Init(ExpressionState &state, const BoundFunctionExpression &expr, - FunctionData 
*bind_data) { - return make_uniq(state.GetContext()); -} - -unique_ptr GEOSFunctionLocalState::InitCast(CastLocalStateParameters ¶meters) { - return make_uniq(*parameters.context); -} - -GEOSFunctionLocalState &GEOSFunctionLocalState::ResetAndGet(CastParameters ¶meters) { - auto &local_state = parameters.local_state->Cast(); - local_state.arena.Reset(); - return local_state; -} - -GEOSFunctionLocalState &GEOSFunctionLocalState::ResetAndGet(ExpressionState &state) { - auto &local_state = ExecuteFunctionState::GetFunctionState(state)->Cast(); - local_state.arena.Reset(); - return local_state; -} - -} // namespace geos - -} // namespace spatial \ No newline at end of file diff --git a/spatial/src/spatial/geos/functions/scalar/CMakeLists.txt b/spatial/src/spatial/geos/functions/scalar/CMakeLists.txt deleted file mode 100644 index dabb15e3..00000000 --- a/spatial/src/spatial/geos/functions/scalar/CMakeLists.txt +++ /dev/null @@ -1,38 +0,0 @@ -set(EXTENSION_SOURCES - ${EXTENSION_SOURCES} - ${CMAKE_CURRENT_SOURCE_DIR}/st_boundary.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/st_buffer.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/st_centroid.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/st_contains.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/st_containsproperly.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/st_convex_hull.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/st_covered_by.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/st_covers.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/st_crosses.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/st_difference.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/st_disjoint.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/st_distance.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/st_distance_within.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/st_envelope.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/st_equals.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/st_intersection.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/st_intersects.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/st_is_ring.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/st_is_simple.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/st_is_valid.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/st_linemerge.cpp - 
${CMAKE_CURRENT_SOURCE_DIR}/st_makevalid.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/st_normalize.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/st_shortestline.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/st_overlaps.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/st_pointonsurface.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/st_reduceprecision.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/st_removerepeatedpoints.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/st_reverse.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/st_simplify_preserve_topology.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/st_simplify.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/st_touches.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/st_union.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/st_within.cpp - PARENT_SCOPE - ) \ No newline at end of file diff --git a/spatial/src/spatial/geos/functions/scalar/st_boundary.cpp b/spatial/src/spatial/geos/functions/scalar/st_boundary.cpp deleted file mode 100644 index a8699474..00000000 --- a/spatial/src/spatial/geos/functions/scalar/st_boundary.cpp +++ /dev/null @@ -1,63 +0,0 @@ -#include "spatial/common.hpp" -#include "spatial/core/types.hpp" -#include "spatial/geos/functions/scalar.hpp" -#include "spatial/geos/functions/common.hpp" -#include "spatial/geos/geos_wrappers.hpp" - -#include "duckdb/parser/parsed_data/create_scalar_function_info.hpp" -#include "duckdb/common/vector_operations/unary_executor.hpp" -#include "duckdb/common/vector_operations/binary_executor.hpp" - -namespace spatial { - -namespace geos { - -using namespace spatial::core; - -static void BoundaryFunction(DataChunk &args, ExpressionState &state, Vector &result) { - - auto &lstate = GEOSFunctionLocalState::ResetAndGet(state); - - UnaryExecutor::ExecuteWithNulls( - args.data[0], result, args.size(), [&](geometry_t &geometry_blob, ValidityMask &mask, idx_t i) { - auto geom = lstate.ctx.Deserialize(geometry_blob); - if (GEOSGeomTypeId_r(lstate.ctx.GetCtx(), geom.get()) == GEOS_GEOMETRYCOLLECTION) { - mask.SetInvalid(i); - return geometry_t {}; - } - - auto boundary = make_uniq_geos(lstate.ctx.GetCtx(), 
GEOSBoundary_r(lstate.ctx.GetCtx(), geom.get())); - return lstate.ctx.Serialize(result, boundary); - }); -} - -//------------------------------------------------------------------------------ -// Documentation -//------------------------------------------------------------------------------ -static constexpr const char *DOC_DESCRIPTION = R"( -Returns the "boundary" of a geometry -)"; - -static constexpr const char *DOC_EXAMPLE = R"( - -)"; - -static constexpr DocTag DOC_TAGS[] = {{"ext", "spatial"}, {"category", "construction"}}; - -//------------------------------------------------------------------------------ -// Register functions -//------------------------------------------------------------------------------ -void GEOSScalarFunctions::RegisterStBoundary(DatabaseInstance &db) { - - ScalarFunctionSet set("ST_Boundary"); - - set.AddFunction(ScalarFunction({GeoTypes::GEOMETRY()}, GeoTypes::GEOMETRY(), BoundaryFunction, nullptr, nullptr, - nullptr, GEOSFunctionLocalState::Init)); - - ExtensionUtil::RegisterFunction(db, set); - DocUtil::AddDocumentation(db, "ST_Boundary", DOC_DESCRIPTION, DOC_EXAMPLE, DOC_TAGS); -} - -} // namespace geos - -} // namespace spatial diff --git a/spatial/src/spatial/geos/functions/scalar/st_buffer.cpp b/spatial/src/spatial/geos/functions/scalar/st_buffer.cpp deleted file mode 100644 index 05833550..00000000 --- a/spatial/src/spatial/geos/functions/scalar/st_buffer.cpp +++ /dev/null @@ -1,139 +0,0 @@ -#include "spatial/common.hpp" -#include "spatial/core/types.hpp" -#include "spatial/geos/functions/scalar.hpp" -#include "spatial/geos/functions/common.hpp" -#include "spatial/geos/geos_wrappers.hpp" - -#include "duckdb/parser/parsed_data/create_scalar_function_info.hpp" -#include "duckdb/common/vector_operations/unary_executor.hpp" -#include "duckdb/common/vector_operations/binary_executor.hpp" -#include "duckdb/common/vector_operations/senary_executor.hpp" - -namespace spatial { - -namespace geos { - -using namespace spatial::core; - 
-static void BufferFunction(DataChunk &args, ExpressionState &state, Vector &result) { - - auto &lstate = GEOSFunctionLocalState::ResetAndGet(state); - auto &left = args.data[0]; - auto &right = args.data[1]; - - BinaryExecutor::Execute( - left, right, result, args.size(), [&](geometry_t &geometry_blob, double radius) { - auto geos_geom = lstate.ctx.Deserialize(geometry_blob); - auto boundary = - make_uniq_geos(lstate.ctx.GetCtx(), GEOSBuffer_r(lstate.ctx.GetCtx(), geos_geom.get(), radius, 8)); - return lstate.ctx.Serialize(result, boundary); - }); -} - -static void BufferFunctionWithSegments(DataChunk &args, ExpressionState &state, Vector &result) { - - auto &lstate = GEOSFunctionLocalState::ResetAndGet(state); - auto &left = args.data[0]; - auto &right = args.data[1]; - auto &segments = args.data[2]; - - TernaryExecutor::Execute( - left, right, segments, result, args.size(), [&](geometry_t &geometry_blob, double radius, int32_t segments) { - auto geos_geom = lstate.ctx.Deserialize(geometry_blob); - auto boundary = make_uniq_geos(lstate.ctx.GetCtx(), - GEOSBuffer_r(lstate.ctx.GetCtx(), geos_geom.get(), radius, segments)); - return lstate.ctx.Serialize(result, boundary); - }); -} - -template -static T TryParseStringArgument(const char *name, const vector &keys, const vector &values, - const string_t &arg) { - D_ASSERT(keys.size() == values.size()); - for (idx_t i = 0; i < keys.size(); i++) { - if (StringUtil::CIEquals(keys[i], arg.GetString())) { - return values[i]; - } - } - - auto candidates = StringUtil::Join(keys, ", "); - throw InvalidInputException("Unknown %s: '%s', accepted inputs: %s", name, arg.GetString().c_str(), - candidates.c_str()); -} - -static void BufferFunctionWithArgs(DataChunk &args, ExpressionState &state, Vector &result) { - auto &lstate = GEOSFunctionLocalState::ResetAndGet(state); - - SenaryExecutor::Execute( - args, result, - [&](const geometry_t &geometry_blob, double radius, int32_t segments, const string_t &cap_style_str, - const 
string_t &join_style_str, double mitre_limit) { - auto geos_geom = lstate.ctx.Deserialize(geometry_blob); - - auto cap_style = TryParseStringArgument( - "cap style", {"CAP_ROUND", "CAP_FLAT", "CAP_SQUARE"}, - {GEOSBUF_CAP_ROUND, GEOSBUF_CAP_FLAT, GEOSBUF_CAP_SQUARE}, cap_style_str); - - auto join_style = TryParseStringArgument( - "join style", {"JOIN_ROUND", "JOIN_MITRE", "JOIN_BEVEL"}, - {GEOSBUF_JOIN_ROUND, GEOSBUF_JOIN_MITRE, GEOSBUF_JOIN_BEVEL}, join_style_str); - - auto buffer = GEOSBufferWithStyle_r(lstate.ctx.GetCtx(), geos_geom.get(), radius, segments, cap_style, - join_style, mitre_limit); - auto buffer_ptr = make_uniq_geos(lstate.ctx.GetCtx(), buffer); - return lstate.ctx.Serialize(result, buffer_ptr); - }); -} - -//------------------------------------------------------------------------------ -// Documentation -//------------------------------------------------------------------------------ -static constexpr const char *DOC_DESCRIPTION = R"( - Returns a buffer around the input geometry at the target distance - - `geom` is the input geometry. - - `distance` is the target distance for the buffer, using the same units as the input geometry. - - `num_triangles` represents how many triangles that will be produced to approximate a quarter circle. The larger the number, the smoother the resulting geometry. The default value is 8. - - `join_style` must be one of "JOIN_ROUND", "JOIN_MITRE", "JOIN_BEVEL". This parameter is case-insensitive. - - `cap_style` must be one of "CAP_ROUND", "CAP_FLAT", "CAP_SQUARE". This parameter is case-insensitive. - - `mitre_limit` only applies when `join_style` is "JOIN_MITRE". It is the ratio of the distance from the corner to the mitre point to the corner radius. The default value is 1.0. - - This is a planar operation and will not take into account the curvature of the earth. 
-)"; - -static constexpr const char *DOC_EXAMPLE = R"( - -)"; - -static constexpr DocTag DOC_TAGS[] = {{"ext", "spatial"}}; - -//------------------------------------------------------------------------------ -// Register functions -//------------------------------------------------------------------------------ -void GEOSScalarFunctions::RegisterStBuffer(DatabaseInstance &db) { - ScalarFunctionSet set("ST_Buffer"); - - set.AddFunction(ScalarFunction({GeoTypes::GEOMETRY(), LogicalType::DOUBLE}, GeoTypes::GEOMETRY(), BufferFunction, - nullptr, nullptr, nullptr, GEOSFunctionLocalState::Init)); - set.AddFunction(ScalarFunction({GeoTypes::GEOMETRY(), LogicalType::DOUBLE, LogicalType::INTEGER}, - GeoTypes::GEOMETRY(), BufferFunctionWithSegments, nullptr, nullptr, nullptr, - GEOSFunctionLocalState::Init)); - - set.AddFunction(ScalarFunction({GeoTypes::GEOMETRY(), LogicalType::DOUBLE, LogicalType::INTEGER, - LogicalType::VARCHAR, LogicalType::VARCHAR, LogicalType::DOUBLE}, - GeoTypes::GEOMETRY(), BufferFunctionWithArgs, nullptr, nullptr, nullptr, - GEOSFunctionLocalState::Init)); - - ExtensionUtil::RegisterFunction(db, set); - DocUtil::AddDocumentation(db, "ST_Buffer", DOC_DESCRIPTION, DOC_EXAMPLE, DOC_TAGS, - {"geom", "distance", "num_triangles", "join_style", "cap_style", "mitre_limit"}); -} - -} // namespace geos - -} // namespace spatial diff --git a/spatial/src/spatial/geos/functions/scalar/st_centroid.cpp b/spatial/src/spatial/geos/functions/scalar/st_centroid.cpp deleted file mode 100644 index 450bc148..00000000 --- a/spatial/src/spatial/geos/functions/scalar/st_centroid.cpp +++ /dev/null @@ -1,59 +0,0 @@ -#include "spatial/common.hpp" -#include "spatial/core/types.hpp" -#include "spatial/geos/functions/scalar.hpp" -#include "spatial/geos/functions/common.hpp" - -#include "duckdb/parser/parsed_data/create_scalar_function_info.hpp" -#include "duckdb/common/vector_operations/unary_executor.hpp" -#include "duckdb/common/vector_operations/binary_executor.hpp" -#include 
"duckdb/catalog/catalog_entry/function_entry.hpp" -#include "duckdb/catalog/catalog_entry/scalar_function_catalog_entry.hpp" - -namespace spatial { - -namespace geos { - -using namespace spatial::core; - -static void CentroidFunction(DataChunk &args, ExpressionState &state, Vector &result) { - auto &lstate = GEOSFunctionLocalState::ResetAndGet(state); - auto &ctx = lstate.ctx.GetCtx(); - UnaryExecutor::Execute(args.data[0], result, args.size(), [&](geometry_t &geometry_blob) { - auto geometry = lstate.ctx.Deserialize(geometry_blob); - auto centroid = make_uniq_geos(ctx, GEOSGetCentroid_r(ctx, geometry.get())); - return lstate.ctx.Serialize(result, centroid); - }); -} - -//------------------------------------------------------------------------------ -// Documentation -//------------------------------------------------------------------------------ - -static constexpr const char *DOC_DESCRIPTION = R"( -Calculates the centroid of a geometry -)"; - -static constexpr const char *DOC_EXAMPLE = R"( -select st_centroid('POLYGON((0 0, 0 1, 1 1, 1 0, 0 0))'::geometry); ----- - POINT(0.5 0.5) -)"; - -static constexpr DocTag DOC_TAGS[] = {{"ext", "spatial"}, {"category", "property"}}; - -//------------------------------------------------------------------------------ -// Register functions -//------------------------------------------------------------------------------ -void GEOSScalarFunctions::RegisterStCentroid(DatabaseInstance &db) { - ScalarFunctionSet set("ST_Centroid"); - - set.AddFunction(ScalarFunction({GeoTypes::GEOMETRY()}, GeoTypes::GEOMETRY(), CentroidFunction, nullptr, nullptr, - nullptr, GEOSFunctionLocalState::Init)); - - ExtensionUtil::AddFunctionOverload(db, set); - DocUtil::AddDocumentation(db, "ST_Centroid", DOC_DESCRIPTION, DOC_EXAMPLE, DOC_TAGS); -} - -} // namespace geos - -} // namespace spatial diff --git a/spatial/src/spatial/geos/functions/scalar/st_contains.cpp b/spatial/src/spatial/geos/functions/scalar/st_contains.cpp deleted file mode 100644 
index 92d81bd5..00000000 --- a/spatial/src/spatial/geos/functions/scalar/st_contains.cpp +++ /dev/null @@ -1,68 +0,0 @@ -#include "spatial/common.hpp" -#include "spatial/core/types.hpp" -#include "spatial/geos/functions/scalar.hpp" -#include "spatial/geos/functions/common.hpp" -#include "spatial/geos/geos_wrappers.hpp" -#include "spatial/geos/geos_executor.hpp" - -#include "duckdb/parser/parsed_data/create_scalar_function_info.hpp" -#include "duckdb/common/vector_operations/unary_executor.hpp" -#include "duckdb/common/vector_operations/binary_executor.hpp" - -#include "spatial/core/function_builder.hpp" - -namespace spatial { - -namespace geos { - -using namespace spatial::core; - -static void ContainsFunction(DataChunk &args, ExpressionState &state, Vector &result) { - auto &lstate = GEOSFunctionLocalState::ResetAndGet(state); - auto &left = args.data[0]; - auto &right = args.data[1]; - auto count = args.size(); - GEOSExecutor::ExecuteNonSymmetricPreparedBinary(lstate, left, right, count, result, GEOSContains_r, - GEOSPreparedContains_r); -} - -//------------------------------------------------------------------------------ -// Documentation -//------------------------------------------------------------------------------ -static constexpr const char *DOC_DESCRIPTION = R"( -Returns true if geom1 contains geom2. 
-)"; - -static constexpr const char *DOC_EXAMPLE = R"( -select st_contains('POLYGON((0 0, 0 1, 1 1, 1 0, 0 0))'::geometry, 'POINT(0.5 0.5)'::geometry); ----- -true -)"; - -//------------------------------------------------------------------------------ -// Register functions -//------------------------------------------------------------------------------ -void GEOSScalarFunctions::RegisterStContains(DatabaseInstance &db) { - - FunctionBuilder::RegisterScalar(db, "ST_Contains", [](ScalarFunctionBuilder &func) { - func.AddVariant([](ScalarFunctionVariantBuilder &variant) { - variant.AddParameter("geom1", GeoTypes::GEOMETRY()); - variant.AddParameter("geom2", GeoTypes::GEOMETRY()); - variant.SetReturnType(LogicalType::BOOLEAN); - variant.SetFunction(ContainsFunction); - variant.SetInit(GEOSFunctionLocalState::Init); - - variant.SetExample(DOC_EXAMPLE); - variant.SetDescription(DOC_DESCRIPTION); - }); - - func.SetDescription(DOC_DESCRIPTION); - - func.SetTag("ext", "spatial"); - func.SetTag("category", "relation"); - }); -} - -} // namespace geos - -} // namespace spatial diff --git a/spatial/src/spatial/geos/functions/scalar/st_containsproperly.cpp b/spatial/src/spatial/geos/functions/scalar/st_containsproperly.cpp deleted file mode 100644 index d913579e..00000000 --- a/spatial/src/spatial/geos/functions/scalar/st_containsproperly.cpp +++ /dev/null @@ -1,90 +0,0 @@ -#include "duckdb/common/vector_operations/binary_executor.hpp" -#include "duckdb/common/vector_operations/unary_executor.hpp" -#include "duckdb/parser/parsed_data/create_scalar_function_info.hpp" -#include "spatial/common.hpp" -#include "spatial/core/types.hpp" -#include "spatial/geos/functions/common.hpp" -#include "spatial/geos/functions/scalar.hpp" -#include "spatial/geos/geos_executor.hpp" -#include "spatial/geos/geos_wrappers.hpp" - -#include "spatial/core/function_builder.hpp" - -namespace spatial { - -namespace geos { - -using namespace spatial::core; - -static void 
ExecuteContainsProperlyPrepared(GEOSFunctionLocalState &lstate, Vector &left, Vector &right, idx_t count, - Vector &result) { - auto &ctx = lstate.ctx.GetCtx(); - - if (left.GetVectorType() == VectorType::CONSTANT_VECTOR && right.GetVectorType() != VectorType::CONSTANT_VECTOR) { - auto &left_blob = FlatVector::GetData(left)[0]; - auto left_geom = lstate.ctx.Deserialize(left_blob); - auto left_prepared = make_uniq_geos(ctx, GEOSPrepare_r(ctx, left_geom.get())); - - UnaryExecutor::Execute(right, result, count, [&](geometry_t &right_blob) { - auto right_geometry = lstate.ctx.Deserialize(right_blob); - auto ok = GEOSPreparedContainsProperly_r(ctx, left_prepared.get(), right_geometry.get()); - return ok == 1; - }); - } else { - // ContainsProperly only has a prepared version, so we just prepare the left one always - BinaryExecutor::Execute( - left, right, result, count, [&](geometry_t &left_blob, geometry_t &right_blob) { - auto left_geometry = lstate.ctx.Deserialize(left_blob); - auto right_geometry = lstate.ctx.Deserialize(right_blob); - - auto left_prepared = make_uniq_geos(ctx, GEOSPrepare_r(ctx, left_geometry.get())); - - auto ok = GEOSPreparedContainsProperly_r(ctx, left_prepared.get(), right_geometry.get()); - return ok == 1; - }); - } -} - -static void ContainsProperlyFunction(DataChunk &args, ExpressionState &state, Vector &result) { - auto &lstate = GEOSFunctionLocalState::ResetAndGet(state); - auto &left = args.data[0]; - auto &right = args.data[1]; - auto count = args.size(); - ExecuteContainsProperlyPrepared(lstate, left, right, count, result); -} - -//------------------------------------------------------------------------------ -// Documentation -//------------------------------------------------------------------------------ -static constexpr const char *DOC_DESCRIPTION = R"( - Returns true if geom1 "properly contains" geom2 -)"; - -static constexpr const char *DOC_EXAMPLE = R"( - -)"; 
-//------------------------------------------------------------------------------ -// Register functions -//------------------------------------------------------------------------------ -void GEOSScalarFunctions::RegisterStContainsProperly(DatabaseInstance &db) { - - FunctionBuilder::RegisterScalar(db, "ST_ContainsProperly", [](ScalarFunctionBuilder &func) { - func.AddVariant([](ScalarFunctionVariantBuilder &variant) { - variant.AddParameter("geom1", GeoTypes::GEOMETRY()); - variant.AddParameter("geom2", GeoTypes::GEOMETRY()); - variant.SetReturnType(LogicalType::BOOLEAN); - variant.SetFunction(ContainsProperlyFunction); - variant.SetInit(GEOSFunctionLocalState::Init); - - variant.SetExample(DOC_EXAMPLE); - variant.SetDescription(DOC_DESCRIPTION); - }); - - func.SetTag("ext", "spatial"); - func.SetTag("category", "relation"); - }); -} - -} // namespace geos - -} // namespace spatial diff --git a/spatial/src/spatial/geos/functions/scalar/st_convex_hull.cpp b/spatial/src/spatial/geos/functions/scalar/st_convex_hull.cpp deleted file mode 100644 index 54d921e8..00000000 --- a/spatial/src/spatial/geos/functions/scalar/st_convex_hull.cpp +++ /dev/null @@ -1,55 +0,0 @@ -#include "spatial/common.hpp" -#include "spatial/core/types.hpp" -#include "spatial/geos/functions/scalar.hpp" -#include "spatial/geos/functions/common.hpp" -#include "spatial/geos/geos_wrappers.hpp" - -#include "duckdb/parser/parsed_data/create_scalar_function_info.hpp" -#include "duckdb/common/vector_operations/unary_executor.hpp" -#include "duckdb/common/vector_operations/binary_executor.hpp" - -namespace spatial { - -namespace geos { - -using namespace spatial::core; - -static void ConvexHullFunction(DataChunk &args, ExpressionState &state, Vector &result) { - auto &lstate = GEOSFunctionLocalState::ResetAndGet(state); - auto &ctx = lstate.ctx.GetCtx(); - UnaryExecutor::Execute(args.data[0], result, args.size(), [&](geometry_t &geometry_blob) { - auto geometry = lstate.ctx.Deserialize(geometry_blob); - 
auto convex_hull_geometry = make_uniq_geos(ctx, GEOSConvexHull_r(ctx, geometry.get())); - return lstate.ctx.Serialize(result, convex_hull_geometry); - }); -} - -//------------------------------------------------------------------------------ -// Documentation -//------------------------------------------------------------------------------ -static constexpr const char *DOC_DESCRIPTION = R"( - Returns the convex hull enclosing the geometry -)"; - -static constexpr const char *DOC_EXAMPLE = R"( - -)"; - -static constexpr DocTag DOC_TAGS[] = {{"ext", "spatial"}}; -//------------------------------------------------------------------------------ -// Register functions -//------------------------------------------------------------------------------ -void GEOSScalarFunctions::RegisterStConvexHull(DatabaseInstance &db) { - - ScalarFunctionSet set("ST_ConvexHull"); - - set.AddFunction(ScalarFunction({GeoTypes::GEOMETRY()}, GeoTypes::GEOMETRY(), ConvexHullFunction, nullptr, nullptr, - nullptr, GEOSFunctionLocalState::Init)); - - ExtensionUtil::RegisterFunction(db, set); - DocUtil::AddDocumentation(db, "ST_ConvexHull", DOC_DESCRIPTION, DOC_EXAMPLE, DOC_TAGS); -} - -} // namespace geos - -} // namespace spatial diff --git a/spatial/src/spatial/geos/functions/scalar/st_covered_by.cpp b/spatial/src/spatial/geos/functions/scalar/st_covered_by.cpp deleted file mode 100644 index 19373527..00000000 --- a/spatial/src/spatial/geos/functions/scalar/st_covered_by.cpp +++ /dev/null @@ -1,60 +0,0 @@ -#include "spatial/common.hpp" -#include "spatial/core/types.hpp" -#include "spatial/geos/functions/scalar.hpp" -#include "spatial/geos/functions/common.hpp" -#include "spatial/geos/geos_wrappers.hpp" -#include "spatial/geos/geos_executor.hpp" -#include "spatial/core/function_builder.hpp" - -#include "duckdb/parser/parsed_data/create_scalar_function_info.hpp" -#include "duckdb/common/vector_operations/unary_executor.hpp" -#include "duckdb/common/vector_operations/binary_executor.hpp" - 
-namespace spatial { - -namespace geos { - -using namespace spatial::core; - -static void CoveredByFunction(DataChunk &args, ExpressionState &state, Vector &result) { - auto &lstate = GEOSFunctionLocalState::ResetAndGet(state); - auto &left = args.data[0]; - auto &right = args.data[1]; - auto count = args.size(); - GEOSExecutor::ExecuteNonSymmetricPreparedBinary(lstate, left, right, count, result, GEOSCoveredBy_r, - GEOSPreparedCoveredBy_r); -} - -//------------------------------------------------------------------------------ -// Documentation -//------------------------------------------------------------------------------ -static constexpr const char *DOC_DESCRIPTION = R"( - Returns true if geom1 is "covered" by geom2 -)"; - -static constexpr const char *DOC_EXAMPLE = R"()"; -//------------------------------------------------------------------------------ -// Register functions -//------------------------------------------------------------------------------ -void GEOSScalarFunctions::RegisterStCoveredBy(DatabaseInstance &db) { - - FunctionBuilder::RegisterScalar(db, "ST_CoveredBy", [](ScalarFunctionBuilder &func) { - func.AddVariant([](ScalarFunctionVariantBuilder &variant) { - variant.AddParameter("geom1", GeoTypes::GEOMETRY()); - variant.AddParameter("geom2", GeoTypes::GEOMETRY()); - variant.SetReturnType(LogicalType::BOOLEAN); - variant.SetFunction(CoveredByFunction); - variant.SetInit(GEOSFunctionLocalState::Init); - - variant.SetExample(DOC_EXAMPLE); - variant.SetDescription(DOC_DESCRIPTION); - }); - - func.SetTag("ext", "spatial"); - func.SetTag("category", "relation"); - }); -} - -} // namespace geos - -} // namespace spatial diff --git a/spatial/src/spatial/geos/functions/scalar/st_covers.cpp b/spatial/src/spatial/geos/functions/scalar/st_covers.cpp deleted file mode 100644 index 61dd0e47..00000000 --- a/spatial/src/spatial/geos/functions/scalar/st_covers.cpp +++ /dev/null @@ -1,60 +0,0 @@ -#include "spatial/common.hpp" -#include 
"spatial/core/types.hpp" -#include "spatial/geos/functions/scalar.hpp" -#include "spatial/geos/functions/common.hpp" -#include "spatial/geos/geos_wrappers.hpp" -#include "spatial/geos/geos_executor.hpp" -#include "spatial/core/function_builder.hpp" - -#include "duckdb/parser/parsed_data/create_scalar_function_info.hpp" -#include "duckdb/common/vector_operations/unary_executor.hpp" -#include "duckdb/common/vector_operations/binary_executor.hpp" - -namespace spatial { - -namespace geos { - -using namespace spatial::core; - -static void CoversFunction(DataChunk &args, ExpressionState &state, Vector &result) { - auto &lstate = GEOSFunctionLocalState::ResetAndGet(state); - auto &left = args.data[0]; - auto &right = args.data[1]; - auto count = args.size(); - GEOSExecutor::ExecuteNonSymmetricPreparedBinary(lstate, left, right, count, result, GEOSCovers_r, - GEOSPreparedCovers_r); -} - -//------------------------------------------------------------------------------ -// Documentation -//------------------------------------------------------------------------------ -static constexpr const char *DOC_DESCRIPTION = R"( - Returns if geom1 "covers" geom2 -)"; - -static constexpr const char *DOC_EXAMPLE = R"()"; -//------------------------------------------------------------------------------ -// Register functions -//------------------------------------------------------------------------------ -void GEOSScalarFunctions::RegisterStCovers(DatabaseInstance &db) { - - FunctionBuilder::RegisterScalar(db, "ST_Covers", [](ScalarFunctionBuilder &func) { - func.AddVariant([](ScalarFunctionVariantBuilder &variant) { - variant.AddParameter("geom1", GeoTypes::GEOMETRY()); - variant.AddParameter("geom2", GeoTypes::GEOMETRY()); - variant.SetReturnType(LogicalType::BOOLEAN); - variant.SetFunction(CoversFunction); - variant.SetInit(GEOSFunctionLocalState::Init); - - variant.SetExample(DOC_EXAMPLE); - variant.SetDescription(DOC_DESCRIPTION); - }); - - func.SetTag("ext", "spatial"); - 
func.SetTag("category", "relation"); - }); -} - -} // namespace geos - -} // namespace spatial diff --git a/spatial/src/spatial/geos/functions/scalar/st_crosses.cpp b/spatial/src/spatial/geos/functions/scalar/st_crosses.cpp deleted file mode 100644 index de78bb4e..00000000 --- a/spatial/src/spatial/geos/functions/scalar/st_crosses.cpp +++ /dev/null @@ -1,60 +0,0 @@ -#include "spatial/common.hpp" -#include "spatial/core/types.hpp" -#include "spatial/geos/functions/scalar.hpp" -#include "spatial/geos/functions/common.hpp" -#include "spatial/geos/geos_wrappers.hpp" -#include "spatial/geos/geos_executor.hpp" -#include "spatial/core/function_builder.hpp" - -#include "duckdb/parser/parsed_data/create_scalar_function_info.hpp" -#include "duckdb/common/vector_operations/unary_executor.hpp" -#include "duckdb/common/vector_operations/binary_executor.hpp" - -namespace spatial { - -namespace geos { - -using namespace spatial::core; - -static void CrossesFunction(DataChunk &args, ExpressionState &state, Vector &result) { - auto &lstate = GEOSFunctionLocalState::ResetAndGet(state); - auto &left = args.data[0]; - auto &right = args.data[1]; - auto count = args.size(); - GEOSExecutor::ExecuteSymmetricPreparedBinary(lstate, left, right, count, result, GEOSCrosses_r, - GEOSPreparedCrosses_r); -} - -//------------------------------------------------------------------------------ -// Documentation -//------------------------------------------------------------------------------ -static constexpr const char *DOC_DESCRIPTION = R"( - Returns true if geom1 "crosses" geom2 -)"; - -static constexpr const char *DOC_EXAMPLE = R"()"; -//------------------------------------------------------------------------------ -// Register functions -//------------------------------------------------------------------------------ -void GEOSScalarFunctions::RegisterStCrosses(DatabaseInstance &db) { - - FunctionBuilder::RegisterScalar(db, "ST_Crosses", [](ScalarFunctionBuilder &func) { - 
func.AddVariant([](ScalarFunctionVariantBuilder &variant) { - variant.AddParameter("geom1", GeoTypes::GEOMETRY()); - variant.AddParameter("geom2", GeoTypes::GEOMETRY()); - variant.SetReturnType(LogicalType::BOOLEAN); - variant.SetFunction(CrossesFunction); - variant.SetInit(GEOSFunctionLocalState::Init); - - variant.SetExample(DOC_EXAMPLE); - variant.SetDescription(DOC_DESCRIPTION); - }); - - func.SetTag("ext", "spatial"); - func.SetTag("category", "relation"); - }); -} - -} // namespace geos - -} // namespace spatial diff --git a/spatial/src/spatial/geos/functions/scalar/st_difference.cpp b/spatial/src/spatial/geos/functions/scalar/st_difference.cpp deleted file mode 100644 index ece77f23..00000000 --- a/spatial/src/spatial/geos/functions/scalar/st_difference.cpp +++ /dev/null @@ -1,59 +0,0 @@ -#include "spatial/common.hpp" -#include "spatial/core/types.hpp" -#include "spatial/geos/functions/scalar.hpp" -#include "spatial/geos/functions/common.hpp" -#include "spatial/geos/geos_wrappers.hpp" - -#include "duckdb/parser/parsed_data/create_scalar_function_info.hpp" -#include "duckdb/common/vector_operations/unary_executor.hpp" -#include "duckdb/common/vector_operations/binary_executor.hpp" - -namespace spatial { - -namespace geos { - -using namespace spatial::core; - -static void DifferenceFunction(DataChunk &args, ExpressionState &state, Vector &result) { - auto &lstate = GEOSFunctionLocalState::ResetAndGet(state); - auto &ctx = lstate.ctx.GetCtx(); - BinaryExecutor::Execute( - args.data[0], args.data[1], result, args.size(), [&](geometry_t left, geometry_t right) { - auto left_geos_geom = lstate.ctx.Deserialize(left); - auto right_geos_geom = lstate.ctx.Deserialize(right); - auto geos_result = make_uniq_geos(ctx, GEOSDifference_r(ctx, left_geos_geom.get(), right_geos_geom.get())); - return lstate.ctx.Serialize(result, geos_result); - }); -} - -//------------------------------------------------------------------------------ -// Documentation 
-//------------------------------------------------------------------------------ - -static constexpr const char *DOC_DESCRIPTION = R"( - Returns the "difference" between two geometries -)"; - -static constexpr const char *DOC_EXAMPLE = R"( - -)"; - -static constexpr DocTag DOC_TAGS[] = {{"ext", "spatial"}, {"category", "construction"}}; - -//------------------------------------------------------------------------------ -// Register functions -//------------------------------------------------------------------------------ -void GEOSScalarFunctions::RegisterStDifference(DatabaseInstance &db) { - - ScalarFunctionSet set("ST_Difference"); - - set.AddFunction(ScalarFunction({GeoTypes::GEOMETRY(), GeoTypes::GEOMETRY()}, GeoTypes::GEOMETRY(), - DifferenceFunction, nullptr, nullptr, nullptr, GEOSFunctionLocalState::Init)); - - ExtensionUtil::RegisterFunction(db, set); - DocUtil::AddDocumentation(db, "ST_Difference", DOC_DESCRIPTION, DOC_EXAMPLE, DOC_TAGS); -} - -} // namespace geos - -} // namespace spatial diff --git a/spatial/src/spatial/geos/functions/scalar/st_disjoint.cpp b/spatial/src/spatial/geos/functions/scalar/st_disjoint.cpp deleted file mode 100644 index d7bd3a75..00000000 --- a/spatial/src/spatial/geos/functions/scalar/st_disjoint.cpp +++ /dev/null @@ -1,55 +0,0 @@ -#include "spatial/common.hpp" -#include "spatial/core/types.hpp" -#include "spatial/geos/functions/scalar.hpp" -#include "spatial/geos/functions/common.hpp" -#include "spatial/geos/geos_wrappers.hpp" -#include "spatial/geos/geos_executor.hpp" - -#include "duckdb/parser/parsed_data/create_scalar_function_info.hpp" -#include "duckdb/common/vector_operations/unary_executor.hpp" -#include "duckdb/common/vector_operations/binary_executor.hpp" - -namespace spatial { - -namespace geos { - -using namespace spatial::core; - -static void DisjointFunction(DataChunk &args, ExpressionState &state, Vector &result) { - auto &lstate = GEOSFunctionLocalState::ResetAndGet(state); - auto &left = args.data[0]; - 
auto &right = args.data[1]; - auto count = args.size(); - GEOSExecutor::ExecuteSymmetricPreparedBinary(lstate, left, right, count, result, GEOSDisjoint_r, - GEOSPreparedDisjoint_r); -} - -//------------------------------------------------------------------------------ -// Documentation -//------------------------------------------------------------------------------ -static constexpr const char *DOC_DESCRIPTION = R"( - Returns if two geometries are disjoint -)"; - -static constexpr const char *DOC_EXAMPLE = R"( - -)"; - -static constexpr DocTag DOC_TAGS[] = {{"ext", "spatial"}, {"category", "relation"}}; -//------------------------------------------------------------------------------ -// Register functions -//------------------------------------------------------------------------------ -void GEOSScalarFunctions::RegisterStDisjoint(DatabaseInstance &db) { - - ScalarFunctionSet set("ST_Disjoint"); - - set.AddFunction(ScalarFunction({GeoTypes::GEOMETRY(), GeoTypes::GEOMETRY()}, LogicalType::BOOLEAN, DisjointFunction, - nullptr, nullptr, nullptr, GEOSFunctionLocalState::Init)); - - ExtensionUtil::RegisterFunction(db, set); - DocUtil::AddDocumentation(db, "ST_Disjoint", DOC_DESCRIPTION, DOC_EXAMPLE, DOC_TAGS); -} - -} // namespace geos - -} // namespace spatial diff --git a/spatial/src/spatial/geos/functions/scalar/st_distance.cpp b/spatial/src/spatial/geos/functions/scalar/st_distance.cpp deleted file mode 100644 index c9003e0f..00000000 --- a/spatial/src/spatial/geos/functions/scalar/st_distance.cpp +++ /dev/null @@ -1,96 +0,0 @@ -#include "spatial/common.hpp" -#include "spatial/core/types.hpp" -#include "spatial/geos/functions/scalar.hpp" -#include "spatial/geos/functions/common.hpp" -#include "spatial/geos/geos_wrappers.hpp" - -#include "duckdb/parser/parsed_data/create_scalar_function_info.hpp" -#include "duckdb/common/vector_operations/unary_executor.hpp" -#include "duckdb/common/vector_operations/binary_executor.hpp" - -namespace spatial { - -namespace geos { - 
-using namespace spatial::core; - -static void ExecutePreparedDistance(GEOSFunctionLocalState &lstate, Vector &left, Vector &right, idx_t count, - Vector &result) { - auto &ctx = lstate.ctx.GetCtx(); - - // Optimize: if one of the arguments is a constant, we can prepare it once and reuse it - if (left.GetVectorType() == VectorType::CONSTANT_VECTOR && right.GetVectorType() != VectorType::CONSTANT_VECTOR) { - auto &left_blob = FlatVector::GetData(left)[0]; - auto left_geom = lstate.ctx.Deserialize(left_blob); - auto left_prepared = make_uniq_geos(ctx, GEOSPrepare_r(ctx, left_geom.get())); - - UnaryExecutor::Execute(right, result, count, [&](geometry_t &right_blob) { - auto right_geometry = lstate.ctx.Deserialize(right_blob); - double distance; - GEOSPreparedDistance_r(ctx, left_prepared.get(), right_geometry.get(), &distance); - return distance; - }); - } else if (right.GetVectorType() == VectorType::CONSTANT_VECTOR && - left.GetVectorType() != VectorType::CONSTANT_VECTOR) { - auto &right_blob = FlatVector::GetData(right)[0]; - auto right_geom = lstate.ctx.Deserialize(right_blob); - auto right_prepared = make_uniq_geos(ctx, GEOSPrepare_r(ctx, right_geom.get())); - - UnaryExecutor::Execute(left, result, count, [&](geometry_t &left_blob) { - auto left_geometry = lstate.ctx.Deserialize(left_blob); - double distance; - GEOSPreparedDistance_r(ctx, right_prepared.get(), left_geometry.get(), &distance); - return distance; - }); - } else { - BinaryExecutor::Execute( - left, right, result, count, [&](geometry_t &left_blob, geometry_t &right_blob) { - auto left_geometry = lstate.ctx.Deserialize(left_blob); - auto right_geometry = lstate.ctx.Deserialize(right_blob); - double distance; - GEOSDistance_r(ctx, left_geometry.get(), right_geometry.get(), &distance); - return distance; - }); - } -} - -static void DistanceFunction(DataChunk &args, ExpressionState &state, Vector &result) { - auto &lstate = GEOSFunctionLocalState::ResetAndGet(state); - auto &left = args.data[0]; - auto 
&right = args.data[1]; - auto count = args.size(); - ExecutePreparedDistance(lstate, left, right, count, result); -} - -//------------------------------------------------------------------------------ -// Documentation -//------------------------------------------------------------------------------ -static constexpr const char *DOC_DESCRIPTION = R"( - Returns the distance between two geometries. -)"; - -static constexpr const char *DOC_EXAMPLE = R"( -select st_distance('POINT(0 0)'::geometry, 'POINT(1 1)'::geometry); ----- -1.4142135623731 -)"; - -static constexpr DocTag DOC_TAGS[] = {{"ext", "spatial"}, {"category", "property"}}; - -//------------------------------------------------------------------------------ -// Register functions -//------------------------------------------------------------------------------ -void GEOSScalarFunctions::RegisterStDistance(DatabaseInstance &db) { - - ScalarFunctionSet set("ST_Distance"); - - set.AddFunction(ScalarFunction({GeoTypes::GEOMETRY(), GeoTypes::GEOMETRY()}, LogicalType::DOUBLE, DistanceFunction, - nullptr, nullptr, nullptr, GEOSFunctionLocalState::Init)); - - ExtensionUtil::AddFunctionOverload(db, set); - DocUtil::AddDocumentation(db, "ST_Distance", DOC_DESCRIPTION, DOC_EXAMPLE, DOC_TAGS); -} - -} // namespace geos - -} // namespace spatial diff --git a/spatial/src/spatial/geos/functions/scalar/st_distance_within.cpp b/spatial/src/spatial/geos/functions/scalar/st_distance_within.cpp deleted file mode 100644 index 1937ad45..00000000 --- a/spatial/src/spatial/geos/functions/scalar/st_distance_within.cpp +++ /dev/null @@ -1,95 +0,0 @@ -#include "spatial/common.hpp" -#include "spatial/core/types.hpp" -#include "spatial/geos/functions/scalar.hpp" -#include "spatial/geos/functions/common.hpp" -#include "spatial/geos/geos_wrappers.hpp" - -#include "duckdb/parser/parsed_data/create_scalar_function_info.hpp" -#include "duckdb/common/vector_operations/unary_executor.hpp" -#include 
"duckdb/common/vector_operations/binary_executor.hpp" - -namespace spatial { - -namespace geos { - -using namespace core; - -static void ExecutePreparedDistanceWithin(GEOSFunctionLocalState &lstate, Vector &left, Vector &right, - Vector &distance_vec, idx_t count, Vector &result) { - auto &ctx = lstate.ctx.GetCtx(); - - // Optimize: if one of the arguments is a constant, we can prepare it once and reuse it - if (left.GetVectorType() == VectorType::CONSTANT_VECTOR && right.GetVectorType() != VectorType::CONSTANT_VECTOR) { - auto &left_blob = FlatVector::GetData(left)[0]; - auto left_geom = lstate.ctx.Deserialize(left_blob); - auto left_prepared = make_uniq_geos(ctx, GEOSPrepare_r(ctx, left_geom.get())); - - BinaryExecutor::Execute( - right, distance_vec, result, count, [&](geometry_t &right_blob, double distance) { - auto right_geometry = lstate.ctx.Deserialize(right_blob); - auto ok = GEOSPreparedDistanceWithin_r(ctx, left_prepared.get(), right_geometry.get(), distance); - return ok == 1; - }); - } else if (right.GetVectorType() == VectorType::CONSTANT_VECTOR && - left.GetVectorType() != VectorType::CONSTANT_VECTOR) { - auto &right_blob = FlatVector::GetData(right)[0]; - auto right_geom = lstate.ctx.Deserialize(right_blob); - auto right_prepared = make_uniq_geos(ctx, GEOSPrepare_r(ctx, right_geom.get())); - - BinaryExecutor::Execute( - left, distance_vec, result, count, [&](geometry_t &left_blob, double distance) { - auto left_geometry = lstate.ctx.Deserialize(left_blob); - auto ok = GEOSPreparedDistanceWithin_r(ctx, right_prepared.get(), left_geometry.get(), distance); - return ok == 1; - }); - } else { - TernaryExecutor::Execute( - left, right, distance_vec, result, count, - [&](geometry_t &left_blob, geometry_t &right_blob, double distance) { - auto left_geometry = lstate.ctx.Deserialize(left_blob); - auto right_geometry = lstate.ctx.Deserialize(right_blob); - auto ok = GEOSDistanceWithin_r(ctx, left_geometry.get(), right_geometry.get(), distance); - return ok 
== 1; - }); - } -} - -static void DistanceWithinFunction(DataChunk &args, ExpressionState &state, Vector &result) { - auto &lstate = GEOSFunctionLocalState::ResetAndGet(state); - auto &left = args.data[0]; - auto &right = args.data[1]; - auto &distance_vec = args.data[2]; - auto count = args.size(); - ExecutePreparedDistanceWithin(lstate, left, right, distance_vec, count, result); -} - -//------------------------------------------------------------------------------ -// Documentation -//------------------------------------------------------------------------------ -static constexpr const char *DOC_DESCRIPTION = R"( - Returns if two geometries are within a target distance of each-other -)"; - -static constexpr const char *DOC_EXAMPLE = R"( - -)"; - -static constexpr DocTag DOC_TAGS[] = {{"ext", "spatial"}, {"category", "relation"}}; -//------------------------------------------------------------------------------ -// Register Functions -//------------------------------------------------------------------------------ -void GEOSScalarFunctions::RegisterStDistanceWithin(DatabaseInstance &db) { - - ScalarFunctionSet set("ST_DWithin"); - - set.AddFunction(ScalarFunction({GeoTypes::GEOMETRY(), GeoTypes::GEOMETRY(), LogicalType::DOUBLE}, - LogicalType::BOOLEAN, DistanceWithinFunction, nullptr, nullptr, nullptr, - GEOSFunctionLocalState::Init)); - - ExtensionUtil::RegisterFunction(db, set); - DocUtil::AddDocumentation(db, "ST_DWithin", DOC_DESCRIPTION, DOC_EXAMPLE, DOC_TAGS); -} - -} // namespace geos - -} // namespace spatial diff --git a/spatial/src/spatial/geos/functions/scalar/st_envelope.cpp b/spatial/src/spatial/geos/functions/scalar/st_envelope.cpp deleted file mode 100644 index 9eb7f2a2..00000000 --- a/spatial/src/spatial/geos/functions/scalar/st_envelope.cpp +++ /dev/null @@ -1,58 +0,0 @@ -#include "spatial/common.hpp" -#include "spatial/core/types.hpp" -#include "spatial/geos/functions/scalar.hpp" -#include "spatial/geos/functions/common.hpp" -#include 
"spatial/geos/geos_wrappers.hpp" - -#include "duckdb/parser/parsed_data/create_scalar_function_info.hpp" -#include "duckdb/common/vector_operations/unary_executor.hpp" -#include "duckdb/common/vector_operations/binary_executor.hpp" - -namespace spatial { - -namespace geos { - -using namespace spatial::core; - -static void EnvelopeFunction(DataChunk &args, ExpressionState &state, Vector &result) { - auto &lstate = GEOSFunctionLocalState::ResetAndGet(state); - auto &ctx = lstate.ctx.GetCtx(); - UnaryExecutor::ExecuteWithNulls( - args.data[0], result, args.size(), [&](geometry_t &geometry_blob, ValidityMask &mask, idx_t i) { - auto geometry = lstate.ctx.Deserialize(geometry_blob); - auto envelope = make_uniq_geos(ctx, GEOSEnvelope_r(ctx, geometry.get())); - return lstate.ctx.Serialize(result, envelope); - }); -} - -//------------------------------------------------------------------------------ -// Documentation -//------------------------------------------------------------------------------ - -static constexpr const char *DOC_DESCRIPTION = R"( - Returns the minimum bounding box for the input geometry as a polygon geometry. 
-)"; - -static constexpr const char *DOC_EXAMPLE = R"( - -)"; - -static constexpr DocTag DOC_TAGS[] = {{"ext", "spatial"}, {"category", "construction"}}; -//------------------------------------------------------------------------------ -// Register Functions -//------------------------------------------------------------------------------ - -void GEOSScalarFunctions::RegisterStEnvelope(DatabaseInstance &db) { - - ScalarFunctionSet set("ST_Envelope"); - - set.AddFunction(ScalarFunction({GeoTypes::GEOMETRY()}, GeoTypes::GEOMETRY(), EnvelopeFunction, nullptr, nullptr, - nullptr, GEOSFunctionLocalState::Init)); - - ExtensionUtil::RegisterFunction(db, set); - DocUtil::AddDocumentation(db, "ST_Envelope", DOC_DESCRIPTION, DOC_EXAMPLE, DOC_TAGS); -} - -} // namespace geos - -} // namespace spatial diff --git a/spatial/src/spatial/geos/functions/scalar/st_equals.cpp b/spatial/src/spatial/geos/functions/scalar/st_equals.cpp deleted file mode 100644 index c830d1b1..00000000 --- a/spatial/src/spatial/geos/functions/scalar/st_equals.cpp +++ /dev/null @@ -1,55 +0,0 @@ -#include "spatial/common.hpp" -#include "spatial/core/types.hpp" -#include "spatial/geos/functions/scalar.hpp" -#include "spatial/geos/functions/common.hpp" -#include "spatial/geos/geos_wrappers.hpp" - -#include "duckdb/parser/parsed_data/create_scalar_function_info.hpp" -#include "duckdb/common/vector_operations/unary_executor.hpp" -#include "duckdb/common/vector_operations/binary_executor.hpp" - -namespace spatial { - -namespace geos { - -using namespace spatial::core; - -static void EqualsFunction(DataChunk &args, ExpressionState &state, Vector &result) { - auto &lstate = GEOSFunctionLocalState::ResetAndGet(state); - auto &ctx = lstate.ctx.GetCtx(); - BinaryExecutor::Execute(args.data[0], args.data[1], result, args.size(), - [&](geometry_t &left_blob, geometry_t &right_blob) { - auto left = lstate.ctx.Deserialize(left_blob); - auto right = lstate.ctx.Deserialize(right_blob); - return GEOSEquals_r(ctx, 
left.get(), right.get()); - }); -} - -//------------------------------------------------------------------------------ -// Documentation -//------------------------------------------------------------------------------ -static constexpr const char *DOC_DESCRIPTION = R"( - Compares two geometries for equality -)"; - -static constexpr const char *DOC_EXAMPLE = R"( - -)"; - -static constexpr DocTag DOC_TAGS[] = {{"ext", "spatial"}}; -//------------------------------------------------------------------------------ -// Register Functions -//------------------------------------------------------------------------------ - -void GEOSScalarFunctions::RegisterStEquals(DatabaseInstance &db) { - ScalarFunctionSet set("ST_Equals"); - - set.AddFunction(ScalarFunction({GeoTypes::GEOMETRY(), GeoTypes::GEOMETRY()}, LogicalType::BOOLEAN, EqualsFunction, - nullptr, nullptr, nullptr, GEOSFunctionLocalState::Init)); - ExtensionUtil::RegisterFunction(db, set); - DocUtil::AddDocumentation(db, "ST_Equals", DOC_DESCRIPTION, DOC_EXAMPLE, DOC_TAGS); -} - -} // namespace geos - -} // namespace spatial diff --git a/spatial/src/spatial/geos/functions/scalar/st_intersection.cpp b/spatial/src/spatial/geos/functions/scalar/st_intersection.cpp deleted file mode 100644 index e833a178..00000000 --- a/spatial/src/spatial/geos/functions/scalar/st_intersection.cpp +++ /dev/null @@ -1,62 +0,0 @@ -#include "duckdb/common/vector_operations/binary_executor.hpp" -#include "duckdb/common/vector_operations/unary_executor.hpp" -#include "duckdb/parser/parsed_data/create_scalar_function_info.hpp" -#include "spatial/common.hpp" -#include "spatial/core/types.hpp" -#include "spatial/geos/functions/common.hpp" -#include "spatial/geos/functions/scalar.hpp" -#include "spatial/geos/geos_wrappers.hpp" -#include "spatial/core/function_builder.hpp" - -namespace spatial { - -namespace geos { - -using namespace spatial::core; - -static void IntersectionFunction(DataChunk &args, ExpressionState &state, Vector &result) { - 
auto &lstate = GEOSFunctionLocalState::ResetAndGet(state); - auto &ctx = lstate.ctx.GetCtx(); - BinaryExecutor::Execute( - args.data[0], args.data[1], result, args.size(), [&](geometry_t left, geometry_t right) { - auto left_geom = lstate.ctx.Deserialize(left); - auto right_geom = lstate.ctx.Deserialize(right); - - auto result_geom = make_uniq_geos(ctx, GEOSIntersection_r(ctx, left_geom.get(), right_geom.get())); - return lstate.ctx.Serialize(result, result_geom); - }); -} - -//------------------------------------------------------------------------------ -// Documentation -//------------------------------------------------------------------------------ -static constexpr const char *DOC_DESCRIPTION = R"( - Returns the "intersection" of geom1 and geom2 -)"; - -static constexpr const char *DOC_EXAMPLE = R"()"; -//------------------------------------------------------------------------------ -// Register Functions -//------------------------------------------------------------------------------ -void GEOSScalarFunctions::RegisterStIntersection(DatabaseInstance &db) { - - FunctionBuilder::RegisterScalar(db, "ST_Intersection", [](ScalarFunctionBuilder &func) { - func.AddVariant([](ScalarFunctionVariantBuilder &variant) { - variant.AddParameter("geom1", GeoTypes::GEOMETRY()); - variant.AddParameter("geom2", GeoTypes::GEOMETRY()); - variant.SetReturnType(GeoTypes::GEOMETRY()); - variant.SetFunction(IntersectionFunction); - variant.SetInit(GEOSFunctionLocalState::Init); - - variant.SetExample(DOC_EXAMPLE); - variant.SetDescription(DOC_DESCRIPTION); - }); - - func.SetTag("ext", "spatial"); - func.SetTag("category", "relation"); - }); -} - -} // namespace geos - -} // namespace spatial diff --git a/spatial/src/spatial/geos/functions/scalar/st_intersects.cpp b/spatial/src/spatial/geos/functions/scalar/st_intersects.cpp deleted file mode 100644 index 47079593..00000000 --- a/spatial/src/spatial/geos/functions/scalar/st_intersects.cpp +++ /dev/null @@ -1,55 +0,0 @@ -#include 
"spatial/common.hpp" -#include "spatial/core/types.hpp" -#include "spatial/geos/functions/scalar.hpp" -#include "spatial/geos/functions/common.hpp" -#include "spatial/geos/geos_wrappers.hpp" -#include "spatial/geos/geos_executor.hpp" - -#include "duckdb/parser/parsed_data/create_scalar_function_info.hpp" -#include "duckdb/common/vector_operations/unary_executor.hpp" -#include "duckdb/common/vector_operations/binary_executor.hpp" - -namespace spatial { - -namespace geos { - -using namespace spatial::core; - -static void IntersectsFunction(DataChunk &args, ExpressionState &state, Vector &result) { - auto &lstate = GEOSFunctionLocalState::ResetAndGet(state); - auto &left = args.data[0]; - auto &right = args.data[1]; - auto count = args.size(); - GEOSExecutor::ExecuteSymmetricPreparedBinary(lstate, left, right, count, result, GEOSIntersects_r, - GEOSPreparedIntersects_r); -} - -//------------------------------------------------------------------------------ -// Documentation -//------------------------------------------------------------------------------ -static constexpr const char *DOC_DESCRIPTION = R"( - Returns true if two geometries intersects -)"; - -static constexpr const char *DOC_EXAMPLE = R"( - -)"; - -static constexpr DocTag DOC_TAGS[] = {{"ext", "spatial"}, {"category", "property"}}; -//------------------------------------------------------------------------------ -// Register Functions -//------------------------------------------------------------------------------ -void GEOSScalarFunctions::RegisterStIntersects(DatabaseInstance &db) { - - ScalarFunctionSet set("ST_Intersects"); - - set.AddFunction(ScalarFunction({GeoTypes::GEOMETRY(), GeoTypes::GEOMETRY()}, LogicalType::BOOLEAN, - IntersectsFunction, nullptr, nullptr, nullptr, GEOSFunctionLocalState::Init)); - - ExtensionUtil::AddFunctionOverload(db, set); - DocUtil::AddDocumentation(db, "ST_Intersects", DOC_DESCRIPTION, DOC_EXAMPLE, DOC_TAGS); -} - -} // namespace geos - -} // namespace spatial diff 
--git a/spatial/src/spatial/geos/functions/scalar/st_is_ring.cpp b/spatial/src/spatial/geos/functions/scalar/st_is_ring.cpp deleted file mode 100644 index 9f0c80cb..00000000 --- a/spatial/src/spatial/geos/functions/scalar/st_is_ring.cpp +++ /dev/null @@ -1,54 +0,0 @@ -#include "spatial/common.hpp" -#include "spatial/core/types.hpp" -#include "spatial/geos/functions/scalar.hpp" -#include "spatial/geos/functions/common.hpp" -#include "spatial/geos/geos_wrappers.hpp" - -#include "duckdb/parser/parsed_data/create_scalar_function_info.hpp" -#include "duckdb/common/vector_operations/unary_executor.hpp" -#include "duckdb/common/vector_operations/binary_executor.hpp" - -namespace spatial { - -namespace geos { - -using namespace spatial::core; - -static void IsRingFunction(DataChunk &args, ExpressionState &state, Vector &result) { - auto &lstate = GEOSFunctionLocalState::ResetAndGet(state); - auto &ctx = lstate.ctx.GetCtx(); - UnaryExecutor::Execute(args.data[0], result, args.size(), [&](geometry_t input) { - auto geom = lstate.ctx.Deserialize(input); - return GEOSisRing_r(ctx, geom.get()); - }); -} - -//------------------------------------------------------------------------------ -// Documentation -//------------------------------------------------------------------------------ -static constexpr const char *DOC_DESCRIPTION = R"( - Returns true if the input line geometry is a ring (both ST_IsClosed and ST_IsSimple). 
-)"; - -static constexpr const char *DOC_EXAMPLE = R"( - -)"; - -static constexpr DocTag DOC_TAGS[] = {{"ext", "spatial"}, {"category", "property"}}; -//------------------------------------------------------------------------------ -// Register Functions -//------------------------------------------------------------------------------ -void GEOSScalarFunctions::RegisterStIsRing(DatabaseInstance &db) { - - ScalarFunctionSet set("ST_IsRing"); - - set.AddFunction(ScalarFunction({GeoTypes::GEOMETRY()}, LogicalType::BOOLEAN, IsRingFunction, nullptr, nullptr, - nullptr, GEOSFunctionLocalState::Init)); - - ExtensionUtil::RegisterFunction(db, set); - DocUtil::AddDocumentation(db, "ST_IsRing", DOC_DESCRIPTION, DOC_EXAMPLE, DOC_TAGS); -} - -} // namespace geos - -} // namespace spatial diff --git a/spatial/src/spatial/geos/functions/scalar/st_is_simple.cpp b/spatial/src/spatial/geos/functions/scalar/st_is_simple.cpp deleted file mode 100644 index 1e0033e4..00000000 --- a/spatial/src/spatial/geos/functions/scalar/st_is_simple.cpp +++ /dev/null @@ -1,54 +0,0 @@ -#include "spatial/common.hpp" -#include "spatial/core/types.hpp" -#include "spatial/geos/functions/scalar.hpp" -#include "spatial/geos/functions/common.hpp" -#include "spatial/geos/geos_wrappers.hpp" - -#include "duckdb/parser/parsed_data/create_scalar_function_info.hpp" -#include "duckdb/common/vector_operations/unary_executor.hpp" -#include "duckdb/common/vector_operations/binary_executor.hpp" - -namespace spatial { - -namespace geos { - -using namespace spatial::core; - -static void IsSimpleFunction(DataChunk &args, ExpressionState &state, Vector &result) { - auto &lstate = GEOSFunctionLocalState::ResetAndGet(state); - auto &ctx = lstate.ctx.GetCtx(); - UnaryExecutor::Execute(args.data[0], result, args.size(), [&](geometry_t input) { - auto geom = lstate.ctx.Deserialize(input); - return GEOSisSimple_r(ctx, geom.get()); - }); -} - -//------------------------------------------------------------------------------ -// 
Documentation -//------------------------------------------------------------------------------ -static constexpr const char *DOC_DESCRIPTION = R"( - Returns true if the input geometry is "simple" -)"; - -static constexpr const char *DOC_EXAMPLE = R"( - -)"; - -static constexpr DocTag DOC_TAGS[] = {{"ext", "spatial"}, {"category", "property"}}; -//------------------------------------------------------------------------------ -// Register Functions -//------------------------------------------------------------------------------ -void GEOSScalarFunctions::RegisterStIsSimple(DatabaseInstance &db) { - - ScalarFunctionSet set("ST_IsSimple"); - - set.AddFunction(ScalarFunction({GeoTypes::GEOMETRY()}, LogicalType::BOOLEAN, IsSimpleFunction, nullptr, nullptr, - nullptr, GEOSFunctionLocalState::Init)); - - ExtensionUtil::RegisterFunction(db, set); - DocUtil::AddDocumentation(db, "ST_IsSimple", DOC_DESCRIPTION, DOC_EXAMPLE, DOC_TAGS); -} - -} // namespace geos - -} // namespace spatial diff --git a/spatial/src/spatial/geos/functions/scalar/st_is_valid.cpp b/spatial/src/spatial/geos/functions/scalar/st_is_valid.cpp deleted file mode 100644 index 1463bfc4..00000000 --- a/spatial/src/spatial/geos/functions/scalar/st_is_valid.cpp +++ /dev/null @@ -1,112 +0,0 @@ -#include "spatial/common.hpp" -#include "spatial/core/types.hpp" -#include "spatial/geos/functions/scalar.hpp" -#include "spatial/geos/functions/common.hpp" -#include "spatial/geos/geos_wrappers.hpp" - -#include "duckdb/parser/parsed_data/create_scalar_function_info.hpp" -#include "duckdb/common/vector_operations/unary_executor.hpp" -#include "duckdb/common/vector_operations/binary_executor.hpp" - -namespace spatial { - -namespace geos { - -using namespace spatial::core; - -static bool IsValidForGeos(Geometry &geometry) { - switch (geometry.GetType()) { - case GeometryType::LINESTRING: - // Every linestring needs 0 or at least 2 points - return LineString::VertexCount(geometry) != 1; - - case GeometryType::POLYGON: { - 
// Every ring needs 0 or at least 4 points - for (uint32_t i = 0; i < Polygon::PartCount(geometry); i++) { - auto &ring = Polygon::Part(geometry, i); - if (LineString::VertexCount(ring) < 4) { - return false; - } - } - return true; - } - case GeometryType::MULTILINESTRING: { - for (uint32_t i = 0; i < MultiLineString::PartCount(geometry); i++) { - auto &linestring = MultiLineString::Part(geometry, i); - if (LineString::VertexCount(linestring) == 1) { - return false; - } - } - return true; - } - case GeometryType::MULTIPOLYGON: { - for (uint32_t i = 0; i < MultiPolygon::PartCount(geometry); i++) { - auto &polygon = MultiPolygon::Part(geometry, i); - for (uint32_t j = 0; j < Polygon::PartCount(polygon); j++) { - auto &ring = Polygon::Part(polygon, j); - if (LineString::VertexCount(ring) < 4) { - return false; - } - } - } - return true; - } - case GeometryType::GEOMETRYCOLLECTION: { - for (uint32_t i = 0; i < GeometryCollection::PartCount(geometry); i++) { - auto &geom = GeometryCollection::Part(geometry, i); - if (!IsValidForGeos(geom)) { - return false; - } - } - return true; - } - default: - return true; - } -} - -static void IsValidFunction(DataChunk &args, ExpressionState &state, Vector &result) { - auto &lstate = GEOSFunctionLocalState::ResetAndGet(state); - auto &arena = lstate.arena; - UnaryExecutor::Execute(args.data[0], result, args.size(), [&](geometry_t input) { - auto geom = Geometry::Deserialize(arena, input); - - // double check before calling into geos - if (!IsValidForGeos(geom)) { - return false; - } - - auto geos_geom = lstate.ctx.Deserialize(input); - return (bool)GEOSisValid_r(lstate.ctx.GetCtx(), geos_geom.get()); - }); -} - -//------------------------------------------------------------------------------ -// Documentation -//------------------------------------------------------------------------------ -static constexpr const char *DOC_DESCRIPTION = R"( - Returns true if the geometry is topologically "valid" -)"; - -static constexpr const char 
*DOC_EXAMPLE = R"( - -)"; - -static constexpr DocTag DOC_TAGS[] = {{"ext", "spatial"}, {"category", "property"}}; -//------------------------------------------------------------------------------ -// Register Functions -//------------------------------------------------------------------------------ -void GEOSScalarFunctions::RegisterStIsValid(DatabaseInstance &db) { - - ScalarFunctionSet set("ST_IsValid"); - - set.AddFunction(ScalarFunction({GeoTypes::GEOMETRY()}, LogicalType::BOOLEAN, IsValidFunction, nullptr, nullptr, - nullptr, GEOSFunctionLocalState::Init)); - - ExtensionUtil::RegisterFunction(db, set); - DocUtil::AddDocumentation(db, "ST_IsValid", DOC_DESCRIPTION, DOC_EXAMPLE, DOC_TAGS); -} - -} // namespace geos - -} // namespace spatial diff --git a/spatial/src/spatial/geos/functions/scalar/st_linemerge.cpp b/spatial/src/spatial/geos/functions/scalar/st_linemerge.cpp deleted file mode 100644 index b6f18539..00000000 --- a/spatial/src/spatial/geos/functions/scalar/st_linemerge.cpp +++ /dev/null @@ -1,71 +0,0 @@ -#include "spatial/common.hpp" -#include "spatial/core/types.hpp" -#include "spatial/geos/functions/scalar.hpp" -#include "spatial/geos/functions/common.hpp" -#include "spatial/geos/geos_wrappers.hpp" - -#include "duckdb/parser/parsed_data/create_scalar_function_info.hpp" -#include "duckdb/common/vector_operations/unary_executor.hpp" -#include "duckdb/common/vector_operations/binary_executor.hpp" - -namespace spatial { - -namespace geos { - -using namespace spatial::core; - -static void LineMergeFunction(DataChunk &args, ExpressionState &state, Vector &result) { - auto &lstate = GEOSFunctionLocalState::ResetAndGet(state); - auto &ctx = lstate.ctx.GetCtx(); - UnaryExecutor::Execute(args.data[0], result, args.size(), [&](geometry_t &geometry_blob) { - auto geometry = lstate.ctx.Deserialize(geometry_blob); - auto convex_hull_geometry = make_uniq_geos(ctx, GEOSLineMerge_r(ctx, geometry.get())); - return lstate.ctx.Serialize(result, convex_hull_geometry); 
- }); -} - -static void LineMergeFunctionWithDirected(DataChunk &args, ExpressionState &state, Vector &result) { - auto &lstate = GEOSFunctionLocalState::ResetAndGet(state); - auto &ctx = lstate.ctx.GetCtx(); - BinaryExecutor::Execute( - args.data[0], args.data[1], result, args.size(), [&](geometry_t &geometry_blob, bool directed) { - auto geometry = lstate.ctx.Deserialize(geometry_blob); - auto convex_hull_geometry = directed ? make_uniq_geos(ctx, GEOSLineMergeDirected_r(ctx, geometry.get())) - : make_uniq_geos(ctx, GEOSLineMerge_r(ctx, geometry.get())); - - return lstate.ctx.Serialize(result, convex_hull_geometry); - }); -} - -//------------------------------------------------------------------------------ -// Documentation -//------------------------------------------------------------------------------ -static constexpr const char *DOC_DESCRIPTION = R"( - "Merges" the input line geometry, optionally taking direction into account. -)"; - -static constexpr const char *DOC_EXAMPLE = R"( - -)"; - -static constexpr DocTag DOC_TAGS[] = {{"ext", "spatial"}, {"category", "construction"}}; -//------------------------------------------------------------------------------ -// Register Functions -//------------------------------------------------------------------------------ -void GEOSScalarFunctions::RegisterStLineMerge(DatabaseInstance &db) { - - ScalarFunctionSet set("ST_LineMerge"); - - set.AddFunction(ScalarFunction({GeoTypes::GEOMETRY()}, GeoTypes::GEOMETRY(), LineMergeFunction, nullptr, nullptr, - nullptr, GEOSFunctionLocalState::Init)); - set.AddFunction(ScalarFunction({GeoTypes::GEOMETRY(), LogicalType::BOOLEAN}, GeoTypes::GEOMETRY(), - LineMergeFunctionWithDirected, nullptr, nullptr, nullptr, - GEOSFunctionLocalState::Init)); - - ExtensionUtil::RegisterFunction(db, set); - DocUtil::AddDocumentation(db, "ST_LineMerge", DOC_DESCRIPTION, DOC_EXAMPLE, DOC_TAGS); -} - -} // namespace geos - -} // namespace spatial diff --git 
a/spatial/src/spatial/geos/functions/scalar/st_makevalid.cpp b/spatial/src/spatial/geos/functions/scalar/st_makevalid.cpp deleted file mode 100644 index 347d9089..00000000 --- a/spatial/src/spatial/geos/functions/scalar/st_makevalid.cpp +++ /dev/null @@ -1,55 +0,0 @@ -#include "spatial/common.hpp" -#include "spatial/core/types.hpp" -#include "spatial/geos/functions/scalar.hpp" -#include "spatial/geos/functions/common.hpp" -#include "spatial/geos/geos_wrappers.hpp" - -#include "duckdb/parser/parsed_data/create_scalar_function_info.hpp" -#include "duckdb/common/vector_operations/unary_executor.hpp" -#include "duckdb/common/vector_operations/binary_executor.hpp" - -namespace spatial { - -namespace geos { - -using namespace spatial::core; - -static void MakeValidFunction(DataChunk &args, ExpressionState &state, Vector &result) { - auto &lstate = GEOSFunctionLocalState::ResetAndGet(state); - auto &ctx = lstate.ctx.GetCtx(); - UnaryExecutor::Execute(args.data[0], result, args.size(), [&](geometry_t input) { - auto geom = lstate.ctx.Deserialize(input); - auto valid = make_uniq_geos(ctx, GEOSMakeValid_r(ctx, geom.get())); - return lstate.ctx.Serialize(result, valid); - }); -} - -//------------------------------------------------------------------------------ -// Documentation -//------------------------------------------------------------------------------ -static constexpr const char *DOC_DESCRIPTION = R"( - Attempts to make an invalid geometry valid without removing any vertices -)"; - -static constexpr const char *DOC_EXAMPLE = R"( - -)"; - -static constexpr DocTag DOC_TAGS[] = {{"ext", "spatial"}, {"category", "construction"}}; -//------------------------------------------------------------------------------ -// Register Functions -//------------------------------------------------------------------------------ -void GEOSScalarFunctions::RegisterStMakeValid(DatabaseInstance &db) { - - ScalarFunctionSet set("ST_MakeValid"); - - 
set.AddFunction(ScalarFunction({GeoTypes::GEOMETRY()}, GeoTypes::GEOMETRY(), MakeValidFunction, nullptr, nullptr, - nullptr, GEOSFunctionLocalState::Init)); - - ExtensionUtil::RegisterFunction(db, set); - DocUtil::AddDocumentation(db, "ST_MakeValid", DOC_DESCRIPTION, DOC_EXAMPLE, DOC_TAGS); -} - -} // namespace geos - -} // namespace spatial diff --git a/spatial/src/spatial/geos/functions/scalar/st_normalize.cpp b/spatial/src/spatial/geos/functions/scalar/st_normalize.cpp deleted file mode 100644 index 0d84a709..00000000 --- a/spatial/src/spatial/geos/functions/scalar/st_normalize.cpp +++ /dev/null @@ -1,56 +0,0 @@ -#include "spatial/common.hpp" -#include "spatial/core/types.hpp" -#include "spatial/geos/functions/scalar.hpp" -#include "spatial/geos/functions/common.hpp" -#include "spatial/geos/geos_wrappers.hpp" - -#include "duckdb/parser/parsed_data/create_scalar_function_info.hpp" -#include "duckdb/common/vector_operations/unary_executor.hpp" -#include "duckdb/common/vector_operations/binary_executor.hpp" - -namespace spatial { - -namespace geos { - -using namespace spatial::core; - -static void NormalizeFunction(DataChunk &args, ExpressionState &state, Vector &result) { - auto &lstate = GEOSFunctionLocalState::ResetAndGet(state); - auto &ctx = lstate.ctx.GetCtx(); - UnaryExecutor::Execute(args.data[0], result, args.size(), [&](geometry_t input) { - auto geom = lstate.ctx.Deserialize(input); - auto res = GEOSNormalize_r(ctx, geom.get()); - if (res == -1) { - throw InvalidInputException("Could not normalize geometry"); - } - return lstate.ctx.Serialize(result, geom); - }); -} - -//------------------------------------------------------------------------------ -// Documentation -//------------------------------------------------------------------------------ -static constexpr const char *DOC_DESCRIPTION = R"( - Returns a "normalized" version of the input geometry. 
-)"; - -static constexpr const char *DOC_EXAMPLE = R"()"; - -static constexpr DocTag DOC_TAGS[] = {{"ext", "spatial"}, {"category", "construction"}}; -//------------------------------------------------------------------------------ -// Register Functions -//------------------------------------------------------------------------------ -void GEOSScalarFunctions::RegisterStNormalize(DatabaseInstance &db) { - - ScalarFunctionSet set("ST_Normalize"); - - set.AddFunction(ScalarFunction({GeoTypes::GEOMETRY()}, GeoTypes::GEOMETRY(), NormalizeFunction, nullptr, nullptr, - nullptr, GEOSFunctionLocalState::Init)); - - ExtensionUtil::RegisterFunction(db, set); - DocUtil::AddDocumentation(db, "ST_Normalize", DOC_DESCRIPTION, DOC_EXAMPLE, DOC_TAGS); -} - -} // namespace geos - -} // namespace spatial diff --git a/spatial/src/spatial/geos/functions/scalar/st_overlaps.cpp b/spatial/src/spatial/geos/functions/scalar/st_overlaps.cpp deleted file mode 100644 index 9e46e31c..00000000 --- a/spatial/src/spatial/geos/functions/scalar/st_overlaps.cpp +++ /dev/null @@ -1,56 +0,0 @@ -#include "spatial/common.hpp" -#include "spatial/core/types.hpp" -#include "spatial/geos/functions/scalar.hpp" -#include "spatial/geos/functions/common.hpp" -#include "spatial/geos/geos_wrappers.hpp" -#include "spatial/geos/geos_executor.hpp" -#include "spatial/core/function_builder.hpp" - -namespace spatial { - -namespace geos { - -using namespace spatial::core; - -static void OverlapsFunction(DataChunk &args, ExpressionState &state, Vector &result) { - auto &lstate = GEOSFunctionLocalState::ResetAndGet(state); - auto &left = args.data[0]; - auto &right = args.data[1]; - auto count = args.size(); - GEOSExecutor::ExecuteSymmetricPreparedBinary(lstate, left, right, count, result, GEOSOverlaps_r, - GEOSPreparedOverlaps_r); -} - -//------------------------------------------------------------------------------ -// Documentation -//------------------------------------------------------------------------------ 
-static constexpr const char *DOC_DESCRIPTION = R"( - Returns true if geom1 "overlaps" geom2 -)"; - -static constexpr const char *DOC_EXAMPLE = R"()"; -//------------------------------------------------------------------------------ -// Register Functions -//------------------------------------------------------------------------------ - -void GEOSScalarFunctions::RegisterStOverlaps(DatabaseInstance &db) { - FunctionBuilder::RegisterScalar(db, "ST_Overlaps", [](ScalarFunctionBuilder &func) { - func.AddVariant([](ScalarFunctionVariantBuilder &variant) { - variant.AddParameter("geom1", GeoTypes::GEOMETRY()); - variant.AddParameter("geom2", GeoTypes::GEOMETRY()); - variant.SetReturnType(LogicalType::BOOLEAN); - variant.SetFunction(OverlapsFunction); - variant.SetInit(GEOSFunctionLocalState::Init); - - variant.SetExample(DOC_EXAMPLE); - variant.SetDescription(DOC_DESCRIPTION); - }); - - func.SetTag("ext", "spatial"); - func.SetTag("category", "relation"); - }); -} - -} // namespace geos - -} // namespace spatial diff --git a/spatial/src/spatial/geos/functions/scalar/st_pointonsurface.cpp b/spatial/src/spatial/geos/functions/scalar/st_pointonsurface.cpp deleted file mode 100644 index a1bed79e..00000000 --- a/spatial/src/spatial/geos/functions/scalar/st_pointonsurface.cpp +++ /dev/null @@ -1,54 +0,0 @@ -#include "spatial/common.hpp" -#include "spatial/core/types.hpp" -#include "spatial/geos/functions/scalar.hpp" -#include "spatial/geos/functions/common.hpp" - -#include "duckdb/parser/parsed_data/create_scalar_function_info.hpp" -#include "duckdb/common/vector_operations/unary_executor.hpp" -#include "duckdb/common/vector_operations/binary_executor.hpp" - -namespace spatial { - -namespace geos { - -using namespace spatial::core; - -static void PointOnSurfaceFunction(DataChunk &args, ExpressionState &state, Vector &result) { - auto &lstate = GEOSFunctionLocalState::ResetAndGet(state); - auto ctx = lstate.ctx.GetCtx(); - UnaryExecutor::Execute(args.data[0], result, 
args.size(), [&](geometry_t &geometry_blob) { - auto geometry = lstate.ctx.Deserialize(geometry_blob); - auto result_geom = make_uniq_geos(ctx, GEOSPointOnSurface_r(ctx, geometry.get())); - return lstate.ctx.Serialize(result, result_geom); - }); -} - -//------------------------------------------------------------------------------ -// Documentation -//------------------------------------------------------------------------------ -static constexpr const char *DOC_DESCRIPTION = R"( -Returns a point that is guaranteed to be on the surface of the input geometry. Sometimes a useful alternative to ST_Centroid. -)"; - -static constexpr const char *DOC_EXAMPLE = R"( - -)"; - -static constexpr DocTag DOC_TAGS[] = {{"ext", "spatial"}, {"category", "construction"}}; -//------------------------------------------------------------------------------ -// Register Functions -//------------------------------------------------------------------------------ -void GEOSScalarFunctions::RegisterStPointOnSurface(DatabaseInstance &db) { - - ScalarFunctionSet set("ST_PointOnSurface"); - - set.AddFunction(ScalarFunction({GeoTypes::GEOMETRY()}, GeoTypes::GEOMETRY(), PointOnSurfaceFunction, nullptr, - nullptr, nullptr, GEOSFunctionLocalState::Init)); - - ExtensionUtil::RegisterFunction(db, set); - DocUtil::AddDocumentation(db, "ST_PointOnSurface", DOC_DESCRIPTION, DOC_EXAMPLE, DOC_TAGS); -} - -} // namespace geos - -} // namespace spatial diff --git a/spatial/src/spatial/geos/functions/scalar/st_reduceprecision.cpp b/spatial/src/spatial/geos/functions/scalar/st_reduceprecision.cpp deleted file mode 100644 index 4fd2738d..00000000 --- a/spatial/src/spatial/geos/functions/scalar/st_reduceprecision.cpp +++ /dev/null @@ -1,56 +0,0 @@ -#include "spatial/common.hpp" -#include "spatial/core/types.hpp" -#include "spatial/geos/functions/scalar.hpp" -#include "spatial/geos/functions/common.hpp" - -#include "duckdb/parser/parsed_data/create_scalar_function_info.hpp" -#include 
"duckdb/common/vector_operations/unary_executor.hpp" -#include "duckdb/common/vector_operations/binary_executor.hpp" - -namespace spatial { - -namespace geos { - -using namespace spatial::core; - -static void ReducePrecisionFunction(DataChunk &args, ExpressionState &state, Vector &result) { - auto &lstate = GEOSFunctionLocalState::ResetAndGet(state); - auto ctx = lstate.ctx.GetCtx(); - BinaryExecutor::Execute( - args.data[0], args.data[1], result, args.size(), [&](geometry_t &geometry_blob, double precision) { - auto geometry = lstate.ctx.Deserialize(geometry_blob); - // Follow PostGIS behavior and dont set any special flags - auto result_geom = make_uniq_geos(ctx, GEOSGeom_setPrecision_r(ctx, geometry.get(), precision, 0)); - return lstate.ctx.Serialize(result, result_geom); - }); -} - -//------------------------------------------------------------------------------ -// Documentation -//------------------------------------------------------------------------------ -static constexpr const char *DOC_DESCRIPTION = R"( - Returns the geometry with all vertices reduced to the target precision -)"; - -static constexpr const char *DOC_EXAMPLE = R"( - -)"; - -static constexpr DocTag DOC_TAGS[] = {{"ext", "spatial"}}; -//------------------------------------------------------------------------------ -// Register Functions -//------------------------------------------------------------------------------ -void GEOSScalarFunctions::RegisterStReducePrecision(DatabaseInstance &db) { - - ScalarFunctionSet set("ST_ReducePrecision"); - - set.AddFunction(ScalarFunction({GeoTypes::GEOMETRY(), LogicalType::DOUBLE}, GeoTypes::GEOMETRY(), - ReducePrecisionFunction, nullptr, nullptr, nullptr, GEOSFunctionLocalState::Init)); - - ExtensionUtil::RegisterFunction(db, set); - DocUtil::AddDocumentation(db, "ST_ReducePrecision", DOC_DESCRIPTION, DOC_EXAMPLE, DOC_TAGS); -} - -} // namespace geos - -} // namespace spatial diff --git 
a/spatial/src/spatial/geos/functions/scalar/st_removerepeatedpoints.cpp b/spatial/src/spatial/geos/functions/scalar/st_removerepeatedpoints.cpp deleted file mode 100644 index 71230058..00000000 --- a/spatial/src/spatial/geos/functions/scalar/st_removerepeatedpoints.cpp +++ /dev/null @@ -1,77 +0,0 @@ -#include "spatial/common.hpp" -#include "spatial/core/types.hpp" -#include "spatial/geos/functions/scalar.hpp" -#include "spatial/geos/functions/common.hpp" -#include "spatial/geos/geos_wrappers.hpp" - -#include "duckdb/parser/parsed_data/create_scalar_function_info.hpp" -#include "duckdb/common/vector_operations/unary_executor.hpp" -#include "duckdb/common/vector_operations/binary_executor.hpp" - -namespace spatial { - -namespace geos { - -using namespace spatial::core; - -static void RemoveRepeatedPointsFunction(DataChunk &args, ExpressionState &state, Vector &result) { - D_ASSERT(args.data.size() == 1); - auto &input = args.data[0]; - auto count = args.size(); - - auto &lstate = GEOSFunctionLocalState::ResetAndGet(state); - auto ctx = lstate.ctx.GetCtx(); - UnaryExecutor::Execute(input, result, count, [&](geometry_t input) { - auto geom = lstate.ctx.Deserialize(input); - auto result_geom = make_uniq_geos(ctx, GEOSRemoveRepeatedPoints_r(ctx, geom.get(), 0)); - return lstate.ctx.Serialize(result, result_geom); - }); -} - -static void RemoveRepeatedPointsFunctionWithTolerance(DataChunk &args, ExpressionState &state, Vector &result) { - D_ASSERT(args.data.size() == 2); - auto &input = args.data[0]; - auto &tolerance = args.data[1]; - auto count = args.size(); - - auto &lstate = GEOSFunctionLocalState::ResetAndGet(state); - auto ctx = lstate.ctx.GetCtx(); - - BinaryExecutor::Execute( - input, tolerance, result, count, [&](geometry_t input, double tolerance) { - auto geom = lstate.ctx.Deserialize(input); - auto result_geom = make_uniq_geos(ctx, GEOSRemoveRepeatedPoints_r(ctx, geom.get(), tolerance)); - return lstate.ctx.Serialize(result, result_geom); - }); -} - 
-//------------------------------------------------------------------------------ -// Documentation -//------------------------------------------------------------------------------ -static constexpr const char *DOC_DESCRIPTION = R"( - Returns a new geometry with repeated points removed, optionally within a target distance of eachother. -)"; - -static constexpr const char *DOC_EXAMPLE = R"()"; - -static constexpr DocTag DOC_TAGS[] = {{"ext", "spatial"}, {"category", "construction"}}; -//------------------------------------------------------------------------------ -// Register Functions -//------------------------------------------------------------------------------ -void GEOSScalarFunctions::RegisterStRemoveRepeatedPoints(DatabaseInstance &db) { - - ScalarFunctionSet set("ST_RemoveRepeatedPoints"); - - set.AddFunction(ScalarFunction({GeoTypes::GEOMETRY()}, GeoTypes::GEOMETRY(), RemoveRepeatedPointsFunction, nullptr, - nullptr, nullptr, GEOSFunctionLocalState::Init)); - set.AddFunction(ScalarFunction({GeoTypes::GEOMETRY(), LogicalType::DOUBLE}, GeoTypes::GEOMETRY(), - RemoveRepeatedPointsFunctionWithTolerance, nullptr, nullptr, nullptr, - GEOSFunctionLocalState::Init)); - - ExtensionUtil::AddFunctionOverload(db, set); - DocUtil::AddDocumentation(db, "ST_RemoveRepeatedPoints", DOC_DESCRIPTION, DOC_EXAMPLE, DOC_TAGS); -} - -} // namespace geos - -} // namespace spatial diff --git a/spatial/src/spatial/geos/functions/scalar/st_reverse.cpp b/spatial/src/spatial/geos/functions/scalar/st_reverse.cpp deleted file mode 100644 index 5ac1db60..00000000 --- a/spatial/src/spatial/geos/functions/scalar/st_reverse.cpp +++ /dev/null @@ -1,60 +0,0 @@ -#include "spatial/common.hpp" -#include "spatial/core/types.hpp" -#include "spatial/geos/functions/scalar.hpp" -#include "spatial/geos/functions/common.hpp" -#include "spatial/geos/geos_wrappers.hpp" - -#include "duckdb/parser/parsed_data/create_scalar_function_info.hpp" -#include "duckdb/common/vector_operations/unary_executor.hpp" 
-#include "duckdb/common/vector_operations/binary_executor.hpp" - -namespace spatial { - -namespace geos { - -using namespace spatial::core; - -//------------------------------------------------------------------------------ -// GEOMETRY -//------------------------------------------------------------------------------ -static void GeometryReverseFunction(DataChunk &args, ExpressionState &state, Vector &result) { - D_ASSERT(args.data.size() == 1); - auto &input = args.data[0]; - auto count = args.size(); - - auto &lstate = GEOSFunctionLocalState::ResetAndGet(state); - auto ctx = lstate.ctx.GetCtx(); - UnaryExecutor::Execute(input, result, count, [&](geometry_t input) { - auto geom = lstate.ctx.Deserialize(input); - auto result_geom = make_uniq_geos(ctx, GEOSReverse_r(ctx, geom.get())); - return lstate.ctx.Serialize(result, result_geom); - }); -} - -//------------------------------------------------------------------------------ -// Documentation -//------------------------------------------------------------------------------ -static constexpr const char *DOC_DESCRIPTION = R"( -Returns a new version of the input geometry with the order of its vertices reversed -)"; - -static constexpr const char *DOC_EXAMPLE = R"()"; - -static constexpr DocTag DOC_TAGS[] = {{"ext", "spatial"}, {"category", "construction"}}; -//------------------------------------------------------------------------------ -// Register functions -//------------------------------------------------------------------------------ -void GEOSScalarFunctions::RegisterStReverse(DatabaseInstance &db) { - - ScalarFunctionSet set("ST_Reverse"); - - set.AddFunction(ScalarFunction({GeoTypes::GEOMETRY()}, GeoTypes::GEOMETRY(), GeometryReverseFunction, nullptr, - nullptr, nullptr, GEOSFunctionLocalState::Init)); - - ExtensionUtil::RegisterFunction(db, set); - DocUtil::AddDocumentation(db, "ST_Reverse", DOC_DESCRIPTION, DOC_EXAMPLE, DOC_TAGS); -} - -} // namespace geos - -} // namespace spatial diff --git 
a/spatial/src/spatial/geos/functions/scalar/st_shortestline.cpp b/spatial/src/spatial/geos/functions/scalar/st_shortestline.cpp deleted file mode 100644 index 2b329204..00000000 --- a/spatial/src/spatial/geos/functions/scalar/st_shortestline.cpp +++ /dev/null @@ -1,65 +0,0 @@ -#include "spatial/common.hpp" -#include "spatial/core/types.hpp" -#include "spatial/core/function_builder.hpp" -#include "spatial/geos/functions/scalar.hpp" -#include "spatial/geos/functions/common.hpp" -#include "spatial/geos/geos_wrappers.hpp" - -#include "duckdb/common/vector_operations/binary_executor.hpp" - -namespace spatial { - -namespace geos { - -using namespace spatial::core; - -static void ShortestLineFunction(DataChunk &args, ExpressionState &state, Vector &result) { - auto &lstate = GEOSFunctionLocalState::ResetAndGet(state); - auto &ctx = lstate.ctx.GetCtx(); - BinaryExecutor::Execute( - args.data[0], args.data[1], result, args.size(), [&](geometry_t left, geometry_t right) { - auto left_geom = lstate.ctx.Deserialize(left); - auto right_geom = lstate.ctx.Deserialize(right); - - auto coord_seq = GEOSNearestPoints_r(ctx, left_geom.get(), right_geom.get()); - auto result_geom = make_uniq_geos(ctx, GEOSGeom_createLineString_r(ctx, coord_seq)); - - return lstate.ctx.Serialize(result, result_geom); - }); -} - -//------------------------------------------------------------------------------ -// Documentation -//------------------------------------------------------------------------------ -static constexpr const char *DOC_DESCRIPTION = R"( - Returns the line between the two closest points between geom1 and geom2 -)"; - -static constexpr const char *DOC_EXAMPLE = R"( - -)"; -//------------------------------------------------------------------------------ -// Register Functions -//------------------------------------------------------------------------------ -void GEOSScalarFunctions::RegisterStShortestLine(DatabaseInstance &db) { - - FunctionBuilder::RegisterScalar(db, 
"ST_ShortestLine", [](ScalarFunctionBuilder &func) { - func.AddVariant([](ScalarFunctionVariantBuilder &variant) { - variant.AddParameter("geom1", GeoTypes::GEOMETRY()); - variant.AddParameter("geom2", GeoTypes::GEOMETRY()); - variant.SetReturnType(GeoTypes::GEOMETRY()); - variant.SetFunction(ShortestLineFunction); - variant.SetInit(GEOSFunctionLocalState::Init); - - variant.SetExample(DOC_EXAMPLE); - variant.SetDescription(DOC_DESCRIPTION); - }); - - func.SetTag("ext", "spatial"); - func.SetTag("category", "construction"); - }); -} - -} // namespace geos - -} // namespace spatial diff --git a/spatial/src/spatial/geos/functions/scalar/st_simplify.cpp b/spatial/src/spatial/geos/functions/scalar/st_simplify.cpp deleted file mode 100644 index 4ed2f5f7..00000000 --- a/spatial/src/spatial/geos/functions/scalar/st_simplify.cpp +++ /dev/null @@ -1,54 +0,0 @@ -#include "spatial/common.hpp" -#include "spatial/core/types.hpp" -#include "spatial/geos/functions/scalar.hpp" -#include "spatial/geos/functions/common.hpp" -#include "spatial/geos/geos_wrappers.hpp" - -#include "duckdb/parser/parsed_data/create_scalar_function_info.hpp" -#include "duckdb/common/vector_operations/unary_executor.hpp" -#include "duckdb/common/vector_operations/binary_executor.hpp" - -namespace spatial { - -namespace geos { - -using namespace spatial::core; - -static void SimplifyFunction(DataChunk &args, ExpressionState &state, Vector &result) { - auto &lstate = GEOSFunctionLocalState::ResetAndGet(state); - auto &ctx = lstate.ctx.GetCtx(); - BinaryExecutor::Execute( - args.data[0], args.data[1], result, args.size(), [&](geometry_t input, double distance) { - auto geom = lstate.ctx.Deserialize(input); - auto simplified = make_uniq_geos(ctx, GEOSSimplify_r(ctx, geom.get(), distance)); - return lstate.ctx.Serialize(result, simplified); - }); -} - -//------------------------------------------------------------------------------ -// Documentation 
-//------------------------------------------------------------------------------ -static constexpr const char *DOC_DESCRIPTION = R"( -Simplifies the input geometry by collapsing edges smaller than 'distance' -)"; - -static constexpr const char *DOC_EXAMPLE = R"()"; - -static constexpr DocTag DOC_TAGS[] = {{"ext", "spatial"}}; -//------------------------------------------------------------------------------ -// Register Functions -//------------------------------------------------------------------------------ -void GEOSScalarFunctions::RegisterStSimplify(DatabaseInstance &db) { - - ScalarFunctionSet set("ST_Simplify"); - - set.AddFunction(ScalarFunction({GeoTypes::GEOMETRY(), LogicalType::DOUBLE}, GeoTypes::GEOMETRY(), SimplifyFunction, - nullptr, nullptr, nullptr, GEOSFunctionLocalState::Init)); - - ExtensionUtil::RegisterFunction(db, set); - DocUtil::AddDocumentation(db, "ST_Simplify", DOC_DESCRIPTION, DOC_EXAMPLE, DOC_TAGS); -} - -} // namespace geos - -} // namespace spatial diff --git a/spatial/src/spatial/geos/functions/scalar/st_simplify_preserve_topology.cpp b/spatial/src/spatial/geos/functions/scalar/st_simplify_preserve_topology.cpp deleted file mode 100644 index 70f019c6..00000000 --- a/spatial/src/spatial/geos/functions/scalar/st_simplify_preserve_topology.cpp +++ /dev/null @@ -1,55 +0,0 @@ -#include "spatial/common.hpp" -#include "spatial/core/types.hpp" -#include "spatial/geos/functions/scalar.hpp" -#include "spatial/geos/functions/common.hpp" -#include "spatial/geos/geos_wrappers.hpp" - -#include "duckdb/parser/parsed_data/create_scalar_function_info.hpp" -#include "duckdb/common/vector_operations/unary_executor.hpp" -#include "duckdb/common/vector_operations/binary_executor.hpp" - -namespace spatial { - -namespace geos { - -using namespace spatial::core; - -static void SimplifyPreserveTopologyFunction(DataChunk &args, ExpressionState &state, Vector &result) { - auto &lstate = GEOSFunctionLocalState::ResetAndGet(state); - auto &ctx = 
lstate.ctx.GetCtx(); - BinaryExecutor::Execute( - args.data[0], args.data[1], result, args.size(), [&](geometry_t input, double distance) { - auto geom = lstate.ctx.Deserialize(input); - auto simplified = make_uniq_geos(ctx, GEOSTopologyPreserveSimplify_r(ctx, geom.get(), distance)); - return lstate.ctx.Serialize(result, simplified); - }); -} - -//------------------------------------------------------------------------------ -// Documentation -//------------------------------------------------------------------------------ -static constexpr const char *DOC_DESCRIPTION = R"( -Returns a simplified geometry but avoids creating invalid topologies -)"; - -static constexpr const char *DOC_EXAMPLE = R"()"; - -static constexpr DocTag DOC_TAGS[] = {{"ext", "spatial"}, {"category", "construction"}}; -//------------------------------------------------------------------------------ -// Register Functions -//------------------------------------------------------------------------------ -void GEOSScalarFunctions::RegisterStSimplifyPreserveTopology(DatabaseInstance &db) { - - ScalarFunctionSet set("ST_SimplifyPreserveTopology"); - - set.AddFunction(ScalarFunction({GeoTypes::GEOMETRY(), LogicalType::DOUBLE}, GeoTypes::GEOMETRY(), - SimplifyPreserveTopologyFunction, nullptr, nullptr, nullptr, - GEOSFunctionLocalState::Init)); - - ExtensionUtil::RegisterFunction(db, set); - DocUtil::AddDocumentation(db, "ST_SimplifyPreserveTopology", DOC_DESCRIPTION, DOC_EXAMPLE, DOC_TAGS); -} - -} // namespace geos - -} // namespace spatial diff --git a/spatial/src/spatial/geos/functions/scalar/st_touches.cpp b/spatial/src/spatial/geos/functions/scalar/st_touches.cpp deleted file mode 100644 index 6dc79e15..00000000 --- a/spatial/src/spatial/geos/functions/scalar/st_touches.cpp +++ /dev/null @@ -1,56 +0,0 @@ -#include "spatial/common.hpp" -#include "spatial/core/types.hpp" -#include "spatial/core/function_builder.hpp" - -#include "spatial/geos/functions/scalar.hpp" -#include 
"spatial/geos/functions/common.hpp" -#include "spatial/geos/geos_wrappers.hpp" -#include "spatial/geos/geos_executor.hpp" - -namespace spatial { - -namespace geos { - -using namespace spatial::core; - -static void TouchesFunction(DataChunk &args, ExpressionState &state, Vector &result) { - auto &lstate = GEOSFunctionLocalState::ResetAndGet(state); - auto &left = args.data[0]; - auto &right = args.data[1]; - auto count = args.size(); - GEOSExecutor::ExecuteSymmetricPreparedBinary(lstate, left, right, count, result, GEOSTouches_r, - GEOSPreparedTouches_r); -} - -//------------------------------------------------------------------------------ -// Documentation -//------------------------------------------------------------------------------ -static constexpr const char *DOC_DESCRIPTION = R"( -Returns true if geom1 "touches" geom2 -)"; - -static constexpr const char *DOC_EXAMPLE = R"()"; -//------------------------------------------------------------------------------ -// Register Functions -//------------------------------------------------------------------------------ -void GEOSScalarFunctions::RegisterStTouches(DatabaseInstance &db) { - FunctionBuilder::RegisterScalar(db, "ST_Touches", [](ScalarFunctionBuilder &func) { - func.AddVariant([](ScalarFunctionVariantBuilder &variant) { - variant.AddParameter("geom1", GeoTypes::GEOMETRY()); - variant.AddParameter("geom2", GeoTypes::GEOMETRY()); - variant.SetReturnType(LogicalType::BOOLEAN); - variant.SetFunction(TouchesFunction); - variant.SetInit(GEOSFunctionLocalState::Init); - - variant.SetExample(DOC_EXAMPLE); - variant.SetDescription(DOC_DESCRIPTION); - }); - - func.SetTag("ext", "spatial"); - func.SetTag("category", "relation"); - }); -} - -} // namespace geos - -} // namespace spatial diff --git a/spatial/src/spatial/geos/functions/scalar/st_union.cpp b/spatial/src/spatial/geos/functions/scalar/st_union.cpp deleted file mode 100644 index da751450..00000000 --- 
a/spatial/src/spatial/geos/functions/scalar/st_union.cpp +++ /dev/null @@ -1,65 +0,0 @@ -#include "spatial/common.hpp" -#include "spatial/core/types.hpp" -#include "spatial/geos/functions/scalar.hpp" -#include "spatial/geos/functions/common.hpp" -#include "spatial/geos/geos_wrappers.hpp" - -#include "duckdb/parser/parsed_data/create_scalar_function_info.hpp" -#include "duckdb/common/vector_operations/unary_executor.hpp" -#include "duckdb/common/vector_operations/binary_executor.hpp" - -namespace spatial { - -namespace geos { - -using namespace spatial::core; - -static void UnionFunction(DataChunk &args, ExpressionState &state, Vector &result) { - auto &lstate = GEOSFunctionLocalState::ResetAndGet(state); - auto &ctx = lstate.ctx.GetCtx(); - BinaryExecutor::Execute( - args.data[0], args.data[1], result, args.size(), [&](geometry_t left, geometry_t right) { - auto left_geom = lstate.ctx.Deserialize(left); - auto right_geom = lstate.ctx.Deserialize(right); - auto result_geom = make_uniq_geos(ctx, GEOSUnion_r(ctx, left_geom.get(), right_geom.get())); - return lstate.ctx.Serialize(result, result_geom); - }); -} - -//------------------------------------------------------------------------------ -// Documentation -//------------------------------------------------------------------------------ -static constexpr const char *DOC_DESCRIPTION = R"( -Returns the union of two geometries. 
-)"; - -static constexpr const char *DOC_EXAMPLE = R"( -SELECT ST_AsText( - ST_Union( - ST_GeomFromText('POINT(1 2)'), - ST_GeomFromText('POINT(3 4)') - ) -); ----- -MULTIPOINT (1 2, 3 4) -)"; - -static constexpr DocTag DOC_TAGS[] = {{"ext", "spatial"}}; - -//------------------------------------------------------------------------------ -// Register Functions -//------------------------------------------------------------------------------ -void GEOSScalarFunctions::RegisterStUnion(DatabaseInstance &db) { - - ScalarFunctionSet set("ST_Union"); - - set.AddFunction(ScalarFunction({GeoTypes::GEOMETRY(), GeoTypes::GEOMETRY()}, GeoTypes::GEOMETRY(), UnionFunction, - nullptr, nullptr, nullptr, GEOSFunctionLocalState::Init)); - - ExtensionUtil::RegisterFunction(db, set); - DocUtil::AddDocumentation(db, "ST_Union", DOC_DESCRIPTION, DOC_EXAMPLE, DOC_TAGS); -} - -} // namespace geos - -} // namespace spatial diff --git a/spatial/src/spatial/geos/functions/scalar/st_within.cpp b/spatial/src/spatial/geos/functions/scalar/st_within.cpp deleted file mode 100644 index 69e1d2e6..00000000 --- a/spatial/src/spatial/geos/functions/scalar/st_within.cpp +++ /dev/null @@ -1,55 +0,0 @@ -#include "spatial/common.hpp" -#include "spatial/core/types.hpp" -#include "spatial/core/function_builder.hpp" -#include "spatial/geos/functions/scalar.hpp" -#include "spatial/geos/functions/common.hpp" -#include "spatial/geos/geos_wrappers.hpp" -#include "spatial/geos/geos_executor.hpp" - -namespace spatial { - -namespace geos { - -using namespace spatial::core; - -static void WithinFunction(DataChunk &args, ExpressionState &state, Vector &result) { - auto &lstate = GEOSFunctionLocalState::ResetAndGet(state); - auto &left = args.data[0]; - auto &right = args.data[1]; - auto count = args.size(); - GEOSExecutor::ExecuteNonSymmetricPreparedBinary(lstate, left, right, count, result, GEOSWithin_r, - GEOSPreparedWithin_r); -} - -//------------------------------------------------------------------------------ 
-// Documentation -//------------------------------------------------------------------------------ -static constexpr const char *DOC_DESCRIPTION = R"( - Returns true if geom1 is "within" geom2 -)"; - -static constexpr const char *DOC_EXAMPLE = R"()"; -//------------------------------------------------------------------------------ -// Register Functions -//------------------------------------------------------------------------------ -void GEOSScalarFunctions::RegisterStWithin(DatabaseInstance &db) { - FunctionBuilder::RegisterScalar(db, "ST_Within", [](ScalarFunctionBuilder &func) { - func.AddVariant([](ScalarFunctionVariantBuilder &variant) { - variant.AddParameter("geom1", GeoTypes::GEOMETRY()); - variant.AddParameter("geom2", GeoTypes::GEOMETRY()); - variant.SetReturnType(LogicalType::BOOLEAN); - variant.SetFunction(WithinFunction); - variant.SetInit(GEOSFunctionLocalState::Init); - - variant.SetExample(DOC_EXAMPLE); - variant.SetDescription(DOC_DESCRIPTION); - }); - - func.SetTag("ext", "spatial"); - func.SetTag("category", "relation"); - }); -} - -} // namespace geos - -} // namespace spatial diff --git a/spatial/src/spatial/geos/geos_wrappers.cpp b/spatial/src/spatial/geos/geos_wrappers.cpp deleted file mode 100644 index 2605a261..00000000 --- a/spatial/src/spatial/geos/geos_wrappers.cpp +++ /dev/null @@ -1,593 +0,0 @@ -#include "spatial/common.hpp" -#include "spatial/geos/geos_wrappers.hpp" -#include "spatial/core/geometry/geometry.hpp" -#include "spatial/core/util/cursor.hpp" -#include "spatial/core/util/math.hpp" -#include "spatial/core/geometry/geometry_processor.hpp" - -namespace spatial { - -namespace geos { - -using namespace core; - -//------------------------------------------------------------------------------ -// Deserialize -//------------------------------------------------------------------------------ - -template -bool IsPointerAligned(const void *ptr) { - auto uintptr = reinterpret_cast(ptr); - return (uintptr % alignof(T)) == 0; -} - 
-class GEOSDeserializer final : GeometryProcessor { -private: - GEOSContextHandle_t ctx; - vector aligned_buffer; - -private: - GEOSCoordSeq_t *HandleVertexData(const VertexData &vertices) { - auto n_dims = 2 + (HasZ() ? 1 : 0) + (HasM() ? 1 : 0); - auto vertex_size = sizeof(double) * n_dims; - - // We know that the data is interleaved :^) - auto data = vertices.data[0]; - auto count = vertices.count; - - if (HasZ()) { - // GEOS does a memcpy in this case, so we can pass the buffer directly even if it's not aligned - return GEOSCoordSeq_copyFromBuffer_r(ctx, reinterpret_cast(data), count, HasZ(), HasM()); - } else { - auto data_ptr = data; - auto vertex_data = reinterpret_cast(data_ptr); - if (!IsPointerAligned(data_ptr)) { - // If the pointer is not aligned we need to copy the data to an aligned buffer before passing it to GEOS - aligned_buffer.clear(); - aligned_buffer.resize(count * n_dims); - memcpy(aligned_buffer.data(), data_ptr, count * vertex_size); - vertex_data = aligned_buffer.data(); - } - - return GEOSCoordSeq_copyFromBuffer_r(ctx, vertex_data, count, HasZ(), HasM()); - } - } - - GEOSGeometry *ProcessPoint(const VertexData &data) override { - if (data.IsEmpty()) { - return GEOSGeom_createEmptyPoint_r(ctx); - } else { - auto seq = HandleVertexData(data); - return GEOSGeom_createPoint_r(ctx, seq); - } - } - - GEOSGeometry *ProcessLineString(const VertexData &data) override { - if (data.IsEmpty()) { - return GEOSGeom_createEmptyLineString_r(ctx); - } else { - auto seq = HandleVertexData(data); - return GEOSGeom_createLineString_r(ctx, seq); - } - } - - GEOSGeometry *ProcessPolygon(PolygonState &state) override { - auto num_rings = state.RingCount(); - if (num_rings == 0) { - return GEOSGeom_createEmptyPolygon_r(ctx); - } else { - // TODO: Make a vector here instead of using new - auto geoms = new GEOSGeometry *[num_rings]; - for (uint32_t i = 0; i < num_rings; i++) { - auto vertices = state.Next(); - auto seq = HandleVertexData(vertices); - geoms[i] = 
GEOSGeom_createLinearRing_r(ctx, seq); - } - auto result = GEOSGeom_createPolygon_r(ctx, geoms[0], geoms + 1, num_rings - 1); - delete[] geoms; - return result; - } - } - - GEOSGeometry *ProcessCollection(CollectionState &state) override { - GEOSGeomTypes collection_type = GEOS_GEOMETRYCOLLECTION; - switch (CurrentType()) { - case GeometryType::MULTIPOINT: - collection_type = GEOS_MULTIPOINT; - break; - case GeometryType::MULTILINESTRING: - collection_type = GEOS_MULTILINESTRING; - break; - case GeometryType::MULTIPOLYGON: - collection_type = GEOS_MULTIPOLYGON; - break; - default: - break; - } - auto item_count = state.ItemCount(); - if (item_count == 0) { - return GEOSGeom_createEmptyCollection_r(ctx, collection_type); - } else { - auto geoms = new GEOSGeometry *[item_count]; - for (uint32_t i = 0; i < item_count; i++) { - geoms[i] = state.Next(); - } - auto result = GEOSGeom_createCollection_r(ctx, collection_type, geoms, item_count); - delete[] geoms; - return result; - } - } - -public: - explicit GEOSDeserializer(GEOSContextHandle_t ctx) : ctx(ctx) { - } - - GeometryPtr Execute(const geometry_t &geom) { - return GeometryPtr {Process(geom)}; - } -}; - -GEOSGeometry *DeserializeGEOSGeometry(const geometry_t &blob, GEOSContextHandle_t ctx) { - GEOSDeserializer deserializer(ctx); - return deserializer.Execute(blob).release(); -} - -GeometryPtr GeosContextWrapper::Deserialize(const geometry_t &blob) { - GEOSDeserializer deserializer(ctx); - return deserializer.Execute(blob); -} - -//------------------------------------------------------------------- -// Serialize -//------------------------------------------------------------------- -static uint32_t GetSerializedSize(const GEOSGeometry *geom, const GEOSContextHandle_t ctx) { - auto type = GEOSGeomTypeId_r(ctx, geom); - bool has_z = GEOSHasZ_r(ctx, geom); - bool has_m = GEOSHasM_r(ctx, geom); - - auto vertex_size = sizeof(double) * (2 + (has_z ? 1 : 0) + (has_m ? 
1 : 0)); - - switch (type) { - case GEOS_POINT: { - // 4 bytes for type, - // 4 bytes for num points, - // vertex_size bytes for data if not empty - bool empty = GEOSisEmpty_r(ctx, geom); - return 4 + 4 + (empty ? 0 : vertex_size); - } - case GEOS_LINESTRING: { - // 4 bytes for type, - // 4 bytes for num points, - // vertex_size bytes per point - auto seq = GEOSGeom_getCoordSeq_r(ctx, geom); - uint32_t count; - GEOSCoordSeq_getSize_r(ctx, seq, &count); - return 4 + 4 + count * vertex_size; - } - case GEOS_POLYGON: { - // 4 bytes for type, - // 4 bytes for num rings - // 4 bytes for num points in shell, - // vertex_size bytes per point in shell, - // 4 bytes for num holes, - // 4 bytes for num points in hole, - // vertex_size bytes per point in hole - // 4 bytes padding if (shell + holes) % 2 == 1 - uint32_t size = 4 + 4; - auto shell = GEOSGetExteriorRing_r(ctx, geom); - auto seq = GEOSGeom_getCoordSeq_r(ctx, shell); - uint32_t count; - GEOSCoordSeq_getSize_r(ctx, seq, &count); - size += 4 + (count * vertex_size); - auto num_holes = GEOSGetNumInteriorRings_r(ctx, geom); - for (uint32_t i = 0; i < num_holes; i++) { - auto hole = GEOSGetInteriorRingN_r(ctx, geom, i); - auto seq = GEOSGeom_getCoordSeq_r(ctx, hole); - uint32_t count; - GEOSCoordSeq_getSize_r(ctx, seq, &count); - size += 4 + (count * vertex_size); - } - - if ((num_holes + 1) % 2 == 1) { - size += 4; - } - - return size; - } - case GEOS_MULTIPOINT: { - // 4 bytes for type, - // 4 bytes for num points, - // x bytes per point - auto size = 4 + 4; - auto num_points = GEOSGetNumGeometries_r(ctx, geom); - for (uint32_t i = 0; i < num_points; i++) { - auto point = GEOSGetGeometryN_r(ctx, geom, i); - size += GetSerializedSize(point, ctx); - } - return size; - } - case GEOS_MULTILINESTRING: { - // 4 bytes for type, - // 4 bytes for num lines, - // x bytes per line - auto size = 4 + 4; - auto num_lines = GEOSGetNumGeometries_r(ctx, geom); - for (uint32_t i = 0; i < num_lines; i++) { - auto line = 
GEOSGetGeometryN_r(ctx, geom, i); - size += GetSerializedSize(line, ctx); - } - return size; - } - case GEOS_MULTIPOLYGON: { - // 4 bytes for type, - // 4 bytes for num polygons, - // x bytes per polygon - auto size = 4 + 4; - auto num_polygons = GEOSGetNumGeometries_r(ctx, geom); - for (uint32_t i = 0; i < num_polygons; i++) { - auto polygon = GEOSGetGeometryN_r(ctx, geom, i); - size += GetSerializedSize(polygon, ctx); - } - return size; - } - case GEOS_GEOMETRYCOLLECTION: { - // 4 bytes for type, - // 4 bytes for num geoms, - // x bytes per geom - auto size = 4 + 4; - auto num_geoms = GEOSGetNumGeometries_r(ctx, geom); - for (uint32_t i = 0; i < num_geoms; i++) { - auto subgeom = GEOSGetGeometryN_r(ctx, geom, i); - size += GetSerializedSize(subgeom, ctx); - } - return size; - } - default: { - throw NotImplementedException(StringUtil::Format("GEOS SerializedSize: Geometry type %d not supported", type)); - } - } -} - -static void SerializeGeometry(Cursor &writer, const GEOSGeometry *geom, const GEOSContextHandle_t ctx); - -static void SerializeCoordSeq(Cursor &writer, const GEOSCoordSequence *seq, bool has_z, bool has_m, uint32_t count, - const GEOSContextHandle_t ctx) { - GEOSCoordSeq_copyToBuffer_r(ctx, seq, reinterpret_cast(writer.GetPtr()), has_z, has_m); - auto vertex_size = sizeof(double) * (2 + (has_z ? 1 : 0) + (has_m ? 
1 : 0)); - writer.Skip(count * vertex_size); -} - -static void SerializePoint(Cursor &writer, const GEOSGeometry *geom, const GEOSContextHandle_t ctx) { - writer.Write((uint32_t)GeometryType::POINT); - - if (GEOSisEmpty_r(ctx, geom)) { - writer.Write(0); - return; - } - auto has_z = GEOSHasZ_r(ctx, geom); - auto has_m = GEOSHasM_r(ctx, geom); - auto seq = GEOSGeom_getCoordSeq_r(ctx, geom); - uint32_t count; - GEOSCoordSeq_getSize_r(ctx, seq, &count); - writer.Write(count); - SerializeCoordSeq(writer, seq, has_z, has_m, count, ctx); -} - -static void SerializeLineString(Cursor &writer, const GEOSGeometry *geom, const GEOSContextHandle_t ctx) { - writer.Write((uint32_t)GeometryType::LINESTRING); - if (GEOSisEmpty_r(ctx, geom)) { - writer.Write(0); - return; - } - auto has_z = GEOSHasZ_r(ctx, geom); - auto has_m = GEOSHasM_r(ctx, geom); - auto seq = GEOSGeom_getCoordSeq_r(ctx, geom); - uint32_t count; - GEOSCoordSeq_getSize_r(ctx, seq, &count); - writer.Write(count); - SerializeCoordSeq(writer, seq, has_z, has_m, count, ctx); -} - -static void SerializePolygon(Cursor &writer, const GEOSGeometry *geom, const GEOSContextHandle_t ctx) { - // TODO: check this - writer.Write((uint32_t)GeometryType::POLYGON); - - // Write number of rings - if (GEOSisEmpty_r(ctx, geom)) { - writer.Write(0); - return; - } - - uint32_t num_holes = GEOSGetNumInteriorRings_r(ctx, geom); - writer.Write(num_holes + 1); // +1 for the shell - - // Get shell - auto shell = GEOSGetExteriorRing_r(ctx, geom); - auto shell_seq = GEOSGeom_getCoordSeq_r(ctx, shell); - - // First pass, write all ring counts (including shell) - // Start with shell - uint32_t shell_count; - GEOSCoordSeq_getSize_r(ctx, shell_seq, &shell_count); - writer.Write(shell_count); - - // Then write all holes - for (uint32_t i = 0; i < num_holes; i++) { - auto ring = GEOSGetInteriorRingN_r(ctx, geom, i); - auto ring_seq = GEOSGeom_getCoordSeq_r(ctx, ring); - uint32_t ring_count; - GEOSCoordSeq_getSize_r(ctx, ring_seq, &ring_count); - 
writer.Write(ring_count); - } - - // If rings are odd, add padding - if ((num_holes + 1) % 2 == 1) { - writer.Write(0); - } - - // Second pass, write data for each ring - bool has_z = GEOSHasZ_r(ctx, geom); - bool has_m = GEOSHasM_r(ctx, geom); - // Start with shell - SerializeCoordSeq(writer, shell_seq, has_z, has_m, shell_count, ctx); - - // Then write each hole - for (uint32_t i = 0; i < num_holes; i++) { - auto ring = GEOSGetInteriorRingN_r(ctx, geom, i); - auto ring_seq = GEOSGeom_getCoordSeq_r(ctx, ring); - uint32_t ring_count; - - GEOSCoordSeq_getSize_r(ctx, ring_seq, &ring_count); - SerializeCoordSeq(writer, ring_seq, has_z, has_m, ring_count, ctx); - } -} - -static void SerializeMultiPoint(Cursor &writer, const GEOSGeometry *geom, const GEOSContextHandle_t ctx) { - writer.Write((uint32_t)GeometryType::MULTIPOINT); - uint32_t num_points = GEOSGetNumGeometries_r(ctx, geom); - writer.Write(num_points); - for (uint32_t i = 0; i < num_points; i++) { - auto point = GEOSGetGeometryN_r(ctx, geom, i); - SerializePoint(writer, point, ctx); - } -} - -static void SerializeMultiLineString(Cursor &writer, const GEOSGeometry *geom, const GEOSContextHandle_t ctx) { - writer.Write((uint32_t)GeometryType::MULTILINESTRING); - uint32_t num_linestrings = GEOSGetNumGeometries_r(ctx, geom); - writer.Write(num_linestrings); - for (uint32_t i = 0; i < num_linestrings; i++) { - auto linestring = GEOSGetGeometryN_r(ctx, geom, i); - SerializeLineString(writer, linestring, ctx); - } -} - -static void SerializeMultiPolygon(Cursor &writer, const GEOSGeometry *geom, const GEOSContextHandle_t ctx) { - writer.Write((uint32_t)GeometryType::MULTIPOLYGON); - uint32_t num_polygons = GEOSGetNumGeometries_r(ctx, geom); - writer.Write(num_polygons); - for (uint32_t i = 0; i < num_polygons; i++) { - auto polygon = GEOSGetGeometryN_r(ctx, geom, i); - SerializePolygon(writer, polygon, ctx); - } -} - -static void SerializeGeometryCollection(Cursor &writer, const GEOSGeometry *geom, const 
GEOSContextHandle_t ctx) { - writer.Write((uint32_t)GeometryType::GEOMETRYCOLLECTION); - uint32_t num_geometries = GEOSGetNumGeometries_r(ctx, geom); - writer.Write(num_geometries); - for (uint32_t i = 0; i < num_geometries; i++) { - auto geometry = GEOSGetGeometryN_r(ctx, geom, i); - SerializeGeometry(writer, geometry, ctx); - } -} - -static void SerializeGeometry(Cursor &writer, const GEOSGeometry *geom, const GEOSContextHandle_t ctx) { - auto type = GEOSGeomTypeId_r(ctx, geom); - switch (type) { - case GEOS_POINT: - SerializePoint(writer, geom, ctx); - break; - case GEOS_LINESTRING: - SerializeLineString(writer, geom, ctx); - break; - case GEOS_POLYGON: - SerializePolygon(writer, geom, ctx); - break; - case GEOS_MULTIPOINT: - SerializeMultiPoint(writer, geom, ctx); - break; - case GEOS_MULTILINESTRING: - SerializeMultiLineString(writer, geom, ctx); - break; - case GEOS_MULTIPOLYGON: - SerializeMultiPolygon(writer, geom, ctx); - break; - case GEOS_GEOMETRYCOLLECTION: - SerializeGeometryCollection(writer, geom, ctx); - break; - default: - throw NotImplementedException(StringUtil::Format("GEOS Serialize: Geometry type %d not supported", type)); - } -} - -template -static void GetExtendedExtent(const GEOSCoordSeq_t *seq, double *zmin, double *zmax, double *mmin, double *mmax, - GEOSContextHandle_t ctx) { - uint32_t size; - GEOSCoordSeq_getSize_r(ctx, seq, &size); - if (size > 2) { - if (HAS_Z && HAS_M) { - double z, m; - GEOSCoordSeq_getOrdinate_r(ctx, seq, 0, 2, &z); - GEOSCoordSeq_getOrdinate_r(ctx, seq, 0, 3, &m); - *zmin = std::min(z, *zmin); - *zmax = std::max(z, *zmax); - *mmin = std::min(m, *mmin); - *mmax = std::max(m, *mmax); - } else if (HAS_Z) { - double z; - GEOSCoordSeq_getOrdinate_r(ctx, seq, 0, 2, &z); - *zmin = std::min(z, *zmin); - *zmax = std::max(z, *zmax); - } else if (HAS_M) { - double m; - GEOSCoordSeq_getOrdinate_r(ctx, seq, 0, 2, &m); - *mmin = std::min(m, *mmin); - *mmax = std::max(m, *mmax); - } - } -} - -template -static void 
GetExtendedExtent(const GEOSGeometry *geom, double *zmin, double *zmax, double *mmin, double *mmax, - GEOSContextHandle_t ctx) { - auto geos_type = GEOSGeomTypeId_r(ctx, geom); - switch (geos_type) { - case GEOS_POINT: - case GEOS_LINESTRING: { - auto seq = GEOSGeom_getCoordSeq_r(ctx, geom); - GetExtendedExtent(seq, zmin, zmax, mmin, mmax, ctx); - break; - } - case GEOS_POLYGON: { - auto shell = GEOSGetExteriorRing_r(ctx, geom); - auto seq = GEOSGeom_getCoordSeq_r(ctx, shell); - GetExtendedExtent(seq, zmin, zmax, mmin, mmax, ctx); - auto num_holes = GEOSGetNumInteriorRings_r(ctx, geom); - for (auto i = 0; i < num_holes; i++) { - auto hole = GEOSGetInteriorRingN_r(ctx, geom, i); - auto rseq = GEOSGeom_getCoordSeq_r(ctx, hole); - GetExtendedExtent(rseq, zmin, zmax, mmin, mmax, ctx); - } - break; - } - case GEOS_MULTIPOINT: - case GEOS_MULTILINESTRING: - case GEOS_MULTIPOLYGON: - case GEOS_GEOMETRYCOLLECTION: { - auto num_polygons = GEOSGetNumGeometries_r(ctx, geom); - for (auto i = 0; i < num_polygons; i++) { - auto polygon = GEOSGetGeometryN_r(ctx, geom, i); - GetExtendedExtent(polygon, zmin, zmax, mmin, mmax, ctx); - } - break; - } - default: - throw NotImplementedException(StringUtil::Format("GEOS Serialize: Geometry type %d not supported", geos_type)); - } -} - -static void GetExtendedExtent(const GEOSGeometry *geom, double *zmin, double *zmax, double *mmin, double *mmax, - GEOSContextHandle_t ctx) { - *zmin = std::numeric_limits::max(); - *zmax = std::numeric_limits::lowest(); - *mmin = std::numeric_limits::max(); - *mmax = std::numeric_limits::lowest(); - auto has_z = GEOSHasZ_r(ctx, geom); - auto has_m = GEOSHasM_r(ctx, geom); - if (has_z && has_m) { - GetExtendedExtent(geom, zmin, zmax, mmin, mmax, ctx); - } else if (has_z) { - GetExtendedExtent(geom, zmin, zmax, mmin, mmax, ctx); - } else if (has_m) { - GetExtendedExtent(geom, zmin, zmax, mmin, mmax, ctx); - } else { - GetExtendedExtent(geom, zmin, zmax, mmin, mmax, ctx); - } -} - -geometry_t 
SerializeGEOSGeometry(Vector &result, const GEOSGeometry *geom, GEOSContextHandle_t ctx) { - - GeometryType type; - auto geos_type = GEOSGeomTypeId_r(ctx, geom); - switch (geos_type) { - case GEOS_POINT: - type = GeometryType::POINT; - break; - case GEOS_LINESTRING: - type = GeometryType::LINESTRING; - break; - case GEOS_POLYGON: - type = GeometryType::POLYGON; - break; - case GEOS_MULTIPOINT: - type = GeometryType::MULTIPOINT; - break; - case GEOS_MULTILINESTRING: - type = GeometryType::MULTILINESTRING; - break; - case GEOS_MULTIPOLYGON: - type = GeometryType::MULTIPOLYGON; - break; - case GEOS_GEOMETRYCOLLECTION: - type = GeometryType::GEOMETRYCOLLECTION; - break; - default: - throw NotImplementedException( - StringUtil::Format("GEOS Wrapper Serialize: Geometry type %d not supported", geos_type)); - } - - bool has_bbox = type != GeometryType::POINT && GEOSisEmpty_r(ctx, geom) == 0; - bool has_z = GEOSHasZ_r(ctx, geom); - bool has_m = GEOSHasM_r(ctx, geom); - - auto bbox_size = has_bbox ? (sizeof(float) * 2 * (2 + (has_z ? 1 : 0) + (has_m ? 1 : 0))) : 0; - - auto size = GetSerializedSize(geom, ctx); - size += 4; // Header - size += sizeof(uint32_t); // Padding - size += bbox_size; // BBox - - auto blob = StringVector::EmptyString(result, size); - Cursor writer(blob); - - uint16_t hash = 0; - - GeometryProperties properties; - properties.SetBBox(has_bbox); - properties.SetZ(GEOSHasZ_r(ctx, geom)); - properties.SetM(GEOSHasM_r(ctx, geom)); - writer.Write(type); // Type - writer.Write(properties); // Properties - writer.Write(hash); // Hash - writer.Write(0); // Padding - - // If the geom is not a point, write the bounding box - if (has_bbox) { - double minx, maxx, miny, maxy; - GEOSGeom_getExtent_r(ctx, geom, &minx, &miny, &maxx, &maxy); - writer.Write(MathUtil::DoubleToFloatDown(minx)); - writer.Write(MathUtil::DoubleToFloatDown(miny)); - writer.Write(MathUtil::DoubleToFloatUp(maxx)); - writer.Write(MathUtil::DoubleToFloatUp(maxy)); - - // well, this sucks. 
GEOS doesnt have a native way to get the Z and M value extents. - if (has_z || has_m) { - double minz, maxz, minm, maxm; - GetExtendedExtent(geom, &minz, &maxz, &minm, &maxm, ctx); - if (has_z) { - writer.Write(MathUtil::DoubleToFloatDown(minz)); - writer.Write(MathUtil::DoubleToFloatUp(maxz)); - } - if (has_m) { - writer.Write(MathUtil::DoubleToFloatDown(minm)); - writer.Write(MathUtil::DoubleToFloatUp(maxm)); - } - } - } - - SerializeGeometry(writer, geom, ctx); - - blob.Finalize(); - - return geometry_t(blob); -} - -geometry_t GeosContextWrapper::Serialize(Vector &result, const GeometryPtr &geom) { - return SerializeGEOSGeometry(result, geom.get(), ctx); -} - -} // namespace geos - -} // namespace spatial \ No newline at end of file diff --git a/spatial/src/spatial/geos/module.cpp b/spatial/src/spatial/geos/module.cpp deleted file mode 100644 index ad27e0d1..00000000 --- a/spatial/src/spatial/geos/module.cpp +++ /dev/null @@ -1,20 +0,0 @@ -#include "spatial/geos/module.hpp" -#include "spatial/geos/functions/aggregate.hpp" -#include "spatial/geos/functions/scalar.hpp" -#include "spatial/geos/functions/cast.hpp" - -#include "spatial/common.hpp" - -namespace spatial { - -namespace geos { - -void GeosModule::Register(DatabaseInstance &db) { - GEOSScalarFunctions::Register(db); - GeosAggregateFunctions::Register(db); - GeosCastFunctions::Register(db); -} - -} // namespace geos - -} // namespace spatial diff --git a/spatial/src/spatial/proj/functions.cpp b/spatial/src/spatial/proj/functions.cpp deleted file mode 100644 index 9a7b962c..00000000 --- a/spatial/src/spatial/proj/functions.cpp +++ /dev/null @@ -1,520 +0,0 @@ -#include "duckdb/common/vector_operations/generic_executor.hpp" -#include "duckdb/parser/parsed_data/create_scalar_function_info.hpp" -#include "duckdb/parser/parsed_data/create_table_function_info.hpp" -#include "duckdb/parser/parsed_data/create_view_info.hpp" -#include "duckdb/execution/expression_executor.hpp" -#include 
"duckdb/planner/expression/bound_function_expression.hpp" - -#include "spatial/common.hpp" -#include "spatial/core/types.hpp" -#include "spatial/core/geometry/geometry.hpp" -#include "spatial/proj/functions.hpp" -#include "spatial/proj/module.hpp" - -#include "proj.h" - -namespace spatial { - -namespace proj { - -using namespace core; - -struct ProjFunctionLocalState : public FunctionLocalState { - - PJ_CONTEXT *proj_ctx; - ArenaAllocator arena; - - explicit ProjFunctionLocalState(ClientContext &context) - : proj_ctx(ProjModule::GetThreadProjContext()), arena(BufferAllocator::Get(context)) { - } - - ~ProjFunctionLocalState() override { - proj_context_destroy(proj_ctx); - } - - static unique_ptr Init(ExpressionState &state, const BoundFunctionExpression &expr, - FunctionData *bind_data) { - auto result = make_uniq(state.GetContext()); - return std::move(result); - } - - static ProjFunctionLocalState &ResetAndGet(ExpressionState &state) { - auto &local_state = (ProjFunctionLocalState &)*ExecuteFunctionState::GetFunctionState(state); - local_state.arena.Reset(); - return local_state; - } -}; - -struct TransformFunctionData : FunctionData { - - // Whether or not to always return XY coordinates, even when the CRS has a different axis order. 
- bool conventional_gis_order = false; - - unique_ptr Copy() const override { - auto result = make_uniq(); - result->conventional_gis_order = conventional_gis_order; - return std::move(result); - } - bool Equals(const FunctionData &other) const override { - auto &data = other.Cast(); - return conventional_gis_order == data.conventional_gis_order; - } -}; - -static unique_ptr TransformBind(ClientContext &context, ScalarFunction &bound_function, - vector> &arguments) { - - auto result = make_uniq(); - if (arguments.size() == 4) { - // Ensure the "always_xy" parameter is a constant - auto &arg = arguments[3]; - if (arg->HasParameter()) { - throw InvalidInputException("The 'always_xy' parameter must be a constant"); - } - if (!arg->IsFoldable()) { - throw InvalidInputException("The 'always_xy' parameter must be a constant"); - } - result->conventional_gis_order = BooleanValue::Get(ExpressionExecutor::EvaluateScalar(context, *arg)); - } - return std::move(result); -} - -static void Box2DTransformFunction(DataChunk &args, ExpressionState &state, Vector &result) { - using BOX_TYPE = StructTypeQuaternary; - using PROJ_TYPE = PrimitiveType; - - auto count = args.size(); - auto &box = args.data[0]; - auto &proj_from = args.data[1]; - auto &proj_to = args.data[2]; - - auto &local_state = ProjFunctionLocalState::ResetAndGet(state); - auto &proj_ctx = local_state.proj_ctx; - auto &func_expr = state.expr.Cast(); - auto &info = func_expr.bind_info->Cast(); - - if (proj_from.GetVectorType() == VectorType::CONSTANT_VECTOR && - proj_to.GetVectorType() == VectorType::CONSTANT_VECTOR && !ConstantVector::IsNull(proj_from) && - !ConstantVector::IsNull(proj_to)) { - // Special case: both projections are constant, so we can create the projection once and reuse it - auto from_str = ConstantVector::GetData(proj_from)[0].val.GetString(); - auto to_str = ConstantVector::GetData(proj_to)[0].val.GetString(); - - auto crs = proj_create_crs_to_crs(proj_ctx, from_str.c_str(), to_str.c_str(), 
nullptr); - if (!crs) { - throw InvalidInputException("Could not create projection: " + from_str + " -> " + to_str); - } - - if (info.conventional_gis_order) { - auto normalized_crs = proj_normalize_for_visualization(proj_ctx, crs); - if (normalized_crs) { - proj_destroy(crs); - crs = normalized_crs; - } - // otherwise fall back to the original CRS - } - - GenericExecutor::ExecuteUnary(box, result, count, [&](BOX_TYPE box_in) { - BOX_TYPE box_out; - int densify_pts = 0; - proj_trans_bounds(proj_ctx, crs, PJ_FWD, box_in.a_val, box_in.b_val, box_in.c_val, box_in.d_val, - &box_out.a_val, &box_out.b_val, &box_out.c_val, &box_out.d_val, densify_pts); - return box_out; - }); - - proj_destroy(crs); - } else { - GenericExecutor::ExecuteTernary( - box, proj_from, proj_to, result, count, [&](BOX_TYPE box_in, PROJ_TYPE proj_from, PROJ_TYPE proj_to) { - auto from_str = proj_from.val.GetString(); - auto to_str = proj_to.val.GetString(); - - auto crs = proj_create_crs_to_crs(nullptr, from_str.c_str(), to_str.c_str(), nullptr); - if (!crs) { - throw InvalidInputException("Could not create projection: " + from_str + " -> " + to_str); - } - - if (info.conventional_gis_order) { - auto normalized_crs = proj_normalize_for_visualization(proj_ctx, crs); - if (normalized_crs) { - proj_destroy(crs); - crs = normalized_crs; - } - // otherwise fall back to the original CRS - } - - // TODO: this may be interesting to use, but at that point we can only return a BOX_TYPE - int densify_pts = 0; - BOX_TYPE box_out; - proj_trans_bounds(proj_ctx, crs, PJ_FWD, box_in.a_val, box_in.b_val, box_in.c_val, box_in.d_val, - &box_out.a_val, &box_out.b_val, &box_out.c_val, &box_out.d_val, densify_pts); - - proj_destroy(crs); - - return box_out; - }); - } -} - -static void Point2DTransformFunction(DataChunk &args, ExpressionState &state, Vector &result) { - using POINT_TYPE = StructTypeBinary; - using PROJ_TYPE = PrimitiveType; - - auto count = args.size(); - auto &point = args.data[0]; - auto &proj_from = 
args.data[1]; - auto &proj_to = args.data[2]; - - auto &local_state = ProjFunctionLocalState::ResetAndGet(state); - auto &proj_ctx = local_state.proj_ctx; - auto &func_expr = state.expr.Cast(); - auto &info = func_expr.bind_info->Cast(); - - if (proj_from.GetVectorType() == VectorType::CONSTANT_VECTOR && - proj_to.GetVectorType() == VectorType::CONSTANT_VECTOR && !ConstantVector::IsNull(proj_from) && - !ConstantVector::IsNull(proj_to)) { - // Special case: both projections are constant, so we can create the projection once and reuse it - auto from_str = ConstantVector::GetData(proj_from)[0].val.GetString(); - auto to_str = ConstantVector::GetData(proj_to)[0].val.GetString(); - - auto crs = proj_create_crs_to_crs(proj_ctx, from_str.c_str(), to_str.c_str(), nullptr); - if (!crs) { - throw InvalidInputException("Could not create projection: " + from_str + " -> " + to_str); - } - - if (info.conventional_gis_order) { - auto normalized_crs = proj_normalize_for_visualization(proj_ctx, crs); - if (normalized_crs) { - proj_destroy(crs); - crs = normalized_crs; - } - // otherwise fall back to the original CRS - } - - GenericExecutor::ExecuteUnary(point, result, count, [&](POINT_TYPE point_in) { - POINT_TYPE point_out; - auto transformed = proj_trans(crs, PJ_FWD, proj_coord(point_in.a_val, point_in.b_val, 0, 0)).xy; - point_out.a_val = transformed.x; - point_out.b_val = transformed.y; - return point_out; - }); - proj_destroy(crs); - - } else { - GenericExecutor::ExecuteTernary( - point, proj_from, proj_to, result, count, [&](POINT_TYPE point_in, PROJ_TYPE proj_from, PROJ_TYPE proj_to) { - auto from_str = proj_from.val.GetString(); - auto to_str = proj_to.val.GetString(); - - auto crs = proj_create_crs_to_crs(proj_ctx, from_str.c_str(), to_str.c_str(), nullptr); - if (!crs) { - throw InvalidInputException("Could not create projection: " + from_str + " -> " + to_str); - } - - if (info.conventional_gis_order) { - auto normalized_crs = proj_normalize_for_visualization(proj_ctx, 
crs); - if (normalized_crs) { - proj_destroy(crs); - crs = normalized_crs; - } - // otherwise fall back to the original CRS - } - - POINT_TYPE point_out; - auto transformed = proj_trans(crs, PJ_FWD, proj_coord(point_in.a_val, point_in.b_val, 0, 0)).xy; - point_out.a_val = transformed.x; - point_out.b_val = transformed.y; - - proj_destroy(crs); - - return point_out; - }); - } -} - -struct TransformOp { - static void Case(Geometry::Tags::SinglePartGeometry, Geometry &geom, PJ *crs, ArenaAllocator &arena) { - SinglePartGeometry::MakeMutable(geom, arena); - for (uint32_t i = 0; i < geom.Count(); i++) { - auto vertex = SinglePartGeometry::GetVertex(geom, i); - auto transformed = proj_trans(crs, PJ_FWD, proj_coord(vertex.x, vertex.y, 0, 0)).xy; - // we own the array, so we can use SetUnsafe - SinglePartGeometry::SetVertex(geom, i, {transformed.x, transformed.y}); - } - } - static void Case(Geometry::Tags::MultiPartGeometry, Geometry &geom, PJ *crs, ArenaAllocator &arena) { - for (auto &part : MultiPartGeometry::Parts(geom)) { - Geometry::Match(part, crs, arena); - } - } -}; - -struct ProjCRSDelete { - void operator()(PJ *crs) { - proj_destroy(crs); - } -}; - -using ProjCRS = unique_ptr; - -static void GeometryTransformFunction(DataChunk &args, ExpressionState &state, Vector &result) { - auto count = args.size(); - auto &geom_vec = args.data[0]; - auto &proj_from_vec = args.data[1]; - auto &proj_to_vec = args.data[2]; - - auto &local_state = ProjFunctionLocalState::ResetAndGet(state); - - auto &func_expr = state.expr.Cast(); - auto &info = func_expr.bind_info->Cast(); - - auto &proj_ctx = local_state.proj_ctx; - auto &arena = local_state.arena; - - if (proj_from_vec.GetVectorType() == VectorType::CONSTANT_VECTOR && - proj_to_vec.GetVectorType() == VectorType::CONSTANT_VECTOR && !ConstantVector::IsNull(proj_from_vec) && - !ConstantVector::IsNull(proj_to_vec)) { - // Special case: both projections are constant (very common) - // we can create the projection once and reuse 
it for all geometries - - // TODO: In the future we can cache the projections in the state instead. - - auto from_str = ConstantVector::GetData(proj_from_vec)[0].GetString(); - auto to_str = ConstantVector::GetData(proj_to_vec)[0].GetString(); - auto crs = ProjCRS(proj_create_crs_to_crs(proj_ctx, from_str.c_str(), to_str.c_str(), nullptr)); - if (!crs.get()) { - throw InvalidInputException("Could not create projection: " + from_str + " -> " + to_str); - } - - if (info.conventional_gis_order) { - auto normalized_crs = proj_normalize_for_visualization(proj_ctx, crs.get()); - if (normalized_crs) { - crs = ProjCRS(normalized_crs); - } - // otherwise fall back to the original CRS - } - - UnaryExecutor::Execute(geom_vec, result, count, [&](geometry_t input_geom) { - auto geom = Geometry::Deserialize(arena, input_geom); - Geometry::Match(geom, crs.get(), arena); - return Geometry::Serialize(geom, result); - }); - } else { - // General case: projections are not constant - // we need to create a projection for each geometry - TernaryExecutor::Execute( - geom_vec, proj_from_vec, proj_to_vec, result, count, - [&](geometry_t input_geom, string_t proj_from, string_t proj_to) { - auto from_str = proj_from.GetString(); - auto to_str = proj_to.GetString(); - auto crs = ProjCRS(proj_create_crs_to_crs(proj_ctx, from_str.c_str(), to_str.c_str(), nullptr)); - - if (!crs.get()) { - throw InvalidInputException("Could not create projection: " + from_str + " -> " + to_str); - } - - if (info.conventional_gis_order) { - auto normalized_crs = proj_normalize_for_visualization(proj_ctx, crs.get()); - if (normalized_crs) { - crs = ProjCRS(normalized_crs); - } - // otherwise fall back to the original CRS - } - - auto geom = Geometry::Deserialize(arena, input_geom); - Geometry::Match(geom, crs.get(), arena); - return Geometry::Serialize(geom, result); - }); - } -} - -// SPATIAL_REF_SYS table function -struct GenerateSpatialRefSysTable { - - struct State : public GlobalTableFunctionState { - idx_t 
current_idx; - State() : current_idx(0) { - } - }; - - static unique_ptr Bind(ClientContext &context, TableFunctionBindInput &input, - vector &return_types, vector &names); - - static unique_ptr Init(ClientContext &context, TableFunctionInitInput &input); - - static void Execute(ClientContext &context, TableFunctionInput &data_p, DataChunk &output); - - static void Register(DatabaseInstance &db); -}; - -unique_ptr GenerateSpatialRefSysTable::Bind(ClientContext &context, TableFunctionBindInput &input, - vector &return_types, vector &names) { - - names.push_back("auth_name"); - return_types.push_back(LogicalType::VARCHAR); - names.push_back("code"); - return_types.push_back(LogicalType::VARCHAR); - names.push_back("name"); - return_types.push_back(LogicalType::VARCHAR); - names.push_back("type"); - return_types.push_back(LogicalType::VARCHAR); // TODO: this should maybe be an enum? - names.push_back("deprecated"); - return_types.push_back(LogicalType::BOOLEAN); - - // TODO: output BBOX here as well as BOX_2D (or null!) - - names.push_back("area_name"); - return_types.push_back(LogicalType::VARCHAR); - - names.push_back("projection_method_name"); - return_types.push_back(LogicalType::VARCHAR); - - names.push_back("celestial_body_name"); - return_types.push_back(LogicalType::VARCHAR); - - return nullptr; -} - -unique_ptr GenerateSpatialRefSysTable::Init(ClientContext &context, - TableFunctionInitInput &input) { - auto result = make_uniq(); - return std::move(result); -} - -void GenerateSpatialRefSysTable::Execute(ClientContext &context, TableFunctionInput &input, DataChunk &output) { - // TODO: This is a lot slower than it has to be, ideally we only do one call to proj_get_crs_info_list - // and return the whole list in one go. 
- auto &state = (State &)*input.global_state; - int result_count = 0; - auto crs_list = proj_get_crs_info_list_from_database(nullptr, nullptr, nullptr, &result_count); - - idx_t count = 0; - auto next_idx = MinValue(state.current_idx + STANDARD_VECTOR_SIZE, result_count); - - // TODO: this just returns the crs info, not a spatial_ref_sys table that follows the schema. - for (idx_t i = state.current_idx; i < next_idx; i++) { - auto proj = crs_list[i]; - output.SetValue(0, count, Value(proj->auth_name)); - output.SetValue(1, count, Value(proj->code)); - output.SetValue(2, count, Value(proj->name)); - output.SetValue(3, count, Value(proj->type)); - output.SetValue(4, count, Value(proj->deprecated)); - output.SetValue(5, count, Value(proj->area_name)); - output.SetValue(6, count, Value(proj->projection_method_name)); - output.SetValue(7, count, Value(proj->celestial_body_name)); - count++; - } - - proj_crs_info_list_destroy(crs_list); - - state.current_idx += count; - output.SetCardinality(count); -} - -void GenerateSpatialRefSysTable::Register(DatabaseInstance &db) { - TableFunction func("ST_List_Proj_CRS", {}, Execute, Bind, Init); - ExtensionUtil::RegisterFunction(db, func); - - // Also create a view - /* - auto view = make_uniq(); - view->schema = DEFAULT_SCHEMA; - view->view_name = "SPATIAL_REF_SYS"; - view->sql = "SELECT * FROM st_list_proj_crs()"; // TODO: this is not SQL/MM compliant - view->temporary = true; - view->internal = true; - CreateViewInfo::FromSelect(context, std::move(view)); - catalog.CreateView(context, view.get()); - */ -} - -//------------------------------------------------------------------------------ -// Documentation -//------------------------------------------------------------------------------ - -static constexpr const char *DOC_DESCRIPTION = R"( -Transforms a geometry between two coordinate systems - -The source and target coordinate systems can be specified using any format that the [PROJ library](https://proj.org) supports. 
- -The third optional `always_xy` parameter can be used to force the input and output geometries to be interpreted as having a [easting, northing] coordinate axis order regardless of what the source and target coordinate system definition says. This is particularly useful when transforming to/from the [WGS84/EPSG:4326](https://en.wikipedia.org/wiki/World_Geodetic_System) coordinate system (what most people think of when they hear "longitude"/"latitude" or "GPS coordinates"), which is defined as having a [latitude, longitude] axis order even though [longitude, latitude] is commonly used in practice (e.g. in [GeoJSON](https://tools.ietf.org/html/rfc7946)). More details available in the [PROJ documentation](https://proj.org/en/9.3/faq.html#why-is-the-axis-ordering-in-proj-not-consistent). - -DuckDB spatial vendors its own static copy of the PROJ database of coordinate systems, so if you have your own installation of PROJ on your system the available coordinate systems may differ to what's available in other GIS software. -)"; - -static constexpr const char *DOC_EXAMPLE = R"( --- Transform a geometry from EPSG:4326 to EPSG:3857 (WGS84 to WebMercator) --- Note that since WGS84 is defined as having a [latitude, longitude] axis order --- we follow the standard and provide the input geometry using that axis order, --- but the output will be [easting, northing] because that is what's defined by --- WebMercator. - -SELECT ST_AsText( - ST_Transform( - st_point(52.373123, 4.892360), - 'EPSG:4326', - 'EPSG:3857' - ) -); ----- -POINT (544615.0239773799 6867874.103539125) - --- Alternatively, let's say we got our input point from e.g. a GeoJSON file, --- which uses WGS84 but with [longitude, latitude] axis order. We can use the --- `always_xy` parameter to force the input geometry to be interpreted as having --- a [northing, easting] axis order instead, even though the source coordinate --- reference system definition (WGS84) says otherwise. 
- -SELECT ST_AsText( - ST_Transform( - -- note the axis order is reversed here - st_point(4.892360, 52.373123), - 'EPSG:4326', - 'EPSG:3857', - always_xy := true - ) -); ----- -POINT (544615.0239773799 6867874.103539125) -)"; - -static constexpr DocTag DOC_TAGS[] = {{"ext", "spatial"}, {"category", "conversion"}}; - -//------------------------------------------------------------------------------ -// Register Functions -//------------------------------------------------------------------------------ -void ProjFunctions::Register(DatabaseInstance &db) { - ScalarFunctionSet set("ST_Transform"); - - using namespace spatial::core; - - set.AddFunction(ScalarFunction({GeoTypes::BOX_2D(), LogicalType::VARCHAR, LogicalType::VARCHAR}, GeoTypes::BOX_2D(), - Box2DTransformFunction, TransformBind, nullptr, nullptr, - ProjFunctionLocalState::Init)); - set.AddFunction(ScalarFunction( - {GeoTypes::BOX_2D(), LogicalType::VARCHAR, LogicalType::VARCHAR, LogicalType::BOOLEAN}, GeoTypes::BOX_2D(), - Box2DTransformFunction, TransformBind, nullptr, nullptr, ProjFunctionLocalState::Init)); - - set.AddFunction(ScalarFunction({GeoTypes::POINT_2D(), LogicalType::VARCHAR, LogicalType::VARCHAR}, - GeoTypes::POINT_2D(), Point2DTransformFunction, TransformBind, nullptr, nullptr, - ProjFunctionLocalState::Init)); - set.AddFunction(ScalarFunction( - {GeoTypes::POINT_2D(), LogicalType::VARCHAR, LogicalType::VARCHAR, LogicalType::BOOLEAN}, GeoTypes::POINT_2D(), - Point2DTransformFunction, TransformBind, nullptr, nullptr, ProjFunctionLocalState::Init)); - - set.AddFunction(ScalarFunction({GeoTypes::GEOMETRY(), LogicalType::VARCHAR, LogicalType::VARCHAR}, - GeoTypes::GEOMETRY(), GeometryTransformFunction, TransformBind, nullptr, nullptr, - ProjFunctionLocalState::Init)); - set.AddFunction(ScalarFunction( - {GeoTypes::GEOMETRY(), LogicalType::VARCHAR, LogicalType::VARCHAR, LogicalType::BOOLEAN}, GeoTypes::GEOMETRY(), - GeometryTransformFunction, TransformBind, nullptr, nullptr, 
ProjFunctionLocalState::Init)); - - ExtensionUtil::RegisterFunction(db, set); - DocUtil::AddDocumentation(db, "ST_Transform", DOC_DESCRIPTION, DOC_EXAMPLE, DOC_TAGS, - {"geom", "source_crs", "target_crs", "always_xy"}); - GenerateSpatialRefSysTable::Register(db); -} - -} // namespace proj - -} // namespace spatial diff --git a/spatial/src/spatial/proj/module.cpp b/spatial/src/spatial/proj/module.cpp deleted file mode 100644 index ff985e91..00000000 --- a/spatial/src/spatial/proj/module.cpp +++ /dev/null @@ -1,82 +0,0 @@ -#include "spatial/common.hpp" - -#include "spatial/proj/module.hpp" -#include "spatial/proj/functions.hpp" - -#include "proj.h" -#include "sqlite3.h" - -namespace spatial { - -namespace proj { - -// We embed the whole proj.db in the proj_db.c file, which we then link into the extension binary -// We can then use the sqlite3 "memvfs" (which we also statically link to) to point to the proj.db database in memory -// To genereate the proj_db.c file, we use the following command: -// `xxd -i proj.db > proj_db.c` -// Then rename the array to proj_db and the length to proj_db_len if necessary -// We link these from the proj_db.c file externally instead of #include:ing so our IDE doesnt go haywire -extern "C" unsigned char proj_db[]; -extern "C" unsigned int proj_db_len; -extern "C" int sqlite3_memvfs_init(sqlite3 *, char **, const sqlite3_api_routines *); - -PJ_CONTEXT *ProjModule::GetThreadProjContext() { - - auto ctx = proj_context_create(); - - // We set the default context proj.db path to the one in the binary here - // Otherwise GDAL will try to load the proj.db from the system - // Any PJ_CONTEXT we create after this will inherit these settings - auto path = StringUtil::Format("file:/proj.db?ptr=%llu&sz=%lu&max=%lu", (void *)proj_db, proj_db_len, proj_db_len); - - proj_context_set_sqlite3_vfs_name(ctx, "memvfs"); - auto ok = proj_context_set_database_path(ctx, path.c_str(), nullptr, nullptr); - if (!ok) { - throw InternalException("Could not set 
proj.db path"); - } - - // Dont log errors to stderr - proj_log_level(ctx, PJ_LOG_NONE); - - // Dont allow network - proj_context_set_enable_network(ctx, false); - - return ctx; -} - -// TODO: ignore memvfs, load into :memory: at runtime instead...? - -// IMPORTANT: Make sure this module is loaded before any other modules that use proj (like GDAL) -void ProjModule::Register(DatabaseInstance &db) { - // we use the sqlite "memvfs" to store the proj.db database in the extension binary itself - // this way we don't have to worry about the user having the proj.db database installed - // on their system. We therefore have to tell proj to use memvfs as the sqlite3 vfs and - // point it to the segment of the binary that contains the proj.db database - - sqlite3_initialize(); - sqlite3_memvfs_init(nullptr, nullptr, nullptr); - auto vfs = sqlite3_vfs_find("memvfs"); - if (!vfs) { - throw InternalException("Could not find sqlite memvfs extension"); - } - sqlite3_vfs_register(vfs, 0); - - // We set the default context proj.db path to the one in the binary here - // Otherwise GDAL will try to load the proj.db from the system - // Any PJ_CONTEXT we create after this will inherit these settings (on this thread?) 
- auto path = StringUtil::Format("file:/proj.db?ptr=%llu&sz=%lu&max=%lu", (void *)proj_db, proj_db_len, proj_db_len); - - proj_context_set_sqlite3_vfs_name(nullptr, "memvfs"); - - auto ok = proj_context_set_database_path(nullptr, path.c_str(), nullptr, nullptr); - if (!ok) { - throw InternalException("Could not set proj.db path"); - } - - // Register functions - ProjFunctions::Register(db); -} - -} // namespace proj - -} // namespace spatial diff --git a/spatial/src/spatial_extension.cpp b/spatial/src/spatial_extension.cpp deleted file mode 100644 index f5839843..00000000 --- a/spatial/src/spatial_extension.cpp +++ /dev/null @@ -1,126 +0,0 @@ -#define DUCKDB_EXTENSION_MAIN - -#include "spatial_extension.hpp" -#include "duckdb.hpp" -#include "duckdb/common/exception.hpp" -#include "duckdb/function/scalar_function.hpp" -#include "duckdb/parser/parsed_data/create_scalar_function_info.hpp" -#include "duckdb/parser/parsed_data/create_view_info.hpp" - -#include "spatial/core/module.hpp" -#include "spatial/gdal/module.hpp" -#include "spatial/geos/module.hpp" -#include "spatial/proj/module.hpp" -#include "spatial/geographiclib/module.hpp" - -#include "spatial/doc_util.hpp" -#include "duckdb/catalog/catalog_entry/scalar_function_catalog_entry.hpp" - -static string RemoveIndentAndTrailingWhitespace(const char *text) { - string result; - // Skip any empty first newlines if present - while (*text == '\n') { - text++; - } - - // Track indent length - auto indent_start = text; - while (isspace(*text) && *text != '\n') { - text++; - } - auto indent_len = text - indent_start; - while (*text) { - result += *text; - if (*text++ == '\n') { - // Remove all indentation, but only if it matches the first line's indentation - bool matched_indent = true; - for (auto i = 0; i < indent_len; i++) { - if (*text != indent_start[i]) { - matched_indent = false; - break; - } - } - if (matched_indent) { - text += indent_len; - } - } - } - - // Also remove any trailing whitespace - 
result.erase(result.find_last_not_of(" \n\r\t") + 1); - return result; -} - -void spatial::DocUtil::AddDocumentation(duckdb::DatabaseInstance &db, const char *function_name, - const char *description, const char *example, - const duckdb::unordered_map &tags, - duckdb::vector parameter_names) { - - auto &system_catalog = Catalog::GetSystemCatalog(db); - auto data = CatalogTransaction::GetSystemTransaction(db); - auto &schema = system_catalog.GetSchema(data, DEFAULT_SCHEMA); - auto catalog_entry = schema.GetEntry(data, CatalogType::SCALAR_FUNCTION_ENTRY, function_name); - if (!catalog_entry) { - // Try get a aggregate function - catalog_entry = schema.GetEntry(data, CatalogType::AGGREGATE_FUNCTION_ENTRY, function_name); - if (!catalog_entry) { - // Try get a table function - catalog_entry = schema.GetEntry(data, CatalogType::TABLE_FUNCTION_ENTRY, function_name); - if (!catalog_entry) { - throw duckdb::InvalidInputException("Function with name \"%s\" not found in DocUtil::AddDocumentation", - function_name); - } - } - } - - auto &func_entry = catalog_entry->Cast(); - FunctionDescription func_description; - if (description != nullptr) { - func_description.description = RemoveIndentAndTrailingWhitespace(description); - } - if (example != nullptr) { - func_description.examples.push_back(RemoveIndentAndTrailingWhitespace(example)); - } - if (!parameter_names.empty()) { - func_description.parameter_names = std::move(parameter_names); - } - func_entry.descriptions.push_back(std::move(func_description)); - if (!tags.empty()) { - func_entry.tags = tags; - } -} - -namespace duckdb { - -static void LoadInternal(DatabaseInstance &instance) { - spatial::core::CoreModule::Register(instance); - spatial::proj::ProjModule::Register(instance); - spatial::gdal::GdalModule::Register(instance); - spatial::geos::GeosModule::Register(instance); - spatial::geographiclib::GeographicLibModule::Register(instance); -} - -void SpatialExtension::Load(DuckDB &db) { - LoadInternal(*db.instance); -} 
- -std::string SpatialExtension::Name() { - return "spatial"; -} - -} // namespace duckdb - -extern "C" { - -DUCKDB_EXTENSION_API void spatial_init(duckdb::DatabaseInstance &db) { - LoadInternal(db); -} - -DUCKDB_EXTENSION_API const char *spatial_version() { - return duckdb::DuckDB::LibraryVersion(); -} -} - -#ifndef DUCKDB_EXTENSION_MAIN -#error DUCKDB_EXTENSION_MAIN not defined -#endif diff --git a/src/sgl/CMakeLists.txt b/src/sgl/CMakeLists.txt new file mode 100644 index 00000000..d9c5d116 --- /dev/null +++ b/src/sgl/CMakeLists.txt @@ -0,0 +1,4 @@ +set(EXTENSION_SOURCES + ${EXTENSION_SOURCES} + ${CMAKE_CURRENT_SOURCE_DIR}/sgl.cpp + PARENT_SCOPE) diff --git a/src/sgl/sgl.cpp b/src/sgl/sgl.cpp new file mode 100644 index 00000000..35cdd47a --- /dev/null +++ b/src/sgl/sgl.cpp @@ -0,0 +1,1919 @@ +// TODO: Dont depend on duckdb +#include "duckdb/common/assert.hpp" +#define SGL_ASSERT(condition) D_ASSERT(condition) + +#include "sgl/sgl.hpp" + +#include +#include + +namespace sgl { + +namespace ops { + +static uint8_t *resize_vertices(allocator &alloc, geometry *geom, bool set_z, bool set_m, double default_z, + double default_m) { + + const auto has_z = geom->has_z(); + const auto has_m = geom->has_m(); + + const auto source_type = static_cast(has_z + 2 * has_m); + const auto target_type = static_cast(set_z + 2 * set_m); + + const auto source_data = geom->get_vertex_data(); + const auto count = geom->get_count(); + + if (source_type == target_type) { + return source_data; + } + + switch (source_type) { + case vertex_type::XY: { + constexpr auto source_size = sizeof(double) * 2; + switch (target_type) { + case vertex_type::XY: { + // Do nothing + return source_data; + } + case vertex_type::XYZ: { + constexpr auto target_size = sizeof(double) * 3; + const auto target_data = static_cast(alloc.alloc(count * target_size)); + + for (size_t i = 0; i < count; i++) { + const auto source_offset = i * source_size; + const auto target_offset = i * target_size; + memcpy(target_data 
+ target_offset, source_data + source_offset, source_size); + memcpy(target_data + target_offset + source_size, &default_z, sizeof(double)); + } + + return target_data; + } + case vertex_type::XYM: { + constexpr auto target_size = sizeof(double) * 3; + const auto target_data = static_cast(alloc.alloc(count * target_size)); + + for (size_t i = 0; i < count; i++) { + const auto source_offset = i * source_size; + const auto target_offset = i * target_size; + memcpy(target_data + target_offset, source_data + source_offset, source_size); + memcpy(target_data + target_offset + source_size, &default_m, sizeof(double)); + } + + return target_data; + } + case vertex_type::XYZM: { + constexpr auto target_size = sizeof(double) * 4; + const auto target_data = static_cast(alloc.alloc(count * target_size)); + + for (size_t i = 0; i < count; i++) { + const auto source_offset = i * source_size; + const auto target_offset = i * target_size; + memcpy(target_data + target_offset, source_data + source_offset, source_size); + memcpy(target_data + target_offset + source_size, &default_z, sizeof(double)); + memcpy(target_data + target_offset + source_size + sizeof(double), &default_m, sizeof(double)); + } + + return target_data; + } + default: + SGL_ASSERT(false); + return nullptr; + } + } + case vertex_type::XYZ: { + constexpr auto source_size = sizeof(double) * 3; + switch (target_type) { + case vertex_type::XY: { + constexpr auto target_size = sizeof(double) * 2; + const auto target_data = static_cast(alloc.alloc(count * target_size)); + + for (size_t i = 0; i < count; i++) { + const auto source_offset = i * source_size; + const auto target_offset = i * target_size; + memcpy(target_data + target_offset, source_data + source_offset, target_size); + } + + return target_data; + } + case vertex_type::XYZ: { + // Do nothing + return source_data; + } + case vertex_type::XYM: { + constexpr auto target_size = sizeof(double) * 3; + const auto target_data = static_cast(alloc.alloc(count * 
target_size)); + + for (size_t i = 0; i < count; i++) { + const auto source_offset = i * source_size; + const auto target_offset = i * target_size; + memcpy(target_data + target_offset, source_data + source_offset, target_size); + memcpy(target_data + target_offset + sizeof(double) * 2, &default_m, sizeof(double)); + } + + return target_data; + } + case vertex_type::XYZM: { + constexpr auto target_size = sizeof(double) * 4; + const auto target_data = static_cast(alloc.alloc(count * target_size)); + + for (size_t i = 0; i < count; i++) { + const auto source_offset = i * source_size; + const auto target_offset = i * target_size; + memcpy(target_data + target_offset, source_data + source_offset, target_size); + memcpy(target_data + target_offset + sizeof(double) * 3, &default_m, sizeof(double)); + } + + return target_data; + } + default: + SGL_ASSERT(false); + return nullptr; + } + } + case vertex_type::XYM: { + constexpr auto source_size = sizeof(double) * 3; + switch (target_type) { + case vertex_type::XY: { + constexpr auto target_size = sizeof(double) * 2; + const auto target_data = static_cast(alloc.alloc(count * target_size)); + + for (size_t i = 0; i < count; i++) { + const auto source_offset = i * source_size; + const auto target_offset = i * target_size; + memcpy(target_data + target_offset, source_data + source_offset, target_size); + } + + return target_data; + } + case vertex_type::XYZ: { + constexpr auto target_size = sizeof(double) * 3; + const auto target_data = static_cast(alloc.alloc(count * target_size)); + + for (size_t i = 0; i < count; i++) { + const auto source_offset = i * source_size; + const auto target_offset = i * target_size; + memcpy(target_data + target_offset, source_data + source_offset, sizeof(double) * 2); + memcpy(target_data + target_offset + sizeof(double) * 2, &default_z, sizeof(double)); + } + + return target_data; + } + case vertex_type::XYM: { + // Do nothing + return source_data; + } + case vertex_type::XYZM: { + constexpr 
auto target_size = sizeof(double) * 4; + const auto target_data = static_cast(alloc.alloc(count * target_size)); + + for (size_t i = 0; i < count; i++) { + const auto source_offset = i * source_size; + const auto target_offset = i * target_size; + memcpy(target_data + target_offset, source_data + source_offset, sizeof(double) * 2); + memcpy(target_data + target_offset + sizeof(double) * 2, &default_z, sizeof(double)); + memcpy(target_data + target_offset + sizeof(double) * 3, + source_data + source_offset + sizeof(double) * 2, sizeof(double)); + } + + return target_data; + } + default: + SGL_ASSERT(false); + return nullptr; + } + } + case vertex_type::XYZM: { + constexpr auto source_size = sizeof(double) * 4; + switch (target_type) { + case vertex_type::XY: { + constexpr auto target_size = sizeof(double) * 2; + const auto target_data = static_cast(alloc.alloc(count * target_size)); + + for (size_t i = 0; i < count; i++) { + const auto source_offset = i * source_size; + const auto target_offset = i * target_size; + memcpy(target_data + target_offset, source_data + source_offset, sizeof(double) * 2); + } + + return target_data; + } + case vertex_type::XYZ: { + constexpr auto target_size = sizeof(double) * 3; + const auto target_data = static_cast(alloc.alloc(count * target_size)); + + for (size_t i = 0; i < count; i++) { + const auto source_offset = i * source_size; + const auto target_offset = i * target_size; + memcpy(target_data + target_offset, source_data + source_offset, sizeof(double) * 3); + } + + return target_data; + } + case vertex_type::XYM: { + constexpr auto target_size = sizeof(double) * 3; + const auto target_data = static_cast(alloc.alloc(count * target_size)); + + for (size_t i = 0; i < count; i++) { + const auto source_offset = i * source_size; + const auto target_offset = i * target_size; + memcpy(target_data + target_offset, source_data + source_offset, sizeof(double) * 2); + memcpy(target_data + target_offset + sizeof(double) * 2, + source_data 
+ source_offset + sizeof(double) * 3, sizeof(double)); + } + + return target_data; + } + case vertex_type::XYZM: { + // Do nothing + return source_data; + } + default: + SGL_ASSERT(false); + return nullptr; + } + } + default: + SGL_ASSERT(false); + return nullptr; + } +} + +void force_zm(allocator &alloc, geometry *geom, bool set_z, bool set_m, double default_z, double default_m) { + + auto part = geom; + if (part == nullptr) { + return; + } + const auto root = part->get_parent(); + + while (part != root) { + + switch (part->get_type()) { + case geometry_type::POINT: + case geometry_type::LINESTRING: { + // Convert the vertices + const auto target_data = resize_vertices(alloc, part, set_z, set_m, default_z, default_m); + part->set_vertex_data(target_data, part->get_count()); + part->set_z(set_z); + part->set_m(set_m); + } break; + case geometry_type::POLYGON: + case geometry_type::MULTI_POINT: + case geometry_type::MULTI_LINESTRING: + case geometry_type::MULTI_POLYGON: + case geometry_type::MULTI_GEOMETRY: { + part->set_z(set_z); + part->set_m(set_m); + if (!part->is_empty()) { + part = part->get_first_part(); + } + } break; + default: + SGL_ASSERT(false); + break; + } + + // Now go up/sideways + while (part != nullptr) { + const auto parent = part->get_parent(); + if (parent == root) { + return; + } + + if (part != parent->get_last_part()) { + // Go sideways + part = part->get_next(); + break; + } + + // Go up + part = parent; + } + } +} + +//------------------------------------------------------------------------------ +// WKB Parsing +//------------------------------------------------------------------------------ +static void wkb_reader_skip(wkb_reader *state, size_t size) { + if (state->pos + size > state->end) { + state->error = SGL_WKB_READER_OUT_OF_BOUNDS; + return; + } + state->pos += size; +} + +static uint8_t wkb_reader_read_u8(wkb_reader *state) { + if (state->pos + sizeof(uint8_t) > state->end) { + state->error = SGL_WKB_READER_OUT_OF_BOUNDS; + return 
0; + } + + const auto val = *state->pos; + state->pos += sizeof(uint8_t); + return val; +} + +static uint32_t wkb_reader_read_u32(wkb_reader *state) { + if (state->pos + sizeof(uint32_t) > state->end) { + state->error = SGL_WKB_READER_OUT_OF_BOUNDS; + return 0; + } + + uint32_t val; + + if (state->le) { + memcpy(&val, state->pos, sizeof(uint32_t)); + } else { + char ibuf[sizeof(uint32_t)]; + char obuf[sizeof(uint32_t)]; + memcpy(ibuf, state->pos, sizeof(uint32_t)); + for (size_t i = 0; i < sizeof(uint32_t); i++) { + obuf[i] = ibuf[sizeof(uint32_t) - i - 1]; + } + memcpy(&val, obuf, sizeof(uint32_t)); + } + + state->pos += sizeof(uint32_t); + + return val; +} + +static double wkb_reader_read_f64(wkb_reader *state) { + if (state->pos + sizeof(double) > state->end) { + state->error = SGL_WKB_READER_OUT_OF_BOUNDS; + return 0; + } + + double val; + + if (state->le) { + memcpy(&val, state->pos, sizeof(double)); + } else { + char ibuf[sizeof(double)]; + char obuf[sizeof(double)]; + memcpy(ibuf, state->pos, sizeof(double)); + for (size_t i = 0; i < sizeof(double); i++) { + obuf[i] = ibuf[sizeof(double) - i - 1]; + } + memcpy(&val, obuf, sizeof(double)); + } + + state->pos += sizeof(double); + + return val; +} + +static bool wkb_reader_read_point(wkb_reader *state, geometry *geom) { + const size_t dims = 2 + geom->has_z() + geom->has_m(); + + bool all_nan = true; + double coords[4]; + + const auto ptr = state->pos; + for (size_t i = 0; i < dims; i++) { + coords[i] = wkb_reader_read_f64(state); + if (state->error) { + return false; + } + if (!std::isnan(coords[i])) { + all_nan = false; + } + } + + if (state->nan_as_empty && all_nan) { + geom->set_vertex_data(static_cast(nullptr), 0); + return true; + } + if (state->le && !state->copy_vertices) { + geom->set_vertex_data(ptr, 1); + return true; + } + + const auto data = static_cast(state->alloc->alloc(sizeof(double) * dims)); + memcpy(data, coords, sizeof(double) * dims); + geom->set_vertex_data(data, 1); + return true; +} + 
+static bool wkb_reader_read_line(wkb_reader *state, geometry *geom) { + const auto vertex_count = wkb_reader_read_u32(state); + if (state->error) { + return false; + } + + const auto vertex_size = geom->get_vertex_size(); + const auto byte_size = vertex_count * vertex_size; + + if (state->pos + byte_size > state->end) { + state->error = SGL_WKB_READER_OUT_OF_BOUNDS; + return false; + } + + const auto ptr = state->pos; + state->pos += byte_size; + + // If this is LE encoded, and we dont want to copy the vertices, we can just return the pointer + if (state->le) { + if (state->copy_vertices) { + const auto mem = static_cast(state->alloc->alloc(byte_size)); + memcpy(mem, ptr, byte_size); + geom->set_vertex_data(mem, vertex_count); + } else { + geom->set_vertex_data(ptr, vertex_count); + } + } else { + // Otherwise, we need to allocate and swap the bytes + const auto mem = static_cast(state->alloc->alloc(byte_size)); + for (size_t i = 0; i < vertex_count; i++) { + const auto src = ptr + i * vertex_size; + const auto dst = mem + i * vertex_size; + + // Swap doubles within the vertex + for (size_t j = 0; j < vertex_size; j += sizeof(double)) { + for (size_t k = 0; k < sizeof(double); k++) { + dst[j + k] = src[j + sizeof(double) - k - 1]; + } + } + } + + geom->set_vertex_data(mem, vertex_count); + } + return true; +} + +// TODO: Also collect stats? 
+bool wkb_reader_try_parse(wkb_reader *state, geometry *out) { + +// clang-format off +#define read_u8(state) wkb_reader_read_u8(state); if (state->error) { return false; } +#define read_u32(state) wkb_reader_read_u32(state); if (state->error) { return false; } +#define read_f64(state) wkb_reader_read_f64(state); if (state->error) { return false; } +#define read_verts(state, vcount, vsize) wkb_reader_read_vertices(state, vcount, vsize); if (state->error) { return false; } +#define read_skip(state, size) wkb_reader_skip(state, size); if (state->error) { return false; } + // clang-format on + + SGL_ASSERT(state); + SGL_ASSERT(out); + SGL_ASSERT(state->buf); + SGL_ASSERT(state->end); + SGL_ASSERT(state->alloc); + SGL_ASSERT(state->stack_buf); + SGL_ASSERT(state->stack_cap > 0); + + // Setup state + state->pos = state->buf; + state->error = SGL_WKB_READER_OK; + state->depth = 0; + state->le = false; + state->type_id = 0; + state->has_any_m = false; + state->has_any_z = false; + + geometry *geom = out; + + while (true) { + state->le = read_u8(state); + state->type_id = read_u32(state); + + const auto type = static_cast((state->type_id & 0xffff) % 1000); + const auto flags = (state->type_id & 0xffff) / 1000; + const auto has_z = (flags == 1) || (flags == 3) || ((state->type_id & 0x80000000) != 0); + const auto has_m = (flags == 2) || (flags == 3) || ((state->type_id & 0x40000000) != 0); + const auto has_srid = (state->type_id & 0x20000000) != 0; + + if (has_srid) { + // skip the SRID + const auto srid = read_u32(state); + (void)srid; + } + + geom->set_type(type); + geom->set_z(has_z); + geom->set_m(has_m); + + // Compare with root + if (!state->has_mixed_zm && (out->has_m() != has_m || out->has_z() != has_z)) { + state->has_any_z |= has_z; + state->has_any_m |= has_m; + state->has_mixed_zm = true; + if (!state->allow_mixed_zm) { + // Error out! 
+ state->error = SGL_WKB_READER_MIXED_ZM; + return false; + } + } + + switch (geom->get_type()) { + case geometry_type::POINT: { + // Read the point data + if (!wkb_reader_read_point(state, geom)) { + return false; + } + } break; + case geometry_type::LINESTRING: { + if (!wkb_reader_read_line(state, geom)) { + return false; + } + } break; + case geometry_type::POLYGON: { + // Read the ring count + const auto ring_count = read_u32(state); + + // Read the point data; + for (size_t i = 0; i < ring_count; i++) { + const auto ring = static_cast(state->alloc->alloc(sizeof(geometry))); + new (ring) geometry(geometry_type::LINESTRING, has_z, has_m); + if (!wkb_reader_read_line(state, ring)) { + return false; + } + geom->append_part(ring); + } + } break; + case geometry_type::MULTI_POINT: + case geometry_type::MULTI_LINESTRING: + case geometry_type::MULTI_POLYGON: + case geometry_type::MULTI_GEOMETRY: { + + // Check stack depth + if (state->depth >= state->stack_cap) { + state->error = SGL_WKB_READER_RECURSION_LIMIT; + return false; + } + + // Read the count + const auto count = read_u32(state); + if (count == 0) { + break; + } + + state->stack_buf[state->depth++] = count; + + // Make a new child + auto part = static_cast(state->alloc->alloc(sizeof(geometry))); + new (part) geometry(geometry_type::INVALID, has_z, has_m); + geom->append_part(part); + + // Set the new child as the current geometry + geom = part; + + // Continue to the next iteration in the outer loop + continue; + } + default: + state->error = SGL_WKB_READER_UNSUPPORTED_TYPE; + return false; + } + + // Inner loop + while (true) { + const auto parent = geom->get_parent(); + + if (state->depth == 0) { + SGL_ASSERT(parent == nullptr); + // Done! 
+ return true; + } + + SGL_ASSERT(parent != nullptr); + + // Check that we are of the right type + const auto ptype = parent->get_type(); + const auto ctype = geom->get_type(); + + if (ptype == geometry_type::MULTI_POINT && ctype != geometry_type::POINT) { + state->error = SGL_WKB_INVALID_CHILD_TYPE; + return false; + } + if (ptype == geometry_type::MULTI_LINESTRING && ctype != geometry_type::LINESTRING) { + state->error = SGL_WKB_INVALID_CHILD_TYPE; + return false; + } + if (ptype == geometry_type::MULTI_POLYGON && ctype != geometry_type::POLYGON) { + state->error = SGL_WKB_INVALID_CHILD_TYPE; + return false; + } + + // Check if we are done with the current part + state->stack_buf[state->depth - 1]--; + + if (state->stack_buf[state->depth - 1] > 0) { + // There are still more parts to read + // Create a new part and append it to the parent + auto part = static_cast(state->alloc->alloc(sizeof(geometry))); + new (part) geometry(geometry_type::INVALID, has_z, has_m); + parent->append_part(part); + + // Go "sideways" to the new part + geom = part; + break; + } + + // Go upwards + geom = parent; + state->depth--; + } + } + +} + +bool wkb_reader_try_parse_stats(wkb_reader *state, box_xy *out_extent, size_t *out_vertex_count) { + + SGL_ASSERT(state); + SGL_ASSERT(state->buf); + SGL_ASSERT(state->end); + SGL_ASSERT(state->stack_buf); + SGL_ASSERT(state->stack_cap > 0); + + // Setup state + state->pos = state->buf; + state->error = SGL_WKB_READER_OK; + state->depth = 0; + state->le = false; + state->type_id = 0; + state->has_any_m = false; + state->has_any_z = false; + + uint32_t vertex_count = 0; + box_xy extent = box_xy::smallest(); + + while(true) { + state->le = read_u8(state); + state->type_id = read_u32(state); + + const auto type = static_cast((state->type_id & 0xffff) % 1000); + const auto flags = (state->type_id & 0xffff) / 1000; + const auto has_z = (flags == 1) || (flags == 3) || ((state->type_id & 0x80000000) != 0); + const auto has_m = (flags == 2) || (flags 
== 3) || ((state->type_id & 0x40000000) != 0); + const auto has_srid = (state->type_id & 0x20000000) != 0; + + if (has_srid) { + // skip the SRID + const auto srid = read_u32(state); + (void)srid; + } + + switch (type) { + case geometry_type::POINT: { + bool all_nan = true; + const auto x = read_f64(state); + all_nan = all_nan && std::isnan(x); + const auto y = read_f64(state); + all_nan = all_nan && std::isnan(y); + if (has_z) { + const auto z = read_f64(state); + all_nan = all_nan && std::isnan(z); + } + if (has_m) { + const auto m = read_f64(state); + all_nan = all_nan && std::isnan(m); + } + // For points, all NaN is usually interpreted as an empty point + if(state->nan_as_empty && all_nan) { + break; + } + extent.min.x = std::min(extent.min.x, x); + extent.min.y = std::min(extent.min.y, y); + extent.max.x = std::max(extent.max.x, x); + extent.max.y = std::max(extent.max.y, y); + vertex_count++; + } break; + case geometry_type::LINESTRING: { + const auto num_points = read_u32(state); + for(uint32_t i = 0; i < num_points; i++) { + const auto x = read_f64(state); + const auto y = read_f64(state); + if(has_z) { + read_skip(state, sizeof(double)); + } + if(has_m) { + read_skip(state, sizeof(double)); + } + extent.min.x = std::min(extent.min.x, x); + extent.min.y = std::min(extent.min.y, y); + extent.max.x = std::max(extent.max.x, x); + extent.max.y = std::max(extent.max.y, y); + } + vertex_count += num_points; + } break; + case geometry_type::POLYGON: { + const auto num_rings = read_u32(state); + for(uint32_t i = 0; i < num_rings; i++) { + const auto num_points = read_u32(state); + for(uint32_t j = 0; j < num_points; j++) { + const auto x = read_f64(state); + const auto y = read_f64(state); + if(has_z) { + read_skip(state, sizeof(double)); + } + if(has_m) { + read_skip(state, sizeof(double)); + } + extent.min.x = std::min(extent.min.x, x); + extent.min.y = std::min(extent.min.y, y); + extent.max.x = std::max(extent.max.x, x); + extent.max.y = std::max(extent.max.y, 
y); + } + vertex_count += num_points; + } + } break; + case geometry_type::MULTI_POINT: + case geometry_type::MULTI_LINESTRING: + case geometry_type::MULTI_POLYGON: + case geometry_type::MULTI_GEOMETRY: { + // Check stack depth + if (state->depth >= state->stack_cap) { + state->error = SGL_WKB_READER_RECURSION_LIMIT; + return false; + } + + // Read the count + const auto count = read_u32(state); + if (count == 0) { + break; + } + + // Push the count to the stack, go downwards and continue + state->stack_buf[state->depth++] = count; + continue; + } + default: + state->error = SGL_WKB_READER_UNSUPPORTED_TYPE; + return false; + } + + while(true) { + if (state->depth == 0) { + // We reached the bottom, return! + if (out_vertex_count) { + *out_vertex_count = vertex_count; + } + if (out_extent) { + *out_extent = extent; + } + return true; + } + + // Decrement current remaining count + state->stack_buf[state->depth - 1]--; + + // Are there still more parts to read, then break out and continue + if (state->stack_buf[state->depth - 1] > 0) { + break; + } + + // Otherwise, go upwards + state->depth--; + } + } + +#undef read_u8 +#undef read_u32 +#undef read_u64 +#undef read_verts +#undef read_skip +} + +std::string wkb_reader_get_error_message(const wkb_reader *state) { + if (!state || state->error == SGL_WKB_READER_OK) { + return ""; + } + switch (state->error) { + case SGL_WKB_READER_OUT_OF_BOUNDS: { + return "Out of bounds read (is the WKB corrupt?)"; + } + case SGL_WKB_READER_MIXED_ZM: { + return "Mixed Z and M values are not allowed"; + } + case SGL_WKB_READER_RECURSION_LIMIT: { + return "Recursion limit '" + std::to_string(state->stack_cap) + "' reached"; + } + case SGL_WKB_READER_UNSUPPORTED_TYPE: { + // Try to fish out the type anyway + const auto type = ((state->type_id & 0xffff) % 1000); + const auto flags = (state->type_id & 0xffff) / 1000; + const auto has_z = (flags == 1) || (flags == 3) || ((state->type_id & 0x80000000) != 0); + const auto has_m = (flags == 2) 
|| (flags == 3) || ((state->type_id & 0x40000000) != 0); + const auto has_srid = (state->type_id & 0x20000000) != 0; + + auto guessed_type = "UNKNOWN"; + switch (type) { + case 1: guessed_type = "POINT"; break; + case 2: guessed_type = "LINESTRING"; break; + case 3: guessed_type = "POLYGON"; break; + case 4: guessed_type = "MULTIPOINT"; break; + case 5: guessed_type = "MULTILINESTRING"; break; + case 6: guessed_type = "MULTIPOLYGON"; break; + case 7: guessed_type = "GEOMETRYCOLLECTION"; break; + case 8: guessed_type = "CIRCULARSTRING"; break; + case 9: guessed_type = "COMPOUNDCURVE"; break; + case 10: guessed_type = "CURVEPOLYGON"; break; + case 11: guessed_type = "MULTICURVE"; break; + case 12: guessed_type = "MULTISURFACE"; break; + case 13: guessed_type = "CURVE"; break; + case 14: guessed_type = "SURFACE"; break; + case 15: guessed_type = "POLYHEDRALSURFACE"; break; + case 16: guessed_type = "TIN"; break; + case 17: guessed_type = "TRIANGLE"; break; + case 18: guessed_type = "CIRCLE"; break; + case 19: guessed_type = "GEODESICSTRING"; break; + case 20: guessed_type = "ELLIPTICALCURVE"; break; + case 21: guessed_type = "NURBSCURVE"; break; + case 22: guessed_type = "CLOTHOID"; break; + case 23: guessed_type = "SPIRALCURVE"; break; + case 24: guessed_type = "COMPOUNDSURFACE"; break; + case 25: guessed_type = "ORIENTABLESURFACE"; break; + case 102: guessed_type = "AFFINEPLACEMENT"; break; + default: + break; + } + + std::string msg = "WKB type '"; + msg += guessed_type; + if(has_z || has_m) { + msg += " "; + } + if(has_z) { + msg += "Z"; + } + if(has_m) { + msg += "M"; + } + msg += "' is not supported!"; + msg += " (type id: " + std::to_string(state->type_id) + ")"; + if(has_srid) { + msg += " (SRID present)"; + } + return msg; + } + + case SGL_WKB_INVALID_CHILD_TYPE: { + return "Invalid child type"; + } + default: { + return "Unknown error"; + } + } +} + +size_t to_wkb_size(const geometry *geom) { + if (!geom) { + return 0; + } + + const auto root = 
geom->get_parent(); + + size_t size = 0; + const geometry *curr = geom; + + // Main loop + while (true) { + switch (curr->get_type()) { + case geometry_type::POINT: { + size += 1 + 4 + curr->get_vertex_size(); + } break; + case geometry_type::LINESTRING: { + size += 1 + 4 + 4 + curr->get_count() * curr->get_vertex_size(); + } break; + case geometry_type::POLYGON: { + size += 1 + 4 + 4; + const auto tail = curr->get_last_part(); + auto head = tail; + if (head) { + do { + head = head->get_next(); + size += 4 + head->get_count() * head->get_vertex_size(); + } while (head != tail); + } + } break; + case geometry_type::MULTI_POINT: { + size += 1 + 4 + 4 + curr->get_count() * (1 + 4 + curr->get_vertex_size()); + } break; + case geometry_type::MULTI_LINESTRING: { + size += 1 + 4 + 4; + const auto tail = curr->get_last_part(); + auto head = tail; + if (head) { + do { + head = head->get_next(); + size += 1 + 4 + 4 + head->get_count() * head->get_vertex_size(); + } while (head != tail); + } + } break; + case geometry_type::MULTI_POLYGON: { + size += 1 + 4 + 4; + const auto tail = curr->get_last_part(); + auto head = tail; + if (head) { + do { + head = head->get_next(); + size += 1 + 4 + 4; + const auto rtail = head->get_last_part(); + auto rhead = rtail; + if (rhead) { + do { + rhead = rhead->get_next(); + size += 4 + rhead->get_count() * rhead->get_vertex_size(); + } while (rhead != rtail); + } + } while (head != tail); + } + } break; + case geometry_type::MULTI_GEOMETRY: { + size += 1 + 4 + 4; + if (!curr->is_empty()) { + curr = curr->get_first_part(); + continue; + } + } break; + default: { + SGL_ASSERT(false); + return 0; + } + } + + // Inner loop + while (true) { + const auto parent = curr->get_parent(); + if (parent == root) { + // Done! 
+ return size; + } + + if (curr != parent->get_last_part()) { + // Go sideways + curr = curr->get_next(); + break; + } + + // Go upwards + curr = parent; + } + } +} + +// TODO: Make non-recursive +size_t to_wkb(const geometry *geom, uint8_t *buffer, size_t size) { + +#define WKB_WRITE_U8(PTR, VAL) \ + do { \ + uint8_t v = VAL; \ + memcpy(PTR, &v, sizeof(uint8_t)); \ + PTR += sizeof(uint8_t); \ + } while (0) +#define WKB_WRITE_U32(PTR, VAL) \ + do { \ + uint32_t v = VAL; \ + memcpy(PTR, &v, sizeof(uint32_t)); \ + PTR += sizeof(uint32_t); \ + } while (0) +#define WKB_WRITE_DOUBLE(PTR, VAL) \ + do { \ + double v = VAL; \ + memcpy(PTR, &v, sizeof(double)); \ + PTR += sizeof(double); \ + } while (0) +#define WKB_WRITE_DATA(PTR, SRC, SIZE) \ + do { \ + memcpy(PTR, SRC, SIZE); \ + PTR += SIZE; \ + } while (0) + + auto ptr = buffer; + + // Write header + const auto type_id = static_cast(geom->get_type()) + geom->has_z() * 1000 + geom->has_m() * 2000; + WKB_WRITE_U8(ptr, 1); + WKB_WRITE_U32(ptr, type_id); + + // Write the body + switch (geom->get_type()) { + case geometry_type::POINT: { + if (geom->is_empty()) { + // WKB does not support empty points, so we write NaNs instead + WKB_WRITE_DOUBLE(ptr, std::numeric_limits::quiet_NaN()); + WKB_WRITE_DOUBLE(ptr, std::numeric_limits::quiet_NaN()); + } else { + WKB_WRITE_DATA(ptr, geom->get_vertex_data(), geom->get_vertex_size()); + } + } break; + case geometry_type::LINESTRING: { + WKB_WRITE_U32(ptr, geom->get_count()); + WKB_WRITE_DATA(ptr, geom->get_vertex_data(), geom->get_vertex_size() * geom->get_count()); + } break; + case geometry_type::POLYGON: { + WKB_WRITE_U32(ptr, geom->get_count()); + const auto tail = geom->get_last_part(); + auto head = tail; + if (head) { + do { + head = head->get_next(); + WKB_WRITE_U32(ptr, head->get_count()); + WKB_WRITE_DATA(ptr, head->get_vertex_data(), head->get_vertex_size() * head->get_count()); + } while (head != tail); + } + } break; + case geometry_type::MULTI_POINT: + case 
geometry_type::MULTI_LINESTRING: + case geometry_type::MULTI_POLYGON: + case geometry_type::MULTI_GEOMETRY: { + WKB_WRITE_U32(ptr, geom->get_count()); + const auto tail = geom->get_last_part(); + auto head = tail; + if (head) { + do { + head = head->get_next(); + ptr += to_wkb(head, ptr, size - (ptr - buffer)); + } while (head != tail); + } + } break; + default: + SGL_ASSERT(false); + break; + } + + return ptr - buffer; + +#undef WKB_WRITE_U8 +#undef WKB_WRITE_U32 +#undef WKB_WRITE_DOUBLE +#undef WKB_WRITE_DATA +} + +//------------------------------------------------------------------------------ +// WKT Parsing +//------------------------------------------------------------------------------ + +static void parse_ws(wkt_reader *state) { + while (state->pos < state->end && std::isspace(*state->pos)) { + state->pos++; + } +} + +static bool match_token(wkt_reader *state, const char *token) { + // case insensitive match + auto ptr = state->pos; + while (ptr < state->end && *token != '\0' && std::tolower(*token) == std::tolower(*ptr)) { + token++; + ptr++; + } + + if (*token != '\0') { + return false; + } + + state->pos = ptr; + parse_ws(state); + return true; +} + +static bool match_char(wkt_reader *state, char c) { + if (state->pos < state->end && std::tolower(*state->pos) == std::tolower(c)) { + state->pos++; + parse_ws(state); + return true; + } + return false; +} + +static bool match_double(wkt_reader *state, double *result) { + // Because we care about the length, we cant just use std::strtod straight away without risking + // out-of-bounds reads. Instead, we will manually parse the number and then use std::strtod to + // convert the value. 
+ + auto ptr = state->pos; + + // Match sign + if (ptr < state->end && (*ptr == '+' || *ptr == '-')) { + ptr++; + } + + // Match number part + while (ptr < state->end && std::isdigit(*ptr)) { + ptr++; + } + + // Match decimal part + if (ptr < state->end && *ptr == '.') { + ptr++; + while (ptr < state->end && std::isdigit(*ptr)) { + ptr++; + } + } + + // Match exponent part + if (ptr < state->end && (*ptr == 'e' || *ptr == 'E')) { + ptr++; + if (ptr < state->end && (*ptr == '+' || *ptr == '-')) { + ptr++; + } + + while (ptr < state->end && std::isdigit(*ptr)) { + ptr++; + } + } + + // Did we manage to parse anything? + if (ptr == state->pos) { + return false; + } + + // If we got here, we know there is something resembling a number within the bounds of the buffer + // We can now use std::strtod to actually parse the number + char *end; + *result = std::strtod(state->pos, &end); + if (state->pos == end) { + return false; + } + state->pos = end; + parse_ws(state); + return true; +} + +struct vertex_buffer { + allocator *alloc; + const uint32_t stride; + double *ptr; + uint32_t len; + uint32_t cap; + + vertex_buffer(allocator *alloc, uint32_t stride) : alloc(alloc), stride(stride), len(0), cap(1) { + ptr = static_cast(this->alloc->alloc(sizeof(double) * stride * cap)); + } + + void push_back(const double *data) { + if (len == cap) { + const auto new_cap = cap * 2; + const auto old_size = sizeof(double) * stride * cap; + const auto new_size = sizeof(double) * stride * new_cap; + + ptr = static_cast(alloc->realloc(ptr, old_size, new_size)); + cap = new_cap; + } + + memcpy(ptr + len * stride, data, sizeof(double) * stride); + len++; + } + + void assign(geometry *geom) { + + // Shrink to fit + if (cap > len) { + const auto old_size = sizeof(double) * stride * cap; + const auto new_size = sizeof(double) * stride * len; + ptr = static_cast(alloc->realloc(ptr, old_size, new_size)); + } + + geom->set_vertex_data(reinterpret_cast(ptr), len); + } +}; + +// TODO: break this up 
into smaller functions, unify result/state
+// Parses the WKT text in [state->buf, state->end) into the geometry tree rooted at `out`.
+// Child geometries and vertex arrays are allocated from `state->alloc`.
+// Returns true on success. On failure returns false with `state->error` pointing at a static
+// message and `state->pos` left where parsing stopped; `out` may then be partially filled in.
+// Nested GEOMETRYCOLLECTIONs are handled iteratively by walking `geom` down and back up the tree.
+bool wkt_reader_try_parse(wkt_reader *state, geometry *out) {
+
+	SGL_ASSERT(state != nullptr);
+	SGL_ASSERT(out != nullptr);
+
+	// These need to be set by the caller
+	SGL_ASSERT(state->alloc != nullptr);
+	SGL_ASSERT(state->buf != nullptr);
+	SGL_ASSERT(state->end != nullptr);
+
+	// Setup state
+	state->pos = state->buf;
+	state->error = nullptr;
+
+	allocator *alloc = state->alloc;
+
+	geometry *root = out;
+	geometry *geom = root;
+
+	// Both helper macros record an error message and return false from this function on a mismatch
+	// clang-format off
+#define expect_char(STATE, C) do { if(!match_char(STATE, C)) { (STATE)->error = "Expected character: '" #C "'"; return false; } } while(0)
+#define expect_number(STATE, RESULT) do { if(!match_double(STATE, RESULT)) { (STATE)->error = "Expected number"; return false; } } while(0)
+	// clang-format on
+
+	// Skip whitespace
+	parse_ws(state);
+
+	// Skip leading SRID, we dont support it
+	// TODO: Parse this and stuff it into the result
+	if (match_token(state, "SRID")) {
+
+		// Everything up to the terminating ';' is discarded
+		while (state->pos < state->end && *state->pos != ';') {
+			state->pos++;
+		}
+		expect_char(state, ';');
+	}
+
+	// Main loop: each iteration parses one geometry into `geom`
+	while (true) {
+		// Now we should have a geometry type
+		if (match_token(state, "POINT")) {
+			geom->set_type(geometry_type::POINT);
+		} else if (match_token(state, "LINESTRING")) {
+			geom->set_type(geometry_type::LINESTRING);
+		} else if (match_token(state, "POLYGON")) {
+			geom->set_type(geometry_type::POLYGON);
+		} else if (match_token(state, "MULTIPOINT")) {
+			geom->set_type(geometry_type::MULTI_POINT);
+		} else if (match_token(state, "MULTILINESTRING")) {
+			geom->set_type(geometry_type::MULTI_LINESTRING);
+		} else if (match_token(state, "MULTIPOLYGON")) {
+			geom->set_type(geometry_type::MULTI_POLYGON);
+		} else if (match_token(state, "GEOMETRYCOLLECTION")) {
+			geom->set_type(geometry_type::MULTI_GEOMETRY);
+		} else {
+			state->error = "Expected geometry type";
+			return false;
+		}
+
+		// Match Z and M
+		if (match_char(state, 'z')) {
+			geom->set_z(true);
+		}
+		if
(match_char(state, 'm')) {
+			geom->set_m(true);
+		}
+
+		// Every nested geometry must carry the same Z/M flags as the root
+		// TODO: make this check configurable
+		if ((geom->has_m() != root->has_m()) || (geom->has_z() != root->has_z())) {
+			state->error = "Mixed Z and M values are not supported";
+			return false;
+		}
+
+		// Number of doubles per vertex: X and Y plus optional Z and M
+		const size_t vertex_stride = 2 + geom->has_z() + geom->has_m();
+
+		// Parse EMPTY
+		if (!match_token(state, "EMPTY")) {
+			switch (geom->get_type()) {
+			case geometry_type::POINT: {
+				expect_char(state, '(');
+
+				vertex_buffer verts(alloc, vertex_stride);
+				double vert[4] = {0, 0, 0, 0};
+				for (size_t i = 0; i < vertex_stride; i++) {
+					expect_number(state, &vert[i]);
+				}
+				verts.push_back(vert);
+				verts.assign(geom);
+
+				expect_char(state, ')');
+			} break;
+			case geometry_type::LINESTRING: {
+				expect_char(state, '(');
+
+				// Comma-separated vertices; at least one is required
+				vertex_buffer verts(alloc, vertex_stride);
+				do {
+					double vert[4] = {0, 0, 0, 0};
+					for (size_t i = 0; i < vertex_stride; i++) {
+						expect_number(state, &vert[i]);
+					}
+					verts.push_back(vert);
+				} while (match_char(state, ','));
+
+				verts.assign(geom);
+
+				expect_char(state, ')');
+			} break;
+			case geometry_type::POLYGON: {
+				expect_char(state, '(');
+				// Each ring is stored as a LINESTRING part of the polygon; a ring may be EMPTY
+				do {
+					auto ring = static_cast(alloc->alloc(sizeof(geometry)));
+					new (ring) geometry(geometry_type::LINESTRING, geom->has_z(), geom->has_m());
+					if (!match_token(state, "EMPTY")) {
+						expect_char(state, '(');
+
+						vertex_buffer verts(alloc, vertex_stride);
+						do {
+							double vert[4] = {0, 0, 0, 0};
+							for (size_t i = 0; i < vertex_stride; i++) {
+								expect_number(state, &vert[i]);
+							}
+							verts.push_back(vert);
+						} while (match_char(state, ','));
+
+						verts.assign(ring);
+
+						expect_char(state, ')');
+					}
+					geom->append_part(ring);
+				} while (match_char(state, ','));
+				expect_char(state, ')');
+			} break;
+			case geometry_type::MULTI_POINT: {
+				expect_char(state, '(');
+				// Multipoints are special in that parens around each point is optional.
+				do {
+					bool has_paren = false;
+					if (match_char(state, '(')) {
+						has_paren = true;
+					}
+					auto point = static_cast(alloc->alloc(sizeof(geometry)));
+					new (point) geometry(geometry_type::POINT, geom->has_z(), geom->has_m());
+					if (!match_token(state, "EMPTY")) {
+						// TODO: Do we need to have optional parens to accept EMPTY?
+
+						vertex_buffer verts(alloc, vertex_stride);
+						double vert[4] = {0, 0, 0, 0};
+						for (size_t i = 0; i < vertex_stride; i++) {
+							expect_number(state, &vert[i]);
+						}
+						verts.push_back(vert);
+						verts.assign(point);
+					}
+					// A closing paren is only required if an opening one was seen
+					if (has_paren) {
+						expect_char(state, ')');
+					}
+					geom->append_part(point);
+				} while (match_char(state, ','));
+				expect_char(state, ')');
+			} break;
+			case geometry_type::MULTI_LINESTRING: {
+				expect_char(state, '(');
+				do {
+					auto line = static_cast(alloc->alloc(sizeof(geometry)));
+					new (line) geometry(geometry_type::LINESTRING, geom->has_z(), geom->has_m());
+					if (!match_token(state, "EMPTY")) {
+						expect_char(state, '(');
+
+						vertex_buffer verts(alloc, vertex_stride);
+						do {
+							double vert[4] = {0, 0, 0, 0};
+							for (size_t i = 0; i < vertex_stride; i++) {
+								expect_number(state, &vert[i]);
+							}
+							verts.push_back(vert);
+						} while (match_char(state, ','));
+
+						verts.assign(line);
+
+						expect_char(state, ')');
+					}
+					geom->append_part(line);
+				} while (match_char(state, ','));
+				expect_char(state, ')');
+			} break;
+			case geometry_type::MULTI_POLYGON: {
+				expect_char(state, '(');
+				// Polygons, then their rings, then the ring vertices
+				do {
+					auto poly = static_cast(alloc->alloc(sizeof(geometry)));
+					new (poly) geometry(geometry_type::POLYGON, geom->has_z(), geom->has_m());
+					if (!match_token(state, "EMPTY")) {
+						expect_char(state, '(');
+						do {
+							auto ring = static_cast(alloc->alloc(sizeof(geometry)));
+							new (ring) geometry(geometry_type::LINESTRING, geom->has_z(), geom->has_m());
+							if (!match_token(state, "EMPTY")) {
+								expect_char(state, '(');
+
+								vertex_buffer verts(alloc, vertex_stride);
+								do {
+									double vert[4] = {0, 0, 0, 0};
+									for (size_t i = 0; i < vertex_stride; i++) {
+
expect_number(state, &vert[i]); + } + verts.push_back(vert); + } while (match_char(state, ',')); + + verts.assign(ring); + + expect_char(state, ')'); + } + poly->append_part(ring); + } while (match_char(state, ',')); + expect_char(state, ')'); + } + geom->append_part(poly); + } while (match_char(state, ',')); + expect_char(state, ')'); + } break; + case geometry_type::MULTI_GEOMETRY: { + expect_char(state, '('); + + // add another child + auto new_geom = static_cast(alloc->alloc(sizeof(geometry))); + new (new_geom) geometry(geometry_type::INVALID); + + geom->append_part(new_geom); + geom = new_geom; + } + continue; // This continue moves us to the next iteration + default: + SGL_ASSERT(false); + state->error = "Invalid geometry type"; + return false; + } + } + + while (true) { + const auto parent = geom->get_parent(); + if (!parent) { + // Done! + return true; + } + + SGL_ASSERT(parent->get_type() == geometry_type::MULTI_GEOMETRY); + + if (match_char(state, ',')) { + // The geometry collection is not done yet, add another sibling + auto new_geom = static_cast(alloc->alloc(sizeof(geometry))); + new (new_geom) geometry(geometry_type::INVALID); + + parent->append_part(new_geom); + geom = new_geom; + + // goto begin; + break; + } + + expect_char(state, ')'); + // The geometry collection is done, go up + geom = parent; + } + } + +#undef expect_char +#undef expect_number +} + +std::string wkt_reader_get_error_message(const wkt_reader *state) { + if (!state || !state->error) { + return ""; + } + + // Return a string of the current position in the input string + const auto len = 32; + const auto range_beg = std::max(state->pos - len, state->buf); + const auto range_End = std::min(state->pos + 1, state->end); + auto range = std::string(range_beg, range_End); + if (range_beg != state->buf) { + range = "..." 
+ range; + } + + // Add an arrow to indicate the position + const auto err = std::string(state->error); + const auto pos = std::to_string(state->pos - state->buf); + const auto msg = err + " at position '" + pos + "' near: '" + range + "'|<---"; + + return msg; +} + +//------------------------------------------------------------------------------ +// Extract +//------------------------------------------------------------------------------ +// TODO: Make these non-recursive + +static bool select_points(void *, const sgl::geometry *geom) { + switch (geom->get_type()) { + case sgl::geometry_type::POINT: + case sgl::geometry_type::MULTI_POINT: + case sgl::geometry_type::MULTI_GEOMETRY: + return true; + default: + return false; + } +} + +static void handle_points(void *state, sgl::geometry *geom) { + auto &points = *static_cast(state); + + switch (geom->get_type()) { + case sgl::geometry_type::POINT: + points.append_part(geom); + break; + case sgl::geometry_type::MULTI_POINT: + case sgl::geometry_type::MULTI_GEOMETRY: + geom->filter_parts(state, select_points, handle_points); + break; + default: + SGL_ASSERT(false); + break; + } +} + +static bool select_lines(void *state, const sgl::geometry *geom) { + switch (geom->get_type()) { + case sgl::geometry_type::LINESTRING: + case sgl::geometry_type::MULTI_LINESTRING: + case sgl::geometry_type::MULTI_GEOMETRY: + return true; + default: + return false; + } +} + +static void handle_lines(void *state, sgl::geometry *geom) { + auto &lines = *static_cast(state); + + switch (geom->get_type()) { + case sgl::geometry_type::LINESTRING: + lines.append_part(geom); + break; + case sgl::geometry_type::MULTI_LINESTRING: + case sgl::geometry_type::MULTI_GEOMETRY: + geom->filter_parts(state, select_lines, handle_lines); + break; + default: + SGL_ASSERT(false); + break; + } +} + +static bool select_polygons(void *state, const sgl::geometry *geom) { + switch (geom->get_type()) { + case sgl::geometry_type::POLYGON: + case 
sgl::geometry_type::MULTI_POLYGON: + case sgl::geometry_type::MULTI_GEOMETRY: + return true; + default: + return false; + } +} + +static void handle_polygons(void *state, sgl::geometry *geom) { + auto &polygons = *static_cast(state); + + switch (geom->get_type()) { + case sgl::geometry_type::POLYGON: + polygons.append_part(geom); + break; + case sgl::geometry_type::MULTI_POLYGON: + case sgl::geometry_type::MULTI_GEOMETRY: + geom->filter_parts(state, select_polygons, handle_polygons); + break; + default: + SGL_ASSERT(false); + break; + } +} + +geometry extract_points(sgl::geometry *geom) { + auto points = sgl::geometry(sgl::geometry_type::MULTI_POINT, geom->has_z(), geom->has_m()); + geom->filter_parts(&points, select_points, handle_points); + return points; +} + +geometry extract_linestrings(sgl::geometry *geom) { + auto lines = sgl::geometry(sgl::geometry_type::MULTI_LINESTRING, geom->has_z(), geom->has_m()); + geom->filter_parts(&lines, select_lines, handle_lines); + return lines; +} + +geometry extract_polygons(sgl::geometry *geom) { + auto polygons = sgl::geometry(sgl::geometry_type::MULTI_POLYGON, geom->has_z(), geom->has_m()); + geom->filter_parts(&polygons, select_polygons, handle_polygons); + return polygons; +} + + +//------------------------------------------------------------------------------ +// Distance +//------------------------------------------------------------------------------ +static double point_point_distance(const sgl::geometry *lhs, const sgl::geometry *rhs) { + SGL_ASSERT(lhs->get_type() == sgl::geometry_type::POINT); + SGL_ASSERT(rhs->get_type() == sgl::geometry_type::POINT); + + if(lhs->is_empty() || rhs->is_empty()) { + return std::numeric_limits::quiet_NaN(); + } + + const auto lhs_vertex = lhs->get_vertex_xy(0); + const auto rhs_vertex = rhs->get_vertex_xy(0); + + return std::hypot(lhs_vertex.x - rhs_vertex.x, lhs_vertex.y - rhs_vertex.y); +} + +/* +function sqr(x) { return x * x } +function dist2(v, w) { return sqr(v.x - w.x) + 
sqr(v.y - w.y) }
+function distToSegmentSquared(p, v, w) {
+var l2 = dist2(v, w);
+if (l2 == 0) return dist2(p, v);
+var t = ((p.x - v.x) * (w.x - v.x) + (p.y - v.y) * (w.y - v.y)) / l2;
+t = Math.max(0, Math.min(1, t));
+return dist2(p, { x: v.x + t * (w.x - v.x),
+y: v.y + t * (w.y - v.y) });
+}
+function distToSegment(p, v, w) { return Math.sqrt(distToSegmentSquared(p, v, w)); }
+ */
+
+// Squared planar distance between two vertices.
+static double vertex_distance_squared(const vertex_xy *lhs, const vertex_xy *rhs) {
+	const auto dx = lhs->x - rhs->x;
+	const auto dy = lhs->y - rhs->y;
+	return dx * dx + dy * dy;
+}
+
+// Planar distance between two vertices.
+static double vertex_distance(const vertex_xy *lhs, const vertex_xy *rhs) {
+	return std::hypot(lhs->x - rhs->x, lhs->y - rhs->y);
+}
+
+// Distance from point `p` to the segment v-w (not the infinite line through it).
+static double point_line_distance(const vertex_xy *p, const vertex_xy *v, const vertex_xy *w) {
+	const auto l2 = vertex_distance_squared(v, w);
+	if (l2 == 0) {
+		// Zero-length segment: fall back to point-to-point distance
+		// is not better to just compare if w == v?
+		return vertex_distance(p, v);
+	}
+
+	// Project p onto the line and clamp the parameter so the foot stays on the segment
+	const auto t = ((p->x - v->x) * (w->x - v->x) + (p->y - v->y) * (w->y - v->y)) / l2;
+	const auto t_clamped = std::max(0.0, std::min(1.0, t));
+	const auto x = v->x + t_clamped * (w->x - v->x);
+	const auto y = v->y + t_clamped * (w->y - v->y);
+
+	const vertex_xy intersection {x, y};
+
+	return vertex_distance(p, &intersection);
+}
+
+// Twice the signed area of triangle a-b-c: positive if c lies to the left of a->b, negative if it
+// lies to the right, zero if the three vertices are collinear.
+static double vertex_orientation(const vertex_xy *a, const vertex_xy *b, const vertex_xy *c) {
+	return (b->x - a->x) * (c->y - a->y) - (b->y - a->y) * (c->x - a->x);
+}
+
+// Distance between segments a1-a2 and b1-b2.
+static double segment_segment_distance(const vertex_xy *a1, const vertex_xy *a2, const vertex_xy *b1,
+                                       const vertex_xy *b2) {
+	const auto o1 = vertex_orientation(a1, a2, b1);
+	const auto o2 = vertex_orientation(a1, a2, b2);
+	const auto o3 = vertex_orientation(b1, b2, a1);
+	const auto o4 = vertex_orientation(b1, b2, a2);
+
+	// Proper crossing: the endpoints of each segment lie strictly on opposite sides of the other
+	const bool b_straddles_a = (o1 > 0 && o2 < 0) || (o1 < 0 && o2 > 0);
+	const bool a_straddles_b = (o3 > 0 && o4 < 0) || (o3 < 0 && o4 > 0);
+	if (b_straddles_a && a_straddles_b) {
+		return 0.0;
+	}
+
+	// Otherwise the closest approach involves at least one endpoint. This also covers touching
+	// and collinear overlapping segments, where an endpoint lies on the other segment.
+	const auto d_a = std::min(point_line_distance(a1, b1, b2), point_line_distance(a2, b1, b2));
+	const auto d_b = std::min(point_line_distance(b1, a1, a2), point_line_distance(b2, a1, a2));
+	return std::min(d_a, d_b);
+}
+
+// Distance from a point to the nearest segment of a linestring; NaN if either side is empty.
+static double point_linestring_distance(const sgl::geometry *lhs, const sgl::geometry *rhs) {
+	SGL_ASSERT(lhs->get_type() == sgl::geometry_type::POINT);
+	SGL_ASSERT(rhs->get_type() == sgl::geometry_type::LINESTRING);
+
+	if(lhs->is_empty() || rhs->is_empty()) {
+		return std::numeric_limits<double>::quiet_NaN();
+	}
+
+	const auto lhs_vertex = lhs->get_vertex_xy(0);
+	double min_dist = std::numeric_limits<double>::infinity();
+	const auto count = rhs->get_count();
+
+	auto v1 = rhs->get_vertex_xy(0);
+	if(count == 1) {
+		// Degenerate case, should not happen
+		return vertex_distance(&lhs_vertex, &v1);
+	}
+
+	for(size_t i = 1; i < count; i++) {
+		const auto v2 = rhs->get_vertex_xy(i);
+		const auto dist = point_line_distance(&lhs_vertex, &v1, &v2);
+		min_dist = std::min(min_dist, dist);
+		v1 = v2;
+	}
+
+	return min_dist;
+}
+
+// Distance from a point to the exterior ring of a polygon; NaN if the polygon is empty.
+// NOTE(review): only the shell boundary is considered. A point inside the polygon gets its distance
+// to the boundary rather than 0, and holes are ignored.
+static double point_polygon_distance(const sgl::geometry *lhs, const sgl::geometry *rhs) {
+	SGL_ASSERT(lhs->get_type() == sgl::geometry_type::POINT);
+	SGL_ASSERT(rhs->get_type() == sgl::geometry_type::POLYGON);
+
+	if(rhs->is_empty()) {
+		return std::numeric_limits<double>::quiet_NaN();
+	}
+	const auto shell = rhs->get_first_part();
+	SGL_ASSERT(shell != nullptr);
+	return point_linestring_distance(lhs, shell);
+}
+
+// Minimum distance between two linestrings, computed over every pair of segments (O(n * m)).
+// Returns NaN if either linestring is empty and 0 as soon as two segments touch or cross.
+static double linestring_linestring_distance(const geometry *lhs, const geometry *rhs) {
+	SGL_ASSERT(lhs->get_type() == geometry_type::LINESTRING);
+	SGL_ASSERT(rhs->get_type() == geometry_type::LINESTRING);
+
+	if(lhs->is_empty() || rhs->is_empty()) {
+		return std::numeric_limits<double>::quiet_NaN();
+	}
+
+	const size_t lhs_count = lhs->get_count();
+	const size_t rhs_count = rhs->get_count();
+
+	// A single-vertex linestring is degenerate; treat it as one zero-length segment
+	const size_t lhs_segments = lhs_count > 1 ? lhs_count - 1 : 1;
+	const size_t rhs_segments = rhs_count > 1 ? rhs_count - 1 : 1;
+
+	double min_dist = std::numeric_limits<double>::infinity();
+	for(size_t i = 0; i < lhs_segments; i++) {
+		const auto a1 = lhs->get_vertex_xy(i);
+		const auto a2 = lhs->get_vertex_xy(i + 1 < lhs_count ? i + 1 : i);
+		for(size_t j = 0; j < rhs_segments; j++) {
+			const auto b1 = rhs->get_vertex_xy(j);
+			const auto b2 = rhs->get_vertex_xy(j + 1 < rhs_count ? j + 1 : j);
+			min_dist = std::min(min_dist, segment_segment_distance(&a1, &a2, &b1, &b2));
+			if(min_dist == 0) {
+				// Cannot get any closer
+				return 0.0;
+			}
+		}
+	}
+	return min_dist;
+}
+
+// Distance from a linestring to the exterior ring of a polygon; NaN if either side (or the shell)
+// is empty.
+// NOTE(review): as with points, interior containment and holes are not considered.
+static double linestring_polygon_distance(const geometry *lhs, const geometry *rhs) {
+	SGL_ASSERT(lhs->get_type() == geometry_type::LINESTRING);
+	SGL_ASSERT(rhs->get_type() == geometry_type::POLYGON);
+
+	if(lhs->is_empty() || rhs->is_empty()) {
+		return std::numeric_limits<double>::quiet_NaN();
+	}
+
+	const auto shell = rhs->get_first_part();
+	if(shell->is_empty()) {
+		return std::numeric_limits<double>::quiet_NaN();
+	}
+	return linestring_linestring_distance(lhs, shell);
+}
+
+// Distance between the exterior rings of two polygons; NaN if either polygon (or shell) is empty.
+// NOTE(review): interior containment and holes are not considered.
+static double polygon_polygon_distance(const geometry *lhs, const geometry *rhs) {
+	SGL_ASSERT(lhs->get_type() == geometry_type::POLYGON);
+	SGL_ASSERT(rhs->get_type() == geometry_type::POLYGON);
+
+	if(lhs->is_empty() || rhs->is_empty()) {
+		return std::numeric_limits<double>::quiet_NaN();
+	}
+
+	const auto lhs_shell = lhs->get_first_part();
+	if(lhs_shell->is_empty()) {
+		return std::numeric_limits<double>::quiet_NaN();
+	}
+
+	const auto rhs_shell = rhs->get_first_part();
+	if(rhs_shell->is_empty()) {
+		return std::numeric_limits<double>::quiet_NaN();
+	}
+
+	return linestring_linestring_distance(lhs_shell, rhs_shell);
+}
+
+// Routes a pair of single-part geometries to the matching distance routine, swapping the operands
+// where only one argument order is implemented. Unsupported combinations yield NaN.
+static double distance_dispatch(const geometry *lhs_p, const geometry *rhs_p) {
+	SGL_ASSERT(!lhs_p->is_collection());
+	SGL_ASSERT(!rhs_p->is_collection());
+
+	switch (lhs_p->get_type()) {
+	case geometry_type::POINT:
+		switch (rhs_p->get_type()) {
+		case geometry_type::POINT:
+			return point_point_distance(lhs_p, rhs_p);
+		case geometry_type::LINESTRING:
+			return point_linestring_distance(lhs_p, rhs_p);
+		case geometry_type::POLYGON:
+			return point_polygon_distance(lhs_p, rhs_p);
+		default:
+			SGL_ASSERT(false);
+			return std::numeric_limits<double>::quiet_NaN();
+		}
+	case geometry_type::LINESTRING:
+		switch (rhs_p->get_type()) {
+		case geometry_type::POINT:
+			return point_linestring_distance(rhs_p, lhs_p);
+		case geometry_type::LINESTRING:
+			return linestring_linestring_distance(lhs_p, rhs_p);
+		case geometry_type::POLYGON:
+			return linestring_polygon_distance(lhs_p, rhs_p);
+		default:
+			SGL_ASSERT(false);
+			return std::numeric_limits<double>::quiet_NaN();
+		}
+	case geometry_type::POLYGON:
+		switch (rhs_p->get_type()) {
+		case geometry_type::POINT:
+			return point_polygon_distance(rhs_p, lhs_p);
+		case geometry_type::LINESTRING:
+			return linestring_polygon_distance(rhs_p, lhs_p);
+		case geometry_type::POLYGON:
+			return polygon_polygon_distance(lhs_p, rhs_p);
+		default:
+			SGL_ASSERT(false);
+			return std::numeric_limits<double>::quiet_NaN();
+		}
+	default:
+		SGL_ASSERT(false);
+		return std::numeric_limits<double>::quiet_NaN();
+	}
+}
+
+double distance(const geometry* lhs_p, const geometry* rhs_p) {
+	SGL_ASSERT(lhs_p != nullptr);
+	SGL_ASSERT(rhs_p != nullptr);
+
+	auto lhs = lhs_p;
+	auto rhs = rhs_p;
+
+	const auto lhs_root = lhs->get_parent();
+	const auto rhs_root = rhs->get_parent();
+
+	double min_dist = std::numeric_limits<double>::infinity();
+
+	while(lhs !=
lhs_root) { + + if(lhs->is_collection() && !lhs->is_empty()) { + lhs = lhs->get_first_part(); + continue; + } + + // Otherwise, we have a leaf on the LHS + // I guess this is where we create an LHS index? + // I guess it makes sense to re-order lhs and rhs depending on number of parts/verts? + // Maybe calculate a part/vertex ratio. Although dont count interior polygon rings for that. + // Alt just cache every calculation. + + while(rhs != rhs_root) { + if(rhs->is_collection() && !rhs->is_empty()) { + rhs = rhs->get_first_part(); + continue; + } + + // If we get here, we have a leaf on both sides! + min_dist = std::min(min_dist, distance_dispatch(lhs, rhs)); + + // Now move the rhs up + while(rhs != rhs_root) { + const auto parent = rhs->get_parent(); + if(parent == rhs_root) { + rhs = parent; + break; + } + + if(rhs != parent->get_last_part()) { + rhs = rhs->get_next(); + break; + } + + rhs = parent; + } + } + + while (lhs != lhs_root) { + const auto parent = lhs->get_parent(); + if (parent == lhs_root) { + lhs = parent; + break; + } + + if (lhs != parent->get_last_part()) { + lhs = lhs->get_next(); + break; + } + + lhs = parent; + } + } + + return min_dist; +} + + +//---------------------------------------------------------------------------------------------------------------------- +// Validity +//---------------------------------------------------------------------------------------------------------------------- + +bool is_valid(const sgl::geometry *geom) { + if(!geom) { + return false; + } + + const auto root = geom->get_parent(); + auto curr = geom; + + while (true) { + switch(curr->get_type()) { + case sgl::geometry_type::POINT: { + // Points cant have more than one vertex + if(curr->get_count() > 1) { + return false; + } + } break; + case sgl::geometry_type::LINESTRING: { + // Linestrings must have zero or at least two vertices + if(curr->get_count() == 1) { + return false; + } + } break; + case sgl::geometry_type::POLYGON: { + const auto tail = 
curr->get_last_part();
+			auto head = tail;
+			if (!head) {
+				break;
+			}
+			do {
+				head = head->get_next();
+				// Polygon rings must have at least four vertices
+				if(head->get_count() < 4) {
+					return false;
+				}
+			} while (head != tail);
+		} break;
+		case sgl::geometry_type::MULTI_POINT:
+		case sgl::geometry_type::MULTI_LINESTRING:
+		case sgl::geometry_type::MULTI_POLYGON:
+		case sgl::geometry_type::MULTI_GEOMETRY: {
+			if(!curr->is_empty()) {
+				// Go downwards
+				curr = curr->get_first_part();
+				continue;
+			}
+		} break;
+		default:
+			// Just return false!
+			return false;
+		}
+
+		// Inner loop
+		while(true) {
+			const auto parent = curr->get_parent();
+			if(parent == root) {
+				// Done!
+				return true;
+			}
+			if(curr != parent->get_last_part()) {
+				// Go sideways
+				curr = curr->get_next();
+				break;
+			}
+			// Go upwards
+			curr = parent;
+		}
+	}
+}
+
+
+
+} // namespace ops
+
+} // namespace sgl
\ No newline at end of file
diff --git a/src/sgl/sgl.hpp b/src/sgl/sgl.hpp
new file mode 100644
index 00000000..dfc20de7
--- /dev/null
+++ b/src/sgl/sgl.hpp
@@ -0,0 +1,1555 @@
+#pragma once
+
+#include
+#include
+#include
+#include
+#include
+
+// Assert macro
+// Can be overridden by defining SGL_ASSERT before including this header; otherwise it maps to
+// assert() and compiles to nothing when NDEBUG is defined.
+#ifndef SGL_ASSERT
+#ifdef NDEBUG
+#define SGL_ASSERT(x) ((void)0)
+#else
+#include
+#define SGL_ASSERT(x) assert(x)
+#endif
+#endif
+
+namespace sgl {
+
+// Abstract memory-allocation interface.
+// dealloc and realloc are passed the size of the existing allocation by the caller.
+struct allocator {
+	virtual void *alloc(size_t size) = 0;
+	virtual void dealloc(void *ptr, size_t size) = 0;
+	virtual void *realloc(void *ptr, size_t old_size, size_t new_size) = 0;
+	virtual ~allocator() = default;
+};
+
+// 2D vertex with exact comparison and component-wise arithmetic.
+struct vertex_xy {
+	double x;
+	double y;
+
+	bool operator==(const vertex_xy &other) const {
+		return x == other.x && y == other.y;
+	}
+
+	vertex_xy operator-(const vertex_xy &other) const {
+		return {x - other.x, y - other.y};
+	}
+
+	vertex_xy operator+(const vertex_xy &other) const {
+		return {x + other.x, y + other.y};
+	}
+
+	vertex_xy operator*(double scalar) const {
+		return {x * scalar, y * scalar};
+	}
+
+	vertex_xy operator/(double scalar) const
{
+		return {x / scalar, y / scalar};
+	}
+};
+
+// 4-component vertex with exact comparison and component-wise arithmetic.
+// NOTE(review): the third field is named `zm`, presumably because it holds either Z or M depending
+// on the vertex type - confirm against the code that fills it.
+struct vertex_xyzm {
+	double x;
+	double y;
+	double zm;
+	double m;
+
+	bool operator==(const vertex_xyzm &other) const {
+		return x == other.x && y == other.y && zm == other.zm && m == other.m;
+	}
+
+	vertex_xyzm operator-(const vertex_xyzm &other) const {
+		return {x - other.x, y - other.y, zm - other.zm, m - other.m};
+	}
+
+	vertex_xyzm operator+(const vertex_xyzm &other) const {
+		return {x + other.x, y + other.y, zm + other.zm, m + other.m};
+	}
+
+	vertex_xyzm operator*(double scalar) const {
+		return {x * scalar, y * scalar, zm * scalar, m * scalar};
+	}
+
+	vertex_xyzm operator/(double scalar) const {
+		return {x / scalar, y / scalar, zm / scalar, m / scalar};
+	}
+};
+
+// Axis-aligned 2D bounding box.
+struct box_xy {
+	vertex_xy min;
+	vertex_xy max;
+
+	// Returns an inverted box (min at the largest value, max at the lowest) so that folding any
+	// vertex in with std::min / std::max yields that vertex.
+	static box_xy smallest() {
+		constexpr auto dmax = std::numeric_limits::max();
+		constexpr auto dmin = std::numeric_limits::lowest();
+		return {
+			{dmax, dmax},
+			{dmin, dmin},
+		};
+	}
+
+	// True unless the boxes are strictly separated along X or Y; touching edges count as
+	// intersecting.
+	bool intersects(const box_xy &other) const {
+		return !(min.x > other.max.x || max.x < other.min.x || min.y > other.max.y || max.y < other.min.y);
+	}
+};
+
+// Axis-aligned bounding box over all four vertex components.
+struct box_xyzm {
+	vertex_xyzm min;
+	vertex_xyzm max;
+
+	// Inverted starting box, see box_xy::smallest()
+	static box_xyzm smallest() {
+		constexpr auto dmax = std::numeric_limits::max();
+		constexpr auto dmin = std::numeric_limits::lowest();
+		return {
+			{dmax, dmax, dmax, dmax},
+			{dmin, dmin, dmin, dmin},
+		};
+	}
+};
+
+// Geometry kinds. POINT through MULTI_GEOMETRY take the values 1-7; INVALID (0) is the default
+// for a freshly constructed geometry.
+enum class geometry_type : uint8_t {
+	INVALID = 0,
+	POINT,
+	LINESTRING,
+	POLYGON,
+	MULTI_POINT,
+	MULTI_LINESTRING,
+	MULTI_POLYGON,
+	MULTI_GEOMETRY,
+};
+
+// Vertex layouts; the numeric value is a two-bit mask (bit 0 set for Z, bit 1 set for M).
+enum class vertex_type : uint8_t {
+	XY = 0,
+	XYZ = 1,
+	XYM = 2,
+	XYZM = 3,
+};
+
+class geometry {
+private:
+	// clang-format off
+	geometry_type type = geometry_type::INVALID;
+	uint8_t       flag = 0;
+	uint16_t      padd = 0;
+	uint32_t      size = 0;
+	void*         data = nullptr;
+	geometry*     next = nullptr;
+	geometry*     prnt = nullptr;
+	// clang-format on
+public:
+	geometry() = default;
+	explicit geometry(const
geometry_type type, const bool has_z = false, const bool has_m = false) : type(type) { + set_z(has_z); + set_m(has_m); + } + + geometry_type get_type() const; + void set_type(geometry_type type); + + bool has_z() const; + bool has_m() const; + bool set_z(bool value); + bool set_m(bool value); + + bool is_single_part() const; + bool is_multi_part() const; + bool is_collection() const; + + uint32_t get_count() const; + void set_count(uint32_t count); + bool is_empty() const; + + const geometry *get_last_part() const; + const geometry *get_first_part() const; + const geometry *get_nth_part(uint32_t n) const; + const geometry *get_next() const; + const geometry *get_parent() const; + + geometry *get_last_part(); + geometry *get_first_part(); + geometry *get_nth_part(uint32_t n); + geometry *get_next(); + geometry *get_parent(); + + void append_part(geometry *part); + + typedef bool (*select_func)(void *state, const geometry *part); + typedef void (*handle_func)(void *state, geometry *part); + + void filter_parts(void *state, select_func select, handle_func handle); + + // removes the first part and returns it. Returns nullptr if there are no parts. 
+ geometry *pop_first_part(); + + const uint8_t *get_vertex_data() const; + uint8_t *get_vertex_data(); + void set_vertex_data(const uint8_t *data, uint32_t size); + void set_vertex_data(const char *data, uint32_t size); + + void allocate_vertex_data(allocator *alloc, uint32_t size); + // Requires that the vertex data has been allocated with allocate_vertex_data + void realloc_vertex_data(allocator *alloc, uint32_t new_size); + + size_t get_vertex_size() const; + vertex_xy get_vertex_xy(uint32_t n) const; + vertex_xyzm get_vertex_xyzm(uint32_t n) const; + + void set_vertex_xy(uint32_t n, const vertex_xy &vertex); + + static std::string type_to_string(geometry_type type); +}; + +} // namespace sgl + +//-------------------------------------------------------------------------- +// Implementation +//-------------------------------------------------------------------------- + +namespace sgl { + +inline geometry_type geometry::get_type() const { + return type; +} + +inline void geometry::set_type(const geometry_type type) { + this->type = type; +} + +inline bool geometry::has_z() const { + return flag & 0x01; +} + +inline bool geometry::has_m() const { + return flag & 0x02; +} + +inline bool geometry::set_z(const bool value) { + if (value) { + flag |= 0x01; + } else { + flag &= ~0x01; + } + return value; +} + +inline bool geometry::set_m(const bool value) { + if (value) { + flag |= 0x02; + } else { + flag &= ~0x02; + } + return value; +} + +inline bool geometry::is_single_part() const { + return type == geometry_type::POINT || type == geometry_type::LINESTRING; +} + +inline bool geometry::is_multi_part() const { + return type >= geometry_type::POLYGON && type <= geometry_type::MULTI_GEOMETRY; +} + +inline bool geometry::is_collection() const { + return type >= geometry_type::MULTI_POINT && type <= geometry_type::MULTI_GEOMETRY; +} + +inline uint32_t geometry::get_count() const { + return size; +} + +inline void geometry::set_count(const uint32_t count) { + size = count; 
+} + +inline bool geometry::is_empty() const { + return size == 0; +} + +inline const geometry *geometry::get_last_part() const { + SGL_ASSERT(is_multi_part() || type == geometry_type::INVALID); + const auto tail = static_cast(data); + return tail; +} + +inline const geometry *geometry::get_first_part() const { + const auto tail = get_last_part(); + return tail ? tail->next : nullptr; +} + +inline const geometry *geometry::get_nth_part(uint32_t n) const { + if (size == 0) { + SGL_ASSERT(data == nullptr); + return nullptr; + } + + auto part = get_first_part(); + SGL_ASSERT(part != nullptr); + + for (uint32_t i = 0; i < n; i++) { + part = part->next; + SGL_ASSERT(part != nullptr); + } + + return part; +} + +inline const geometry *geometry::get_next() const { + return next; +} + +inline const geometry *geometry::get_parent() const { + return prnt; +} + +inline geometry *geometry::get_last_part() { + return const_cast(static_cast(this)->get_last_part()); +} +inline geometry *geometry::get_first_part() { + return const_cast(static_cast(this)->get_first_part()); +} +inline geometry *geometry::get_nth_part(uint32_t n) { + return const_cast(static_cast(this)->get_nth_part(n)); +} +inline geometry *geometry::get_next() { + return const_cast(static_cast(this)->get_next()); +} +inline geometry *geometry::get_parent() { + return const_cast(static_cast(this)->get_parent()); +} + +inline void geometry::append_part(geometry *part) { + SGL_ASSERT(is_multi_part() || type == geometry_type::INVALID); + SGL_ASSERT(part != nullptr); + + const auto tail = static_cast(data); + + if (tail == nullptr) { + SGL_ASSERT(size == 0); + part->next = part; + } else { + SGL_ASSERT(size != 0); + const auto head = tail->next; + tail->next = part; + part->next = head; + } + + part->prnt = this; + data = part; + size++; +} + +// This needs testing +inline void geometry::filter_parts(void *state, select_func select, handle_func handle) { + auto tail = get_last_part(); + + if (!tail) { + return; + } + + 
auto prev = tail; + bool shrank = true; + + while (size > 0 && (prev != tail || shrank)) { + shrank = false; + auto curr = prev->next; + auto next = curr->next; + + if (select(state, curr)) { + + // Unlink the current part + prev->next = next; + size--; + shrank = true; + + if (curr == tail) { + // We removed the tail, update the tail pointer + tail = prev; + data = tail; + } + + // Before passing this to the handle function, + // null the relationship pointers + curr->prnt = nullptr; + curr->next = nullptr; + + // Pass on to the handle callback + handle(state, curr); + + } else { + prev = curr; + } + } + + if (size == 0) { + // We extracted everything. Reset the data pointer + data = nullptr; + } +} + +inline geometry *geometry::pop_first_part() { + const auto tail = get_last_part(); + + if (tail == nullptr) { + // No parts + SGL_ASSERT(size == 0); + return nullptr; + } + + const auto head = tail->next; + SGL_ASSERT(head != nullptr); + + // Unlink the head + tail->next = head->next; + head->prnt = nullptr; + head->next = nullptr; + + size--; + + if (tail == head) { + // Special case: this was the last element, reset the data pointer + SGL_ASSERT(size == 0); + data = nullptr; + } + + return head; +} + +inline const uint8_t *geometry::get_vertex_data() const { + SGL_ASSERT(is_single_part() || type == geometry_type::INVALID); + return static_cast(data); +} + +inline uint8_t *geometry::get_vertex_data() { + SGL_ASSERT(is_single_part() || type == geometry_type::INVALID); + return static_cast(data); +} + +inline void geometry::set_vertex_data(const uint8_t *data, uint32_t size) { + SGL_ASSERT(is_single_part() || type == geometry_type::INVALID); + // Points can have at most one vertex + SGL_ASSERT(type != geometry_type::POINT || size < 2); + this->data = const_cast(data); + this->size = size; +} + +inline void geometry::set_vertex_data(const char *data, uint32_t size) { + set_vertex_data(reinterpret_cast(data), size); +} + +inline size_t geometry::get_vertex_size() const 
{ + return sizeof(double) * (2 + has_z() + has_m()); +} + +inline vertex_xy geometry::get_vertex_xy(const uint32_t n) const { + SGL_ASSERT(is_single_part() || type == geometry_type::INVALID); + SGL_ASSERT(n < size); + + const auto vertex_stride = get_vertex_size(); + const auto vertex_buffer = get_vertex_data(); + const auto vertex_offset = vertex_buffer + vertex_stride * n; + + vertex_xy vertex = {0}; + memcpy(&vertex, vertex_offset, sizeof(vertex_xy)); + return vertex; +} + +inline vertex_xyzm geometry::get_vertex_xyzm(const uint32_t n) const { + SGL_ASSERT(is_single_part() || type == geometry_type::INVALID); + SGL_ASSERT(n < size); + + const auto vertex_stride = get_vertex_size(); + const auto vertex_buffer = get_vertex_data(); + const auto vertex_offset = vertex_buffer + vertex_stride * n; + + vertex_xyzm vertex = {0}; + memcpy(&vertex, vertex_offset, vertex_stride); + return vertex; +} + +inline void geometry::set_vertex_xy(const uint32_t n, const vertex_xy &vertex) { + SGL_ASSERT(is_single_part() || type == geometry_type::INVALID); + SGL_ASSERT(n < size); + + const auto vertex_stride = get_vertex_size(); + const auto vertex_buffer = get_vertex_data(); + const auto vertex_offset = vertex_buffer + vertex_stride * n; + + memcpy(vertex_offset, &vertex, sizeof(vertex_xy)); +} + +inline std::string geometry::type_to_string(const geometry_type type) { + switch (type) { + case geometry_type::POINT: + return "POINT"; + case geometry_type::LINESTRING: + return "LINESTRING"; + case geometry_type::POLYGON: + return "POLYGON"; + case geometry_type::MULTI_POINT: + return "MULTIPOINT"; + case geometry_type::MULTI_LINESTRING: + return "MULTILINESTRING"; + case geometry_type::MULTI_POLYGON: + return "MULTIPOLYGON"; + case geometry_type::MULTI_GEOMETRY: + return "GEOMETRYCOLLECTION"; + default: + return "INVALID"; + } +} + +} // namespace sgl + +//-------------------------------------------------------------------------- +// Operations 
+//-------------------------------------------------------------------------- + +namespace sgl { + +namespace point { +inline geometry make_empty(bool has_z = false, bool has_m = false) { + return geometry(geometry_type::POINT, has_z, has_m); +} +} // namespace point + +namespace linestring { +inline geometry make_empty(bool has_z = false, bool has_m = false) { + return geometry(geometry_type::LINESTRING, has_z, has_m); +} + +inline bool is_closed(const geometry *geom) { + SGL_ASSERT(geom->get_type() == geometry_type::LINESTRING); + + if (geom->get_count() < 2) { + return false; + } + + const auto first = geom->get_vertex_xyzm(0); + const auto last = geom->get_vertex_xyzm(geom->get_count() - 1); + return first == last; +} + +inline double signed_area(const geometry *geom) { + SGL_ASSERT(geom->get_type() == geometry_type::LINESTRING); + SGL_ASSERT(is_closed(geom)); + + const auto count = geom->get_count(); + + if (count < 3) { + return 0.0; + } + + const auto vertex_data = geom->get_vertex_data(); + const auto vertex_size = geom->get_vertex_size(); + + auto area = 0.0; + + double x0 = 0.0; + double x1 = 0.0; + double y1 = 0.0; + double y2 = 0.0; + + const auto x_data = vertex_data; + const auto y_data = vertex_data + sizeof(double); + + memcpy(&x0, x_data, sizeof(double)); + + for (uint32_t i = 1; i < count - 1; i++) { + memcpy(&x1, x_data + (i + 0) * vertex_size, sizeof(double)); + memcpy(&y1, y_data + (i + 1) * vertex_size, sizeof(double)); + memcpy(&y2, y_data + (i - 1) * vertex_size, sizeof(double)); + + area += (x1 - x0) * (y2 - y1); + } + + return area * 0.5; +} + +inline double length(const geometry *geom) { + SGL_ASSERT(geom->get_type() == geometry_type::LINESTRING); + + const auto count = geom->get_count(); + if (count < 2) { + return 0.0; + } + + const auto vertex_data = geom->get_vertex_data(); + const auto vertex_size = geom->get_vertex_size(); + + auto length = 0.0; + + vertex_xy prev = {0}; + vertex_xy next = {0}; + + memcpy(&prev, vertex_data, 
sizeof(vertex_xy)); + + for (uint32_t i = 1; i < count; i++) { + memcpy(&next, vertex_data + i * vertex_size, sizeof(vertex_xy)); + const auto dx = next.x - prev.x; + const auto dy = next.y - prev.y; + length += std::hypot(dx, dy); + prev = next; + } + + return length; +} + +} // namespace linestring + +namespace polygon { + +inline geometry make_empty(bool has_z = false, bool has_m = false) { + return geometry(geometry_type::POLYGON, has_z, has_m); +} + +inline double area(const geometry *geom) { + SGL_ASSERT(geom->get_type() == geometry_type::POLYGON); + double area = 0.0; + + auto part = geom->get_first_part(); + if (!part) { + return area; + } + + area += std::abs(linestring::signed_area(part)); + + while (part != geom->get_last_part()) { + part = part->get_next(); + area -= std::abs(linestring::signed_area(part)); + } + + return area; +} + +inline double perimeter(const geometry *geom) { + SGL_ASSERT(geom->get_type() == geometry_type::POLYGON); + + const auto tail = geom->get_last_part(); + if (!tail) { + return 0.0; + } + + double perimeter = 0.0; + auto part = tail; + do { + part = part->get_next(); + perimeter += linestring::length(part); + } while (part != tail); + + return perimeter; +} + +inline sgl::geometry make_from_box(sgl::allocator *alloc, double minx, double miny, double maxx, double maxy) { + auto poly = sgl::polygon::make_empty(false, false); + + const auto ring_mem = alloc->alloc(sizeof(sgl::geometry)); + const auto ring_ptr = new (ring_mem) sgl::geometry(sgl::geometry_type::LINESTRING, false, false); + + const auto data_mem = alloc->alloc(2 * sizeof(double) * 5); + const auto data_ptr = static_cast(data_mem); + + data_ptr[0] = minx; + data_ptr[1] = miny; + + data_ptr[2] = minx; + data_ptr[3] = maxy; + + data_ptr[4] = maxx; + data_ptr[5] = maxy; + + data_ptr[6] = maxx; + data_ptr[7] = miny; + + data_ptr[8] = minx; + data_ptr[9] = miny; + + ring_ptr->set_vertex_data(static_cast(data_mem), 5); + poly.append_part(ring_ptr); + + return poly; +} + 
+} // namespace polygon + +namespace multi_point { + +inline geometry make_empty(bool has_z = false, bool has_m = false) { + return geometry(geometry_type::MULTI_POINT, has_z, has_m); +} + +} // namespace multi_point + +namespace multi_linestring { +inline geometry make_empty(bool has_z = false, bool has_m = false) { + return geometry(geometry_type::MULTI_LINESTRING, has_z, has_m); +} + +inline bool is_closed(const geometry *geom) { + SGL_ASSERT(geom->get_type() == geometry_type::MULTI_LINESTRING); + + const auto tail = geom->get_last_part(); + if (!tail) { + return false; + } + + auto part = tail; + do { + part = part->get_next(); + if (!linestring::is_closed(part)) { + return false; + } + } while (part != tail); + + return true; +} + +inline double length(const geometry *geom) { + SGL_ASSERT(geom->get_type() == geometry_type::MULTI_LINESTRING); + + const auto tail = geom->get_last_part(); + if (!tail) { + return 0.0; + } + + double length = 0.0; + auto part = tail; + do { + part = part->get_next(); + length += linestring::length(part); + } while (part != tail); + + return length; +} + +} // namespace multi_linestring + +namespace multi_polygon { +inline geometry make_empty(bool has_z = false, bool has_m = false) { + return geometry(geometry_type::MULTI_POLYGON, has_z, has_m); +} + +inline double area(const geometry *geom) { + SGL_ASSERT(geom->get_type() == geometry_type::MULTI_POLYGON); + + const auto tail = geom->get_last_part(); + if (!tail) { + return 0.0; + } + + double area = 0.0; + auto part = tail; + do { + part = part->get_next(); + area += polygon::area(part); + } while (part != tail); + + return area; +} + +inline double perimeter(const geometry *geom) { + SGL_ASSERT(geom->get_type() == geometry_type::MULTI_POLYGON); + + const auto tail = geom->get_last_part(); + if (!tail) { + return 0.0; + } + + double perimeter = 0.0; + auto part = tail; + do { + part = part->get_next(); + perimeter += polygon::perimeter(part); + } while (part != tail); + + return 
perimeter; +} + +} // namespace multi_polygon + +namespace multi_geometry { + +inline geometry make_empty(bool has_z = false, bool has_m = false) { + return geometry(geometry_type::MULTI_GEOMETRY, has_z, has_m); +} +inline double area(const geometry *geom); +inline double length(const geometry *geom); +inline double perimeter(const geometry *geom); + +} // namespace multi_geometry + +namespace ops { + +double area(const geometry *geom); +double perimeter(const geometry *geom); +double length(const geometry *geom); +size_t vertex_count(const geometry *geom); +int32_t max_surface_dimension(const geometry *geom, bool ignore_empty); + +double distance(const geometry* lhs, const geometry* rhs); + +typedef void (*visit_func)(void *state, const geometry *part); +void visit_by_dimension(const geometry *geom, int surface_dimension, void *state, visit_func func); + +typedef void (*map_vertex_func)(void *state, vertex_xyzm *vertex); +void replace_vertices(allocator *alloc, geometry *geom, void *state, map_vertex_func callback); + +box_xy extent_xy(const geometry *geom); +void force_zm(allocator &alloc, geometry *geom, bool has_z, bool has_m, double default_z, double default_m); + +size_t to_wkb_size(const geometry *geom); +size_t to_wkb(const geometry *geom, uint8_t *buffer, size_t size); + +enum SGL_WKB_READER_ERROR { + SGL_WKB_READER_OK = 0, + SGL_WKB_READER_UNSUPPORTED_TYPE = 1, + SGL_WKB_READER_OUT_OF_BOUNDS = 2, + SGL_WKB_READER_RECURSION_LIMIT = 3, + SGL_WKB_READER_MIXED_ZM = 4, + SGL_WKB_INVALID_CHILD_TYPE = 5, +}; + +struct wkb_reader { + // Set by the user + allocator *alloc; + const char *buf; + const char *end; + bool copy_vertices; + bool allow_mixed_zm; + bool nan_as_empty; + + uint32_t *stack_buf; + uint32_t stack_cap; + + // Set by the parser + const char *pos; + size_t depth; + SGL_WKB_READER_ERROR error; + + uint32_t type_id; + bool le; + bool has_mixed_zm; + bool has_any_z; + bool has_any_m; +}; + +bool wkb_reader_try_parse(wkb_reader *state, geometry *out); 
+std::string wkb_reader_get_error_message(const wkb_reader *state); + +// Does not require an allocator +bool wkb_reader_try_parse_stats(wkb_reader *state, box_xy *out_extent, size_t *out_vertex_count); + +struct wkt_reader { + // Set by the user + allocator *alloc; + const char *buf; + const char *end; + + // Set by the parser + const char *pos; + const char *error; +}; + +bool wkt_reader_try_parse(wkt_reader *state, geometry *out); +std::string wkt_reader_get_error_message(const wkt_reader *state); + +geometry extract_points(sgl::geometry *geom); +geometry extract_linestrings(sgl::geometry *geom); +geometry extract_polygons(sgl::geometry *geom); + +// TODO: this will only check that geometries have enough vertices to be valid. +// It does NOT check topological validity. +bool is_valid(const sgl::geometry *geom); + +} // namespace ops + +} // namespace sgl + +//-------------------------------------------------------------------------- +// Implementation +//-------------------------------------------------------------------------- + +namespace sgl { +namespace ops { +inline double area(const geometry *geom) { + switch (geom->get_type()) { + case geometry_type::POLYGON: { + return polygon::area(geom); + } + case geometry_type::MULTI_POLYGON: { + return multi_polygon::area(geom); + } + case geometry_type::MULTI_GEOMETRY: { + return multi_geometry::area(geom); + } + default: + return 0.0; + } +} + +inline double perimeter(const geometry *geom) { + switch (geom->get_type()) { + case geometry_type::POLYGON: { + return polygon::perimeter(geom); + } + case geometry_type::MULTI_POLYGON: { + return multi_polygon::perimeter(geom); + } + case geometry_type::MULTI_GEOMETRY: { + return multi_geometry::perimeter(geom); + } + default: + return 0.0; + } +} + +inline double length(const geometry *geom) { + switch (geom->get_type()) { + case geometry_type::LINESTRING: { + return linestring::length(geom); + } + case geometry_type::MULTI_LINESTRING: { + return 
multi_linestring::length(geom); + } + case geometry_type::MULTI_GEOMETRY: { + return multi_geometry::length(geom); + } + default: + return 0; + } +} + +inline size_t vertex_count(const geometry *geom) { + if (!geom) { + return 0; + } + + size_t count = 0; + const geometry *part = geom; + const geometry *root = part->get_parent(); + + while (true) { + switch (part->get_type()) { + case geometry_type::POINT: + case geometry_type::LINESTRING: + count += part->get_count(); + break; + case geometry_type::POLYGON: + case geometry_type::MULTI_POINT: + case geometry_type::MULTI_LINESTRING: + case geometry_type::MULTI_POLYGON: + case geometry_type::MULTI_GEOMETRY: + if (!part->is_empty()) { + part = part->get_first_part(); + continue; + } + break; + default: + SGL_ASSERT(false); + return 0; + } + + while (true) { + const auto parent = part->get_parent(); + if (parent == root) { + return count; + } + + if (part != parent->get_last_part()) { + part = part->get_next(); + break; + } + + part = parent; + } + } + return count; +} + +inline int32_t max_surface_dimension(const geometry *geom, bool ignore_empty) { + if (!geom) { + return 0; + } + + int32_t max_dim = 0; + const geometry *part = geom; + const geometry *root = part->get_parent(); + + while (true) { + if (!(part->is_empty() && ignore_empty)) { + switch (part->get_type()) { + case geometry_type::POINT: + case geometry_type::MULTI_POINT: + max_dim = std::max(max_dim, 0); + break; + case geometry_type::LINESTRING: + case geometry_type::MULTI_LINESTRING: + max_dim = std::max(max_dim, 1); + break; + case geometry_type::POLYGON: + case geometry_type::MULTI_POLYGON: + max_dim = std::max(max_dim, 2); + break; + case geometry_type::MULTI_GEOMETRY: + if (!part->is_empty()) { + part = part->get_first_part(); + continue; + } + break; + default: + SGL_ASSERT(false); + return 0; + } + } + + while (true) { + const auto parent = part->get_parent(); + if (parent == root) { + return max_dim; + } + + if (part != parent->get_last_part()) { 
+ part = part->get_next(); + break; + } + + part = parent; + } + } +} + +inline void visit_by_dimension(const geometry *geom, int surface_dimension, void *state, visit_func func) { + if (!geom) { + return; + } + + const geometry *part = geom; + const geometry *root = part->get_parent(); + + while (true) { + switch (part->get_type()) { + case geometry_type::POINT: + case geometry_type::MULTI_POINT: + if(surface_dimension == 0) { + func(state, part); + } + break; + case geometry_type::LINESTRING: + case geometry_type::MULTI_LINESTRING: + if(surface_dimension == 1) { + func(state, part); + } + break; + case geometry_type::POLYGON: + case geometry_type::MULTI_POLYGON: + if(surface_dimension == 2) { + func(state, part); + } + break; + case geometry_type::MULTI_GEOMETRY: + if (!part->is_empty()) { + part = part->get_first_part(); + continue; + } + break; + default: + SGL_ASSERT(false); + return; + } + + while (true) { + const auto parent = part->get_parent(); + if (parent == root) { + return; + } + + if (part != parent->get_last_part()) { + part = part->get_next(); + break; + } + + part = parent; + } + } +} + +inline void replace_vertices(allocator *alloc, geometry *geom, void *state, map_vertex_func callback) { + if (!geom) { + return; + } + + geometry *part = geom; + const geometry *root = part->get_parent(); + + while (true) { + switch (part->get_type()) { + case geometry_type::POINT: + case geometry_type::LINESTRING: { + const auto vertex_count = part->get_count(); + if(vertex_count == 0) { + break; + } + const auto vertex_size = part->get_vertex_size(); + const auto old_vertex_data = part->get_vertex_data(); + const auto new_vertex_data = static_cast(alloc->alloc(part->get_count() * vertex_size)); + + vertex_xyzm vertex = {0, 0, 0, 0}; + for(uint32_t v_idx = 0; v_idx < part->get_count(); v_idx++) { + memcpy(&vertex, old_vertex_data + v_idx * vertex_size, vertex_size); + callback(state, &vertex); + memcpy(new_vertex_data + v_idx * vertex_size, &vertex, vertex_size); 
+ } + part->set_vertex_data(new_vertex_data, vertex_count); + } break; + case geometry_type::POLYGON: + case geometry_type::MULTI_POINT: + case geometry_type::MULTI_LINESTRING: + case geometry_type::MULTI_POLYGON: + case geometry_type::MULTI_GEOMETRY: { + if (!part->is_empty()) { + part = part->get_first_part(); + continue; + } + } break; + default: + SGL_ASSERT(false); + return; + } + + while (true) { + const auto parent = part->get_parent(); + if (parent == root) { + // Done! + return; + } + + if (part != parent->get_last_part()) { + part = part->get_next(); + break; + } + + part = parent; + } + } +} + +template +inline void visit_vertices(const geometry *geom, F callback) { + switch (geom->get_type()) { + case geometry_type::POINT: + case geometry_type::LINESTRING: { + auto vertex_data = geom->get_vertex_data(); + if (vertex_data == nullptr) { + return; + } + auto vertex_size = geom->get_vertex_size(); + for (uint32_t i = 0; i < geom->get_count(); i++) { + callback(vertex_data + i * vertex_size); + } + } + return; + case geometry_type::POLYGON: + case geometry_type::MULTI_POINT: + case geometry_type::MULTI_LINESTRING: + case geometry_type::MULTI_POLYGON: + case geometry_type::MULTI_GEOMETRY: { + const auto tail = geom->get_last_part(); + if (!tail) { + return; + } + auto part = tail; + do { + part = part->get_next(); + visit_vertices(part, callback); + } while (part != tail); + } + return; + default: + return; + } +} + +// Non-recursive +inline bool try_get_extent_xy(const geometry *geom, box_xy *out) { + + auto part = geom; + if (part == nullptr) { + return false; + } + + box_xy result = box_xy::smallest(); + + const auto root = part->get_parent(); + bool has_any_vertices = false; + + while (true) { + switch (part->get_type()) { + case geometry_type::POINT: + case geometry_type::LINESTRING: { + const auto vertex_count = part->get_count(); + + has_any_vertices |= vertex_count > 0; + + for (uint32_t i = 0; i < vertex_count; i++) { + const auto vertex = 
part->get_vertex_xy(i); + result.min.x = std::min(result.min.x, vertex.x); + result.min.y = std::min(result.min.y, vertex.y); + result.max.x = std::max(result.max.x, vertex.x); + result.max.y = std::max(result.max.y, vertex.y); + } + } break; + case geometry_type::POLYGON: { + if (!part->is_empty()) { + const auto shell = part->get_first_part(); + const auto vertex_count = shell->get_count(); + has_any_vertices |= vertex_count > 0; + for (uint32_t i = 0; i < vertex_count; i++) { + const auto vertex = shell->get_vertex_xy(i); + result.min.x = std::min(result.min.x, vertex.x); + result.min.y = std::min(result.min.y, vertex.y); + result.max.x = std::max(result.max.x, vertex.x); + result.max.y = std::max(result.max.y, vertex.y); + } + } + } break; + case geometry_type::MULTI_POINT: + case geometry_type::MULTI_LINESTRING: + case geometry_type::MULTI_POLYGON: + case geometry_type::MULTI_GEOMETRY: { + if (!part->is_empty()) { + part = part->get_first_part(); + // continue the outer loop here! + continue; + } + } break; + default: + SGL_ASSERT(false); + return false; + } + + // Now go up/sideways + while (true) { + const auto parent = part->get_parent(); + if (parent == root) { + if (has_any_vertices) { + *out = result; + return true; + } + return false; + } + + if (part != parent->get_last_part()) { + // Go sideways + part = part->get_next(); + break; + } + + // Go up + part = parent; + } + } +} + +// Result for visit action +enum SGL_VISIT_RESULT { + // Continue the traversal, recurse down into child parts + SGL_VISIT_CONT = 0, + // Stop the traversal immediately + SGL_VISIT_EXIT = 1, + // Continue the traversal, but skip the current part and dont recurse down + SGL_VISIT_SKIP = 2, +}; + +struct visit_callbacks { + // Return false to stop the traversal + SGL_VISIT_RESULT (*on_enter_part)(void *state, const geometry *part, const geometry *parent) = nullptr; + SGL_VISIT_RESULT (*on_leave_part)(void *state, const geometry *part, const geometry *parent) = nullptr; +}; + 
+inline void visit(const geometry *geom, const geometry *root, const visit_callbacks *visitor, void *state) { + +#define HANDLE_ENTER_PART(PART, PARENT) \ + if (visitor->on_enter_part) { \ + SGL_VISIT_RESULT res = visitor->on_enter_part(state, PART, PARENT); \ + if (res == SGL_VISIT_EXIT) { \ + return; \ + } else if (res == SGL_VISIT_SKIP) { \ + break; \ + } \ + } +#define HANDLE_LEAVE_PART(PART, PARENT) \ + if (visitor->on_leave_part) { \ + SGL_VISIT_RESULT res = visitor->on_leave_part(state, PART, PARENT); \ + if (res == SGL_VISIT_EXIT) { \ + return; \ + } else if (res == SGL_VISIT_SKIP) { \ + break; \ + } \ + } +#define HANDLE_ENTER_CHILD_PART(PART, PARENT) \ + if (visitor->on_enter_part) { \ + SGL_VISIT_RESULT res = visitor->on_enter_part(state, PART, PARENT); \ + if (res == SGL_VISIT_EXIT) { \ + return; \ + } else if (res == SGL_VISIT_SKIP) { \ + continue; \ + } \ + } +#define HANDLE_LEAVE_CHILD_PART(PART, PARENT) \ + if (visitor->on_leave_part) { \ + SGL_VISIT_RESULT res = visitor->on_leave_part(state, PART, PARENT); \ + if (res == SGL_VISIT_EXIT) { \ + return; \ + } else if (res == SGL_VISIT_SKIP) { \ + continue; \ + } \ + } + + auto part = geom; + if (part == nullptr) { + return; + } + + auto parent = part->get_parent(); + + while (true) { + switch (part->get_type()) { + case geometry_type::POINT: + case geometry_type::LINESTRING: { + HANDLE_ENTER_PART(part, parent); + HANDLE_LEAVE_PART(part, parent); + } break; + case geometry_type::POLYGON: { + + HANDLE_ENTER_PART(part, parent); + + const auto tail = part->get_last_part(); + if (tail != nullptr) { + auto head = tail; + do { + SGL_ASSERT(head != nullptr); + SGL_ASSERT(head->get_type() == geometry_type::LINESTRING); + + head = head->get_next(); + + HANDLE_ENTER_CHILD_PART(head, part); + HANDLE_LEAVE_CHILD_PART(head, part); + + } while (head != tail); + } + + HANDLE_LEAVE_PART(part, parent); + } break; + case geometry_type::MULTI_POINT: { + HANDLE_ENTER_PART(part, parent); + + const auto tail = 
part->get_last_part(); + if (tail != nullptr) { + auto head = tail; + do { + SGL_ASSERT(head != nullptr); + SGL_ASSERT(head->get_type() == geometry_type::POINT); + + head = head->get_next(); + + HANDLE_ENTER_CHILD_PART(head, part); + HANDLE_LEAVE_CHILD_PART(head, part); + + } while (head != tail); + } + HANDLE_LEAVE_PART(part, parent); + + } break; + case geometry_type::MULTI_LINESTRING: { + HANDLE_ENTER_PART(part, parent); + + const auto tail = part->get_last_part(); + if (tail != nullptr) { + auto head = tail; + do { + SGL_ASSERT(head != nullptr); + SGL_ASSERT(head->get_type() == geometry_type::LINESTRING); + + head = head->get_next(); + + HANDLE_ENTER_CHILD_PART(head, part); + HANDLE_LEAVE_CHILD_PART(head, part); + + } while (head != tail); + } + + HANDLE_LEAVE_PART(part, parent); + } break; + case geometry_type::MULTI_POLYGON: { + HANDLE_ENTER_PART(part, parent); + + const auto tail = part->get_last_part(); + if (tail != nullptr) { + auto head = tail; + do { + SGL_ASSERT(head != nullptr); + SGL_ASSERT(head->get_type() == geometry_type::POLYGON); + + head = head->get_next(); + + HANDLE_ENTER_CHILD_PART(head, part); + + const auto ring_tail = head->get_last_part(); + if (ring_tail != nullptr) { + auto ring_head = ring_tail; + do { + SGL_ASSERT(ring_head != nullptr); + SGL_ASSERT(ring_head->get_type() == geometry_type::LINESTRING); + + ring_head = ring_head->get_next(); + + HANDLE_ENTER_CHILD_PART(ring_head, head); + HANDLE_LEAVE_CHILD_PART(ring_head, head); + + } while (ring_head != ring_tail); + } + + HANDLE_LEAVE_CHILD_PART(head, part); + + } while (head != part->get_last_part()); + } + + HANDLE_LEAVE_PART(part, parent); + } break; + case geometry_type::MULTI_GEOMETRY: { + HANDLE_ENTER_PART(part, parent); + if (!part->is_empty()) { + // Recurse down + part = part->get_first_part(); + continue; + } + // Otherwise, end the multi geometry + HANDLE_LEAVE_PART(part, parent); + } break; + default: { + // Unknown type! 
+ SGL_ASSERT(false); + return; + } + } + + // Now go up/sideways + while (true) { + + if (parent == root) { + return; + } + + if (part != parent->get_last_part()) { + // We should only get here if we are in a multi geometry + SGL_ASSERT(parent->get_type() == geometry_type::MULTI_GEOMETRY); + + // Go sideways + part = part->get_next(); + break; + } + + // Go up + part = parent; + parent = part->get_parent(); + + SGL_ASSERT(part->get_type() == geometry_type::MULTI_GEOMETRY); + + // We just visited the last child of a multi geometry. We should end it. + // we use LEAVE_CHILD_PART here to trigger a continue in case we skip. + HANDLE_LEAVE_CHILD_PART(part, parent); + } + } + +#undef HANDLE_ENTER_PART +#undef HANDLE_LEAVE_PART +#undef HANDLE_ENTER_CHILD_PART +#undef HANDLE_LEAVE_CHILD_PART +} + +} // namespace ops + +} // namespace sgl + +namespace sgl { +namespace multi_geometry { + +inline double area(const geometry *geom) { + SGL_ASSERT(geom->get_type() == geometry_type::MULTI_GEOMETRY); + + const auto tail = geom->get_last_part(); + if (!tail) { + return 0.0; + } + + double area = 0.0; + auto part = tail; + do { + part = part->get_next(); + area += ops::area(part); + } while (part != tail); + + return area; +} + +inline double length(const geometry *geom) { + SGL_ASSERT(geom->get_type() == geometry_type::MULTI_GEOMETRY); + + const auto tail = geom->get_last_part(); + if (!tail) { + return 0.0; + } + double length = 0.0; + auto part = tail; + do { + part = part->get_next(); + length += ops::length(part); + } while (part != tail); + return length; +} + +inline double perimeter(const geometry *geom) { + SGL_ASSERT(geom->get_type() == geometry_type::MULTI_GEOMETRY); + + const auto tail = geom->get_last_part(); + if (!tail) { + return 0.0; + } + double perimeter = 0.0; + auto part = tail; + do { + part = part->get_next(); + perimeter += ops::perimeter(part); + } while (part != tail); + return perimeter; +} + +} // namespace multi_geometry + +namespace util { + +inline 
double haversine_distance(const double lat1_p, const double lon1_p, const double lat2_p, const double lon2_p) {
+    // Radius of the earth in meters (6371 km), so the returned distance is in meters
+    constexpr auto R = 6371000.0;
+    constexpr auto PI = 3.14159265358979323846;
+
+    // Convert the input angles from degrees to radians
+    const auto lat1 = lat1_p * PI / 180.0;
+    const auto lon1 = lon1_p * PI / 180.0;
+    const auto lat2 = lat2_p * PI / 180.0;
+    const auto lon2 = lon2_p * PI / 180.0;
+
+    const auto dlat = lat2 - lat1;
+    const auto dlon = lon2 - lon1;
+
+    const auto a =
+        std::pow(std::sin(dlat / 2.0), 2.0) + std::cos(lat1) * std::cos(lat2) * std::pow(std::sin(dlon / 2.0), 2.0);
+    const auto c = 2.0 * std::atan2(std::sqrt(a), std::sqrt(1.0 - a));
+
+    return R * c;
+}
+
+} // namespace util
+
+} // namespace sgl
diff --git a/src/spatial/CMakeLists.txt b/src/spatial/CMakeLists.txt
new file mode 100644
index 00000000..fbf39102
--- /dev/null
+++ b/src/spatial/CMakeLists.txt
@@ -0,0 +1,12 @@
+add_subdirectory(util)
+add_subdirectory(modules)
+add_subdirectory(geometry)
+add_subdirectory(index)
+
+set(EXTENSION_SOURCES
+    ${EXTENSION_SOURCES}
+    ${CMAKE_CURRENT_SOURCE_DIR}/spatial_extension.cpp
+    ${CMAKE_CURRENT_SOURCE_DIR}/spatial_types.cpp
+    ${CMAKE_CURRENT_SOURCE_DIR}/spatial_optimizers.cpp
+    ${CMAKE_CURRENT_SOURCE_DIR}/spatial_geoarrow.cpp
+PARENT_SCOPE)
\ No newline at end of file
diff --git a/spatial/src/spatial/core/geometry/CMakeLists.txt b/src/spatial/geometry/CMakeLists.txt
similarity index 56%
rename from spatial/src/spatial/core/geometry/CMakeLists.txt
rename to src/spatial/geometry/CMakeLists.txt
index 0367fdb7..9c582ff3 100644
--- a/spatial/src/spatial/core/geometry/CMakeLists.txt
+++ b/src/spatial/geometry/CMakeLists.txt
@@ -1,10 +1,6 @@
 set(EXTENSION_SOURCES
     ${EXTENSION_SOURCES}
-    ${CMAKE_CURRENT_SOURCE_DIR}/geometry.cpp
-    ${CMAKE_CURRENT_SOURCE_DIR}/geometry_serialization.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/geometry_processor.cpp
-    ${CMAKE_CURRENT_SOURCE_DIR}/wkb_reader.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/wkb_writer.cpp
-    
${CMAKE_CURRENT_SOURCE_DIR}/wkt_reader.cpp - PARENT_SCOPE -) \ No newline at end of file + ${CMAKE_CURRENT_SOURCE_DIR}/geometry_serialization.cpp + PARENT_SCOPE) \ No newline at end of file diff --git a/spatial/include/spatial/core/geometry/bbox.hpp b/src/spatial/geometry/bbox.hpp similarity index 93% rename from spatial/include/spatial/core/geometry/bbox.hpp rename to src/spatial/geometry/bbox.hpp index cd1393f3..b919e1e9 100644 --- a/spatial/include/spatial/core/geometry/bbox.hpp +++ b/src/spatial/geometry/bbox.hpp @@ -1,11 +1,9 @@ #pragma once -#include "spatial/common.hpp" -#include "spatial/core/geometry/vertex.hpp" +#include "spatial/geometry/vertex.hpp" +#include "duckdb/common/limits.hpp" -namespace spatial { - -namespace core { +namespace duckdb { template struct Box { @@ -93,6 +91,4 @@ struct Box { template using Box2D = Box>; -} // namespace core - -} // namespace spatial \ No newline at end of file +} // namespace duckdb \ No newline at end of file diff --git a/src/spatial/geometry/geometry_processor.cpp b/src/spatial/geometry/geometry_processor.cpp new file mode 100644 index 00000000..d663b8b1 --- /dev/null +++ b/src/spatial/geometry/geometry_processor.cpp @@ -0,0 +1,7 @@ +#include "spatial/geometry/geometry_processor.hpp" + +namespace duckdb { + +constexpr double VertexData::EMPTY_DATA; + +} // namespace duckdb diff --git a/spatial/include/spatial/core/geometry/geometry_processor.hpp b/src/spatial/geometry/geometry_processor.hpp similarity index 97% rename from spatial/include/spatial/core/geometry/geometry_processor.hpp rename to src/spatial/geometry/geometry_processor.hpp index c70b763c..a807a019 100644 --- a/spatial/include/spatial/core/geometry/geometry_processor.hpp +++ b/src/spatial/geometry/geometry_processor.hpp @@ -1,13 +1,9 @@ #pragma once -#include "spatial/common.hpp" -#include "spatial/core/geometry/geometry.hpp" -#include "spatial/core/util/cursor.hpp" -#include "spatial/core/geometry/geometry_type.hpp" +#include 
"spatial/util/cursor.hpp" +#include "spatial/geometry/geometry_type.hpp" -namespace spatial { - -namespace core { +namespace duckdb { //------------------------------------------------------------------------ // GeometryProcessor @@ -338,6 +334,4 @@ class GeometryProcessor { } }; -} // namespace core - -} // namespace spatial \ No newline at end of file +} // namespace duckdb \ No newline at end of file diff --git a/spatial/include/spatial/core/geometry/geometry_properties.hpp b/src/spatial/geometry/geometry_properties.hpp similarity index 93% rename from spatial/include/spatial/core/geometry/geometry_properties.hpp rename to src/spatial/geometry/geometry_properties.hpp index a3d042c9..971b4f1d 100644 --- a/spatial/include/spatial/core/geometry/geometry_properties.hpp +++ b/src/spatial/geometry/geometry_properties.hpp @@ -1,9 +1,6 @@ #pragma once -#include "spatial/common.hpp" -namespace spatial { - -namespace core { +namespace duckdb { static constexpr const uint8_t GEOMETRY_VERSION = 0; @@ -62,6 +59,4 @@ struct GeometryProperties { } }; -} // namespace core - -} // namespace spatial \ No newline at end of file +} // namespace duckdb \ No newline at end of file diff --git a/src/spatial/geometry/geometry_serialization.cpp b/src/spatial/geometry/geometry_serialization.cpp new file mode 100644 index 00000000..2ed132b1 --- /dev/null +++ b/src/spatial/geometry/geometry_serialization.cpp @@ -0,0 +1,331 @@ +#include "spatial/geometry/geometry_serialization.hpp" +#include "spatial/util/binary_reader.hpp" +#include "spatial/util/binary_writer.hpp" +#include "spatial/util/math.hpp" +#include "spatial/geometry/sgl.hpp" + +#include "duckdb/common/exception.hpp" +#include "duckdb/storage/arena_allocator.hpp" + +namespace duckdb { + +// TODO: Make non-recursive + +static size_t GetRequiredSizeInternal(const sgl::geometry *geom) { + const auto vertex_size = geom->get_vertex_size(); + const auto part_count = geom->get_count(); + + switch (geom->get_type()) { + case 
sgl::geometry_type::POINT:
+    case sgl::geometry_type::LINESTRING:
+        // 4 bytes for the type
+        // 4 bytes for the vertex count
+        // sizeof(vertex) * count;
+        return 4 + 4 + part_count * vertex_size;
+    case sgl::geometry_type::POLYGON: {
+        // Polygons are special because they may pad between the ring counts and the ring data
+        // 4 bytes for the type
+        // 4 bytes for the ring count
+        // per ring: 4 bytes for its vertex count + sizeof(vertex) * count; 4 more bytes if the ring count is odd
+        size_t size = 4 + 4;
+
+        const auto tail = geom->get_last_part();
+        if (!tail) {
+            return size;
+        }
+        auto part = tail;
+        do {
+            part = part->get_next();
+            size += 4 + part->get_count() * vertex_size;
+        } while (part != tail);
+
+        if (part_count % 2 == 1) {
+            size += 4;
+        }
+        return size;
+    }
+    case sgl::geometry_type::MULTI_POINT:
+    case sgl::geometry_type::MULTI_LINESTRING:
+    case sgl::geometry_type::MULTI_POLYGON:
+    case sgl::geometry_type::MULTI_GEOMETRY: {
+        // 4 bytes for the type
+        // 4 bytes for the part count
+        // recursive call for each part
+        size_t size = 4 + 4;
+        const auto tail = geom->get_last_part();
+        if (!tail) {
+            return size;
+        }
+        auto part = tail;
+        do {
+            part = part->get_next();
+            size += GetRequiredSizeInternal(part);
+        } while (part != tail);
+        return size;
+    }
+    default:
+        D_ASSERT(false);
+        return 0;
+    }
+}
+
+size_t Serde::GetRequiredSize(const sgl::geometry &geom) {
+    const auto type = geom.get_type();
+
+    const auto has_bbox = type != sgl::geometry_type::POINT && !geom.is_empty();
+    const auto has_z = geom.has_z();
+    const auto has_m = geom.has_m();
+
+    const auto dims = 2 + (has_z ? 1 : 0) + (has_m ? 1 : 0);
+
+    const auto head_size = 4 + 4; // type + props + padding
+    const auto geom_size = GetRequiredSizeInternal(&geom);
+    const auto bbox_size = has_bbox ? 
dims * sizeof(float) * 2 : 0;
+
+    const auto full_size = head_size + geom_size + bbox_size;
+
+    // Check that the size is a multiple of 8
+    D_ASSERT(full_size % 8 == 0);
+
+    return full_size;
+}
+
+static void SerializeVertices(BinaryWriter &cursor, const sgl::geometry *geom, const uint32_t count, const bool has_z,
+                              const bool has_m, const bool has_bbox, const uint32_t vsize, sgl::box_xyzm &bbox) {
+
+    const auto verts = geom->get_vertex_data();
+
+    // Reserve room in the output for `count` packed vertices of `vsize` bytes each
+    const auto dst = cursor.Reserve(count * vsize);
+
+    if (!has_bbox) {
+        // Fast path: no bounding box to maintain, so issue one memcpy to the cursor
+        memcpy(dst, verts, count * vsize);
+        return;
+    }
+
+    sgl::vertex_xyzm vertex = {0};
+    for (uint32_t i = 0; i < count; i++) {
+
+        // Load the packed vertex from the geometry (only the first `vsize` bytes of `vertex` are overwritten)
+        memcpy(&vertex, verts + i * vsize, vsize);
+
+        // Copy the vertex to the cursor
+        memcpy(dst + i * vsize, &vertex, vsize);
+
+        bbox.min.x = std::min(bbox.min.x, vertex.x);
+        bbox.min.y = std::min(bbox.min.y, vertex.y);
+        bbox.max.x = std::max(bbox.max.x, vertex.x);
+        bbox.max.y = std::max(bbox.max.y, vertex.y);
+        // Assumes layout x, y, zm, m: the third packed ordinate (Z, or M when there is no Z) lands in `zm`
+        if (has_z) {
+            bbox.min.zm = std::min(bbox.min.zm, vertex.zm);
+            bbox.max.zm = std::max(bbox.max.zm, vertex.zm);
+        }
+        if (has_m) {
+            bbox.min.m = std::min(bbox.min.m, has_z ? vertex.m : vertex.zm);
+            bbox.max.m = std::max(bbox.max.m, has_z ? vertex.m : vertex.zm);
+        }
+    }
+}
+
+static void SerializeRecursive(BinaryWriter &cursor, const sgl::geometry *geom, const bool has_z, const bool has_m,
+                               const bool has_bbox, const uint32_t vsize, sgl::box_xyzm &bbox) {
+    const auto type = geom->get_type();
+    const auto count = geom->get_count();
+
+    if (type < sgl::geometry_type::POINT || type > sgl::geometry_type::MULTI_GEOMETRY) {
+        throw InvalidInputException("Cannot serialize geometry of type %d", static_cast(type));
+    }
+
+    // The GeometryType enum used to start with POINT = 0
+    // but now it starts with INVALID = 0, so we need to subtract 1
+    cursor.Write(static_cast(type) - 1);
+    cursor.Write(count);
+
+    switch (type) {
+    case 
sgl::geometry_type::POINT: + case sgl::geometry_type::LINESTRING: + SerializeVertices(cursor, geom, count, has_z, has_m, has_bbox, vsize, bbox); + break; + case sgl::geometry_type::POLYGON: { + auto ring_cursor = cursor; + cursor.Skip((count * 4) + (count % 2 == 1 ? 4 : 0), true); + + const auto tail = geom->get_last_part(); + if (!tail) { + break; + } + + auto ring = tail; + do { + ring = ring->get_next(); + ring_cursor.Write(ring->get_count()); + SerializeVertices(cursor, ring, ring->get_count(), has_z, has_m, has_bbox, vsize, bbox); + } while (ring != tail); + + } break; + case sgl::geometry_type::MULTI_POINT: + case sgl::geometry_type::MULTI_LINESTRING: + case sgl::geometry_type::MULTI_POLYGON: + case sgl::geometry_type::MULTI_GEOMETRY: { + const auto tail = geom->get_last_part(); + if (!tail) { + break; + } + + auto part = tail; + do { + part = part->get_next(); + SerializeRecursive(cursor, part, has_z, has_m, has_bbox, vsize, bbox); + } while (part != tail); + } break; + default: + D_ASSERT(false); + } +} + +void Serde::Serialize(const sgl::geometry &geom, char *buffer, size_t buffer_size) { + const auto type = geom.get_type(); + + const auto has_bbox = type != sgl::geometry_type::POINT && !geom.is_empty(); + const auto has_z = geom.has_z(); + const auto has_m = geom.has_m(); + + // Set flags + uint8_t flags = 0; + flags |= has_z ? 0x01 : 0; + flags |= has_m ? 0x02 : 0; + flags |= has_bbox ? 0x04 : 0; + + BinaryWriter cursor(buffer, buffer_size); + + if (type == sgl::geometry_type::INVALID) { + throw InvalidInputException("Cannot serialize geometry of type INVALID"); + } + + // The GeometryType enum used to start with POINT = 0 + // but now it starts with INVALID = 0, so we need to subtract 1 + cursor.Write(static_cast(type) - 1); + cursor.Write(flags); + cursor.Write(0); // unused for now + cursor.Write(0); // padding + + const auto dims = 2 + (has_z ? 1 : 0) + (has_m ? 
1 : 0); + const auto vert_size = dims * sizeof(double); + const auto bbox_size = has_bbox ? dims * sizeof(float) * 2 : 0; + + // Setup a bbox to store the min/max values + sgl::box_xyzm bbox = sgl::box_xyzm::smallest(); + + auto bbox_cursor = cursor; + cursor.Skip(bbox_size, true); + + SerializeRecursive(cursor, &geom, has_z, has_m, has_bbox, vert_size, bbox); + + if (has_bbox) { + bbox_cursor.Write(MathUtil::DoubleToFloatDown(bbox.min.x)); // xmin + bbox_cursor.Write(MathUtil::DoubleToFloatDown(bbox.min.y)); // ymin + bbox_cursor.Write(MathUtil::DoubleToFloatUp(bbox.max.x)); // xmax + bbox_cursor.Write(MathUtil::DoubleToFloatUp(bbox.max.y)); // ymax + + if (has_z) { + bbox_cursor.Write(MathUtil::DoubleToFloatDown(bbox.min.zm)); // zmin + bbox_cursor.Write(MathUtil::DoubleToFloatUp(bbox.max.zm)); // zmax + } + + if (has_m) { + bbox_cursor.Write(MathUtil::DoubleToFloatDown(bbox.min.m)); // mmin + bbox_cursor.Write(MathUtil::DoubleToFloatUp(bbox.max.m)); // mmax + } + } +} + +static void DeserializeRecursive(BinaryReader &cursor, sgl::geometry &geom, const bool has_z, const bool has_m, + ArenaAllocator &arena) { + const auto count = cursor.Read(); + switch (geom.get_type()) { + case sgl::geometry_type::POINT: + case sgl::geometry_type::LINESTRING: { + const auto verts = cursor.Reserve(count * geom.get_vertex_size()); + geom.set_vertex_data(verts, count); + } break; + case sgl::geometry_type::POLYGON: { + auto ring_cursor = cursor; + cursor.Skip((count * 4) + (count % 2 == 1 ? 
4 : 0)); + for (uint32_t i = 0; i < count; i++) { + const auto ring_count = ring_cursor.Read(); + const auto verts = cursor.Reserve(ring_count * geom.get_vertex_size()); + + auto ring_mem = arena.AllocateAligned(sizeof(sgl::geometry)); + const auto ring = new (ring_mem) sgl::geometry(sgl::geometry_type::LINESTRING); + + ring->set_z(has_z); + ring->set_m(has_m); + ring->set_vertex_data(verts, ring_count); + + geom.append_part(ring); + } + } break; + case sgl::geometry_type::MULTI_POINT: + case sgl::geometry_type::MULTI_LINESTRING: + case sgl::geometry_type::MULTI_POLYGON: + case sgl::geometry_type::MULTI_GEOMETRY: { + for (uint32_t i = 0; i < count; i++) { + const auto part_type = static_cast(cursor.Read() + 1); + auto part_mem = arena.AllocateAligned(sizeof(sgl::geometry)); + const auto part = new (part_mem) sgl::geometry(part_type); + part->set_z(has_z); + part->set_m(has_m); + DeserializeRecursive(cursor, *part, has_z, has_m, arena); + + geom.append_part(part); + } + } break; + default: + break; + } +} + +void Serde::Deserialize(sgl::geometry &result, ArenaAllocator &arena, const char *buffer, size_t buffer_size) { + + BinaryReader cursor(buffer, buffer_size); + + const auto type = static_cast(cursor.Read() + 1); + const auto flags = cursor.Read(); + cursor.Skip(sizeof(uint16_t)); + cursor.Skip(sizeof(uint32_t)); // padding + + // Parse flags + const auto has_z = (flags & 0x01) != 0; + const auto has_m = (flags & 0x02) != 0; + const auto has_bbox = (flags & 0x04) != 0; + + const auto format_v1 = (flags & 0x40) != 0; + const auto format_v0 = (flags & 0x80) != 0; + + if (format_v1 || format_v0) { + // Unsupported version, throw an error + throw NotImplementedException( + "This geometry seems to be written with a newer version of the DuckDB spatial library that is not " + "compatible with this version. 
Please upgrade your DuckDB installation.");
+    }
+
+    if (has_bbox) {
+        // Skip past bbox if present
+        cursor.Skip(sizeof(float) * 2 * (2 + has_z + has_m));
+    }
+
+    // Create root geometry
+    result.set_type(type);
+    result.set_z(has_z);
+    result.set_m(has_m);
+
+    // Skip the root's type tag: the serializer repeats it after the header, and it was already read above
+    cursor.Read();
+
+    // Deserialize the geometry
+    DeserializeRecursive(cursor, result, has_z, has_m, arena);
+}
+
+} // namespace duckdb
\ No newline at end of file
diff --git a/src/spatial/geometry/geometry_serialization.hpp b/src/spatial/geometry/geometry_serialization.hpp
new file mode 100644
index 00000000..4fb45032
--- /dev/null
+++ b/src/spatial/geometry/geometry_serialization.hpp
@@ -0,0 +1,20 @@
+#pragma once
+
+#include
+
+namespace sgl {
+class geometry;
+}
+
+namespace duckdb {
+
+class ArenaAllocator;
+
+// Static helpers that size, write and read sgl::geometry objects to and from a flat byte buffer
+struct Serde {
+    static size_t GetRequiredSize(const sgl::geometry &geom);
+    static void Serialize(const sgl::geometry &geom, char *buffer, size_t buffer_size);
+    static void Deserialize(sgl::geometry &result, ArenaAllocator &arena, const char *buffer, size_t buffer_size);
+};
+
+} // namespace duckdb
\ No newline at end of file
diff --git a/spatial/include/spatial/core/geometry/geometry_type.hpp b/src/spatial/geometry/geometry_type.hpp
similarity index 93%
rename from spatial/include/spatial/core/geometry/geometry_type.hpp
rename to src/spatial/geometry/geometry_type.hpp
index 377ccd32..20c0c309 100644
--- a/spatial/include/spatial/core/geometry/geometry_type.hpp
+++ b/src/spatial/geometry/geometry_type.hpp
@@ -1,12 +1,13 @@
 #pragma once
-#include "spatial/common.hpp"
-#include "spatial/core/geometry/bbox.hpp"
-#include "spatial/core/geometry/geometry_properties.hpp"
-#include "spatial/core/util/cursor.hpp"
 
-namespace spatial {
+#include "spatial/geometry/bbox.hpp"
+#include "spatial/geometry/geometry_properties.hpp"
+#include "spatial/util/cursor.hpp"
 
-namespace core {
+#include "duckdb/common/types/string_type.hpp"
+#include "duckdb/common/string_util.hpp"
+
+namespace duckdb { enum class GeometryType : uint8_t { POINT = 0, @@ -148,6 +149,4 @@ class geometry_t { static_assert(sizeof(geometry_t) == sizeof(string_t), "geometry_t should be the same size as string_t"); -} // namespace core - -} // namespace spatial \ No newline at end of file +} // namespace duckdb \ No newline at end of file diff --git a/src/spatial/geometry/sgl.hpp b/src/spatial/geometry/sgl.hpp new file mode 100644 index 00000000..16f5f8c3 --- /dev/null +++ b/src/spatial/geometry/sgl.hpp @@ -0,0 +1,32 @@ +#pragma once + +// Wrapper around SGL that injects the DuckDB assert macro +#include "duckdb/common/assert.hpp" +#define SGL_ASSERT(x) D_ASSERT(x) +#include "sgl/sgl.hpp" + +#include "duckdb/storage/arena_allocator.hpp" + +namespace duckdb { + +// sgl::allocator that uses a DuckDB ArenaAllocator to allocate memory +class GeometryAllocator final : public sgl::allocator { +public: + explicit GeometryAllocator(ArenaAllocator &arena_p) : arena(arena_p) { + } + + void *alloc(size_t size) override { + return arena.AllocateAligned(size); + } + void dealloc(void *ptr, size_t size) override { + arena.ReallocateAligned(data_ptr_cast(ptr), size, 0); + } + void *realloc(void *ptr, size_t old_size, size_t new_size) override { + return arena.ReallocateAligned(data_ptr_cast(ptr), old_size, new_size); + } + +private: + ArenaAllocator &arena; +}; + +} // namespace duckdb \ No newline at end of file diff --git a/spatial/include/spatial/core/geometry/vertex.hpp b/src/spatial/geometry/vertex.hpp similarity index 97% rename from spatial/include/spatial/core/geometry/vertex.hpp rename to src/spatial/geometry/vertex.hpp index c3e0a325..0f79b71f 100644 --- a/spatial/include/spatial/core/geometry/vertex.hpp +++ b/src/spatial/geometry/vertex.hpp @@ -1,10 +1,10 @@ #pragma once -#include "spatial/common.hpp" +#include +#include "duckdb/common/typedefs.hpp" +#include "duckdb/common/assert.hpp" -namespace spatial { - -namespace core { +namespace duckdb { template struct PointXY { 
@@ -201,6 +201,4 @@ struct VertexXYZM : public PointXYZM { } }; -} // namespace core - -} // namespace spatial +} // namespace duckdb \ No newline at end of file diff --git a/spatial/src/spatial/core/geometry/wkb_writer.cpp b/src/spatial/geometry/wkb_writer.cpp similarity index 84% rename from spatial/src/spatial/core/geometry/wkb_writer.cpp rename to src/spatial/geometry/wkb_writer.cpp index 0b94baca..58aecc34 100644 --- a/spatial/src/spatial/core/geometry/wkb_writer.cpp +++ b/src/spatial/geometry/wkb_writer.cpp @@ -1,11 +1,11 @@ -#include "spatial/common.hpp" -#include "spatial/core/geometry/geometry.hpp" -#include "spatial/core/geometry/wkb_writer.hpp" -#include "spatial/core/geometry/geometry_processor.hpp" +#include "spatial/geometry/wkb_writer.hpp" +#include "spatial/geometry/geometry_processor.hpp" -namespace spatial { +#include "duckdb/common/types/vector.hpp" -namespace core { +namespace duckdb { + +namespace { //------------------------------------------------------------------------------ // Size Calculator @@ -43,6 +43,8 @@ class WKBSizeCalculator final : GeometryProcessor { } public: + virtual ~WKBSizeCalculator() = default; + uint32_t Execute(const geometry_t &geometry) { return Process(geometry); } @@ -130,6 +132,7 @@ class WKBSerializer final : GeometryProcessor { } public: + virtual ~WKBSerializer() = default; void Execute(const geometry_t &geometry, data_ptr_t start, data_ptr_t end) { Cursor cursor(start, end); Process(geometry, cursor); @@ -141,6 +144,11 @@ class WKBSerializer final : GeometryProcessor { } }; +} // namespace + +//------------------------------------------------------------------------------ +// WKB Writer +//------------------------------------------------------------------------------ string_t WKBWriter::Write(const geometry_t &geometry, Vector &result) { WKBSizeCalculator size_processor; WKBSerializer serializer; @@ -150,6 +158,11 @@ string_t WKBWriter::Write(const geometry_t &geometry, Vector &result) { return blob; } 
+string_t WKBWriter::Write(const string_t &geometry, Vector &result) { + const geometry_t geom(geometry); + return Write(geom, result); +} + void WKBWriter::Write(const geometry_t &geometry, vector &buffer) { WKBSizeCalculator size_processor; WKBSerializer serializer; @@ -158,6 +171,11 @@ void WKBWriter::Write(const geometry_t &geometry, vector &buffer) { serializer.Execute(geometry, buffer.data(), buffer.data() + size); } +void WKBWriter::Write(const string_t &geometry, vector &buffer) { + const geometry_t geom(geometry); + Write(geom, buffer); +} + const_data_ptr_t WKBWriter::Write(const geometry_t &geometry, uint32_t *size, ArenaAllocator &allocator) { WKBSizeCalculator size_processor; WKBSerializer serializer; @@ -168,6 +186,9 @@ const_data_ptr_t WKBWriter::Write(const geometry_t &geometry, uint32_t *size, Ar return blob; } -} // namespace core +const_data_ptr_t WKBWriter::Write(const string_t &geometry, uint32_t *size, ArenaAllocator &allocator) { + const geometry_t geom(geometry); + return WKBWriter::Write(geom, size, allocator); +} -} // namespace spatial +} // namespace duckdb \ No newline at end of file diff --git a/spatial/include/spatial/core/geometry/wkb_writer.hpp b/src/spatial/geometry/wkb_writer.hpp similarity index 53% rename from spatial/include/spatial/core/geometry/wkb_writer.hpp rename to src/spatial/geometry/wkb_writer.hpp index 18f19ba3..55d64e77 100644 --- a/spatial/include/spatial/core/geometry/wkb_writer.hpp +++ b/src/spatial/geometry/wkb_writer.hpp @@ -1,22 +1,24 @@ #pragma once -#include "spatial/common.hpp" -#include "spatial/core/geometry/geometry.hpp" -namespace spatial { +#include "spatial/geometry/geometry_type.hpp" +#include "duckdb/common/types/string_type.hpp" -namespace core { +namespace duckdb { + +class ArenaAllocator; struct WKBWriter { // Write a geometry to a WKB blob attached to a vector static string_t Write(const geometry_t &geometry, Vector &result); + static string_t Write(const string_t &geometry, Vector &result); // 
Write a geometry to a WKB blob into a buffer static void Write(const geometry_t &geometry, vector &buffer); + static void Write(const string_t &geometry, vector &buffer); // Write a geometry to a WKB blob into an arena allocator static const_data_ptr_t Write(const geometry_t &geometry, uint32_t *size, ArenaAllocator &allocator); + static const_data_ptr_t Write(const string_t &geometry, uint32_t *size, ArenaAllocator &allocator); }; -} // namespace core - -} // namespace spatial \ No newline at end of file +} // namespace duckdb \ No newline at end of file diff --git a/spatial/src/spatial/core/index/CMakeLists.txt b/src/spatial/index/CMakeLists.txt similarity index 100% rename from spatial/src/spatial/core/index/CMakeLists.txt rename to src/spatial/index/CMakeLists.txt diff --git a/spatial/src/spatial/core/index/rtree/CMakeLists.txt b/src/spatial/index/rtree/CMakeLists.txt similarity index 100% rename from spatial/src/spatial/core/index/rtree/CMakeLists.txt rename to src/spatial/index/rtree/CMakeLists.txt diff --git a/spatial/src/spatial/core/index/rtree/rtree.cpp b/src/spatial/index/rtree/rtree.cpp similarity index 99% rename from spatial/src/spatial/core/index/rtree/rtree.cpp rename to src/spatial/index/rtree/rtree.cpp index 39ae02e5..0b1e9b69 100644 --- a/spatial/src/spatial/core/index/rtree/rtree.cpp +++ b/src/spatial/index/rtree/rtree.cpp @@ -1,9 +1,7 @@ -#include "spatial/core/index/rtree/rtree.hpp" +#include "spatial/index/rtree/rtree.hpp" #include "duckdb/common/printer.hpp" -namespace spatial { - -namespace core { +namespace duckdb { struct InsertResult { // Whether or not the node was split @@ -657,6 +655,4 @@ void RTree::Print() const { Printer::Print(ToString()); } -} // namespace core - -} // namespace spatial \ No newline at end of file +} // namespace duckdb \ No newline at end of file diff --git a/spatial/include/spatial/core/index/rtree/rtree.hpp b/src/spatial/index/rtree/rtree.hpp similarity index 95% rename from 
spatial/include/spatial/core/index/rtree/rtree.hpp rename to src/spatial/index/rtree/rtree.hpp index fa8b69e1..e35f3031 100644 --- a/spatial/include/spatial/core/index/rtree/rtree.hpp +++ b/src/spatial/index/rtree/rtree.hpp @@ -1,12 +1,11 @@ #pragma once -#include "spatial/core/index/rtree/rtree_node.hpp" +#include "spatial/index/rtree/rtree_node.hpp" + #include "duckdb/execution/index/fixed_size_allocator.hpp" #include "duckdb/storage/block_manager.hpp" -namespace spatial { - -namespace core { +namespace duckdb { struct InsertResult; struct DeleteResult; @@ -116,6 +115,4 @@ struct RTree { const RTreeConfig config; }; -} // namespace core - -} // namespace spatial \ No newline at end of file +} // namespace duckdb \ No newline at end of file diff --git a/spatial/src/spatial/core/index/rtree/rtree_index.cpp b/src/spatial/index/rtree/rtree_index.cpp similarity index 96% rename from spatial/src/spatial/core/index/rtree/rtree_index.cpp rename to src/spatial/index/rtree/rtree_index.cpp index b1893d77..bd50e20d 100644 --- a/spatial/src/spatial/core/index/rtree/rtree_index.cpp +++ b/src/spatial/index/rtree/rtree_index.cpp @@ -1,18 +1,18 @@ -#include "spatial/core/index/rtree/rtree_index.hpp" -#include "spatial/core/index/rtree/rtree_scanner.hpp" +#include "spatial/index/rtree/rtree_index.hpp" #include "duckdb/common/serializer/binary_deserializer.hpp" #include "duckdb/common/serializer/binary_serializer.hpp" #include "duckdb/execution/index/fixed_size_allocator.hpp" #include "duckdb/storage/table/scan_state.hpp" -#include "spatial/core/geometry/geometry_type.hpp" -#include "spatial/core/index/rtree/rtree_module.hpp" -#include "spatial/core/index/rtree/rtree_node.hpp" -#include "spatial/core/util/math.hpp" +#include "duckdb/main/database.hpp" -namespace spatial { +#include "spatial/geometry/geometry_type.hpp" +#include "spatial/index/rtree/rtree_module.hpp" +#include "spatial/index/rtree/rtree_node.hpp" +#include "spatial/index/rtree/rtree_scanner.hpp" +#include 
"spatial/util/math.hpp" -namespace core { +namespace duckdb { //------------------------------------------------------------------------------ // RTree Index Scan State @@ -305,6 +305,4 @@ void RTreeModule::RegisterIndex(DatabaseInstance &db) { db.config.GetIndexTypes().RegisterIndexType(index_type); } -} // namespace core - -} // namespace spatial \ No newline at end of file +} // namespace duckdb \ No newline at end of file diff --git a/spatial/include/spatial/core/index/rtree/rtree_index.hpp b/src/spatial/index/rtree/rtree_index.hpp similarity index 92% rename from spatial/include/spatial/core/index/rtree/rtree_index.hpp rename to src/spatial/index/rtree/rtree_index.hpp index 643c433b..34626139 100644 --- a/spatial/include/spatial/core/index/rtree/rtree_index.hpp +++ b/src/spatial/index/rtree/rtree_index.hpp @@ -1,20 +1,16 @@ #pragma once +#include "spatial/geometry/bbox.hpp" +#include "spatial/index/rtree/rtree_node.hpp" +#include "spatial/index/rtree/rtree.hpp" + #include "duckdb/execution/index/bound_index.hpp" #include "duckdb/execution/index/fixed_size_allocator.hpp" #include "duckdb/execution/index/index_pointer.hpp" -#include "spatial/common.hpp" -#include "spatial/core/geometry/bbox.hpp" -#include "spatial/core/index/rtree/rtree_node.hpp" -#include "spatial/core/index/rtree/rtree.hpp" namespace duckdb { -class PhysicalOperator; -} - -namespace spatial { -namespace core { +class PhysicalOperator; class RTreeIndex final : public BoundIndex { public: @@ -73,6 +69,4 @@ class RTreeIndex final : public BoundIndex { } }; -} // namespace core - -} // namespace spatial \ No newline at end of file +} // namespace duckdb \ No newline at end of file diff --git a/spatial/src/spatial/core/index/rtree/rtree_index_create_logical.cpp b/src/spatial/index/rtree/rtree_index_create_logical.cpp similarity index 97% rename from spatial/src/spatial/core/index/rtree/rtree_index_create_logical.cpp rename to src/spatial/index/rtree/rtree_index_create_logical.cpp index 
af1e10f6..9c891e36 100644 --- a/spatial/src/spatial/core/index/rtree/rtree_index_create_logical.cpp +++ b/src/spatial/index/rtree/rtree_index_create_logical.cpp @@ -1,4 +1,7 @@ -#include "spatial/core/index/rtree/rtree_index_create_logical.hpp" +#include "spatial/index/rtree/rtree_index_create_logical.hpp" +#include "spatial/index/rtree/rtree_index.hpp" +#include "spatial/index/rtree/rtree_index_create_physical.hpp" +#include "spatial/spatial_types.hpp" #include "duckdb/catalog/catalog_entry/table_catalog_entry.hpp" #include "duckdb/execution/column_binding_resolver.hpp" @@ -12,13 +15,7 @@ #include "duckdb/planner/operator/logical_create_index.hpp" #include "duckdb/catalog/catalog_entry/scalar_function_catalog_entry.hpp" -#include "spatial/core/index/rtree/rtree_index.hpp" -#include "spatial/core/index/rtree/rtree_index_create_physical.hpp" -#include "spatial/core/types.hpp" - -namespace spatial { - -namespace core { +namespace duckdb { LogicalCreateRTreeIndex::LogicalCreateRTreeIndex(unique_ptr info_p, vector> expressions_p, @@ -284,6 +281,4 @@ unique_ptr LogicalCreateRTreeIndex::CreatePlan(ClientContext & return std::move(physical_create_index); } -} // namespace core - -} // namespace spatial +} // namespace duckdb diff --git a/spatial/include/spatial/core/index/rtree/rtree_index_create_logical.hpp b/src/spatial/index/rtree/rtree_index_create_logical.hpp similarity index 95% rename from spatial/include/spatial/core/index/rtree/rtree_index_create_logical.hpp rename to src/spatial/index/rtree/rtree_index_create_logical.hpp index 0a63ae85..e9600693 100644 --- a/spatial/include/spatial/core/index/rtree/rtree_index_create_logical.hpp +++ b/src/spatial/index/rtree/rtree_index_create_logical.hpp @@ -1,12 +1,11 @@ #pragma once -#include "duckdb/planner/operator/logical_extension_operator.hpp" -#include "duckdb/parser/parsed_data/create_index_info.hpp" #include "duckdb/catalog/catalog_entry/table_catalog_entry.hpp" +#include 
"duckdb/parser/parsed_data/create_index_info.hpp" +#include "duckdb/planner/operator/logical_extension_operator.hpp" +#include "duckdb/common/serializer/serializer.hpp" +#include "duckdb/common/serializer/deserializer.hpp" -#include "spatial/common.hpp" -namespace spatial { - -namespace core { +namespace duckdb { class LogicalCreateRTreeIndex final : public LogicalExtensionOperator { public: @@ -81,6 +80,4 @@ class LogicalCreateRTreeIndexOperatorExtension final : public OperatorExtension } }; -} // namespace core - -} // namespace spatial \ No newline at end of file +} // namespace duckdb \ No newline at end of file diff --git a/spatial/src/spatial/core/index/rtree/rtree_index_create_physical.cpp b/src/spatial/index/rtree/rtree_index_create_physical.cpp similarity index 97% rename from spatial/src/spatial/core/index/rtree/rtree_index_create_physical.cpp rename to src/spatial/index/rtree/rtree_index_create_physical.cpp index ed2455fa..e27fb026 100644 --- a/spatial/src/spatial/core/index/rtree/rtree_index_create_physical.cpp +++ b/src/spatial/index/rtree/rtree_index_create_physical.cpp @@ -1,4 +1,7 @@ -#include "spatial/core/index/rtree/rtree_index_create_physical.hpp" +#include "spatial/index/rtree/rtree_index_create_physical.hpp" +#include "spatial/index/rtree/rtree_index.hpp" +#include "spatial/index/rtree/rtree_node.hpp" +#include "spatial/util/managed_collection.hpp" #include "duckdb/catalog/catalog_entry/duck_index_entry.hpp" #include "duckdb/catalog/catalog_entry/duck_table_entry.hpp" @@ -7,16 +10,10 @@ #include "duckdb/main/attached_database.hpp" #include "duckdb/storage/storage_manager.hpp" #include "duckdb/storage/table_io_manager.hpp" -#include "spatial/core/index/rtree/rtree_index.hpp" -#include "spatial/core/index/rtree/rtree_node.hpp" -#include "spatial/core/util/managed_collection.hpp" - #include "duckdb/common/sort/sort.hpp" #include "duckdb/parallel/base_pipeline_event.hpp" -namespace spatial { - -namespace core { +namespace duckdb { 
//------------------------------------------------------------- // Physical Create RTree Index @@ -365,6 +362,4 @@ SinkFinalizeType PhysicalCreateRTreeIndex::Finalize(Pipeline &pipeline, Event &e return SinkFinalizeType::READY; } -} // namespace core - -} // namespace spatial \ No newline at end of file +} // namespace duckdb \ No newline at end of file diff --git a/spatial/include/spatial/core/index/rtree/rtree_index_create_physical.hpp b/src/spatial/index/rtree/rtree_index_create_physical.hpp similarity index 93% rename from spatial/include/spatial/core/index/rtree/rtree_index_create_physical.hpp rename to src/spatial/index/rtree/rtree_index_create_physical.hpp index 26a2774c..68a181da 100644 --- a/spatial/include/spatial/core/index/rtree/rtree_index_create_physical.hpp +++ b/src/spatial/index/rtree/rtree_index_create_physical.hpp @@ -1,15 +1,10 @@ #pragma once #include "duckdb/execution/physical_operator.hpp" #include "duckdb/storage/data_table.hpp" -#include "spatial/common.hpp" namespace duckdb { -class DuckTableEntry; -} - -namespace spatial { -namespace core { +class DuckTableEntry; class PhysicalCreateRTreeIndex final : public PhysicalOperator { public: @@ -54,6 +49,4 @@ class PhysicalCreateRTreeIndex final : public PhysicalOperator { } }; -} // namespace core - -} // namespace spatial \ No newline at end of file +} // namespace duckdb \ No newline at end of file diff --git a/spatial/src/spatial/core/index/rtree/rtree_index_plan_create.cpp b/src/spatial/index/rtree/rtree_index_plan_create.cpp similarity index 59% rename from spatial/src/spatial/core/index/rtree/rtree_index_plan_create.cpp rename to src/spatial/index/rtree/rtree_index_plan_create.cpp index 52f96f70..821d9cfb 100644 --- a/spatial/src/spatial/core/index/rtree/rtree_index_plan_create.cpp +++ b/src/spatial/index/rtree/rtree_index_plan_create.cpp @@ -1,12 +1,11 @@ #include "duckdb/parser/parsed_data/create_index_info.hpp" -#include "spatial/core/index/rtree/rtree_module.hpp" -#include 
"spatial/core/index/rtree/rtree_index.hpp" -#include "spatial/core/index/rtree/rtree_index_create_logical.hpp" +#include "spatial/index/rtree/rtree_index.hpp" +#include "spatial/index/rtree/rtree_index_create_logical.hpp" +#include "spatial/index/rtree/rtree_module.hpp" -namespace spatial { - -namespace core { +#include "duckdb/main/database.hpp" +namespace duckdb { //------------------------------------------------------------- // Register //------------------------------------------------------------- @@ -15,6 +14,4 @@ void RTreeModule::RegisterIndexPlanCreate(DatabaseInstance &db) { db.config.operator_extensions.push_back(make_uniq()); } -} // namespace core - -} // namespace spatial +} // namespace duckdb \ No newline at end of file diff --git a/spatial/src/spatial/core/index/rtree/rtree_index_plan_scan.cpp b/src/spatial/index/rtree/rtree_index_plan_scan.cpp similarity index 93% rename from spatial/src/spatial/core/index/rtree/rtree_index_plan_scan.cpp rename to src/spatial/index/rtree/rtree_index_plan_scan.cpp index 23ddb2d6..38fa3e57 100644 --- a/spatial/src/spatial/core/index/rtree/rtree_index_plan_scan.cpp +++ b/src/spatial/index/rtree/rtree_index_plan_scan.cpp @@ -1,7 +1,9 @@ #include "duckdb/catalog/catalog_entry/duck_table_entry.hpp" +#include "duckdb/optimizer/column_binding_replacer.hpp" #include "duckdb/optimizer/column_lifetime_analyzer.hpp" #include "duckdb/optimizer/matcher/expression_matcher.hpp" #include "duckdb/optimizer/matcher/function_matcher.hpp" +#include "duckdb/optimizer/optimizer.hpp" #include "duckdb/optimizer/optimizer_extension.hpp" #include "duckdb/optimizer/remove_unused_columns.hpp" #include "duckdb/planner/expression/bound_constant_expression.hpp" @@ -9,24 +11,21 @@ #include "duckdb/planner/expression/bound_reference_expression.hpp" #include "duckdb/planner/operator/logical_filter.hpp" #include "duckdb/planner/operator/logical_get.hpp" +#include "duckdb/planner/operator/logical_projection.hpp" #include 
"duckdb/planner/operator_extension.hpp" #include "duckdb/storage/data_table.hpp" -#include "spatial/core/geometry/bbox.hpp" -#include "spatial/core/geometry/geometry_type.hpp" -#include "spatial/core/index/rtree/rtree_index.hpp" -#include "spatial/core/index/rtree/rtree_index_create_logical.hpp" -#include "spatial/core/index/rtree/rtree_index_scan.hpp" -#include "spatial/core/index/rtree/rtree_module.hpp" -#include "spatial/core/types.hpp" -#include "spatial/core/util/math.hpp" - -#include -#include -#include - -namespace spatial { - -namespace core { +#include "duckdb/main/database.hpp" + +#include "spatial/geometry/bbox.hpp" +#include "spatial/geometry/geometry_type.hpp" +#include "spatial/index/rtree/rtree_index.hpp" +#include "spatial/index/rtree/rtree_index_create_logical.hpp" +#include "spatial/index/rtree/rtree_index_scan.hpp" +#include "spatial/index/rtree/rtree_module.hpp" +#include "spatial/spatial_types.hpp" +#include "spatial/util/math.hpp" + +namespace duckdb { //----------------------------------------------------------------------------- // Plan rewriter //----------------------------------------------------------------------------- @@ -254,6 +253,4 @@ void RTreeModule::RegisterIndexPlanScan(DatabaseInstance &db) { db.config.optimizer_extensions.push_back(RTreeIndexScanOptimizer()); } -} // namespace core - -} // namespace spatial \ No newline at end of file +} // namespace duckdb \ No newline at end of file diff --git a/spatial/src/spatial/core/index/rtree/rtree_index_pragmas.cpp b/src/spatial/index/rtree/rtree_index_pragmas.cpp similarity index 96% rename from spatial/src/spatial/core/index/rtree/rtree_index_pragmas.cpp rename to src/spatial/index/rtree/rtree_index_pragmas.cpp index 3dc473ac..4aa428e6 100644 --- a/spatial/src/spatial/core/index/rtree/rtree_index_pragmas.cpp +++ b/src/spatial/index/rtree/rtree_index_pragmas.cpp @@ -1,3 +1,9 @@ +#include "spatial/spatial_types.hpp" +#include "spatial/index/rtree/rtree_index.hpp" +#include 
"spatial/index/rtree/rtree_module.hpp" +#include "spatial/index/rtree/rtree_node.hpp" +#include "spatial/index/rtree/rtree_scanner.hpp" + #include "duckdb/catalog/catalog_entry/duck_index_entry.hpp" #include "duckdb/catalog/catalog_entry/duck_table_entry.hpp" #include "duckdb/catalog/dependency_list.hpp" @@ -12,15 +18,7 @@ #include "duckdb/transaction/duck_transaction.hpp" #include "duckdb/transaction/local_storage.hpp" -#include "spatial/core/types.hpp" -#include "spatial/core/index/rtree/rtree_index.hpp" -#include "spatial/core/index/rtree/rtree_module.hpp" -#include "spatial/core/index/rtree/rtree_node.hpp" -#include "spatial/core/index/rtree/rtree_scanner.hpp" - -namespace spatial { - -namespace core { +namespace duckdb { // BIND static unique_ptr RTreeindexInfoBind(ClientContext &context, TableFunctionBindInput &input, @@ -244,6 +242,4 @@ void RTreeModule::RegisterIndexPragmas(DatabaseInstance &db) { ExtensionUtil::RegisterFunction(db, dump_function); } -} // namespace core - -} // namespace spatial \ No newline at end of file +} // namespace duckdb \ No newline at end of file diff --git a/spatial/src/spatial/core/index/rtree/rtree_index_scan.cpp b/src/spatial/index/rtree/rtree_index_scan.cpp similarity index 97% rename from spatial/src/spatial/core/index/rtree/rtree_index_scan.cpp rename to src/spatial/index/rtree/rtree_index_scan.cpp index 101ac45d..318f4146 100644 --- a/spatial/src/spatial/core/index/rtree/rtree_index_scan.cpp +++ b/src/spatial/index/rtree/rtree_index_scan.cpp @@ -1,3 +1,7 @@ +#include "spatial/index/rtree/rtree_module.hpp" +#include "spatial/index/rtree/rtree_index.hpp" +#include "spatial/index/rtree/rtree_index_scan.hpp" + #include "duckdb/catalog/catalog_entry/duck_table_entry.hpp" #include "duckdb/catalog/dependency_list.hpp" #include "duckdb/common/mutex.hpp" @@ -12,13 +16,7 @@ #include "duckdb/catalog/catalog_entry/duck_index_entry.hpp" #include "duckdb/storage/data_table.hpp" -#include "spatial/core/index/rtree/rtree_module.hpp" 
-#include "spatial/core/index/rtree/rtree_index.hpp" -#include "spatial/core/index/rtree/rtree_index_scan.hpp" - -namespace spatial { - -namespace core { +namespace duckdb { BindInfo RTreeIndexScanBindInfo(const optional_ptr bind_data_p) { auto &bind_data = bind_data_p->Cast(); @@ -225,6 +223,4 @@ void RTreeModule::RegisterIndexScan(DatabaseInstance &db) { ExtensionUtil::RegisterFunction(db, RTreeIndexScanFunction::GetFunction()); } -} // namespace core - -} // namespace spatial \ No newline at end of file +} // namespace duckdb \ No newline at end of file diff --git a/spatial/include/spatial/core/index/rtree/rtree_index_scan.hpp b/src/spatial/index/rtree/rtree_index_scan.hpp similarity index 82% rename from spatial/include/spatial/core/index/rtree/rtree_index_scan.hpp rename to src/spatial/index/rtree/rtree_index_scan.hpp index 83b4576c..b59e6d69 100644 --- a/spatial/include/spatial/core/index/rtree/rtree_index_scan.hpp +++ b/src/spatial/index/rtree/rtree_index_scan.hpp @@ -1,16 +1,11 @@ #pragma once +#include "spatial/index/rtree/rtree_node.hpp" #include "duckdb/function/table_function.hpp" -#include "spatial/core/index/rtree/rtree_node.hpp" namespace duckdb { class DuckTableEntry; class Index; -} // namespace duckdb - -namespace spatial { - -namespace core { // This is created by the optimizer rule struct RTreeIndexScanBindData final : public TableFunctionData { @@ -38,6 +33,4 @@ struct RTreeIndexScanFunction { static TableFunction GetFunction(); }; -} // namespace core - -} // namespace spatial \ No newline at end of file +} // namespace duckdb \ No newline at end of file diff --git a/spatial/include/spatial/core/index/rtree/rtree_module.hpp b/src/spatial/index/rtree/rtree_module.hpp similarity index 74% rename from spatial/include/spatial/core/index/rtree/rtree_module.hpp rename to src/spatial/index/rtree/rtree_module.hpp index 155b793e..09f170a0 100644 --- a/spatial/include/spatial/core/index/rtree/rtree_module.hpp +++ 
b/src/spatial/index/rtree/rtree_module.hpp @@ -1,10 +1,8 @@ #pragma once -#include "spatial/common.hpp" +namespace duckdb { -namespace spatial { - -namespace core { +class DatabaseInstance; struct RTreeModule { static void RegisterIndex(DatabaseInstance &db); @@ -14,6 +12,4 @@ struct RTreeModule { static void RegisterIndexPragmas(DatabaseInstance &db); }; -} // namespace core - -} // namespace spatial \ No newline at end of file +} // namespace duckdb \ No newline at end of file diff --git a/spatial/include/spatial/core/index/rtree/rtree_node.hpp b/src/spatial/index/rtree/rtree_node.hpp similarity index 96% rename from spatial/include/spatial/core/index/rtree/rtree_node.hpp rename to src/spatial/index/rtree/rtree_node.hpp index 74d1cde1..9c35aa13 100644 --- a/spatial/include/spatial/core/index/rtree/rtree_node.hpp +++ b/src/spatial/index/rtree/rtree_node.hpp @@ -1,12 +1,11 @@ #pragma once +#include "spatial/geometry/bbox.hpp" #include "duckdb/execution/index/index_pointer.hpp" -#include "spatial/common.hpp" -#include "spatial/core/geometry/bbox.hpp" -namespace spatial { +#include -namespace core { +namespace duckdb { //------------------------------------------------------------- // RTree Pointer @@ -190,6 +189,4 @@ struct alignas(RTreeEntry) RTreeNode { uint8_t _unused[20] = {}; }; -} // namespace core - -} // namespace spatial +} // namespace duckdb \ No newline at end of file diff --git a/spatial/include/spatial/core/index/rtree/rtree_scanner.hpp b/src/spatial/index/rtree/rtree_scanner.hpp similarity index 93% rename from spatial/include/spatial/core/index/rtree/rtree_scanner.hpp rename to src/spatial/index/rtree/rtree_scanner.hpp index f3caac52..3b388d6a 100644 --- a/spatial/include/spatial/core/index/rtree/rtree_scanner.hpp +++ b/src/spatial/index/rtree/rtree_scanner.hpp @@ -1,10 +1,8 @@ #pragma once -#include "spatial/core/index/rtree/rtree.hpp" +#include "spatial/index/rtree/rtree.hpp" -namespace spatial { - -namespace core { +namespace duckdb { class 
RTreeScanner { public: @@ -84,6 +82,4 @@ inline void RTreeScanner::Scan(const RTree &tree, FUNC &&handler) { } } -} // namespace core - -} // namespace spatial \ No newline at end of file +} // namespace duckdb \ No newline at end of file diff --git a/src/spatial/modules/CMakeLists.txt b/src/spatial/modules/CMakeLists.txt new file mode 100644 index 00000000..94a5d99e --- /dev/null +++ b/src/spatial/modules/CMakeLists.txt @@ -0,0 +1,10 @@ +add_subdirectory(main) +add_subdirectory(proj) +add_subdirectory(gdal) +add_subdirectory(geos) +add_subdirectory(osm) +add_subdirectory(shapefile) + +set(EXTENSION_SOURCES + ${EXTENSION_SOURCES} +PARENT_SCOPE) \ No newline at end of file diff --git a/src/spatial/modules/gdal/CMakeLists.txt b/src/spatial/modules/gdal/CMakeLists.txt new file mode 100644 index 00000000..bdfd0b2d --- /dev/null +++ b/src/spatial/modules/gdal/CMakeLists.txt @@ -0,0 +1,4 @@ +set(EXTENSION_SOURCES + ${EXTENSION_SOURCES} + ${CMAKE_CURRENT_SOURCE_DIR}/gdal_module.cpp + PARENT_SCOPE) \ No newline at end of file diff --git a/src/spatial/modules/gdal/gdal_module.cpp b/src/spatial/modules/gdal/gdal_module.cpp new file mode 100644 index 00000000..3f262001 --- /dev/null +++ b/src/spatial/modules/gdal/gdal_module.cpp @@ -0,0 +1,2080 @@ +#include "spatial/modules/gdal/gdal_module.hpp" + +// Spatial +#include "spatial/spatial_types.hpp" +#include "spatial/geometry/sgl.hpp" +#include "spatial/geometry/wkb_writer.hpp" +#include "spatial/geometry/geometry_serialization.hpp" + +// DuckDB +#include "duckdb/main/database.hpp" +#include "duckdb/common/enums/file_glob_options.hpp" +#include "duckdb/common/multi_file_reader.hpp" +#include "duckdb/function/table/arrow.hpp" +#include "duckdb/main/extension_util.hpp" +#include "duckdb/parser/parsed_data/create_table_function_info.hpp" +#include "duckdb/common/types/uuid.hpp" +#include "duckdb/function/copy_function.hpp" +#include "duckdb/parser/tableref/table_function_ref.hpp" +#include "duckdb/parser/parsed_expression.hpp" 
+#include "duckdb/parser/expression/function_expression.hpp" + +// GDAL +#include "cpl_string.h" +#include "cpl_vsi.h" +#include "cpl_vsi_error.h" +#include "cpl_vsi_virtual.h" +#include "ogrsf_frmts.h" + +#include + +namespace duckdb { + +namespace { + +//###################################################################################################################### +// DuckDB GDAL VFS +//###################################################################################################################### +// This implements a GDAL "VFS" (Virtual File System) that allows GDAL to read and write files from DuckDB's file system +// TODO: Make another pass at this, we should be able to clean it up a bit more. + +class DuckDBFileHandle final : public VSIVirtualHandle { +private: + unique_ptr file_handle; + bool is_eof; + +public: + explicit DuckDBFileHandle(unique_ptr file_handle_p) + : file_handle(std::move(file_handle_p)), is_eof(false) { + } + + vsi_l_offset Tell() override { + return static_cast(file_handle->SeekPosition()); + } + int Seek(vsi_l_offset nOffset, int nWhence) override { + is_eof = false; + + if (nWhence == SEEK_SET && nOffset == 0) { + // Use the reset function instead to allow compressed file handles to rewind + // even if they don't support seeking + file_handle->Reset(); + return 0; + } + switch (nWhence) { + case SEEK_SET: + file_handle->Seek(nOffset); + break; + case SEEK_CUR: + file_handle->Seek(file_handle->SeekPosition() + nOffset); + break; + case SEEK_END: + file_handle->Seek(file_handle->GetFileSize() + nOffset); + break; + default: + throw InternalException("Unknown seek type"); + } + return 0; + } + + size_t Read(void *pBuffer, size_t nSize, size_t nCount) override { + auto remaining_bytes = nSize * nCount; + try { + while (remaining_bytes > 0) { + auto read_bytes = file_handle->Read(pBuffer, remaining_bytes); + if (read_bytes == 0) { + break; + } + remaining_bytes -= read_bytes; + // Note we performed a cast back to void* + 
pBuffer = static_cast(pBuffer) + read_bytes; + } + } catch (...) { + } + + if (remaining_bytes != 0) { + if (file_handle->SeekPosition() == file_handle->GetFileSize()) { + // Is at EOF! + is_eof = true; + } + // else, error! + // unfortunately, this version of GDAL cant distinguish between errors and reading less bytes + // its avaiable in 3.9.2, but we're stuck on 3.8.5 for now. + } + + return nCount - (remaining_bytes / nSize); + } + + int Eof() override { + return is_eof ? TRUE : FALSE; + } + + size_t Write(const void *pBuffer, size_t nSize, size_t nCount) override { + size_t written_bytes = 0; + try { + written_bytes = file_handle->Write(const_cast(pBuffer), nSize * nCount); + } catch (...) { + } + // Return the number of items written + return static_cast(written_bytes / nSize); + } + + int Flush() override { + file_handle->Sync(); + return 0; + } + int Truncate(vsi_l_offset nNewSize) override { + file_handle->Truncate(static_cast(nNewSize)); + return 0; + } + int Close() override { + file_handle->Close(); + return 0; + } + + // int ReadMultiRange(int nRanges, void **ppData, const vsi_l_offset *panOffsets, const size_t *panSizes) override; + // void AdviseRead(int nRanges, const vsi_l_offset *panOffsets, const size_t *panSizes) override; + // VSIRangeStatus GetRangeStatus(vsi_l_offset nOffset, vsi_l_offset nLength) override; +}; + +//-------------------------------------------------------------------------- +// GDAL DuckDB File system wrapper +//-------------------------------------------------------------------------- +bool IsStdCharDev(const char *file_name) { + return !strcmp(file_name, "/dev/stdin") || !strcmp(file_name, "/dev/stdout") || !strcmp(file_name, "/dev/stderr") || + !strcmp(file_name, "/dev/null") || !strcmp(file_name, "/dev/zero"); +} + +class DuckDBFileSystemHandler final : public VSIFilesystemHandler { +private: + string client_prefix; + ClientContext &context; + +public: + DuckDBFileSystemHandler(string client_prefix, ClientContext &context) 
+ : client_prefix(std::move(client_prefix)), context(context) {}; + + const char *StripPrefix(const char *pszFilename) { + return pszFilename + client_prefix.size(); + } + + string AddPrefix(const string &value) { + return client_prefix + value; + } + + VSIVirtualHandle *Open(const char *prefixed_file_name, const char *access, bool bSetError, + CSLConstList /* papszOptions */) override { + auto file_name = StripPrefix(prefixed_file_name); + auto file_name_str = string(file_name); + auto &fs = FileSystem::GetFileSystem(context); + + // TODO: Double check that this is correct + FileOpenFlags flags; + auto len = strlen(access); + if (access[0] == 'r') { + flags = FileFlags::FILE_FLAGS_READ; + if (len > 1 && access[1] == '+') { + flags |= FileFlags::FILE_FLAGS_WRITE; + } + if (len > 2 && access[2] == '+') { + // might be "rb+" + flags |= FileFlags::FILE_FLAGS_WRITE; + } + } else if (access[0] == 'w') { + flags = FileFlags::FILE_FLAGS_WRITE; + if (!IsStdCharDev(file_name)) { + flags |= FileFlags::FILE_FLAGS_FILE_CREATE_NEW; + } + if (len > 1 && access[1] == '+') { + flags |= FileFlags::FILE_FLAGS_READ; + } + if (len > 2 && access[2] == '+') { + // might be "wb+" + flags |= FileFlags::FILE_FLAGS_READ; + } + } else if (access[0] == 'a') { + flags = FileFlags::FILE_FLAGS_APPEND; + if (len > 1 && access[1] == '+') { + flags |= FileFlags::FILE_FLAGS_READ; + } + if (len > 2 && access[2] == '+') { + // might be "ab+" + flags |= FileFlags::FILE_FLAGS_READ; + } + } else { + throw InternalException("Unknown file access type"); + } + + try { + // Check if the file is a directory + +#ifdef _WIN32 + if (!FileSystem::IsRemoteFile(file_name) && fs.DirectoryExists(file_name_str) && (flags.OpenForReading())) { + // We can't open a directory for reading on windows without special flags + // so just open nul instead, gdal will reject it when it tries to read + auto file = fs.OpenFile("nul", flags); + return new DuckDBFileHandle(std::move(file)); + } +#endif + + // If the file is remote 
and NOT in write mode, we can cache it. + if (FileSystem::IsRemoteFile(file_name_str) && !flags.OpenForWriting() && !flags.OpenForAppending()) { + + // Pass the direct IO flag to the file system since we use GDAL's caching instead + flags |= FileFlags::FILE_FLAGS_DIRECT_IO; + + auto file = fs.OpenFile(file_name, flags | FileCompressionType::AUTO_DETECT); + return VSICreateCachedFile(new DuckDBFileHandle(std::move(file))); + } else { + auto file = fs.OpenFile(file_name, flags | FileCompressionType::AUTO_DETECT); + return new DuckDBFileHandle(std::move(file)); + } + } catch (std::exception &ex) { + // Failed to open file via DuckDB File System. If this doesnt have a VSI prefix we can return an error here. + if (strncmp(file_name, "/vsi", 4) != 0 && !IsStdCharDev(file_name)) { + if (bSetError) { + VSIError(VSIE_FileError, "Failed to open file %s: %s", file_name, ex.what()); + } + return nullptr; + } + + // Fall back to GDAL instead (if external access is enabled) + if (!context.db->config.options.enable_external_access) { + if (bSetError) { + VSIError(VSIE_FileError, "Failed to open file %s with GDAL: External access is disabled", + file_name); + } + return nullptr; + } + + const auto handler = VSIFileManager::GetHandler(file_name); + if (!handler) { + if (bSetError) { + VSIError(VSIE_FileError, "Failed to open file %s: %s", file_name, ex.what()); + } + return nullptr; + } + + return handler->Open(file_name, access); + } + } + + int Stat(const char *prefixed_file_name, VSIStatBufL *pstatbuf, int n_flags) override { + auto file_name = StripPrefix(prefixed_file_name); + auto &fs = FileSystem::GetFileSystem(context); + + memset(pstatbuf, 0, sizeof(VSIStatBufL)); + + if (IsStdCharDev(file_name)) { + pstatbuf->st_mode = S_IFCHR; + return 0; + } + + if (!(fs.FileExists(file_name) || (!FileSystem::IsRemoteFile(file_name) && fs.DirectoryExists(file_name)))) { + return -1; + } + +#ifdef _WIN32 + if (!FileSystem::IsRemoteFile(file_name) && fs.DirectoryExists(file_name)) { + 
pstatbuf->st_mode = S_IFDIR; + return 0; + } +#endif + + unique_ptr file; + try { + file = fs.OpenFile(file_name, FileFlags::FILE_FLAGS_READ | FileCompressionType::AUTO_DETECT | + FileFlags::FILE_FLAGS_NULL_IF_NOT_EXISTS); + } catch (std::exception &ex) { + return -1; + } + if (!file) { + return -1; + } + + pstatbuf->st_size = static_cast(fs.GetFileSize(*file)); + pstatbuf->st_mtime = fs.GetLastModifiedTime(*file); + + auto type = file->GetType(); + switch (type) { + // These are the only three types present on all platforms + case FileType::FILE_TYPE_REGULAR: + pstatbuf->st_mode = S_IFREG; + break; + case FileType::FILE_TYPE_DIR: + pstatbuf->st_mode = S_IFDIR; + break; + case FileType::FILE_TYPE_CHARDEV: + pstatbuf->st_mode = S_IFCHR; + break; + default: + // HTTPFS returns invalid type for everything basically. + if (FileSystem::IsRemoteFile(file_name)) { + pstatbuf->st_mode = S_IFREG; + } else { + return -1; + } + } + + return 0; + } + + bool IsLocal(const char *prefixed_file_name) override { + auto file_name = StripPrefix(prefixed_file_name); + return !FileSystem::IsRemoteFile(file_name); + } + + int Mkdir(const char *prefixed_dir_name, long mode) override { + auto dir_name = StripPrefix(prefixed_dir_name); + auto &fs = FileSystem::GetFileSystem(context); + + fs.CreateDirectory(dir_name); + return 0; + } + + int Rmdir(const char *prefixed_dir_name) override { + auto dir_name = StripPrefix(prefixed_dir_name); + auto &fs = FileSystem::GetFileSystem(context); + + fs.RemoveDirectory(dir_name); + return 0; + } + + int RmdirRecursive(const char *prefixed_dir_name) override { + auto dir_name = StripPrefix(prefixed_dir_name); + auto &fs = FileSystem::GetFileSystem(context); + + fs.RemoveDirectory(dir_name); + return 0; + } + + char **ReadDirEx(const char *prefixed_dir_name, int max_files) override { + auto dir_name = StripPrefix(prefixed_dir_name); + auto &fs = FileSystem::GetFileSystem(context); + + CPLStringList files; + auto files_count = 0; + fs.ListFiles(dir_name, 
[&](const string &file_name, bool is_dir) { + if (files_count >= max_files) { + return; + } + const auto tmp = AddPrefix(file_name); + files.AddString(tmp.c_str()); + files_count++; + }); + return files.StealList(); + } + + char **SiblingFiles(const char *prefixed_file_name) override { + auto file_name = StripPrefix(prefixed_file_name); + + auto &fs = FileSystem::GetFileSystem(context); + CPLStringList files; + + auto file_name_without_ext = + fs.JoinPath(StringUtil::GetFilePath(file_name), StringUtil::GetFileStem(file_name)); + auto file_glob = file_name_without_ext + ".*"; + + auto file_vector = fs.Glob(file_glob); + for (auto &file : file_vector) { + auto tmp = AddPrefix(file); + files.AddString(tmp.c_str()); + } + return files.StealList(); + } + + int HasOptimizedReadMultiRange(const char *pszPath) override { + return 0; + } + + int Unlink(const char *prefixed_file_name) override { + auto file_name = StripPrefix(prefixed_file_name); + auto &fs = FileSystem::GetFileSystem(context); + try { + fs.RemoveFile(file_name); + return 0; + } catch (std::exception &ex) { + return -1; + } + } +}; + +//###################################################################################################################### +// Context State +//###################################################################################################################### +// We give every client a unique prefix so that multiple connections can use their own attached file systems. +// This is necessary because GDAL is not otherwise aware of the connection context. 
+ +class GDALClientContextState final : public ClientContextState { + ClientContext &context; + string client_prefix; + DuckDBFileSystemHandler *fs_handler; + +public: + explicit GDALClientContextState(ClientContext &context); + ~GDALClientContextState() override; + void QueryEnd() override; + string GetPrefix(const string &value) const; + static GDALClientContextState &GetOrCreate(ClientContext &context); +}; + +GDALClientContextState::GDALClientContextState(ClientContext &context) : context(context) { + + // Create a new random prefix for this client + client_prefix = StringUtil::Format("/vsiduckdb-%s/", UUID::ToString(UUID::GenerateRandomUUID())); + + // Create a new file handler responding to this prefix + fs_handler = new DuckDBFileSystemHandler(client_prefix, context); + + // Register the file handler + VSIFileManager::InstallHandler(client_prefix, fs_handler); + + // Also pass a reference to the client context +} + +GDALClientContextState::~GDALClientContextState() { + // Uninstall the file handler for this prefix + VSIFileManager::RemoveHandler(client_prefix); + + // Delete the file handler + delete fs_handler; +} + +void GDALClientContextState::QueryEnd() { + +} + +string GDALClientContextState::GetPrefix(const string &value) const { + // If the user explicitly asked for a VSI prefix, we don't add our own + if (StringUtil::StartsWith(value, "/vsi")) { + if (!context.db->config.options.enable_external_access) { + throw PermissionException("Cannot open file '%s' with VSI prefix: External access is disabled", value); + } + return value; + } + return client_prefix + value; +} + +GDALClientContextState &GDALClientContextState::GetOrCreate(ClientContext &context) { + auto gdal_state = context.registered_state->GetOrCreate("gdal", context); + return *gdal_state; +} + +//###################################################################################################################### +// Functions 
+//###################################################################################################################### + +//====================================================================================================================== +// ST_Read +//====================================================================================================================== + +struct ST_Read : ArrowTableFunction { + + //------------------------------------------------------------------------------------------------------------------ + // Misc + //------------------------------------------------------------------------------------------------------------------ + enum class SpatialFilterType { Wkb, Rectangle }; + + struct SpatialFilter { + SpatialFilterType type; + explicit SpatialFilter(SpatialFilterType type_p) : type(type_p) {}; + }; + + struct RectangleSpatialFilter : SpatialFilter { + double min_x, min_y, max_x, max_y; + RectangleSpatialFilter(double min_x_p, double min_y_p, double max_x_p, double max_y_p) + : SpatialFilter(SpatialFilterType::Rectangle), min_x(min_x_p), min_y(min_y_p), max_x(max_x_p), + max_y(max_y_p) { + } + }; + + struct WKBSpatialFilter : SpatialFilter { + OGRGeometryH geom; + explicit WKBSpatialFilter(const string &wkb_p) : SpatialFilter(SpatialFilterType::Wkb), geom(nullptr) { + auto ok = OGR_G_CreateFromWkb(wkb_p.c_str(), nullptr, &geom, (int)wkb_p.size()); + if (ok != OGRERR_NONE) { + throw InvalidInputException("WKBSpatialFilter: could not create geometry from WKB"); + } + } + ~WKBSpatialFilter() { + OGR_G_DestroyGeometry(geom); + } + }; + + static void TryApplySpatialFilter(OGRLayer *layer, SpatialFilter *spatial_filter) { + if (spatial_filter != nullptr) { + if (spatial_filter->type == SpatialFilterType::Rectangle) { + auto &rect = static_cast(*spatial_filter); + layer->SetSpatialFilterRect(rect.min_x, rect.min_y, rect.max_x, rect.max_y); + } else if (spatial_filter->type == SpatialFilterType::Wkb) { + auto &filter = 
static_cast(*spatial_filter); + layer->SetSpatialFilter(OGRGeometry::FromHandle(filter.geom)); + } + } + } + + //------------------------------------------------------------------------------------------------------------------ + // Bind + //------------------------------------------------------------------------------------------------------------------ + struct BindData final : TableFunctionData { + + int layer_idx = 0; + bool sequential_layer_scan = false; + bool keep_wkb = false; + unordered_set geometry_column_ids = {}; + unique_ptr spatial_filter = nullptr; + + // before they are renamed + vector all_names = {}; + vector all_types = {}; + ArrowTableType arrow_table = {}; + + bool has_approximate_feature_count = false; + idx_t approximate_feature_count = 0; + string raw_file_name; + string prefixed_file_name; + CPLStringList dataset_open_options; + CPLStringList dataset_allowed_drivers; + CPLStringList dataset_sibling_files; + CPLStringList layer_creation_options; + }; + + static unique_ptr Bind(ClientContext &context, TableFunctionBindInput &input, + vector &return_types, vector &names) { + + // Result + auto result = make_uniq(); + + auto options_param = input.named_parameters.find("open_options"); + if (options_param != input.named_parameters.end()) { + for (auto ¶m : ListValue::GetChildren(options_param->second)) { + result->dataset_open_options.AddString(StringValue::Get(param).c_str()); + } + } + + auto drivers_param = input.named_parameters.find("allowed_drivers"); + if (drivers_param != input.named_parameters.end()) { + for (auto ¶m : ListValue::GetChildren(drivers_param->second)) { + result->dataset_allowed_drivers.AddString(StringValue::Get(param).c_str()); + } + } + + // Now we can open the dataset + auto &ctx_state = GDALClientContextState::GetOrCreate(context); + + auto siblings_params = input.named_parameters.find("sibling_files"); + if (siblings_params != input.named_parameters.end()) { + for (auto ¶m : 
ListValue::GetChildren(siblings_params->second)) { + result->dataset_sibling_files.AddString(ctx_state.GetPrefix(StringValue::Get(param)).c_str()); + } + } + + result->raw_file_name = input.inputs[0].GetValue(); + result->prefixed_file_name = ctx_state.GetPrefix(result->raw_file_name); + + auto dataset = GDALDatasetUniquePtr(GDALDataset::Open( + result->prefixed_file_name.c_str(), GDAL_OF_VECTOR | GDAL_OF_VERBOSE_ERROR, result->dataset_allowed_drivers, + result->dataset_open_options, result->dataset_sibling_files)); + + if (dataset == nullptr) { + auto error = string(CPLGetLastErrorMsg()); + throw IOException("Could not open file: " + result->raw_file_name + " (" + error + ")"); + } + + // Double check that the dataset have any layers + if (dataset->GetLayerCount() <= 0) { + throw IOException("Dataset does not contain any layers"); + } + + // Now we can bind the additonal options + bool max_batch_size_set = false; + for (auto &kv : input.named_parameters) { + auto loption = StringUtil::Lower(kv.first); + if (loption == "layer") { + + // Find layer by index + if (kv.second.type() == LogicalType::INTEGER) { + auto layer_idx = IntegerValue::Get(kv.second); + if (layer_idx < 0) { + throw BinderException("Layer index must be positive"); + } + if (layer_idx > dataset->GetLayerCount()) { + throw BinderException( + StringUtil::Format("Layer index too large (%s > %s)", layer_idx, dataset->GetLayerCount())); + } + result->layer_idx = layer_idx; + } + + // Find layer by name + if (kv.second.type() == LogicalTypeId::VARCHAR) { + auto name = StringValue::Get(kv.second).c_str(); + bool found = false; + for (auto layer_idx = 0; layer_idx < dataset->GetLayerCount(); layer_idx++) { + if (strcmp(dataset->GetLayer(layer_idx)->GetName(), name) == 0) { + result->layer_idx = layer_idx; + found = true; + break; + } + } + if (!found) { + throw BinderException(StringUtil::Format("Layer '%s' could not be found in dataset", name)); + } + } + } + + if (loption == "spatial_filter_box" && 
kv.second.type() == GeoTypes::BOX_2D()) { + if (result->spatial_filter) { + throw BinderException("Only one spatial filter can be specified"); + } + auto &children = StructValue::GetChildren(kv.second); + auto minx = DoubleValue::Get(children[0]); + auto miny = DoubleValue::Get(children[1]); + auto maxx = DoubleValue::Get(children[2]); + auto maxy = DoubleValue::Get(children[3]); + result->spatial_filter = make_uniq(minx, miny, maxx, maxy); + } + + if (loption == "spatial_filter" && kv.second.type() == GeoTypes::WKB_BLOB()) { + if (result->spatial_filter) { + throw BinderException("Only one spatial filter can be specified"); + } + auto wkb = StringValue::Get(kv.second); + result->spatial_filter = make_uniq(wkb); + } + + if (loption == "sequential_layer_scan") { + result->sequential_layer_scan = BooleanValue::Get(kv.second); + } + + if (loption == "max_batch_size") { + auto max_batch_size = IntegerValue::Get(kv.second); + if (max_batch_size <= 0) { + throw BinderException("'max_batch_size' parameter must be positive"); + } + auto str = StringUtil::Format("MAX_FEATURES_IN_BATCH=%d", max_batch_size); + result->layer_creation_options.AddString(str.c_str()); + max_batch_size_set = true; + } + + if (loption == "keep_wkb") { + result->keep_wkb = BooleanValue::Get(kv.second); + } + } + + // Defaults + result->layer_creation_options.AddString("INCLUDE_FID=NO"); + if (!max_batch_size_set) { + // Set default max batch size to standard vector size + auto str = StringUtil::Format("MAX_FEATURES_IN_BATCH=%d", STANDARD_VECTOR_SIZE); + result->layer_creation_options.AddString(str.c_str()); + } + + // Get the schema for the selected layer + auto layer = dataset->GetLayer(result->layer_idx); + + TryApplySpatialFilter(layer, result->spatial_filter.get()); + + // Check if we can get an approximate feature count + result->approximate_feature_count = 0; + result->has_approximate_feature_count = false; + if (!result->sequential_layer_scan) { + // Dont force compute the count if its 
expensive + auto count = layer->GetFeatureCount(false); + if (count > -1) { + result->approximate_feature_count = count; + result->has_approximate_feature_count = true; + } + } + + struct ArrowArrayStream stream; + if (!layer->GetArrowStream(&stream, result->layer_creation_options)) { + // layer is owned by GDAL, we do not need to destory it + throw IOException("Could not get arrow stream from layer"); + } + + struct ArrowSchema schema; + if (stream.get_schema(&stream, &schema) != 0) { + if (stream.release) { + stream.release(&stream); + } + throw IOException("Could not get arrow schema from layer"); + } + + // The Arrow API will return attributes in this order + // 1. FID column + // 2. all ogr field attributes + // 3. all geometry columns + + auto attribute_count = schema.n_children; + auto attributes = schema.children; + + result->all_names.reserve(attribute_count + 1); + names.reserve(attribute_count + 1); + + for (idx_t col_idx = 0; col_idx < (idx_t)attribute_count; col_idx++) { + auto &attribute = *attributes[col_idx]; + + const char ogc_flag[] = {'\x01', '\0', '\0', '\0', '\x14', '\0', '\0', '\0', 'A', 'R', 'R', 'O', 'W', + ':', 'e', 'x', 't', 'e', 'n', 's', 'i', 'o', 'n', ':', 'n', 'a', + 'm', 'e', '\a', '\0', '\0', '\0', 'o', 'g', 'c', '.', 'w', 'k', 'b'}; + + auto arrow_type = ArrowType::GetArrowLogicalType(DBConfig::GetConfig(context), attribute); + + auto column_name = string(attribute.name); + auto duckdb_type = arrow_type->GetDuckType(); + + if (duckdb_type.id() == LogicalTypeId::BLOB && attribute.metadata != nullptr && + strncmp(attribute.metadata, ogc_flag, sizeof(ogc_flag)) == 0) { + // This is a WKB geometry blob + result->arrow_table.AddColumn(col_idx, std::move(arrow_type)); + + if (result->keep_wkb) { + return_types.emplace_back(GeoTypes::WKB_BLOB()); + } else { + return_types.emplace_back(GeoTypes::GEOMETRY()); + if (column_name == "wkb_geometry") { + column_name = "geom"; + } + } + result->geometry_column_ids.insert(col_idx); + + } else if 
(attribute.dictionary) { + // The value type of a dictionary-encoded column lives in the nested dictionary schema; + // `attribute` itself only describes the index type (already captured in arrow_type above) + auto dictionary_type = ArrowType::GetArrowLogicalType(DBConfig::GetConfig(context), *attribute.dictionary); + return_types.emplace_back(dictionary_type->GetDuckType()); + arrow_type->SetDictionary(std::move(dictionary_type)); + result->arrow_table.AddColumn(col_idx, std::move(arrow_type)); + } else { + return_types.emplace_back(arrow_type->GetDuckType()); + result->arrow_table.AddColumn(col_idx, std::move(arrow_type)); + } + + // keep these around for projection/filter pushdown later + // does GDAL even allow duplicate/missing names? + result->all_names.push_back(column_name); + + // Unnamed columns get a positional placeholder name in the output schema + if (column_name.empty()) { + names.push_back("v" + to_string(col_idx)); + } else { + names.push_back(column_name); + } + } + + result->all_types = return_types; + + schema.release(&schema); + stream.release(&stream); + + // Rename columns if they are duplicates + unordered_map name_map; + for (auto &column_name : names) { + // put it all lower_case + auto low_column_name = StringUtil::Lower(column_name); + if (name_map.find(low_column_name) == name_map.end()) { + // Name does not exist yet + name_map[low_column_name]++; + } else { + // Name already exists, we add _x where x is the repetition number + string new_column_name = column_name + "_" + std::to_string(name_map[low_column_name]); + auto new_column_name_low = StringUtil::Lower(new_column_name); + while (name_map.find(new_column_name_low) != name_map.end()) { + // This name is already here due to a previous definition + name_map[low_column_name]++; + new_column_name = column_name + "_" + std::to_string(name_map[low_column_name]); + new_column_name_low = StringUtil::Lower(new_column_name); + } + column_name = new_column_name; + name_map[new_column_name_low]++; + } + } + + return std::move(result); + } + + //------------------------------------------------------------------------------------------------------------------ + // Init Global + 
//------------------------------------------------------------------------------------------------------------------ + struct GlobalState final : ArrowScanGlobalState { + GDALDatasetUniquePtr dataset; + atomic lines_read; + + explicit GlobalState(GDALDatasetUniquePtr dataset) : dataset(std::move(dataset)), lines_read(0) { + } + }; + + static unique_ptr InitGlobal(ClientContext &context, TableFunctionInitInput &input) { + auto &data = input.bind_data->Cast(); + + auto dataset = GDALDatasetUniquePtr(GDALDataset::Open( + data.prefixed_file_name.c_str(), GDAL_OF_VECTOR | GDAL_OF_VERBOSE_ERROR | GDAL_OF_READONLY, + data.dataset_allowed_drivers, data.dataset_open_options, data.dataset_sibling_files)); + if (dataset == nullptr) { + const auto error = string(CPLGetLastErrorMsg()); + throw IOException("Could not open file: " + data.raw_file_name + " (" + error + ")"); + } + + auto global_state = make_uniq(std::move(dataset)); + auto &gstate = *global_state; + + // Open the layer + OGRLayer *layer = nullptr; + if (data.sequential_layer_scan) { + // Get the layer from the dataset by scanning through the layers + for (int i = 0; i < gstate.dataset->GetLayerCount(); i++) { + layer = gstate.dataset->GetLayer(i); + if (i == data.layer_idx) { + // desired layer found + break; + } + // else scan through and empty the layer + OGRFeature *feature; + while ((feature = layer->GetNextFeature()) != nullptr) { + OGRFeature::DestroyFeature(feature); + } + } + } else { + // Otherwise get the layer directly + layer = gstate.dataset->GetLayer(data.layer_idx); + } + if (!layer) { + throw IOException("Could not get layer"); + } + + // Apply spatial filter (if we got one) + TryApplySpatialFilter(layer, data.spatial_filter.get()); + // TODO: Apply projection pushdown + + // Create arrow stream from layer + + gstate.stream = make_uniq(); + + // set layer options + if (!layer->GetArrowStream(&gstate.stream->arrow_array_stream, data.layer_creation_options)) { + throw IOException("Could not get arrow 
stream"); + } + + // Set max 1 thread + gstate.max_threads = 1; + + if (input.CanRemoveFilterColumns()) { + gstate.projection_ids = input.projection_ids; + for (const auto &col_idx : input.column_ids) { + if (col_idx == COLUMN_IDENTIFIER_ROW_ID) { + gstate.scanned_types.emplace_back(LogicalType::ROW_TYPE); + } else { + gstate.scanned_types.push_back(data.all_types[col_idx]); + } + } + } + + return std::move(global_state); + } + + //------------------------------------------------------------------------------------------------------------------ + // Init Local + //------------------------------------------------------------------------------------------------------------------ + struct LocalState final : ArrowScanLocalState { + ArenaAllocator arena; + GeometryAllocator alloc; + + static constexpr auto MAX_WKB_STACK_DEPTH = 128; + uint32_t wkb_stack[MAX_WKB_STACK_DEPTH] = {}; + sgl::ops::wkb_reader wkb_reader = {}; + + explicit LocalState(unique_ptr current_chunk, ClientContext &context) + : ArrowScanLocalState(std::move(current_chunk), context), arena(BufferAllocator::Get(context)), + alloc(arena) { + + // Setup WKB reader + wkb_reader.copy_vertices = false; + wkb_reader.alloc = &alloc; + wkb_reader.allow_mixed_zm = true; + wkb_reader.nan_as_empty = false; + + // Setup stack buffer + wkb_reader.stack_buf = wkb_stack; + wkb_reader.stack_cap = MAX_WKB_STACK_DEPTH; + } + + void ConvertWKB(Vector &source, Vector &target, idx_t count) { + + // Reset allocator + arena.Reset(); + + UnaryExecutor::Execute(source, target, count, [&](const string_t &wkb) { + wkb_reader.buf = wkb.GetDataUnsafe(); + wkb_reader.end = wkb_reader.buf + wkb.GetSize(); + + sgl::geometry geom(sgl::geometry_type::INVALID); + + if (!sgl::ops::wkb_reader_try_parse(&wkb_reader, &geom)) { + const auto error = sgl::ops::wkb_reader_get_error_message(&wkb_reader); + throw InvalidInputException("Could not parse WKB input: %s", error); + } + + // Enforce that we have a cohesive ZM layout + if 
(wkb_reader.has_mixed_zm) { + sgl::ops::force_zm(alloc, &geom, wkb_reader.has_any_z, wkb_reader.has_any_m, 0, 0); + } + + // Serialize the geometry into a blob + const auto size = Serde::GetRequiredSize(geom); + auto blob = StringVector::EmptyString(target, size); + Serde::Serialize(geom, blob.GetDataWriteable(), size); + blob.Finalize(); + return blob; + }); + } + }; + + static unique_ptr InitLocal(ExecutionContext &context, TableFunctionInitInput &input, + GlobalTableFunctionState *gstate_p) { + + auto &gstate = gstate_p->Cast(); + auto current_chunk = make_uniq(); + auto result = make_uniq(std::move(current_chunk), context.client); + + result->column_ids = input.column_ids; + result->filters = input.filters.get(); + + if (input.CanRemoveFilterColumns()) { + result->all_columns.Initialize(context.client, gstate.scanned_types); + } + + if (!ArrowTableFunction::ArrowScanParallelStateNext(context.client, input.bind_data.get(), *result, gstate)) { + return nullptr; + } + + return std::move(result); + } + + //------------------------------------------------------------------------------------------------------------------ + // Execute + //------------------------------------------------------------------------------------------------------------------ + static void Execute(ClientContext &context, TableFunctionInput &input, DataChunk &output) { + if (!input.local_state) { + return; + } + + auto &data = input.bind_data->Cast(); + auto &state = input.local_state->Cast(); + auto &gstate = input.global_state->Cast(); + + //! 
Out of tuples in this chunk + if (state.chunk_offset >= static_cast(state.chunk->arrow_array.length)) { + if (!ArrowTableFunction::ArrowScanParallelStateNext(context, input.bind_data.get(), state, gstate)) { + return; + } + } + + auto output_size = MinValue(STANDARD_VECTOR_SIZE, state.chunk->arrow_array.length - state.chunk_offset); + gstate.lines_read += output_size; + + if (gstate.CanRemoveFilterColumns()) { + state.all_columns.Reset(); + state.all_columns.SetCardinality(output_size); + ArrowTableFunction::ArrowToDuckDB(state, data.arrow_table.GetColumns(), state.all_columns, + gstate.lines_read - output_size, false); + output.ReferenceColumns(state.all_columns, gstate.projection_ids); + } else { + output.SetCardinality(output_size); + ArrowTableFunction::ArrowToDuckDB(state, data.arrow_table.GetColumns(), output, + gstate.lines_read - output_size, false); + } + + if (!data.keep_wkb) { + // Find the geometry columns + for (idx_t col_idx = 0; col_idx < state.column_ids.size(); col_idx++) { + auto mapped_idx = state.column_ids[col_idx]; + if (data.geometry_column_ids.find(mapped_idx) != data.geometry_column_ids.end()) { + // Found a geometry column + // Convert the WKB columns to a geometry column + + Vector geom_vec(GeoTypes::GEOMETRY(), output_size); + state.ConvertWKB(output.data[col_idx], geom_vec, output_size); + + output.data[col_idx].ReferenceAndSetType(geom_vec); + } + } + } + + output.Verify(); + state.chunk_offset += output.size(); + } + + //------------------------------------------------------------------------------------------------------------------ + // Cardinality + //------------------------------------------------------------------------------------------------------------------ + static unique_ptr Cardinality(ClientContext &context, const FunctionData *data) { + auto &bind_data = data->Cast(); + auto result = make_uniq(); + + if (bind_data.has_approximate_feature_count) { + result->has_estimated_cardinality = true; + 
result->estimated_cardinality = bind_data.approximate_feature_count; + } + return result; + } + + //------------------------------------------------------------------------------------------------------------------ + // Replacement Scan + //------------------------------------------------------------------------------------------------------------------ + static unique_ptr ReplacementScan(ClientContext &, ReplacementScanInput &input, + optional_ptr) { + auto &table_name = input.table_name; + auto lower_name = StringUtil::Lower(table_name); + // Check if the table name ends with some common geospatial file extensions + if (StringUtil::EndsWith(lower_name, ".gpkg") || StringUtil::EndsWith(lower_name, ".fgb")) { + + auto table_function = make_uniq(); + vector> children; + children.push_back(make_uniq(Value(table_name))); + table_function->function = make_uniq("ST_Read", std::move(children)); + return std::move(table_function); + } + // else not something we can replace + return nullptr; + } + + //------------------------------------------------------------------------------------------------------------------ + // Documentation + //------------------------------------------------------------------------------------------------------------------ + static constexpr auto DOCUMENTATION = R"( + Read and import a variety of geospatial file formats using the GDAL library. + + The `ST_Read` table function is based on the [GDAL](https://gdal.org/index.html) translator library and enables reading spatial data from a variety of geospatial vector file formats as if they were DuckDB tables. + + > See [ST_Drivers](#st_drivers) for a list of supported file formats and drivers. + + Except for the `path` parameter, all parameters are optional. + + | Parameter | Type | Description | + | --------- | -----| ----------- | + | `path` | VARCHAR | The path to the file to read. 
Mandatory | + | `sequential_layer_scan` | BOOLEAN | If set to true, the table function will scan through all layers sequentially and return the first layer that matches the given layer name. This is required for some drivers to work properly, e.g., the OSM driver. | + | `spatial_filter` | WKB_BLOB | If set to a WKB blob, the table function will only return rows that intersect with the given WKB geometry. Some drivers may support efficient spatial filtering natively, in which case it will be pushed down. Otherwise the filtering is done by GDAL which may be much slower. | + | `open_options` | VARCHAR[] | A list of key-value pairs that are passed to the GDAL driver to control the opening of the file. E.g., the GeoJSON driver supports a FLATTEN_NESTED_ATTRIBUTES=YES option to flatten nested attributes. | + | `layer` | VARCHAR | The name of the layer to read from the file. If NULL, the first layer is returned. Can also be a layer index (starting at 0). | + | `allowed_drivers` | VARCHAR[] | A list of GDAL driver names that are allowed to be used to open the file. If empty, all drivers are allowed. | + | `sibling_files` | VARCHAR[] | A list of sibling files that are required to open the file. E.g., the ESRI Shapefile driver requires a .shx file to be present. Although most of the time these can be discovered automatically. | + | `spatial_filter_box` | BOX_2D | If set to a BOX_2D, the table function will only return rows that intersect with the given bounding box. Similar to spatial_filter. | + | `keep_wkb` | BOOLEAN | If set, the table function will return geometries in a wkb_geometry column with the type WKB_BLOB (which can be cast to BLOB) instead of GEOMETRY. This is useful if you want to use DuckDB with more exotic geometry subtypes that DuckDB spatial doesnt support representing in the GEOMETRY type yet. | + + Note that GDAL is single-threaded, so this table function will not be able to make full use of parallelism. 
+ + By using `ST_Read`, the spatial extension also provides “replacement scans” for common geospatial file formats, allowing you to query files of these formats as if they were tables directly. + + ```sql + SELECT * FROM './path/to/some/shapefile/dataset.shp'; + ``` + + In practice this is just syntax-sugar for calling ST_Read, so there is no difference in performance. If you want to pass additional options, you should use the ST_Read table function directly. + + The following formats are currently recognized by their file extension: + + | Format | Extension | + | ------ | --------- | + | ESRI ShapeFile | .shp | + | GeoPackage | .gpkg | + | FlatGeoBuf | .fgb | + )"; + + static constexpr auto EXAMPLE = R"( + -- Read a Shapefile + SELECT * FROM ST_Read('some/file/path/filename.shp'); + + -- Read a GeoJSON file + CREATE TABLE my_geojson_table AS SELECT * FROM ST_Read('some/file/path/filename.json'); + )"; + + //------------------------------------------------------------------------------------------------------------------ + // Register + //------------------------------------------------------------------------------------------------------------------ + static void Register(DatabaseInstance &db) { + TableFunction func("ST_Read", {LogicalType::VARCHAR}, Execute, Bind, InitGlobal, InitLocal); + + func.cardinality = Cardinality; + func.get_partition_data = ArrowTableFunction::ArrowGetPartitionData; + + func.projection_pushdown = true; + + func.named_parameters["open_options"] = LogicalType::LIST(LogicalType::VARCHAR); + func.named_parameters["allowed_drivers"] = LogicalType::LIST(LogicalType::VARCHAR); + func.named_parameters["sibling_files"] = LogicalType::LIST(LogicalType::VARCHAR); + func.named_parameters["spatial_filter_box"] = GeoTypes::BOX_2D(); + func.named_parameters["spatial_filter"] = GeoTypes::WKB_BLOB(); + func.named_parameters["layer"] = LogicalType::VARCHAR; + func.named_parameters["sequential_layer_scan"] = LogicalType::BOOLEAN; + 
func.named_parameters["max_batch_size"] = LogicalType::INTEGER; + func.named_parameters["keep_wkb"] = LogicalType::BOOLEAN; + ExtensionUtil::RegisterFunction(db, func); + + FunctionBuilder::AddTableFunctionDocs(db, "ST_Read", DOCUMENTATION, EXAMPLE); + + // Replacement scan + auto &config = DBConfig::GetConfig(db); + config.replacement_scans.emplace_back(ReplacementScan); + } +}; + +//====================================================================================================================== +// ST_Read_Meta +//====================================================================================================================== +const auto GEOMETRY_FIELD_TYPE = LogicalType::STRUCT({ + {"name", LogicalType::VARCHAR}, + {"type", LogicalType::VARCHAR}, + {"nullable", LogicalType::BOOLEAN}, + {"crs", LogicalType::STRUCT({ + {"name", LogicalType::VARCHAR}, + {"auth_name", LogicalType::VARCHAR}, + {"auth_code", LogicalType::VARCHAR}, + {"wkt", LogicalType::VARCHAR}, + {"proj4", LogicalType::VARCHAR}, + {"projjson", LogicalType::VARCHAR}, + })}, +}); + +const auto STANDARD_FIELD_TYPE = LogicalType::STRUCT({ + {"name", LogicalType::VARCHAR}, + {"type", LogicalType::VARCHAR}, + {"subtype", LogicalType::VARCHAR}, + {"nullable", LogicalType::BOOLEAN}, + {"unique", LogicalType::BOOLEAN}, + {"width", LogicalType::BIGINT}, + {"precision", LogicalType::BIGINT}, +}); + +const auto LAYER_TYPE = LogicalType::STRUCT({ + {"name", LogicalType::VARCHAR}, + {"feature_count", LogicalType::BIGINT}, + {"geometry_fields", LogicalType::LIST(GEOMETRY_FIELD_TYPE)}, + {"fields", LogicalType::LIST(STANDARD_FIELD_TYPE)}, +}); + +struct ST_Read_Meta { + + //------------------------------------------------------------------------------------------------------------------ + // Bind + //------------------------------------------------------------------------------------------------------------------ + struct BindData final : TableFunctionData { + vector file_names; + + explicit 
BindData(vector file_names_p) : file_names(std::move(file_names_p)) { + } + }; + + static unique_ptr Bind(ClientContext &context, TableFunctionBindInput &input, + vector &return_types, vector &names) { + + names.push_back("file_name"); + return_types.push_back(LogicalType::VARCHAR); + + names.push_back("driver_short_name"); + return_types.push_back(LogicalType::VARCHAR); + + names.push_back("driver_long_name"); + return_types.push_back(LogicalType::VARCHAR); + + names.push_back("layers"); + return_types.push_back(LogicalType::LIST(LAYER_TYPE)); + + // TODO: Add metadata, domains, relationships + + // Get the filename list + const auto mfreader = MultiFileReader::Create(input.table_function); + const auto mflist = mfreader->CreateFileList(context, input.inputs[0], FileGlobOptions::ALLOW_EMPTY); + return make_uniq_base(mflist->GetAllFiles()); + } + + //------------------------------------------------------------------------------------------------------------------ + // Init + //------------------------------------------------------------------------------------------------------------------ + struct State final : GlobalTableFunctionState { + idx_t current_idx; + explicit State() : current_idx(0) { + } + }; + + static unique_ptr Init(ClientContext &context, TableFunctionInitInput &input) { + return make_uniq_base(); + } + + //------------------------------------------------------------------------------------------------------------------ + // Execute + //------------------------------------------------------------------------------------------------------------------ + static Value GetLayerData(const GDALDatasetUniquePtr &dataset) { + + vector layer_values; + for (const auto &layer : dataset->GetLayers()) { + child_list_t layer_value_fields; + + layer_value_fields.emplace_back("name", Value(layer->GetName())); + layer_value_fields.emplace_back("feature_count", Value(static_cast(layer->GetFeatureCount()))); + + vector geometry_fields; + for (const auto &field : 
layer->GetLayerDefn()->GetGeomFields()) { + child_list_t geometry_field_value_fields; + auto field_name = field->GetNameRef(); + if (std::strlen(field_name) == 0) { + field_name = "geom"; + } + geometry_field_value_fields.emplace_back("name", Value(field_name)); + geometry_field_value_fields.emplace_back("type", Value(OGRGeometryTypeToName(field->GetType()))); + geometry_field_value_fields.emplace_back("nullable", Value(static_cast(field->IsNullable()))); + + const auto crs = field->GetSpatialRef(); + if (crs != nullptr) { + child_list_t crs_value_fields; + crs_value_fields.emplace_back("name", Value(crs->GetName())); + crs_value_fields.emplace_back("auth_name", Value(crs->GetAuthorityName(nullptr))); + crs_value_fields.emplace_back("auth_code", Value(crs->GetAuthorityCode(nullptr))); + + char *wkt_ptr = nullptr; + crs->exportToWkt(&wkt_ptr); + crs_value_fields.emplace_back("wkt", wkt_ptr ? Value(wkt_ptr) : Value()); + CPLFree(wkt_ptr); + + char *proj4_ptr = nullptr; + crs->exportToProj4(&proj4_ptr); + crs_value_fields.emplace_back("proj4", proj4_ptr ? Value(proj4_ptr) : Value()); + CPLFree(proj4_ptr); + + char *projjson_ptr = nullptr; + crs->exportToPROJJSON(&projjson_ptr, nullptr); + crs_value_fields.emplace_back("projjson", projjson_ptr ? 
Value(projjson_ptr) : Value()); + CPLFree(projjson_ptr); + + geometry_field_value_fields.emplace_back("crs", Value::STRUCT(crs_value_fields)); + } + + geometry_fields.push_back(Value::STRUCT(geometry_field_value_fields)); + } + layer_value_fields.emplace_back("geometry_fields", + Value::LIST(GEOMETRY_FIELD_TYPE, std::move(geometry_fields))); + + vector standard_fields; + for (const auto &field : layer->GetLayerDefn()->GetFields()) { + child_list_t standard_field_value_fields; + standard_field_value_fields.emplace_back("name", Value(field->GetNameRef())); + standard_field_value_fields.emplace_back("type", Value(OGR_GetFieldTypeName(field->GetType()))); + standard_field_value_fields.emplace_back("subtype", + Value(OGR_GetFieldSubTypeName(field->GetSubType()))); + standard_field_value_fields.emplace_back("nullable", Value(field->IsNullable())); + standard_field_value_fields.emplace_back("unique", Value(field->IsUnique())); + standard_field_value_fields.emplace_back("width", Value(field->GetWidth())); + standard_field_value_fields.emplace_back("precision", Value(field->GetPrecision())); + standard_fields.push_back(Value::STRUCT(standard_field_value_fields)); + } + layer_value_fields.emplace_back("fields", Value::LIST(STANDARD_FIELD_TYPE, std::move(standard_fields))); + + layer_values.push_back(Value::STRUCT(layer_value_fields)); + } + + return Value::LIST(LAYER_TYPE, std::move(layer_values)); + } + + static void Execute(ClientContext &context, TableFunctionInput &input, DataChunk &output) { + auto &bind_data = input.bind_data->Cast(); + auto &state = input.global_state->Cast(); + + auto out_size = MinValue(STANDARD_VECTOR_SIZE, bind_data.file_names.size() - state.current_idx); + + for (idx_t out_idx = 0; out_idx < out_size; out_idx++, state.current_idx++) { + auto file_name = bind_data.file_names[state.current_idx]; + auto prefixed_file_name = GDALClientContextState::GetOrCreate(context).GetPrefix(file_name); + + GDALDatasetUniquePtr dataset; + try { + dataset = 
GDALDatasetUniquePtr( + GDALDataset::Open(prefixed_file_name.c_str(), GDAL_OF_VECTOR | GDAL_OF_VERBOSE_ERROR)); + } catch (...) { + // Just skip anything we cant open + out_idx--; + out_size--; + continue; + } + + // ST_Read checks this same call for a null result, so a null dataset is skipped here as well + // instead of being dereferenced below + if (!dataset) { + out_idx--; + out_size--; + continue; + } + + output.data[0].SetValue(out_idx, file_name); + output.data[1].SetValue(out_idx, dataset->GetDriver()->GetDescription()); + output.data[2].SetValue(out_idx, dataset->GetDriver()->GetMetadataItem(GDAL_DMD_LONGNAME)); + output.data[3].SetValue(out_idx, GetLayerData(dataset)); + } + + // out_size has been reduced once for every skipped file, so it equals the number of rows written + output.SetCardinality(out_size); + } + + //------------------------------------------------------------------------------------------------------------------ + // Documentation + //------------------------------------------------------------------------------------------------------------------ + // static constexpr DocTag DOC_TAGS[] = {{"ext", "spatial"}}; + + static constexpr auto DESCRIPTION = R"( + Read the metadata from a variety of geospatial file formats using the GDAL library. + + The `ST_Read_Meta` table function accompanies the `ST_Read` table function, but instead of reading the contents of a file, this function scans the metadata instead. + Since the data model of the underlying GDAL library is quite flexible, most of the interesting metadata is within the returned `layers` column, which is a somewhat complex nested structure of DuckDB `STRUCT` and `LIST` types. 
+ )"; + + static constexpr auto EXAMPLE = R"( + -- Find the coordinate reference system authority name and code for the first layers first geometry column in the file + SELECT + layers[1].geometry_fields[1].crs.auth_name as name, + layers[1].geometry_fields[1].crs.auth_code as code + FROM st_read_meta('../../tmp/data/amsterdam_roads.fgb'); + )"; + + //------------------------------------------------------------------------------------------------------------------ + // Register + //------------------------------------------------------------------------------------------------------------------ + static void Register(DatabaseInstance &db) { + const TableFunction func("ST_Read_Meta", {LogicalType::VARCHAR}, Execute, Bind, Init); + ExtensionUtil::RegisterFunction(db, MultiFileReader::CreateFunctionSet(func)); + + FunctionBuilder::AddTableFunctionDocs(db, "ST_Read_Meta", DESCRIPTION, EXAMPLE); + } +}; + +//====================================================================================================================== +// ST_Drivers +//====================================================================================================================== + +struct ST_Drivers { + + //------------------------------------------------------------------------------------------------------------------ + // Bind + //------------------------------------------------------------------------------------------------------------------ + struct BindData final : TableFunctionData { + idx_t driver_count; + explicit BindData(const idx_t driver_count_p) : driver_count(driver_count_p) { + } + }; + + static unique_ptr Bind(ClientContext &context, TableFunctionBindInput &input, + vector &return_types, vector &names) { + + return_types.emplace_back(LogicalType::VARCHAR); + return_types.emplace_back(LogicalType::VARCHAR); + return_types.emplace_back(LogicalType::BOOLEAN); + return_types.emplace_back(LogicalType::BOOLEAN); + return_types.emplace_back(LogicalType::BOOLEAN); + 
return_types.emplace_back(LogicalType::VARCHAR); + names.emplace_back("short_name"); + names.emplace_back("long_name"); + names.emplace_back("can_create"); + names.emplace_back("can_copy"); + names.emplace_back("can_open"); + names.emplace_back("help_url"); + + return make_uniq_base(GDALGetDriverCount()); + } + + //------------------------------------------------------------------------------------------------------------------ + // Init + //------------------------------------------------------------------------------------------------------------------ + struct State final : GlobalTableFunctionState { + idx_t current_idx; + explicit State() : current_idx(0) { + } + }; + + static unique_ptr Init(ClientContext &context, TableFunctionInitInput &input) { + return make_uniq_base(); + } + + //------------------------------------------------------------------------------------------------------------------ + // Execute + //------------------------------------------------------------------------------------------------------------------ + static void Execute(ClientContext &context, TableFunctionInput &input, DataChunk &output) { + auto &state = input.global_state->Cast(); + auto &bind_data = input.bind_data->Cast(); + + idx_t count = 0; + auto next_idx = MinValue(state.current_idx + STANDARD_VECTOR_SIZE, bind_data.driver_count); + + for (; state.current_idx < next_idx; state.current_idx++) { + auto driver = GDALGetDriver(static_cast(state.current_idx)); + + // Check if the driver is a vector driver + if (GDALGetMetadataItem(driver, GDAL_DCAP_VECTOR, nullptr) == nullptr) { + continue; + } + + auto short_name = Value::CreateValue(GDALGetDriverShortName(driver)); + auto long_name = Value::CreateValue(GDALGetDriverLongName(driver)); + + const char *create_flag = GDALGetMetadataItem(driver, GDAL_DCAP_CREATE, nullptr); + auto create_value = Value::CreateValue(create_flag != nullptr); + + const char *copy_flag = GDALGetMetadataItem(driver, GDAL_DCAP_CREATECOPY, nullptr); + auto 
copy_value = Value::CreateValue(copy_flag != nullptr); + const char *open_flag = GDALGetMetadataItem(driver, GDAL_DCAP_OPEN, nullptr); + auto open_value = Value::CreateValue(open_flag != nullptr); + + // Drivers without a help topic get a NULL VARCHAR instead of a URL + auto help_topic_flag = GDALGetDriverHelpTopic(driver); + auto help_topic_value = help_topic_flag == nullptr + ? Value(LogicalType::VARCHAR) + : Value(StringUtil::Format("https://gdal.org/%s", help_topic_flag)); + + output.data[0].SetValue(count, short_name); + output.data[1].SetValue(count, long_name); + output.data[2].SetValue(count, create_value); + output.data[3].SetValue(count, copy_value); + output.data[4].SetValue(count, open_value); + output.data[5].SetValue(count, help_topic_value); + count++; + } + // count only includes the vector drivers that were emitted; non-vector drivers were skipped above + output.SetCardinality(count); + } + + //------------------------------------------------------------------------------------------------------------------ + // Documentation + //------------------------------------------------------------------------------------------------------------------ + + // static constexpr DocTag DOC_TAGS[] = {{"ext", "spatial"}}; + + static constexpr auto DESCRIPTION = R"( + Returns the list of supported GDAL drivers and file formats + + Note that far from all of these drivers have been tested properly. + Some may require additional options to be passed to work as expected. + If you run into any issues please first [consult the GDAL docs](https://gdal.org/drivers/vector/index.html). 
+ )"; + + static constexpr auto EXAMPLE = R"( + SELECT * FROM ST_Drivers(); + )"; + + //------------------------------------------------------------------------------------------------------------------ + // Register + //------------------------------------------------------------------------------------------------------------------ + static void Register(DatabaseInstance &db) { + const TableFunction func("ST_Drivers", {}, Execute, Bind, Init); + ExtensionUtil::RegisterFunction(db, func); + + FunctionBuilder::AddTableFunctionDocs(db, "ST_Drivers", DESCRIPTION, EXAMPLE); + } +}; + +//====================================================================================================================== +// ST_Write +//====================================================================================================================== +// TODO: This currently uses slow "Value" row-by-row conversions. GDAL now supports writing through arrow, so we should +// move into using that in the future. 
+ +struct ST_Write { + + //------------------------------------------------------------------------------------------------------------------ + // Bind + //------------------------------------------------------------------------------------------------------------------ + struct BindData final : TableFunctionData { + + string file_path; + vector field_sql_types; + vector field_names; + string driver_name; + string layer_name; + CPLStringList dataset_creation_options; + CPLStringList layer_creation_options; + string target_srs; + OGRwkbGeometryType geometry_type = wkbUnknown; + + BindData(string file_path, vector field_sql_types, vector field_names) + : file_path(std::move(file_path)), field_sql_types(std::move(field_sql_types)), + field_names(std::move(field_names)) { + } + }; + + static unique_ptr Bind(ClientContext &context, CopyFunctionBindInput &input, + const vector &names, const vector &sql_types) { + + auto bind_data = make_uniq(input.info.file_path, sql_types, names); + + // check all the options in the copy info + // and set + for (auto &option : input.info.options) { + if (StringUtil::Upper(option.first) == "DRIVER") { + auto set = option.second.front(); + if (set.type().id() == LogicalTypeId::VARCHAR) { + bind_data->driver_name = set.GetValue(); + } else { + throw BinderException("Driver name must be a string"); + } + } else if (StringUtil::Upper(option.first) == "LAYER_NAME") { + auto set = option.second.front(); + if (set.type().id() == LogicalTypeId::VARCHAR) { + bind_data->layer_name = set.GetValue(); + } else { + throw BinderException("Layer name must be a string"); + } + } else if (StringUtil::Upper(option.first) == "LAYER_CREATION_OPTIONS") { + auto set = option.second; + for (auto &s : set) { + if (s.type().id() != LogicalTypeId::VARCHAR) { + throw BinderException("Layer creation options must be strings"); + } + auto str = s.GetValue(); + bind_data->layer_creation_options.AddString(str.c_str()); + } + } else if (StringUtil::Upper(option.first) == 
"DATASET_CREATION_OPTIONS") { + auto set = option.second; + for (auto &s : set) { + if (s.type().id() != LogicalTypeId::VARCHAR) { + throw BinderException("Dataset creation options must be strings"); + } + auto str = s.GetValue(); + bind_data->dataset_creation_options.AddString(str.c_str()); + } + } else if (StringUtil::Upper(option.first) == "GEOMETRY_TYPE") { + auto &set = option.second.front(); + if (set.type().id() == LogicalTypeId::VARCHAR) { + auto type = set.GetValue(); + if (StringUtil::CIEquals(type, "POINT")) { + bind_data->geometry_type = wkbPoint; + } else if (StringUtil::CIEquals(type, "LINESTRING")) { + bind_data->geometry_type = wkbLineString; + } else if (StringUtil::CIEquals(type, "POLYGON")) { + bind_data->geometry_type = wkbPolygon; + } else if (StringUtil::CIEquals(type, "MULTIPOINT")) { + bind_data->geometry_type = wkbMultiPoint; + } else if (StringUtil::CIEquals(type, "MULTILINESTRING")) { + bind_data->geometry_type = wkbMultiLineString; + } else if (StringUtil::CIEquals(type, "MULTIPOLYGON")) { + bind_data->geometry_type = wkbMultiPolygon; + } else if (StringUtil::CIEquals(type, "GEOMETRYCOLLECTION")) { + bind_data->geometry_type = wkbGeometryCollection; + } else { + throw BinderException("Unknown geometry type '%s', expected one of 'POINT', 'LINESTRING', " + "'POLYGON', 'MULTIPOINT', " + "'MULTILINESTRING', 'MULTIPOLYGON', 'GEOMETRYCOLLECTION'", + type); + } + } else { + throw BinderException("Geometry type must be a string"); + } + } else if (StringUtil::Upper(option.first) == "SRS") { + auto &set = option.second.front(); + if (set.type().id() == LogicalTypeId::VARCHAR) { + bind_data->target_srs = set.GetValue(); + } else { + throw BinderException("SRS must be a string"); + } + } else { + throw BinderException("Unknown option '%s'", option.first); + } + // save dataset open options.. i guess? 
+ } + + if (bind_data->driver_name.empty()) { + throw BinderException("Driver name must be specified"); + } + + if (bind_data->layer_name.empty()) { + // Default to the base name of the file + auto &fs = FileSystem::GetFileSystem(context); + bind_data->layer_name = fs.ExtractBaseName(bind_data->file_path); + } + + auto driver = GetGDALDriverManager()->GetDriverByName(bind_data->driver_name.c_str()); + if (!driver) { + throw BinderException("Unknown driver '%s'", bind_data->driver_name); + } + + // Try get the file extension from the driver + auto file_ext = driver->GetMetadataItem(GDAL_DMD_EXTENSION); + if (file_ext) { + input.file_extension = file_ext; + } else { + // Space separated list of file extensions + auto file_exts = driver->GetMetadataItem(GDAL_DMD_EXTENSIONS); + if (file_exts) { + auto exts = StringUtil::Split(file_exts, ' '); + if (!exts.empty()) { + input.file_extension = exts[0]; + } + } + } + + // Driver specific checks + if (bind_data->driver_name == "OpenFileGDB" && bind_data->geometry_type == wkbUnknown) { + throw BinderException("OpenFileGDB requires 'GEOMETRY_TYPE' parameter to be set when writing!"); + } + + return std::move(bind_data); + } + + //------------------------------------------------------------------------------------------------------------------ + // Global State + //------------------------------------------------------------------------------------------------------------------ + struct GlobalState final : GlobalFunctionData { + mutex lock; + GDALDatasetUniquePtr dataset; + OGRLayer *layer; + vector> field_defs; + + GlobalState(GDALDatasetUniquePtr dataset, OGRLayer *layer, vector> field_defs) + : dataset(std::move(dataset)), layer(layer), field_defs(std::move(field_defs)) { + } + }; + + static bool IsGeometryType(const LogicalType &type) { + return type == GeoTypes::WKB_BLOB() || type == GeoTypes::POINT_2D() || type == GeoTypes::GEOMETRY(); + } + + static unique_ptr OGRFieldTypeFromLogicalType(const string &name, const 
LogicalType &type) { + // TODO: Set OGRFieldSubType for integers and integer lists + // TODO: Set string width? + // Maps a DuckDB logical type to an OGR field definition called `name`. Scalar types and lists of a few + // scalar child types are handled; every other type throws NotImplementedException. + + switch (type.id()) { + case LogicalTypeId::BOOLEAN: { + auto field = make_uniq(name.c_str(), OFTInteger); + field->SetSubType(OFSTBoolean); + return field; + } + case LogicalTypeId::TINYINT: { + // There is no subtype for byte? + return make_uniq(name.c_str(), OFTInteger); + } + case LogicalTypeId::SMALLINT: { + auto field = make_uniq(name.c_str(), OFTInteger); + field->SetSubType(OFSTInt16); + return field; + } + case LogicalTypeId::INTEGER: { + return make_uniq(name.c_str(), OFTInteger); + } + case LogicalTypeId::BIGINT: + return make_uniq(name.c_str(), OFTInteger64); + case LogicalTypeId::FLOAT: { + auto field = make_uniq(name.c_str(), OFTReal); + field->SetSubType(OFSTFloat32); + return field; + } + case LogicalTypeId::DOUBLE: + return make_uniq(name.c_str(), OFTReal); + case LogicalTypeId::VARCHAR: + return make_uniq(name.c_str(), OFTString); + case LogicalTypeId::BLOB: + return make_uniq(name.c_str(), OFTBinary); + case LogicalTypeId::DATE: + return make_uniq(name.c_str(), OFTDate); + case LogicalTypeId::TIME: + return make_uniq(name.c_str(), OFTTime); + case LogicalTypeId::TIMESTAMP: + case LogicalTypeId::TIMESTAMP_NS: + case LogicalTypeId::TIMESTAMP_MS: + case LogicalTypeId::TIMESTAMP_SEC: + case LogicalTypeId::TIMESTAMP_TZ: + return make_uniq(name.c_str(), OFTDateTime); + case LogicalTypeId::LIST: { + auto child_type = ListType::GetChildType(type); + switch (child_type.id()) { + case LogicalTypeId::BOOLEAN: { + auto field = make_uniq(name.c_str(), OFTIntegerList); + field->SetSubType(OFSTBoolean); + return field; + } + case LogicalTypeId::TINYINT: { + // There is no subtype for byte? 
+ return make_uniq(name.c_str(), OFTIntegerList); + } + case LogicalTypeId::SMALLINT: { + auto field = make_uniq(name.c_str(), OFTIntegerList); + field->SetSubType(OFSTInt16); + return field; + } + case LogicalTypeId::INTEGER: + return make_uniq(name.c_str(), OFTIntegerList); + case LogicalTypeId::BIGINT: + return make_uniq(name.c_str(), OFTInteger64List); + case LogicalTypeId::FLOAT: { + auto field = make_uniq(name.c_str(), OFTRealList); + field->SetSubType(OFSTFloat32); + return field; + } + case LogicalTypeId::DOUBLE: + return make_uniq(name.c_str(), OFTRealList); + case LogicalTypeId::VARCHAR: + return make_uniq(name.c_str(), OFTStringList); + default: + throw NotImplementedException("Unsupported type for OGR: %s", type.ToString()); + } + } + default: + throw NotImplementedException("Unsupported type for OGR: %s", type.ToString()); + } + } + + static unique_ptr InitGlobal(ClientContext &context, FunctionData &bind_data, + const string &file_path) { + + auto &gdal_data = bind_data.Cast(); + GDALDriver *driver = GetGDALDriverManager()->GetDriverByName(gdal_data.driver_name.c_str()); + if (!driver) { + throw IOException("Could not open driver"); + } + + // Create the dataset at the path returned by GDALClientContextState::GetPrefix for this client context, + // passing the dataset creation options collected in Bind. + auto &client_ctx = GDALClientContextState::GetOrCreate(context); + auto prefixed_path = client_ctx.GetPrefix(file_path); + auto dataset = GDALDatasetUniquePtr( + driver->Create(prefixed_path.c_str(), 0, 0, 0, GDT_Unknown, gdal_data.dataset_creation_options)); + if (!dataset) { + throw IOException("Could not open dataset"); + } + + // Set the SRS if provided + // NOTE(review): the return value of SetFromUserInput is not inspected here - confirm that failures are + // reported through the GDAL error handler. 
+ OGRSpatialReference srs; + if (!gdal_data.target_srs.empty()) { + srs.SetFromUserInput(gdal_data.target_srs.c_str()); + } + // Not all GDAL drivers check if the SRS is empty (cough cough GeoJSONSeq) + // so we have to pass nullptr if we want the default behavior. + OGRSpatialReference *srs_ptr = gdal_data.target_srs.empty() ? 
nullptr : &srs; + + auto layer = dataset->CreateLayer(gdal_data.layer_name.c_str(), srs_ptr, gdal_data.geometry_type, + gdal_data.layer_creation_options); + if (!layer) { + throw IOException("Could not create layer"); + } + + // Create the layer field definitions + // Geometry columns get no attribute field of their own, and only a single geometry column is accepted. + idx_t geometry_field_count = 0; + vector> field_defs; + for (idx_t i = 0; i < gdal_data.field_names.size(); i++) { + auto &name = gdal_data.field_names[i]; + auto &type = gdal_data.field_sql_types[i]; + + if (IsGeometryType(type)) { + geometry_field_count++; + if (geometry_field_count > 1) { + throw NotImplementedException("Multiple geometry fields not supported yet"); + } + } else { + auto field = OGRFieldTypeFromLogicalType(name, type); + if (layer->CreateField(field.get()) != OGRERR_NONE) { + throw IOException("Could not create attribute field"); + } + // TODO: ^ Like we do here vvv + field_defs.push_back(std::move(field)); + } + } + auto global_data = make_uniq(std::move(dataset), layer, std::move(field_defs)); + + return std::move(global_data); + } + + //------------------------------------------------------------------------------------------------------------------ + // Local State + //------------------------------------------------------------------------------------------------------------------ + // Holds the scratch arena that Sink resets per chunk and passes to OGRGeometryFromValue. 
+ struct LocalState final : public LocalFunctionData { + ArenaAllocator arena; + explicit LocalState(ClientContext &context) : arena(BufferAllocator::Get(context)) { + } + }; + + static unique_ptr InitLocal(ExecutionContext &context, FunctionData &bind_data) { + auto local_data = make_uniq(context.client); + return std::move(local_data); + } + + //------------------------------------------------------------------------------------------------------------------ + // Sink + //------------------------------------------------------------------------------------------------------------------ + // Converts a geometry-typed Value into an owned OGRGeometry. NULL values yield nullptr, unparsable WKB + // throws IOException and any other type throws NotImplementedException. + static OGRGeometryUniquePtr OGRGeometryFromValue(const LogicalType &type, const Value &value, + ArenaAllocator 
&arena) { + if (value.IsNull()) { + return nullptr; + } + + if (type == GeoTypes::WKB_BLOB()) { + const auto str = value.GetValueUnsafe(); + OGRGeometry *ptr; + size_t consumed; + const auto ok = OGRGeometryFactory::createFromWkb(str.GetDataUnsafe(), nullptr, &ptr, str.GetSize(), + wkbVariantIso, consumed); + + if (ok != OGRERR_NONE) { + throw IOException("Could not parse WKB"); + } + return OGRGeometryUniquePtr(ptr); + } + + if (type == GeoTypes::GEOMETRY()) { + const auto blob = value.GetValueUnsafe(); + uint32_t size; + const auto wkb = WKBWriter::Write(blob, &size, arena); + OGRGeometry *ptr; + const auto ok = OGRGeometryFactory::createFromWkb(wkb, nullptr, &ptr, size, wkbVariantIso); + if (ok != OGRERR_NONE) { + throw IOException("Could not parse WKB"); + } + return OGRGeometryUniquePtr(ptr); + } + + if (type == GeoTypes::POINT_2D()) { + auto children = StructValue::GetChildren(value); + auto x = children[0].GetValue(); + auto y = children[1].GetValue(); + auto ogr_point = new OGRPoint(x, y); + return OGRGeometryUniquePtr(ogr_point); + } + + throw NotImplementedException("Unsupported geometry type"); + } + + static void SetOgrFieldFromValue(OGRFeature *feature, int field_idx, const LogicalType &type, const Value &value) { + // TODO: Set field by index always instead of by name for performance. 
+ if (value.IsNull()) { + feature->SetFieldNull(field_idx); + return; + } + switch (type.id()) { + case LogicalTypeId::BOOLEAN: + feature->SetField(field_idx, value.GetValue()); + break; + case LogicalTypeId::TINYINT: + feature->SetField(field_idx, value.GetValue()); + break; + case LogicalTypeId::SMALLINT: + feature->SetField(field_idx, value.GetValue()); + break; + case LogicalTypeId::INTEGER: + feature->SetField(field_idx, value.GetValue()); + break; + case LogicalTypeId::BIGINT: + feature->SetField(field_idx, (GIntBig)value.GetValue()); + break; + case LogicalTypeId::FLOAT: + feature->SetField(field_idx, value.GetValue()); + break; + case LogicalTypeId::DOUBLE: + feature->SetField(field_idx, value.GetValue()); + break; + case LogicalTypeId::VARCHAR: + case LogicalTypeId::BLOB: { + auto str = value.GetValueUnsafe(); + feature->SetField(field_idx, (int)str.GetSize(), str.GetDataUnsafe()); + } break; + case LogicalTypeId::DATE: { + auto date = value.GetValueUnsafe(); + auto year = Date::ExtractYear(date); + auto month = Date::ExtractMonth(date); + auto day = Date::ExtractDay(date); + feature->SetField(field_idx, year, month, day, 0, 0, 0, 0); + } break; + case LogicalTypeId::TIME: { + auto time = value.GetValueUnsafe(); + auto hour = static_cast(time.micros / Interval::MICROS_PER_HOUR); + auto minute = static_cast((time.micros % Interval::MICROS_PER_HOUR) / Interval::MICROS_PER_MINUTE); + auto second = static_cast(static_cast(time.micros % Interval::MICROS_PER_MINUTE) / + static_cast(Interval::MICROS_PER_SEC)); + feature->SetField(field_idx, 0, 0, 0, hour, minute, second, 0); + } break; + case LogicalTypeId::TIMESTAMP: { + auto timestamp = value.GetValueUnsafe(); + auto date = Timestamp::GetDate(timestamp); + auto time = Timestamp::GetTime(timestamp); + auto year = Date::ExtractYear(date); + auto month = Date::ExtractMonth(date); + auto day = Date::ExtractDay(date); + auto hour = static_cast((time.micros % Interval::MICROS_PER_DAY) / Interval::MICROS_PER_HOUR); + 
auto minute = static_cast((time.micros % Interval::MICROS_PER_HOUR) / Interval::MICROS_PER_MINUTE); + auto second = static_cast(static_cast(time.micros % Interval::MICROS_PER_MINUTE) / + static_cast(Interval::MICROS_PER_SEC)); + feature->SetField(field_idx, year, month, day, hour, minute, second, 0); + } break; + case LogicalTypeId::TIMESTAMP_NS: { + auto timestamp = value.GetValueUnsafe(); + timestamp = Timestamp::FromEpochNanoSeconds(timestamp.value); + auto date = Timestamp::GetDate(timestamp); + auto time = Timestamp::GetTime(timestamp); + auto year = Date::ExtractYear(date); + auto month = Date::ExtractMonth(date); + auto day = Date::ExtractDay(date); + auto hour = static_cast((time.micros % Interval::MICROS_PER_DAY) / Interval::MICROS_PER_HOUR); + auto minute = static_cast((time.micros % Interval::MICROS_PER_HOUR) / Interval::MICROS_PER_MINUTE); + auto second = static_cast(static_cast(time.micros % Interval::MICROS_PER_MINUTE) / + static_cast(Interval::MICROS_PER_SEC)); + feature->SetField(field_idx, year, month, day, hour, minute, second, 0); + } break; + case LogicalTypeId::TIMESTAMP_MS: { + auto timestamp = value.GetValueUnsafe(); + timestamp = Timestamp::FromEpochMs(timestamp.value); + auto date = Timestamp::GetDate(timestamp); + auto time = Timestamp::GetTime(timestamp); + auto year = Date::ExtractYear(date); + auto month = Date::ExtractMonth(date); + auto day = Date::ExtractDay(date); + auto hour = static_cast((time.micros % Interval::MICROS_PER_DAY) / Interval::MICROS_PER_HOUR); + auto minute = static_cast((time.micros % Interval::MICROS_PER_HOUR) / Interval::MICROS_PER_MINUTE); + auto second = static_cast(static_cast(time.micros % Interval::MICROS_PER_MINUTE) / + static_cast(Interval::MICROS_PER_SEC)); + feature->SetField(field_idx, year, month, day, hour, minute, second, 0); + } break; + case LogicalTypeId::TIMESTAMP_SEC: { + auto timestamp = value.GetValueUnsafe(); + timestamp = Timestamp::FromEpochSeconds(timestamp.value); + auto date = 
Timestamp::GetDate(timestamp); + auto time = Timestamp::GetTime(timestamp); + auto year = Date::ExtractYear(date); + auto month = Date::ExtractMonth(date); + auto day = Date::ExtractDay(date); + auto hour = static_cast((time.micros % Interval::MICROS_PER_DAY) / Interval::MICROS_PER_HOUR); + auto minute = static_cast((time.micros % Interval::MICROS_PER_HOUR) / Interval::MICROS_PER_MINUTE); + auto second = static_cast(static_cast(time.micros % Interval::MICROS_PER_MINUTE) / + static_cast(Interval::MICROS_PER_SEC)); + feature->SetField(field_idx, year, month, day, hour, minute, second, 0); + } break; + case LogicalTypeId::TIMESTAMP_TZ: { + // Not sure what to do with the timezone, so the timestamp is formatted as text and GDAL parses it. + auto timestamp = value.GetValueUnsafe(); + auto time_str = Timestamp::ToString(timestamp); + feature->SetField(field_idx, time_str.c_str()); + } break; + default: + // TODO: Handle list types + throw NotImplementedException("Unsupported field type"); + } + } + + // Writes every row of `input` to the layer as one OGR feature. The local arena is reset first and the + // global lock is held for the whole chunk. + static void Sink(ExecutionContext &context, FunctionData &bdata, GlobalFunctionData &gstate, + LocalFunctionData &lstate, DataChunk &input) { + + auto &bind_data = bdata.Cast(); + auto &global_state = gstate.Cast(); + auto &local_state = lstate.Cast(); + local_state.arena.Reset(); + + lock_guard d_lock(global_state.lock); + auto layer = global_state.layer; + + // Create the feature + input.Flatten(); + for (idx_t row_idx = 0; row_idx < input.size(); row_idx++) { + + auto feature = OGRFeatureUniquePtr(OGRFeature::CreateFeature(layer->GetLayerDefn())); + + // Geometry fields do not count towards the field index, so we need to keep track of them separately. 
+ idx_t field_idx = 0; + for (idx_t col_idx = 0; col_idx < input.ColumnCount(); col_idx++) { + auto &type = bind_data.field_sql_types[col_idx]; + auto value = input.GetValue(col_idx, row_idx); + + if (IsGeometryType(type)) { + // TODO: check how many geometry fields there are and use the correct one. 
+ auto geom = OGRGeometryFromValue(type, value, local_state.arena); + if (geom && bind_data.geometry_type != wkbUnknown && + geom->getGeometryType() != bind_data.geometry_type) { + auto got_name = StringUtil::Replace( + StringUtil::Upper(OGRGeometryTypeToName(geom->getGeometryType())), " ", ""); + auto expected_name = StringUtil::Replace( + StringUtil::Upper(OGRGeometryTypeToName(bind_data.geometry_type)), " ", ""); + throw InvalidInputException( + "Expected all geometries to be of type '%s', but got one of type '%s'", expected_name, + got_name); + } + + if (feature->SetGeometry(geom.get()) != OGRERR_NONE) { + throw IOException("Could not set geometry"); + } + } else { + SetOgrFieldFromValue(feature.get(), static_cast(field_idx), type, value); + field_idx++; + } + } + if (layer->CreateFeature(feature.get()) != OGRERR_NONE) { + throw IOException("Could not create feature"); + } + } + } + + //------------------------------------------------------------------------------------------------------------------ + // Combine + //------------------------------------------------------------------------------------------------------------------ + // Intentionally empty: Sink already writes each feature to the layer, so no local results remain to merge. + static void Combine(ExecutionContext &context, FunctionData &bind_data, GlobalFunctionData &gstate, + LocalFunctionData &lstate) { + } + + //------------------------------------------------------------------------------------------------------------------ + // Finalize + //------------------------------------------------------------------------------------------------------------------ + // Flushes the dataset cache and closes the dataset. 
+ static void Finalize(ClientContext &context, FunctionData &bind_data, GlobalFunctionData &gstate) { + const auto &global_state = gstate.Cast(); + global_state.dataset->FlushCache(); + global_state.dataset->Close(); + } + + //------------------------------------------------------------------------------------------------------------------ + // Register + 
//------------------------------------------------------------------------------------------------------------------ + static void Register(DatabaseInstance &db) { + CopyFunction info("GDAL"); + info.copy_to_bind = Bind; + info.copy_to_initialize_local = InitLocal; + info.copy_to_initialize_global = InitGlobal; + info.copy_to_sink = Sink; + info.copy_to_combine = Combine; + info.copy_to_finalize = Finalize; + info.extension = "gdal"; + ExtensionUtil::RegisterFunction(db, info); + } +}; + +} // namespace + +//###################################################################################################################### +// Register Module +//###################################################################################################################### +void RegisterGDALModule(DatabaseInstance &db) { + + // Load GDAL (once) + static std::once_flag loaded; + std::call_once(loaded, [&]() { + // Register all embedded drivers (dont go looking for plugins) + OGRRegisterAllInternal(); + + // Set GDAL error handler + CPLSetErrorHandler([](CPLErr e, int code, const char *raw_msg) { + // DuckDB doesnt do warnings, so we only throw on errors + if (e != CE_Failure && e != CE_Fatal) { + return; + } + + // If the error contains a /vsiduckdb-/ prefix, + // try to strip it off to make the errors more readable + auto msg = string(raw_msg); + auto path_pos = msg.find("/vsiduckdb-"); + if (path_pos != string::npos) { + // We found a path, strip it off + msg.erase(path_pos, 48); + } + + switch (code) { + case CPLE_NoWriteAccess: + throw PermissionException("GDAL Error (%d): %s", code, msg); + case CPLE_UserInterrupt: + throw InterruptException(); + case CPLE_OutOfMemory: + throw OutOfMemoryException("GDAL Error (%d): %s", code, msg); + case CPLE_NotSupported: + throw NotImplementedException("GDAL Error (%d): %s", code, msg); + case CPLE_AssertionFailed: + case CPLE_ObjectNull: + throw InternalException("GDAL Error (%d): %s", code, msg); + case CPLE_IllegalArg: + throw 
InvalidInputException("GDAL Error (%d): %s", code, msg); + case CPLE_AppDefined: + case CPLE_HttpResponse: + case CPLE_FileIO: + case CPLE_OpenFailed: + default: + throw IOException("GDAL Error (%d): %s", code, msg); + } + }); + }); + + ST_Read::Register(db); + ST_Read_Meta::Register(db); + ST_Drivers::Register(db); + ST_Write::Register(db); +} + +} // namespace duckdb \ No newline at end of file diff --git a/src/spatial/modules/gdal/gdal_module.hpp b/src/spatial/modules/gdal/gdal_module.hpp new file mode 100644 index 00000000..8485d529 --- /dev/null +++ b/src/spatial/modules/gdal/gdal_module.hpp @@ -0,0 +1,9 @@ +#pragma once + +namespace duckdb { + +class DatabaseInstance; + +void RegisterGDALModule(DatabaseInstance &db); + +} // namespace duckdb \ No newline at end of file diff --git a/src/spatial/modules/geos/CMakeLists.txt b/src/spatial/modules/geos/CMakeLists.txt new file mode 100644 index 00000000..383f808d --- /dev/null +++ b/src/spatial/modules/geos/CMakeLists.txt @@ -0,0 +1,5 @@ +set(EXTENSION_SOURCES + ${EXTENSION_SOURCES} + ${CMAKE_CURRENT_SOURCE_DIR}/geos_module.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/geos_serde.cpp + PARENT_SCOPE) \ No newline at end of file diff --git a/src/spatial/modules/geos/geos_geometry.hpp b/src/spatial/modules/geos/geos_geometry.hpp new file mode 100644 index 00000000..df5c9eb4 --- /dev/null +++ b/src/spatial/modules/geos/geos_geometry.hpp @@ -0,0 +1,389 @@ +#pragma once + +#include "geos_c.h" + +namespace duckdb { + +class GeosGeometry; +class PreparedGeosGeometry; + +class GeosGeometry { + friend class PreparedGeosGeometry; + +public: + // constructor + GeosGeometry(GEOSContextHandle_t handle_p, GEOSGeometry *geom_p); + + // disable copy + GeosGeometry(const GeosGeometry &) = delete; + GeosGeometry &operator=(const GeosGeometry &) = delete; + + // support move + GeosGeometry(GeosGeometry &&other) noexcept; + GeosGeometry &operator=(GeosGeometry &&other) noexcept; + + // destructor + ~GeosGeometry(); + +public: + GEOSGeomTypes 
type() const; + const GEOSGeometry *get_raw() const; + + bool is_simple() const; + bool is_ring() const; + bool is_valid() const; + + GeosGeometry get_boundary() const; + GeosGeometry get_centroid() const; + GeosGeometry get_convex_hull() const; + GeosGeometry get_envelope() const; + GeosGeometry get_reversed() const; + GeosGeometry get_point_on_surface() const; + GeosGeometry get_made_valid() const; + + bool contains(const GeosGeometry &other) const; + bool covers(const GeosGeometry &other) const; + bool covered_by(const GeosGeometry &other) const; + bool crosses(const GeosGeometry &other) const; + bool disjoint(const GeosGeometry &other) const; + bool equals(const GeosGeometry &other) const; + bool intersects(const GeosGeometry &other) const; + bool overlaps(const GeosGeometry &other) const; + bool touches(const GeosGeometry &other) const; + bool within(const GeosGeometry &other) const; + bool distance_within(const GeosGeometry &other, double distance) const; + + double distance_to(const GeosGeometry &other) const; + + void normalize_in_place() const; + + GeosGeometry get_difference(const GeosGeometry &other) const; + GeosGeometry get_intersection(const GeosGeometry &other) const; + GeosGeometry get_union(const GeosGeometry &other) const; + GeosGeometry get_shortest_line(const GeosGeometry &other) const; + + GeosGeometry get_simplified(double tolerance) const; + GeosGeometry get_simplified_topo(double tolerance) const; + GeosGeometry get_without_repeated_points(double tolerance) const; + GeosGeometry get_reduced_precision(double tolerance) const; + GeosGeometry get_linemerged(bool directed) const; + GeosGeometry get_buffer(double distance, int quadsegs) const; + GeosGeometry get_buffer_style(double distance, int quadsegs, int endcap_style, int join_style, + double mitre_limit) const; + + PreparedGeosGeometry get_prepared() const; + +private: + GEOSContextHandle_t handle; + GEOSGeometry *geom; +}; + +class PreparedGeosGeometry { + friend class GeosGeometry; + 
+public: + // constructor: prepares `geom` with GEOSPrepare_r using the given context handle + PreparedGeosGeometry(GEOSContextHandle_t handle_p, const GeosGeometry &geom); + + // disable copy + PreparedGeosGeometry(const PreparedGeosGeometry &) = delete; + PreparedGeosGeometry &operator=(const PreparedGeosGeometry &) = delete; + + // support move + PreparedGeosGeometry(PreparedGeosGeometry &&other) noexcept; + PreparedGeosGeometry &operator=(PreparedGeosGeometry &&other) noexcept; + + // destructor: releases the prepared geometry with GEOSPreparedGeom_destroy_r when one is held + ~PreparedGeosGeometry(); + +public: + bool contains(const GeosGeometry &other) const; + bool contains_properly(const GeosGeometry &other) const; + bool covers(const GeosGeometry &other) const; + bool covered_by(const GeosGeometry &other) const; + bool crosses(const GeosGeometry &other) const; + bool disjoint(const GeosGeometry &other) const; + bool intersects(const GeosGeometry &other) const; + bool overlaps(const GeosGeometry &other) const; + bool touches(const GeosGeometry &other) const; + bool within(const GeosGeometry &other) const; + + double distance_to(const GeosGeometry &other) const; + bool distance_within(const GeosGeometry &other, double distance) const; + +private: + GEOSContextHandle_t handle; + const GEOSPreparedGeometry *prepared; +}; + +//------------------------------------------------------------------------------ +// Lifecycle methods +//------------------------------------------------------------------------------ + +//-- GeosGeometry --// +// Takes ownership of `geom_p`: the destructor and move assignment release it with GEOSGeom_destroy_r. +inline GeosGeometry::GeosGeometry(GEOSContextHandle_t handle_p, GEOSGeometry *geom_p) : handle(handle_p), geom(geom_p) { +} +// Move construction leaves `other` holding a null geometry so that it is not destroyed twice. 
+inline GeosGeometry::GeosGeometry(GeosGeometry &&other) noexcept : handle(other.handle), geom(other.geom) { + other.geom = nullptr; +} +// Move assignment releases the geometry currently held (if any) before taking over `other`'s. +inline GeosGeometry &GeosGeometry::operator=(GeosGeometry &&other) noexcept { + if (this != &other) { + if (geom) { + GEOSGeom_destroy_r(handle, geom); + } + handle = other.handle; + geom = other.geom; + other.geom = nullptr; + } + return *this; +} + +inline GeosGeometry::~GeosGeometry() { + if (geom) { + GEOSGeom_destroy_r(handle, 
geom); + } +} + +//-- PreparedGeosGeometry --// +// NOTE(review): `prepared` is built from geom.geom and no copy is made here - presumably the source GeosGeometry +// has to outlive the prepared geometry; confirm against the GEOS documentation. +inline PreparedGeosGeometry::PreparedGeosGeometry(GEOSContextHandle_t handle_p, const GeosGeometry &geom) + : handle(handle_p) { + prepared = GEOSPrepare_r(handle, geom.geom); +} +// Move construction leaves `other` holding a null pointer so that the prepared geometry is destroyed only once. +inline PreparedGeosGeometry::PreparedGeosGeometry(PreparedGeosGeometry &&other) noexcept + : handle(other.handle), prepared(other.prepared) { + other.prepared = nullptr; +} + +// Move assignment releases the prepared geometry currently held (if any) before taking over `other`'s. +inline PreparedGeosGeometry &PreparedGeosGeometry::operator=(PreparedGeosGeometry &&other) noexcept { + if (this != &other) { + if (prepared) { + GEOSPreparedGeom_destroy_r(handle, prepared); + } + handle = other.handle; + prepared = other.prepared; + other.prepared = nullptr; + } + return *this; +} + +inline PreparedGeosGeometry::~PreparedGeosGeometry() { + if (prepared) { + GEOSPreparedGeom_destroy_r(handle, prepared); + } +} + +//------------------------------------------------------------------------------ +// Methods +//------------------------------------------------------------------------------ + +//-- GeosGeometry --// +// The accessors below forward to the corresponding GEOS*_r call using the stored context handle. Methods that +// return a GeosGeometry wrap the pointer returned by GEOS in a new owning object. 
+inline GEOSGeomTypes GeosGeometry::type() const { + return static_cast(GEOSGeomTypeId_r(handle, geom)); +} + +inline const GEOSGeometry *GeosGeometry::get_raw() const { + return geom; +} + +inline bool GeosGeometry::is_simple() const { + return GEOSisSimple_r(handle, geom); +} + +inline bool GeosGeometry::is_ring() const { + return GEOSisRing_r(handle, geom); +} + +inline bool GeosGeometry::is_valid() const { + return GEOSisValid_r(handle, geom); +} + +inline GeosGeometry GeosGeometry::get_boundary() const { + return GeosGeometry(handle, GEOSBoundary_r(handle, geom)); +} + +inline GeosGeometry GeosGeometry::get_centroid() const { + return GeosGeometry(handle, GEOSGetCentroid_r(handle, geom)); +} + +inline GeosGeometry GeosGeometry::get_convex_hull() const { + return GeosGeometry(handle, GEOSConvexHull_r(handle, geom)); +} + +inline GeosGeometry GeosGeometry::get_envelope() const { + return GeosGeometry(handle, GEOSEnvelope_r(handle, geom)); +} 
+ +inline GeosGeometry GeosGeometry::get_reversed() const { + return GeosGeometry(handle, GEOSReverse_r(handle, geom)); +} + +inline GeosGeometry GeosGeometry::get_point_on_surface() const { + return GeosGeometry(handle, GEOSPointOnSurface_r(handle, geom)); +} + +inline GeosGeometry GeosGeometry::get_made_valid() const { + return GeosGeometry(handle, GEOSMakeValid_r(handle, geom)); +} + +inline bool GeosGeometry::contains(const GeosGeometry &other) const { + return GEOSContains_r(handle, geom, other.geom); +} + +inline bool GeosGeometry::covers(const GeosGeometry &other) const { + return GEOSCovers_r(handle, geom, other.geom); +} + +inline bool GeosGeometry::covered_by(const GeosGeometry &other) const { + return GEOSCoveredBy_r(handle, geom, other.geom); +} + +inline bool GeosGeometry::crosses(const GeosGeometry &other) const { + return GEOSCrosses_r(handle, geom, other.geom); +} + +inline bool GeosGeometry::disjoint(const GeosGeometry &other) const { + return GEOSDisjoint_r(handle, geom, other.geom); +} + +inline bool GeosGeometry::equals(const GeosGeometry &other) const { + return GEOSEquals_r(handle, geom, other.geom); +} + +inline bool GeosGeometry::intersects(const GeosGeometry &other) const { + return GEOSIntersects_r(handle, geom, other.geom); +} + +inline bool GeosGeometry::overlaps(const GeosGeometry &other) const { + return GEOSOverlaps_r(handle, geom, other.geom); +} + +inline bool GeosGeometry::touches(const GeosGeometry &other) const { + return GEOSTouches_r(handle, geom, other.geom); +} + +inline bool GeosGeometry::within(const GeosGeometry &other) const { + return GEOSWithin_r(handle, geom, other.geom); +} + +inline bool GeosGeometry::distance_within(const GeosGeometry &other, double distance) const { + return GEOSDistanceWithin_r(handle, geom, other.geom, distance); +} + +inline double GeosGeometry::distance_to(const GeosGeometry &other) const { + double distance = 0; + GEOSDistance_r(handle, geom, other.geom, &distance); + return distance; +} + 
+inline void GeosGeometry::normalize_in_place() const { + GEOSNormalize_r(handle, geom); +} + +inline GeosGeometry GeosGeometry::get_difference(const GeosGeometry &other) const { + return GeosGeometry(handle, GEOSDifference_r(handle, geom, other.geom)); +} + +inline GeosGeometry GeosGeometry::get_intersection(const GeosGeometry &other) const { + return GeosGeometry(handle, GEOSIntersection_r(handle, geom, other.geom)); +} + +inline GeosGeometry GeosGeometry::get_union(const GeosGeometry &other) const { + return GeosGeometry(handle, GEOSUnion_r(handle, geom, other.geom)); +} + +inline GeosGeometry GeosGeometry::get_shortest_line(const GeosGeometry &other) const { + const auto line = GEOSNearestPoints_r(handle, geom, other.geom); + const auto line_geom = GEOSGeom_createLineString_r(handle, line); + return GeosGeometry(handle, line_geom); +} + +inline GeosGeometry GeosGeometry::get_simplified(double tolerance) const { + return GeosGeometry(handle, GEOSSimplify_r(handle, geom, tolerance)); +} + +inline GeosGeometry GeosGeometry::get_simplified_topo(double tolerance) const { + return GeosGeometry(handle, GEOSTopologyPreserveSimplify_r(handle, geom, tolerance)); +} + +inline GeosGeometry GeosGeometry::get_without_repeated_points(double tolerance) const { + const auto simplified = GEOSRemoveRepeatedPoints_r(handle, geom, tolerance); + return GeosGeometry(handle, simplified); +} + +inline GeosGeometry GeosGeometry::get_reduced_precision(double tolerance) const { + const auto reduced = GEOSGeom_setPrecision_r(handle, geom, tolerance, 0); + return GeosGeometry(handle, reduced); +} + +inline GeosGeometry GeosGeometry::get_linemerged(bool directed) const { + const auto merged = directed ? 
GEOSLineMergeDirected_r(handle, geom) : GEOSLineMerge_r(handle, geom); + return GeosGeometry(handle, merged); +} + +inline GeosGeometry GeosGeometry::get_buffer(double distance, int quadsegs) const { + const auto buffer = GEOSBuffer_r(handle, geom, distance, quadsegs); + return GeosGeometry(handle, buffer); +} + +inline GeosGeometry GeosGeometry::get_buffer_style(double distance, int quadsegs, int endcap_style, int join_style, + double mitre_limit) const { + const auto buffer = GEOSBufferWithStyle_r(handle, geom, distance, quadsegs, endcap_style, join_style, mitre_limit); + return GeosGeometry(handle, buffer); +} + +inline PreparedGeosGeometry GeosGeometry::get_prepared() const { + return PreparedGeosGeometry(handle, *this); +} + +//-- PreparedGeosGeometry --// + +inline bool PreparedGeosGeometry::contains(const GeosGeometry &other) const { + return GEOSPreparedContains_r(handle, prepared, other.geom); +} + +inline bool PreparedGeosGeometry::contains_properly(const GeosGeometry &other) const { + return GEOSPreparedContainsProperly_r(handle, prepared, other.geom); +} + +inline bool PreparedGeosGeometry::covers(const GeosGeometry &other) const { + return GEOSPreparedCovers_r(handle, prepared, other.geom); +} + +inline bool PreparedGeosGeometry::covered_by(const GeosGeometry &other) const { + return GEOSPreparedCoveredBy_r(handle, prepared, other.geom); +} + +inline bool PreparedGeosGeometry::crosses(const GeosGeometry &other) const { + return GEOSPreparedCrosses_r(handle, prepared, other.geom); +} + +inline bool PreparedGeosGeometry::disjoint(const GeosGeometry &other) const { + return GEOSPreparedDisjoint_r(handle, prepared, other.geom); +} + +inline bool PreparedGeosGeometry::intersects(const GeosGeometry &other) const { + return GEOSPreparedIntersects_r(handle, prepared, other.geom); +} + +inline bool PreparedGeosGeometry::overlaps(const GeosGeometry &other) const { + return GEOSPreparedOverlaps_r(handle, prepared, other.geom); +} + +inline bool 
PreparedGeosGeometry::touches(const GeosGeometry &other) const { + return GEOSPreparedTouches_r(handle, prepared, other.geom); +} + +inline bool PreparedGeosGeometry::within(const GeosGeometry &other) const { + return GEOSPreparedWithin_r(handle, prepared, other.geom); +} + +inline double PreparedGeosGeometry::distance_to(const GeosGeometry &other) const { + double distance = 0; + GEOSPreparedDistance_r(handle, prepared, other.geom, &distance); + return distance; +} + +inline bool PreparedGeosGeometry::distance_within(const GeosGeometry &other, double distance) const { + return GEOSPreparedDistanceWithin_r(handle, prepared, other.geom, distance); +} + +} // namespace duckdb diff --git a/src/spatial/modules/geos/geos_module.cpp b/src/spatial/modules/geos/geos_module.cpp new file mode 100644 index 00000000..6f6dee70 --- /dev/null +++ b/src/spatial/modules/geos/geos_module.cpp @@ -0,0 +1,1542 @@ +#include "spatial/modules/geos/geos_module.hpp" +#include "spatial/modules/geos/geos_geometry.hpp" +#include "spatial/modules/geos/geos_serde.hpp" +#include "spatial/spatial_types.hpp" +#include "spatial/util/function_builder.hpp" + +#include "duckdb/common/vector_operations/senary_executor.hpp" + +#include "geos_c.h" + +namespace duckdb { + +//------------------------------------------------------------------------------ +// Local State +//------------------------------------------------------------------------------ + +namespace { + +class LocalState final : public FunctionLocalState { +public: + static unique_ptr Init(ExpressionState &state, const BoundFunctionExpression &expr, + FunctionData *bind_data) { + return make_uniq(state.GetContext()); + } + + static LocalState &ResetAndGet(ExpressionState &state) { + auto &local_state = ExecuteFunctionState::GetFunctionState(state)->Cast(); + return local_state; + } + + GEOSContextHandle_t GetContext() const { + return ctx; + } + + GeosGeometry Deserialize(const string_t &blob) const; + string_t Serialize(Vector &result, const 
GeosGeometry &geom) const; + + // Most GEOS functions do not use an arena, so just use the default allocator + explicit LocalState(ClientContext &context) { + ctx = GEOS_init_r(); + + GEOSContext_setErrorMessageHandler_r( + ctx, [](const char *message, void *) { throw InvalidInputException(message); }, nullptr); + } + + ~LocalState() override { + GEOS_finish_r(ctx); + } + +private: + GEOSContextHandle_t ctx; +}; + +string_t LocalState::Serialize(Vector &result, const GeosGeometry &geom) const { + // Get the size of the serialized geometry + const auto raw = geom.get_raw(); + const auto size = GeosSerde::GetRequiredSize(ctx, raw); + + // Allocate a blob of the correct size + auto blob = StringVector::EmptyString(result, size); + const auto ptr = blob.GetDataWriteable(); + + // Serialize the geometry into the blob + GeosSerde::Serialize(ctx, raw, ptr, size); + + // Finalize and return the blob + blob.Finalize(); + return blob; +} + +GeosGeometry LocalState::Deserialize(const string_t &blob) const { + const auto blob_ptr = blob.GetData(); + const auto blob_len = blob.GetSize(); + + const auto geom = GeosSerde::Deserialize(ctx, blob_ptr, blob_len); + + if (geom == nullptr) { + throw InvalidInputException("Could not deserialize geometry"); + } + + return GeosGeometry(ctx, geom); +} + +} // namespace + +//------------------------------------------------------------------------------ +// Base Functions +//------------------------------------------------------------------------------ + +namespace { + +template +class SymmetricPreparedBinaryFunction { +public: + static void Execute(DataChunk &args, ExpressionState &state, Vector &result) { + const auto &lstate = LocalState::ResetAndGet(state); + + auto &lhs_vec = args.data[0]; + auto &rhs_vec = args.data[1]; + + const auto lhs_is_const = + lhs_vec.GetVectorType() == VectorType::CONSTANT_VECTOR && !ConstantVector::IsNull(lhs_vec); + const auto rhs_is_const = + rhs_vec.GetVectorType() == VectorType::CONSTANT_VECTOR && 
!ConstantVector::IsNull(rhs_vec); + + if (lhs_is_const && rhs_is_const) { + // Both are const, just execute once + result.SetVectorType(VectorType::CONSTANT_VECTOR); + const auto &lhs_blob = ConstantVector::GetData(lhs_vec)[0]; + const auto &rhs_blob = ConstantVector::GetData(rhs_vec)[0]; + const auto lhs_geom = lstate.Deserialize(lhs_blob); + const auto rhs_geom = lstate.Deserialize(rhs_blob); + ConstantVector::GetData(result)[0] = IMPL::ExecutePredicateNormal(lhs_geom, rhs_geom); + + } else if (lhs_is_const != rhs_is_const) { + // One of the two is const, prepare the const one and execute on the non-const one + auto &const_vec = lhs_is_const ? lhs_vec : rhs_vec; + auto &probe_vec = lhs_is_const ? rhs_vec : lhs_vec; + + const auto &const_blob = ConstantVector::GetData(const_vec)[0]; + const auto const_geom = lstate.Deserialize(const_blob); + const auto const_prep = const_geom.get_prepared(); + + UnaryExecutor::Execute( + probe_vec, result, args.size(), [&](const string_t &probe_blob) { + const auto probe_geom = lstate.Deserialize(probe_blob); + return IMPL::ExecutePredicatePrepared(const_prep, probe_geom); + }); + } else { + // Both are non-const, just execute normally + BinaryExecutor::Execute( + lhs_vec, rhs_vec, result, args.size(), [&](const string_t &lhs_blob, const string_t &rhs_blob) { + const auto lhs = lstate.Deserialize(lhs_blob); + const auto rhs = lstate.Deserialize(rhs_blob); + return IMPL::ExecutePredicateNormal(lhs, rhs); + }); + } + } +}; + +template +class AsymmetricPreparedBinaryFunction { +public: + static void Execute(DataChunk &args, ExpressionState &state, Vector &result) { + const auto &lstate = LocalState::ResetAndGet(state); + + auto &lhs_vec = args.data[0]; + auto &rhs_vec = args.data[1]; + + const auto lhs_is_const = + lhs_vec.GetVectorType() == VectorType::CONSTANT_VECTOR && !ConstantVector::IsNull(lhs_vec); + const auto rhs_is_const = + rhs_vec.GetVectorType() == VectorType::CONSTANT_VECTOR && !ConstantVector::IsNull(rhs_vec); + + if 
(lhs_is_const && rhs_is_const) { + // Both are const, just execute once + result.SetVectorType(VectorType::CONSTANT_VECTOR); + const auto &lhs_blob = ConstantVector::GetData(lhs_vec)[0]; + const auto &rhs_blob = ConstantVector::GetData(rhs_vec)[0]; + const auto lhs_geom = lstate.Deserialize(lhs_blob); + const auto rhs_geom = lstate.Deserialize(rhs_blob); + ConstantVector::GetData(result)[0] = IMPL::ExecutePredicateNormal(lhs_geom, rhs_geom); + + } else if (lhs_is_const) { + // Prepare the left const and run on the non-const right + // Because this predicate is not symmetric, we can't just swap the two, so we only prepare the left + const auto lhs_blob = ConstantVector::GetData(lhs_vec)[0]; + const auto lhs_geom = lstate.Deserialize(lhs_blob); + const auto lhs_prep = lhs_geom.get_prepared(); + + UnaryExecutor::Execute(rhs_vec, result, args.size(), [&](const string_t &rhs_blob) { + const auto rhs_geom = lstate.Deserialize(rhs_blob); + return IMPL::ExecutePredicatePrepared(lhs_prep, rhs_geom); + }); + } else { + // Both are non-const, just execute normally + BinaryExecutor::Execute( + lhs_vec, rhs_vec, result, args.size(), [&](const string_t &lhs_blob, const string_t &rhs_blob) { + const auto lhs = lstate.Deserialize(lhs_blob); + const auto rhs = lstate.Deserialize(rhs_blob); + return IMPL::ExecutePredicateNormal(lhs, rhs); + }); + } + } +}; + +} // namespace + +//------------------------------------------------------------------------------ +// Functions +//------------------------------------------------------------------------------ + +namespace { + +struct ST_Boundary { + static void Execute(DataChunk &args, ExpressionState &state, Vector &result) { + const auto &lstate = LocalState::ResetAndGet(state); + + UnaryExecutor::ExecuteWithNulls( + args.data[0], result, args.size(), [&](const string_t &geom_blob, ValidityMask &mask, idx_t row_idx) { + const auto geom = lstate.Deserialize(geom_blob); + if (geom.type() == GEOS_GEOMETRYCOLLECTION) { + 
mask.SetInvalid(row_idx); + return string_t {}; + } + const auto boundary = geom.get_boundary(); + + return lstate.Serialize(result, boundary); + }); + } + + static void Register(DatabaseInstance &db) { + FunctionBuilder::RegisterScalar(db, "ST_Boundary", [](ScalarFunctionBuilder &func) { + func.AddVariant([](ScalarFunctionVariantBuilder &variant) { + variant.AddParameter("geom", GeoTypes::GEOMETRY()); + variant.SetReturnType(GeoTypes::GEOMETRY()); + + variant.SetInit(LocalState::Init); + variant.SetFunction(Execute); + }); + + func.SetDescription("Returns the boundary of a geometry"); + func.SetTag("ext", "spatial"); + func.SetTag("category", "construction"); + }); + } +}; + +struct ST_Buffer { + + static void Execute(DataChunk &args, ExpressionState &state, Vector &result) { + const auto &lstate = LocalState::ResetAndGet(state); + + BinaryExecutor::Execute(args.data[0], args.data[1], result, args.size(), + [&](const string_t &blob, double radius) { + const auto geom = lstate.Deserialize(blob); + const auto buffer = geom.get_buffer(radius, 8); + return lstate.Serialize(result, buffer); + }); + } + + static void ExecuteWithSegments(DataChunk &args, ExpressionState &state, Vector &result) { + const auto &lstate = LocalState::ResetAndGet(state); + + TernaryExecutor::Execute( + args.data[0], args.data[1], args.data[2], result, args.size(), + [&](const string_t &blob, double radius, int32_t segments) { + const auto geom = lstate.Deserialize(blob); + const auto buffer = geom.get_buffer(radius, segments); + return lstate.Serialize(result, buffer); + }); + } + + template + static T TryParseStringArgument(const char *name, const vector &keys, const vector &values, + const string_t &arg) { + D_ASSERT(keys.size() == values.size()); + for (idx_t i = 0; i < keys.size(); i++) { + if (StringUtil::CIEquals(keys[i], arg.GetString())) { + return values[i]; + } + } + + auto candidates = StringUtil::Join(keys, ", "); + throw InvalidInputException("Unknown %s: '%s', accepted inputs: 
%s", name, arg.GetString().c_str(), + candidates.c_str()); + } + + static void ExecuteWithStyle(DataChunk &args, ExpressionState &state, Vector &result) { + const auto &lstate = LocalState::ResetAndGet(state); + + SenaryExecutor::Execute( + args, result, + [&](const string_t &blob, double radius, int32_t segments, const string_t &cap_style_str, + const string_t &join_style_str, double mitre_limit) { + const auto geom = lstate.Deserialize(blob); + const auto cap_style = TryParseStringArgument( + "cap style", {"CAP_ROUND", "CAP_FLAT", "CAP_SQUARE"}, + {GEOSBUF_CAP_ROUND, GEOSBUF_CAP_FLAT, GEOSBUF_CAP_SQUARE}, cap_style_str); + + const auto join_style = TryParseStringArgument( + "join style", {"JOIN_ROUND", "JOIN_MITRE", "JOIN_BEVEL"}, + {GEOSBUF_JOIN_ROUND, GEOSBUF_JOIN_MITRE, GEOSBUF_JOIN_BEVEL}, join_style_str); + + const auto buffer = geom.get_buffer_style(radius, segments, cap_style, join_style, mitre_limit); + return lstate.Serialize(result, buffer); + ; + }); + } + + static constexpr auto DESCRIPTION = R"( + Returns a buffer around the input geometry at the target distance + + `geom` is the input geometry. + + `distance` is the target distance for the buffer, using the same units as the input geometry. + + `num_triangles` represents how many triangles that will be produced to approximate a quarter circle. The larger the number, the smoother the resulting geometry. The default value is 8. + + `join_style` must be one of "JOIN_ROUND", "JOIN_MITRE", "JOIN_BEVEL". This parameter is case-insensitive. + + `cap_style` must be one of "CAP_ROUND", "CAP_FLAT", "CAP_SQUARE". This parameter is case-insensitive. + + `mitre_limit` only applies when `join_style` is "JOIN_MITRE". It is the ratio of the distance from the corner to the mitre point to the corner radius. The default value is 1.0. + + This is a planar operation and will not take into account the curvature of the earth. 
+ )"; + static constexpr auto EXAMPLE = ""; + + static void Register(DatabaseInstance &db) { + + FunctionBuilder::RegisterScalar(db, "ST_Buffer", [](ScalarFunctionBuilder &func) { + func.AddVariant([](ScalarFunctionVariantBuilder &variant) { + variant.AddParameter("geom", GeoTypes::GEOMETRY()); + variant.AddParameter("distance", LogicalType::DOUBLE); + variant.SetReturnType(GeoTypes::GEOMETRY()); + + variant.SetInit(LocalState::Init); + variant.SetFunction(Execute); + }); + + func.AddVariant([](ScalarFunctionVariantBuilder &variant) { + variant.AddParameter("geom", GeoTypes::GEOMETRY()); + variant.AddParameter("distance", LogicalType::DOUBLE); + variant.AddParameter("num_triangles", LogicalType::INTEGER); + variant.SetReturnType(GeoTypes::GEOMETRY()); + + variant.SetInit(LocalState::Init); + variant.SetFunction(ExecuteWithSegments); + }); + + func.AddVariant([](ScalarFunctionVariantBuilder &variant) { + variant.AddParameter("geom", GeoTypes::GEOMETRY()); + variant.AddParameter("distance", LogicalType::DOUBLE); + variant.AddParameter("num_triangles", LogicalType::INTEGER); + variant.AddParameter("cap_style", LogicalType::VARCHAR); + variant.AddParameter("join_style", LogicalType::VARCHAR); + variant.AddParameter("mitre_limit", LogicalType::DOUBLE); + variant.SetReturnType(GeoTypes::GEOMETRY()); + + variant.SetInit(LocalState::Init); + variant.SetFunction(ExecuteWithStyle); + }); + + func.SetDescription(DESCRIPTION); + func.SetExample(EXAMPLE); + + func.SetTag("ext", "spatial"); + func.SetTag("category", "construction"); + }); + } +}; + +struct ST_Centroid { + static void Execute(DataChunk &args, ExpressionState &state, Vector &result) { + const auto &lstate = LocalState::ResetAndGet(state); + + UnaryExecutor::Execute(args.data[0], result, args.size(), [&](const string_t &geom_blob) { + const auto geom = lstate.Deserialize(geom_blob); + const auto centroid = geom.get_centroid(); + return lstate.Serialize(result, centroid); + }); + } + + static void 
Register(DatabaseInstance &db) { + FunctionBuilder::RegisterScalar(db, "ST_Centroid", [](ScalarFunctionBuilder &func) { + func.AddVariant([](ScalarFunctionVariantBuilder &variant) { + variant.AddParameter("geom", GeoTypes::GEOMETRY()); + variant.SetReturnType(GeoTypes::GEOMETRY()); + + variant.SetInit(LocalState::Init); + variant.SetFunction(Execute); + }); + + func.SetDescription("Returns the centroid of a geometry"); + func.SetTag("ext", "spatial"); + func.SetTag("category", "construction"); + }); + } +}; + +struct ST_Contains : AsymmetricPreparedBinaryFunction { + static bool ExecutePredicateNormal(const GeosGeometry &lhs, const GeosGeometry &rhs) { + return lhs.contains(rhs); + } + static bool ExecutePredicatePrepared(const PreparedGeosGeometry &lhs, const GeosGeometry &rhs) { + return lhs.contains(rhs); + } + + static void Register(DatabaseInstance &db) { + FunctionBuilder::RegisterScalar(db, "ST_Contains", [](ScalarFunctionBuilder &func) { + func.AddVariant([](ScalarFunctionVariantBuilder &variant) { + variant.AddParameter("geom1", GeoTypes::GEOMETRY()); + variant.AddParameter("geom2", GeoTypes::GEOMETRY()); + variant.SetReturnType(LogicalType::BOOLEAN); + + variant.SetInit(LocalState::Init); + variant.SetFunction(Execute); + }); + + func.SetDescription("Returns true if the first geometry contains the second geometry"); + func.SetTag("ext", "spatial"); + func.SetTag("category", "relation"); + }); + } +}; + +struct ST_ContainsProperly : AsymmetricPreparedBinaryFunction { + static bool ExecutePredicateNormal(const GeosGeometry &lhs, const GeosGeometry &rhs) { + // We have no choice but to prepare the left geometry + const auto lhs_prep = lhs.get_prepared(); + return lhs_prep.contains_properly(rhs); + } + + static bool ExecutePredicatePrepared(const PreparedGeosGeometry &lhs, const GeosGeometry &rhs) { + return lhs.contains_properly(rhs); + } + + static void Register(DatabaseInstance &db) { + FunctionBuilder::RegisterScalar(db, "ST_ContainsProperly", 
[](ScalarFunctionBuilder &func) { + func.AddVariant([](ScalarFunctionVariantBuilder &variant) { + variant.AddParameter("geom1", GeoTypes::GEOMETRY()); + variant.AddParameter("geom2", GeoTypes::GEOMETRY()); + variant.SetReturnType(LogicalType::BOOLEAN); + + variant.SetInit(LocalState::Init); + variant.SetFunction(Execute); + }); + + func.SetDescription("Returns true if the first geometry contains the second geometry properly"); + func.SetTag("ext", "spatial"); + func.SetTag("category", "relation"); + }); + } +}; + +struct ST_ConvexHull { + static void Execute(DataChunk &args, ExpressionState &state, Vector &result) { + const auto &lstate = LocalState::ResetAndGet(state); + + UnaryExecutor::Execute(args.data[0], result, args.size(), [&](const string_t &geom_blob) { + const auto geom = lstate.Deserialize(geom_blob); + const auto hull = geom.get_convex_hull(); + return lstate.Serialize(result, hull); + }); + } + + static void Register(DatabaseInstance &db) { + FunctionBuilder::RegisterScalar(db, "ST_ConvexHull", [](ScalarFunctionBuilder &func) { + func.AddVariant([](ScalarFunctionVariantBuilder &variant) { + variant.AddParameter("geom", GeoTypes::GEOMETRY()); + variant.SetReturnType(GeoTypes::GEOMETRY()); + + variant.SetInit(LocalState::Init); + variant.SetFunction(Execute); + }); + + func.SetDescription("Returns the convex hull enclosing the geometry"); + func.SetTag("ext", "spatial"); + func.SetTag("category", "construction"); + }); + } +}; + +struct ST_CoveredBy : AsymmetricPreparedBinaryFunction { + static bool ExecutePredicateNormal(const GeosGeometry &lhs, const GeosGeometry &rhs) { + return lhs.covered_by(rhs); + } + static bool ExecutePredicatePrepared(const PreparedGeosGeometry &lhs, const GeosGeometry &rhs) { + return lhs.covered_by(rhs); + } + static void Register(DatabaseInstance &db) { + FunctionBuilder::RegisterScalar(db, "ST_CoveredBy", [](ScalarFunctionBuilder &func) { + func.AddVariant([](ScalarFunctionVariantBuilder &variant) { + 
variant.AddParameter("geom1", GeoTypes::GEOMETRY()); + variant.AddParameter("geom2", GeoTypes::GEOMETRY()); + variant.SetReturnType(LogicalType::BOOLEAN); + + variant.SetInit(LocalState::Init); + variant.SetFunction(Execute); + }); + + func.SetDescription("Returns true if the first geometry is covered by the second geometry"); + func.SetTag("ext", "spatial"); + func.SetTag("category", "relation"); + }); + } +}; + +struct ST_Covers : AsymmetricPreparedBinaryFunction { + static bool ExecutePredicateNormal(const GeosGeometry &lhs, const GeosGeometry &rhs) { + return lhs.covers(rhs); + } + static bool ExecutePredicatePrepared(const PreparedGeosGeometry &lhs, const GeosGeometry &rhs) { + return lhs.covers(rhs); + } + static void Register(DatabaseInstance &db) { + FunctionBuilder::RegisterScalar(db, "ST_Covers", [](ScalarFunctionBuilder &func) { + func.AddVariant([](ScalarFunctionVariantBuilder &variant) { + variant.AddParameter("geom1", GeoTypes::GEOMETRY()); + variant.AddParameter("geom2", GeoTypes::GEOMETRY()); + variant.SetReturnType(LogicalType::BOOLEAN); + + variant.SetInit(LocalState::Init); + variant.SetFunction(Execute); + }); + + func.SetDescription("Returns true if the first geometry covers the second geometry"); + func.SetTag("ext", "spatial"); + func.SetTag("category", "relation"); + }); + } +}; + +struct ST_Crosses : SymmetricPreparedBinaryFunction { + static bool ExecutePredicateNormal(const GeosGeometry &lhs, const GeosGeometry &rhs) { + return lhs.crosses(rhs); + } + static bool ExecutePredicatePrepared(const PreparedGeosGeometry &lhs, const GeosGeometry &rhs) { + return lhs.crosses(rhs); + } + static void Register(DatabaseInstance &db) { + FunctionBuilder::RegisterScalar(db, "ST_Crosses", [](ScalarFunctionBuilder &func) { + func.AddVariant([](ScalarFunctionVariantBuilder &variant) { + variant.AddParameter("geom1", GeoTypes::GEOMETRY()); + variant.AddParameter("geom2", GeoTypes::GEOMETRY()); + variant.SetReturnType(LogicalType::BOOLEAN); + + 
variant.SetInit(LocalState::Init); + variant.SetFunction(Execute); + }); + + func.SetDescription("Returns true if the geometries cross each other"); + func.SetTag("ext", "spatial"); + func.SetTag("category", "relation"); + }); + } +}; + +struct ST_Difference { + static void Execute(DataChunk &args, ExpressionState &state, Vector &result) { + const auto &lstate = LocalState::ResetAndGet(state); + + BinaryExecutor::Execute(args.data[0], args.data[1], result, args.size(), + [&](const string_t &lhs_blob, const string_t &rhs_blob) { + const auto lhs = lstate.Deserialize(lhs_blob); + const auto rhs = lstate.Deserialize(rhs_blob); + const auto difference = lhs.get_difference(rhs); + return lstate.Serialize(result, difference); + }); + } + + static void Register(DatabaseInstance &db) { + FunctionBuilder::RegisterScalar(db, "ST_Difference", [](ScalarFunctionBuilder &func) { + func.AddVariant([](ScalarFunctionVariantBuilder &variant) { + variant.AddParameter("geom1", GeoTypes::GEOMETRY()); + variant.AddParameter("geom2", GeoTypes::GEOMETRY()); + variant.SetReturnType(GeoTypes::GEOMETRY()); + + variant.SetInit(LocalState::Init); + variant.SetFunction(Execute); + }); + + func.SetDescription("Returns the difference between two geometries"); + func.SetTag("ext", "spatial"); + func.SetTag("category", "construction"); + }); + } +}; + +struct ST_Disjoint : SymmetricPreparedBinaryFunction { + static bool ExecutePredicateNormal(const GeosGeometry &lhs, const GeosGeometry &rhs) { + return lhs.disjoint(rhs); + } + static bool ExecutePredicatePrepared(const PreparedGeosGeometry &lhs, const GeosGeometry &rhs) { + return lhs.disjoint(rhs); + } + static void Register(DatabaseInstance &db) { + FunctionBuilder::RegisterScalar(db, "ST_Disjoint", [](ScalarFunctionBuilder &func) { + func.AddVariant([](ScalarFunctionVariantBuilder &variant) { + variant.AddParameter("geom1", GeoTypes::GEOMETRY()); + variant.AddParameter("geom2", GeoTypes::GEOMETRY()); + 
variant.SetReturnType(LogicalType::BOOLEAN); + + variant.SetInit(LocalState::Init); + variant.SetFunction(Execute); + }); + + func.SetDescription("Returns true if the geometries are disjoint"); + func.SetTag("ext", "spatial"); + func.SetTag("category", "relation"); + }); + } +}; + +struct ST_Distance : SymmetricPreparedBinaryFunction { + static double ExecutePredicateNormal(const GeosGeometry &lhs, const GeosGeometry &rhs) { + return lhs.distance_to(rhs); + } + static double ExecutePredicatePrepared(const PreparedGeosGeometry &lhs, const GeosGeometry &rhs) { + return lhs.distance_to(rhs); + } + static void Register(DatabaseInstance &db) { + FunctionBuilder::RegisterScalar(db, "ST_Distance", [](ScalarFunctionBuilder &func) { + func.AddVariant([](ScalarFunctionVariantBuilder &variant) { + variant.AddParameter("geom1", GeoTypes::GEOMETRY()); + variant.AddParameter("geom2", GeoTypes::GEOMETRY()); + variant.SetReturnType(LogicalType::DOUBLE); + + variant.SetInit(LocalState::Init); + variant.SetFunction(Execute); + }); + + func.SetDescription("Returns the planar distance between two geometries"); + func.SetTag("ext", "spatial"); + func.SetTag("category", "measurement"); + }); + } +}; + +struct ST_DistanceWithin { + + static void Execute(DataChunk &args, ExpressionState &state, Vector &result) { + // Because this takes an extra argument, we cant reuse the SymmetricPreparedBinary... 
+ + const auto &lstate = LocalState::ResetAndGet(state); + + auto &lhs_vec = args.data[0]; + auto &rhs_vec = args.data[1]; + auto &arg_vec = args.data[2]; + + const auto lhs_is_const = + lhs_vec.GetVectorType() == VectorType::CONSTANT_VECTOR && !ConstantVector::IsNull(lhs_vec); + const auto rhs_is_const = + rhs_vec.GetVectorType() == VectorType::CONSTANT_VECTOR && !ConstantVector::IsNull(rhs_vec); + const auto arg_is_const = + arg_vec.GetVectorType() == VectorType::CONSTANT_VECTOR && !ConstantVector::IsNull(arg_vec); + + if (lhs_is_const && rhs_is_const && arg_is_const) { + // Both geometries (and the argument) are constant, so only execute it once + result.SetVectorType(VectorType::CONSTANT_VECTOR); + const auto &lhs_blob = ConstantVector::GetData(lhs_vec)[0]; + const auto &rhs_blob = ConstantVector::GetData(rhs_vec)[0]; + const auto &arg_dist = ConstantVector::GetData(arg_vec)[0]; + const auto lhs_geom = lstate.Deserialize(lhs_blob); + const auto rhs_geom = lstate.Deserialize(rhs_blob); + + ConstantVector::GetData(result)[0] = lhs_geom.distance_within(rhs_geom, arg_dist); + } else if (lhs_is_const && rhs_is_const && !arg_is_const) { + // The geometries are constant, but the distance is not, prepare the larger one and execute unary + + const auto &lhs_blob = ConstantVector::GetData(lhs_vec)[0]; + const auto &rhs_blob = ConstantVector::GetData(rhs_vec)[0]; + + const auto rhs_bigger = rhs_blob.GetSize() > lhs_blob.GetSize(); + + const auto large_geom = rhs_bigger ? lstate.Deserialize(rhs_blob) : lstate.Deserialize(lhs_blob); + const auto probe_geom = rhs_bigger ? lstate.Deserialize(lhs_blob) : lstate.Deserialize(rhs_blob); + + // PreparedDistanceWithin only works if one is prepared. 
so just choose the larger one + const auto prep_geom = large_geom.get_prepared(); + + UnaryExecutor::Execute(arg_vec, result, args.size(), [&](const double arg_dist) { + return prep_geom.distance_within(probe_geom, arg_dist); + }); + + } else if (lhs_is_const != rhs_is_const) { + // One of the two is const, prepare the const one and execute on the non-const one + auto &const_vec = lhs_is_const ? lhs_vec : rhs_vec; + auto &probe_vec = lhs_is_const ? rhs_vec : lhs_vec; + + const auto &const_blob = ConstantVector::GetData(const_vec)[0]; + const auto const_geom = lstate.Deserialize(const_blob); + const auto const_prep = const_geom.get_prepared(); + + BinaryExecutor::Execute(probe_vec, arg_vec, result, args.size(), + [&](const string_t &probe_blob, double distance) { + const auto probe_geom = lstate.Deserialize(probe_blob); + return const_prep.distance_within(probe_geom, distance); + }); + } else { + // Both are non-const, just execute normally + TernaryExecutor::Execute( + lhs_vec, rhs_vec, arg_vec, result, args.size(), + [&](const string_t &lhs_blob, const string_t &rhs_blob, double distance) { + const auto lhs = lstate.Deserialize(lhs_blob); + const auto rhs = lstate.Deserialize(rhs_blob); + return lhs.distance_within(rhs, distance); + }); + } + } + + static void Register(DatabaseInstance &db) { + FunctionBuilder::RegisterScalar(db, "ST_DWithin", [](ScalarFunctionBuilder &func) { + func.AddVariant([](ScalarFunctionVariantBuilder &variant) { + variant.AddParameter("geom1", GeoTypes::GEOMETRY()); + variant.AddParameter("geom2", GeoTypes::GEOMETRY()); + variant.AddParameter("distance", LogicalType::DOUBLE); + variant.SetReturnType(LogicalType::BOOLEAN); + + variant.SetInit(LocalState::Init); + variant.SetFunction(Execute); + }); + + func.SetDescription(R"( + Returns if two geometries are within a target distance of each-other + )"); + + func.SetTag("ext", "spatial"); + func.SetTag("category", "relation"); + }); + } +}; + +struct ST_Equals { + static void 
Execute(DataChunk &args, ExpressionState &state, Vector &result) { + const auto &lstate = LocalState::ResetAndGet(state); + + BinaryExecutor::Execute(args.data[0], args.data[1], result, args.size(), + [&](const string_t &lhs_blob, const string_t &rhs_blob) { + const auto lhs = lstate.Deserialize(lhs_blob); + const auto rhs = lstate.Deserialize(rhs_blob); + return lhs.equals(rhs); + }); + } + static void Register(DatabaseInstance &db) { + FunctionBuilder::RegisterScalar(db, "ST_Equals", [](ScalarFunctionBuilder &func) { + func.AddVariant([](ScalarFunctionVariantBuilder &variant) { + variant.AddParameter("geom1", GeoTypes::GEOMETRY()); + variant.AddParameter("geom2", GeoTypes::GEOMETRY()); + variant.SetReturnType(LogicalType::BOOLEAN); + + variant.SetInit(LocalState::Init); + variant.SetFunction(Execute); + }); + + func.SetDescription("Returns true if the geometries are equal"); + func.SetTag("ext", "spatial"); + func.SetTag("category", "relation"); + }); + } +}; + +struct ST_Envelope { + static void Execute(DataChunk &args, ExpressionState &state, Vector &result) { + const auto &lstate = LocalState::ResetAndGet(state); + + UnaryExecutor::Execute(args.data[0], result, args.size(), [&](const string_t &geom_blob) { + const auto geom = lstate.Deserialize(geom_blob); + const auto intersection = geom.get_envelope(); + return lstate.Serialize(result, intersection); + }); + } + + static void Register(DatabaseInstance &db) { + FunctionBuilder::RegisterScalar(db, "ST_Envelope", [](ScalarFunctionBuilder &func) { + func.AddVariant([](ScalarFunctionVariantBuilder &variant) { + variant.AddParameter("geom", GeoTypes::GEOMETRY()); + variant.SetReturnType(GeoTypes::GEOMETRY()); + + variant.SetInit(LocalState::Init); + variant.SetFunction(Execute); + }); + + func.SetDescription("Returns the envelope of a geometry"); + func.SetTag("ext", "spatial"); + func.SetTag("category", "construction"); + }); + } +}; + +struct ST_Intersection { + static void Execute(DataChunk &args, 
ExpressionState &state, Vector &result) { + const auto &lstate = LocalState::ResetAndGet(state); + + BinaryExecutor::Execute(args.data[0], args.data[1], result, args.size(), + [&](const string_t &lhs_blob, const string_t &rhs_blob) { + const auto lhs = lstate.Deserialize(lhs_blob); + const auto rhs = lstate.Deserialize(rhs_blob); + const auto intersection = lhs.get_intersection(rhs); + return lstate.Serialize(result, intersection); + }); + } + + static void Register(DatabaseInstance &db) { + FunctionBuilder::RegisterScalar(db, "ST_Intersection", [](ScalarFunctionBuilder &func) { + func.AddVariant([](ScalarFunctionVariantBuilder &variant) { + variant.AddParameter("geom1", GeoTypes::GEOMETRY()); + variant.AddParameter("geom2", GeoTypes::GEOMETRY()); + variant.SetReturnType(GeoTypes::GEOMETRY()); + + variant.SetInit(LocalState::Init); + variant.SetFunction(Execute); + }); + + func.SetDescription("Returns the intersection of two geometries"); + func.SetTag("ext", "spatial"); + func.SetTag("category", "construction"); + }); + } +}; + +struct ST_Intersects : SymmetricPreparedBinaryFunction { + static bool ExecutePredicateNormal(const GeosGeometry &lhs, const GeosGeometry &rhs) { + return lhs.intersects(rhs); + } + + static bool ExecutePredicatePrepared(const PreparedGeosGeometry &lhs, const GeosGeometry &rhs) { + return lhs.intersects(rhs); + } + + static void Register(DatabaseInstance &db) { + FunctionBuilder::RegisterScalar(db, "ST_Intersects", [](ScalarFunctionBuilder &func) { + func.AddVariant([](ScalarFunctionVariantBuilder &variant) { + variant.AddParameter("geom1", GeoTypes::GEOMETRY()); + variant.AddParameter("geom2", GeoTypes::GEOMETRY()); + variant.SetReturnType(LogicalType::BOOLEAN); + + variant.SetInit(LocalState::Init); + variant.SetFunction(Execute); + }); + + func.SetDescription("Returns true if the geometries intersect"); + func.SetTag("ext", "spatial"); + func.SetTag("category", "relation"); + }); + } +}; + +struct ST_IsRing { + static void 
Execute(DataChunk &args, ExpressionState &state, Vector &result) { + const auto &lstate = LocalState::ResetAndGet(state); + UnaryExecutor::Execute(args.data[0], result, args.size(), [&](const string_t &geom_blob) { + const auto geom = lstate.Deserialize(geom_blob); + return geom.is_ring(); + }); + } + + static void Register(DatabaseInstance &db) { + FunctionBuilder::RegisterScalar(db, "ST_IsRing", [](ScalarFunctionBuilder &func) { + func.AddVariant([](ScalarFunctionVariantBuilder &variant) { + variant.AddParameter("geom", GeoTypes::GEOMETRY()); + variant.SetReturnType(LogicalType::BOOLEAN); + + variant.SetInit(LocalState::Init); + variant.SetFunction(Execute); + }); + + func.SetDescription("Returns true if the geometry is a ring (both ST_IsClosed and ST_IsSimple)."); + func.SetTag("ext", "spatial"); + func.SetTag("category", "property"); + }); + } +}; + +struct ST_IsSimple { + static void Execute(DataChunk &args, ExpressionState &state, Vector &result) { + const auto &lstate = LocalState::ResetAndGet(state); + UnaryExecutor::Execute(args.data[0], result, args.size(), [&](const string_t &geom_blob) { + const auto geom = lstate.Deserialize(geom_blob); + return geom.is_simple(); + }); + } + + static void Register(DatabaseInstance &db) { + FunctionBuilder::RegisterScalar(db, "ST_IsSimple", [](ScalarFunctionBuilder &func) { + func.AddVariant([](ScalarFunctionVariantBuilder &variant) { + variant.AddParameter("geom", GeoTypes::GEOMETRY()); + variant.SetReturnType(LogicalType::BOOLEAN); + + variant.SetInit(LocalState::Init); + variant.SetFunction(Execute); + }); + + func.SetDescription("Returns true if the geometry is simple"); + func.SetTag("ext", "spatial"); + func.SetTag("category", "property"); + }); + } +}; + +struct ST_IsValid { + + static void Execute(DataChunk &args, ExpressionState &state, Vector &result) { + auto &lstate = LocalState::ResetAndGet(state); + UnaryExecutor::Execute(args.data[0], result, args.size(), [&](const string_t &geom_blob) { + // GEOS can 
only construct geometries with a valid amount of vertices. + // So if deserialization fails, it cant be valid + try { + const auto geom = lstate.Deserialize(geom_blob); + return geom.is_valid(); + } catch (...) { + return false; + } + }); + } + static void Register(DatabaseInstance &db) { + FunctionBuilder::RegisterScalar(db, "ST_IsValid", [](ScalarFunctionBuilder &func) { + func.AddVariant([](ScalarFunctionVariantBuilder &variant) { + variant.AddParameter("geom", GeoTypes::GEOMETRY()); + variant.SetReturnType(LogicalType::BOOLEAN); + + variant.SetInit(LocalState::Init); + variant.SetFunction(Execute); + }); + + func.SetDescription("Returns true if the geometry is valid"); + func.SetTag("ext", "spatial"); + func.SetTag("category", "property"); + }); + } +}; + +struct ST_LineMerge { + + static void Execute(DataChunk &args, ExpressionState &state, Vector &result) { + const auto &lstate = LocalState::ResetAndGet(state); + UnaryExecutor::Execute(args.data[0], result, args.size(), + [&](const string_t &geometry_blob) { + const auto geometry = lstate.Deserialize(geometry_blob); + const auto merged = geometry.get_linemerged(false); + return lstate.Serialize(result, merged); + }); + } + + static void ExecuteWithDirection(DataChunk &args, ExpressionState &state, Vector &result) { + const auto &lstate = LocalState::ResetAndGet(state); + BinaryExecutor::Execute(args.data[0], args.data[1], result, args.size(), + [&](const string_t &geometry_blob, bool preserve_direction) { + const auto geometry = lstate.Deserialize(geometry_blob); + const auto merged = + geometry.get_linemerged(preserve_direction); + return lstate.Serialize(result, merged); + }); + } + + static void Register(DatabaseInstance &db) { + FunctionBuilder::RegisterScalar(db, "ST_LineMerge", [](ScalarFunctionBuilder &func) { + func.AddVariant([](ScalarFunctionVariantBuilder &variant) { + variant.AddParameter("geom", GeoTypes::GEOMETRY()); + variant.SetReturnType(GeoTypes::GEOMETRY()); + 
variant.SetInit(LocalState::Init); + variant.SetFunction(Execute); + }); + + func.AddVariant([](ScalarFunctionVariantBuilder &variant) { + variant.AddParameter("geom", GeoTypes::GEOMETRY()); + variant.AddParameter("preserve_direction", LogicalType::BOOLEAN); + variant.SetReturnType(GeoTypes::GEOMETRY()); + variant.SetInit(LocalState::Init); + variant.SetFunction(ExecuteWithDirection); + }); + + func.SetDescription(R"("Merges" the input line geometry, optionally taking direction into account.)"); + func.SetTag("ext", "spatial"); + func.SetTag("category", "construction"); + }); + } +}; + +struct ST_MakeValid { + static void Execute(DataChunk &args, ExpressionState &state, Vector &result) { + const auto &lstate = LocalState::ResetAndGet(state); + + UnaryExecutor::Execute(args.data[0], result, args.size(), [&](const string_t &geom_blob) { + const auto geom = lstate.Deserialize(geom_blob); + const auto valid = geom.get_made_valid(); + return lstate.Serialize(result, valid); + }); + } + + static void Register(DatabaseInstance &db) { + FunctionBuilder::RegisterScalar(db, "ST_MakeValid", [](ScalarFunctionBuilder &func) { + func.AddVariant([](ScalarFunctionVariantBuilder &variant) { + variant.AddParameter("geom", GeoTypes::GEOMETRY()); + variant.SetReturnType(GeoTypes::GEOMETRY()); + + variant.SetInit(LocalState::Init); + variant.SetFunction(Execute); + }); + + func.SetDescription("Returns a valid representation of the geometry"); + func.SetTag("ext", "spatial"); + func.SetTag("category", "construction"); + }); + } +}; + +struct ST_Normalize { + static void Execute(DataChunk &args, ExpressionState &state, Vector &result) { + const auto &lstate = LocalState::ResetAndGet(state); + UnaryExecutor::Execute(args.data[0], result, args.size(), [&](const string_t &geom_blob) { + const auto geom = lstate.Deserialize(geom_blob); + geom.normalize_in_place(); + return lstate.Serialize(result, geom); + }); + } + static void Register(DatabaseInstance &db) { + 
FunctionBuilder::RegisterScalar(db, "ST_Normalize", [](ScalarFunctionBuilder &func) { + func.AddVariant([](ScalarFunctionVariantBuilder &variant) { + variant.AddParameter("geom", GeoTypes::GEOMETRY()); + variant.SetReturnType(GeoTypes::GEOMETRY()); + + variant.SetInit(LocalState::Init); + variant.SetFunction(Execute); + }); + + func.SetDescription("Returns a normalized representation of the geometry"); + func.SetTag("ext", "spatial"); + func.SetTag("category", "construction"); + }); + } +}; + +struct ST_Overlaps : SymmetricPreparedBinaryFunction { + static bool ExecutePredicateNormal(const GeosGeometry &lhs, const GeosGeometry &rhs) { + return lhs.overlaps(rhs); + } + static bool ExecutePredicatePrepared(const PreparedGeosGeometry &lhs, const GeosGeometry &rhs) { + return lhs.overlaps(rhs); + } + + static void Register(DatabaseInstance &db) { + FunctionBuilder::RegisterScalar(db, "ST_Overlaps", [](ScalarFunctionBuilder &func) { + func.AddVariant([](ScalarFunctionVariantBuilder &variant) { + variant.AddParameter("geom1", GeoTypes::GEOMETRY()); + variant.AddParameter("geom2", GeoTypes::GEOMETRY()); + variant.SetReturnType(LogicalType::BOOLEAN); + + variant.SetInit(LocalState::Init); + variant.SetFunction(Execute); + }); + + func.SetDescription("Returns true if the geometries overlap"); + func.SetTag("ext", "spatial"); + func.SetTag("category", "relation"); + }); + } +}; + +struct ST_PointOnSurface { + static void Execute(DataChunk &args, ExpressionState &state, Vector &result) { + const auto &lstate = LocalState::ResetAndGet(state); + + UnaryExecutor::Execute(args.data[0], result, args.size(), [&](const string_t &geom_blob) { + const auto geom = lstate.Deserialize(geom_blob); + const auto point = geom.get_point_on_surface(); + return lstate.Serialize(result, point); + }); + } + + static void Register(DatabaseInstance &db) { + FunctionBuilder::RegisterScalar(db, "ST_PointOnSurface", [](ScalarFunctionBuilder &func) { + func.AddVariant([](ScalarFunctionVariantBuilder 
&variant) { + variant.AddParameter("geom", GeoTypes::GEOMETRY()); + variant.SetReturnType(GeoTypes::GEOMETRY()); + + variant.SetInit(LocalState::Init); + variant.SetFunction(Execute); + }); + + func.SetDescription("Returns a point guaranteed to lie on the surface of the geometry"); + func.SetTag("ext", "spatial"); + func.SetTag("category", "construction"); + }); + } +}; + +struct ST_ReducePrecision { + static void Execute(DataChunk &args, ExpressionState &state, Vector &result) { + const auto &lstate = LocalState::ResetAndGet(state); + + BinaryExecutor::Execute( + args.data[0], args.data[1], result, args.size(), [&](const string_t &geom_blob, double precision) { + const auto geom = lstate.Deserialize(geom_blob); + const auto reduced = geom.get_reduced_precision(precision); + return lstate.Serialize(result, reduced); + }); + } + + static void Register(DatabaseInstance &db) { + FunctionBuilder::RegisterScalar(db, "ST_ReducePrecision", [](ScalarFunctionBuilder &func) { + func.AddVariant([](ScalarFunctionVariantBuilder &variant) { + variant.AddParameter("geom", GeoTypes::GEOMETRY()); + variant.AddParameter("precision", LogicalType::DOUBLE); + variant.SetReturnType(GeoTypes::GEOMETRY()); + + variant.SetInit(LocalState::Init); + variant.SetFunction(Execute); + }); + + func.SetDescription("Returns the geometry with all vertices reduced to the given precision"); + func.SetTag("ext", "spatial"); + func.SetTag("category", "construction"); + }); + } +}; + +struct ST_RemoveRepeatedPoints { + static void Execute(DataChunk &args, ExpressionState &state, Vector &result) { + const auto &lstate = LocalState::ResetAndGet(state); + + UnaryExecutor::Execute(args.data[0], result, args.size(), [&](const string_t &geom_blob) { + const auto geom = lstate.Deserialize(geom_blob); + const auto reduced = geom.get_without_repeated_points(0); + return lstate.Serialize(result, reduced); + }); + } + + static void ExecuteWithTolerance(DataChunk &args, ExpressionState &state, Vector &result) { + 
const auto &lstate = LocalState::ResetAndGet(state); + + BinaryExecutor::Execute( + args.data[0], args.data[1], result, args.size(), [&](const string_t &geom_blob, double tolerance) { + const auto geom = lstate.Deserialize(geom_blob); + const auto reduced = geom.get_without_repeated_points(tolerance); + return lstate.Serialize(result, reduced); + }); + } + + static void Register(DatabaseInstance &db) { + FunctionBuilder::RegisterScalar(db, "ST_RemoveRepeatedPoints", [](ScalarFunctionBuilder &func) { + func.AddVariant([](ScalarFunctionVariantBuilder &variant) { + variant.AddParameter("geom", GeoTypes::GEOMETRY()); + variant.SetReturnType(GeoTypes::GEOMETRY()); + + variant.SetInit(LocalState::Init); + variant.SetFunction(Execute); + }); + + func.AddVariant([](ScalarFunctionVariantBuilder &variant) { + variant.AddParameter("geom", GeoTypes::GEOMETRY()); + variant.AddParameter("tolerance", LogicalType::DOUBLE); + variant.SetReturnType(GeoTypes::GEOMETRY()); + + variant.SetInit(LocalState::Init); + variant.SetFunction(ExecuteWithTolerance); + }); + + func.SetDescription("Returns the geometry with repeated points removed"); + func.SetTag("ext", "spatial"); + func.SetTag("category", "construction"); + }); + } +}; + +struct ST_Reverse { + static void Execute(DataChunk &args, ExpressionState &state, Vector &result) { + const auto &lstate = LocalState::ResetAndGet(state); + + UnaryExecutor::Execute(args.data[0], result, args.size(), [&](const string_t &geom_blob) { + const auto geom = lstate.Deserialize(geom_blob); + const auto reversed = geom.get_reversed(); + return lstate.Serialize(result, reversed); + }); + } + + static void Register(DatabaseInstance &db) { + FunctionBuilder::RegisterScalar(db, "ST_Reverse", [](ScalarFunctionBuilder &func) { + func.AddVariant([](ScalarFunctionVariantBuilder &variant) { + variant.AddParameter("geom", GeoTypes::GEOMETRY()); + variant.SetReturnType(GeoTypes::GEOMETRY()); + + variant.SetInit(LocalState::Init); + variant.SetFunction(Execute); 
+ }); + + func.SetDescription("Returns the geometry with the order of its vertices reversed"); + func.SetTag("ext", "spatial"); + func.SetTag("category", "construction"); + }); + } +}; + +struct ST_ShortestLine { + static void Execute(DataChunk &args, ExpressionState &state, Vector &result) { + const auto &lstate = LocalState::ResetAndGet(state); + BinaryExecutor::Execute(args.data[0], args.data[1], result, args.size(), + [&](const string_t &lhs_blob, const string_t &rhs_blob) { + const auto lhs = lstate.Deserialize(lhs_blob); + const auto rhs = lstate.Deserialize(rhs_blob); + const auto line = lhs.get_shortest_line(rhs); + return lstate.Serialize(result, line); + }); + } + + static void Register(DatabaseInstance &db) { + FunctionBuilder::RegisterScalar(db, "ST_ShortestLine", [](ScalarFunctionBuilder &func) { + func.AddVariant([](ScalarFunctionVariantBuilder &variant) { + variant.AddParameter("geom1", GeoTypes::GEOMETRY()); + variant.AddParameter("geom2", GeoTypes::GEOMETRY()); + variant.SetReturnType(GeoTypes::GEOMETRY()); + + variant.SetInit(LocalState::Init); + variant.SetFunction(Execute); + }); + + func.SetDescription("Returns the shortest line between two geometries"); + func.SetTag("ext", "spatial"); + func.SetTag("category", "measurement"); + }); + } +}; + +struct ST_Simplify { + static void Execute(DataChunk &args, ExpressionState &state, Vector &result) { + const auto &lstate = LocalState::ResetAndGet(state); + BinaryExecutor::Execute(args.data[0], args.data[1], result, args.size(), + [&](const string_t &geom_blob, double tolerance) { + const auto geom = lstate.Deserialize(geom_blob); + const auto simplified = geom.get_simplified(tolerance); + return lstate.Serialize(result, simplified); + }); + } + static void Register(DatabaseInstance &db) { + FunctionBuilder::RegisterScalar(db, "ST_Simplify", [](ScalarFunctionBuilder &func) { + func.AddVariant([](ScalarFunctionVariantBuilder &variant) { + variant.AddParameter("geom", GeoTypes::GEOMETRY()); + 
variant.AddParameter("tolerance", LogicalType::DOUBLE); + variant.SetReturnType(GeoTypes::GEOMETRY()); + + variant.SetInit(LocalState::Init); + variant.SetFunction(Execute); + }); + + func.SetDescription("Returns a simplified version of the geometry"); + func.SetTag("ext", "spatial"); + func.SetTag("category", "construction"); + }); + } +}; + +struct ST_SimplifyPreserveTopology { + static void Execute(DataChunk &args, ExpressionState &state, Vector &result) { + const auto &lstate = LocalState::ResetAndGet(state); + BinaryExecutor::Execute( + args.data[0], args.data[1], result, args.size(), [&](const string_t &geom_blob, double tolerance) { + const auto geom = lstate.Deserialize(geom_blob); + const auto simplified = geom.get_simplified_topo(tolerance); + return lstate.Serialize(result, simplified); + }); + } + + static void Register(DatabaseInstance &db) { + FunctionBuilder::RegisterScalar(db, "ST_SimplifyPreserveTopology", [](ScalarFunctionBuilder &func) { + func.AddVariant([](ScalarFunctionVariantBuilder &variant) { + variant.AddParameter("geom", GeoTypes::GEOMETRY()); + variant.AddParameter("tolerance", LogicalType::DOUBLE); + variant.SetReturnType(GeoTypes::GEOMETRY()); + + variant.SetInit(LocalState::Init); + variant.SetFunction(Execute); + }); + + func.SetDescription("Returns a simplified version of the geometry that preserves topology"); + func.SetTag("ext", "spatial"); + func.SetTag("category", "construction"); + }); + } +}; + +struct ST_Touches : SymmetricPreparedBinaryFunction { + static bool ExecutePredicateNormal(const GeosGeometry &lhs, const GeosGeometry &rhs) { + return lhs.touches(rhs); + } + static bool ExecutePredicatePrepared(const PreparedGeosGeometry &lhs, const GeosGeometry &rhs) { + return lhs.touches(rhs); + } + static void Register(DatabaseInstance &db) { + FunctionBuilder::RegisterScalar(db, "ST_Touches", [](ScalarFunctionBuilder &func) { + func.AddVariant([](ScalarFunctionVariantBuilder &variant) { + variant.AddParameter("geom1", 
GeoTypes::GEOMETRY()); + variant.AddParameter("geom2", GeoTypes::GEOMETRY()); + variant.SetReturnType(LogicalType::BOOLEAN); + + variant.SetInit(LocalState::Init); + variant.SetFunction(Execute); + }); + + func.SetDescription("Returns true if the geometries touch"); + func.SetTag("ext", "spatial"); + func.SetTag("category", "relation"); + }); + } +}; + +struct ST_Union { + static void Execute(DataChunk &args, ExpressionState &state, Vector &result) { + const auto &lstate = LocalState::ResetAndGet(state); + + BinaryExecutor::Execute(args.data[0], args.data[1], result, args.size(), + [&](const string_t &lhs_blob, const string_t &rhs_blob) { + const auto lhs = lstate.Deserialize(lhs_blob); + const auto rhs = lstate.Deserialize(rhs_blob); + const auto unioned = lhs.get_union(rhs); + return lstate.Serialize(result, unioned); + }); + } + static void Register(DatabaseInstance &db) { + FunctionBuilder::RegisterScalar(db, "ST_Union", [](ScalarFunctionBuilder &func) { + func.AddVariant([](ScalarFunctionVariantBuilder &variant) { + variant.AddParameter("geom1", GeoTypes::GEOMETRY()); + variant.AddParameter("geom2", GeoTypes::GEOMETRY()); + variant.SetReturnType(GeoTypes::GEOMETRY()); + + variant.SetInit(LocalState::Init); + variant.SetFunction(Execute); + }); + + func.SetDescription("Returns the union of two geometries"); + func.SetTag("ext", "spatial"); + func.SetTag("category", "construction"); + }); + } +}; + +struct ST_Within : AsymmetricPreparedBinaryFunction { + static bool ExecutePredicateNormal(const GeosGeometry &lhs, const GeosGeometry &rhs) { + return lhs.within(rhs); + } + static bool ExecutePredicatePrepared(const PreparedGeosGeometry &lhs, const GeosGeometry &rhs) { + return lhs.within(rhs); + } + static void Register(DatabaseInstance &db) { + FunctionBuilder::RegisterScalar(db, "ST_Within", [](ScalarFunctionBuilder &func) { + func.AddVariant([](ScalarFunctionVariantBuilder &variant) { + variant.AddParameter("geom1", GeoTypes::GEOMETRY()); + 
variant.AddParameter("geom2", GeoTypes::GEOMETRY()); + variant.SetReturnType(LogicalType::BOOLEAN); + + variant.SetInit(LocalState::Init); + variant.SetFunction(Execute); + }); + + func.SetDescription("Returns true if the first geometry is within the second"); + func.SetTag("ext", "spatial"); + func.SetTag("category", "relation"); + }); + } +}; + +//###################################################################################################################### +// Aggregate Functions +//###################################################################################################################### + +//====================================================================================================================== +// Base GEOS-based unary aggregate +//====================================================================================================================== +struct GeosUnaryAggState { + GEOSGeometry *geom = nullptr; + GEOSContextHandle_t context = nullptr; +}; + +struct GeosUnaryAggFunction { + + // Serialize a GEOS geometry + static string_t Serialize(const GEOSContextHandle_t context, Vector &result, const GEOSGeometry *geom) { + D_ASSERT(geom); + const auto size = GeosSerde::GetRequiredSize(context, geom); + auto blob = StringVector::EmptyString(result, size); + const auto ptr = blob.GetDataWriteable(); + + // Serialize the geometry + GeosSerde::Serialize(context, geom, ptr, size); + + blob.Finalize(); + return blob; + } + + // Deserialize a GEOS geometry + static GEOSGeometry *Deserialize(const GEOSContextHandle_t context, const string_t &blob) { + const auto ptr = blob.GetData(); + const auto size = blob.GetSize(); + + return GeosSerde::Deserialize(context, ptr, size); + } + + template + static void Initialize(STATE &state) { + state.geom = nullptr; + state.context = GEOS_init_r(); + } + + template + static void Combine(const STATE &source, STATE &target, AggregateInputData &data) { + if (!source.geom) { + return; + } + if 
(!target.geom) { + target.geom = GEOSGeom_clone_r(target.context, source.geom); + return; + } + auto curr = target.geom; + target.geom = OP::Merge(target.context, curr, source.geom); + GEOSGeom_destroy_r(target.context, curr); + } + + template + static void Operation(STATE &state, const INPUT_TYPE &input, AggregateUnaryInput &) { + if (!state.geom) { + state.geom = Deserialize(state.context, input); + } else { + auto next = Deserialize(state.context, input); + auto curr = state.geom; + state.geom = OP::Merge(state.context, curr, next); + GEOSGeom_destroy_r(state.context, next); + GEOSGeom_destroy_r(state.context, curr); + } + } + + template + static void ConstantOperation(STATE &state, const INPUT_TYPE &input, AggregateUnaryInput &, idx_t) { + // There is no point in doing anything else, intersection and union is idempotent + if (!state.geom) { + state.geom = Deserialize(state.context, input); + } + } + + template + static void Finalize(STATE &state, T &target, AggregateFinalizeData &finalize_data) { + if (!state.geom) { + finalize_data.ReturnNull(); + } else { + target = Serialize(state.context, finalize_data.result, state.geom); + } + } + + template + static void Destroy(STATE &state, AggregateInputData &) { + if (state.geom) { + GEOSGeom_destroy_r(state.context, state.geom); + state.geom = nullptr; + } + if (state.context) { + GEOS_finish_r(state.context); + state.context = nullptr; + } + } + + static bool IgnoreNull() { + return true; + } +}; + +//====================================================================================================================== +// ST_Union_Agg +//====================================================================================================================== + +struct ST_Union_Agg : GeosUnaryAggFunction { + static GEOSGeometry *Merge(const GEOSContextHandle_t context, const GEOSGeometry *curr, const GEOSGeometry *next) { + return GEOSUnion_r(context, curr, next); + } + + static void Register(DatabaseInstance &db) { + 
const auto agg = + AggregateFunction::UnaryAggregateDestructor( + GeoTypes::GEOMETRY(), GeoTypes::GEOMETRY()); + + FunctionBuilder::RegisterAggregate(db, "ST_Union_Agg", [&](AggregateFunctionBuilder &func) { + func.SetFunction(agg); + func.SetDescription("Computes the union of a set of input geometries"); + + func.SetTag("ext", "spatial"); + func.SetTag("category", "construction"); + }); + } +}; + +//====================================================================================================================== +// ST_Intersection_Agg +//====================================================================================================================== + +struct ST_Intersection_Agg : GeosUnaryAggFunction { + static GEOSGeometry *Merge(const GEOSContextHandle_t context, const GEOSGeometry *curr, const GEOSGeometry *next) { + return GEOSIntersection_r(context, curr, next); + } + + static void Register(DatabaseInstance &db) { + const auto agg = + AggregateFunction::UnaryAggregateDestructor( + GeoTypes::GEOMETRY(), GeoTypes::GEOMETRY()); + + FunctionBuilder::RegisterAggregate(db, "ST_Intersection_Agg", [&](AggregateFunctionBuilder &func) { + func.SetFunction(agg); + func.SetDescription("Computes the intersection of a set of geometries"); + + func.SetTag("ext", "spatial"); + func.SetTag("category", "construction"); + }); + } +}; + +} // namespace + +//###################################################################################################################### +// Register Module +//###################################################################################################################### + +void RegisterGEOSModule(DatabaseInstance &db) { + + // Scalar Functions + ST_Boundary::Register(db); + ST_Buffer::Register(db); + ST_Centroid::Register(db); + ST_Contains::Register(db); + ST_ContainsProperly::Register(db); + ST_ConvexHull::Register(db); + ST_CoveredBy::Register(db); + ST_Covers::Register(db); + ST_Crosses::Register(db); + 
ST_Difference::Register(db); + ST_Disjoint::Register(db); + ST_Distance::Register(db); + ST_DistanceWithin::Register(db); + ST_Equals::Register(db); + ST_Envelope::Register(db); + ST_Intersection::Register(db); + ST_Intersects::Register(db); + ST_IsRing::Register(db); + ST_IsSimple::Register(db); + ST_IsValid::Register(db); + ST_LineMerge::Register(db); + ST_MakeValid::Register(db); + ST_Normalize::Register(db); + ST_Overlaps::Register(db); + ST_PointOnSurface::Register(db); + ST_ReducePrecision::Register(db); + ST_RemoveRepeatedPoints::Register(db); + ST_Reverse::Register(db); + ST_ShortestLine::Register(db); + ST_Simplify::Register(db); + ST_SimplifyPreserveTopology::Register(db); + ST_Touches::Register(db); + ST_Union::Register(db); + ST_Within::Register(db); + + // Aggregate Functions + ST_Union_Agg::Register(db); + ST_Intersection_Agg::Register(db); +} + +} // namespace duckdb diff --git a/src/spatial/modules/geos/geos_module.hpp b/src/spatial/modules/geos/geos_module.hpp new file mode 100644 index 00000000..92bf058e --- /dev/null +++ b/src/spatial/modules/geos/geos_module.hpp @@ -0,0 +1,9 @@ +#pragma once + +namespace duckdb { + +class DatabaseInstance; + +void RegisterGEOSModule(DatabaseInstance &db); + +} // namespace duckdb \ No newline at end of file diff --git a/src/spatial/modules/geos/geos_serde.cpp b/src/spatial/modules/geos/geos_serde.cpp new file mode 100644 index 00000000..93ad4aa9 --- /dev/null +++ b/src/spatial/modules/geos/geos_serde.cpp @@ -0,0 +1,518 @@ +#include "spatial/modules/geos/geos_serde.hpp" + +#include "duckdb/common/typedefs.hpp" +#include "geos_c.h" + +#include +#include +#include +#include "spatial/geometry/geometry_processor.hpp" + +namespace duckdb { + +template +static T StorageTypeFromGEOS(int type) { + switch (type) { + case GEOS_POINT: + return static_cast(0); + case GEOS_LINESTRING: + return static_cast(1); + case GEOS_POLYGON: + return static_cast(2); + case GEOS_MULTIPOINT: + return static_cast(3); + case 
GEOS_MULTILINESTRING: + return static_cast(4); + case GEOS_MULTIPOLYGON: + return static_cast(5); + case GEOS_GEOMETRYCOLLECTION: + return static_cast(6); + default: + throw InvalidInputException("Unsupported GEOS geometry type %d", type); + } +} + +//---------------------------------------------------------------------------------------------------------------------- +// Get Required Size +//---------------------------------------------------------------------------------------------------------------------- + +static size_t GetCoordSeqLength(const GEOSContextHandle_t ctx, const GEOSCoordSequence *seq) { + uint32_t len = 0; + GEOSCoordSeq_getSize_r(ctx, seq, &len); + return len; +} + +static size_t GetRequiredSizeInternal(const GEOSContextHandle_t ctx, const GEOSGeometry *geom) { + const auto type = GEOSGeomTypeId_r(ctx, geom); + const bool has_z = GEOSHasZ_r(ctx, geom); + const bool has_m = GEOSHasM_r(ctx, geom); + + const auto vsize = sizeof(double) * (2 + has_z + has_m); + + switch (type) { + case GEOS_POINT: { + return 4 + 4 + (GEOSisEmpty_r(ctx, geom) ? 
0 : vsize); + } + case GEOS_LINESTRING: { + const auto line_seq = GEOSGeom_getCoordSeq_r(ctx, geom); + uint32_t line_len = 0; + GEOSCoordSeq_getSize_r(ctx, line_seq, &line_len); + return 4 + 4 + line_len * vsize; + } + case GEOS_POLYGON: { + // 4 bytes for type, + // 4 bytes for num rings + // 4 bytes for num points in shell, + // vertex_size bytes per point in shell, + // 4 bytes for num holes, + // 4 bytes for num points in hole, + // vertex_size bytes per point in hole + // 4 bytes padding if (shell + holes) % 2 == 1 + size_t size = 4 + 4; + + const auto exterior_ptr = GEOSGetExteriorRing_r(ctx, geom); + const auto exterior_seq = GEOSGeom_getCoordSeq_r(ctx, exterior_ptr); + uint32_t exterior_len = 0; + GEOSCoordSeq_getSize_r(ctx, exterior_seq, &exterior_len); + size += 4 + exterior_len * vsize; + + const auto num_rings = GEOSGetNumInteriorRings_r(ctx, geom); + for (auto i = 0; i < num_rings; i++) { + const auto interior_ptr = GEOSGetInteriorRingN_r(ctx, geom, i); + const auto interior_seq = GEOSGeom_getCoordSeq_r(ctx, interior_ptr); + uint32_t interior_len = 0; + GEOSCoordSeq_getSize_r(ctx, interior_seq, &interior_len); + size += 4 + interior_len * vsize; + } + + // We need to count the shell as well + if ((num_rings + 1) % 2 != 0) { + size += 4; + } + return size; + } + case GEOS_MULTIPOINT: + case GEOS_MULTILINESTRING: + case GEOS_MULTIPOLYGON: + case GEOS_GEOMETRYCOLLECTION: { + size_t size = 4 + 4; + const auto num_items = GEOSGetNumGeometries_r(ctx, geom); + for (auto i = 0; i < num_items; i++) { + const auto item = GEOSGetGeometryN_r(ctx, geom, i); + const auto item_size = GetRequiredSizeInternal(ctx, item); + if (item_size == 0) { + // Unsupported geometry type + return 0; + } + size += item_size; + } + return size; + } + default: + // Unsupported geometry type + return 0; + } +} + +size_t GeosSerde::GetRequiredSize(GEOSContextHandle_t ctx, const GEOSGeom_t *geom) { + const auto is_point = (GEOSGeomTypeId_r(ctx, geom) == GEOS_POINT); + const auto is_empty 
= GEOSisEmpty_r(ctx, geom); + + const auto has_bbox = !is_point && !is_empty; + const auto has_z = GEOSHasZ_r(ctx, geom); + const auto has_m = GEOSHasM_r(ctx, geom); + + const auto dims = 2 + (has_z ? 1 : 0) + (has_m ? 1 : 0); + + const auto head_size = 4 + 4; // type + props + padding + const auto geom_size = GetRequiredSizeInternal(ctx, geom); + const auto bbox_size = has_bbox ? dims * sizeof(float) * 2 : 0; + + const auto full_size = head_size + geom_size + bbox_size; + + // Check that the size is a multiple of 8 + D_ASSERT(full_size % 8 == 0); + + return full_size; +} + +//---------------------------------------------------------------------------------------------------------------------- +// Serialization +//---------------------------------------------------------------------------------------------------------------------- + +static void SerializeCoordSeq(const GEOSContextHandle_t ctx, const GEOSCoordSequence *seq, bool has_z, bool has_m, + size_t len, BinaryWriter &cursor) { + const auto buffer = cursor.Reserve(len * sizeof(double) * (2 + has_z + has_m)); + GEOSCoordSeq_copyToBuffer_r(ctx, seq, reinterpret_cast(buffer), has_z, has_m); +} + +static void SerializeInternal(const GEOSContextHandle_t ctx, const GEOSGeometry *geom, BinaryWriter &cursor) { + const auto type = GEOSGeomTypeId_r(ctx, geom); + const bool has_z = GEOSHasZ_r(ctx, geom); + const bool has_m = GEOSHasM_r(ctx, geom); + + cursor.Write(StorageTypeFromGEOS(type)); + + switch (type) { + case GEOS_POINT: + case GEOS_LINESTRING: { + if (GEOSisEmpty_r(ctx, geom)) { + cursor.Write(0); + return; + } + const auto seq = GEOSGeom_getCoordSeq_r(ctx, geom); + const auto len = GetCoordSeqLength(ctx, seq); + cursor.Write(len); + SerializeCoordSeq(ctx, seq, has_z, has_m, len, cursor); + return; + } + case GEOS_POLYGON: { + if (GEOSisEmpty_r(ctx, geom)) { + cursor.Write(0); + return; + } + + const auto num_rings = GEOSGetNumInteriorRings_r(ctx, geom); + + cursor.Write(num_rings + 1); + + const auto 
exterior_ptr = GEOSGetExteriorRing_r(ctx, geom); + const auto exterior_seq = GEOSGeom_getCoordSeq_r(ctx, exterior_ptr); + const auto exterior_len = GetCoordSeqLength(ctx, exterior_seq); + + // Save the cursor position to write the ring lengths later + BinaryWriter len_cursor = cursor; + + // Jump over the ring lengths + cursor.Skip(sizeof(uint32_t) * (num_rings + 1)); + + // Add padding if odd number of rings + if ((num_rings + 1) % 2 != 0) { + cursor.Write(0); + } + + // Now write both the length and the coordinates in one pass + + // Starting with the exterior ring + len_cursor.Write(exterior_len); + SerializeCoordSeq(ctx, exterior_seq, has_z, has_m, exterior_len, cursor); + + // And for each interior ring + for (auto i = 0; i < num_rings; i++) { + const auto interior_ptr = GEOSGetInteriorRingN_r(ctx, geom, i); + const auto interior_seq = GEOSGeom_getCoordSeq_r(ctx, interior_ptr); + const auto interior_len = GetCoordSeqLength(ctx, interior_seq); + len_cursor.Write(interior_len); + SerializeCoordSeq(ctx, interior_seq, has_z, has_m, interior_len, cursor); + } + return; + } + case GEOS_MULTIPOINT: + case GEOS_MULTILINESTRING: + case GEOS_MULTIPOLYGON: + case GEOS_GEOMETRYCOLLECTION: { + const auto num_items = GEOSGetNumGeometries_r(ctx, geom); + cursor.Write(num_items); + for (auto i = 0; i < num_items; i++) { + const auto item = GEOSGetGeometryN_r(ctx, geom, i); + SerializeInternal(ctx, item, cursor); + } + return; + } + default: + // Unsupported geometry type + D_ASSERT(false); + break; + } +} + +namespace { + +struct Point { + double x; + double y; + double z; + double m; +}; + +struct Extent { + Point min; + Point max; +}; + +} // namespace + +inline void GetCoordSeqExtent(const GEOSContextHandle_t ctx, const GEOSCoordSeq_t *geom, bool has_z, bool has_m, + Extent &extent) { + + double x; + double y; + double z; + double m; + + const auto len = GetCoordSeqLength(ctx, geom); + + for (size_t i = 0; i < len; i++) { + GEOSCoordSeq_getXY_r(ctx, geom, i, &x, &y); + 
extent.min.x = std::min(extent.min.x, x); + extent.min.y = std::min(extent.min.y, y); + extent.max.x = std::max(extent.max.x, x); + extent.max.y = std::max(extent.max.y, y); + } + + if (has_z && has_m) { + for (size_t i = 0; i < len; i++) { + GEOSCoordSeq_getZ_r(ctx, geom, i, &z); + GEOSCoordSeq_getOrdinate_r(ctx, geom, i, 3, &m); + extent.min.z = std::min(extent.min.z, z); + extent.min.m = std::min(extent.min.m, m); + extent.max.z = std::max(extent.max.z, z); + extent.max.m = std::max(extent.max.m, m); + } + } else if (has_z) { + for (size_t i = 0; i < len; i++) { + GEOSCoordSeq_getZ_r(ctx, geom, i, &z); + extent.min.z = std::min(extent.min.z, z); + extent.max.z = std::max(extent.max.z, z); + } + } else if (has_m) { + for (size_t i = 0; i < len; i++) { + GEOSCoordSeq_getOrdinate_r(ctx, geom, i, 2, &m); + extent.min.m = std::min(extent.min.m, m); + extent.max.m = std::max(extent.max.m, m); + } + } +} + +inline void GetGeometryExtent(const GEOSContextHandle_t ctx, const GEOSGeometry *geom, bool has_z, bool has_m, + Extent &extent) { + switch (GEOSGeomTypeId_r(ctx, geom)) { + case GEOS_POINT: + case GEOS_LINESTRING: { + if (GEOSisEmpty_r(ctx, geom)) { + return; + } + const auto seq = GEOSGeom_getCoordSeq_r(ctx, geom); + GetCoordSeqExtent(ctx, seq, has_z, has_m, extent); + break; + } + case GEOS_POLYGON: { + // We only need to check the exterior ring + if (GEOSisEmpty_r(ctx, geom)) { + return; + } + const auto exterior_ptr = GEOSGetExteriorRing_r(ctx, geom); + const auto exterior_seq = GEOSGeom_getCoordSeq_r(ctx, exterior_ptr); + GetCoordSeqExtent(ctx, exterior_seq, has_z, has_m, extent); + break; + } + case GEOS_MULTIPOINT: + case GEOS_MULTILINESTRING: + case GEOS_MULTIPOLYGON: + case GEOS_GEOMETRYCOLLECTION: { + const auto num_items = GEOSGetNumGeometries_r(ctx, geom); + for (auto i = 0; i < num_items; i++) { + const auto item = GEOSGetGeometryN_r(ctx, geom, i); + GetGeometryExtent(ctx, item, has_z, has_m, extent); + } + break; + } + default: + // Unsupported 
geometry type + break; + } +} + +inline void SerializeExtent(const GEOSContextHandle_t ctx, const GEOSGeometry *geom, bool has_z, bool has_m, + BinaryWriter &cursor) { + + Extent extent = {}; + extent.min.x = std::numeric_limits::max(); + extent.min.y = std::numeric_limits::max(); + extent.min.z = std::numeric_limits::max(); + extent.min.m = std::numeric_limits::max(); + extent.max.x = std::numeric_limits::lowest(); + extent.max.y = std::numeric_limits::lowest(); + extent.max.z = std::numeric_limits::lowest(); + extent.max.m = std::numeric_limits::lowest(); + + GetGeometryExtent(ctx, geom, has_z, has_m, extent); + + cursor.Write(MathUtil::DoubleToFloatDown(extent.min.x)); + cursor.Write(MathUtil::DoubleToFloatDown(extent.min.y)); + cursor.Write(MathUtil::DoubleToFloatUp(extent.max.x)); + cursor.Write(MathUtil::DoubleToFloatUp(extent.max.y)); + + if (has_z) { + cursor.Write(MathUtil::DoubleToFloatDown(extent.min.z)); + cursor.Write(MathUtil::DoubleToFloatUp(extent.max.z)); + } + + if (has_m) { + cursor.Write(MathUtil::DoubleToFloatDown(extent.min.m)); + cursor.Write(MathUtil::DoubleToFloatUp(extent.max.m)); + } +} + +void GeosSerde::Serialize(GEOSContextHandle_t ctx, const GEOSGeom_t *geom, char *buffer, size_t buffer_size) { + BinaryWriter cursor(buffer, buffer_size); + + const auto type = GEOSGeomTypeId_r(ctx, geom); + if (type < GEOS_POINT || type > GEOS_GEOMETRYCOLLECTION) { + // Unsupported geometry type + throw InvalidInputException("Unsupported GEOS geometry type %d", type); + } + + const auto has_bbox = (type != GEOS_POINT && (GEOSisEmpty_r(ctx, geom) == 0)); + const auto has_z = GEOSHasZ_r(ctx, geom); + const auto has_m = GEOSHasM_r(ctx, geom); + + // Set flags + uint8_t flags = 0; + flags |= has_z ? 0x01 : 0; + flags |= has_m ? 0x02 : 0; + flags |= has_bbox ? 
0x04 : 0; + + cursor.Write(StorageTypeFromGEOS(type)); + cursor.Write(flags); + cursor.Write(0); // unused + cursor.Write(0); // padding + + if (has_bbox) { + SerializeExtent(ctx, geom, has_z, has_m, cursor); + } + + // Serialize the geometry + SerializeInternal(ctx, geom, cursor); +} + +//------------------------------------------------------------------------------ +// Deserialize +//------------------------------------------------------------------------------ +// TODO: Remove the GeometryProcessor from here, come up with something better. + +namespace { + +template +bool IsPointerAligned(const void *ptr) { + auto uintptr = reinterpret_cast(ptr); + return (uintptr % alignof(T)) == 0; +} + +class GEOSDeserializer final : GeometryProcessor { +private: + GEOSContextHandle_t ctx; + vector aligned_buffer; + +private: + GEOSCoordSeq_t *HandleVertexData(const VertexData &vertices) { + auto n_dims = 2 + (HasZ() ? 1 : 0) + (HasM() ? 1 : 0); + auto vertex_size = sizeof(double) * n_dims; + + // We know that the data is interleaved :^) + auto data = vertices.data[0]; + auto count = vertices.count; + + if (HasZ()) { + // GEOS does a memcpy in this case, so we can pass the buffer directly even if it's not aligned + return GEOSCoordSeq_copyFromBuffer_r(ctx, reinterpret_cast(data), count, HasZ(), HasM()); + } else { + auto data_ptr = data; + auto vertex_data = reinterpret_cast(data_ptr); + if (!IsPointerAligned(data_ptr)) { + // If the pointer is not aligned we need to copy the data to an aligned buffer before passing it to GEOS + aligned_buffer.clear(); + aligned_buffer.resize(count * n_dims); + memcpy(aligned_buffer.data(), data_ptr, count * vertex_size); + vertex_data = aligned_buffer.data(); + } + + return GEOSCoordSeq_copyFromBuffer_r(ctx, vertex_data, count, HasZ(), HasM()); + } + } + + GEOSGeometry *ProcessPoint(const VertexData &data) override { + if (data.IsEmpty()) { + return GEOSGeom_createEmptyPoint_r(ctx); + } else { + auto seq = HandleVertexData(data); + return 
GEOSGeom_createPoint_r(ctx, seq); + } + } + + GEOSGeometry *ProcessLineString(const VertexData &data) override { + if (data.IsEmpty()) { + return GEOSGeom_createEmptyLineString_r(ctx); + } else { + auto seq = HandleVertexData(data); + return GEOSGeom_createLineString_r(ctx, seq); + } + } + + GEOSGeometry *ProcessPolygon(PolygonState &state) override { + auto num_rings = state.RingCount(); + if (num_rings == 0) { + return GEOSGeom_createEmptyPolygon_r(ctx); + } else { + // TODO: Make a vector here instead of using new + auto geoms = new GEOSGeometry *[num_rings]; + for (uint32_t i = 0; i < num_rings; i++) { + auto vertices = state.Next(); + auto seq = HandleVertexData(vertices); + geoms[i] = GEOSGeom_createLinearRing_r(ctx, seq); + } + auto result = GEOSGeom_createPolygon_r(ctx, geoms[0], geoms + 1, num_rings - 1); + delete[] geoms; + return result; + } + } + + GEOSGeometry *ProcessCollection(CollectionState &state) override { + GEOSGeomTypes collection_type = GEOS_GEOMETRYCOLLECTION; + switch (CurrentType()) { + case GeometryType::MULTIPOINT: + collection_type = GEOS_MULTIPOINT; + break; + case GeometryType::MULTILINESTRING: + collection_type = GEOS_MULTILINESTRING; + break; + case GeometryType::MULTIPOLYGON: + collection_type = GEOS_MULTIPOLYGON; + break; + default: + break; + } + auto item_count = state.ItemCount(); + if (item_count == 0) { + return GEOSGeom_createEmptyCollection_r(ctx, collection_type); + } else { + auto geoms = new GEOSGeometry *[item_count]; + for (uint32_t i = 0; i < item_count; i++) { + geoms[i] = state.Next(); + } + auto result = GEOSGeom_createCollection_r(ctx, collection_type, geoms, item_count); + delete[] geoms; + return result; + } + } + +public: + explicit GEOSDeserializer(GEOSContextHandle_t ctx) : ctx(ctx) { + } + virtual ~GEOSDeserializer() { + } + + GEOSGeom_t *Execute(const geometry_t &geom) { + return Process(geom); + } +}; + +} // namespace + +GEOSGeom_t *GeosSerde::Deserialize(GEOSContextHandle_t ctx, const char *buffer, 
size_t buffer_size) { + geometry_t blob(string_t(buffer, buffer_size)); + GEOSDeserializer deserializer(ctx); + return deserializer.Execute(blob); +} + +} // namespace duckdb diff --git a/src/spatial/modules/geos/geos_serde.hpp b/src/spatial/modules/geos/geos_serde.hpp new file mode 100644 index 00000000..6da77d05 --- /dev/null +++ b/src/spatial/modules/geos/geos_serde.hpp @@ -0,0 +1,20 @@ +#pragma once + +#include + +// forward declaration from geos_c.h +struct GEOSGeom_t; +struct GEOSContextHandle_HS; +typedef struct GEOSContextHandle_HS *GEOSContextHandle_t; + +namespace duckdb { + +class ArenaAllocator; + +struct GeosSerde { + static size_t GetRequiredSize(GEOSContextHandle_t ctx, const GEOSGeom_t *geom); + static void Serialize(GEOSContextHandle_t ctx, const GEOSGeom_t *geom, char *buffer, size_t buffer_size); + static GEOSGeom_t *Deserialize(GEOSContextHandle_t ctx, const char *buffer, size_t buffer_size); +}; + +} // namespace duckdb \ No newline at end of file diff --git a/src/spatial/modules/main/CMakeLists.txt b/src/spatial/modules/main/CMakeLists.txt new file mode 100644 index 00000000..40e336c2 --- /dev/null +++ b/src/spatial/modules/main/CMakeLists.txt @@ -0,0 +1,7 @@ +set(EXTENSION_SOURCES + ${EXTENSION_SOURCES} + ${CMAKE_CURRENT_SOURCE_DIR}/spatial_functions_cast.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/spatial_functions_scalar.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/spatial_functions_table.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/spatial_functions_aggregate.cpp + PARENT_SCOPE) \ No newline at end of file diff --git a/src/spatial/modules/main/spatial_functions.hpp b/src/spatial/modules/main/spatial_functions.hpp new file mode 100644 index 00000000..09a74821 --- /dev/null +++ b/src/spatial/modules/main/spatial_functions.hpp @@ -0,0 +1,25 @@ +#pragma once + +#include "duckdb/common/typedefs.hpp" + +namespace duckdb { + +class DatabaseInstance; + +void RegisterSpatialScalarFunctions(DatabaseInstance &db); +void RegisterSpatialAggregateFunctions(DatabaseInstance &db); +void 
RegisterSpatialCastFunctions(DatabaseInstance &db); +void RegisterSpatialTableFunctions(DatabaseInstance &db); + +// TODO: Move these +class Vector; +struct CoreVectorOperations { +public: + static void Point2DToVarchar(Vector &source, Vector &result, idx_t count); + static void LineString2DToVarchar(Vector &source, Vector &result, idx_t count); + static void Polygon2DToVarchar(Vector &source, Vector &result, idx_t count); + static void Box2DToVarchar(Vector &source, Vector &result, idx_t count); + static void GeometryToVarchar(Vector &source, Vector &result, idx_t count); +}; + +} // namespace duckdb \ No newline at end of file diff --git a/spatial/src/spatial/core/functions/aggregate/st_extent_agg.cpp b/src/spatial/modules/main/spatial_functions_aggregate.cpp similarity index 58% rename from spatial/src/spatial/core/functions/aggregate/st_extent_agg.cpp rename to src/spatial/modules/main/spatial_functions_aggregate.cpp index 6afca5f6..a75b9a38 100644 --- a/spatial/src/spatial/core/functions/aggregate/st_extent_agg.cpp +++ b/src/spatial/modules/main/spatial_functions_aggregate.cpp @@ -1,15 +1,14 @@ -#include "duckdb/parser/parsed_data/create_scalar_function_info.hpp" -#include "duckdb/parser/parsed_data/create_aggregate_function_info.hpp" +#include "spatial/geometry/bbox.hpp" +#include "spatial/geometry/geometry_serialization.hpp" +#include "spatial/geometry/sgl.hpp" +#include "spatial/geometry/geometry_type.hpp" +#include "spatial/modules/main/spatial_functions.hpp" +#include "spatial/spatial_types.hpp" +#include "spatial/util/function_builder.hpp" -#include "spatial/common.hpp" -#include "spatial/core/geometry/geometry.hpp" -#include "spatial/core/geometry/bbox.hpp" -#include "spatial/core/functions/aggregate.hpp" -#include "spatial/core/types.hpp" +namespace duckdb { -namespace spatial { - -namespace core { +namespace { struct ExtentAggState { bool is_set; @@ -74,9 +73,36 @@ struct ExtentAggFunction { if (!state.is_set) { finalize_data.ReturnNull(); } else { - 
auto &arena = finalize_data.input.allocator; - auto box = Polygon::CreateFromBox(arena, state.xmin, state.ymin, state.xmax, state.ymax); - target = Geometry::Serialize(box, finalize_data.result); + // We can create the bounding box polygon directly on the stack + double buf[10]; + buf[0] = state.xmin; + buf[1] = state.ymin; + + buf[2] = state.xmin; + buf[3] = state.ymax; + + buf[4] = state.xmax; + buf[5] = state.ymax; + + buf[6] = state.xmax; + buf[7] = state.ymin; + + buf[8] = state.xmin; + buf[9] = state.ymin; + + auto ring = sgl::linestring::make_empty(); + ring.set_vertex_data(reinterpret_cast(buf), 5); + + auto bbox = sgl::polygon::make_empty(); + bbox.append_part(&ring); + + const auto size = Serde::GetRequiredSize(bbox); + auto blob = StringVector::EmptyString(finalize_data.result, size); + Serde::Serialize(bbox, blob.GetDataWriteable(), size); + blob.Finalize(); + + // TODO: dont use geometry_t here + target = geometry_t(blob); } } @@ -88,7 +114,7 @@ struct ExtentAggFunction { //------------------------------------------------------------------------------ // Documentation //------------------------------------------------------------------------------ -static constexpr DocTag DOC_TAGS[] = {{"ext", "spatial"}, {"category", "construction"}}; +// static constexpr DocTag DOC_TAGS[] = {{"ext", "spatial"}, {"category", "construction"}}; static constexpr const char *DOC_DESCRIPTION = R"( Computes the minimal-bounding-box polygon containing the set of input geometries )"; @@ -103,25 +129,34 @@ static constexpr const char *DOC_ALIAS_DESCRIPTION = R"( Computes the minimal-bounding-box polygon containing the set of input geometries. 
)"; +} // namespace + //------------------------------------------------------------------------ // Register //------------------------------------------------------------------------ -void CoreAggregateFunctions::RegisterStExtentAgg(DatabaseInstance &db) { +void RegisterSpatialAggregateFunctions(DatabaseInstance &db) { - auto function = AggregateFunction::UnaryAggregate( + // TODO: Dont use geometry_t here + const auto agg = AggregateFunction::UnaryAggregate( GeoTypes::GEOMETRY(), GeoTypes::GEOMETRY()); - // Register the function - function.name = "ST_Extent_Agg"; - ExtensionUtil::RegisterFunction(db, function); - DocUtil::AddDocumentation(db, "ST_Extent_Agg", DOC_DESCRIPTION, DOC_EXAMPLE, DOC_TAGS); + FunctionBuilder::RegisterAggregate(db, "ST_Extent_Agg", [&](AggregateFunctionBuilder &func) { + func.SetFunction(agg); + func.SetDescription(DOC_DESCRIPTION); + func.SetExample(DOC_EXAMPLE); - // Also add an alias with the name ST_Envelope_Agg - function.name = "ST_Envelope_Agg"; - ExtensionUtil::RegisterFunction(db, function); - DocUtil::AddDocumentation(db, "ST_Envelope_Agg", DOC_ALIAS_DESCRIPTION, DOC_EXAMPLE, DOC_TAGS); -} + func.SetTag("ext", "spatial"); + func.SetTag("category", "construction"); + }); + + FunctionBuilder::RegisterAggregate(db, "ST_Envelope_Agg", [&](AggregateFunctionBuilder &func) { + func.SetFunction(agg); + func.SetDescription(DOC_ALIAS_DESCRIPTION); + func.SetExample(DOC_EXAMPLE); -} // namespace core + func.SetTag("ext", "spatial"); + func.SetTag("category", "construction"); + }); +} -} // namespace spatial \ No newline at end of file +} // namespace duckdb \ No newline at end of file diff --git a/src/spatial/modules/main/spatial_functions_cast.cpp b/src/spatial/modules/main/spatial_functions_cast.cpp new file mode 100644 index 00000000..e2e4a480 --- /dev/null +++ b/src/spatial/modules/main/spatial_functions_cast.cpp @@ -0,0 +1,947 @@ +#include "spatial/modules/main/spatial_functions.hpp" +#include "spatial/geometry/geometry_processor.hpp" 
+#include "spatial/geometry/sgl.hpp" +#include "spatial/geometry/geometry_serialization.hpp" +#include "spatial/spatial_types.hpp" +#include "spatial/util/math.hpp" +#include "spatial/geometry/wkb_writer.hpp" + +#include "duckdb/common/error_data.hpp" +#include "duckdb/common/operator/cast_operators.hpp" +#include "duckdb/common/vector_operations/generic_executor.hpp" +#include "duckdb/main/extension_util.hpp" + +namespace duckdb { + +namespace { + +//###################################################################################################################### +// Util +//###################################################################################################################### + +//====================================================================================================================== +// Local State +//====================================================================================================================== + +class LocalState final : public FunctionLocalState { +public: + explicit LocalState(ClientContext &context) : arena(BufferAllocator::Get(context)), allocator(arena) { + } + + static unique_ptr InitCast(CastLocalStateParameters ¶ms); + static LocalState &ResetAndGet(CastParameters ¶ms); + + // De/Serialize geometries + sgl::geometry Deserialize(const string_t &blob); + string_t Serialize(Vector &vector, const sgl::geometry &geom); + + ArenaAllocator &GetArena() { + return arena; + } + GeometryAllocator &GetAllocator() { + return allocator; + } + +private: + ArenaAllocator arena; + GeometryAllocator allocator; +}; + +unique_ptr LocalState::InitCast(CastLocalStateParameters ¶meters) { + return make_uniq(*parameters.context); +} + +LocalState &LocalState::ResetAndGet(CastParameters &state) { + auto &local_state = state.local_state->Cast(); + local_state.arena.Reset(); + return local_state; +} + +sgl::geometry LocalState::Deserialize(const string_t &blob) { + sgl::geometry geom; + Serde::Deserialize(geom, arena, 
blob.GetDataUnsafe(), blob.GetSize()); + return geom; +} + +string_t LocalState::Serialize(Vector &vector, const sgl::geometry &geom) { + const auto size = Serde::GetRequiredSize(geom); + auto blob = StringVector::EmptyString(vector, size); + Serde::Serialize(geom, blob.GetDataWriteable(), size); + blob.Finalize(); + return blob; +} + +//###################################################################################################################### +// Cast Functions +//###################################################################################################################### + +//====================================================================================================================== +// GEOMETRY Casts +//====================================================================================================================== + +struct GeometryCasts { + + //------------------------------------------------------------------------------------------------------------------ + // GEOMETRY -> VARCHAR + //------------------------------------------------------------------------------------------------------------------ + static bool ToVarcharCast(Vector &source, Vector &result, idx_t count, CastParameters &) { + CoreVectorOperations::GeometryToVarchar(source, result, count); + return true; + } + + //------------------------------------------------------------------------------------------------------------------ + // VARCHAR -> GEOMETRY + //------------------------------------------------------------------------------------------------------------------ + static bool FromVarcharCast(Vector &source, Vector &result, idx_t count, CastParameters ¶meters) { + auto &lstate = LocalState::ResetAndGet(parameters); + auto &alloc = lstate.GetAllocator(); + + sgl::ops::wkt_reader reader = {}; + reader.alloc = &alloc; + + auto success = true; + + UnaryExecutor::ExecuteWithNulls( + source, result, count, [&](const string_t &wkt, ValidityMask &mask, idx_t 
row_idx) { + const auto wkt_ptr = wkt.GetDataUnsafe(); + const auto wkt_len = wkt.GetSize(); + + reader.buf = wkt_ptr; + reader.end = wkt_ptr + wkt_len; + + sgl::geometry geom; + + if (!sgl::ops::wkt_reader_try_parse(&reader, &geom)) { + if (success) { + success = false; + const auto error = sgl::ops::wkt_reader_get_error_message(&reader); + HandleCastError::AssignError(error, parameters.error_message); + } + mask.SetInvalid(row_idx); + return string_t {}; + } + + return lstate.Serialize(result, geom); + }); + + return success; + } + + //------------------------------------------------------------------------------------------------------------------ + // GEOMETRY -> WKB_BLOB + //------------------------------------------------------------------------------------------------------------------ + static bool ToWKBCast(Vector &source, Vector &result, idx_t count, CastParameters &) { + UnaryExecutor::Execute( + source, result, count, [&](const string_t &input) { return WKBWriter::Write(input, result); }); + return true; + } + + //------------------------------------------------------------------------------------------------------------------ + // WKB_BLOB -> GEOMETRY + //------------------------------------------------------------------------------------------------------------------ + static bool FromWKBCast(Vector &source, Vector &result, idx_t count, CastParameters ¶ms) { + auto &lstate = LocalState::ResetAndGet(params); + auto &alloc = lstate.GetAllocator(); + + constexpr auto MAX_STACK_DEPTH = 128; + uint32_t recursion_stack[MAX_STACK_DEPTH]; + + sgl::ops::wkb_reader reader = {}; + reader.copy_vertices = false; + reader.alloc = &alloc; + reader.allow_mixed_zm = false; + reader.nan_as_empty = true; + + reader.stack_buf = recursion_stack; + reader.stack_cap = MAX_STACK_DEPTH; + + bool success = true; + + UnaryExecutor::ExecuteWithNulls( + source, result, count, [&](const string_t &wkb, ValidityMask &mask, idx_t row_idx) { + reader.buf = wkb.GetDataUnsafe(); + 
reader.end = reader.buf + wkb.GetSize(); + + sgl::geometry geom(sgl::geometry_type::INVALID); + + // Try parse, if it fails, assign error message and return NULL + if (!sgl::ops::wkb_reader_try_parse(&reader, &geom)) { + const auto error = sgl::ops::wkb_reader_get_error_message(&reader); + if (success) { + success = false; + HandleCastError::AssignError(error, params.error_message); + } + mask.SetInvalid(row_idx); + return string_t {}; + } + + return lstate.Serialize(result, geom); + }); + + return success; + } + + //------------------------------------------------------------------------------------------------------------------ + // Register + //------------------------------------------------------------------------------------------------------------------ + static void Register(DatabaseInstance &db) { + const auto wkb_type = GeoTypes::WKB_BLOB(); + const auto geom_type = GeoTypes::GEOMETRY(); + + // VARCHAR -> Geometry is explicitly castable + ExtensionUtil::RegisterCastFunction(db, geom_type, LogicalType::VARCHAR, BoundCastInfo(ToVarcharCast), 1); + + // Geometry -> VARCHAR is implicitly castable + ExtensionUtil::RegisterCastFunction(db, LogicalType::VARCHAR, geom_type, + BoundCastInfo(FromVarcharCast, nullptr, LocalState::InitCast)); + + // Geometry -> WKB is explicitly castable + ExtensionUtil::RegisterCastFunction(db, geom_type, wkb_type, BoundCastInfo(ToWKBCast)); + + // Geometry -> BLOB is explicitly castable + ExtensionUtil::RegisterCastFunction(db, geom_type, LogicalType::BLOB, DefaultCasts::ReinterpretCast); + + // WKB -> Geometry is explicitly castable + ExtensionUtil::RegisterCastFunction(db, wkb_type, geom_type, + BoundCastInfo(FromWKBCast, nullptr, LocalState::InitCast)); + + // WKB -> BLOB is implicitly castable + ExtensionUtil::RegisterCastFunction(db, wkb_type, LogicalType::BLOB, DefaultCasts::ReinterpretCast, 1); + } +}; + +//====================================================================================================================== 
+// POINT_2D Casts +//====================================================================================================================== + +struct PointCasts { + + //------------------------------------------------------------------------------------------------------------------ + // POINT_2D -> VARCHAR + //------------------------------------------------------------------------------------------------------------------ + static bool ToVarcharCast(Vector &source, Vector &result, idx_t count, CastParameters ¶meters) { + CoreVectorOperations::Point2DToVarchar(source, result, count); + return true; + } + + //------------------------------------------------------------------------------------------------------------------ + // POINT_2D -> GEOMETRY + //------------------------------------------------------------------------------------------------------------------ + static bool ToGeometryCast(Vector &source, Vector &result, idx_t count, CastParameters ¶meters) { + using POINT_TYPE = StructTypeBinary; + using GEOMETRY_TYPE = PrimitiveType; + + auto &lstate = LocalState::ResetAndGet(parameters); + + GenericExecutor::ExecuteUnary(source, result, count, [&](const POINT_TYPE &point) { + const double buffer[2] = {point.a_val, point.b_val}; + auto geom = sgl::point::make_empty(); + geom.set_type(sgl::geometry_type::POINT); + geom.set_vertex_data(reinterpret_cast(buffer), 1); + + return lstate.Serialize(result, geom); + }); + return true; + } + + //------------------------------------------------------------------------------------------------------------------ + // GEOMETRY -> POINT_2D + //------------------------------------------------------------------------------------------------------------------ + static bool FromGeometryCast(Vector &source, Vector &result, idx_t count, CastParameters ¶meters) { + using POINT_TYPE = StructTypeBinary; + using GEOMETRY_TYPE = PrimitiveType; + + auto &lstate = LocalState::ResetAndGet(parameters); + + 
GenericExecutor::ExecuteUnary(source, result, count, [&](const GEOMETRY_TYPE &blob) { + const auto geom = lstate.Deserialize(blob.val); + if (geom.get_type() != sgl::geometry_type::POINT) { + throw ConversionException("Cannot cast non-point GEOMETRY to POINT_2D"); + } + if (geom.is_empty()) { + // TODO: Maybe make this return NULL instead + throw ConversionException("Cannot cast empty point GEOMETRY to POINT_2D"); + } + const auto vertex = geom.get_vertex_xy(0); + return POINT_TYPE {vertex.x, vertex.y}; + }); + + return true; + } + + //------------------------------------------------------------------------------------------------------------------ + // POINT(N) -> POINT_2D + //------------------------------------------------------------------------------------------------------------------ + static bool ToPoint2DCast(Vector &source, Vector &result, idx_t count, CastParameters &) { + auto &children = StructVector::GetEntries(source); + const auto &x_child = children[0]; + const auto &y_child = children[1]; + + const auto &result_children = StructVector::GetEntries(result); + const auto &result_x_child = result_children[0]; + const auto &result_y_child = result_children[1]; + + result_x_child->Reference(*x_child); + result_y_child->Reference(*y_child); + + if (count == 1) { + result.SetVectorType(VectorType::CONSTANT_VECTOR); + } + return true; + } + + //------------------------------------------------------------------------------------------------------------------ + // Register + //------------------------------------------------------------------------------------------------------------------ + static void Register(DatabaseInstance &db) { + // POINT_2D -> VARCHAR + ExtensionUtil::RegisterCastFunction(db, GeoTypes::POINT_2D(), LogicalType::VARCHAR, + BoundCastInfo(ToVarcharCast), 1); + // POINT_2D -> GEOMETRY + ExtensionUtil::RegisterCastFunction(db, GeoTypes::POINT_2D(), GeoTypes::GEOMETRY(), + BoundCastInfo(ToGeometryCast, nullptr, LocalState::InitCast), 1); + 
// GEOMETRY -> POINT_2D + ExtensionUtil::RegisterCastFunction(db, GeoTypes::GEOMETRY(), GeoTypes::POINT_2D(), + BoundCastInfo(FromGeometryCast, nullptr, LocalState::InitCast), 1); + // POINT_3D -> POINT_2D + ExtensionUtil::RegisterCastFunction(db, GeoTypes::POINT_3D(), GeoTypes::POINT_2D(), ToPoint2DCast, 1); + // POINT_4D -> POINT_2D + ExtensionUtil::RegisterCastFunction(db, GeoTypes::POINT_4D(), GeoTypes::POINT_2D(), ToPoint2DCast, 1); + } +}; + +//====================================================================================================================== +// LINESTRING_2D Casts +//====================================================================================================================== + +struct LinestringCasts { + + //------------------------------------------------------------------------------------------------------------------ + // LINESTRING_2D -> VARCHAR + //------------------------------------------------------------------------------------------------------------------ + static bool ToVarcharCast(Vector &source, Vector &result, idx_t count, CastParameters &) { + CoreVectorOperations::LineString2DToVarchar(source, result, count); + return true; + } + + //------------------------------------------------------------------------------------------------------------------ + // LINESTRING_2D -> GEOMETRY + //------------------------------------------------------------------------------------------------------------------ + static bool ToGeometryCast(Vector &source, Vector &result, idx_t count, CastParameters ¶meters) { + auto &lstate = LocalState::ResetAndGet(parameters); + auto &arena = lstate.GetArena(); + + auto &coord_vec = ListVector::GetEntry(source); + auto &coord_vec_children = StructVector::GetEntries(coord_vec); + const auto x_data = FlatVector::GetData(*coord_vec_children[0]); + const auto y_data = FlatVector::GetData(*coord_vec_children[1]); + + UnaryExecutor::Execute(source, result, count, [&](const list_entry_t &line) { + 
const auto vertex_data_mem = arena.AllocateAligned(sizeof(double) * 2 * line.length); + const auto vertex_data_ptr = reinterpret_cast(vertex_data_mem); + + for (idx_t i = 0; i < line.length; i++) { + vertex_data_ptr[i * 2] = x_data[line.offset + i]; + vertex_data_ptr[i * 2 + 1] = y_data[line.offset + i]; + } + + auto geom = sgl::linestring::make_empty(); + geom.set_type(sgl::geometry_type::LINESTRING); + geom.set_vertex_data(vertex_data_mem, line.length); + + return lstate.Serialize(result, geom); + }); + return true; + } + + //------------------------------------------------------------------------------------------------------------------ + // GEOMETRY -> LINESTRING_2D + //------------------------------------------------------------------------------------------------------------------ + static bool FromGeometryCast(Vector &source, Vector &result, idx_t count, CastParameters ¶meters) { + auto &lstate = LocalState::ResetAndGet(parameters); + + auto &coord_vec = ListVector::GetEntry(result); + auto &coord_vec_children = StructVector::GetEntries(coord_vec); + const auto x_data = FlatVector::GetData(*coord_vec_children[0]); + const auto y_data = FlatVector::GetData(*coord_vec_children[1]); + + idx_t total_coords = 0; + + UnaryExecutor::Execute(source, result, count, [&](const string_t &blob) { + const auto line = lstate.Deserialize(blob); + if (line.get_type() != sgl::geometry_type::LINESTRING) { + // TODO: Dont throw here, return NULL instead to allow TRY_CAST + throw ConversionException("Cannot cast non-linestring GEOMETRY to LINESTRING_2D"); + } + + const auto line_size = line.get_count(); + + const auto entry = list_entry_t(total_coords, line_size); + total_coords += line_size; + ListVector::Reserve(result, total_coords); + + for (idx_t i = 0; i < line_size; i++) { + const auto vertex = line.get_vertex_xy(i); + x_data[entry.offset + i] = vertex.x; + y_data[entry.offset + i] = vertex.y; + } + return entry; + }); + ListVector::SetListSize(result, total_coords); + 
return true; + } + + //------------------------------------------------------------------------------------------------------------------ + // Register + //------------------------------------------------------------------------------------------------------------------ + static void Register(DatabaseInstance &db) { + // LINESTRING_2D -> VARCHAR + ExtensionUtil::RegisterCastFunction(db, GeoTypes::LINESTRING_2D(), LogicalType::VARCHAR, + BoundCastInfo(ToVarcharCast), 1); + // LINESTRING_2D -> GEOMETRY + ExtensionUtil::RegisterCastFunction(db, GeoTypes::LINESTRING_2D(), GeoTypes::GEOMETRY(), + BoundCastInfo(ToGeometryCast, nullptr, LocalState::InitCast), 1); + // GEOMETRY -> LINESTRING_2D + ExtensionUtil::RegisterCastFunction(db, GeoTypes::GEOMETRY(), GeoTypes::LINESTRING_2D(), + BoundCastInfo(FromGeometryCast, nullptr, LocalState::InitCast), 1); + } +}; + +//====================================================================================================================== +// POLYGON_2D Casts +//====================================================================================================================== + +struct PolygonCasts { + + //------------------------------------------------------------------------------------------------------------------ + // POLYGON_2D -> VARCHAR + //------------------------------------------------------------------------------------------------------------------ + static bool ToVarcharCast(Vector &source, Vector &result, idx_t count, CastParameters ¶meters) { + CoreVectorOperations::Polygon2DToVarchar(source, result, count); + return true; + } + + //------------------------------------------------------------------------------------------------------------------ + // POLYGON_2D -> GEOMETRY + //------------------------------------------------------------------------------------------------------------------ + static bool ToGeometryCast(Vector &source, Vector &result, idx_t count, CastParameters ¶meters) { + auto &lstate = 
LocalState::ResetAndGet(parameters); + auto &arena = lstate.GetArena(); + + auto &ring_vec = ListVector::GetEntry(source); + const auto ring_entries = ListVector::GetData(ring_vec); + const auto &coord_vec = ListVector::GetEntry(ring_vec); + const auto &coord_vec_children = StructVector::GetEntries(coord_vec); + const auto x_data = FlatVector::GetData(*coord_vec_children[0]); + const auto y_data = FlatVector::GetData(*coord_vec_children[1]); + + UnaryExecutor::Execute(source, result, count, [&](const list_entry_t &poly) { + auto geom = sgl::polygon::make_empty(); + + for (idx_t i = 0; i < poly.length; i++) { + const auto ring_entry = ring_entries[poly.offset + i]; + + // Allocate part + const auto ring_mem = arena.AllocateAligned(sizeof(sgl::geometry)); + const auto ring_ptr = new (ring_mem) sgl::geometry(sgl::geometry_type::LINESTRING); + + // Allocate data + const auto ring_data_mem = arena.AllocateAligned(sizeof(double) * 2 * ring_entry.length); + const auto ring_data_ptr = reinterpret_cast(ring_data_mem); + + for (idx_t j = 0; j < ring_entry.length; j++) { + ring_data_ptr[j * 2] = x_data[ring_entry.offset + j]; + ring_data_ptr[j * 2 + 1] = y_data[ring_entry.offset + j]; + } + + ring_ptr->set_vertex_data(ring_data_mem, ring_entry.length); + + // Append part + geom.append_part(ring_ptr); + } + + return lstate.Serialize(result, geom); + }); + return true; + } + + //------------------------------------------------------------------------------------------------------------------ + // GEOMETRY -> POLYGON_2D + //------------------------------------------------------------------------------------------------------------------ + static bool FromGeometryCast(Vector &source, Vector &result, idx_t count, CastParameters ¶meters) { + auto &lstate = LocalState::ResetAndGet(parameters); + auto &ring_vec = ListVector::GetEntry(result); + + idx_t total_rings = 0; + idx_t total_coords = 0; + + UnaryExecutor::Execute(source, result, count, [&](const string_t &blob) { + const auto 
poly = lstate.Deserialize(blob); + + // TODO: Dont throw here, return NULL instead to allow TRY_CAST + if (poly.get_type() != sgl::geometry_type::POLYGON) { + throw ConversionException("Cannot cast non-polygon GEOMETRY to POLYGON_2D"); + } + + const auto poly_size = poly.get_count(); + const auto poly_entry = list_entry_t(total_rings, poly_size); + + ListVector::Reserve(result, total_rings + poly_size); + + const auto tail = poly.get_last_part(); + auto head = tail; + + if (head) { + idx_t ring_idx = 0; + do { + D_ASSERT(ring_idx < poly_size); + head = head->get_next(); + + const auto ring_size = head->get_count(); + const auto ring_entry = list_entry_t(total_coords, ring_size); + + ListVector::Reserve(ring_vec, total_coords + ring_size); + + const auto ring_entries = ListVector::GetData(ring_vec); + auto &coord_vec = ListVector::GetEntry(ring_vec); + auto &coord_vec_children = StructVector::GetEntries(coord_vec); + const auto x_data = FlatVector::GetData(*coord_vec_children[0]); + const auto y_data = FlatVector::GetData(*coord_vec_children[1]); + + ring_entries[total_rings + ring_idx] = ring_entry; + + for (idx_t j = 0; j < ring_size; j++) { + const auto vertext = head->get_vertex_xy(j); + x_data[ring_entry.offset + j] = vertext.x; + y_data[ring_entry.offset + j] = vertext.y; + } + total_coords += ring_size; + + ring_idx++; + } while (head != tail); + } + + total_rings += poly_size; + + return poly_entry; + }); + + ListVector::SetListSize(result, total_rings); + ListVector::SetListSize(ring_vec, total_coords); + + return true; + } + + //------------------------------------------------------------------------------------------------------------------ + // Register + //------------------------------------------------------------------------------------------------------------------ + static void Register(DatabaseInstance &db) { + // POLYGON_2D -> VARCHAR + ExtensionUtil::RegisterCastFunction(db, GeoTypes::POLYGON_2D(), LogicalType::VARCHAR, + 
BoundCastInfo(ToVarcharCast), 1); + // POLYGON_2D -> GEOMETRY + ExtensionUtil::RegisterCastFunction(db, GeoTypes::POLYGON_2D(), GeoTypes::GEOMETRY(), + BoundCastInfo(ToGeometryCast, nullptr, LocalState::InitCast), 1); + // GEOMETRY -> POLYGON_2D + ExtensionUtil::RegisterCastFunction(db, GeoTypes::GEOMETRY(), GeoTypes::POLYGON_2D(), + BoundCastInfo(FromGeometryCast, nullptr, LocalState::InitCast), 1); + } +}; + +//====================================================================================================================== +// BOX_2D Casts +//====================================================================================================================== + +struct BoxCasts { + + //------------------------------------------------------------------------------------------------------------------ + // BOX_2D -> VARCHAR + //------------------------------------------------------------------------------------------------------------------ + static bool ToVarcharCast(Vector &source, Vector &result, idx_t count, CastParameters ¶meters) { + CoreVectorOperations::Box2DToVarchar(source, result, count); + return true; + } + + //------------------------------------------------------------------------------------------------------------------ + // BOX_2D -> GEOMETRY + //------------------------------------------------------------------------------------------------------------------ + static bool ToGeometryCast2D(Vector &source, Vector &result, idx_t count, CastParameters ¶meters) { + auto &lstate = LocalState::ResetAndGet(parameters); + auto &alloc = lstate.GetAllocator(); + + using BOX_TYPE = StructTypeQuaternary; + using GEOMETRY_TYPE = PrimitiveType; + GenericExecutor::ExecuteUnary(source, result, count, [&](const BOX_TYPE &box) { + const auto minx = box.a_val; + const auto miny = box.b_val; + const auto maxx = box.c_val; + const auto maxy = box.d_val; + const auto poly = sgl::polygon::make_from_box(&alloc, minx, miny, maxx, maxy); + return 
lstate.Serialize(result, poly); + }); + return true; + } + + //------------------------------------------------------------------------------------------------------------------ + // BOX_2DF -> GEOMETRY + //------------------------------------------------------------------------------------------------------------------ + static bool ToGeometryCast2F(Vector &source, Vector &result, idx_t count, CastParameters ¶meters) { + auto &lstate = LocalState::ResetAndGet(parameters); + auto &alloc = lstate.GetAllocator(); + using BOX_TYPE = StructTypeQuaternary; + using GEOMETRY_TYPE = PrimitiveType; + GenericExecutor::ExecuteUnary(source, result, count, [&](const BOX_TYPE &box) { + const auto minx = box.a_val; + const auto miny = box.b_val; + const auto maxx = box.c_val; + const auto maxy = box.d_val; + const auto poly = sgl::polygon::make_from_box(&alloc, minx, miny, maxx, maxy); + return lstate.Serialize(result, poly); + }); + return true; + } + + //------------------------------------------------------------------------------------------------------------------ + // Register + //------------------------------------------------------------------------------------------------------------------ + static void Register(DatabaseInstance &db) { + // BOX_2D -> VARCHAR + ExtensionUtil::RegisterCastFunction(db, GeoTypes::BOX_2D(), LogicalType::VARCHAR, BoundCastInfo(ToVarcharCast), + 1); + + // BOX_2D -> GEOMETRY + ExtensionUtil::RegisterCastFunction(db, GeoTypes::BOX_2D(), GeoTypes::GEOMETRY(), + BoundCastInfo(ToGeometryCast2D, nullptr, LocalState::InitCast), 1); + + // BOX_2F -> GEOMETRY + ExtensionUtil::RegisterCastFunction(db, GeoTypes::BOX_2DF(), GeoTypes::GEOMETRY(), + BoundCastInfo(ToGeometryCast2F, nullptr, LocalState::InitCast), 1); + } +}; + +} // namespace + +//====================================================================================================================== +// Vector Operations 
+//====================================================================================================================== +// TODO: Move/inline this. This is a relic from the original implementation, but being able to access it from outside +// is not really important anymore (there are other ways to work around it). + +//------------------------------------------------------------------------------ +// POINT_2D -> VARCHAR +//------------------------------------------------------------------------------ +void CoreVectorOperations::Point2DToVarchar(Vector &source, Vector &result, idx_t count) { + using POINT_TYPE = StructTypeBinary; + using VARCHAR_TYPE = PrimitiveType; + + GenericExecutor::ExecuteUnary(source, result, count, [&](POINT_TYPE &point) { + auto x = point.a_val; + auto y = point.b_val; + + if (std::isnan(x) || std::isnan(y)) { + return StringVector::AddString(result, "POINT EMPTY"); + } + + return StringVector::AddString(result, StringUtil::Format("POINT (%s)", MathUtil::format_coord(x, y))); + }); +} + +//------------------------------------------------------------------------------ +// LINESTRING_2D -> VARCHAR +//------------------------------------------------------------------------------ +void CoreVectorOperations::LineString2DToVarchar(Vector &source, Vector &result, idx_t count) { + auto &inner = ListVector::GetEntry(source); + auto &children = StructVector::GetEntries(inner); + auto x_data = FlatVector::GetData(*children[0]); + auto y_data = FlatVector::GetData(*children[1]); + + UnaryExecutor::Execute(source, result, count, [&](list_entry_t &line) { + auto offset = line.offset; + auto length = line.length; + + if (length == 0) { + return StringVector::AddString(result, "LINESTRING EMPTY"); + } + + string result_str = "LINESTRING ("; + for (idx_t i = offset; i < offset + length; i++) { + result_str += MathUtil::format_coord(x_data[i], y_data[i]); + if (i < offset + length - 1) { + result_str += ", "; + } + } + result_str += ")"; + return 
StringVector::AddString(result, result_str); + }); +} + +//------------------------------------------------------------------------------ +// POLYGON_2D -> VARCHAR +//------------------------------------------------------------------------------ +void CoreVectorOperations::Polygon2DToVarchar(Vector &source, Vector &result, idx_t count) { + auto &poly_vector = source; + auto &ring_vector = ListVector::GetEntry(poly_vector); + auto ring_entries = ListVector::GetData(ring_vector); + auto &point_vector = ListVector::GetEntry(ring_vector); + auto &point_children = StructVector::GetEntries(point_vector); + auto x_data = FlatVector::GetData(*point_children[0]); + auto y_data = FlatVector::GetData(*point_children[1]); + + UnaryExecutor::Execute(poly_vector, result, count, [&](list_entry_t polygon_entry) { + auto offset = polygon_entry.offset; + auto length = polygon_entry.length; + + if (length == 0) { + return StringVector::AddString(result, "POLYGON EMPTY"); + } + + string result_str = "POLYGON ("; + for (idx_t i = offset; i < offset + length; i++) { + auto ring_entry = ring_entries[i]; + auto ring_offset = ring_entry.offset; + auto ring_length = ring_entry.length; + result_str += "("; + for (idx_t j = ring_offset; j < ring_offset + ring_length; j++) { + result_str += MathUtil::format_coord(x_data[j], y_data[j]); + if (j < ring_offset + ring_length - 1) { + result_str += ", "; + } + } + result_str += ")"; + if (i < offset + length - 1) { + result_str += ", "; + } + } + result_str += ")"; + return StringVector::AddString(result, result_str); + }); +} + +//------------------------------------------------------------------------------ +// BOX_2D -> VARCHAR +//------------------------------------------------------------------------------ +void CoreVectorOperations::Box2DToVarchar(Vector &source, Vector &result, idx_t count) { + using BOX_TYPE = StructTypeQuaternary; + using VARCHAR_TYPE = PrimitiveType; + GenericExecutor::ExecuteUnary(source, result, count, [&](BOX_TYPE 
&box) { + return StringVector::AddString(result, + StringUtil::Format("BOX(%s, %s)", MathUtil::format_coord(box.a_val, box.b_val), + MathUtil::format_coord(box.c_val, box.d_val))); + }); +} + +//------------------------------------------------------------------------------ +// GEOMETRY -> VARCHAR +//------------------------------------------------------------------------------ +namespace { +class GeometryTextProcessor final : GeometryProcessor { +private: + string text; + +public: + void OnVertexData(const VertexData &data) { + auto &dims = data.data; + auto &strides = data.stride; + auto count = data.count; + + if (HasZ() && HasM()) { + for (uint32_t i = 0; i < count; i++) { + auto x = Load(dims[0] + i * strides[0]); + auto y = Load(dims[1] + i * strides[1]); + auto z = Load(dims[2] + i * strides[2]); + auto m = Load(dims[3] + i * strides[3]); + text += MathUtil::format_coord(x, y, z, m); + if (i < count - 1) { + text += ", "; + } + } + } else if (HasZ()) { + for (uint32_t i = 0; i < count; i++) { + auto x = Load(dims[0] + i * strides[0]); + auto y = Load(dims[1] + i * strides[1]); + auto zm = Load(dims[2] + i * strides[2]); + text += MathUtil::format_coord(x, y, zm); + if (i < count - 1) { + text += ", "; + } + } + } else if (HasM()) { + for (uint32_t i = 0; i < count; i++) { + auto x = Load(dims[0] + i * strides[0]); + auto y = Load(dims[1] + i * strides[1]); + auto m = Load(dims[3] + i * strides[3]); + text += MathUtil::format_coord(x, y, m); + if (i < count - 1) { + text += ", "; + } + } + } else { + for (uint32_t i = 0; i < count; i++) { + auto x = Load(dims[0] + i * strides[0]); + auto y = Load(dims[1] + i * strides[1]); + text += MathUtil::format_coord(x, y); + + if (i < count - 1) { + text += ", "; + } + } + } + } + + void ProcessPoint(const VertexData &data, bool in_typed_collection) override { + if (!in_typed_collection) { + text += "POINT"; + if (HasZ() && HasM()) { + text += " ZM"; + } else if (HasZ()) { + text += " Z"; + } else if (HasM()) { + text += 
" M"; + } + text += " "; + } + + if (data.count == 0) { + text += "EMPTY"; + } else if (in_typed_collection) { + OnVertexData(data); + } else { + text += "("; + OnVertexData(data); + text += ")"; + } + } + + void ProcessLineString(const VertexData &data, bool in_typed_collection) override { + if (!in_typed_collection) { + text += "LINESTRING"; + if (HasZ() && HasM()) { + text += " ZM"; + } else if (HasZ()) { + text += " Z"; + } else if (HasM()) { + text += " M"; + } + text += " "; + } + + if (data.count == 0) { + text += "EMPTY"; + } else { + text += "("; + OnVertexData(data); + text += ")"; + } + } + + void ProcessPolygon(PolygonState &state, bool in_typed_collection) override { + if (!in_typed_collection) { + text += "POLYGON"; + if (HasZ() && HasM()) { + text += " ZM"; + } else if (HasZ()) { + text += " Z"; + } else if (HasM()) { + text += " M"; + } + text += " "; + } + + if (state.RingCount() == 0) { + text += "EMPTY"; + } else { + text += "("; + bool first = true; + while (!state.IsDone()) { + if (!first) { + text += ", "; + } + first = false; + text += "("; + auto vertices = state.Next(); + OnVertexData(vertices); + text += ")"; + } + text += ")"; + } + } + + void ProcessCollection(CollectionState &state, bool) override { + bool collection_is_typed = false; + switch (CurrentType()) { + case GeometryType::MULTIPOINT: + text += "MULTIPOINT"; + collection_is_typed = true; + break; + case GeometryType::MULTILINESTRING: + text += "MULTILINESTRING"; + collection_is_typed = true; + break; + case GeometryType::MULTIPOLYGON: + text += "MULTIPOLYGON"; + collection_is_typed = true; + break; + case GeometryType::GEOMETRYCOLLECTION: + text += "GEOMETRYCOLLECTION"; + collection_is_typed = false; + break; + default: + throw InvalidInputException("Invalid geometry type"); + } + + if (HasZ() && HasM()) { + text += " ZM"; + } else if (HasZ()) { + text += " Z"; + } else if (HasM()) { + text += " M"; + } + + if (state.ItemCount() == 0) { + text += " EMPTY"; + } else { + text += 
" ("; + bool first = true; + while (!state.IsDone()) { + if (!first) { + text += ", "; + } + first = false; + state.Next(collection_is_typed); + } + text += ")"; + } + } + + virtual ~GeometryTextProcessor() = default; + + const string &Execute(const geometry_t &geom) { + text.clear(); + Process(geom, false); + return text; + } +}; + +} // namespace + +void CoreVectorOperations::GeometryToVarchar(Vector &source, Vector &result, idx_t count) { + GeometryTextProcessor processor; + UnaryExecutor::Execute(source, result, count, [&](const geometry_t &input) { + const auto text = processor.Execute(input); + return StringVector::AddString(result, text); + }); +} + +//###################################################################################################################### +// Register +//###################################################################################################################### + +void RegisterSpatialCastFunctions(DatabaseInstance &db) { + GeometryCasts::Register(db); + PointCasts::Register(db); + LinestringCasts::Register(db); + PolygonCasts::Register(db); + BoxCasts::Register(db); +} + +} // namespace duckdb diff --git a/src/spatial/modules/main/spatial_functions_scalar.cpp b/src/spatial/modules/main/spatial_functions_scalar.cpp new file mode 100644 index 00000000..cb8e4f5d --- /dev/null +++ b/src/spatial/modules/main/spatial_functions_scalar.cpp @@ -0,0 +1,7808 @@ +#include "spatial/modules/main/spatial_functions.hpp" + +#include "spatial/spatial_types.hpp" +#include "spatial/util/function_builder.hpp" +#include "spatial/util/math.hpp" +#include "spatial/util/binary_reader.hpp" +#include "spatial/geometry/geometry_serialization.hpp" +#include "spatial/geometry/sgl.hpp" +#include "spatial/geometry/wkb_writer.hpp" + +#include "duckdb/common/vector_operations/generic_executor.hpp" +#include "duckdb/common/types/blob.hpp" +#include "duckdb/planner/expression/bound_function_expression.hpp" +#include 
"duckdb/execution/expression_executor.hpp" + +#include "yyjson.h" + +namespace duckdb { + +namespace { + +//###################################################################################################################### +// Util +//###################################################################################################################### + +//====================================================================================================================== +// LocalState +//====================================================================================================================== + +class LocalState final : public FunctionLocalState { +public: + explicit LocalState(ClientContext &context) : arena(BufferAllocator::Get(context)), allocator(arena) { + } + + static unique_ptr Init(ExpressionState &state, const BoundFunctionExpression &expr, + FunctionData *bind_data); + static LocalState &ResetAndGet(ExpressionState &state); + + // De/Serialize geometries + sgl::geometry Deserialize(const string_t &blob); + string_t Serialize(Vector &vector, const sgl::geometry &geom); + + ArenaAllocator &GetArena() { + return arena; + } + GeometryAllocator &GetAllocator() { + return allocator; + } + +private: + ArenaAllocator arena; + GeometryAllocator allocator; +}; + +unique_ptr LocalState::Init(ExpressionState &state, const BoundFunctionExpression &expr, + FunctionData *bind_data) { + return make_uniq_base(state.GetContext()); +} + +LocalState &LocalState::ResetAndGet(ExpressionState &state) { + auto &local_state = ExecuteFunctionState::GetFunctionState(state)->Cast(); + local_state.arena.Reset(); + return local_state; +} + +sgl::geometry LocalState::Deserialize(const string_t &blob) { + sgl::geometry geom; + Serde::Deserialize(geom, arena, blob.GetDataUnsafe(), blob.GetSize()); + return geom; +} + +string_t LocalState::Serialize(Vector &vector, const sgl::geometry &geom) { + const auto size = Serde::GetRequiredSize(geom); + auto blob = 
StringVector::EmptyString(vector, size); + Serde::Serialize(geom, blob.GetDataWriteable(), size); + blob.Finalize(); + return blob; +} +} // namespace + +namespace { + +//###################################################################################################################### +// Functions +//###################################################################################################################### + +//====================================================================================================================== +// ST_Area +//====================================================================================================================== + +struct ST_Area { + + //------------------------------------------------------------------------------------------------------------------ + // GEOMETRY + //------------------------------------------------------------------------------------------------------------------ + static void Execute(DataChunk &args, ExpressionState &state, Vector &result) { + + auto &lstate = LocalState::ResetAndGet(state); + + UnaryExecutor::Execute(args.data[0], result, args.size(), [&](const string_t &blob) { + const auto geom = lstate.Deserialize(blob); + return sgl::ops::area(&geom); + }); + } + + //------------------------------------------------------------------------------------------------------------------ + // POLYGON_2D + //------------------------------------------------------------------------------------------------------------------ + static void PolygonAreaFunction(DataChunk &args, ExpressionState &state, Vector &result) { + D_ASSERT(args.data.size() == 1); + + auto &input = args.data[0]; + auto count = args.size(); + + auto &ring_vec = ListVector::GetEntry(input); + auto ring_entries = ListVector::GetData(ring_vec); + auto &coord_vec = ListVector::GetEntry(ring_vec); + auto &coord_vec_children = StructVector::GetEntries(coord_vec); + auto x_data = FlatVector::GetData(*coord_vec_children[0]); + 
auto y_data = FlatVector::GetData(*coord_vec_children[1]); + + UnaryExecutor::Execute(input, result, count, [&](list_entry_t polygon) { + auto polygon_offset = polygon.offset; + auto polygon_length = polygon.length; + + bool first = true; + double area = 0; + for (idx_t ring_idx = polygon_offset; ring_idx < polygon_offset + polygon_length; ring_idx++) { + auto ring = ring_entries[ring_idx]; + auto ring_offset = ring.offset; + auto ring_length = ring.length; + + double sum = 0; + for (idx_t coord_idx = ring_offset; coord_idx < ring_offset + ring_length - 1; coord_idx++) { + sum += (x_data[coord_idx] * y_data[coord_idx + 1]) - (x_data[coord_idx + 1] * y_data[coord_idx]); + } + sum = std::abs(sum); + if (first) { + // Add outer ring + area = sum * 0.5; + first = false; + } else { + // Subtract holes + area -= sum * 0.5; + } + } + return area; + }); + + if (count == 1) { + result.SetVectorType(VectorType::CONSTANT_VECTOR); + } + } + + //------------------------------------------------------------------------------------------------------------------ + // LINESTRING_2D + //------------------------------------------------------------------------------------------------------------------ + static void LineStringAreaFunction(DataChunk &args, ExpressionState &state, Vector &result) { + auto input = args.data[0]; + UnaryExecutor::Execute(input, result, args.size(), [](list_entry_t) { return 0; }); + } + + //------------------------------------------------------------------------------------------------------------------ + // POINT_2D + //------------------------------------------------------------------------------------------------------------------ + static void PointAreaFunction(DataChunk &args, ExpressionState &state, Vector &result) { + using POINT_TYPE = StructTypeBinary; + using AREA_TYPE = PrimitiveType; + GenericExecutor::ExecuteUnary(args.data[0], result, args.size(), + [](POINT_TYPE) { return 0; }); + } + + 
//------------------------------------------------------------------------------------------------------------------ + // BOX_2D + //------------------------------------------------------------------------------------------------------------------ + static void BoxAreaFunction(DataChunk &args, ExpressionState &state, Vector &result) { + + using BOX_TYPE = StructTypeQuaternary; + using AREA_TYPE = PrimitiveType; + + GenericExecutor::ExecuteUnary(args.data[0], result, args.size(), [&](BOX_TYPE &box) { + auto minx = box.a_val; + auto miny = box.b_val; + auto maxx = box.c_val; + auto maxy = box.d_val; + return AREA_TYPE {(maxx - minx) * (maxy - miny)}; + }); + } + + //------------------------------------------------------------------------------------------------------------------ + // Documentation + //------------------------------------------------------------------------------------------------------------------ + static constexpr const char *DESCRIPTION = R"( + Compute the area of a geometry. + + Returns `0.0` for any geometry that is not a `POLYGON`, `MULTIPOLYGON` or `GEOMETRYCOLLECTION` containing polygon + geometries. + + The area is in the same units as the spatial reference system of the geometry. + + The `POINT_2D` and `LINESTRING_2D` overloads of this function always return `0.0` but are included for completeness. 
+ )"; + + static constexpr const char *EXAMPLE = R"( + select ST_Area('POLYGON((0 0, 0 1, 1 1, 1 0, 0 0))'::geometry); + -- 1.0 + )"; + + //------------------------------------------------------------------------------------------------------------------ + // Register + //------------------------------------------------------------------------------------------------------------------ + static void Register(DatabaseInstance &db) { + + FunctionBuilder::RegisterScalar(db, "ST_Area", [](ScalarFunctionBuilder &func) { + // GEOMETRY + func.AddVariant([](ScalarFunctionVariantBuilder &variant) { + variant.AddParameter("geom", GeoTypes::GEOMETRY()); + variant.SetReturnType(LogicalType::DOUBLE); + + variant.SetInit(LocalState::Init); + variant.SetFunction(Execute); + }); + + // POLYGON_2D + func.AddVariant([](ScalarFunctionVariantBuilder &variant) { + variant.AddParameter("polygon", GeoTypes::POLYGON_2D()); + variant.SetReturnType(LogicalType::DOUBLE); + + variant.SetFunction(PolygonAreaFunction); + }); + + // LINESTRING_2D + func.AddVariant([](ScalarFunctionVariantBuilder &variant) { + variant.AddParameter("linestring", GeoTypes::LINESTRING_2D()); + variant.SetReturnType(LogicalType::DOUBLE); + + variant.SetFunction(LineStringAreaFunction); + }); + + // POINT_2D + func.AddVariant([](ScalarFunctionVariantBuilder &variant) { + variant.AddParameter("point", GeoTypes::POINT_2D()); + variant.SetReturnType(LogicalType::DOUBLE); + + variant.SetFunction(PointAreaFunction); + }); + + // BOX_2D + func.AddVariant([](ScalarFunctionVariantBuilder &variant) { + variant.AddParameter("box", GeoTypes::BOX_2D()); + variant.SetReturnType(LogicalType::DOUBLE); + + variant.SetFunction(BoxAreaFunction); + }); + + func.SetDescription(DESCRIPTION); + func.SetExample(EXAMPLE); + + func.SetTag("ext", "spatial"); + func.SetTag("category", "property"); + }); + } +}; + +//====================================================================================================================== +// 
ST_AsGeoJSON +//====================================================================================================================== + +using namespace duckdb_yyjson_spatial; + +class JSONAllocator { + // Stolen from the JSON extension :) +public: + explicit JSONAllocator(ArenaAllocator &allocator) + : allocator(allocator), yyjson_allocator({Allocate, Reallocate, Free, &allocator}) { + } + yyjson_alc *GetYYJSONAllocator() { + return &yyjson_allocator; + } + void Reset() { + allocator.Reset(); + } + +private: + static void *Allocate(void *ctx, size_t size) { + const auto alloc = static_cast(ctx); + return alloc->AllocateAligned(size); + } + static void *Reallocate(void *ctx, void *ptr, size_t old_size, size_t size) { + const auto alloc = static_cast(ctx); + return alloc->ReallocateAligned(data_ptr_cast(ptr), old_size, size); + } + static void Free(void *ctx, void *ptr) { + // NOP because ArenaAllocator can't free + } + ArenaAllocator &allocator; + yyjson_alc yyjson_allocator; +}; + +struct ST_AsGeoJSON { + + //------------------------------------------------------------------------------------------------------------------ + // JSON Formatting Functions + //------------------------------------------------------------------------------------------------------------------ + // TODO: Move these into SGL at some point, make non-recursive + static void FormatCoord(const sgl::geometry *geom, yyjson_mut_doc *doc, yyjson_mut_val *obj) { + const auto vertex_type = static_cast(geom->has_z() + geom->has_m() * 2); + const auto vertex_count = geom->get_count(); + + if (vertex_count == 0) { + // Make empty + const auto coord = yyjson_mut_arr(doc); + yyjson_mut_obj_add_val(doc, obj, "coordinates", coord); + return; + } + + // GeoJSON does not support M values, so we ignore them + switch (vertex_type) { + case sgl::vertex_type::XY: + case sgl::vertex_type::XYM: { + const auto coord = yyjson_mut_arr(doc); + const auto vert = geom->get_vertex_xy(0); + yyjson_mut_arr_add_real(doc, 
coord, vert.x); + yyjson_mut_arr_add_real(doc, coord, vert.y); + yyjson_mut_obj_add_val(doc, obj, "coordinates", coord); + + } break; + case sgl::vertex_type::XYZ: + case sgl::vertex_type::XYZM: { + const auto coord = yyjson_mut_arr(doc); + const auto vert = geom->get_vertex_xyzm(0); + + yyjson_mut_arr_add_real(doc, coord, vert.x); + yyjson_mut_arr_add_real(doc, coord, vert.y); + yyjson_mut_arr_add_real(doc, coord, vert.zm); + yyjson_mut_obj_add_val(doc, obj, "coordinates", coord); + + } break; + default: + D_ASSERT(false); + break; + } + } + + static void FormatCoords(const sgl::geometry *geom, yyjson_mut_doc *doc, yyjson_mut_val *obj) { + const auto vertex_type = static_cast(geom->has_z() + geom->has_m() * 2); + const auto vertex_count = geom->get_count(); + + // GeoJSON does not support M values, so we ignore them + switch (vertex_type) { + case sgl::vertex_type::XY: + case sgl::vertex_type::XYM: { + for (uint32_t i = 0; i < vertex_count; i++) { + const auto coord = yyjson_mut_arr(doc); + const auto vert = geom->get_vertex_xy(i); + yyjson_mut_arr_add_real(doc, coord, vert.x); + yyjson_mut_arr_add_real(doc, coord, vert.y); + yyjson_mut_arr_append(obj, coord); + } + } break; + case sgl::vertex_type::XYZ: + case sgl::vertex_type::XYZM: { + for (uint32_t i = 0; i < vertex_count; i++) { + const auto coord = yyjson_mut_arr(doc); + const auto vert = geom->get_vertex_xyzm(i); + + yyjson_mut_arr_add_real(doc, coord, vert.x); + yyjson_mut_arr_add_real(doc, coord, vert.y); + yyjson_mut_arr_add_real(doc, coord, vert.zm); + yyjson_mut_arr_append(obj, coord); + } + } break; + default: + D_ASSERT(false); + break; + } + } + + static void FormatRecursive(const sgl::geometry *geom, yyjson_mut_doc *doc, yyjson_mut_val *obj) { + switch (geom->get_type()) { + case sgl::geometry_type::POINT: { + yyjson_mut_obj_add_str(doc, obj, "type", "Point"); + FormatCoord(geom, doc, obj); + } break; + case sgl::geometry_type::LINESTRING: { + yyjson_mut_obj_add_str(doc, obj, "type", "LineString"); 
+ const auto coords = yyjson_mut_arr(doc); + yyjson_mut_obj_add_val(doc, obj, "coordinates", coords); + FormatCoords(geom, doc, coords); + } break; + case sgl::geometry_type::POLYGON: { + yyjson_mut_obj_add_str(doc, obj, "type", "Polygon"); + const auto coords = yyjson_mut_arr(doc); + yyjson_mut_obj_add_val(doc, obj, "coordinates", coords); + + const auto tail = geom->get_last_part(); + auto head = tail; + if (head) { + do { + head = head->get_next(); + const auto ring = yyjson_mut_arr(doc); + FormatCoords(head, doc, ring); + yyjson_mut_arr_append(coords, ring); + } while (head != tail); + } + } break; + case sgl::geometry_type::MULTI_POINT: { + yyjson_mut_obj_add_str(doc, obj, "type", "MultiPoint"); + + const auto coords = yyjson_mut_arr(doc); + yyjson_mut_obj_add_val(doc, obj, "coordinates", coords); + + const auto tail = geom->get_last_part(); + auto head = tail; + + if (head) { + do { + head = head->get_next(); + FormatCoords(head, doc, coords); + } while (head != tail); + } + } break; + case sgl::geometry_type::MULTI_LINESTRING: { + yyjson_mut_obj_add_str(doc, obj, "type", "MultiLineString"); + + const auto coords = yyjson_mut_arr(doc); + yyjson_mut_obj_add_val(doc, obj, "coordinates", coords); + + const auto tail = geom->get_last_part(); + auto head = tail; + + if (head) { + do { + head = head->get_next(); + const auto line = yyjson_mut_arr(doc); + FormatCoords(head, doc, line); + yyjson_mut_arr_append(coords, line); + } while (head != tail); + } + } break; + case sgl::geometry_type::MULTI_POLYGON: { + yyjson_mut_obj_add_str(doc, obj, "type", "MultiPolygon"); + + const auto coords = yyjson_mut_arr(doc); + yyjson_mut_obj_add_val(doc, obj, "coordinates", coords); + + const auto tail = geom->get_last_part(); + auto head = tail; + + if (head) { + do { + head = head->get_next(); + const auto poly = yyjson_mut_arr(doc); + + const auto ring_tail = head->get_last_part(); + auto ring_head = ring_tail; + if (ring_head) { + do { + ring_head = ring_head->get_next(); + 
const auto ring = yyjson_mut_arr(doc); + FormatCoords(ring_head, doc, ring); + yyjson_mut_arr_append(poly, ring); + } while (ring_head != ring_tail); + } + yyjson_mut_arr_append(coords, poly); + } while (head != tail); + } + } break; + case sgl::geometry_type::MULTI_GEOMETRY: { + yyjson_mut_obj_add_str(doc, obj, "type", "GeometryCollection"); + + const auto geoms = yyjson_mut_arr(doc); + yyjson_mut_obj_add_val(doc, obj, "geometries", geoms); + + const auto tail = geom->get_last_part(); + auto head = tail; + + if (head) { + do { + head = head->get_next(); + const auto sub_geom = yyjson_mut_obj(doc); + FormatRecursive(head, doc, sub_geom); + yyjson_mut_arr_append(geoms, sub_geom); + } while (head != tail); + } + } break; + default: + D_ASSERT(false); + break; + } + } + + //------------------------------------------------------------------------------------------------------------------ + // GEOMETRY + //------------------------------------------------------------------------------------------------------------------ + static void Execute(DataChunk &args, ExpressionState &state, Vector &result) { + auto &lstate = LocalState::ResetAndGet(state); + + JSONAllocator allocator(lstate.GetArena()); + + UnaryExecutor::Execute(args.data[0], result, args.size(), [&](string_t &blob) { + const auto geom = lstate.Deserialize(blob); + + const auto doc = yyjson_mut_doc_new(allocator.GetYYJSONAllocator()); + const auto obj = yyjson_mut_obj(doc); + yyjson_mut_doc_set_root(doc, obj); + + FormatRecursive(&geom, doc, obj); + + size_t json_size = 0; + char *json_data = yyjson_mut_write_opts(doc, 0, allocator.GetYYJSONAllocator(), &json_size, nullptr); + // Because the arena allocator only resets after each pipeline invocation, we can safely just point into the + // arena here without needing to copy the data to the string heap with StringVector::AddString + return string_t {json_data, static_cast(json_size)}; + }); + } + + 
//------------------------------------------------------------------------------------------------------------------ + // Documentation + //------------------------------------------------------------------------------------------------------------------ + static constexpr auto DESCRIPTION = R"( + Returns the geometry as a GeoJSON fragment + + This does not return a complete GeoJSON document, only the geometry fragment. + To construct a complete GeoJSON document or feature, look into using the DuckDB JSON extension in conjunction with this function. + This function supports geometries with Z values, but not M values. M values are ignored. + )"; + + static constexpr auto EXAMPLE = R"( + select ST_AsGeoJSON('POLYGON((0 0, 0 1, 1 1, 1 0, 0 0))'::geometry); + ---- + {"type":"Polygon","coordinates":[[[0.0,0.0],[0.0,1.0],[1.0,1.0],[1.0,0.0],[0.0,0.0]]]} + + -- Convert a geometry into a full GeoJSON feature (requires the JSON extension to be loaded) + SELECT CAST({ + type: 'Feature', + geometry: ST_AsGeoJSON(ST_Point(1,2)), + properties: { + name: 'my_point' + } + } AS JSON); + ---- + {"type":"Feature","geometry":{"type":"Point","coordinates":[1.0,2.0]},"properties":{"name":"my_point"}} + )"; + + //------------------------------------------------------------------------------------------------------------------ + // Register + //------------------------------------------------------------------------------------------------------------------ + static void Register(DatabaseInstance &db) { + FunctionBuilder::RegisterScalar(db, "ST_AsGeoJSON", [](ScalarFunctionBuilder &func) { + func.AddVariant([](ScalarFunctionVariantBuilder &variant) { + variant.AddParameter("geom", GeoTypes::GEOMETRY()); + variant.SetReturnType(LogicalType::JSON()); + + variant.SetInit(LocalState::Init); + variant.SetFunction(Execute); + }); + + func.SetDescription(DESCRIPTION); + func.SetExample(EXAMPLE); + + func.SetTag("ext", "spatial"); + func.SetTag("category", "conversion"); + }); + } +}; + 
+//====================================================================================================================== +// ST_AsText +//====================================================================================================================== + +struct ST_AsText { + + //------------------------------------------------------------------------------------------------------------------ + // POINT_2D + //------------------------------------------------------------------------------------------------------------------ + static void ExecutePoint(DataChunk &args, ExpressionState &state, Vector &result) { + D_ASSERT(args.data.size() == 1); + auto &input = args.data[0]; + auto count = args.size(); + CoreVectorOperations::Point2DToVarchar(input, result, count); + } + + //------------------------------------------------------------------------------------------------------------------ + // LINESTRING_2D + //------------------------------------------------------------------------------------------------------------------ + // TODO: We want to format these to trim trailing zeros + static void ExecuteLineString(DataChunk &args, ExpressionState &state, Vector &result) { + D_ASSERT(args.data.size() == 1); + auto &input = args.data[0]; + auto count = args.size(); + CoreVectorOperations::LineString2DToVarchar(input, result, count); + } + + //------------------------------------------------------------------------------------------------------------------ + // POLYGON_2D + //------------------------------------------------------------------------------------------------------------------ + // TODO: We want to format these to trim trailing zeros + static void ExecutePolygon(DataChunk &args, ExpressionState &state, Vector &result) { + D_ASSERT(args.data.size() == 1); + auto count = args.size(); + auto &input = args.data[0]; + CoreVectorOperations::Polygon2DToVarchar(input, result, count); + } + + 
//------------------------------------------------------------------------------------------------------------------ + // BOX_2D + //------------------------------------------------------------------------------------------------------------------ + static void ExecuteBox(DataChunk &args, ExpressionState &state, Vector &result) { + D_ASSERT(args.data.size() == 1); + auto count = args.size(); + auto &input = args.data[0]; + CoreVectorOperations::Box2DToVarchar(input, result, count); + } + + //------------------------------------------------------------------------------------------------------------------ + // GEOMETRY + //------------------------------------------------------------------------------------------------------------------ + // TODO: Move this to SGL once we have proper double formatting + static void ExecuteGeometry(DataChunk &args, ExpressionState &state, Vector &result) { + D_ASSERT(args.data.size() == 1); + auto count = args.size(); + auto &input = args.data[0]; + CoreVectorOperations::GeometryToVarchar(input, result, count); + } + + //------------------------------------------------------------------------------------------------------------------ + // Documentation + //------------------------------------------------------------------------------------------------------------------ + static constexpr const char *DESCRIPTION = R"( + Returns the geometry as a WKT string + )"; + + static constexpr const char *EXAMPLE = R"( + SELECT ST_AsText(ST_MakeEnvelope(0,0,1,1)); + ---- + POLYGON ((0 0, 0 1, 1 1, 1 0, 0 0)) + )"; + + //------------------------------------------------------------------------------------------------------------------ + // Register + //------------------------------------------------------------------------------------------------------------------ + static void Register(DatabaseInstance &db) { + FunctionBuilder::RegisterScalar(db, "ST_AsText", [](ScalarFunctionBuilder &func) { + func.AddVariant([](ScalarFunctionVariantBuilder 
&variant) { + variant.AddParameter("geom", GeoTypes::GEOMETRY()); + variant.SetReturnType(LogicalType::VARCHAR); + + variant.SetFunction(ExecuteGeometry); + }); + + func.AddVariant([](ScalarFunctionVariantBuilder &variant) { + variant.AddParameter("point", GeoTypes::POINT_2D()); + variant.SetReturnType(LogicalType::VARCHAR); + + variant.SetFunction(ExecutePoint); + }); + + func.AddVariant([](ScalarFunctionVariantBuilder &variant) { + variant.AddParameter("linestring", GeoTypes::LINESTRING_2D()); + variant.SetReturnType(LogicalType::VARCHAR); + + variant.SetFunction(ExecuteLineString); + }); + + func.AddVariant([](ScalarFunctionVariantBuilder &variant) { + variant.AddParameter("polygon", GeoTypes::POLYGON_2D()); + variant.SetReturnType(LogicalType::VARCHAR); + + variant.SetFunction(ExecutePolygon); + }); + + func.AddVariant([](ScalarFunctionVariantBuilder &variant) { + variant.AddParameter("box", GeoTypes::BOX_2D()); + variant.SetReturnType(LogicalType::VARCHAR); + + variant.SetFunction(ExecuteBox); + }); + + func.SetDescription(DESCRIPTION); + func.SetExample(EXAMPLE); + + func.SetTag("ext", "spatial"); + func.SetTag("category", "conversion"); + }); + } +}; + +//====================================================================================================================== +// ST_AsWKB +//====================================================================================================================== + +struct ST_AsWKB { + + //------------------------------------------------------------------------------------------------------------------ + // GEOMETRY + //------------------------------------------------------------------------------------------------------------------ + static void Execute(DataChunk &args, ExpressionState &state, Vector &result) { + + UnaryExecutor::Execute( + args.data[0], result, args.size(), [&](const string_t &input) { return WKBWriter::Write(input, result); }); + } + + 
//------------------------------------------------------------------------------------------------------------------ + // Documentation + //------------------------------------------------------------------------------------------------------------------ + static constexpr auto DESCRIPTION = "Returns the geometry as a WKB (Well-Known-Binary) blob"; + static constexpr auto EXAMPLE = R"( + SELECT ST_AsWKB('POLYGON((0 0, 0 1, 1 1, 1 0, 0 0))'::GEOMETRY)::BLOB; + ---- + \x01\x03\x00\x00\x00\x01\x00\x00\x00\x05... + )"; + + //------------------------------------------------------------------------------------------------------------------ + // Register + //------------------------------------------------------------------------------------------------------------------ + static void Register(DatabaseInstance &db) { + FunctionBuilder::RegisterScalar(db, "ST_AsWKB", [](ScalarFunctionBuilder &func) { + func.AddVariant([](ScalarFunctionVariantBuilder &variant) { + variant.AddParameter("geom", GeoTypes::GEOMETRY()); + variant.SetReturnType(GeoTypes::WKB_BLOB()); + + variant.SetFunction(Execute); + }); + + func.SetDescription(DESCRIPTION); + func.SetExample(EXAMPLE); + + func.SetTag("ext", "spatial"); + func.SetTag("category", "conversion"); + }); + } +}; + +//====================================================================================================================== +// ST_AsHEXWKB +//====================================================================================================================== + +struct ST_AsHEXWKB { + + //------------------------------------------------------------------------------------------------------------------ + // GEOMETRY + //------------------------------------------------------------------------------------------------------------------ + static void Execute(DataChunk &args, ExpressionState &state, Vector &result) { + vector buffer; + UnaryExecutor::Execute(args.data[0], result, args.size(), [&](const string_t &blob) { + 
buffer.clear(); + + WKBWriter::Write(blob, buffer); + + auto blob_size = buffer.size() * 2; // every byte is rendered as two characters + auto blob_str = StringVector::EmptyString(result, blob_size); + auto blob_ptr = blob_str.GetDataWriteable(); + + idx_t str_idx = 0; + for (auto byte : buffer) { + auto byte_a = byte >> 4; + auto byte_b = byte & 0x0F; + blob_ptr[str_idx++] = Blob::HEX_TABLE[byte_a]; + blob_ptr[str_idx++] = Blob::HEX_TABLE[byte_b]; + } + + blob_str.Finalize(); + return blob_str; + }); + } + + //------------------------------------------------------------------------------------------------------------------ + // Documentation + //------------------------------------------------------------------------------------------------------------------ + static constexpr const char *DESCRIPTION = R"( + Returns the geometry as a HEXWKB string + )"; + + static constexpr const char *EXAMPLE = R"( + SELECT ST_AsHexWKB('POLYGON((0 0, 0 1, 1 1, 1 0, 0 0))'::geometry); + ---- + 01030000000100000005000000000000000000000000000... 
+ )"; + + //------------------------------------------------------------------------------------------------------------------ + // Register + //------------------------------------------------------------------------------------------------------------------ + static void Register(DatabaseInstance &db) { + FunctionBuilder::RegisterScalar(db, "ST_AsHEXWKB", [](ScalarFunctionBuilder &func) { + func.AddVariant([](ScalarFunctionVariantBuilder &variant) { + variant.AddParameter("geom", GeoTypes::GEOMETRY()); + variant.SetReturnType(LogicalType::VARCHAR); + + variant.SetFunction(Execute); + }); + + func.SetDescription(DESCRIPTION); + func.SetExample(EXAMPLE); + + func.SetTag("ext", "spatial"); + func.SetTag("category", "conversion"); + }); + } +}; + +//====================================================================================================================== +// ST_AsSVG +//====================================================================================================================== + +struct ST_AsSVG { + + //------------------------------------------------------------------------------------------------------------------ + // SVG Formatting Functions + //------------------------------------------------------------------------------------------------------------------ + // TODO: Move this to sgl once we have proper double formatting. And make non-recursive please. 
+ + static void FormatPoint(const sgl::geometry *geom, vector &buffer, int32_t max_digits, bool rel) { + D_ASSERT(geom->get_type() == sgl::geometry_type::POINT); + if (geom->is_empty()) { + return; + } + const auto vert = geom->get_vertex_xy(0); + if (rel) { + constexpr auto x = "x=\""; + constexpr auto y = "y=\""; + buffer.insert(buffer.end(), x, x + 3); + MathUtil::format_coord(vert.x, buffer, max_digits); + buffer.push_back('"'); + buffer.push_back(' '); + buffer.insert(buffer.end(), y, y + 3); + MathUtil::format_coord(-vert.y, buffer, max_digits); + buffer.push_back('"'); + } else { + constexpr auto cx = "cx=\""; + constexpr auto cy = "cy=\""; + buffer.insert(buffer.end(), cx, cx + 4); + MathUtil::format_coord(vert.x, buffer, max_digits); + buffer.push_back('"'); + buffer.push_back(' '); + buffer.insert(buffer.end(), cy, cy + 4); + MathUtil::format_coord(-vert.y, buffer, max_digits); + buffer.push_back('"'); + } + } + + static void FormatLineString(const sgl::geometry *geom, vector &buffer, int32_t max_digits, bool rel, + bool close) { + D_ASSERT(geom->get_type() == sgl::geometry_type::LINESTRING); + + const auto vertex_count = geom->get_count(); + if (vertex_count == 0) { + return; + } + + sgl::vertex_xy last_vert = geom->get_vertex_xy(0); + buffer.push_back('M'); + buffer.push_back(' '); + MathUtil::format_coord(last_vert.x, -last_vert.y, buffer, max_digits); + + if (vertex_count == 1) { + return; + } + + buffer.push_back(' '); + buffer.push_back(rel ? 
'l' : 'L'); + + if (rel) { + for (uint32_t i = 1; i < vertex_count; i++) { + if (i == vertex_count - 1 && close) { + buffer.push_back(' '); + buffer.push_back('z'); + } else { + const auto vert = geom->get_vertex_xy(i); + const auto delta = vert - last_vert; + last_vert = vert; + buffer.push_back(' '); + MathUtil::format_coord(delta.x, -delta.y, buffer, max_digits); + } + } + } else { + for (uint32_t i = 1; i < vertex_count; i++) { + if (i == vertex_count - 1 && close) { + buffer.push_back(' '); + buffer.push_back('Z'); + } else { + const auto vert = geom->get_vertex_xy(i); + buffer.push_back(' '); + MathUtil::format_coord(vert.x, -vert.y, buffer, max_digits); + } + } + } + } + + static void FormatPolygon(const sgl::geometry *geom, vector &buffer, int32_t max_digits, bool rel) { + const auto tail = geom->get_last_part(); + auto head = tail; + if (head) { + do { + head = head->get_next(); + FormatLineString(head, buffer, max_digits, rel, true); + } while (head != tail); + } + } + + static void FormatRecursive(const sgl::geometry *geom, vector &buffer, int32_t max_digits, bool rel) { + switch (geom->get_type()) { + case sgl::geometry_type::POINT: + FormatPoint(geom, buffer, max_digits, rel); + break; + case sgl::geometry_type::LINESTRING: + FormatLineString(geom, buffer, max_digits, rel, false); + break; + case sgl::geometry_type::POLYGON: + FormatPolygon(geom, buffer, max_digits, rel); + break; + case sgl::geometry_type::MULTI_POINT: { + const auto tail = geom->get_last_part(); + auto head = tail; + if (head) { + do { + head = head->get_next(); + FormatPoint(head, buffer, max_digits, rel); + if (head != tail) { + buffer.push_back(','); + } + } while (head != tail); + } + } break; + case sgl::geometry_type::MULTI_LINESTRING: { + const auto tail = geom->get_last_part(); + auto head = tail; + if (head) { + do { + head = head->get_next(); + FormatLineString(head, buffer, max_digits, rel, false); + if (head != tail) { + buffer.push_back(' '); + } + } while (head != 
tail); + } + } break; + case sgl::geometry_type::MULTI_POLYGON: { + const auto tail = geom->get_last_part(); + auto head = tail; + if (head) { + do { + head = head->get_next(); + FormatPolygon(head, buffer, max_digits, rel); + if (head != tail) { + buffer.push_back(' '); + } + } while (head != tail); + } + } break; + case sgl::geometry_type::MULTI_GEOMETRY: { + const auto tail = geom->get_last_part(); + auto head = tail; + if (head) { + do { + head = head->get_next(); + FormatRecursive(head, buffer, max_digits, rel); + if (head != tail) { + buffer.push_back(';'); + } + } while (head != tail); + } + } break; + default: + D_ASSERT(false); + break; + } + } + + //------------------------------------------------------------------------------------------------------------------ + // GEOMETRY + //------------------------------------------------------------------------------------------------------------------ + + static void Execute(DataChunk &args, ExpressionState &state, Vector &result) { + auto &lstate = LocalState::ResetAndGet(state); + + vector buffer; + + TernaryExecutor::Execute( + args.data[0], args.data[1], args.data[2], result, args.size(), + [&](const string_t &blob, const bool rel, const int32_t max_digits) { + // Clear buffer + buffer.clear(); + + // Deserialize geometry + const auto geom = lstate.Deserialize(blob); + + FormatRecursive(&geom, buffer, max_digits, rel); + + return StringVector::AddString(result, buffer.data(), buffer.size()); + }); + } + + //------------------------------------------------------------------------------------------------------------------ + // Documentation + //------------------------------------------------------------------------------------------------------------------ + static constexpr auto DESCRIPTION = R"( + Convert the geometry into a SVG fragment or path + + Convert the geometry into a SVG fragment or path + The SVG fragment is returned as a string. The fragment is a path element that can be used in an SVG document. 
+ The second boolean argument specifies whether the path should be relative or absolute. + The third argument specifies the maximum number of digits to use for the coordinates. + + Points are formatted as cx/cy using absolute coordinates or x/y using relative coordinates. + )"; + + static constexpr auto EXAMPLE = R"( + SELECT ST_AsSVG('POLYGON((0 0, 0 1, 1 1, 1 0, 0 0))'::GEOMETRY, false, 15); + ---- + M 0 0 L 0 -1 1 -1 1 0 Z + )"; + + //------------------------------------------------------------------------------------------------------------------ + // Register + //------------------------------------------------------------------------------------------------------------------ + static void Register(DatabaseInstance &db) { + FunctionBuilder::RegisterScalar(db, "ST_AsSVG", [](ScalarFunctionBuilder &func) { + func.AddVariant([](ScalarFunctionVariantBuilder &variant) { + variant.AddParameter("geom", GeoTypes::GEOMETRY()); + variant.AddParameter("relative", LogicalType::BOOLEAN); + variant.AddParameter("precision", LogicalType::INTEGER); + + variant.SetReturnType(LogicalType::VARCHAR); + + variant.SetInit(LocalState::Init); + variant.SetFunction(Execute); + }); + + func.SetDescription(DESCRIPTION); + func.SetExample(EXAMPLE); + + func.SetTag("ext", "spatial"); + func.SetTag("category", "conversion"); + }); + } +}; + +//====================================================================================================================== +// ST_Centroid +//====================================================================================================================== +// The GEOMETRY version is currently implemented in the GEOS module + +struct ST_Centroid { + + //------------------------------------------------------------------------------------------------------------------ + // POINT_2D + //------------------------------------------------------------------------------------------------------------------ + // Provided for completeness sake + static void 
ExecutePoint(DataChunk &args, ExpressionState &state, Vector &result) { + result.Reference(args.data[0]); + } + + //------------------------------------------------------------------------------------------------------------------ + // LINESTRING_2D + //------------------------------------------------------------------------------------------------------------------ + static void ExecuteLineString(DataChunk &args, ExpressionState &state, Vector &result) { + auto input = args.data[0]; + auto count = args.size(); + UnifiedVectorFormat format; + input.ToUnifiedFormat(count, format); + + auto line_vertex_entries = ListVector::GetData(input); + auto &line_vertex_vec = ListVector::GetEntry(input); + auto &line_vertex_vec_children = StructVector::GetEntries(line_vertex_vec); + auto line_x_data = FlatVector::GetData(*line_vertex_vec_children[0]); + auto line_y_vec = FlatVector::GetData(*line_vertex_vec_children[1]); + + auto &point_vertex_children = StructVector::GetEntries(result); + auto point_x_data = FlatVector::GetData(*point_vertex_children[0]); + auto point_y_data = FlatVector::GetData(*point_vertex_children[1]); + for (idx_t out_row_idx = 0; out_row_idx < count; out_row_idx++) { + + auto in_row_idx = format.sel->get_index(out_row_idx); + if (format.validity.RowIsValid(in_row_idx)) { + auto line = line_vertex_entries[in_row_idx]; + auto line_offset = line.offset; + auto line_length = line.length; + + double total_x = 0; + double total_y = 0; + double total_length = 0; + + // To calculate the centroid of a line, we calculate the centroid of each segment + // and then weight the segment centroids by the length of the segment. + // The final centroid is the sum of the weighted segment centroids divided by the total length. 
+ for (idx_t coord_idx = line_offset; coord_idx < line_offset + line_length - 1; coord_idx++) { + auto x1 = line_x_data[coord_idx]; + auto y1 = line_y_vec[coord_idx]; + auto x2 = line_x_data[coord_idx + 1]; + auto y2 = line_y_vec[coord_idx + 1]; + + auto segment_length = sqrt((x2 - x1) * (x2 - x1) + (y2 - y1) * (y2 - y1)); + total_length += segment_length; + total_x += (x1 + x2) * 0.5 * segment_length; + total_y += (y1 + y2) * 0.5 * segment_length; + } + + point_x_data[out_row_idx] = total_x / total_length; + point_y_data[out_row_idx] = total_y / total_length; + + } else { + FlatVector::SetNull(result, out_row_idx, true); + } + } + if (count == 1) { + result.SetVectorType(VectorType::CONSTANT_VECTOR); + } + } + + //------------------------------------------------------------------------------------------------------------------ + // POLYGON_2D + //------------------------------------------------------------------------------------------------------------------ + static void ExecutePolygon(DataChunk &args, ExpressionState &state, Vector &result) { + auto input = args.data[0]; + auto count = args.size(); + UnifiedVectorFormat format; + input.ToUnifiedFormat(count, format); + + auto poly_entries = ListVector::GetData(input); + auto &ring_vec = ListVector::GetEntry(input); + auto ring_entries = ListVector::GetData(ring_vec); + auto &vertex_vec = ListVector::GetEntry(ring_vec); + auto &vertex_vec_children = StructVector::GetEntries(vertex_vec); + auto x_data = FlatVector::GetData(*vertex_vec_children[0]); + auto y_data = FlatVector::GetData(*vertex_vec_children[1]); + + auto ¢roid_children = StructVector::GetEntries(result); + auto centroid_x_data = FlatVector::GetData(*centroid_children[0]); + auto centroid_y_data = FlatVector::GetData(*centroid_children[1]); + + for (idx_t in_row_idx = 0; in_row_idx < count; in_row_idx++) { + if (format.validity.RowIsValid(in_row_idx)) { + auto poly = poly_entries[in_row_idx]; + auto poly_offset = poly.offset; + auto poly_length = 
poly.length; + + double poly_centroid_x = 0; + double poly_centroid_y = 0; + double poly_area = 0; + + // To calculate the centroid of a polygon, we calculate the centroid of each ring + // and then weight the ring centroids by the area of the ring. + // The final centroid is the sum of the weighted ring centroids divided by the total area. + for (idx_t ring_idx = poly_offset; ring_idx < poly_offset + poly_length; ring_idx++) { + auto ring = ring_entries[ring_idx]; + auto ring_offset = ring.offset; + auto ring_length = ring.length; + + double ring_centroid_x = 0; + double ring_centroid_y = 0; + double ring_area = 0; + + // To calculate the centroid of a ring, we calculate the centroid of each triangle + // and then weight the triangle centroids by the area of the triangle. + // The final centroid is the sum of the weighted triangle centroids divided by the ring area. + for (idx_t coord_idx = ring_offset; coord_idx < ring_offset + ring_length - 1; coord_idx++) { + auto x1 = x_data[coord_idx]; + auto y1 = y_data[coord_idx]; + auto x2 = x_data[coord_idx + 1]; + auto y2 = y_data[coord_idx + 1]; + + auto tri_area = (x1 * y2) - (x2 * y1); + ring_centroid_x += (x1 + x2) * tri_area; + ring_centroid_y += (y1 + y2) * tri_area; + ring_area += tri_area; + } + ring_area *= 0.5; + + ring_centroid_x /= (ring_area * 6); + ring_centroid_y /= (ring_area * 6); + + if (ring_idx == poly_offset) { + // The first ring is the outer ring, and the remaining rings are holes. + // For the outer ring, we add the area and centroid to the total area and centroid. + poly_area += ring_area; + poly_centroid_x += ring_centroid_x * ring_area; + poly_centroid_y += ring_centroid_y * ring_area; + } else { + // For holes, we subtract the area and centroid from the total area and centroid. 
+ poly_area -= ring_area; + poly_centroid_x -= ring_centroid_x * ring_area; + poly_centroid_y -= ring_centroid_y * ring_area; + } + } + centroid_x_data[in_row_idx] = poly_centroid_x / poly_area; + centroid_y_data[in_row_idx] = poly_centroid_y / poly_area; + } else { + FlatVector::SetNull(result, in_row_idx, true); + } + } + if (count == 1) { + result.SetVectorType(VectorType::CONSTANT_VECTOR); + } + } + + //------------------------------------------------------------------------------------------------------------------ + // BOX_2D/F + //------------------------------------------------------------------------------------------------------------------ + template + static void ExecuteBox(DataChunk &args, ExpressionState &state, Vector &result) { + auto input = args.data[0]; + auto count = args.size(); + UnifiedVectorFormat format; + input.ToUnifiedFormat(count, format); + auto &box_children = StructVector::GetEntries(input); + auto minx_data = FlatVector::GetData(*box_children[0]); + auto miny_data = FlatVector::GetData(*box_children[1]); + auto maxx_data = FlatVector::GetData(*box_children[2]); + auto maxy_data = FlatVector::GetData(*box_children[3]); + + auto ¢roid_children = StructVector::GetEntries(result); + auto centroid_x_data = FlatVector::GetData(*centroid_children[0]); + auto centroid_y_data = FlatVector::GetData(*centroid_children[1]); + + for (idx_t out_row_idx = 0; out_row_idx < count; out_row_idx++) { + auto in_row_idx = format.sel->get_index(out_row_idx); + if (format.validity.RowIsValid(in_row_idx)) { + centroid_x_data[out_row_idx] = (minx_data[in_row_idx] + maxx_data[in_row_idx]) * 0.5; + centroid_y_data[out_row_idx] = (miny_data[in_row_idx] + maxy_data[in_row_idx]) * 0.5; + } else { + FlatVector::SetNull(result, out_row_idx, true); + } + } + if (count == 1) { + result.SetVectorType(VectorType::CONSTANT_VECTOR); + } + } + + //------------------------------------------------------------------------------------------------------------------ + // 
Documentation + //------------------------------------------------------------------------------------------------------------------ + // TODO: add example & desc + static constexpr auto DESCRIPTION = ""; + static constexpr auto EXAMPLE = ""; + + //------------------------------------------------------------------------------------------------------------------ + // Register + //------------------------------------------------------------------------------------------------------------------ + static void Register(DatabaseInstance &db) { + FunctionBuilder::RegisterScalar(db, "ST_Centroid", [&](ScalarFunctionBuilder &func) { + func.AddVariant([](ScalarFunctionVariantBuilder &variant) { + variant.AddParameter("point", GeoTypes::POINT_2D()); + variant.SetReturnType(GeoTypes::POINT_2D()); + variant.SetFunction(ExecutePoint); + }); + + func.AddVariant([](ScalarFunctionVariantBuilder &variant) { + variant.AddParameter("linestring", GeoTypes::LINESTRING_2D()); + variant.SetReturnType(GeoTypes::POINT_2D()); + variant.SetFunction(ExecuteLineString); + }); + + func.AddVariant([](ScalarFunctionVariantBuilder &variant) { + variant.AddParameter("polygon", GeoTypes::POLYGON_2D()); + variant.SetReturnType(GeoTypes::POINT_2D()); + variant.SetFunction(ExecutePolygon); + }); + + func.AddVariant([](ScalarFunctionVariantBuilder &variant) { + variant.AddParameter("box", GeoTypes::BOX_2D()); + variant.SetReturnType(GeoTypes::POINT_2D()); + variant.SetFunction(ExecuteBox); + }); + + func.AddVariant([](ScalarFunctionVariantBuilder &variant) { + variant.AddParameter("box", GeoTypes::BOX_2DF()); + variant.SetReturnType(GeoTypes::POINT_2D()); + variant.SetFunction(ExecuteBox); + }); + + func.SetDescription(DESCRIPTION); + func.SetExample(EXAMPLE); + + func.SetTag("ext", "spatial"); + func.SetTag("category", "property"); + }); + } +}; + +//====================================================================================================================== +// ST_Collect 
+//====================================================================================================================== + +struct ST_Collect { + + //------------------------------------------------------------------------------------------------------------------ + // Execution + //------------------------------------------------------------------------------------------------------------------ + static void Execute(DataChunk &args, ExpressionState &state, Vector &result) { + auto &lstate = LocalState::ResetAndGet(state); + + auto &child_vec = ListVector::GetEntry(args.data[0]); + auto child_count = ListVector::GetListSize(args.data[0]); + + UnifiedVectorFormat input_vdata; + child_vec.ToUnifiedFormat(child_count, input_vdata); + + UnaryExecutor::Execute( + args.data[0], result, args.size(), [&](const list_entry_t &entry) { + const auto offset = entry.offset; + const auto length = entry.length; + + if (length == 0) { + const auto empty = sgl::multi_geometry::make_empty(); + return lstate.Serialize(result, empty); + } + + // First figure out if we have Z or M + bool has_z = false; + bool has_m = false; + + // First pass, check if we have Z or M + for (idx_t out_idx = offset; out_idx < offset + length; out_idx++) { + const auto row_idx = input_vdata.sel->get_index(out_idx); + if (!input_vdata.validity.RowIsValid(row_idx)) { + continue; + } + + auto &blob = UnifiedVectorFormat::GetData(input_vdata)[row_idx]; + + // TODO: Peek dont deserialize + const auto geom = lstate.Deserialize(blob); + has_z = has_z || geom.has_z(); + has_m = has_m || geom.has_m(); + } + + bool all_points = true; + bool all_lines = true; + bool all_polygons = true; + + sgl::geometry collection(sgl::geometry_type::INVALID, has_z, has_m); + + for (idx_t out_idx = offset; out_idx < offset + length; out_idx++) { + const auto row_idx = input_vdata.sel->get_index(out_idx); + if (!input_vdata.validity.RowIsValid(row_idx)) { + continue; + } + + auto &blob = 
UnifiedVectorFormat::GetData(input_vdata)[row_idx]; + // TODO: Deserialize to heap immediately + auto geom = lstate.Deserialize(blob); + + // TODO: Peek dont deserialize + if (geom.is_empty()) { + continue; + } + + all_points = all_points && geom.get_type() == sgl::geometry_type::POINT; + all_lines = all_lines && geom.get_type() == sgl::geometry_type::LINESTRING; + all_polygons = all_polygons && geom.get_type() == sgl::geometry_type::POLYGON; + + // Force Z and M so that the dimensions match + sgl::ops::force_zm(lstate.GetAllocator(), &geom, has_z, has_m, 0, 0); + + const auto mem = lstate.GetArena().Allocate(sizeof(sgl::geometry)); + const auto part = new (mem) sgl::geometry(geom); + + // Append to collection + collection.append_part(part); + } + + if (collection.is_empty()) { + // NULL's and EMPTY do not contribute to the result. + return lstate.Serialize(result, sgl::multi_geometry::make_empty()); + } + + // Figure out the type of the collection + if (all_points) { + collection.set_type(sgl::geometry_type::MULTI_POINT); + } else if (all_lines) { + collection.set_type(sgl::geometry_type::MULTI_LINESTRING); + } else if (all_polygons) { + collection.set_type(sgl::geometry_type::MULTI_POLYGON); + } else { + collection.set_type(sgl::geometry_type::MULTI_GEOMETRY); + } + + // Serialize the collection + return lstate.Serialize(result, collection); + }); + } + + //------------------------------------------------------------------------------------------------------------------ + // Documentation + //------------------------------------------------------------------------------------------------------------------ + static constexpr auto DESCRIPTION = R"( + Collects a list of geometries into a collection geometry. + - If all geometries are `POINT`'s, a `MULTIPOINT` is returned. + - If all geometries are `LINESTRING`'s, a `MULTILINESTRING` is returned. + - If all geometries are `POLYGON`'s, a `MULTIPOLYGON` is returned. 
+ - Otherwise if the input collection contains a mix of geometry types, a `GEOMETRYCOLLECTION` is returned. + + Empty and `NULL` geometries are ignored. If all geometries are empty or `NULL`, a `GEOMETRYCOLLECTION EMPTY` is returned. + )"; + + static constexpr auto EXAMPLE = R"( + -- With all POINT's, a MULTIPOINT is returned + SELECT ST_Collect([ST_Point(1, 2), ST_Point(3, 4)]); + ---- + MULTIPOINT (1 2, 3 4) + + -- With mixed geometry types, a GEOMETRYCOLLECTION is returned + SELECT ST_Collect([ST_Point(1, 2), ST_GeomFromText('LINESTRING(3 4, 5 6)')]); + ---- + GEOMETRYCOLLECTION (POINT (1 2), LINESTRING (3 4, 5 6)) + + -- Note that the empty geometry is ignored, so the result is a MULTIPOINT + SELECT ST_Collect([ST_Point(1, 2), NULL, ST_GeomFromText('GEOMETRYCOLLECTION EMPTY')]); + ---- + MULTIPOINT (1 2) + + -- If all geometries are empty or NULL, a GEOMETRYCOLLECTION EMPTY is returned + SELECT ST_Collect([NULL, ST_GeomFromText('GEOMETRYCOLLECTION EMPTY')]); + ---- + GEOMETRYCOLLECTION EMPTY + + -- Tip: You can use the `ST_Collect` function together with the `list()` aggregate function to collect multiple rows of geometries into a single geometry collection: + + CREATE TABLE points (geom GEOMETRY); + + INSERT INTO points VALUES (ST_Point(1, 2)), (ST_Point(3, 4)); + + SELECT ST_Collect(list(geom)) FROM points; + ---- + MULTIPOINT (1 2, 3 4) + )"; + + //------------------------------------------------------------------------------------------------------------------ + // Register + //------------------------------------------------------------------------------------------------------------------ + static void Register(DatabaseInstance &db) { + FunctionBuilder::RegisterScalar(db, "ST_Collect", [](ScalarFunctionBuilder &func) { + func.AddVariant([](ScalarFunctionVariantBuilder &variant) { + variant.AddParameter("geoms", LogicalType::LIST(GeoTypes::GEOMETRY())); + variant.SetReturnType(GeoTypes::GEOMETRY()); + + variant.SetInit(LocalState::Init); + 
variant.SetFunction(Execute); + }); + + func.SetDescription(DESCRIPTION); + func.SetExample(EXAMPLE); + + func.SetTag("ext", "spatial"); + func.SetTag("category", "construction"); + }); + } +}; + +//====================================================================================================================== +// ST_CollectionExtract +//====================================================================================================================== + +struct ST_CollectionExtract { + + //------------------------------------------------------------------------------------------------------------------ + // Execute (TYPED) + //------------------------------------------------------------------------------------------------------------------ + static void ExecuteTyped(DataChunk &args, ExpressionState &state, Vector &result) { + auto &lstate = LocalState::ResetAndGet(state); + + BinaryExecutor::Execute( + args.data[0], args.data[1], result, args.size(), [&](const string_t &blob, int32_t requested_type) { + auto geom = lstate.Deserialize(blob); + const auto type = geom.get_type(); + + const auto has_z = geom.has_z(); + const auto has_m = geom.has_m(); + + switch (requested_type) { + case 1: + switch (type) { + case sgl::geometry_type::MULTI_POINT: + case sgl::geometry_type::POINT: + return blob; + case sgl::geometry_type::MULTI_GEOMETRY: { + // collect all points + const auto points = sgl::ops::extract_points(&geom); + return lstate.Serialize(result, points); + } + case sgl::geometry_type::MULTI_LINESTRING: + case sgl::geometry_type::MULTI_POLYGON: + return lstate.Serialize(result, sgl::multi_point::make_empty(has_z, has_m)); + default: + return lstate.Serialize(result, sgl::point::make_empty(has_z, has_m)); + } + break; + case 2: + switch (type) { + case sgl::geometry_type::MULTI_LINESTRING: + case sgl::geometry_type::LINESTRING: + return blob; + case sgl::geometry_type::MULTI_GEOMETRY: { + // collect all lines + const auto lines = 
sgl::ops::extract_linestrings(&geom); + return lstate.Serialize(result, lines); + } + case sgl::geometry_type::MULTI_POINT: + case sgl::geometry_type::MULTI_POLYGON: + return lstate.Serialize(result, sgl::multi_linestring::make_empty(has_z, has_m)); + default: + return lstate.Serialize(result, sgl::linestring::make_empty(has_z, has_m)); + } + break; + case 3: + switch (type) { + case sgl::geometry_type::MULTI_POLYGON: + case sgl::geometry_type::POLYGON: + return blob; + case sgl::geometry_type::MULTI_GEOMETRY: { + // collect all polygons + const auto polygons = sgl::ops::extract_polygons(&geom); + return lstate.Serialize(result, polygons); + } + case sgl::geometry_type::MULTI_POINT: + case sgl::geometry_type::MULTI_LINESTRING: + return lstate.Serialize(result, sgl::multi_polygon::make_empty(has_z, has_m)); + default: + return lstate.Serialize(result, sgl::polygon::make_empty(has_z, has_m)); + } + break; + default: + throw InvalidInputException("Invalid requested type parameter for collection extract, must be 1 " + "(POINT), 2 (LINESTRING) or 3 (POLYGON)"); + } + }); + } + + //------------------------------------------------------------------------------------------------------------------ + // Execute (AUTO) + //------------------------------------------------------------------------------------------------------------------ + static void ExecuteAuto(DataChunk &args, ExpressionState &state, Vector &result) { + auto &lstate = LocalState::ResetAndGet(state); + + UnaryExecutor::Execute(args.data[0], result, args.size(), [&](const string_t &input) { + // TODO: Peek without deserialize + auto geom = lstate.Deserialize(input); + + if (geom.get_type() != sgl::geometry_type::MULTI_GEOMETRY) { + return input; + } + if (geom.is_empty()) { + return input; + } + + // Find the highest dimension of the geometries in the collection + // Empty geometries are ignored + const auto dim = sgl::ops::max_surface_dimension(&geom, true); + switch (dim) { + // Point case + case 0: { + 
const auto mpoint = sgl::ops::extract_points(&geom); + return lstate.Serialize(result, mpoint); + } + // LineString case + case 1: { + const auto mline = sgl::ops::extract_linestrings(&geom); + return lstate.Serialize(result, mline); + } + // Polygon case + case 2: { + const auto mpoly = sgl::ops::extract_polygons(&geom); + return lstate.Serialize(result, mpoly); + } + default: { + throw InternalException("Invalid dimension in collection extract"); + } + } + }); + } + + //------------------------------------------------------------------------------------------------------------------ + // Documentation + //------------------------------------------------------------------------------------------------------------------ + static constexpr auto DESCRIPTION = R"( + Extracts geometries from a GeometryCollection into a typed multi geometry. + + If the input geometry is a GeometryCollection, the function will return a multi geometry, determined by the `type` parameter. + - if `type` = 1, returns a MultiPoint containg all the Points in the collection + - if `type` = 2, returns a MultiLineString containg all the LineStrings in the collection + - if `type` = 3, returns a MultiPolygon containg all the Polygons in the collection + + If no `type` parameters is provided, the function will return a multi geometry matching the highest "surface dimension" + of the contained geometries. E.g. if the collection contains only Points, a MultiPoint will be returned. But if the + collection contains both Points and LineStrings, a MultiLineString will be returned. Similarly, if the collection + contains Polygons, a MultiPolygon will be returned. Contained geometries of a lower surface dimension will be ignored. + + If the input geometry contains nested GeometryCollections, their geometries will be extracted recursively and included + into the final multi geometry as well. + + If the input geometry is not a GeometryCollection, the function will return the input geometry as is. 
+ )"; + + static constexpr auto EXAMPLE = R"( + select st_collectionextract('MULTIPOINT(1 2,3 4)'::geometry, 1); + -- MULTIPOINT (1 2, 3 4) + )"; + + //------------------------------------------------------------------------------------------------------------------ + // Register + //------------------------------------------------------------------------------------------------------------------ + static void Register(DatabaseInstance &db) { + FunctionBuilder::RegisterScalar(db, "ST_CollectionExtract", [](ScalarFunctionBuilder &func) { + func.AddVariant([](ScalarFunctionVariantBuilder &variant) { + variant.AddParameter("geom", GeoTypes::GEOMETRY()); + variant.AddParameter("type", LogicalType::INTEGER); + variant.SetReturnType(GeoTypes::GEOMETRY()); + + variant.SetInit(LocalState::Init); + variant.SetFunction(ExecuteTyped); + }); + + func.AddVariant([](ScalarFunctionVariantBuilder &variant) { + variant.AddParameter("geom", GeoTypes::GEOMETRY()); + variant.SetReturnType(GeoTypes::GEOMETRY()); + + variant.SetInit(LocalState::Init); + variant.SetFunction(ExecuteAuto); + }); + + func.SetDescription(DESCRIPTION); + func.SetExample(EXAMPLE); + + func.SetTag("ext", "spatial"); + func.SetTag("category", "construction"); + }); + } +}; + +//====================================================================================================================== +// ST_Contains +//====================================================================================================================== + +struct ST_Contains { + + //------------------------------------------------------------------------------------------------------------------ + // POINT_2D -> POLYGON_2D + //------------------------------------------------------------------------------------------------------------------ + // TODO: This should probably be revised. 
Im not sure if the current implementation is entirely accurate + + static void Operation(Vector &in_point, Vector &in_polygon, Vector &result, idx_t count) { + enum class Side { LEFT, RIGHT, ON }; + + in_polygon.Flatten(count); + in_point.Flatten(count); + + // Setup point vectors + auto &p_children = StructVector::GetEntries(in_point); + auto p_x_data = FlatVector::GetData(*p_children[0]); + auto p_y_data = FlatVector::GetData(*p_children[1]); + + // Setup polygon vectors + auto polygon_entries = ListVector::GetData(in_polygon); + auto &ring_vec = ListVector::GetEntry(in_polygon); + auto ring_entries = ListVector::GetData(ring_vec); + auto &coord_vec = ListVector::GetEntry(ring_vec); + auto &coord_children = StructVector::GetEntries(coord_vec); + auto x_data = FlatVector::GetData(*coord_children[0]); + auto y_data = FlatVector::GetData(*coord_children[1]); + + auto result_data = FlatVector::GetData(result); + + for (idx_t polygon_idx = 0; polygon_idx < count; polygon_idx++) { + auto polygon = polygon_entries[polygon_idx]; + auto polygon_offset = polygon.offset; + auto polygon_length = polygon.length; + bool first = true; + + // does the point lie inside the polygon? 
+ bool contains = false; + + auto x = p_x_data[polygon_idx]; + auto y = p_y_data[polygon_idx]; + + for (idx_t ring_idx = polygon_offset; ring_idx < polygon_offset + polygon_length; ring_idx++) { + auto ring = ring_entries[ring_idx]; + auto ring_offset = ring.offset; + auto ring_length = ring.length; + + auto x1 = x_data[ring_offset]; + auto y1 = y_data[ring_offset]; + int winding_number = 0; + + for (idx_t coord_idx = ring_offset + 1; coord_idx < ring_offset + ring_length; coord_idx++) { + // foo foo foo + auto x2 = x_data[coord_idx]; + auto y2 = y_data[coord_idx]; + + if (x1 == x2 && y1 == y2) { + x1 = x2; + y1 = y2; + continue; + } + + auto y_min = std::min(y1, y2); + auto y_max = std::max(y1, y2); + + if (y > y_max || y < y_min) { + x1 = x2; + y1 = y2; + continue; + } + + auto side = Side::ON; + double side_v = ((x - x1) * (y2 - y1) - (x2 - x1) * (y - y1)); + if (side_v == 0) { + side = Side::ON; + } else if (side_v < 0) { + side = Side::LEFT; + } else { + side = Side::RIGHT; + } + + if (side == Side::ON && (((x1 <= x && x < x2) || (x1 >= x && x > x2)) || + ((y1 <= y && y < y2) || (y1 >= y && y > y2)))) { + + // return Contains::ON_EDGE; + contains = false; + break; + } else if (side == Side::LEFT && (y1 < y && y <= y2)) { + winding_number++; + } else if (side == Side::RIGHT && (y2 <= y && y < y1)) { + winding_number--; + } + + x1 = x2; + y1 = y2; + } + bool in_ring = winding_number != 0; + if (first) { + if (!in_ring) { + // if the first ring is not inside, then the point is not inside the polygon + contains = false; + break; + } else { + // if the first ring is inside, then the point is inside the polygon + // but might be inside a hole, so we continue + contains = true; + } + } else { + if (in_ring) { + // if the hole is inside, then the point is not inside the polygon + contains = false; + break; + } // else continue + } + first = false; + } + result_data[polygon_idx] = contains; + } + if (count == 1) { + result.SetVectorType(VectorType::CONSTANT_VECTOR); + 
} + } + + static void Execute(DataChunk &args, ExpressionState &state, Vector &result) { + Operation(args.data[0], args.data[1], result, args.size()); + } + + //------------------------------------------------------------------------------------------------------------------ + // Documentation + //------------------------------------------------------------------------------------------------------------------ + + // TODO: Add example + static constexpr auto DESCRIPTION = ""; + static constexpr auto EXAMPLE = ""; + + //------------------------------------------------------------------------------------------------------------------ + // Register + //------------------------------------------------------------------------------------------------------------------ + static void Register(DatabaseInstance &db) { + FunctionBuilder::RegisterScalar(db, "ST_Contains", [](ScalarFunctionBuilder &func) { + func.AddVariant([](ScalarFunctionVariantBuilder &variant) { + variant.AddParameter("geom1", GeoTypes::POLYGON_2D()); + variant.AddParameter("geom2", GeoTypes::POINT_2D()); + variant.SetReturnType(LogicalType::BOOLEAN); + + variant.SetFunction(Execute); + }); + + func.SetDescription(DESCRIPTION); + func.SetExample(EXAMPLE); + + func.SetTag("ext", "spatial"); + func.SetTag("category", "relation"); + }); + } +}; + +//====================================================================================================================== +// ST_Dimension +//====================================================================================================================== + +struct ST_Dimension { + + //------------------------------------------------------------------------------------------------------------------ + // Execute + //------------------------------------------------------------------------------------------------------------------ + static void Execute(DataChunk &args, ExpressionState &state, Vector &result) { + auto &lstate = LocalState::ResetAndGet(state); + + 
UnaryExecutor::Execute(args.data[0], result, args.size(), [&](const string_t &blob) { + const auto geom = lstate.Deserialize(blob); + return sgl::ops::max_surface_dimension(&geom, false); + }); + } + + //------------------------------------------------------------------------------------------------------------------ + // Documentation + //------------------------------------------------------------------------------------------------------------------ + static constexpr auto DESCRIPTION = "Returns the dimension of a geometry."; + + static constexpr auto EXAMPLE = R"( + select st_dimension('POLYGON((0 0, 0 1, 1 1, 1 0, 0 0))'::geometry); + ---- + 2 + )"; + + //------------------------------------------------------------------------------------------------------------------ + // Register + //------------------------------------------------------------------------------------------------------------------ + static void Register(DatabaseInstance &db) { + FunctionBuilder::RegisterScalar(db, "ST_Dimension", [](ScalarFunctionBuilder &func) { + func.AddVariant([](ScalarFunctionVariantBuilder &variant) { + variant.AddParameter("geom", GeoTypes::GEOMETRY()); + variant.SetReturnType(LogicalType::INTEGER); + + variant.SetInit(LocalState::Init); + variant.SetFunction(Execute); + }); + + func.SetDescription(DESCRIPTION); + func.SetExample(EXAMPLE); + + func.SetTag("ext", "spatial"); + func.SetTag("category", "property"); + }); + } +}; + +//====================================================================================================================== +// ST_Distance +//====================================================================================================================== + +struct ST_Distance { + + //------------------------------------------------------------------------------------------------------------------ + // Helpers + //------------------------------------------------------------------------------------------------------------------ + // TODO: 
Move this to SGL, into VectorOperations or deprecate. + template + static PointXY ClosestPointOnSegment(const PointXY &p, const PointXY &p1, const PointXY &p2) { + // If the segment is a Vertex, then return that Vertex + if (p1.ApproxEqualTo(p2)) { + return p1; + } + auto n1 = ((p.x - p1.x) * (p2.x - p1.x) + (p.y - p1.y) * (p2.y - p1.y)); + auto n2 = ((p2.x - p1.x) * (p2.x - p1.x) + (p2.y - p1.y) * (p2.y - p1.y)); + auto r = n1 / n2; + // If r is less than 0, then the Point is outside the segment in the p1 direction + if (r <= 0) { + return p1; + } + // If r is greater than 1, then the Point is outside the segment in the p2 direction + if (r >= 1) { + return p2; + } + // Interpolate between p1 and p2 + return PointXY(p1.x + r * (p2.x - p1.x), p1.y + r * (p2.y - p1.y)); + } + + template + static double DistanceToSegmentSquared(const PointXY &px, const PointXY &ax, const PointXY &bx) { + auto point = ClosestPointOnSegment(px, ax, bx); + auto dx = px.x - point.x; + auto dy = px.y - point.y; + return dx * dx + dy * dy; + } + + //------------------------------------------------------------------------------ + // POINT_2D/POINT_2D + //------------------------------------------------------------------------------ + static void ExecutePointPoint(DataChunk &args, ExpressionState &state, Vector &result) { + D_ASSERT(args.data.size() == 2); + auto &left = args.data[0]; + auto &right = args.data[1]; + auto count = args.size(); + + left.Flatten(count); + right.Flatten(count); + + auto &left_entries = StructVector::GetEntries(left); + auto &right_entries = StructVector::GetEntries(right); + + auto left_x = FlatVector::GetData(*left_entries[0]); + auto left_y = FlatVector::GetData(*left_entries[1]); + auto right_x = FlatVector::GetData(*right_entries[0]); + auto right_y = FlatVector::GetData(*right_entries[1]); + + auto out_data = FlatVector::GetData(result); + for (idx_t i = 0; i < count; i++) { + out_data[i] = std::sqrt(std::pow(left_x[i] - right_x[i], 2) + std::pow(left_y[i] - 
right_y[i], 2)); + } + + if (count == 1) { + result.SetVectorType(VectorType::CONSTANT_VECTOR); + } + } + + //------------------------------------------------------------------------------ + // POINT_2D/LINESTRING_2D + //------------------------------------------------------------------------------ + static void PointLineStringOperation(Vector &in_point, Vector &in_line, Vector &result, idx_t count) { + + // Set up the point vectors + in_point.Flatten(count); + auto &p_children = StructVector::GetEntries(in_point); + auto &p_x = p_children[0]; + auto &p_y = p_children[1]; + auto p_x_data = FlatVector::GetData(*p_x); + auto p_y_data = FlatVector::GetData(*p_y); + + // Set up the line vectors + in_line.Flatten(count); + + auto &inner = ListVector::GetEntry(in_line); + auto &children = StructVector::GetEntries(inner); + auto &x = children[0]; + auto &y = children[1]; + auto x_data = FlatVector::GetData(*x); + auto y_data = FlatVector::GetData(*y); + auto lines = ListVector::GetData(in_line); + + auto result_data = FlatVector::GetData(result); + for (idx_t i = 0; i < count; i++) { + auto offset = lines[i].offset; + auto length = lines[i].length; + + double min_distance = std::numeric_limits::max(); + auto p = PointXY(p_x_data[i], p_y_data[i]); + + // Loop over the segments and find the closes one to the point + for (idx_t j = 0; j < length - 1; j++) { + auto a = PointXY(x_data[offset + j], y_data[offset + j]); + auto b = PointXY(x_data[offset + j + 1], y_data[offset + j + 1]); + + auto distance = DistanceToSegmentSquared(p, a, b); + if (distance < min_distance) { + min_distance = distance; + + if (min_distance == 0) { + break; + } + } + } + result_data[i] = std::sqrt(min_distance); + } + if (count == 1) { + result.SetVectorType(VectorType::CONSTANT_VECTOR); + } + } + + static void ExecutePointLineString(DataChunk &args, ExpressionState &state, Vector &result) { + D_ASSERT(args.data.size() == 2); + auto &in_point = args.data[0]; + auto &in_line = args.data[1]; + auto 
count = args.size(); + PointLineStringOperation(in_point, in_line, result, count); + } + + static void ExecuteLineStringPoint(DataChunk &args, ExpressionState &state, Vector &result) { + D_ASSERT(args.data.size() == 2); + auto &in_line = args.data[0]; + auto &in_point = args.data[1]; + auto count = args.size(); + PointLineStringOperation(in_point, in_line, result, count); + } + + //------------------------------------------------------------------------------------------------------------------ + // Documentation + //------------------------------------------------------------------------------------------------------------------ + // TODO: add example/description + static constexpr auto DESCRIPTION = ""; + static constexpr auto EXAMPLE = ""; + + //------------------------------------------------------------------------------------------------------------------ + // Register + //------------------------------------------------------------------------------------------------------------------ + static void Register(DatabaseInstance &db) { + FunctionBuilder::RegisterScalar(db, "ST_Distance", [](ScalarFunctionBuilder &func) { + func.AddVariant([](ScalarFunctionVariantBuilder &variant) { + variant.AddParameter("point1", GeoTypes::POINT_2D()); + variant.AddParameter("point2", GeoTypes::POINT_2D()); + variant.SetReturnType(LogicalType::DOUBLE); + variant.SetFunction(ExecutePointPoint); + }); + + func.AddVariant([](ScalarFunctionVariantBuilder &variant) { + variant.AddParameter("point", GeoTypes::POINT_2D()); + variant.AddParameter("linestring", GeoTypes::LINESTRING_2D()); + variant.SetReturnType(LogicalType::DOUBLE); + variant.SetFunction(ExecutePointLineString); + }); + + func.AddVariant([](ScalarFunctionVariantBuilder &variant) { + variant.AddParameter("linestring", GeoTypes::LINESTRING_2D()); + variant.AddParameter("point", GeoTypes::POINT_2D()); + variant.SetReturnType(LogicalType::DOUBLE); + variant.SetFunction(ExecuteLineStringPoint); + }); + + 
func.SetDescription(DESCRIPTION); + func.SetExample(EXAMPLE); + + func.SetTag("ext", "spatial"); + func.SetTag("category", "property"); + }); + } +}; + +//====================================================================================================================== +// ST_Dump +//====================================================================================================================== + +struct ST_Dump { + + //------------------------------------------------------------------------------------------------------------------ + // GEOMETRY + //------------------------------------------------------------------------------------------------------------------ + static void Execute(DataChunk &args, ExpressionState &state, Vector &result) { + auto &lstate = LocalState::ResetAndGet(state); + auto count = args.size(); + + auto &geom_vec = args.data[0]; + UnifiedVectorFormat geom_format; + geom_vec.ToUnifiedFormat(count, geom_format); + + idx_t total_geom_count = 0; + idx_t total_path_count = 0; + + vector>> items; + vector path; + + for (idx_t out_row_idx = 0; out_row_idx < count; out_row_idx++) { + auto in_row_idx = geom_format.sel->get_index(out_row_idx); + + if (!geom_format.validity.RowIsValid(in_row_idx)) { + FlatVector::SetNull(result, out_row_idx, true); + continue; + } + + auto &blob = UnifiedVectorFormat::GetData(geom_format)[in_row_idx]; + auto geom = lstate.Deserialize(blob); + + // Traverse the geometries + // TODO: Move this to SGL + const sgl::geometry *part = &geom; + const sgl::geometry *root = part->get_parent(); + + path.clear(); + items.clear(); + + bool is_done = false; + while (!is_done) { + switch (part->get_type()) { + case sgl::geometry_type::POINT: + case sgl::geometry_type::LINESTRING: + case sgl::geometry_type::POLYGON: { + // Add the path + items.emplace_back(part, path); + } break; + case sgl::geometry_type::MULTI_POINT: + case sgl::geometry_type::MULTI_LINESTRING: + case sgl::geometry_type::MULTI_POLYGON: + case 
sgl::geometry_type::MULTI_GEOMETRY: { + if (!part->is_empty()) { + part = part->get_first_part(); + path.push_back(1); + continue; + } + } break; + default: { + throw NotImplementedException("Unsupported geometry type in ST_Dump"); + } + } + + while (true) { + const auto parent = part->get_parent(); + + if (parent == root) { + is_done = true; + break; + } + + if (part != parent->get_last_part()) { + path.back()++; + part = part->get_next(); + break; + } + + part = parent; + path.pop_back(); + } + } + + // Push to the result vector + auto result_entries = ListVector::GetData(result); + + auto geom_offset = total_geom_count; + auto geom_length = items.size(); + + result_entries[out_row_idx].length = geom_length; + result_entries[out_row_idx].offset = geom_offset; + + total_geom_count += geom_length; + + ListVector::Reserve(result, total_geom_count); + ListVector::SetListSize(result, total_geom_count); + + auto &result_list = ListVector::GetEntry(result); + auto &result_list_children = StructVector::GetEntries(result_list); + auto &result_geom_vec = result_list_children[0]; + auto &result_path_vec = result_list_children[1]; + + // The child geometries must share the same properties as the parent geometry + auto geom_data = FlatVector::GetData(*result_geom_vec); + for (idx_t i = 0; i < geom_length; i++) { + // Write the geometry + auto item_blob = std::get<0>(items[i]); + geom_data[geom_offset + i] = lstate.Serialize(*result_geom_vec, *item_blob); + + // Now write the paths + auto &path = std::get<1>(items[i]); + auto path_offset = total_path_count; + auto path_length = path.size(); + + total_path_count += path_length; + + ListVector::Reserve(*result_path_vec, total_path_count); + ListVector::SetListSize(*result_path_vec, total_path_count); + + auto path_entries = ListVector::GetData(*result_path_vec); + + path_entries[geom_offset + i].offset = path_offset; + path_entries[geom_offset + i].length = path_length; + + auto &path_data_vec = 
ListVector::GetEntry(*result_path_vec); + auto path_data = FlatVector::GetData(path_data_vec); + + for (idx_t j = 0; j < path_length; j++) { + path_data[path_offset + j] = path[j]; + } + } + } + + if (count == 1) { + result.SetVectorType(VectorType::CONSTANT_VECTOR); + } + } + + //------------------------------------------------------------------------------------------------------------------ + // Documentation + //------------------------------------------------------------------------------------------------------------------ + static constexpr auto DESCRIPTION = R"( + Dumps a geometry into a list of sub-geometries and their "path" in the original geometry. + )"; + + static constexpr auto EXAMPLE = R"( + select st_dump('MULTIPOINT(1 2,3 4)'::geometry); + ---- + [{'geom': 'POINT(1 2)', 'path': [0]}, {'geom': 'POINT(3 4)', 'path': [1]}] + )"; + + //------------------------------------------------------------------------------------------------------------------ + // Register + //------------------------------------------------------------------------------------------------------------------ + static void Register(DatabaseInstance &db) { + + FunctionBuilder::RegisterScalar(db, "ST_Dump", [](ScalarFunctionBuilder &func) { + func.AddVariant([](ScalarFunctionVariantBuilder &variant) { + variant.AddParameter("geom", GeoTypes::GEOMETRY()); + + variant.SetReturnType(LogicalType::LIST(LogicalType::STRUCT( + {{"geom", GeoTypes::GEOMETRY()}, {"path", LogicalType::LIST(LogicalType::INTEGER)}}))); + + variant.SetInit(LocalState::Init); + variant.SetFunction(Execute); + }); + + func.SetDescription(DESCRIPTION); + func.SetExample(EXAMPLE); + + func.SetTag("ext", "spatial"); + func.SetTag("category", "construction"); + }); + } +}; + +//====================================================================================================================== +// ST_Extent 
+//======================================================================================================================
+
+// Scalar function returning the 2D bounding box (BOX_2D) of a GEOMETRY or WKB_BLOB value.
+struct ST_Extent {
+
+	//------------------------------------------------------------------------------------------------------------------
+	// GEOMETRY
+	//------------------------------------------------------------------------------------------------------------------
+	// Deserializes each geometry and writes its XY extent into the four struct children of the
+	// result. NULL input rows and geometries for which try_get_extent_xy fails yield NULL.
+	static void Execute(DataChunk &args, ExpressionState &state, Vector &result) {
+		auto &lstate = LocalState::ResetAndGet(state);
+
+		// Children of the result struct, in the order min_x, min_y, max_x, max_y
+		const auto &bbox_vec = StructVector::GetEntries(result);
+		const auto min_x_data = FlatVector::GetData(*bbox_vec[0]);
+		const auto min_y_data = FlatVector::GetData(*bbox_vec[1]);
+		const auto max_x_data = FlatVector::GetData(*bbox_vec[2]);
+		const auto max_y_data = FlatVector::GetData(*bbox_vec[3]);
+
+		UnifiedVectorFormat input_vdata;
+		args.data[0].ToUnifiedFormat(args.size(), input_vdata);
+		const auto input_data = UnifiedVectorFormat::GetData(input_vdata);
+
+		const auto count = args.size();
+
+		for (idx_t out_idx = 0; out_idx < count; out_idx++) {
+			// Input rows are addressed through the selection vector, output rows directly
+			const auto row_idx = input_vdata.sel->get_index(out_idx);
+			if (!input_vdata.validity.RowIsValid(row_idx)) {
+				// null in -> null out
+				FlatVector::SetNull(result, out_idx, true);
+				continue;
+			}
+
+			const auto &blob = input_data[row_idx];
+			const auto geom = lstate.Deserialize(blob);
+
+			auto bbox = sgl::box_xy::smallest();
+
+			if (!sgl::ops::try_get_extent_xy(&geom, &bbox)) {
+				// no vertices -> no extent -> return null
+				FlatVector::SetNull(result, out_idx, true);
+				continue;
+			}
+
+			min_x_data[out_idx] = bbox.min.x;
+			min_y_data[out_idx] = bbox.min.y;
+			max_x_data[out_idx] = bbox.max.x;
+			max_y_data[out_idx] = bbox.max.y;
+		}
+
+		if (args.AllConstant()) {
+			result.SetVectorType(VectorType::CONSTANT_VECTOR);
+		}
+	}
+
+	//------------------------------------------------------------------------------------------------------------------
+	// Execute (WKB)
+	//------------------------------------------------------------------------------------------------------------------
+	// Computes the extent from the WKB bytes via wkb_reader_try_parse_stats, without building a
+	// geometry. Throws InvalidInputException when the WKB cannot be parsed; rows with zero
+	// vertices and NULL rows yield NULL.
+	static void ExecuteWKB(DataChunk &args, ExpressionState &state, Vector &result) {
+		const auto count = args.size();
+		auto &input = args.data[0];
+
+		UnifiedVectorFormat input_vdata;
+		input.ToUnifiedFormat(count, input_vdata);
+
+		const auto &struct_vec = StructVector::GetEntries(result);
+		const auto min_x_data = FlatVector::GetData(*struct_vec[0]);
+		const auto min_y_data = FlatVector::GetData(*struct_vec[1]);
+		const auto max_x_data = FlatVector::GetData(*struct_vec[2]);
+		const auto max_y_data = FlatVector::GetData(*struct_vec[3]);
+
+		// Fixed-size scratch buffer handed to the reader as its stack (stack_buf / stack_cap)
+		static constexpr auto MAX_STACK_DEPTH = 128;
+		uint32_t recursion_stack[MAX_STACK_DEPTH] = {};
+
+		// One reader is configured up front and re-pointed at each row's bytes
+		sgl::ops::wkb_reader reader = {};
+		reader.allow_mixed_zm = true;
+		reader.nan_as_empty = true;
+		reader.stack_buf = recursion_stack;
+		reader.stack_cap = MAX_STACK_DEPTH;
+
+		for (idx_t out_idx = 0; out_idx < count; out_idx++) {
+			const auto row_idx = input_vdata.sel->get_index(out_idx);
+
+			if (!input_vdata.validity.RowIsValid(row_idx)) {
+				FlatVector::SetNull(result, out_idx, true);
+				continue;
+			}
+
+			const auto &blob = UnifiedVectorFormat::GetData(input_vdata)[row_idx];
+
+			reader.buf = blob.GetDataUnsafe();
+			reader.end = reader.buf + blob.GetSize();
+
+			sgl::box_xy bbox = {};
+			size_t vertex_count = 0;
+			if (!sgl::ops::wkb_reader_try_parse_stats(&reader, &bbox, &vertex_count)) {
+				const auto error = sgl::ops::wkb_reader_get_error_message(&reader);
+				throw InvalidInputException("Failed to parse WKB: %s", error);
+			}
+
+			if (vertex_count == 0) {
+				// no vertices -> no extent -> return null
+				FlatVector::SetNull(result, out_idx, true);
+				continue;
+			}
+
+			// Else, write the bounding box
+			min_x_data[out_idx] = bbox.min.x;
+			min_y_data[out_idx] = bbox.min.y;
+			max_x_data[out_idx] = bbox.max.x;
+			max_y_data[out_idx] = bbox.max.y;
+		}
+
+		if (args.AllConstant()) {
+			result.SetVectorType(VectorType::CONSTANT_VECTOR);
+		}
+	}
+
+	//------------------------------------------------------------------------------------------------------------------
+	// Documentation
+	//------------------------------------------------------------------------------------------------------------------
+	static constexpr auto DESCRIPTION = R"(
+		Returns the minimal bounding box enclosing the input geometry
+	)";
+
+	// TODO: Example
+	static constexpr auto EXAMPLE = "";
+
+	//------------------------------------------------------------------------------------------------------------------
+	// Register
+	//------------------------------------------------------------------------------------------------------------------
+	// Registers two variants of ST_Extent, both returning BOX_2D: one taking GEOMETRY (needs the
+	// local state for deserialization) and one taking WKB_BLOB (stateless).
+	static void Register(DatabaseInstance &db) {
+		FunctionBuilder::RegisterScalar(db, "ST_Extent", [](ScalarFunctionBuilder &func) {
+			func.AddVariant([](ScalarFunctionVariantBuilder &variant) {
+				variant.AddParameter("geom", GeoTypes::GEOMETRY());
+				variant.SetReturnType(GeoTypes::BOX_2D());
+
+				variant.SetInit(LocalState::Init);
+				variant.SetFunction(Execute);
+			});
+
+			func.AddVariant([](ScalarFunctionVariantBuilder &variant) {
+				variant.AddParameter("wkb", GeoTypes::WKB_BLOB());
+				variant.SetReturnType(GeoTypes::BOX_2D());
+
+				variant.SetFunction(ExecuteWKB);
+			});
+
+			func.SetDescription(DESCRIPTION);
+			func.SetExample(EXAMPLE);
+
+			func.SetTag("ext", "spatial");
+			func.SetTag("category", "property");
+		});
+	}
+};
+
+//======================================================================================================================
+// ST_Extent_Approx
+//======================================================================================================================
+
+struct ST_Extent_Approx {
+
+	//------------------------------------------------------------------------------------------------------------------
+	// Execute
+
//------------------------------------------------------------------------------------------------------------------ + static void Execute(DataChunk &args, ExpressionState &state, Vector &result) { + + const auto count = args.size(); + auto &input = args.data[0]; + + const auto &struct_vec = StructVector::GetEntries(result); + const auto min_x_data = FlatVector::GetData(*struct_vec[0]); + const auto min_y_data = FlatVector::GetData(*struct_vec[1]); + const auto max_x_data = FlatVector::GetData(*struct_vec[2]); + const auto max_y_data = FlatVector::GetData(*struct_vec[3]); + + UnifiedVectorFormat input_vdata; + input.ToUnifiedFormat(count, input_vdata); + const auto input_data = UnifiedVectorFormat::GetData(input_vdata); + + for (idx_t i = 0; i < count; i++) { + const auto row_idx = input_vdata.sel->get_index(i); + if (input_vdata.validity.RowIsValid(row_idx)) { + auto &blob = input_data[row_idx]; + + // Try to get the cached bounding box from the blob + Box2D bbox; + if (blob.TryGetCachedBounds(bbox)) { + min_x_data[i] = MathUtil::DoubleToFloatDown(bbox.min.x); + min_y_data[i] = MathUtil::DoubleToFloatDown(bbox.min.y); + max_x_data[i] = MathUtil::DoubleToFloatUp(bbox.max.x); + max_y_data[i] = MathUtil::DoubleToFloatUp(bbox.max.y); + } else { + // No bounding box, return null + FlatVector::SetNull(result, i, true); + } + } else { + // Null input, return null + FlatVector::SetNull(result, i, true); + } + } + + if (input.GetVectorType() == VectorType::CONSTANT_VECTOR) { + result.SetVectorType(VectorType::CONSTANT_VECTOR); + } + } + + //------------------------------------------------------------------------------------------------------------------ + // Documentation + //------------------------------------------------------------------------------------------------------------------ + // TODO: Add docs + + //------------------------------------------------------------------------------------------------------------------ + // Register + 
//------------------------------------------------------------------------------------------------------------------ + static void Register(DatabaseInstance &db) { + FunctionBuilder::RegisterScalar(db, "ST_Extent_Approx", [](ScalarFunctionBuilder &func) { + func.AddVariant([](ScalarFunctionVariantBuilder &variant) { + variant.AddParameter("geom", GeoTypes::GEOMETRY()); + variant.SetReturnType(GeoTypes::BOX_2DF()); + + variant.SetFunction(Execute); + }); + + func.SetTag("ext", "spatial"); + func.SetTag("category", "property"); + }); + } +}; + +//====================================================================================================================== +// ST_ExteriorRing +//====================================================================================================================== + +struct ST_ExteriorRing { + + //------------------------------------------------------------------------------------------------------------------ + // GEOMETRY + //------------------------------------------------------------------------------------------------------------------ + static void ExecuteGeometry(DataChunk &args, ExpressionState &state, Vector &result) { + auto &lstate = LocalState::ResetAndGet(state); + + UnaryExecutor::ExecuteWithNulls( + args.data[0], result, args.size(), [&](const string_t &blob, ValidityMask &mask, const idx_t idx) { + // TODO: Peek dont deserialize + const auto geom = lstate.Deserialize(blob); + + if (geom.get_type() != sgl::geometry_type::POLYGON) { + mask.SetInvalid(idx); + return string_t {}; + } + + if (geom.is_empty()) { + const auto empty = sgl::linestring::make_empty(geom.has_z(), geom.has_m()); + return lstate.Serialize(result, empty); + } + + const auto shell = geom.get_first_part(); + return lstate.Serialize(result, *shell); + }); + } + + //------------------------------------------------------------------------------------------------------------------ + // POLYGON_2D + 
//------------------------------------------------------------------------------------------------------------------
+	// POLYGON_2D -> LINESTRING_2D: copies the vertices of the first ring of every polygon into
+	// the result list. Works in two passes so the child vector is sized once: the first pass
+	// sums the exterior ring lengths, the second writes list entries and coordinates.
+	// Empty polygons give an empty linestring, NULL polygons give NULL.
+	static void ExecutePolygon(DataChunk &args, ExpressionState &state, Vector &result) {
+		D_ASSERT(args.data.size() == 1);
+		auto &poly_vec = args.data[0];
+		auto poly_entries = ListVector::GetData(poly_vec);
+		auto &ring_vec = ListVector::GetEntry(poly_vec);
+		auto ring_entries = ListVector::GetData(ring_vec);
+		auto &vertex_vec = ListVector::GetEntry(ring_vec);
+		auto &vertex_vec_children = StructVector::GetEntries(vertex_vec);
+		auto poly_x_data = FlatVector::GetData(*vertex_vec_children[0]);
+		auto poly_y_data = FlatVector::GetData(*vertex_vec_children[1]);
+
+		auto count = args.size();
+		UnifiedVectorFormat poly_format;
+		poly_vec.ToUnifiedFormat(count, poly_format);
+
+		// First figure out how many vertices we need
+		idx_t total_vertex_count = 0;
+		for (idx_t i = 0; i < count; i++) {
+			auto row_idx = poly_format.sel->get_index(i);
+			if (poly_format.validity.RowIsValid(row_idx)) {
+				auto poly = poly_entries[row_idx];
+				if (poly.length != 0) {
+					// We only care about the exterior ring (first entry)
+					auto &ring = ring_entries[poly.offset];
+					total_vertex_count += ring.length;
+				}
+			}
+		}
+
+		// Now we can allocate the result vector
+		auto &line_vec = result;
+		ListVector::Reserve(line_vec, total_vertex_count);
+		ListVector::SetListSize(line_vec, total_vertex_count);
+
+		auto line_entries = ListVector::GetData(line_vec);
+		auto &line_coord_vec = StructVector::GetEntries(ListVector::GetEntry(line_vec));
+		auto line_data_x = FlatVector::GetData(*line_coord_vec[0]);
+		auto line_data_y = FlatVector::GetData(*line_coord_vec[1]);
+
+		// Now we can fill the result vector
+		idx_t line_data_offset = 0;
+		for (idx_t i = 0; i < count; i++) {
+			auto row_idx = poly_format.sel->get_index(i);
+			if (poly_format.validity.RowIsValid(row_idx)) {
+				auto poly = poly_entries[row_idx];
+
+				if (poly.length == 0) {
+					// Polygon without rings: emit an empty linestring
+					line_entries[i].offset = 0;
+					line_entries[i].length = 0;
+					continue;
+				}
+
+				// We only care about the exterior ring (first entry)
+				auto &ring = ring_entries[poly.offset];
+
+				auto &line_entry = line_entries[i];
+				line_entry.offset = line_data_offset;
+				line_entry.length = ring.length;
+
+				for (idx_t coord_idx = 0; coord_idx < ring.length; coord_idx++) {
+					line_data_x[line_entry.offset + coord_idx] = poly_x_data[ring.offset + coord_idx];
+					line_data_y[line_entry.offset + coord_idx] = poly_y_data[ring.offset + coord_idx];
+				}
+
+				line_data_offset += ring.length;
+			} else {
+				FlatVector::SetNull(line_vec, i, true);
+			}
+		}
+		if (count == 1) {
+			result.SetVectorType(VectorType::CONSTANT_VECTOR);
+		}
+	}
+
+	//------------------------------------------------------------------------------------------------------------------
+	// Documentation
+	//------------------------------------------------------------------------------------------------------------------
+	static constexpr auto DESCRIPTION = "Returns the exterior ring (shell) of a polygon geometry.";
+
+	static constexpr auto EXAMPLE = "";
+
+	//------------------------------------------------------------------------------------------------------------------
+	// Register
+	//------------------------------------------------------------------------------------------------------------------
+	// Registers ST_ExteriorRing for GEOMETRY -> GEOMETRY and POLYGON_2D -> LINESTRING_2D
+	static void Register(DatabaseInstance &db) {
+		FunctionBuilder::RegisterScalar(db, "ST_ExteriorRing", [](ScalarFunctionBuilder &func) {
+			func.AddVariant([](ScalarFunctionVariantBuilder &variant) {
+				variant.AddParameter("geom", GeoTypes::GEOMETRY());
+				variant.SetReturnType(GeoTypes::GEOMETRY());
+
+				variant.SetInit(LocalState::Init);
+				variant.SetFunction(ExecuteGeometry);
+			});
+
+			func.AddVariant([](ScalarFunctionVariantBuilder &variant) {
+				variant.AddParameter("polygon", GeoTypes::POLYGON_2D());
+				variant.SetReturnType(GeoTypes::LINESTRING_2D());
+
+				variant.SetFunction(ExecutePolygon);
+			});
+
+			func.SetDescription(DESCRIPTION);
+			func.SetExample(EXAMPLE);
+
func.SetTag("ext", "spatial"); + func.SetTag("category", "property"); + }); + } +}; + +//====================================================================================================================== +// ST_FlipCoordinates +//====================================================================================================================== + +struct ST_FlipCoordinates { + + //------------------------------------------------------------------------------------------------------------------ + // POINT_2D + //------------------------------------------------------------------------------------------------------------------ + // TODO: We should be able to optimize these and avoid the flatten + static void ExecutePoint(DataChunk &args, ExpressionState &state, Vector &result) { + auto input = args.data[0]; + auto count = args.size(); + + // TODO: Avoid flatten + input.Flatten(count); + + auto &coords_in = StructVector::GetEntries(input); + auto x_data_in = FlatVector::GetData(*coords_in[0]); + auto y_data_in = FlatVector::GetData(*coords_in[1]); + + auto &coords_out = StructVector::GetEntries(result); + auto x_data_out = FlatVector::GetData(*coords_out[0]); + auto y_data_out = FlatVector::GetData(*coords_out[1]); + + memcpy(x_data_out, y_data_in, count * sizeof(double)); + memcpy(y_data_out, x_data_in, count * sizeof(double)); + + if (count == 1) { + result.SetVectorType(VectorType::CONSTANT_VECTOR); + } + } + + //------------------------------------------------------------------------------------------------------------------ + // LINESTRING_2D + //------------------------------------------------------------------------------------------------------------------ + static void ExecuteLineString(DataChunk &args, ExpressionState &state, Vector &result) { + auto input = args.data[0]; + auto count = args.size(); + + // TODO: Avoid flatten + input.Flatten(count); + + auto coord_vec_in = ListVector::GetEntry(input); + auto &coords_in = 
StructVector::GetEntries(coord_vec_in); + auto x_data_in = FlatVector::GetData(*coords_in[0]); + auto y_data_in = FlatVector::GetData(*coords_in[1]); + + auto coord_count = ListVector::GetListSize(input); + ListVector::Reserve(result, coord_count); + ListVector::SetListSize(result, coord_count); + + auto line_entries_in = ListVector::GetData(input); + auto line_entries_out = ListVector::GetData(result); + memcpy(line_entries_out, line_entries_in, count * sizeof(list_entry_t)); + + auto coord_vec_out = ListVector::GetEntry(result); + auto &coords_out = StructVector::GetEntries(coord_vec_out); + auto x_data_out = FlatVector::GetData(*coords_out[0]); + auto y_data_out = FlatVector::GetData(*coords_out[1]); + + memcpy(x_data_out, y_data_in, coord_count * sizeof(double)); + memcpy(y_data_out, x_data_in, coord_count * sizeof(double)); + + if (count == 1) { + result.SetVectorType(VectorType::CONSTANT_VECTOR); + } + } + + //------------------------------------------------------------------------------------------------------------------ + // POLYGON_2D + //------------------------------------------------------------------------------------------------------------------ + static void ExecutePolygon(DataChunk &args, ExpressionState &state, Vector &result) { + auto input = args.data[0]; + auto count = args.size(); + + // TODO: Avoid flatten + input.Flatten(count); + + auto ring_vec_in = ListVector::GetEntry(input); + auto ring_count = ListVector::GetListSize(input); + + auto coord_vec_in = ListVector::GetEntry(ring_vec_in); + auto &coords_in = StructVector::GetEntries(coord_vec_in); + auto x_data_in = FlatVector::GetData(*coords_in[0]); + auto y_data_in = FlatVector::GetData(*coords_in[1]); + + auto coord_count = ListVector::GetListSize(ring_vec_in); + + ListVector::Reserve(result, ring_count); + ListVector::SetListSize(result, ring_count); + auto ring_vec_out = ListVector::GetEntry(result); + ListVector::Reserve(ring_vec_out, coord_count); + 
ListVector::SetListSize(ring_vec_out, coord_count); + + auto ring_entries_in = ListVector::GetData(input); + auto ring_entries_out = ListVector::GetData(result); + memcpy(ring_entries_out, ring_entries_in, count * sizeof(list_entry_t)); + + auto coord_entries_in = ListVector::GetData(ring_vec_in); + auto coord_entries_out = ListVector::GetData(ring_vec_out); + memcpy(coord_entries_out, coord_entries_in, ring_count * sizeof(list_entry_t)); + + auto coord_vec_out = ListVector::GetEntry(ring_vec_out); + auto &coords_out = StructVector::GetEntries(coord_vec_out); + auto x_data_out = FlatVector::GetData(*coords_out[0]); + auto y_data_out = FlatVector::GetData(*coords_out[1]); + + memcpy(x_data_out, y_data_in, coord_count * sizeof(double)); + memcpy(y_data_out, x_data_in, coord_count * sizeof(double)); + + if (count == 1) { + result.SetVectorType(VectorType::CONSTANT_VECTOR); + } + } + + //------------------------------------------------------------------------------------------------------------------ + // BOX_2D + //------------------------------------------------------------------------------------------------------------------ + static void ExecuteBox(DataChunk &args, ExpressionState &state, Vector &result) { + + auto input = args.data[0]; + auto count = args.size(); + + // TODO: Avoid flatten + input.Flatten(count); + + auto &children_in = StructVector::GetEntries(input); + auto min_x_in = FlatVector::GetData(*children_in[0]); + auto min_y_in = FlatVector::GetData(*children_in[1]); + auto max_x_in = FlatVector::GetData(*children_in[2]); + auto max_y_in = FlatVector::GetData(*children_in[3]); + + auto &children_out = StructVector::GetEntries(result); + auto min_x_out = FlatVector::GetData(*children_out[0]); + auto min_y_out = FlatVector::GetData(*children_out[1]); + auto max_x_out = FlatVector::GetData(*children_out[2]); + auto max_y_out = FlatVector::GetData(*children_out[3]); + + memcpy(min_x_out, min_y_in, count * sizeof(double)); + memcpy(min_y_out, min_x_in, 
count * sizeof(double)); + memcpy(max_x_out, max_y_in, count * sizeof(double)); + memcpy(max_y_out, max_x_in, count * sizeof(double)); + } + + //------------------------------------------------------------------------------------------------------------------ + // GEOMETRY + //------------------------------------------------------------------------------------------------------------------ + // TODO: Move this to SGL, make non-recursive + static void FlipPoint(ArenaAllocator &alloc, sgl::geometry *geom) { + if (!geom->is_empty()) { + const auto vertex_count = geom->get_count(); + const auto vertex_size = geom->get_vertex_size(); + const auto vertex_data = geom->get_vertex_data(); + + // Copy the vertex data + const auto new_vertex_data = alloc.AllocateAligned(vertex_count * vertex_size); + memcpy(new_vertex_data, vertex_data, vertex_count * vertex_size); + + // Flip the x and y coordinates + const auto vertex_ptr = reinterpret_cast(new_vertex_data); + std::swap(vertex_ptr[0], vertex_ptr[1]); + + // Update the vertex data + geom->set_vertex_data(new_vertex_data, 1); + } + } + + static void FlipLineString(ArenaAllocator &alloc, sgl::geometry *geom) { + if (!geom->is_empty()) { + const auto vertex_count = geom->get_count(); + const auto vertex_size = geom->get_vertex_size(); + const auto vertex_data = geom->get_vertex_data(); + + // Copy the vertex data + const auto new_vertex_data = alloc.AllocateAligned(vertex_count * vertex_size); + memcpy(new_vertex_data, vertex_data, vertex_count * vertex_size); + + // Flip the x and y coordinates + for (idx_t i = 0; i < vertex_count; i++) { + const auto x_ptr = reinterpret_cast(new_vertex_data + i * vertex_size); + const auto y_ptr = reinterpret_cast(new_vertex_data + i * vertex_size + sizeof(double)); + + std::swap(*x_ptr, *y_ptr); + } + + // Update the vertex data + geom->set_vertex_data(new_vertex_data, vertex_count); + } + } + + static void FlipPolygon(ArenaAllocator &alloc, sgl::geometry *geom) { + const auto tail = 
geom->get_last_part(); + auto head = tail; + if (head) { + do { + head = head->get_next(); + FlipLineString(alloc, head); + } while (head != tail); + } + } + + static void FlipRecursive(ArenaAllocator &alloc, sgl::geometry *geom) { + switch (geom->get_type()) { + case sgl::geometry_type::POINT: + FlipPoint(alloc, geom); + break; + case sgl::geometry_type::LINESTRING: + FlipLineString(alloc, geom); + break; + case sgl::geometry_type::POLYGON: + FlipPolygon(alloc, geom); + break; + case sgl::geometry_type::MULTI_POINT: { + const auto tail = geom->get_last_part(); + auto head = tail; + if (head) { + do { + FlipPoint(alloc, head); + head = head->get_next(); + } while (head != tail); + } + } break; + case sgl::geometry_type::MULTI_LINESTRING: { + const auto tail = geom->get_last_part(); + auto head = tail; + if (head) { + do { + FlipLineString(alloc, head); + head = head->get_next(); + } while (head != tail); + } + } break; + case sgl::geometry_type::MULTI_POLYGON: { + const auto tail = geom->get_last_part(); + auto head = tail; + if (head) { + do { + FlipPolygon(alloc, head); + head = head->get_next(); + } while (head != tail); + } + } break; + case sgl::geometry_type::MULTI_GEOMETRY: { + const auto tail = geom->get_last_part(); + auto head = tail; + if (head) { + do { + FlipRecursive(alloc, head); + head = head->get_next(); + } while (head != tail); + } + } break; + default: + D_ASSERT(false); + break; + } + } + + static void ExecuteGeometry(DataChunk &args, ExpressionState &state, Vector &result) { + + auto input = args.data[0]; + auto count = args.size(); + + UnaryExecutor::Execute(input, result, count, [&](const string_t &blob) { + // This is pretty memory intensive, so reset arena after each call + auto &lstate = LocalState::ResetAndGet(state); + auto &arena = lstate.GetArena(); + + // Deserialize the geometry + auto geom = lstate.Deserialize(blob); + // Flip the coordinates + FlipRecursive(arena, &geom); + + // Serialize the result + return 
lstate.Serialize(result, geom); + }); + } + + //------------------------------------------------------------------------------------------------------------------ + // Description + //------------------------------------------------------------------------------------------------------------------ + static constexpr auto DESCRIPTION = R"( + Returns a new geometry with the coordinates of the input geometry "flipped" so that x = y and y = x + )"; + + // TODO: Add example + static constexpr auto EXAMPLE = ""; + + //------------------------------------------------------------------------------------------------------------------ + // Register + //------------------------------------------------------------------------------------------------------------------ + static void Register(DatabaseInstance &db) { + FunctionBuilder::RegisterScalar(db, "ST_FlipCoordinates", [](ScalarFunctionBuilder &func) { + func.AddVariant([](ScalarFunctionVariantBuilder &variant) { + variant.AddParameter("geom", GeoTypes::GEOMETRY()); + variant.SetReturnType(GeoTypes::GEOMETRY()); + + variant.SetInit(LocalState::Init); + variant.SetFunction(ExecuteGeometry); + }); + + func.AddVariant([](ScalarFunctionVariantBuilder &variant) { + variant.AddParameter("point", GeoTypes::POINT_2D()); + variant.SetReturnType(GeoTypes::POINT_2D()); + + variant.SetFunction(ExecutePoint); + }); + + func.AddVariant([](ScalarFunctionVariantBuilder &variant) { + variant.AddParameter("linestring", GeoTypes::LINESTRING_2D()); + variant.SetReturnType(GeoTypes::LINESTRING_2D()); + + variant.SetFunction(ExecuteLineString); + }); + + func.AddVariant([](ScalarFunctionVariantBuilder &variant) { + variant.AddParameter("polygon", GeoTypes::POLYGON_2D()); + variant.SetReturnType(GeoTypes::POLYGON_2D()); + + variant.SetFunction(ExecutePolygon); + }); + + func.AddVariant([](ScalarFunctionVariantBuilder &variant) { + variant.AddParameter("box", GeoTypes::BOX_2D()); + variant.SetReturnType(GeoTypes::BOX_2D()); + + 
variant.SetFunction(ExecuteBox); + }); + + func.SetDescription(DESCRIPTION); + func.SetExample(EXAMPLE); + + func.SetTag("ext", "spatial"); + func.SetTag("category", "construction"); + }); + } +}; + +//====================================================================================================================== +// ST_Force 2D/3DZ/3DM/4D +//====================================================================================================================== + +template +struct ST_ForceBase { + + //------------------------------------------------------------------------------------------------------------------ + // Execute + //------------------------------------------------------------------------------------------------------------------ + static void Execute(DataChunk &args, ExpressionState &state, Vector &result) { + auto &lstate = LocalState::ResetAndGet(state); + auto &alloc = lstate.GetAllocator(); + + auto has_z = IMPL::HAS_Z; + auto has_m = IMPL::HAS_M; + + auto &input = args.data[0]; + const auto count = args.size(); + + // TODO: This can be optimized to avoid de/serialization if the vertex type already matches + + if (has_z && has_m) { + auto &z_values = args.data[1]; + auto &m_values = args.data[2]; + + TernaryExecutor::Execute( + input, z_values, m_values, result, count, [&](const string_t &blob, double z, double m) { + auto geom = lstate.Deserialize(blob); + sgl::ops::force_zm(alloc, &geom, true, true, z, m); + return lstate.Serialize(result, geom); + }); + + return; + } + + if (has_z || has_m) { + auto &zm_values = args.data[1]; + + BinaryExecutor::Execute( + input, zm_values, result, count, [&](const string_t &blob, double zm) { + const auto def_z = has_z ? zm : 0; + const auto def_m = has_m ? 
zm : 0; + + auto geom = lstate.Deserialize(blob); + sgl::ops::force_zm(alloc, &geom, has_z, has_m, def_z, def_m); + return lstate.Serialize(result, geom); + }); + + return; + } + + UnaryExecutor::Execute(input, result, count, [&](const string_t &blob) { + auto geom = lstate.Deserialize(blob); + sgl::ops::force_zm(alloc, &geom, false, false, 0, 0); + return lstate.Serialize(result, geom); + }); + } + + //------------------------------------------------------------------------------------------------------------------ + // Register + //------------------------------------------------------------------------------------------------------------------ + static void Register(DatabaseInstance &db) { + FunctionBuilder::RegisterScalar(db, IMPL::NAME, [](ScalarFunctionBuilder &func) { + func.AddVariant([](ScalarFunctionVariantBuilder &variant) { + variant.AddParameter("geom", GeoTypes::GEOMETRY()); + variant.SetReturnType(GeoTypes::GEOMETRY()); + + variant.SetInit(LocalState::Init); + variant.SetFunction(Execute); + }); + + func.SetDescription(IMPL::DESCRIPTION); + func.SetExample(IMPL::EXAMPLE); + + func.SetTag("ext", "spatial"); + func.SetTag("category", "construction"); + }); + } +}; + +struct ST_Force2D : ST_ForceBase { + static auto constexpr NAME = "ST_Force2D"; + static auto constexpr HAS_Z = false; + static auto constexpr HAS_M = false; + static auto constexpr EXAMPLE = ""; + static auto constexpr DESCRIPTION = R"( + Forces the vertices of a geometry to have X and Y components + + This function will drop any Z and M values from the input geometry, if present. If the input geometry is already 2D, it will be returned as is. 
+ )"; +}; + +struct ST_Force3DZ : ST_ForceBase { + static auto constexpr NAME = "ST_Force3DZ"; + static auto constexpr HAS_Z = true; + static auto constexpr HAS_M = false; + static auto constexpr EXAMPLE = ""; + static auto constexpr DESCRIPTION = R"( + Forces the vertices of a geometry to have X, Y and Z components + + The following cases apply: + - If the input geometry has a M component but no Z component, the M component will be replaced with the new Z value. + - If the input geometry has a Z component but no M component, it will be returned as is. + - If the input geometry has both a Z component and a M component, the M component will be removed. + - Otherwise, if the input geometry has neither a Z or M component, the new Z value will be added to the vertices of the input geometry. + )"; +}; + +struct ST_Force3DM : ST_ForceBase { + static auto constexpr NAME = "ST_Force3DM"; + static auto constexpr HAS_Z = false; + static auto constexpr HAS_M = true; + static auto constexpr EXAMPLE = ""; + static auto constexpr DESCRIPTION = R"( + Forces the vertices of a geometry to have X, Y and M components + + The following cases apply: + - If the input geometry has a Z component but no M component, the Z component will be replaced with the new M value. + - If the input geometry has a M component but no Z component, it will be returned as is. + - If the input geometry has both a Z component and a M component, the Z component will be removed. + - Otherwise, if the input geometry has neither a Z or M component, the new M value will be added to the vertices of the input geometry. 
+ )"; +}; + +struct ST_Force4D : ST_ForceBase { + static auto constexpr NAME = "ST_Force4D"; + static auto constexpr HAS_Z = true; + static auto constexpr HAS_M = true; + static auto constexpr EXAMPLE = ""; + static auto constexpr DESCRIPTION = R"( + Forces the vertices of a geometry to have X, Y, Z and M components + + The following cases apply: + - If the input geometry has a Z component but no M component, the new M value will be added to the vertices of the input geometry. + - If the input geometry has a M component but no Z component, the new Z value will be added to the vertices of the input geometry. + - If the input geometry has both a Z component and a M component, the geometry will be returned as is. + - Otherwise, if the input geometry has neither a Z or M component, the new Z and M values will be added to the vertices of the input geometry. + )"; +}; + +//====================================================================================================================== +// ST_GeometryType +//====================================================================================================================== + +struct ST_GeometryType { + + //------------------------------------------------------------------------------------------------------------------ + // Binding + //------------------------------------------------------------------------------------------------------------------ + // This function is a bit botched, but we cant change it without breaking backwards compatability + // therefore, we use these constants for the geometry type values, instead of the normal type enum + + static constexpr uint8_t LEGACY_POINT_TYPE = 0; + static constexpr uint8_t LEGACY_LINESTRING_TYPE = 1; + static constexpr uint8_t LEGACY_POLYGON_TYPE = 2; + static constexpr uint8_t LEGACY_MULTIPOINT_TYPE = 3; + static constexpr uint8_t LEGACY_MULTILINESTRING_TYPE = 4; + static constexpr uint8_t LEGACY_MULTIPOLYGON_TYPE = 5; + static constexpr uint8_t 
LEGACY_GEOMETRYCOLLECTION_TYPE = 6; + static constexpr uint8_t LEGACY_UNKNOWN_TYPE = 7; + + static unique_ptr Bind(ClientContext &context, ScalarFunction &bound_function, + vector> &arguments) { + // Create an enum type for all geometry types + // Ensure that these are in the same order as the GeometryType enum + const vector enum_values = {"POINT", "LINESTRING", "POLYGON", "MULTIPOINT", "MULTILINESTRING", + "MULTIPOLYGON", "GEOMETRYCOLLECTION", + // or... + "UNKNOWN"}; + + bound_function.return_type = GeoTypes::CreateEnumType("GEOMETRY_TYPE", enum_values); + return nullptr; + } + + //------------------------------------------------------------------------------------------------------------------ + // GEOMETRY + //------------------------------------------------------------------------------------------------------------------ + static void ExecuteGeometry(DataChunk &args, ExpressionState &state, Vector &result) { + auto &lstate = LocalState::ResetAndGet(state); + UnaryExecutor::Execute(args.data[0], result, args.size(), [&](const string_t &blob) { + // TODO: Peek dont deserialize + + const auto geom = lstate.Deserialize(blob); + switch (geom.get_type()) { + case sgl::geometry_type::POINT: + return LEGACY_POINT_TYPE; + case sgl::geometry_type::LINESTRING: + return LEGACY_LINESTRING_TYPE; + case sgl::geometry_type::POLYGON: + return LEGACY_POLYGON_TYPE; + case sgl::geometry_type::MULTI_POINT: + return LEGACY_MULTIPOINT_TYPE; + case sgl::geometry_type::MULTI_LINESTRING: + return LEGACY_MULTILINESTRING_TYPE; + case sgl::geometry_type::MULTI_POLYGON: + return LEGACY_MULTIPOLYGON_TYPE; + case sgl::geometry_type::MULTI_GEOMETRY: + return LEGACY_GEOMETRYCOLLECTION_TYPE; + default: + return LEGACY_UNKNOWN_TYPE; + } + }); + } + + //------------------------------------------------------------------------------------------------------------------ + // POINT_2D + 
//------------------------------------------------------------------------------------------------------------------ + static void ExecutePoint(DataChunk &args, ExpressionState &state, Vector &result) { + result.SetVectorType(VectorType::CONSTANT_VECTOR); + *ConstantVector::GetData(result) = LEGACY_POINT_TYPE; + } + + //------------------------------------------------------------------------------------------------------------------ + // LINESTRING_2D + //------------------------------------------------------------------------------------------------------------------ + static void ExecuteLineString(DataChunk &args, ExpressionState &state, Vector &result) { + result.SetVectorType(VectorType::CONSTANT_VECTOR); + *ConstantVector::GetData(result) = LEGACY_LINESTRING_TYPE; + } + + //------------------------------------------------------------------------------------------------------------------ + // POLYGON_2D + //------------------------------------------------------------------------------------------------------------------ + static void ExecutePolygon(DataChunk &args, ExpressionState &state, Vector &result) { + result.SetVectorType(VectorType::CONSTANT_VECTOR); + *ConstantVector::GetData(result) = LEGACY_POLYGON_TYPE; + } + + //------------------------------------------------------------------------------------------------------------------ + // WKB + //------------------------------------------------------------------------------------------------------------------ + static void ExecuteWKB(DataChunk &args, ExpressionState &state, Vector &result) { + + UnaryExecutor::Execute(args.data[0], result, args.size(), [&](const string_t &blob) { + BinaryReader cursor(blob.GetData(), blob.GetSize()); + + const auto le = cursor.Read(); + const auto type = le ? 
cursor.Read() : cursor.ReadBE(); + const auto normalized_type = (type & 0xffff) % 1000; + + if (normalized_type == 0 || normalized_type > 7) { + return LEGACY_UNKNOWN_TYPE; + } + + // Return the geometry type + // Subtract 1 since the WKB type is 1-indexed + return static_cast(normalized_type - 1); + }); + } + + //------------------------------------------------------------------------------------------------------------------ + // Documentation + //------------------------------------------------------------------------------------------------------------------ + static constexpr auto DESCRIPTION = R"( + Returns a 'GEOMETRY_TYPE' enum identifying the input geometry type. Possible enum return types are: `POINT`, `LINESTRING`, `POLYGON`, `MULTIPOINT`, `MULTILINESTRING`, `MULTIPOLYGON`, and `GEOMETRYCOLLECTION`. + )"; + + static constexpr auto EXAMPLE = R"( + SELECT DISTINCT ST_GeometryType(ST_GeomFromText('POINT(1 1)')); + ---- + POINT + )"; + + //------------------------------------------------------------------------------------------------------------------ + // Register + //------------------------------------------------------------------------------------------------------------------ + static void Register(DatabaseInstance &db) { + FunctionBuilder::RegisterScalar(db, "ST_GeometryType", [](ScalarFunctionBuilder &func) { + func.AddVariant([](ScalarFunctionVariantBuilder &variant) { + variant.AddParameter("geom", GeoTypes::GEOMETRY()); + variant.SetReturnType(LogicalTypeId::ANY); + + variant.SetBind(Bind); + variant.SetInit(LocalState::Init); + variant.SetFunction(ExecuteGeometry); + }); + + func.AddVariant([](ScalarFunctionVariantBuilder &variant) { + variant.AddParameter("point", GeoTypes::POINT_2D()); + variant.SetReturnType(LogicalTypeId::ANY); + + variant.SetBind(Bind); + variant.SetFunction(ExecutePoint); + }); + + func.AddVariant([](ScalarFunctionVariantBuilder &variant) { + variant.AddParameter("linestring", GeoTypes::LINESTRING_2D()); + 
variant.SetReturnType(LogicalTypeId::ANY); + + variant.SetBind(Bind); + variant.SetFunction(ExecuteLineString); + }); + + func.AddVariant([](ScalarFunctionVariantBuilder &variant) { + variant.AddParameter("polygon", GeoTypes::POLYGON_2D()); + variant.SetReturnType(LogicalTypeId::ANY); + + variant.SetBind(Bind); + variant.SetFunction(ExecutePolygon); + }); + + func.AddVariant([](ScalarFunctionVariantBuilder &variant) { + variant.AddParameter("wkb", GeoTypes::WKB_BLOB()); + variant.SetReturnType(LogicalTypeId::ANY); + + variant.SetBind(Bind); + variant.SetFunction(ExecuteWKB); + }); + + func.SetDescription(DESCRIPTION); + func.SetExample(EXAMPLE); + + func.SetTag("ext", "spatial"); + func.SetTag("category", "property"); + }); + } +}; + +//====================================================================================================================== +// ST_GeomFromHEXWKB +//====================================================================================================================== + +struct ST_GeomFromHEXWKB { + + //------------------------------------------------------------------------------------------------------------------ + // GEOMETRY + //------------------------------------------------------------------------------------------------------------------ + // TODO: Move this into SGL + static void Execute(DataChunk &args, ExpressionState &state, Vector &result) { + D_ASSERT(args.data.size() == 1); + auto &input = args.data[0]; + auto count = args.size(); + + auto &lstate = LocalState::ResetAndGet(state); + auto &alloc = lstate.GetAllocator(); + + constexpr auto MAX_STACK_DEPTH = 128; + uint32_t recursion_stack[MAX_STACK_DEPTH]; + + sgl::ops::wkb_reader reader = {}; + reader.copy_vertices = false; + reader.alloc = &alloc; + reader.allow_mixed_zm = true; + reader.nan_as_empty = true; + + reader.stack_buf = recursion_stack; + reader.stack_cap = MAX_STACK_DEPTH; + + UnaryExecutor::Execute(input, result, count, [&](const string_t &input_hex) { + const 
auto hex_size = input_hex.GetSize(); + const auto hex_ptr = const_data_ptr_cast(input_hex.GetData()); + + if (hex_size % 2 == 1) { + throw InvalidInputException("Invalid HEX WKB string, length must be even."); + } + + const auto blob_size = hex_size / 2; + + const unique_ptr wkb_blob(new data_t[blob_size]); + const auto blob_ptr = wkb_blob.get(); + auto blob_idx = 0; + for (idx_t hex_idx = 0; hex_idx < hex_size; hex_idx += 2) { + const auto byte_a = Blob::HEX_MAP[hex_ptr[hex_idx]]; + const auto byte_b = Blob::HEX_MAP[hex_ptr[hex_idx + 1]]; + D_ASSERT(byte_a != -1); + D_ASSERT(byte_b != -1); + + blob_ptr[blob_idx++] = (byte_a << 4) + byte_b; + } + + reader.buf = reinterpret_cast(blob_ptr); + reader.end = reader.buf + blob_size; + + sgl::geometry geom(sgl::geometry_type::INVALID); + + if (!sgl::ops::wkb_reader_try_parse(&reader, &geom)) { + const auto error = sgl::ops::wkb_reader_get_error_message(&reader); + throw InvalidInputException("Could not parse HEX WKB string: %s", error); + } + + // Enforce that we have a cohesive ZM layout + if (reader.has_mixed_zm) { + sgl::ops::force_zm(alloc, &geom, reader.has_any_z, reader.has_any_m, 0, 0); + } + + return lstate.Serialize(result, geom); + }); + } + + //------------------------------------------------------------------------------------------------------------------ + // Documentation + //------------------------------------------------------------------------------------------------------------------ + // TODO: Add docs + static constexpr auto DESCRIPTION = R"( + Deserialize a GEOMETRY from a HEX(E)WKB encoded string + + DuckDB spatial doesnt currently differentiate between `WKB` and `EWKB`, so `ST_GeomFromHEXWKB` and `ST_GeomFromHEXEWKB" are just aliases of eachother. 
+ )"; + + static constexpr auto EXAMPLE = ""; + + //------------------------------------------------------------------------------------------------------------------ + // Register + //------------------------------------------------------------------------------------------------------------------ + static void Register(DatabaseInstance &db) { + + // Our WKB reader also parses EWKB, even though it will just ignore SRID's. + // so we'll just add an alias for now. In the future, once we actually handle + // EWKB and store SRID's, these functions should differentiate between + // the two formats. + + for (const auto &alias : {"ST_GeomFromHEXWKB", "ST_GeomFromHEXEWKB"}) { + FunctionBuilder::RegisterScalar(db, alias, [](ScalarFunctionBuilder &func) { + func.AddVariant([](ScalarFunctionVariantBuilder &variant) { + variant.AddParameter("hexwkb", LogicalType::VARCHAR); + variant.SetReturnType(GeoTypes::GEOMETRY()); + + variant.SetInit(LocalState::Init); + variant.SetFunction(Execute); + }); + + func.SetDescription(DESCRIPTION); + func.SetExample(EXAMPLE); + + func.SetTag("ext", "spatial"); + func.SetTag("category", "construction"); + }); + } + } +}; + +//====================================================================================================================== +// ST_GeomFromGeoJSON +//====================================================================================================================== + +struct ST_GeomFromGeoJSON { + + //------------------------------------------------------------------------------------------------------------------ + // GEOJSON -> GEOMETRY + //------------------------------------------------------------------------------------------------------------------ + // TODO: Move this into SGL and make non-recursive + // At least rewrite, its kind of a mess right now. 
+ + static sgl::geometry PointFromGeoJSON(yyjson_val *coord_array, ArenaAllocator &arena, const string_t &raw, + bool &has_z) { + auto len = yyjson_arr_size(coord_array); + if (len == 0) { + // empty point + return sgl::point::make_empty(has_z); + } + if (len < 2) { + throw InvalidInputException("GeoJSON input coordinates field is not an array of at least length 2: %s", + raw.GetString()); + } + auto x_val = yyjson_arr_get_first(coord_array); + if (!yyjson_is_num(x_val)) { + throw InvalidInputException("GeoJSON input coordinates field is not an array of numbers: %s", + raw.GetString()); + } + auto y_val = yyjson_arr_get(coord_array, 1); + if (!yyjson_is_num(y_val)) { + throw InvalidInputException("GeoJSON input coordinates field is not an array of numbers: %s", + raw.GetString()); + } + + auto x = yyjson_get_num(x_val); + auto y = yyjson_get_num(y_val); + + auto geom_has_z = len > 2; + if (geom_has_z) { + has_z = true; + auto z_val = yyjson_arr_get(coord_array, 2); + if (!yyjson_is_num(z_val)) { + throw InvalidInputException("GeoJSON input coordinates field is not an array of numbers: %s", + raw.GetString()); + } + auto z = yyjson_get_num(z_val); + auto mem = arena.AllocateAligned(sizeof(double) * 3); + auto ptr = reinterpret_cast(mem); + + ptr[0] = x; + ptr[1] = y; + ptr[2] = z; + + auto point = sgl::point::make_empty(true); + point.set_vertex_data(mem, 1); + return point; + } else { + auto mem = arena.AllocateAligned(sizeof(double) * 2); + auto ptr = reinterpret_cast(mem); + + ptr[0] = x; + ptr[1] = y; + + auto point = sgl::point::make_empty(false); + point.set_vertex_data(mem, 1); + return point; + } + } + + static sgl::geometry VerticesFromGeoJSON(yyjson_val *coord_array, ArenaAllocator &arena, const string_t &raw, + bool &has_z) { + auto len = yyjson_arr_size(coord_array); + if (len == 0) { + // Empty + return sgl::linestring::make_empty(has_z, false); + } + + // Sniff the coordinates to see if we have Z + bool has_any_z = false; + size_t idx, max; + 
yyjson_val *coord; + yyjson_arr_foreach(coord_array, idx, max, coord) { + if (!yyjson_is_arr(coord)) { + throw InvalidInputException("GeoJSON input coordinates field is not an array of arrays: %s", + raw.GetString()); + } + auto coord_len = yyjson_arr_size(coord); + if (coord_len > 2) { + has_any_z = true; + } else if (coord_len < 2) { + throw InvalidInputException( + "GeoJSON input coordinates field is not an array of arrays of length >= 2: %s", raw.GetString()); + } + } + + if (has_any_z) { + has_z = true; + } + + sgl::geometry verts(sgl::geometry_type::LINESTRING, has_any_z, false); + const auto vertex_size = has_any_z ? 3 : 2; + const auto mem = arena.AllocateAligned(sizeof(double) * vertex_size * len); + verts.set_vertex_data(mem, len); + + const auto vertex_data = reinterpret_cast(mem); + + yyjson_arr_foreach(coord_array, idx, max, coord) { + auto coord_len = yyjson_arr_size(coord); + auto x_val = yyjson_arr_get_first(coord); + if (!yyjson_is_num(x_val)) { + throw InvalidInputException("GeoJSON input coordinates field is not an array of arrays of numbers: %s", + raw.GetString()); + } + auto y_val = yyjson_arr_get(coord, 1); + if (!yyjson_is_num(y_val)) { + throw InvalidInputException("GeoJSON input coordinates field is not an array of arrays of numbers: %s", + raw.GetString()); + } + auto x = yyjson_get_num(x_val); + auto y = yyjson_get_num(y_val); + auto z = 0.0; + + if (coord_len > 2) { + auto z_val = yyjson_arr_get(coord, 2); + if (!yyjson_is_num(z_val)) { + throw InvalidInputException( + "GeoJSON input coordinates field is not an array of arrays of numbers: %s", raw.GetString()); + } + z = yyjson_get_num(z_val); + } + + vertex_data[idx * vertex_size] = x; + vertex_data[idx * vertex_size + 1] = y; + if (has_any_z) { + vertex_data[idx * vertex_size + 2] = z; + } + } + + return verts; + } + + static sgl::geometry LineStringFromGeoJSON(yyjson_val *coord_array, ArenaAllocator &arena, const string_t &raw, + bool &has_z) { + return 
VerticesFromGeoJSON(coord_array, arena, raw, has_z); + } + + static sgl::geometry PolygonFromGeoJSON(yyjson_val *coord_array, ArenaAllocator &arena, const string_t &raw, + bool &has_z) { + auto num_rings = yyjson_arr_size(coord_array); + if (num_rings == 0) { + // Empty + return sgl::polygon::make_empty(has_z, false); + } + + // Polygon + sgl::geometry polygon(sgl::geometry_type::POLYGON, has_z, false); + size_t idx, max; + yyjson_val *ring_val; + yyjson_arr_foreach(coord_array, idx, max, ring_val) { + if (!yyjson_is_arr(ring_val)) { + throw InvalidInputException("GeoJSON input coordinates field is not an array of arrays: %s", + raw.GetString()); + } + const auto mem = arena.AllocateAligned(sizeof(sgl::geometry)); + const auto ring = new (mem) sgl::geometry(VerticesFromGeoJSON(ring_val, arena, raw, has_z)); + polygon.append_part(ring); + } + + return polygon; + } + + static sgl::geometry MultiPointFromGeoJSON(yyjson_val *coord_array, ArenaAllocator &arena, const string_t &raw, + bool &has_z) { + auto num_points = yyjson_arr_size(coord_array); + if (num_points == 0) { + // Empty + return sgl::multi_point::make_empty(has_z, false); + } + + // MultiPoint + sgl::geometry multi_point(sgl::geometry_type::MULTI_POINT, has_z, false); + + size_t idx, max; + yyjson_val *point_val; + yyjson_arr_foreach(coord_array, idx, max, point_val) { + if (!yyjson_is_arr(point_val)) { + throw InvalidInputException("GeoJSON input coordinates field is not an array of arrays: %s", + raw.GetString()); + } + if (yyjson_arr_size(point_val) < 2) { + throw InvalidInputException( + "GeoJSON input coordinates field is not an array of arrays of length >= 2: %s", raw.GetString()); + } + + const auto mem = arena.AllocateAligned(sizeof(sgl::geometry)); + const auto point = new (mem) sgl::geometry(PointFromGeoJSON(point_val, arena, raw, has_z)); + multi_point.append_part(point); + } + return multi_point; + } + + static sgl::geometry MultiLineStringFromGeoJSON(yyjson_val *coord_array, ArenaAllocator 
&arena, const string_t &raw, + bool &has_z) { + auto num_linestrings = yyjson_arr_size(coord_array); + if (num_linestrings == 0) { + // Empty + return sgl::multi_linestring::make_empty(has_z, false); + } + + // MultiLineString + sgl::geometry multi_linestring(sgl::geometry_type::MULTI_LINESTRING, has_z, false); + + size_t idx, max; + yyjson_val *linestring_val; + yyjson_arr_foreach(coord_array, idx, max, linestring_val) { + if (!yyjson_is_arr(linestring_val)) { + throw InvalidInputException("GeoJSON input coordinates field is not an array of arrays: %s", + raw.GetString()); + } + const auto mem = arena.AllocateAligned(sizeof(sgl::geometry)); + const auto line = new (mem) sgl::geometry(LineStringFromGeoJSON(linestring_val, arena, raw, has_z)); + + multi_linestring.append_part(line); + } + + return multi_linestring; + } + + static sgl::geometry MultiPolygonFromGeoJSON(yyjson_val *coord_array, ArenaAllocator &arena, const string_t &raw, + bool &has_z) { + auto num_polygons = yyjson_arr_size(coord_array); + if (num_polygons == 0) { + // Empty + return sgl::multi_polygon::make_empty(has_z, false); + } + + // MultiPolygon + sgl::geometry multi_polygon(sgl::geometry_type::MULTI_POLYGON, has_z, false); + + size_t idx, max; + yyjson_val *polygon_val; + yyjson_arr_foreach(coord_array, idx, max, polygon_val) { + if (!yyjson_is_arr(polygon_val)) { + throw InvalidInputException("GeoJSON input coordinates field is not an array of arrays: %s", + raw.GetString()); + } + const auto mem = arena.AllocateAligned(sizeof(sgl::geometry)); + const auto polygon = new (mem) sgl::geometry(PolygonFromGeoJSON(polygon_val, arena, raw, has_z)); + + multi_polygon.append_part(polygon); + } + + return multi_polygon; + } + + static sgl::geometry GeometryCollectionFromGeoJSON(yyjson_val *root, ArenaAllocator &arena, const string_t &raw, + bool &has_z) { + auto geometries_val = yyjson_obj_get(root, "geometries"); + if (!geometries_val) { + throw InvalidInputException("GeoJSON input does not have a 
geometries field: %s", raw.GetString()); + } + if (!yyjson_is_arr(geometries_val)) { + throw InvalidInputException("GeoJSON input geometries field is not an array: %s", raw.GetString()); + } + auto num_geometries = yyjson_arr_size(geometries_val); + if (num_geometries == 0) { + // Empty + return sgl::multi_geometry::make_empty(has_z, false); + } + + // GeometryCollection + sgl::geometry geometry_collection(sgl::geometry_type::MULTI_GEOMETRY, has_z, false); + size_t idx, max; + yyjson_val *geometry_val; + yyjson_arr_foreach(geometries_val, idx, max, geometry_val) { + const auto mem = arena.AllocateAligned(sizeof(sgl::geometry)); + const auto geometry = new (mem) sgl::geometry(FromGeoJSON(geometry_val, arena, raw, has_z)); + + geometry_collection.append_part(geometry); + } + + return geometry_collection; + } + + static sgl::geometry FromGeoJSON(yyjson_val *root, ArenaAllocator &arena, const string_t &raw, bool &has_z) { + auto type_val = yyjson_obj_get(root, "type"); + if (!type_val) { + throw InvalidInputException("GeoJSON input does not have a type field: %s", raw.GetString()); + } + auto type_str = yyjson_get_str(type_val); + if (!type_str) { + throw InvalidInputException("GeoJSON input type field is not a string: %s", raw.GetString()); + } + + if (StringUtil::Equals(type_str, "GeometryCollection")) { + return GeometryCollectionFromGeoJSON(root, arena, raw, has_z); + } + + // Get the coordinates + auto coord_array = yyjson_obj_get(root, "coordinates"); + if (!coord_array) { + throw InvalidInputException("GeoJSON input does not have a coordinates field: %s", raw.GetString()); + } + if (!yyjson_is_arr(coord_array)) { + throw InvalidInputException("GeoJSON input coordinates field is not an array: %s", raw.GetString()); + } + + if (StringUtil::Equals(type_str, "Point")) { + return PointFromGeoJSON(coord_array, arena, raw, has_z); + } + if (StringUtil::Equals(type_str, "LineString")) { + return LineStringFromGeoJSON(coord_array, arena, raw, has_z); + } + if 
(StringUtil::Equals(type_str, "Polygon")) { + return PolygonFromGeoJSON(coord_array, arena, raw, has_z); + } + if (StringUtil::Equals(type_str, "MultiPoint")) { + return MultiPointFromGeoJSON(coord_array, arena, raw, has_z); + } + if (StringUtil::Equals(type_str, "MultiLineString")) { + return MultiLineStringFromGeoJSON(coord_array, arena, raw, has_z); + } + if (StringUtil::Equals(type_str, "MultiPolygon")) { + return MultiPolygonFromGeoJSON(coord_array, arena, raw, has_z); + } + throw InvalidInputException("GeoJSON input has invalid type field: %s", raw.GetString()); + } + + static void Execute(DataChunk &args, ExpressionState &state, Vector &result) { + D_ASSERT(args.data.size() == 1); + auto &input = args.data[0]; + auto count = args.size(); + + auto &lstate = LocalState::ResetAndGet(state); + auto &arena = lstate.GetArena(); + + JSONAllocator json_allocator(arena); + + UnaryExecutor::Execute(input, result, count, [&](const string_t &input) { + yyjson_read_err err; + auto doc = yyjson_read_opts(const_cast(input.GetDataUnsafe()), input.GetSize(), + YYJSON_READ_ALLOW_TRAILING_COMMAS | YYJSON_READ_ALLOW_COMMENTS, + json_allocator.GetYYJSONAllocator(), &err); + + if (err.code) { + throw InvalidInputException("Could not parse GeoJSON input: %s, (%s)", err.msg, input.GetString()); + } + + auto root = yyjson_doc_get_root(doc); + if (!yyjson_is_obj(root)) { + // The document parsed successfully, so there is no parser message to report here + // (yyjson documents err.msg as NULL on success). Describe the actual problem instead. + throw InvalidInputException("Could not parse GeoJSON input: root is not a JSON object, (%s)", input.GetString()); + } + + bool has_z = false; + auto geom = FromGeoJSON(root, arena, input, has_z); + if (has_z) { + // Ensure the geometries has consistent Z values + sgl::ops::force_zm(lstate.GetAllocator(), &geom, has_z, false, 0, 0); + } + + return lstate.Serialize(result, geom); + }); + } + + //------------------------------------------------------------------------------------------------------------------ + // Documentation +
//------------------------------------------------------------------------------------------------------------------ + static constexpr auto DESCRIPTION = R"( + Deserializes a GEOMETRY from a GeoJSON fragment. + )"; + + static constexpr auto EXAMPLE = R"( + SELECT ST_GeomFromGeoJSON('{"type":"Point","coordinates":[1.0,2.0]}'); + ---- + POINT (1 2) + )"; + + //------------------------------------------------------------------------------------------------------------------ + // Register + //------------------------------------------------------------------------------------------------------------------ + static void Register(DatabaseInstance &db) { + FunctionBuilder::RegisterScalar(db, "ST_GeomFromGeoJSON", [](ScalarFunctionBuilder &func) { + func.AddVariant([](ScalarFunctionVariantBuilder &variant) { + variant.AddParameter("geojson", LogicalType::JSON()); + variant.SetReturnType(GeoTypes::GEOMETRY()); + + variant.SetInit(LocalState::Init); + variant.SetFunction(Execute); + }); + + func.AddVariant([](ScalarFunctionVariantBuilder &variant) { + variant.AddParameter("geojson", LogicalType::VARCHAR); + variant.SetReturnType(GeoTypes::GEOMETRY()); + + variant.SetInit(LocalState::Init); + variant.SetFunction(Execute); + }); + + func.SetDescription(DESCRIPTION); + func.SetExample(EXAMPLE); + + func.SetTag("ext", "spatial"); + func.SetTag("category", "conversion"); + }); + } +}; + +//====================================================================================================================== +// ST_GeomFromText +//====================================================================================================================== + +struct ST_GeomFromText { + + //------------------------------------------------------------------------------------------------------------------ + // Binding + //------------------------------------------------------------------------------------------------------------------ + // TODO: Remove this, this doesnt make any sense here. 
Invalid geometries should be handled by TRY_CAST + // + struct BindData final : public FunctionData { + explicit BindData(bool ignore_invalid) : ignore_invalid(ignore_invalid) { + } + + unique_ptr Copy() const override { + return make_uniq(ignore_invalid); + } + // Two bind data are interchangeable only when they agree on ignore_invalid + bool Equals(const FunctionData &other_p) const override { + return ignore_invalid == other_p.Cast<BindData>().ignore_invalid; + } + + bool ignore_invalid = false; + }; + + static unique_ptr Bind(ClientContext &context, ScalarFunction &bound_function, + vector> &arguments) { + if (arguments.empty()) { + throw InvalidInputException("ST_GeomFromText requires at least one argument"); + } + const auto &input_type = arguments[0]->return_type; + if (input_type.id() != LogicalTypeId::VARCHAR) { + throw InvalidInputException("ST_GeomFromText requires a string argument"); + } + + bool ignore_invalid = false; + for (idx_t i = 1; i < arguments.size(); i++) { + auto &arg = arguments[i]; + if (arg->HasParameter()) { + throw InvalidInputException("Parameters are not supported in ST_GeomFromText optional arguments"); + } + if (!arg->IsFoldable()) { + throw InvalidInputException( + "Non-constant arguments are not supported in ST_GeomFromText optional arguments"); + } + if (arg->alias == "ignore_invalid") { + if (arg->return_type.id() != LogicalTypeId::BOOLEAN) { + throw InvalidInputException("ST_GeomFromText optional argument 'ignore_invalid' must be a boolean"); + } + ignore_invalid = BooleanValue::Get(ExpressionExecutor::EvaluateScalar(context, *arg)); + } + } + return make_uniq(ignore_invalid); + } + + //------------------------------------------------------------------------------------------------------------------ + // Execute + //------------------------------------------------------------------------------------------------------------------ + static void Execute(DataChunk &args, ExpressionState &state, Vector &result) { + auto &lstate = LocalState::ResetAndGet(state); + auto &alloc = lstate.GetAllocator(); + + const auto &func_expr = state.expr.Cast(); + const auto
&bind_data = func_expr.bind_info->Cast(); + const auto ignore_invalid = bind_data.ignore_invalid; + + sgl::ops::wkt_reader reader = {}; + reader.alloc = &alloc; + + UnaryExecutor::ExecuteWithNulls( + args.data[0], result, args.size(), [&](const string_t &wkt, ValidityMask &mask, idx_t row_idx) { + const auto wkt_ptr = wkt.GetDataUnsafe(); + const auto wkt_len = wkt.GetSize(); + + reader.buf = wkt_ptr; + reader.end = wkt_ptr + wkt_len; + + sgl::geometry geom; + + if (!sgl::ops::wkt_reader_try_parse(&reader, &geom)) { + + if (ignore_invalid) { + mask.SetInvalid(row_idx); + return string_t {}; + } + + const auto error = sgl::ops::wkt_reader_get_error_message(&reader); + throw InvalidInputException(error); + } + + return lstate.Serialize(result, geom); + }); + } + + //------------------------------------------------------------------------------------------------------------------ + // Documentation + //------------------------------------------------------------------------------------------------------------------ + static constexpr auto DOCUMENTATION = R"( + Deserialize a GEOMETRY from a WKT encoded string + )"; + + // TODO: add example + static constexpr auto EXAMPLE = ""; + + //------------------------------------------------------------------------------------------------------------------ + // Register + //------------------------------------------------------------------------------------------------------------------ + static void Register(DatabaseInstance &db) { + FunctionBuilder::RegisterScalar(db, "ST_GeomFromText", [](ScalarFunctionBuilder &func) { + func.AddVariant([](ScalarFunctionVariantBuilder &variant) { + variant.AddParameter("wkt", LogicalType::VARCHAR); + variant.SetReturnType(GeoTypes::GEOMETRY()); + + variant.SetInit(LocalState::Init); + variant.SetBind(Bind); + variant.SetFunction(Execute); + }); + + func.AddVariant([](ScalarFunctionVariantBuilder &variant) { + variant.AddParameter("wkt", LogicalType::VARCHAR); + 
variant.AddParameter("ignore_invalid", LogicalType::BOOLEAN); + variant.SetReturnType(GeoTypes::GEOMETRY()); + + variant.SetBind(Bind); + variant.SetInit(LocalState::Init); + variant.SetFunction(Execute); + }); + + func.SetDescription(DOCUMENTATION); + func.SetExample(EXAMPLE); + + func.SetTag("ext", "spatial"); + func.SetTag("category", "conversion"); + }); + } +}; + +//====================================================================================================================== +// ST_GeomFromWKB +//====================================================================================================================== + +struct ST_GeomFromWKB { + + //------------------------------------------------------------------------------------------------------------------ + // GEOMETRY + //------------------------------------------------------------------------------------------------------------------ + static void ExecuteGeometry(DataChunk &args, ExpressionState &state, Vector &result) { + auto &lstate = LocalState::ResetAndGet(state); + auto &alloc = lstate.GetAllocator(); + + constexpr auto MAX_STACK_DEPTH = 128; + uint32_t recursion_stack[MAX_STACK_DEPTH]; + + sgl::ops::wkb_reader reader = {}; + reader.copy_vertices = false; + reader.alloc = &alloc; + reader.allow_mixed_zm = true; + reader.nan_as_empty = true; + + reader.stack_buf = recursion_stack; + reader.stack_cap = MAX_STACK_DEPTH; + + UnaryExecutor::Execute(args.data[0], result, args.size(), [&](const string_t &wkb) { + reader.buf = wkb.GetDataUnsafe(); + reader.end = reader.buf + wkb.GetSize(); + + sgl::geometry geom(sgl::geometry_type::INVALID); + if (!sgl::ops::wkb_reader_try_parse(&reader, &geom)) { + const auto error = sgl::ops::wkb_reader_get_error_message(&reader); + auto msg = "Could not parse WKB input:" + error; + if (reader.error == sgl::ops::SGL_WKB_READER_UNSUPPORTED_TYPE) { + msg += "\n(You can use TRY_CAST instead to replace invalid geometries with NULL)"; + } + throw 
InvalidInputException(msg); + } + + if (reader.has_mixed_zm) { + sgl::ops::force_zm(alloc, &geom, reader.has_any_z, reader.has_any_m, 0, 0); + } + + return lstate.Serialize(result, geom); + }); + } + + //------------------------------------------------------------------------------------------------------------------ + // POINT_2D + //------------------------------------------------------------------------------------------------------------------ + static void ExecutePoint(DataChunk &args, ExpressionState &state, Vector &result) { + auto &lstate = LocalState::ResetAndGet(state); + auto &alloc = lstate.GetAllocator(); + + auto count = args.size(); + auto &input = args.data[0]; + + input.Flatten(count); + + const auto &point_children = StructVector::GetEntries(result); + const auto x_data = FlatVector::GetData(*point_children[0]); + const auto y_data = FlatVector::GetData(*point_children[1]); + + sgl::ops::wkb_reader reader = {}; + reader.copy_vertices = false; + reader.alloc = &alloc; + reader.allow_mixed_zm = true; + reader.nan_as_empty = true; + + // No recursion allowed! 
+ reader.stack_buf = nullptr; + reader.stack_cap = 0; + + for (idx_t i = 0; i < count; i++) { + const auto &wkb = FlatVector::GetData(input)[i]; + + reader.buf = wkb.GetDataUnsafe(); + reader.end = reader.buf + wkb.GetSize(); + + sgl::geometry geom(sgl::geometry_type::INVALID); + if (!sgl::ops::wkb_reader_try_parse(&reader, &geom)) { + const auto error = sgl::ops::wkb_reader_get_error_message(&reader); + throw InvalidInputException("Could not parse WKB input: %s", error); + } + + if (geom.get_type() != sgl::geometry_type::POINT) { + throw InvalidInputException("ST_Point2DFromWKB: WKB is not a POINT"); + } + + const auto vertex = geom.get_vertex_xy(0); + + x_data[i] = vertex.x; + y_data[i] = vertex.y; + } + + if (args.AllConstant()) { + result.SetVectorType(VectorType::CONSTANT_VECTOR); + } + } + + //------------------------------------------------------------------------------------------------------------------ + // LINESTRING_2D + //------------------------------------------------------------------------------------------------------------------ + static void ExecuteLineString(DataChunk &args, ExpressionState &state, Vector &result) { + auto &lstate = LocalState::ResetAndGet(state); + auto &alloc = lstate.GetAllocator(); + + D_ASSERT(args.data.size() == 1); + const auto count = args.size(); + auto &wkb_blobs = args.data[0]; + wkb_blobs.Flatten(count); + + auto &inner = ListVector::GetEntry(result); + const auto lines = ListVector::GetData(result); + const auto wkb_data = FlatVector::GetData(wkb_blobs); + + idx_t total_size = 0; + + sgl::ops::wkb_reader reader = {}; + reader.copy_vertices = false; + reader.alloc = &alloc; + reader.allow_mixed_zm = true; + reader.nan_as_empty = true; + + // No recursion allowed! 
+ reader.stack_buf = nullptr; + reader.stack_cap = 0; + + for (idx_t i = 0; i < count; i++) { + auto wkb = wkb_data[i]; + + reader.buf = wkb.GetDataUnsafe(); + reader.end = reader.buf + wkb.GetSize(); + + sgl::geometry geom(sgl::geometry_type::INVALID); + if (!sgl::ops::wkb_reader_try_parse(&reader, &geom)) { + const auto error = sgl::ops::wkb_reader_get_error_message(&reader); + throw InvalidInputException("Could not parse WKB input: %s", error); + } + + if (geom.get_type() != sgl::geometry_type::LINESTRING) { + throw InvalidInputException("ST_LineString2DFromWKB: WKB is not a LINESTRING"); + } + + const auto line_size = geom.get_count(); + + lines[i].offset = total_size; + lines[i].length = line_size; + + ListVector::Reserve(result, total_size + line_size); + + // Since ListVector::Reserve potentially reallocates, we need to re-fetch the inner vector pointers + auto &children = StructVector::GetEntries(inner); + auto &x_child = children[0]; + auto &y_child = children[1]; + auto x_data = FlatVector::GetData(*x_child); + auto y_data = FlatVector::GetData(*y_child); + + for (idx_t j = 0; j < line_size; j++) { + const auto vertex = geom.get_vertex_xy(j); + x_data[total_size + j] = vertex.x; + y_data[total_size + j] = vertex.y; + } + + total_size += line_size; + } + + ListVector::SetListSize(result, total_size); + + if (args.AllConstant()) { + result.SetVectorType(VectorType::CONSTANT_VECTOR); + } + } + + //------------------------------------------------------------------------------------------------------------------ + // POLYGON_2D + //------------------------------------------------------------------------------------------------------------------ + static void ExecutePolygon(DataChunk &args, ExpressionState &state, Vector &result) { + auto &lstate = LocalState::ResetAndGet(state); + auto &alloc = lstate.GetAllocator(); + + D_ASSERT(args.data.size() == 1); + auto count = args.size(); + + // Set up input data + auto &wkb_blobs = args.data[0]; + 
wkb_blobs.Flatten(count); + auto wkb_data = FlatVector::GetData(wkb_blobs); + + // Set up output data + auto &ring_vec = ListVector::GetEntry(result); + auto polygons = ListVector::GetData(result); + + idx_t total_ring_count = 0; + idx_t total_point_count = 0; + + sgl::ops::wkb_reader reader = {}; + reader.copy_vertices = false; + reader.alloc = &alloc; + reader.allow_mixed_zm = true; + reader.nan_as_empty = true; + + // No recursion allowed! + reader.stack_buf = nullptr; + reader.stack_cap = 0; + + for (idx_t i = 0; i < count; i++) { + auto wkb = wkb_data[i]; + + reader.buf = wkb.GetDataUnsafe(); + reader.end = reader.buf + wkb.GetSize(); + + sgl::geometry geom(sgl::geometry_type::INVALID); + if (!sgl::ops::wkb_reader_try_parse(&reader, &geom)) { + const auto error = sgl::ops::wkb_reader_get_error_message(&reader); + throw InvalidInputException("Could not parse WKB input: %s", error); + } + + if (geom.get_type() != sgl::geometry_type::POLYGON) { + throw InvalidInputException("ST_Polygon2DFromWKB: WKB is not a POLYGON"); + } + + const auto ring_count = geom.get_count(); + + polygons[i].offset = total_ring_count; + polygons[i].length = ring_count; + + ListVector::Reserve(result, total_ring_count + ring_count); + // Since ListVector::Reserve potentially reallocates, we need to re-fetch the inner vector pointers + + const auto tail = geom.get_last_part(); + auto ring = tail; + if (ring) { + int j = 0; + do { + ring = ring->get_next(); + const auto point_count = ring->get_count(); + + ListVector::Reserve(ring_vec, total_point_count + point_count); + auto ring_entries = ListVector::GetData(ring_vec); + auto &inner = ListVector::GetEntry(ring_vec); + + auto &children = StructVector::GetEntries(inner); + auto &x_child = children[0]; + auto &y_child = children[1]; + auto x_data = FlatVector::GetData(*x_child); + auto y_data = FlatVector::GetData(*y_child); + + for (idx_t k = 0; k < point_count; k++) { + const auto vertex = ring->get_vertex_xy(k); + x_data[total_point_count 
+ k] = vertex.x; + y_data[total_point_count + k] = vertex.y; + } + + ring_entries[total_ring_count + j].offset = total_point_count; + ring_entries[total_ring_count + j].length = point_count; + + total_point_count += point_count; + + j++; + + } while (ring != tail); + } + + total_ring_count += ring_count; + } + + ListVector::SetListSize(result, total_ring_count); + ListVector::SetListSize(ring_vec, total_point_count); + + if (count == 1) { + result.SetVectorType(VectorType::CONSTANT_VECTOR); + } + } + + //------------------------------------------------------------------------------------------------------------------ + // Documentation + //------------------------------------------------------------------------------------------------------------------ + static constexpr auto DESCRIPTION = R"( + Deserializes a GEOMETRY from a WKB encoded blob + )"; + static constexpr auto EXAMPLE = ""; + + //------------------------------------------------------------------------------------------------------------------ + // Register + //------------------------------------------------------------------------------------------------------------------ + static void Register(DatabaseInstance &db) { + FunctionBuilder::RegisterScalar(db, "ST_Point2DFromWKB", [](ScalarFunctionBuilder &builder) { + builder.AddVariant([](ScalarFunctionVariantBuilder &variant) { + variant.AddParameter("point", GeoTypes::POINT_2D()); + variant.SetReturnType(GeoTypes::GEOMETRY()); + + variant.SetInit(LocalState::Init); + variant.SetFunction(ExecutePoint); + }); + + builder.SetDescription("Deserialize a POINT_2D from a WKB encoded blob"); + builder.SetExample(""); + builder.SetTag("ext", "spatial"); + builder.SetTag("category", "conversion"); + }); + + FunctionBuilder::RegisterScalar(db, "ST_LineString2DFromWKB", [](ScalarFunctionBuilder &builder) { + builder.AddVariant([](ScalarFunctionVariantBuilder &variant) { + variant.AddParameter("linestring", GeoTypes::LINESTRING_2D()); + 
variant.SetReturnType(GeoTypes::GEOMETRY()); + + variant.SetInit(LocalState::Init); + variant.SetFunction(ExecuteLineString); + }); + + builder.SetDescription("Deserialize a LINESTRING_2D from a WKB encoded blob"); + builder.SetExample(""); + builder.SetTag("ext", "spatial"); + builder.SetTag("category", "conversion"); + }); + + FunctionBuilder::RegisterScalar(db, "ST_Polygon2DFromWKB", [](ScalarFunctionBuilder &builder) { + builder.AddVariant([](ScalarFunctionVariantBuilder &variant) { + variant.AddParameter("polygon", GeoTypes::POLYGON_2D()); + variant.SetReturnType(GeoTypes::GEOMETRY()); + + variant.SetInit(LocalState::Init); + variant.SetFunction(ExecutePolygon); + }); + + builder.SetDescription("Deserialize a POLYGON_2D from a WKB encoded blob"); + builder.SetExample(""); + builder.SetTag("ext", "spatial"); + builder.SetTag("category", "conversion"); + }); + + FunctionBuilder::RegisterScalar(db, "ST_GeomFromWKB", [](ScalarFunctionBuilder &builder) { + builder.AddVariant([](ScalarFunctionVariantBuilder &variant) { + variant.AddParameter("wkb", GeoTypes::WKB_BLOB()); + variant.SetReturnType(GeoTypes::GEOMETRY()); + + variant.SetInit(LocalState::Init); + variant.SetFunction(ExecuteGeometry); + }); + + builder.AddVariant([](ScalarFunctionVariantBuilder &variant) { + variant.AddParameter("blob", LogicalType::BLOB); + variant.SetReturnType(GeoTypes::GEOMETRY()); + + variant.SetInit(LocalState::Init); + variant.SetFunction(ExecuteGeometry); + }); + + builder.SetDescription(DESCRIPTION); + builder.SetExample(EXAMPLE); + builder.SetTag("ext", "spatial"); + builder.SetTag("category", "conversion"); + }); + } +}; + +//====================================================================================================================== +// ST_HasZ +//====================================================================================================================== + +struct ST_HasZ { + + 
//------------------------------------------------------------------------------------------------------------------ + // GEOMETRY + //------------------------------------------------------------------------------------------------------------------ + static void ExecuteGeometry(DataChunk &args, ExpressionState &state, Vector &result) { + auto &lstate = LocalState::ResetAndGet(state); + + UnaryExecutor::Execute(args.data[0], result, args.size(), [&](const string_t &blob) { + // TODO: Peek without deserializing! + const auto geom = lstate.Deserialize(blob); + return geom.has_z(); + }); + } + + //------------------------------------------------------------------------------------------------------------------ + // WKB + //------------------------------------------------------------------------------------------------------------------ + static void ExecuteWKB(DataChunk &args, ExpressionState &state, Vector &result) { + UnaryExecutor::Execute(args.data[0], result, args.size(), [](const string_t &wkb) { + BinaryReader cursor(wkb.GetData(), wkb.GetSize()); + + const auto le = cursor.Read(); + const auto type = le ? 
cursor.Read() : cursor.ReadBE(); + + // Check for ISO WKB and EWKB Z flag; + const auto flags = (type & 0xffff) / 1000; + return flags == 1 || flags == 3 || ((type & 0x80000000) != 0); + }); + } + + //------------------------------------------------------------------------------------------------------------------ + // Documentation + //------------------------------------------------------------------------------------------------------------------ + static constexpr auto DESCRIPTION = "Check if the input geometry has Z values."; + + static constexpr auto EXAMPLE = R"( + -- HasZ for a 2D geometry + SELECT ST_HasZ(ST_GeomFromText('POINT(1 1)')); + ---- + false + + -- HasZ for a 3DZ geometry + SELECT ST_HasZ(ST_GeomFromText('POINT Z(1 1 1)')); + ---- + true + + -- HasZ for a 3DM geometry + SELECT ST_HasZ(ST_GeomFromText('POINT M(1 1 1)')); + ---- + false + + -- HasZ for a 4D geometry + SELECT ST_HasZ(ST_GeomFromText('POINT ZM(1 1 1 1)')); + ---- + true + )"; + + //------------------------------------------------------------------------------------------------------------------ + // Register + //------------------------------------------------------------------------------------------------------------------ + static void Register(DatabaseInstance &db) { + FunctionBuilder::RegisterScalar(db, "ST_HasZ", [](ScalarFunctionBuilder &func) { + func.AddVariant([](ScalarFunctionVariantBuilder &variant) { + variant.AddParameter("geom", GeoTypes::GEOMETRY()); + variant.SetReturnType(LogicalType::BOOLEAN); + + variant.SetInit(LocalState::Init); + variant.SetFunction(ExecuteGeometry); + }); + + func.AddVariant([](ScalarFunctionVariantBuilder &variant) { + variant.AddParameter("wkb", GeoTypes::WKB_BLOB()); + variant.SetReturnType(LogicalType::BOOLEAN); + + variant.SetFunction(ExecuteWKB); + }); + + func.SetDescription(DESCRIPTION); + func.SetExample(EXAMPLE); + + func.SetTag("ext", "spatial"); + func.SetTag("category", "property"); + }); + } +}; + 
+//====================================================================================================================== +// ST_HasM +//====================================================================================================================== + +struct ST_HasM { + + //------------------------------------------------------------------------------------------------------------------ + // GEOMETRY + //------------------------------------------------------------------------------------------------------------------ + static void ExecuteGeometry(DataChunk &args, ExpressionState &state, Vector &result) { + auto &lstate = LocalState::ResetAndGet(state); + + UnaryExecutor::Execute(args.data[0], result, args.size(), [&](const string_t &blob) { + // TODO: Peek without deserializing! + const auto geom = lstate.Deserialize(blob); + return geom.has_m(); + }); + } + + //------------------------------------------------------------------------------------------------------------------ + // WKB_BLOB + //------------------------------------------------------------------------------------------------------------------ + static void ExecuteWKB(DataChunk &args, ExpressionState &state, Vector &result) { + UnaryExecutor::Execute(args.data[0], result, args.size(), [](const string_t &wkb) { + BinaryReader cursor(wkb.GetData(), wkb.GetSize()); + + const auto le = cursor.Read(); + const auto type = le ? 
cursor.Read() : cursor.ReadBE(); + + // Check for ISO WKB and EWKB M flag; + const auto flags = (type & 0xffff) / 1000; + return flags == 2 || flags == 3 || ((type & 0x40000000) != 0); + }); + } + + //------------------------------------------------------------------------------------------------------------------ + // Documentation + //------------------------------------------------------------------------------------------------------------------ + static constexpr auto DESCRIPTION = "Check if the input geometry has M values."; + + static constexpr auto EXAMPLE = R"( + -- HasM for a 2D geometry + SELECT ST_HasM(ST_GeomFromText('POINT(1 1)')); + ---- + false + + -- HasM for a 3DZ geometry + SELECT ST_HasM(ST_GeomFromText('POINT Z(1 1 1)')); + ---- + false + + -- HasM for a 3DM geometry + SELECT ST_HasM(ST_GeomFromText('POINT M(1 1 1)')); + ---- + true + + -- HasM for a 4D geometry + SELECT ST_HasM(ST_GeomFromText('POINT ZM(1 1 1 1)')); + ---- + true + )"; + + //------------------------------------------------------------------------------------------------------------------ + // Register + //------------------------------------------------------------------------------------------------------------------ + static void Register(DatabaseInstance &db) { + FunctionBuilder::RegisterScalar(db, "ST_HasM", [](ScalarFunctionBuilder &func) { + func.AddVariant([](ScalarFunctionVariantBuilder &variant) { + variant.AddParameter("geom", GeoTypes::GEOMETRY()); + variant.SetReturnType(LogicalType::BOOLEAN); + + variant.SetInit(LocalState::Init); + variant.SetFunction(ExecuteGeometry); + }); + + func.AddVariant([](ScalarFunctionVariantBuilder &variant) { + variant.AddParameter("wkb", GeoTypes::WKB_BLOB()); + variant.SetReturnType(LogicalType::BOOLEAN); + + variant.SetFunction(ExecuteWKB); + }); + + func.SetDescription(DESCRIPTION); + func.SetExample(EXAMPLE); + + func.SetTag("ext", "spatial"); + func.SetTag("category", "property"); + }); + } +}; + 
+//====================================================================================================================== +// ST_ZMFlag +//====================================================================================================================== + +struct ST_ZMFlag { + + //------------------------------------------------------------------------------------------------------------------ + // GEOMETRY + //------------------------------------------------------------------------------------------------------------------ + static void ExecuteGeometry(DataChunk &args, ExpressionState &state, Vector &result) { + auto &lstate = LocalState::ResetAndGet(state); + + UnaryExecutor::Execute(args.data[0], result, args.size(), [&](const string_t &blob) { + const auto geom = lstate.Deserialize(blob); + const auto has_z = geom.has_z(); + const auto has_m = geom.has_m(); + + if (has_z && has_m) { + return 3; + } + if (has_z) { + return 2; + } + if (has_m) { + return 1; + } + return 0; + }); + } + + //------------------------------------------------------------------------------------------------------------------ + // WKB + //------------------------------------------------------------------------------------------------------------------ + static void ExecuteWKB(DataChunk &args, ExpressionState &state, Vector &result) { + UnaryExecutor::Execute(args.data[0], result, args.size(), [](const string_t &wkb) { + BinaryReader cursor(wkb.GetData(), wkb.GetSize()); + + const auto le = cursor.Read(); + const auto type = le ? 
cursor.Read() : cursor.ReadBE(); + + // Check for ISO WKB and EWKB Z and M flags + const uint32_t iso_wkb_props = (type & 0xffff) / 1000; + const auto has_z = (iso_wkb_props == 1) || (iso_wkb_props == 3) || ((type & 0x80000000) != 0); + const auto has_m = (iso_wkb_props == 2) || (iso_wkb_props == 3) || ((type & 0x40000000) != 0); + + if (has_z && has_m) { + return 3; + } + if (has_z) { + return 2; + } + if (has_m) { + return 1; + } + return 0; + }); + } + + //------------------------------------------------------------------------------------------------------------------ + // Documentation + //------------------------------------------------------------------------------------------------------------------ + static constexpr auto DESCRIPTION = R"( + Returns a flag indicating the presence of Z and M values in the input geometry. + 0 = No Z or M values + 1 = M values only + 2 = Z values only + 3 = Z and M values + )"; + + static constexpr auto EXAMPLE = R"( + -- ZMFlag for a 2D geometry + SELECT ST_ZMFlag(ST_GeomFromText('POINT(1 1)')); + ---- + 0 + + -- ZMFlag for a 3DZ geometry + SELECT ST_ZMFlag(ST_GeomFromText('POINT Z(1 1 1)')); + ---- + 2 + + -- ZMFlag for a 3DM geometry + SELECT ST_ZMFlag(ST_GeomFromText('POINT M(1 1 1)')); + ---- + 1 + + -- ZMFlag for a 4D geometry + SELECT ST_ZMFlag(ST_GeomFromText('POINT ZM(1 1 1 1)')); + ---- + 3 + )"; + + //------------------------------------------------------------------------------------------------------------------ + // Register + //------------------------------------------------------------------------------------------------------------------ + static void Register(DatabaseInstance &db) { + FunctionBuilder::RegisterScalar(db, "ST_ZMFlag", [](ScalarFunctionBuilder &func) { + func.AddVariant([](ScalarFunctionVariantBuilder &variant) { + variant.AddParameter("geom", GeoTypes::GEOMETRY()); + variant.SetReturnType(LogicalType::UTINYINT); + + variant.SetInit(LocalState::Init); + variant.SetFunction(ExecuteGeometry); + 
}); + + func.AddVariant([](ScalarFunctionVariantBuilder &variant) { + variant.AddParameter("wkb", GeoTypes::WKB_BLOB()); + variant.SetReturnType(LogicalType::UTINYINT); + + variant.SetFunction(ExecuteWKB); + }); + + func.SetDescription(DESCRIPTION); + func.SetExample(EXAMPLE); + + func.SetTag("ext", "spatial"); + func.SetTag("category", "property"); + }); + } +}; + +//====================================================================================================================== +// ST_Distance_Sphere +//====================================================================================================================== + +struct ST_Distance_Sphere { + + //------------------------------------------------------------------------------------------------------------------ + // GEOMETRY + //------------------------------------------------------------------------------------------------------------------ + static void ExecuteGeometry(DataChunk &args, ExpressionState &state, Vector &result) { + auto &lstate = LocalState::ResetAndGet(state); + + BinaryExecutor::Execute( + args.data[0], args.data[1], result, args.size(), [&](const string_t &l_blob, const string_t &r_blob) { + const auto lhs = lstate.Deserialize(l_blob); + const auto rhs = lstate.Deserialize(r_blob); + + if (lhs.get_type() != sgl::geometry_type::POINT || rhs.get_type() != sgl::geometry_type::POINT) { + throw InvalidInputException("ST_Distance_Sphere only accepts POINT geometries"); + } + + if (lhs.is_empty() || rhs.is_empty()) { + throw InvalidInputException("ST_Distance_Sphere does not accept empty geometries"); + } + + const auto lv = lhs.get_vertex_xy(0); + const auto rv = rhs.get_vertex_xy(0); + + return sgl::util::haversine_distance(lv.x, lv.y, rv.x, rv.y); + }); + } + + //------------------------------------------------------------------------------------------------------------------ + // POINT_2D + 
//------------------------------------------------------------------------------------------------------------------ + static void ExecutePoint(DataChunk &args, ExpressionState &state, Vector &result) { + D_ASSERT(args.data.size() == 2); + auto &left = args.data[0]; + auto &right = args.data[1]; + auto count = args.size(); + + using POINT_TYPE = StructTypeBinary; + using DISTANCE_TYPE = PrimitiveType; + + GenericExecutor::ExecuteBinary( + left, right, result, count, [&](POINT_TYPE left, POINT_TYPE right) { + return sgl::util::haversine_distance(left.a_val, left.b_val, right.a_val, right.b_val); + }); + } + + //------------------------------------------------------------------------------------------------------------------ + // Documentation + //------------------------------------------------------------------------------------------------------------------ + static constexpr auto DESCRIPTION = R"( + Returns the haversine (great circle) distance between two geometries. + + - Only supports POINT geometries. + - Returns the distance in meters. + - The input is expected to be in WGS84 (EPSG:4326) coordinates, using a [latitude, longitude] axis order. 
+ )"; + + // TODO: Example + static constexpr auto EXAMPLE = R"()"; + + //------------------------------------------------------------------------------------------------------------------ + // Register + //------------------------------------------------------------------------------------------------------------------ + static void Register(DatabaseInstance &db) { + FunctionBuilder::RegisterScalar(db, "ST_Distance_Sphere", [](ScalarFunctionBuilder &func) { + func.AddVariant([](ScalarFunctionVariantBuilder &variant) { + variant.AddParameter("geom1", GeoTypes::GEOMETRY()); + variant.AddParameter("geom2", GeoTypes::GEOMETRY()); + variant.SetReturnType(LogicalType::DOUBLE); + + variant.SetInit(LocalState::Init); + variant.SetFunction(ExecuteGeometry); + }); + + func.AddVariant([](ScalarFunctionVariantBuilder &variant) { + variant.AddParameter("point1", GeoTypes::POINT_2D()); + variant.AddParameter("point2", GeoTypes::POINT_2D()); + variant.SetReturnType(LogicalType::DOUBLE); + + variant.SetFunction(ExecutePoint); + }); + + func.SetDescription(DESCRIPTION); + func.SetExample(EXAMPLE); + + func.SetTag("ext", "spatial"); + func.SetTag("category", "property"); + }); + } +}; + +//====================================================================================================================== +// ST_Hilbert +//====================================================================================================================== +struct ST_Hilbert { + + //------------------------------------------------------------------------------------------------------------------ + // Hilbert Curve Encoding + // From (Public Domain): https://github.com/rawrunprotected/hilbert_curves + //------------------------------------------------------------------------------------------------------------------ + static uint32_t Interleave(uint32_t x) { + x = (x | (x << 8)) & 0x00FF00FF; + x = (x | (x << 4)) & 0x0F0F0F0F; + x = (x | (x << 2)) & 0x33333333; + x = (x | (x << 1)) & 0x55555555; + 
return x; + } + + static uint32_t HilbertEncode(uint32_t n, uint32_t x, uint32_t y) { + x = x << (16 - n); + y = y << (16 - n); + + // Initial prefix scan round, prime with x and y + uint32_t a = x ^ y; + uint32_t b = 0xFFFF ^ a; + uint32_t c = 0xFFFF ^ (x | y); + uint32_t d = x & (y ^ 0xFFFF); + uint32_t A = a | (b >> 1); + uint32_t B = (a >> 1) ^ a; + uint32_t C = ((c >> 1) ^ (b & (d >> 1))) ^ c; + uint32_t D = ((a & (c >> 1)) ^ (d >> 1)) ^ d; + + a = A; + b = B; + c = C; + d = D; + A = ((a & (a >> 2)) ^ (b & (b >> 2))); + B = ((a & (b >> 2)) ^ (b & ((a ^ b) >> 2))); + C ^= ((a & (c >> 2)) ^ (b & (d >> 2))); + D ^= ((b & (c >> 2)) ^ ((a ^ b) & (d >> 2))); + + a = A; + b = B; + c = C; + d = D; + A = ((a & (a >> 4)) ^ (b & (b >> 4))); + B = ((a & (b >> 4)) ^ (b & ((a ^ b) >> 4))); + C ^= ((a & (c >> 4)) ^ (b & (d >> 4))); + D ^= ((b & (c >> 4)) ^ ((a ^ b) & (d >> 4))); + + // Final round and projection + a = A; + b = B; + c = C; + d = D; + C ^= ((a & (c >> 8)) ^ (b & (d >> 8))); + D ^= ((b & (c >> 8)) ^ ((a ^ b) & (d >> 8))); + + // Undo transformation prefix scan + a = C ^ (C >> 1); + b = D ^ (D >> 1); + + // Recover index bits + uint32_t i0 = x ^ y; + uint32_t i1 = b | (0xFFFF ^ (i0 | a)); + + return ((Interleave(i1) << 1) | Interleave(i0)) >> (32 - 2 * n); + } + + static uint32_t FloatToUint32(float f) { + if (std::isnan(f)) { + return 0xFFFFFFFF; + } + uint32_t res; + memcpy(&res, &f, sizeof(res)); + if ((res & 0x80000000) != 0) { + res ^= 0xFFFFFFFF; + } else { + res |= 0x80000000; + } + return res; + } + + //------------------------------------------------------------------------------------------------------------------ + // BOX_2D / BOX_2F + //------------------------------------------------------------------------------------------------------------------ + template + static void ExecuteBox(DataChunk &args, ExpressionState &state, Vector &result) { + auto &input_vec = args.data[0]; + auto &bounds_vec = args.data[1]; + auto count = args.size(); + + 
constexpr auto max_hilbert = std::numeric_limits::max(); + + using BOX_TYPE = StructTypeQuaternary; + using UINT32_TYPE = PrimitiveType; + + GenericExecutor::ExecuteBinary( + input_vec, bounds_vec, result, count, [&](BOX_TYPE &box, BOX_TYPE &bounds) { + const auto x = box.a_val + (box.c_val - box.a_val) / static_cast(2); + const auto y = box.b_val + (box.d_val - box.b_val) / static_cast(2); + + const auto hilbert_width = max_hilbert / (bounds.c_val - bounds.a_val); + const auto hilbert_height = max_hilbert / (bounds.d_val - bounds.b_val); + + // TODO: Check for overflow + const auto hilbert_x = static_cast((x - bounds.a_val) * hilbert_width); + const auto hilbert_y = static_cast((y - bounds.b_val) * hilbert_height); + const auto h = HilbertEncode(16, hilbert_x, hilbert_y); + return UINT32_TYPE {h}; + }); + } + + //------------------------------------------------------------------------------------------------------------------ + // LON/LAT + //------------------------------------------------------------------------------------------------------------------ + static void ExecuteLonlat(DataChunk &args, ExpressionState &state, Vector &result) { + using DOUBLE_TYPE = PrimitiveType; + using UINT32_TYPE = PrimitiveType; + using BOX_TYPE = StructTypeQuaternary; + + auto constexpr max_hilbert = std::numeric_limits::max(); + + GenericExecutor::ExecuteTernary( + args.data[0], args.data[1], args.data[3], result, args.size(), + [&](DOUBLE_TYPE x, DOUBLE_TYPE y, BOX_TYPE &box) { + const auto hilbert_width = max_hilbert / (box.c_val - box.a_val); + const auto hilbert_height = max_hilbert / (box.d_val - box.b_val); + + // TODO: Check for overflow + const auto hilbert_x = static_cast((x.val - box.a_val) * hilbert_width); + const auto hilbert_y = static_cast((y.val - box.b_val) * hilbert_height); + const auto h = HilbertEncode(16, hilbert_x, hilbert_y); + return UINT32_TYPE {h}; + }); + } + + 
//------------------------------------------------------------------------------------------------------------------ + // GEOMETRY + //------------------------------------------------------------------------------------------------------------------ + static void ExecuteGeometry(DataChunk &args, ExpressionState &state, Vector &result) { + UnaryExecutor::ExecuteWithNulls( + args.data[0], result, args.size(), + [&](const geometry_t &geom, ValidityMask &mask, idx_t out_idx) -> uint32_t { + // TODO: This is shit, dont rely on cached bounds + Box2D bounds; + if (!geom.TryGetCachedBounds(bounds)) { + mask.SetInvalid(out_idx); + return 0; + } + + Box2D bounds_f; + bounds_f.min.x = MathUtil::DoubleToFloatDown(bounds.min.x); + bounds_f.min.y = MathUtil::DoubleToFloatDown(bounds.min.y); + bounds_f.max.x = MathUtil::DoubleToFloatUp(bounds.max.x); + bounds_f.max.y = MathUtil::DoubleToFloatUp(bounds.max.y); + + const auto dx = bounds_f.min.x + (bounds_f.max.x - bounds_f.min.x) / 2; + const auto dy = bounds_f.min.y + (bounds_f.max.y - bounds_f.min.y) / 2; + + const auto hx = FloatToUint32(dx); + const auto hy = FloatToUint32(dy); + + return HilbertEncode(16, hx, hy); + }); + } + + static void ExecuteGeometryWithBounds(DataChunk &args, ExpressionState &state, Vector &result) { + auto &lstate = LocalState::ResetAndGet(state); + + auto constexpr max_hilbert = std::numeric_limits::max(); + + using BOX_TYPE = StructTypeQuaternary; + using GEOM_TYPE = PrimitiveType; + using UINT32_TYPE = PrimitiveType; + + GenericExecutor::ExecuteBinary( + args.data[0], args.data[1], result, args.size(), [&](const GEOM_TYPE &geom_type, const BOX_TYPE &bounds) { + const auto blob = geom_type.val; + + const auto geom = lstate.Deserialize(blob); + + // TODO: Dont deserialize, just get the bounds from blob instead. 
+ sgl::box_xy geom_bounds = {}; + + if (!sgl::ops::try_get_extent_xy(&geom, &geom_bounds)) { + throw InvalidInputException("ST_Hilbert(geom, bounds) does not support empty geometries"); + } + + const auto dx = geom_bounds.min.x + (geom_bounds.max.x - geom_bounds.min.x) / 2; + const auto dy = geom_bounds.min.y + (geom_bounds.max.y - geom_bounds.min.y) / 2; + + const auto hilbert_width = max_hilbert / (bounds.c_val - bounds.a_val); + const auto hilbert_height = max_hilbert / (bounds.d_val - bounds.b_val); + // TODO: Check for overflow + const auto hilbert_x = static_cast((dx - bounds.a_val) * hilbert_width); + const auto hilbert_y = static_cast((dy - bounds.b_val) * hilbert_height); + + const auto h = HilbertEncode(16, hilbert_x, hilbert_y); + return UINT32_TYPE {h}; + }); + } + + //------------------------------------------------------------------------------------------------------------------ + // Documentation + //------------------------------------------------------------------------------------------------------------------ + static constexpr auto DESCRIPTION = R"( + Encodes the X and Y values as the hilbert curve index for a curve covering the given bounding box. + If a geometry is provided, the center of the approximate bounding box is used as the point to encode. + If no bounding box is provided, the hilbert curve index is mapped to the full range of a single-presicion float. + For the BOX_2D and BOX_2DF variants, the center of the box is used as the point to encode. 
+ )"; + + // TODO: example + static constexpr auto EXAMPLE = ""; + + //------------------------------------------------------------------------------------------------------------------ + // Register + //------------------------------------------------------------------------------------------------------------------ + static void Register(DatabaseInstance &db) { + // TODO: All of these needs examples and docs + + FunctionBuilder::RegisterScalar(db, "ST_Hilbert", [](ScalarFunctionBuilder &func) { + func.AddVariant([](ScalarFunctionVariantBuilder &variant) { + variant.AddParameter("x", LogicalType::DOUBLE); + variant.AddParameter("y", LogicalType::DOUBLE); + variant.AddParameter("bounds", GeoTypes::BOX_2D()); + variant.SetReturnType(LogicalType::UINTEGER); + + variant.SetFunction(ExecuteLonlat); + }); + + func.AddVariant([](ScalarFunctionVariantBuilder &variant) { + variant.AddParameter("geom", GeoTypes::GEOMETRY()); + variant.AddParameter("bounds", GeoTypes::BOX_2D()); + variant.SetReturnType(LogicalType::UINTEGER); + + variant.SetFunction(ExecuteGeometryWithBounds); + variant.SetInit(LocalState::Init); + }); + + func.AddVariant([](ScalarFunctionVariantBuilder &variant) { + variant.AddParameter("geom", GeoTypes::GEOMETRY()); + variant.SetReturnType(LogicalType::UINTEGER); + + variant.SetFunction(ExecuteGeometry); + variant.SetInit(LocalState::Init); + }); + + func.AddVariant([](ScalarFunctionVariantBuilder &variant) { + variant.AddParameter("box", GeoTypes::BOX_2D()); + variant.AddParameter("bounds", GeoTypes::BOX_2D()); + variant.SetReturnType(LogicalType::UINTEGER); + + variant.SetFunction(ExecuteBox); + }); + + func.AddVariant([](ScalarFunctionVariantBuilder &variant) { + variant.AddParameter("box", GeoTypes::BOX_2DF()); + variant.AddParameter("bounds", GeoTypes::BOX_2DF()); + variant.SetReturnType(LogicalType::UINTEGER); + + variant.SetFunction(ExecuteBox); + }); + + func.SetTag("ext", "spatial"); + func.SetTag("category", "property"); + + 
func.SetDescription(DESCRIPTION); + func.SetExample(EXAMPLE); + }); + } +}; + +//====================================================================================================================== +// ST_Intersects +//====================================================================================================================== + +struct ST_Intersects { + + //------------------------------------------------------------------------------------------------------------------ + // BOX_2D + //------------------------------------------------------------------------------------------------------------------ + static void ExecuteBox(DataChunk &args, ExpressionState &state, Vector &result) { + using BOX_TYPE = StructTypeQuaternary; + using BOOL_TYPE = PrimitiveType; + + GenericExecutor::ExecuteBinary( + args.data[0], args.data[1], result, args.size(), [&](BOX_TYPE &left, BOX_TYPE &right) { + return !(left.a_val > right.c_val || left.c_val < right.a_val || left.b_val > right.d_val || + left.d_val < right.b_val); + }); + } + + //------------------------------------------------------------------------------------------------------------------ + // Documentation + //------------------------------------------------------------------------------------------------------------------ + // TODO: Add docs + static constexpr auto DESCRIPTION = ""; + static constexpr auto EXAMPLE = ""; + + //------------------------------------------------------------------------------------------------------------------ + // Register + //------------------------------------------------------------------------------------------------------------------ + static void Register(DatabaseInstance &db) { + FunctionBuilder::RegisterScalar(db, "ST_Intersects", [](ScalarFunctionBuilder &func) { + func.AddVariant([](ScalarFunctionVariantBuilder &variant) { + variant.AddParameter("box1", GeoTypes::BOX_2D()); + variant.AddParameter("box2", GeoTypes::BOX_2D()); + variant.SetReturnType(LogicalType::BOOLEAN); 
+ + variant.SetFunction(ExecuteBox); + }); + + func.SetDescription(DESCRIPTION); + func.SetExample(EXAMPLE); + + func.SetTag("ext", "spatial"); + func.SetTag("category", "relation"); + }); + } +}; + +//====================================================================================================================== +// ST_IntersectsExtent +//====================================================================================================================== + +struct ST_IntersectsExtent { + + //------------------------------------------------------------------------------------------------------------------ + // GEOMETRY + //------------------------------------------------------------------------------------------------------------------ + static void Execute(DataChunk &args, ExpressionState &state, Vector &result) { + auto &lstate = LocalState::ResetAndGet(state); + + BinaryExecutor::Execute(args.data[0], args.data[1], result, args.size(), + [&](const string_t &lhs_blob, const string_t &rhs_blob) { + // TODO: In the future we should store if the geom is + // empty/vertex count in the blob + + const auto lhs_geom = lstate.Deserialize(lhs_blob); + + sgl::box_xy lhs_ext = {}; + if (!sgl::ops::try_get_extent_xy(&lhs_geom, &lhs_ext)) { + return false; + } + + const auto rhs_geom = lstate.Deserialize(rhs_blob); + + sgl::box_xy rhs_ext = {}; + if (!sgl::ops::try_get_extent_xy(&rhs_geom, &rhs_ext)) { + return false; + } + + return lhs_ext.intersects(rhs_ext); + }); + } + + //------------------------------------------------------------------------------------------------------------------ + // Documentation + //------------------------------------------------------------------------------------------------------------------ + static constexpr auto DESCRIPTION = R"( + Returns true if the extent of two geometries intersects + )"; + + // TODO: Add examples + static constexpr auto EXAMPLE = ""; + + 
//------------------------------------------------------------------------------------------------------------------ + // Register + //------------------------------------------------------------------------------------------------------------------ + static void Register(DatabaseInstance &db) { + FunctionBuilder::RegisterScalar(db, "ST_Intersects_Extent", [](ScalarFunctionBuilder &func) { + func.AddVariant([](ScalarFunctionVariantBuilder &variant) { + variant.AddParameter("geom1", GeoTypes::GEOMETRY()); + variant.AddParameter("geom2", GeoTypes::GEOMETRY()); + variant.SetReturnType(LogicalType::BOOLEAN); + + variant.SetInit(LocalState::Init); + variant.SetFunction(Execute); + }); + + func.SetDescription(DESCRIPTION); + func.SetExample(EXAMPLE); + + func.SetTag("ext", "spatial"); + func.SetTag("category", "relation"); + }); + } +}; + +//====================================================================================================================== +// ST_IsClosed +//====================================================================================================================== + +struct ST_IsClosed { + + //------------------------------------------------------------------------------------------------------------------ + // Execute (GEOMETRY) + //------------------------------------------------------------------------------------------------------------------ + static void Execute(DataChunk &args, ExpressionState &state, Vector &result) { + auto &lstate = LocalState::ResetAndGet(state); + + UnaryExecutor::Execute(args.data[0], result, args.size(), [&](const string_t &blob) { + const auto geom = lstate.Deserialize(blob); + switch (geom.get_type()) { + case sgl::geometry_type::LINESTRING: + return sgl::linestring::is_closed(&geom); + case sgl::geometry_type::MULTI_LINESTRING: + return sgl::multi_linestring::is_closed(&geom); + default: + // TODO: We should support more than just LINESTRING and MULTILINESTRING (like PostGIS does) + throw 
InvalidInputException("ST_IsClosed only accepts LINESTRING and MULTILINESTRING geometries"); + } + }); + } + + //------------------------------------------------------------------------------------------------------------------ + // Documentation + //------------------------------------------------------------------------------------------------------------------ + static constexpr auto DESCRIPTION = "Check if a geometry is 'closed'"; + // TODO: add example + static constexpr auto EXAMPLE = ""; + + //------------------------------------------------------------------------------------------------------------------ + // Register + //------------------------------------------------------------------------------------------------------------------ + static void Register(DatabaseInstance &db) { + FunctionBuilder::RegisterScalar(db, "ST_IsClosed", [](ScalarFunctionBuilder &func) { + func.AddVariant([](ScalarFunctionVariantBuilder &variant) { + variant.AddParameter("geom", GeoTypes::GEOMETRY()); + variant.SetReturnType(LogicalType::BOOLEAN); + + variant.SetInit(LocalState::Init); + variant.SetFunction(Execute); + }); + + func.SetDescription(DESCRIPTION); + func.SetExample(EXAMPLE); + + func.SetTag("ext", "spatial"); + func.SetTag("category", "property"); + }); + } +}; + +//====================================================================================================================== +// ST_IsEmpty +//====================================================================================================================== + +struct ST_IsEmpty { + + //------------------------------------------------------------------------------------------------------------------ + // GEOMETRY + //------------------------------------------------------------------------------------------------------------------ + static void ExecuteGeometry(DataChunk &args, ExpressionState &state, Vector &result) { + auto &lstate = LocalState::ResetAndGet(state); + + UnaryExecutor::Execute(args.data[0], 
result, args.size(), [&](const string_t &blob) { + const auto geom = lstate.Deserialize(blob); + const auto vertex_count = sgl::ops::vertex_count(&geom); + return vertex_count == 0; + }); + } + + //------------------------------------------------------------------------------------------------------------------ + // LINESTRING_2D + //------------------------------------------------------------------------------------------------------------------ + static void ExecuteLinestring(DataChunk &args, ExpressionState &state, Vector &result) { + UnaryExecutor::Execute(args.data[0], result, args.size(), + [&](const list_entry_t &line) { return line.length == 0; }); + } + + //------------------------------------------------------------------------------------------------------------------ + // POLYGON_2D + //------------------------------------------------------------------------------------------------------------------ + static void ExecutePolygon(DataChunk &args, ExpressionState &state, Vector &result) { + UnaryExecutor::Execute(args.data[0], result, args.size(), + [&](const list_entry_t &poly) { return poly.length == 0; }); + } + + //------------------------------------------------------------------------------------------------------------------ + // Documentation + //------------------------------------------------------------------------------------------------------------------ + static constexpr auto DESCRIPTION = R"( + Returns true if the geometry is "empty". 
+ )"; + static constexpr auto EXAMPLE = ""; + + //------------------------------------------------------------------------------------------------------------------ + // Register + //------------------------------------------------------------------------------------------------------------------ + static void Register(DatabaseInstance &db) { + FunctionBuilder::RegisterScalar(db, "ST_IsEmpty", [](ScalarFunctionBuilder &func) { + func.AddVariant([](ScalarFunctionVariantBuilder &variant) { + variant.AddParameter("geom", GeoTypes::GEOMETRY()); + variant.SetReturnType(LogicalType::BOOLEAN); + + variant.SetInit(LocalState::Init); + variant.SetFunction(ExecuteGeometry); + }); + + func.AddVariant([](ScalarFunctionVariantBuilder &variant) { + variant.AddParameter("linestring", GeoTypes::LINESTRING_2D()); + variant.SetReturnType(LogicalType::BOOLEAN); + + variant.SetFunction(ExecuteLinestring); + }); + + func.AddVariant([](ScalarFunctionVariantBuilder &variant) { + variant.AddParameter("polygon", GeoTypes::POLYGON_2D()); + variant.SetReturnType(LogicalType::BOOLEAN); + + variant.SetFunction(ExecutePolygon); + }); + + func.SetDescription(DESCRIPTION); + func.SetExample(EXAMPLE); + + func.SetTag("ext", "spatial"); + func.SetTag("category", "property"); + }); + } +}; + +//====================================================================================================================== +// ST_Length +//====================================================================================================================== + +struct ST_Length { + + //------------------------------------------------------------------------------------------------------------------ + // GEOMETRY + //------------------------------------------------------------------------------------------------------------------ + static void ExecuteGeometry(DataChunk &args, ExpressionState &state, Vector &result) { + auto &lstate = LocalState::ResetAndGet(state); + + UnaryExecutor::Execute(args.data[0], result, 
args.size(), [&](const string_t &blob) { + const auto geom = lstate.Deserialize(blob); + return sgl::ops::length(&geom); + }); + } + + //------------------------------------------------------------------------------------------------------------------ + // LINESTRING_2D + //------------------------------------------------------------------------------------------------------------------ + static void ExecuteLinestring(DataChunk &args, ExpressionState &state, Vector &result) { + D_ASSERT(args.data.size() == 1); + + auto &line_vec = args.data[0]; + auto count = args.size(); + + auto &coord_vec = ListVector::GetEntry(line_vec); + auto &coord_vec_children = StructVector::GetEntries(coord_vec); + auto x_data = FlatVector::GetData(*coord_vec_children[0]); + auto y_data = FlatVector::GetData(*coord_vec_children[1]); + + UnaryExecutor::Execute(line_vec, result, count, [&](const list_entry_t &line) { + auto offset = line.offset; + auto length = line.length; + double sum = 0; + // Loop over the segments + for (idx_t j = offset; j < offset + length - 1; j++) { + auto x1 = x_data[j]; + auto y1 = y_data[j]; + auto x2 = x_data[j + 1]; + auto y2 = y_data[j + 1]; + sum += std::sqrt(std::pow(x1 - x2, 2) + std::pow(y1 - y2, 2)); + } + return sum; + }); + + if (count == 1) { + result.SetVectorType(VectorType::CONSTANT_VECTOR); + } + } + + //------------------------------------------------------------------------------------------------------------------ + // Documentation + //------------------------------------------------------------------------------------------------------------------ + static constexpr auto DESCRIPTION = R"( + Returns the length of the input line geometry + )"; + + static constexpr auto EXAMPLE = ""; + + //------------------------------------------------------------------------------------------------------------------ + // Register + //------------------------------------------------------------------------------------------------------------------ + static 
void Register(DatabaseInstance &db) { + FunctionBuilder::RegisterScalar(db, "ST_Length", [](ScalarFunctionBuilder &func) { + func.AddVariant([](ScalarFunctionVariantBuilder &variant) { + variant.AddParameter("geom", GeoTypes::GEOMETRY()); + variant.SetReturnType(LogicalType::DOUBLE); + + variant.SetInit(LocalState::Init); + variant.SetFunction(ExecuteGeometry); + }); + + func.AddVariant([](ScalarFunctionVariantBuilder &variant) { + variant.AddParameter("linestring", GeoTypes::LINESTRING_2D()); + variant.SetReturnType(LogicalType::DOUBLE); + + variant.SetFunction(ExecuteLinestring); + }); + + func.SetDescription(DESCRIPTION); + func.SetExample(EXAMPLE); + + func.SetTag("ext", "spatial"); + func.SetTag("category", "property"); + }); + } +}; + +//====================================================================================================================== +// ST_MakeEnvelope +//====================================================================================================================== + +struct ST_MakeEnvelope { + + //------------------------------------------------------------------------------------------------------------------ + // Execute (GEOMETRY) + //------------------------------------------------------------------------------------------------------------------ + static void Execute(DataChunk &args, ExpressionState &state, Vector &result) { + auto &lstate = LocalState::ResetAndGet(state); + + auto &min_x_vec = args.data[0]; + auto &min_y_vec = args.data[1]; + auto &max_x_vec = args.data[2]; + auto &max_y_vec = args.data[3]; + + using DOUBLE_TYPE = PrimitiveType; + using STRING_TYPE = PrimitiveType; + + GenericExecutor::ExecuteQuaternary( + min_x_vec, min_y_vec, max_x_vec, max_y_vec, result, args.size(), + [&](const DOUBLE_TYPE vmin_x, const DOUBLE_TYPE vmin_y, const DOUBLE_TYPE vmax_x, + const DOUBLE_TYPE vmax_y) { + const auto min_x = vmin_x.val; + const auto min_y = vmin_y.val; + const auto max_x = vmax_x.val; + const auto max_y = 
vmax_y.val; + + // This is pretty cool, we dont even need to allocate anything + const double buffer[10] = {min_x, min_y, min_x, max_y, max_x, max_y, max_x, min_y, min_x, min_y}; + + auto ring = sgl::linestring::make_empty(false, false); + ring.set_vertex_data(reinterpret_cast(buffer), 5); + + auto poly = sgl::polygon::make_empty(); + poly.append_part(&ring); + + return lstate.Serialize(result, poly); + }); + } + + //------------------------------------------------------------------------------------------------------------------ + // Documentation + //------------------------------------------------------------------------------------------------------------------ + static constexpr auto DESCRIPTION = R"( + Create a rectangular polygon from min/max coordinates + )"; + static constexpr auto EXAMPLE = ""; // todo: example + + //------------------------------------------------------------------------------------------------------------------ + // Register + //------------------------------------------------------------------------------------------------------------------ + static void Register(DatabaseInstance &db) { + FunctionBuilder::RegisterScalar(db, "ST_MakeEnvelope", [](ScalarFunctionBuilder &func) { + func.AddVariant([](ScalarFunctionVariantBuilder &variant) { + variant.AddParameter("min_x", LogicalType::DOUBLE); + variant.AddParameter("min_y", LogicalType::DOUBLE); + variant.AddParameter("max_x", LogicalType::DOUBLE); + variant.AddParameter("max_y", LogicalType::DOUBLE); + variant.SetReturnType(GeoTypes::GEOMETRY()); + + variant.SetInit(LocalState::Init); + variant.SetFunction(Execute); + }); + + func.SetDescription(DESCRIPTION); + func.SetExample(EXAMPLE); + + func.SetTag("ext", "spatial"); + func.SetTag("category", "construction"); + }); + } +}; + +//====================================================================================================================== +// ST_MakeLine 
+//====================================================================================================================== + +struct ST_MakeLine { + + //------------------------------------------------------------------------------------------------------------------ + // Execute (LIST) + //------------------------------------------------------------------------------------------------------------------ + static void ExecuteList(DataChunk &args, ExpressionState &state, Vector &result) { + auto &lstate = LocalState::ResetAndGet(state); + + auto &child_vec = ListVector::GetEntry(args.data[0]); + auto child_len = ListVector::GetListSize(args.data[0]); + + UnifiedVectorFormat format; + child_vec.ToUnifiedFormat(child_len, format); + + UnaryExecutor::Execute( + args.data[0], result, args.size(), [&](const list_entry_t &entry) { + const auto offset = entry.offset; + const auto length = entry.length; + + uint32_t line_length = 0; + // First pass, filter types, count non-null entries + + for (idx_t i = offset; i < offset + length; i++) { + const auto mapped_idx = format.sel->get_index(i); + if (!format.validity.RowIsValid(mapped_idx)) { + continue; + } + auto &blob = UnifiedVectorFormat::GetData(format)[mapped_idx]; + + // TODO: Peek without deserializing + const auto geom = lstate.Deserialize(blob); + if (geom.get_type() != sgl::geometry_type::POINT) { + throw InvalidInputException("ST_MakeLine only accepts POINT geometries"); + } + + // TODO: Support Z and M + if (geom.has_z() || geom.has_m()) { + throw InvalidInputException( + "ST_MakeLine from list does not accept POINT geometries with Z or M values"); + } + + if (geom.is_empty()) { + continue; + } + + line_length++; + } + + if (line_length == 0) { + // Empty line + return lstate.Serialize(result, sgl::linestring::make_empty(false, false)); + } + + if (line_length == 1) { + throw InvalidInputException("ST_MakeLine requires zero or two or more POINT geometries"); + } + + const auto line_data = 
lstate.GetArena().AllocateAligned(line_length * 2 * sizeof(double)); + + // Second pass, copy over the vertex data + uint32_t vertex_idx = 0; + for (idx_t i = offset; i < offset + length; i++) { + D_ASSERT(vertex_idx < line_length); + + const auto mapped_idx = format.sel->get_index(i); + if (!format.validity.RowIsValid(mapped_idx)) { + continue; + } + auto &blob = UnifiedVectorFormat::GetData(format)[mapped_idx]; + + const auto point = lstate.Deserialize(blob); + const auto point_data = point.get_vertex_data(); + + memcpy(line_data + vertex_idx * 2 * sizeof(double), point_data, 2 * sizeof(double)); + vertex_idx++; + } + + D_ASSERT(vertex_idx == line_length); + + auto line = sgl::linestring::make_empty(false, false); + line.set_vertex_data(line_data, line_length); + + return lstate.Serialize(result, line); + }); + } + + //------------------------------------------------------------------------------------------------------------------ + // Execute (GEOMETRY, GEOMETRY) + //------------------------------------------------------------------------------------------------------------------ + static void ExecuteBinary(DataChunk &args, ExpressionState &state, Vector &result) { + auto &lstate = LocalState::ResetAndGet(state); + + BinaryExecutor::Execute( + args.data[0], args.data[1], result, args.size(), [&](const string_t &l_blob, const string_t &r_blob) { + const auto l_geom = lstate.Deserialize(l_blob); + const auto r_geom = lstate.Deserialize(r_blob); + + if (l_geom.get_type() != sgl::geometry_type::POINT || r_geom.get_type() != sgl::geometry_type::POINT) { + throw InvalidInputException("ST_MakeLine only accepts POINT geometries"); + } + + if (l_geom.is_empty() && r_geom.is_empty()) { + return lstate.Serialize(result, sgl::linestring::make_empty(false, false)); + } + + if (l_geom.is_empty() || r_geom.is_empty()) { + throw InvalidInputException("ST_MakeLine requires zero or two or more POINT geometries"); + } + + const auto has_z = l_geom.has_z() || r_geom.has_z(); + 
const auto has_m = l_geom.has_m() || r_geom.has_m(); + + auto linestring = sgl::linestring::make_empty(has_z, has_m); + + // Create a buffer large enough to store two vertices + double buffer[8] = {0}; + + const auto v1 = l_geom.get_vertex_xyzm(0); + const auto v2 = r_geom.get_vertex_xyzm(0); + + // TODO: this is a bit ugly, add proper append method to sgl instead + idx_t idx = 0; + buffer[idx++] = v1.x; + buffer[idx++] = v1.y; + if (has_z) { + buffer[idx++] = l_geom.has_z() ? v1.zm : 0; + } + if (has_m) { + buffer[idx++] = l_geom.has_m() ? l_geom.has_z() ? v1.m : v1.zm : 0; + } + buffer[idx++] = v2.x; + buffer[idx++] = v2.y; + if (has_z) { + buffer[idx++] = r_geom.has_z() ? v2.zm : 0; + } + if (has_m) { + buffer[idx++] = r_geom.has_m() ? r_geom.has_z() ? v2.m : v2.zm : 0; + } + + linestring.set_vertex_data(reinterpret_cast(buffer), 2); + + return lstate.Serialize(result, linestring); + }); + } + + //------------------------------------------------------------------------------------------------------------------ + // Documentation + //------------------------------------------------------------------------------------------------------------------ + static constexpr auto DESCRIPTION_LIST = R"( + Create a LINESTRING from a list of POINT geometries + )"; + static constexpr auto EXAMPLE_LIST = R"( + SELECT ST_MakeLine([ST_Point(0, 0), ST_Point(1, 1)]); + ---- + LINESTRING(0 0, 1 1) + )"; + + static constexpr auto DESCRIPTION_BINARY = R"( + Create a LINESTRING from two POINT geometries + )"; + static constexpr auto EXAMPLE_BINARY = R"( + SELECT ST_MakeLine(ST_Point(0, 0), ST_Point(1, 1)); + ---- + LINESTRING(0 0, 1 1) + )"; + + //------------------------------------------------------------------------------------------------------------------ + // Register + //------------------------------------------------------------------------------------------------------------------ + static void Register(DatabaseInstance &db) { + FunctionBuilder::RegisterScalar(db, 
"ST_MakeLine", [](ScalarFunctionBuilder &func) { + func.AddVariant([](ScalarFunctionVariantBuilder &variant) { + variant.AddParameter("geoms", LogicalType::LIST(GeoTypes::GEOMETRY())); + variant.SetReturnType(GeoTypes::GEOMETRY()); + + variant.SetInit(LocalState::Init); + variant.SetFunction(ExecuteList); + + variant.SetDescription(DESCRIPTION_LIST); + variant.SetExample(EXAMPLE_LIST); + }); + + func.AddVariant([](ScalarFunctionVariantBuilder &variant) { + variant.AddParameter("start", GeoTypes::GEOMETRY()); + variant.AddParameter("end", GeoTypes::GEOMETRY()); + variant.SetReturnType(GeoTypes::GEOMETRY()); + + variant.SetInit(LocalState::Init); + variant.SetFunction(ExecuteBinary); + + variant.SetDescription(DESCRIPTION_BINARY); + variant.SetExample(EXAMPLE_BINARY); + }); + + func.SetTag("ext", "spatial"); + func.SetTag("category", "construction"); + }); + } +}; + +//====================================================================================================================== +// ST_MakePolygon +//====================================================================================================================== + +struct ST_MakePolygon { + + //------------------------------------------------------------------------------------------------------------------ + // Execute (LINESTRING) + //------------------------------------------------------------------------------------------------------------------ + static void ExecuteFromShell(DataChunk &args, ExpressionState &state, Vector &result) { + auto &lstate = LocalState::ResetAndGet(state); + + UnaryExecutor::Execute(args.data[0], result, args.size(), [&](const string_t &blob) { + auto line = lstate.Deserialize(blob); + + if (line.get_type() != sgl::geometry_type::LINESTRING) { + throw InvalidInputException("ST_MakePolygon only accepts LINESTRING geometries"); + } + + if (line.get_count() < 4) { + throw InvalidInputException("ST_MakePolygon shell requires at least 4 vertices"); + } + + if 
(!sgl::linestring::is_closed(&line)) { + throw std::runtime_error("ST_MakePolygon shell must be closed (first and last vertex must be equal)"); + } + + auto polygon = sgl::polygon::make_empty(line.has_z(), line.has_m()); + polygon.append_part(&line); + + return lstate.Serialize(result, polygon); + }); + } + + //------------------------------------------------------------------------------------------------------------------ + // Execute (LINESTRING, LIST) + //------------------------------------------------------------------------------------------------------------------ + static void ExecuteFromRings(DataChunk &args, ExpressionState &state, Vector &result) { + auto &lstate = LocalState::ResetAndGet(state); + + auto &child_vec = ListVector::GetEntry(args.data[1]); + auto child_len = ListVector::GetListSize(args.data[1]); + + UnifiedVectorFormat child_format; + child_vec.ToUnifiedFormat(child_len, child_format); + + BinaryExecutor::Execute( + args.data[0], args.data[1], result, args.size(), [&](const string_t &blob, const list_entry_t &hole_list) { + // First, setup shell + auto shell = lstate.Deserialize(blob); + if (shell.get_type() != sgl::geometry_type::LINESTRING) { + throw InvalidInputException("ST_MakePolygon only accepts LINESTRING geometries"); + } + // TODO: Support Z and M + if (shell.has_z() || shell.has_m()) { + throw InvalidInputException("ST_MakePolygon from list does not support Z or M values"); + } + if (shell.get_count() < 4) { + throw InvalidInputException("ST_MakePolygon shell requires at least 4 vertices"); + } + if (!sgl::linestring::is_closed(&shell)) { + throw InvalidInputException( + "ST_MakePolygon shell must be closed (first and last vertex must be equal)"); + } + + // Make a polygon! 
+ auto polygon = sgl::polygon::make_empty(false, false); + + // Append the shell + polygon.append_part(&shell); + + // Now setup the rings + const auto holes_offset = hole_list.offset; + const auto holes_length = hole_list.length; + + for (idx_t hole_idx = 0; hole_idx < holes_length; hole_idx++) { + const auto mapped_idx = child_format.sel->get_index(holes_offset + hole_idx); + if (!child_format.validity.RowIsValid(mapped_idx)) { + continue; + } + + const auto &hole_blob = UnifiedVectorFormat::GetData(child_format)[mapped_idx]; + + // Allocate a new hole and deserialize into the memory + auto hole_mem = lstate.GetArena().AllocateAligned(sizeof(sgl::geometry)); + const auto hole = new (hole_mem) sgl::geometry(); + + // TODO: Make this nicer... Add a deserialize in place method to the context + *hole = lstate.Deserialize(hole_blob); + + if (hole->get_type() != sgl::geometry_type::LINESTRING) { + throw InvalidInputException("ST_MakePolygon hole #%lu is not a LINESTRING geometry", + hole_idx + 1); + } + if (hole->has_z() || hole->has_m()) { + throw InvalidInputException("ST_MakePolygon hole #%lu has Z or M values", hole_idx + 1); + } + if (hole->get_count() < 4) { + throw InvalidInputException("ST_MakePolygon hole #%lu requires at least 4 vertices", + hole_idx + 1); + } + if (!sgl::linestring::is_closed(hole)) { + throw InvalidInputException( + "ST_MakePolygon hole #%lu must be closed (first and last vertex must be equal)", + hole_idx + 1); + } + + // Add the hole to the polygon + polygon.append_part(hole); + } + + // Now serialize the polygon + return lstate.Serialize(result, polygon); + }); + } + + //------------------------------------------------------------------------------------------------------------------ + // Register + //------------------------------------------------------------------------------------------------------------------ + static void Register(DatabaseInstance &db) { + FunctionBuilder::RegisterScalar(db, "ST_MakePolygon", 
[](ScalarFunctionBuilder &func) { + func.AddVariant([](ScalarFunctionVariantBuilder &variant) { + variant.AddParameter("shell", GeoTypes::GEOMETRY()); + variant.SetReturnType(GeoTypes::GEOMETRY()); + + variant.SetInit(LocalState::Init); + variant.SetFunction(ExecuteFromShell); + + // TODO: Set example & docs + variant.SetDescription("Create a POLYGON from a LINESTRING shell"); + variant.SetExample("SELECT ST_MakePolygon(ST_LineString([ST_Point(0, 0), ST_Point(1, 0), ST_Point(1, " + "1), ST_Point(0, 0)]));"); + }); + + func.AddVariant([](ScalarFunctionVariantBuilder &variant) { + variant.AddParameter("shell", GeoTypes::GEOMETRY()); + variant.AddParameter("holes", LogicalType::LIST(GeoTypes::GEOMETRY())); + variant.SetReturnType(GeoTypes::GEOMETRY()); + + variant.SetInit(LocalState::Init); + variant.SetFunction(ExecuteFromRings); + + // TODO: Set example & docs + variant.SetDescription("Create a POLYGON from a LINESTRING shell and a list of LINESTRING holes"); + variant.SetExample("SELECT ST_MakePolygon(ST_LineString([ST_Point(0, 0), ST_Point(1, 0), ST_Point(1, " + "1), ST_Point(0, 0)]), [ST_LineString([ST_Point(0.25, 0.25), ST_Point(0.75, 0.25), " + "ST_Point(0.75, 0.75), ST_Point(0.25, 0.25)])]);"); + }); + + func.SetTag("ext", "spatial"); + func.SetTag("category", "construction"); + }); + } +}; + +//====================================================================================================================== +// ST_Multi +//====================================================================================================================== + +struct ST_Multi { + + //------------------------------------------------------------------------------------------------------------------ + // Execute + //------------------------------------------------------------------------------------------------------------------ + static void Execute(DataChunk &args, ExpressionState &state, Vector &result) { + auto &lstate = LocalState::ResetAndGet(state); + 
UnaryExecutor::Execute(args.data[0], result, args.size(), [&](const string_t &blob) { + auto geom = lstate.Deserialize(blob); + const auto has_z = geom.has_z(); + const auto has_m = geom.has_m(); + + switch (geom.get_type()) { + case sgl::geometry_type::POINT: { + auto mpoint = sgl::multi_point::make_empty(has_z, has_m); + mpoint.append_part(&geom); + return lstate.Serialize(result, mpoint); + } + case sgl::geometry_type::LINESTRING: { + auto mline = sgl::multi_linestring::make_empty(has_z, has_m); + mline.append_part(&geom); + return lstate.Serialize(result, mline); + } + case sgl::geometry_type::POLYGON: { + auto mpoly = sgl::multi_polygon::make_empty(has_z, has_m); + mpoly.append_part(&geom); + return lstate.Serialize(result, mpoly); + } + default: + // Just return the original geometry + return blob; + } + }); + } + + //------------------------------------------------------------------------------------------------------------------ + // Documentation + //------------------------------------------------------------------------------------------------------------------ + static constexpr auto DESCRIPTION = R"( + Turns a single geometry into a multi geometry. + + If the geometry is already a multi geometry, it is returned as is. 
+ )"; + + static constexpr auto EXAMPLE = R"( + SELECT ST_Multi(ST_GeomFromText('POINT(1 2)')); + ---- + MULTIPOINT (1 2) + + SELECT ST_Multi(ST_GeomFromText('LINESTRING(1 1, 2 2)')); + ---- + MULTILINESTRING ((1 1, 2 2)) + + SELECT ST_Multi(ST_GeomFromText('POLYGON((0 0, 0 1, 1 1, 1 0, 0 0))')); + ---- + MULTIPOLYGON (((0 0, 0 1, 1 1, 1 0, 0 0))) + )"; + + //------------------------------------------------------------------------------------------------------------------ + // Register + //------------------------------------------------------------------------------------------------------------------ + static void Register(DatabaseInstance &db) { + FunctionBuilder::RegisterScalar(db, "ST_Multi", [](ScalarFunctionBuilder &func) { + func.AddVariant([](ScalarFunctionVariantBuilder &variant) { + variant.AddParameter("geom", GeoTypes::GEOMETRY()); + variant.SetReturnType(GeoTypes::GEOMETRY()); + + variant.SetInit(LocalState::Init); + variant.SetFunction(Execute); + }); + + func.SetDescription(DESCRIPTION); + func.SetExample(EXAMPLE); + + func.SetTag("ext", "spatial"); + func.SetTag("category", "construction"); + }); + } +}; + +//====================================================================================================================== +// ST_NGeometries / ST_NumGeometries +//====================================================================================================================== + +struct ST_NGeometries { + + //------------------------------------------------------------------------------------------------------------------ + // Execute + //------------------------------------------------------------------------------------------------------------------ + static void Execute(DataChunk &args, ExpressionState &state, Vector &result) { + auto &lstate = LocalState::ResetAndGet(state); + + UnaryExecutor::Execute(args.data[0], result, args.size(), [&](const string_t &blob) { + const auto geom = lstate.Deserialize(blob); + switch (geom.get_type()) { + 
case sgl::geometry_type::POINT: + case sgl::geometry_type::LINESTRING: + case sgl::geometry_type::POLYGON: + return geom.is_empty() ? 0 : 1; + case sgl::geometry_type::MULTI_POINT: + case sgl::geometry_type::MULTI_LINESTRING: + case sgl::geometry_type::MULTI_POLYGON: + case sgl::geometry_type::MULTI_GEOMETRY: + return static_cast(geom.get_count()); + default: + D_ASSERT(false); + return 0; + } + }); + } + + //------------------------------------------------------------------------------------------------------------------ + // Documentation + //------------------------------------------------------------------------------------------------------------------ + static constexpr auto DESCRIPTION = R"( + Returns the number of component geometries in a collection geometry. + If the input geometry is not a collection, this function returns 0 or 1 depending on if the geometry is empty or not. + )"; + + // TODO: add example + static constexpr auto EXAMPLE = R"( + + )"; + + //------------------------------------------------------------------------------------------------------------------ + // Register + //------------------------------------------------------------------------------------------------------------------ + static void Register(DatabaseInstance &db) { + // TODO: Maybe make a macro for the aliases + for (auto &alias : {"ST_NumGeometries", "ST_NGeometries"}) { + FunctionBuilder::RegisterScalar(db, alias, [](ScalarFunctionBuilder &func) { + func.AddVariant([](ScalarFunctionVariantBuilder &variant) { + variant.AddParameter("geom", GeoTypes::GEOMETRY()); + variant.SetReturnType(LogicalType::INTEGER); + + variant.SetInit(LocalState::Init); + variant.SetFunction(Execute); + }); + + func.SetDescription(DESCRIPTION); + func.SetExample(EXAMPLE); + + func.SetTag("ext", "spatial"); + func.SetTag("category", "property"); + }); + } + } +}; + +//====================================================================================================================== +// 
ST_NumInteriorRings / ST_NInteriorRings +//====================================================================================================================== + +struct ST_NInteriorRings { + + //------------------------------------------------------------------------------------------------------------------ + // Execute (GEOMETRY) + //------------------------------------------------------------------------------------------------------------------ + static void Execute(DataChunk &args, ExpressionState &state, Vector &result) { + auto &lstate = LocalState::ResetAndGet(state); + + UnaryExecutor::ExecuteWithNulls( + args.data[0], result, args.size(), [&](const string_t &blob, ValidityMask &validity, idx_t idx) { + const auto geom = lstate.Deserialize(blob); + + if (geom.get_type() != sgl::geometry_type::POLYGON) { + validity.SetInvalid(idx); + return 0; + } + + const auto n_rings = static_cast(geom.get_count()); + return n_rings == 0 ? 0 : n_rings - 1; + }); + } + + //------------------------------------------------------------------------------ + // Execute (POLYGON_2D) + //------------------------------------------------------------------------------ + static void ExecutePolygon(DataChunk &args, ExpressionState &state, Vector &result) { + UnaryExecutor::Execute( + args.data[0], result, args.size(), [&](const list_entry_t &polygon) { + const auto rings = polygon.length; + return rings == 0 ? 
rings : static_cast(polygon.length) - 1; // -1 for the exterior ring + }); + } + + //------------------------------------------------------------------------------------------------------------------ + // Documentation + //------------------------------------------------------------------------------------------------------------------ + static constexpr auto DESCRIPTION = R"( + Returns the number if interior rings of a polygon + )"; + + // TODO: add example + static constexpr auto EXAMPLE = ""; + + //------------------------------------------------------------------------------------------------------------------ + // Register + //------------------------------------------------------------------------------------------------------------------ + static void Register(DatabaseInstance &db) { + // TODO: maybe make a macro for the aliases + for (auto &alias : {"ST_NumInteriorRings", "ST_NInteriorRings"}) { + FunctionBuilder::RegisterScalar(db, alias, [](ScalarFunctionBuilder &func) { + func.AddVariant([](ScalarFunctionVariantBuilder &variant) { + variant.AddParameter("geom", GeoTypes::GEOMETRY()); + variant.SetReturnType(LogicalType::INTEGER); + + variant.SetInit(LocalState::Init); + variant.SetFunction(Execute); + }); + + func.AddVariant([](ScalarFunctionVariantBuilder &variant) { + variant.AddParameter("polygon", GeoTypes::POLYGON_2D()); + variant.SetReturnType(LogicalType::INTEGER); + + variant.SetFunction(ExecutePolygon); + }); + + func.SetDescription(DESCRIPTION); + func.SetExample(EXAMPLE); + + func.SetTag("ext", "spatial"); + func.SetTag("category", "property"); + }); + } + } +}; + +//====================================================================================================================== +// ST_NPoints +//====================================================================================================================== + +struct ST_NPoints { + + 
//------------------------------------------------------------------------------------------------------------------ + // Execute (POINT_2D) + //------------------------------------------------------------------------------------------------------------------ + static void ExecutePoint(DataChunk &args, ExpressionState &state, Vector &result) { + using POINT_TYPE = StructTypeBinary; + using COUNT_TYPE = PrimitiveType; + + GenericExecutor::ExecuteUnary(args.data[0], result, args.size(), + [](POINT_TYPE) { return 1; }); + } + + //------------------------------------------------------------------------------------------------------------------ + // Execute (LINESTRING_2D) + //------------------------------------------------------------------------------------------------------------------ + static void ExecuteLineString(DataChunk &args, ExpressionState &state, Vector &result) { + auto input = args.data[0]; + UnaryExecutor::Execute(input, result, args.size(), + [](list_entry_t input) { return input.length; }); + } + + //------------------------------------------------------------------------------------------------------------------ + // Execute (POLYGON_2D) + //------------------------------------------------------------------------------------------------------------------ + static void ExecutePolygon(DataChunk &args, ExpressionState &state, Vector &result) { + D_ASSERT(args.data.size() == 1); + + auto &input = args.data[0]; + auto count = args.size(); + auto &ring_vec = ListVector::GetEntry(input); + auto ring_entries = ListVector::GetData(ring_vec); + + UnaryExecutor::Execute(input, result, count, [&](list_entry_t polygon) { + auto polygon_offset = polygon.offset; + auto polygon_length = polygon.length; + idx_t npoints = 0; + for (idx_t ring_idx = polygon_offset; ring_idx < polygon_offset + polygon_length; ring_idx++) { + auto ring = ring_entries[ring_idx]; + npoints += ring.length; + } + return npoints; + }); + } + + 
//------------------------------------------------------------------------------------------------------------------ + // Execute (BOX_2D) + //------------------------------------------------------------------------------------------------------------------ + static void ExecuteBox(DataChunk &args, ExpressionState &state, Vector &result) { + + using BOX_TYPE = StructTypeQuaternary; + using COUNT_TYPE = PrimitiveType; + + GenericExecutor::ExecuteUnary(args.data[0], result, args.size(), + [](BOX_TYPE) { return 4; }); + } + + //------------------------------------------------------------------------------------------------------------------ + // Execute (GEOMETRY) + //------------------------------------------------------------------------------------------------------------------ + static void ExecuteGeometry(DataChunk &args, ExpressionState &state, Vector &result) { + auto &lstate = LocalState::ResetAndGet(state); + + UnaryExecutor::Execute(args.data[0], result, args.size(), [&](const string_t &blob) { + const auto geom = lstate.Deserialize(blob); + return sgl::ops::vertex_count(&geom); + }); + } + + //------------------------------------------------------------------------------------------------------------------ + // Documentation + //------------------------------------------------------------------------------------------------------------------ + static constexpr auto DESCRIPTION = R"( + Returns the number of vertices within a geometry + )"; + + // TODO: add example + static constexpr auto EXAMPLE = ""; + + //------------------------------------------------------------------------------------------------------------------ + // Register + //------------------------------------------------------------------------------------------------------------------ + static void Register(DatabaseInstance &db) { + + for (const auto &alias : {"ST_NumPoints", "ST_NPoints"}) { + FunctionBuilder::RegisterScalar(db, alias, [](ScalarFunctionBuilder &func) { + 
func.AddVariant([](ScalarFunctionVariantBuilder &variant) { + variant.AddParameter("geom", GeoTypes::GEOMETRY()); + variant.SetReturnType(LogicalType::UINTEGER); + + variant.SetInit(LocalState::Init); + variant.SetFunction(ExecuteGeometry); + }); + + func.AddVariant([](ScalarFunctionVariantBuilder &variant) { + variant.AddParameter("point", GeoTypes::POINT_2D()); + variant.SetReturnType(LogicalType::UBIGINT); + variant.SetFunction(ExecutePoint); + }); + + func.AddVariant([](ScalarFunctionVariantBuilder &variant) { + variant.AddParameter("linestring", GeoTypes::LINESTRING_2D()); + variant.SetReturnType(LogicalType::UBIGINT); + variant.SetFunction(ExecuteLineString); + }); + + func.AddVariant([](ScalarFunctionVariantBuilder &variant) { + variant.AddParameter("polygon", GeoTypes::POLYGON_2D()); + variant.SetReturnType(LogicalType::UBIGINT); + variant.SetFunction(ExecutePolygon); + }); + + func.AddVariant([](ScalarFunctionVariantBuilder &variant) { + variant.AddParameter("box", GeoTypes::BOX_2D()); + variant.SetReturnType(LogicalType::UBIGINT); + variant.SetFunction(ExecuteBox); + }); + + func.SetDescription(DESCRIPTION); + func.SetExample(EXAMPLE); + + func.SetTag("ext", "spatial"); + func.SetTag("category", "property"); + }); + } + } +}; + +//====================================================================================================================== +// ST_Perimeter +//====================================================================================================================== + +struct ST_Perimeter { + + //------------------------------------------------------------------------------ + // Execute (POLYGON_2D) + //------------------------------------------------------------------------------ + static void ExecutePolygon(DataChunk &args, ExpressionState &state, Vector &result) { + D_ASSERT(args.data.size() == 1); + + auto &input = args.data[0]; + auto count = args.size(); + + auto &ring_vec = ListVector::GetEntry(input); + auto ring_entries = 
ListVector::GetData(ring_vec); + auto &coord_vec = ListVector::GetEntry(ring_vec); + auto &coord_vec_children = StructVector::GetEntries(coord_vec); + auto x_data = FlatVector::GetData(*coord_vec_children[0]); + auto y_data = FlatVector::GetData(*coord_vec_children[1]); + + UnaryExecutor::Execute(input, result, count, [&](list_entry_t polygon) { + auto polygon_offset = polygon.offset; + auto polygon_length = polygon.length; + double perimeter = 0; + for (idx_t ring_idx = polygon_offset; ring_idx < polygon_offset + polygon_length; ring_idx++) { + auto ring = ring_entries[ring_idx]; + auto ring_offset = ring.offset; + auto ring_length = ring.length; + + for (idx_t coord_idx = ring_offset; coord_idx < ring_offset + ring_length - 1; coord_idx++) { + auto x1 = x_data[coord_idx]; + auto y1 = y_data[coord_idx]; + auto x2 = x_data[coord_idx + 1]; + auto y2 = y_data[coord_idx + 1]; + perimeter += std::sqrt(std::pow(x1 - x2, 2) + std::pow(y1 - y2, 2)); + } + } + return perimeter; + }); + + if (count == 1) { + result.SetVectorType(VectorType::CONSTANT_VECTOR); + } + } + + //------------------------------------------------------------------------------ + // Execute (BOX_2D) + //------------------------------------------------------------------------------ + static void ExecuteBox(DataChunk &args, ExpressionState &state, Vector &result) { + using BOX_TYPE = StructTypeQuaternary; + using PERIMETER_TYPE = PrimitiveType; + + GenericExecutor::ExecuteUnary(args.data[0], result, args.size(), [&](BOX_TYPE &box) { + auto minx = box.a_val; + auto miny = box.b_val; + auto maxx = box.c_val; + auto maxy = box.d_val; + return 2 * (maxx - minx + maxy - miny); + }); + } + + //------------------------------------------------------------------------------------------------------------------ + // Execute (GEOMETRY) + //------------------------------------------------------------------------------------------------------------------ + static void Execute(DataChunk &args, ExpressionState &state, 
Vector &result) { + auto &lstate = LocalState::ResetAndGet(state); + + UnaryExecutor::Execute(args.data[0], result, args.size(), [&](const string_t &blob) { + const auto geom = lstate.Deserialize(blob); + return sgl::ops::perimeter(&geom); + }); + } + + //------------------------------------------------------------------------------------------------------------------ + // Documentation + //------------------------------------------------------------------------------------------------------------------ + static constexpr auto DESCRIPTION = R"( + Returns the length of the perimeter of the geometry + )"; + + // TODO: Add example + static constexpr auto EXAMPLE = ""; + + //------------------------------------------------------------------------------------------------------------------ + // Register + //------------------------------------------------------------------------------------------------------------------ + static void Register(DatabaseInstance &db) { + FunctionBuilder::RegisterScalar(db, "ST_Perimeter", [](ScalarFunctionBuilder &func) { + func.AddVariant([](ScalarFunctionVariantBuilder &variant) { + variant.AddParameter("geom", GeoTypes::GEOMETRY()); + variant.SetReturnType(LogicalType::DOUBLE); + + variant.SetInit(LocalState::Init); + variant.SetFunction(Execute); + }); + + func.AddVariant([](ScalarFunctionVariantBuilder &variant) { + variant.AddParameter("polygon", GeoTypes::POLYGON_2D()); + variant.SetReturnType(LogicalType::DOUBLE); + variant.SetFunction(ExecutePolygon); + }); + + func.AddVariant([](ScalarFunctionVariantBuilder &variant) { + variant.AddParameter("box", GeoTypes::BOX_2D()); + variant.SetReturnType(LogicalType::DOUBLE); + variant.SetFunction(ExecuteBox); + }); + + func.SetDescription(DESCRIPTION); + func.SetExample(EXAMPLE); + + func.SetTag("ext", "spatial"); + func.SetTag("category", "property"); + }); + } +}; + +//====================================================================================================================== +// 
ST_Point +//====================================================================================================================== + +struct ST_Point { + + //------------------------------------------------------------------------------ + // POINT_2D + //------------------------------------------------------------------------------ + static void ExecutePoint2D(DataChunk &args, ExpressionState &state, Vector &result) { + D_ASSERT(args.data.size() == 2); + auto count = args.size(); + + auto &x = args.data[0]; + auto &y = args.data[1]; + + x.Flatten(count); + y.Flatten(count); + + auto &children = StructVector::GetEntries(result); + auto &x_child = children[0]; + auto &y_child = children[1]; + + x_child->Reference(x); + y_child->Reference(y); + + if (count == 1) { + result.SetVectorType(VectorType::CONSTANT_VECTOR); + } + } + + //------------------------------------------------------------------------------ + // POINT_3D + //------------------------------------------------------------------------------ + static void ExecutePoint3D(DataChunk &args, ExpressionState &state, Vector &result) { + D_ASSERT(args.data.size() == 3); + auto count = args.size(); + + auto &x = args.data[0]; + auto &y = args.data[1]; + auto &z = args.data[2]; + + x.Flatten(count); + y.Flatten(count); + z.Flatten(count); + + auto &children = StructVector::GetEntries(result); + auto &x_child = children[0]; + auto &y_child = children[1]; + auto &z_child = children[2]; + + x_child->Reference(x); + y_child->Reference(y); + z_child->Reference(z); + + if (count == 1) { + result.SetVectorType(VectorType::CONSTANT_VECTOR); + } + } + + //------------------------------------------------------------------------------ + // POINT_4D + //------------------------------------------------------------------------------ + static void ExecutePoint4D(DataChunk &args, ExpressionState &state, Vector &result) { + D_ASSERT(args.data.size() == 4); + auto count = args.size(); + + auto &x = args.data[0]; + auto &y = 
args.data[1]; + auto &z = args.data[2]; + auto &m = args.data[3]; + + x.Flatten(count); + y.Flatten(count); + z.Flatten(count); + m.Flatten(count); + + auto &children = StructVector::GetEntries(result); + auto &x_child = children[0]; + auto &y_child = children[1]; + auto &z_child = children[2]; + auto &m_child = children[3]; + + x_child->Reference(x); + y_child->Reference(y); + z_child->Reference(z); + m_child->Reference(m); + + if (count == 1) { + result.SetVectorType(VectorType::CONSTANT_VECTOR); + } + } + + //------------------------------------------------------------------------------------------------------------------ + // Execute (GEOMETRY) + //------------------------------------------------------------------------------------------------------------------ + static void ExecuteGeometry(DataChunk &args, ExpressionState &state, Vector &result) { + auto &lstate = LocalState::ResetAndGet(state); + + BinaryExecutor::Execute( + args.data[0], args.data[1], result, args.size(), [&](const double x, const double y) { + const double buffer[2] = {x, y}; + + sgl::geometry geometry; + geometry.set_type(sgl::geometry_type::POINT); + geometry.set_vertex_data(reinterpret_cast(buffer), 1); + + return lstate.Serialize(result, geometry); + }); + } + + //------------------------------------------------------------------------------------------------------------------ + // Documentation + //------------------------------------------------------------------------------------------------------------------ + static constexpr auto DESCRIPTION = R"( + Creates a GEOMETRY point + )"; + + // TODO: example + static constexpr auto EXAMPLE = ""; + + //------------------------------------------------------------------------------------------------------------------ + // Register + //------------------------------------------------------------------------------------------------------------------ + static void Register(DatabaseInstance &db) { + FunctionBuilder::RegisterScalar(db, 
"ST_Point", [](ScalarFunctionBuilder &func) { + func.AddVariant([](ScalarFunctionVariantBuilder &variant) { + variant.AddParameter("x", LogicalType::DOUBLE); + variant.AddParameter("y", LogicalType::DOUBLE); + variant.SetReturnType(GeoTypes::GEOMETRY()); + + variant.SetFunction(ExecuteGeometry); + variant.SetInit(LocalState::Init); + }); + + func.SetDescription(DESCRIPTION); + func.SetExample(EXAMPLE); + + func.SetTag("ext", "spatial"); + func.SetTag("category", "construction"); + }); + + FunctionBuilder::RegisterScalar(db, "ST_Point2D", [](ScalarFunctionBuilder &func) { + func.AddVariant([](ScalarFunctionVariantBuilder &variant) { + variant.AddParameter("x", LogicalType::DOUBLE); + variant.AddParameter("y", LogicalType::DOUBLE); + variant.SetReturnType(GeoTypes::POINT_2D()); + variant.SetFunction(ExecutePoint2D); + + variant.SetDescription("Creates a POINT_2D"); + }); + + func.SetTag("ext", "spatial"); + func.SetTag("category", "construction"); + }); + + FunctionBuilder::RegisterScalar(db, "ST_Point3D", [](ScalarFunctionBuilder &func) { + func.AddVariant([](ScalarFunctionVariantBuilder &variant) { + variant.AddParameter("x", LogicalType::DOUBLE); + variant.AddParameter("y", LogicalType::DOUBLE); + variant.AddParameter("z", LogicalType::DOUBLE); + variant.SetReturnType(GeoTypes::POINT_3D()); + variant.SetFunction(ExecutePoint3D); + + variant.SetDescription("Creates a POINT_3D"); + }); + + func.SetTag("ext", "spatial"); + func.SetTag("category", "construction"); + }); + + FunctionBuilder::RegisterScalar(db, "ST_Point4D", [](ScalarFunctionBuilder &func) { + func.AddVariant([](ScalarFunctionVariantBuilder &variant) { + variant.AddParameter("x", LogicalType::DOUBLE); + variant.AddParameter("y", LogicalType::DOUBLE); + variant.AddParameter("z", LogicalType::DOUBLE); + variant.AddParameter("m", LogicalType::DOUBLE); + variant.SetReturnType(GeoTypes::POINT_4D()); + variant.SetFunction(ExecutePoint4D); + + variant.SetDescription("Creates a POINT_4D"); + }); + + 
func.SetTag("ext", "spatial"); + func.SetTag("category", "construction"); + }); + } +}; + +//====================================================================================================================== +// ST_PointN +//====================================================================================================================== + +struct ST_PointN { + + //------------------------------------------------------------------------------------------------------------------ + // Execute (GEOMETRY) + //------------------------------------------------------------------------------------------------------------------ + static void Execute(DataChunk &args, ExpressionState &state, Vector &result) { + auto &lstate = LocalState::ResetAndGet(state); + + BinaryExecutor::ExecuteWithNulls( + args.data[0], args.data[1], result, args.size(), + [&](const string_t &blob, const int32_t index, ValidityMask &mask, const idx_t row_idx) { + // TODO: peek type without deserializing + const auto geom = lstate.Deserialize(blob); + + if (geom.get_type() != sgl::geometry_type::LINESTRING) { + mask.SetInvalid(row_idx); + return string_t {}; + } + + const auto point_count = geom.get_count(); + + const auto is_empty = point_count == 0; + const auto is_under = index == 0 || index < -static_cast(point_count); + const auto is_above = index > static_cast(point_count); + + if (is_empty || is_under || is_above) { + mask.SetInvalid(row_idx); + return string_t {}; + } + + const auto vertex_elem = index < 0 ? 
point_count + index : index - 1; + const auto vertex_size = geom.get_vertex_size(); + const auto vertex_data = geom.get_vertex_data(); + + // Reference the existing vertex data + sgl::geometry point(sgl::geometry_type::POINT, geom.has_z(), geom.has_m()); + point.set_vertex_data(vertex_data + vertex_elem * vertex_size, 1); + + return lstate.Serialize(result, point); + }); + } +
+    //------------------------------------------------------------------------------------------------------------------
+    // Execute (LINESTRING_2D)
+    //------------------------------------------------------------------------------------------------------------------
+    // Returns the n'th vertex of each LINESTRING_2D as a POINT_2D.
+    //   args.data[0]: LINESTRING_2D input, args.data[1]: INTEGER index.
+    // The index is 1-based; a negative index counts from the end (-1 is the last vertex).
+    // The result row is NULL when either input is NULL, the line is empty, the index is 0,
+    // or the index lies outside [-length, length].
+    static void ExecuteLineString(DataChunk &args, ExpressionState &state, Vector &result) {
+
+        auto geom_vec = args.data[0];
+        auto index_vec = args.data[1];
+        const auto count = args.size();
+
+        UnifiedVectorFormat geom_format;
+        geom_vec.ToUnifiedFormat(count, geom_format);
+        UnifiedVectorFormat index_format;
+        index_vec.ToUnifiedFormat(count, index_format);
+
+        auto line_vertex_entries = ListVector::GetData(geom_vec);
+        auto &line_vertex_vec = ListVector::GetEntry(geom_vec);
+        auto &line_vertex_vec_children = StructVector::GetEntries(line_vertex_vec);
+        auto line_x_data = FlatVector::GetData<double>(*line_vertex_vec_children[0]);
+        auto line_y_data = FlatVector::GetData<double>(*line_vertex_vec_children[1]);
+
+        auto &point_vertex_children = StructVector::GetEntries(result);
+        auto point_x_data = FlatVector::GetData<double>(*point_vertex_children[0]);
+        auto point_y_data = FlatVector::GetData<double>(*point_vertex_children[1]);
+
+        // Read the indices through the unified format: `in_idx_idx` below comes from
+        // index_format.sel, so it must subscript the buffer that format describes.
+        const auto index_data = UnifiedVectorFormat::GetData<int32_t>(index_format);
+
+        for (idx_t out_row_idx = 0; out_row_idx < count; out_row_idx++) {
+
+            const auto in_row_idx = geom_format.sel->get_index(out_row_idx);
+            const auto in_idx_idx = index_format.sel->get_index(out_row_idx);
+            if (!geom_format.validity.RowIsValid(in_row_idx) || !index_format.validity.RowIsValid(in_idx_idx)) {
+                FlatVector::SetNull(result, out_row_idx, true);
+                continue;
+            }
+
+            const auto line = line_vertex_entries[in_row_idx];
+            const auto line_offset = line.offset;
+            // Signed 64-bit arithmetic keeps the negative-index comparisons well defined
+            const auto line_length = static_cast<int64_t>(line.length);
+            const int64_t index = index_data[in_idx_idx];
+
+            if (line_length == 0 || index == 0 || index < -line_length || index > line_length) {
+                FlatVector::SetNull(result, out_row_idx, true);
+                continue;
+            }
+
+            // Translate the 1-based (or negative, from-the-end) index into a 0-based vertex offset
+            const auto actual_index = static_cast<idx_t>(index < 0 ? line_length + index : index - 1);
+            point_x_data[out_row_idx] = line_x_data[line_offset + actual_index];
+            point_y_data[out_row_idx] = line_y_data[line_offset + actual_index];
+        }
+        if (count == 1) {
+            result.SetVectorType(VectorType::CONSTANT_VECTOR);
+        }
+    }
+
+ //------------------------------------------------------------------------------------------------------------------ + // Documentation + //------------------------------------------------------------------------------------------------------------------ + static constexpr auto DESCRIPTION = R"( + Returns the n'th vertex from the input geometry as a point geometry + )"; + + // TODO: add example + static constexpr auto EXAMPLe = ""; + + //------------------------------------------------------------------------------------------------------------------ + // Register + //------------------------------------------------------------------------------------------------------------------ + static void Register(DatabaseInstance &db) { + FunctionBuilder::RegisterScalar(db, "ST_PointN", [](ScalarFunctionBuilder &func) { + func.AddVariant([](ScalarFunctionVariantBuilder &variant) { + variant.AddParameter("geom", GeoTypes::GEOMETRY()); + variant.AddParameter("index", LogicalType::INTEGER); + variant.SetReturnType(GeoTypes::GEOMETRY()); + + variant.SetInit(LocalState::Init); + variant.SetFunction(Execute); + }); + + func.AddVariant([](ScalarFunctionVariantBuilder &variant) { + variant.AddParameter("linestring", GeoTypes::LINESTRING_2D()); + variant.AddParameter("index", LogicalType::INTEGER); + variant.SetReturnType(GeoTypes::POINT_2D()); + 
variant.SetFunction(ExecuteLineString); + }); + + func.SetDescription(DESCRIPTION); + func.SetExample(EXAMPLe); + + func.SetTag("ext", "spatial"); + func.SetTag("category", "construction"); + }); + } +}; + +//====================================================================================================================== +// ST_Points +//====================================================================================================================== + +struct ST_Points { + + //------------------------------------------------------------------------------------------------------------------ + // Execute (GEOMETRY) + //------------------------------------------------------------------------------------------------------------------ + static void Execute(DataChunk &args, ExpressionState &state, Vector &result) { + auto &lstate = LocalState::ResetAndGet(state); + + UnaryExecutor::Execute(args.data[0], result, args.size(), [&](const string_t &blob) { + // Deserialize the geometry + const auto geom = lstate.Deserialize(blob); + const auto has_z = geom.has_z(); + const auto has_m = geom.has_m(); + + // Create a new result multipoint + auto mpoint = sgl::multi_point::make_empty(has_z, has_m); + + sgl::ops::visit_vertices(&geom, [&](const uint8_t *vertex_data) { + // Allocate a new point + auto point_mem = lstate.GetArena().AllocateAligned(sizeof(sgl::geometry)); + + // Create a new point + const auto point = new (point_mem) sgl::geometry(sgl::geometry_type::POINT, has_z, has_m); + point->set_vertex_data(vertex_data, 1); + + // Append the point to the multipoint + mpoint.append_part(point); + }); + + // Serialize the multipoint + return lstate.Serialize(result, mpoint); + }); + } + + //------------------------------------------------------------------------------------------------------------------ + // Documentation + //------------------------------------------------------------------------------------------------------------------ + static constexpr auto 
DESCRIPTION = R"( + Collects all the vertices in the geometry into a MULTIPOINT + )"; + + static constexpr auto EXAMPLE = R"( + select st_points('LINESTRING(1 1, 2 2)'::geometry); + ---- + MULTIPOINT (1 1, 2 2) + + select st_points('MULTIPOLYGON Z EMPTY'::geometry); + ---- + MULTIPOINT Z EMPTY + )"; + + //------------------------------------------------------------------------------------------------------------------ + // Register + //------------------------------------------------------------------------------------------------------------------ + static void Register(DatabaseInstance &db) { + FunctionBuilder::RegisterScalar(db, "ST_Points", [](ScalarFunctionBuilder &func) { + func.AddVariant([](ScalarFunctionVariantBuilder &variant) { + variant.AddParameter("geom", GeoTypes::GEOMETRY()); + variant.SetReturnType(GeoTypes::GEOMETRY()); + + variant.SetInit(LocalState::Init); + variant.SetFunction(Execute); + }); + + func.SetDescription(DESCRIPTION); + func.SetExample(EXAMPLE); + + func.SetTag("ext", "spatial"); + func.SetTag("category", "construction"); + }); + } +}; + +//====================================================================================================================== +// ST_QuadKey +//====================================================================================================================== + +struct ST_QuadKey { +
+    //------------------------------------------------------------------------------------------------------------------
+    // Helpers
+    //------------------------------------------------------------------------------------------------------------------
+    // Writes the quadkey digits ('0'..'3') for the tile containing (lon, lat) at `level` into `buffer`.
+    // Exactly `level` characters are written and no terminator is appended; both callers in this
+    // struct validate 1 <= level <= 23 and pass a 64-byte buffer together with an explicit length.
+    // Coordinates are clamped to longitude [-180, 180] and latitude [-85.05112878, 85.05112878].
+    static void GetQuadKey(double lon, double lat, int32_t level, char *buffer) {
+
+        lat = std::max(-85.05112878, std::min(85.05112878, lat));
+        lon = std::max(-180.0, std::min(180.0, lon));
+
+        const int32_t tile_count = 1 << level;
+        const int32_t max_tile = tile_count - 1;
+
+        const auto lat_rad = lat * PI / 180.0;
+        const auto raw_x = static_cast<int32_t>((lon + 180.0) / 360.0 * tile_count);
+        const auto raw_y = static_cast<int32_t>(
+            (1.0 - std::log(std::tan(lat_rad) + 1.0 / std::cos(lat_rad)) / PI) / 2.0 * tile_count);
+
+        // At lon == 180 (and at the southern latitude limit) the raw tile index equals tile_count.
+        // The digit loop only inspects bits 0..level-1, so an unclamped value would silently wrap
+        // to tile 0 on the opposite edge of the map. Clamp to the last valid tile instead.
+        const auto x = std::max<int32_t>(0, std::min<int32_t>(max_tile, raw_x));
+        const auto y = std::max<int32_t>(0, std::min<int32_t>(max_tile, raw_y));
+
+        // Emit one digit per level, most significant bit first: bit of x adds 1, bit of y adds 2
+        for (int i = level; i > 0; --i) {
+            char digit = '0';
+            const int32_t mask = 1 << (i - 1);
+            if ((x & mask) != 0) {
+                digit += 1;
+            }
+            if ((y & mask) != 0) {
+                digit += 2;
+            }
+            buffer[level - i] = digit;
+        }
+    }
+
+ //------------------------------------------------------------------------------------------------------------------ + // Execute (GEOMETRY) + //------------------------------------------------------------------------------------------------------------------ + static void ExecuteGeometry(DataChunk &args, ExpressionState &state, Vector &result) { + auto &lstate = LocalState::ResetAndGet(state); + + auto &point_in = args.data[0]; + auto &level_in = args.data[1]; + + BinaryExecutor::Execute( + point_in, level_in, result, args.size(), [&](const string_t &blob, const int32_t level) { + if (level < 1 || level > 23) { + throw InvalidInputException("ST_QuadKey: Level must be between 1 and 23"); + } + + const auto point = lstate.Deserialize(blob); + if (point.get_type() != sgl::geometry_type::POINT) { + throw InvalidInputException("ST_QuadKey: Only POINT geometries are supported"); + } + + if (point.is_empty()) { + throw InvalidInputException("ST_QuadKey: Empty geometries are not supported"); + } + + const auto vertex = point.get_vertex_xy(0); + + char buffer[64]; + GetQuadKey(vertex.x, vertex.y, level, buffer); + return StringVector::AddString(result, buffer, level); + }); + } + + //------------------------------------------------------------------------------------------------------------------ + // Execute (LON/LAT) + //------------------------------------------------------------------------------------------------------------------ + static void ExecuteLonLat(DataChunk &args, ExpressionState &state, Vector &result) { + + auto &lon_in = args.data[0]; + auto &lat_in = args.data[1]; + auto &lev_in = args.data[2]; + + TernaryExecutor::Execute( + lon_in, lat_in, lev_in, result, args.size(), [&](const double 
lon, const double lat, const int32_t level) { + if (level < 1 || level > 23) { + throw InvalidInputException("ST_QuadKey: Level must be between 1 and 23"); + } + char buffer[64]; + GetQuadKey(lon, lat, level, buffer); + return StringVector::AddString(result, buffer, level); + }); + } + + //------------------------------------------------------------------------------------------------------------------ + // Documentation + //------------------------------------------------------------------------------------------------------------------ + static constexpr auto DESCRIPTION = R"( + Compute the [quadkey](https://learn.microsoft.com/en-us/bingmaps/articles/bing-maps-tile-system) for a given lon/lat point at a given level. + Note that the parameter order is __longitude__, __latitude__. + + `level` has to be between 1 and 23, inclusive. + + The input coordinates will be clamped to the lon/lat bounds of the earth (longitude between -180 and 180, latitude between -85.05112878 and 85.05112878). 
+ + The geometry overload throws an error if the input geometry is not a `POINT` + )"; + + static constexpr auto EXAMPLE = R"( + SELECT ST_QuadKey(st_point(11.08, 49.45), 10); + ---- + 1333203202 + )"; + + //------------------------------------------------------------------------------------------------------------------ + // Register + //------------------------------------------------------------------------------------------------------------------ + static void Register(DatabaseInstance &db) { + FunctionBuilder::RegisterScalar(db, "ST_QuadKey", [](ScalarFunctionBuilder &func) { + func.AddVariant([](ScalarFunctionVariantBuilder &variant) { + variant.AddParameter("longitude", LogicalType::DOUBLE); + variant.AddParameter("latitude", LogicalType::DOUBLE); + variant.AddParameter("level", LogicalType::INTEGER); + variant.SetReturnType(LogicalType::VARCHAR); + variant.SetFunction(ExecuteLonLat); + }); + + func.AddVariant([](ScalarFunctionVariantBuilder &variant) { + variant.AddParameter("point", GeoTypes::GEOMETRY()); + variant.AddParameter("level", LogicalType::INTEGER); + variant.SetReturnType(LogicalType::VARCHAR); + variant.SetFunction(ExecuteGeometry); + variant.SetInit(LocalState::Init); + }); + + func.SetTag("ext", "spatial"); + func.SetTag("category", "property"); + + func.SetDescription(DESCRIPTION); + func.SetExample(EXAMPLE); + }); + } +}; + +//====================================================================================================================== +// ST_RemoveRepeatedPoints +//====================================================================================================================== + +struct ST_RemoveRepeatedPoints { + + //------------------------------------------------------------------------------------------------------------------ + // LINESTRING_2D + //------------------------------------------------------------------------------------------------------------------ + static void ExecuteLineString(DataChunk &args, 
ExpressionState &state, Vector &result) { + auto input = args.data[0]; + auto count = args.size(); + UnifiedVectorFormat format; + input.ToUnifiedFormat(count, format); + + auto in_line_entries = ListVector::GetData(input); + auto &in_line_vertex_vec = StructVector::GetEntries(ListVector::GetEntry(input)); + auto in_x_data = FlatVector::GetData(*in_line_vertex_vec[0]); + auto in_y_data = FlatVector::GetData(*in_line_vertex_vec[1]); + + auto out_line_entries = ListVector::GetData(result); + auto &out_line_vertex_vec = StructVector::GetEntries(ListVector::GetEntry(result)); + + idx_t out_offset = 0; + for (idx_t out_row_idx = 0; out_row_idx < count; out_row_idx++) { + + auto in_row_idx = format.sel->get_index(out_row_idx); + if (!format.validity.RowIsValid(in_row_idx)) { + FlatVector::SetNull(result, out_row_idx, true); + continue; + } + auto in = in_line_entries[in_row_idx]; + auto in_offset = in.offset; + auto in_length = in.length; + + // Special case: lines with fewer than 3 points are copied to the output unchanged + if (in_length < 3) { + + ListVector::Reserve(result, out_offset + in_length); + auto out_x_data = FlatVector::GetData(*out_line_vertex_vec[0]); + auto out_y_data = FlatVector::GetData(*out_line_vertex_vec[1]); + + // Record the output list entry, then copy every vertex of the line as-is + out_line_entries[out_row_idx] = list_entry_t {out_offset, in_length}; + for (idx_t coord_idx = 0; coord_idx < in_length; coord_idx++) { + out_x_data[out_offset + coord_idx] = in_x_data[in_offset + coord_idx]; + out_y_data[out_offset + coord_idx] = in_y_data[in_offset + coord_idx]; + } + out_offset += in_length; + continue; + } + + // First pass, calculate how many points we need to keep + // The counter starts at 0 and is bumped once for the first point, which is always kept; + // every later point is counted only if it differs (exact x/y comparison) from the + // most recently kept point + uint32_t points_to_keep = 0; + + auto last_x = in_x_data[in_offset]; + auto last_y = in_y_data[in_offset]; + points_to_keep++; + + for (idx_t i = 1; i < in_length; i++) { + auto curr_x = 
in_x_data[in_offset + i]; + auto curr_y = in_y_data[in_offset + i]; + + if (curr_x != last_x || curr_y != last_y) { + points_to_keep++; + last_x = curr_x; + last_y = curr_y; + } + } + + // Special case: there is only 1 unique point in the line (every vertex equals the first), + // so emit a two-point line made of the start and end points + if (points_to_keep == 1) { + out_line_entries[out_row_idx] = list_entry_t {out_offset, 2}; + ListVector::Reserve(result, out_offset + 2); + auto out_x_data = FlatVector::GetData(*out_line_vertex_vec[0]); + auto out_y_data = FlatVector::GetData(*out_line_vertex_vec[1]); + out_x_data[out_offset] = in_x_data[in_offset]; + out_y_data[out_offset] = in_y_data[in_offset]; + out_x_data[out_offset + 1] = in_x_data[in_offset + in_length - 1]; + out_y_data[out_offset + 1] = in_y_data[in_offset + in_length - 1]; + out_offset += 2; + continue; + } + + // Set the list entry + out_line_entries[out_row_idx] = list_entry_t {out_offset, points_to_keep}; + + // Second pass, copy the points we need to keep + // (the child data pointers are fetched again after reserving space in the result) + ListVector::Reserve(result, out_offset + points_to_keep); + auto out_x_data = FlatVector::GetData(*out_line_vertex_vec[0]); + auto out_y_data = FlatVector::GetData(*out_line_vertex_vec[1]); + + // Copy the first point + out_x_data[out_offset] = in_x_data[in_offset]; + out_y_data[out_offset] = in_y_data[in_offset]; + out_offset++; + + // Copy every remaining point that differs from the previously kept one; the loop runs + // through the final vertex, so the last point gets no separate copy step + last_x = in_x_data[in_offset]; + last_y = in_y_data[in_offset]; + + for (idx_t i = 1; i < in_length; i++) { + auto curr_x = in_x_data[in_offset + i]; + auto curr_y = in_y_data[in_offset + i]; + + if (curr_x != last_x || curr_y != last_y) { + out_x_data[out_offset] = curr_x; + out_y_data[out_offset] = curr_y; + last_x = curr_x; + last_y = curr_y; + out_offset++; + } + } + } + ListVector::SetListSize(result, out_offset); + + if (count == 1) { + result.SetVectorType(VectorType::CONSTANT_VECTOR); + } + } + + 
//------------------------------------------------------------------------------------------------------------------
+    // LINESTRING_2D (With Tolerance)
+    //------------------------------------------------------------------------------------------------------------------
+    // Removes vertices that lie within `tolerance` of the previously kept vertex.
+    //   args.data[0]: LINESTRING_2D input, args.data[1]: DOUBLE tolerance.
+    // A NULL in either argument produces a NULL result row.
+    // The first and the last vertex of a line are always kept, regardless of their distance to
+    // the neighbouring kept vertex; only interior vertices are candidates for removal. Lines with
+    // fewer than 3 vertices are therefore copied unchanged.
+    static void ExecuteLineStringWithTolerance(DataChunk &args, ExpressionState &state, Vector &result) {
+        auto input = args.data[0];
+        auto tolerance_vec = args.data[1];
+        const auto count = args.size();
+
+        UnifiedVectorFormat format;
+        input.ToUnifiedFormat(count, format);
+
+        UnifiedVectorFormat tolerance_format;
+        tolerance_vec.ToUnifiedFormat(count, tolerance_format);
+        // Typed view of the tolerance values, so the selection index below addresses whole
+        // doubles rather than bytes
+        const auto tolerance_data = UnifiedVectorFormat::GetData<double>(tolerance_format);
+
+        auto in_line_entries = ListVector::GetData(input);
+        auto &in_line_vertex_vec = StructVector::GetEntries(ListVector::GetEntry(input));
+        auto in_x_data = FlatVector::GetData<double>(*in_line_vertex_vec[0]);
+        auto in_y_data = FlatVector::GetData<double>(*in_line_vertex_vec[1]);
+
+        auto out_line_entries = ListVector::GetData(result);
+        auto &out_line_vertex_vec = StructVector::GetEntries(ListVector::GetEntry(result));
+
+        // Running write position in the result's child vertex vector
+        idx_t out_offset = 0;
+
+        for (idx_t out_row_idx = 0; out_row_idx < count; out_row_idx++) {
+            const auto in_row_idx = format.sel->get_index(out_row_idx);
+            const auto in_tol_idx = tolerance_format.sel->get_index(out_row_idx);
+            if (!format.validity.RowIsValid(in_row_idx) || !tolerance_format.validity.RowIsValid(in_tol_idx)) {
+                FlatVector::SetNull(result, out_row_idx, true);
+                continue;
+            }
+
+            const auto in = in_line_entries[in_row_idx];
+            const auto in_offset = in.offset;
+            const auto in_length = in.length;
+
+            const auto tolerance = tolerance_data[in_tol_idx];
+            const auto tolerance_squared = tolerance * tolerance;
+
+            // Reserve room for the worst case (nothing removed) so a single pass suffices and the
+            // list entry length can never disagree with the number of vertices actually written.
+            // The child data pointers are fetched after reserving, as the original code did.
+            ListVector::Reserve(result, out_offset + in_length);
+            auto out_x_data = FlatVector::GetData<double>(*out_line_vertex_vec[0]);
+            auto out_y_data = FlatVector::GetData<double>(*out_line_vertex_vec[1]);
+
+            const auto out_start = out_offset;
+
+            if (in_length > 0) {
+                // The first point is always kept
+                auto last_x = in_x_data[in_offset];
+                auto last_y = in_y_data[in_offset];
+                out_x_data[out_offset] = last_x;
+                out_y_data[out_offset] = last_y;
+                out_offset++;
+
+                // Interior points are kept only when farther than the tolerance from the last kept point
+                for (idx_t i = 1; i + 1 < in_length; i++) {
+                    const auto curr_x = in_x_data[in_offset + i];
+                    const auto curr_y = in_y_data[in_offset + i];
+
+                    const auto dx = curr_x - last_x;
+                    const auto dy = curr_y - last_y;
+                    if (dx * dx + dy * dy > tolerance_squared) {
+                        out_x_data[out_offset] = curr_x;
+                        out_y_data[out_offset] = curr_y;
+                        last_x = curr_x;
+                        last_y = curr_y;
+                        out_offset++;
+                    }
+                }
+
+                // The last point is always kept; write it at this row's current position
+                if (in_length > 1) {
+                    out_x_data[out_offset] = in_x_data[in_offset + in_length - 1];
+                    out_y_data[out_offset] = in_y_data[in_offset + in_length - 1];
+                    out_offset++;
+                }
+            }
+
+            out_line_entries[out_row_idx] = list_entry_t {out_start, out_offset - out_start};
+        }
+        ListVector::SetListSize(result, out_offset);
+
+        if (count == 1) {
+            result.SetVectorType(VectorType::CONSTANT_VECTOR);
+        }
+    }
+
+ //------------------------------------------------------------------------------------------------------------------ + // Documentation + //------------------------------------------------------------------------------------------------------------------ + static constexpr auto DESCRIPTION = R"( + Remove repeated points from a LINESTRING. 
+ )"; + + // TODO: example + static constexpr auto EXAMPLE = R"()"; + + //------------------------------------------------------------------------------------------------------------------ + // Register + //------------------------------------------------------------------------------------------------------------------ + static void Register(DatabaseInstance &db) { + FunctionBuilder::RegisterScalar(db, "ST_RemoveRepeatedPoints", [](ScalarFunctionBuilder &func) { + func.AddVariant([](ScalarFunctionVariantBuilder &variant) { + variant.AddParameter("line", GeoTypes::LINESTRING_2D()); + variant.SetReturnType(GeoTypes::LINESTRING_2D()); + + variant.SetFunction(ExecuteLineString); + }); + + func.AddVariant([](ScalarFunctionVariantBuilder &variant) { + variant.AddParameter("line", GeoTypes::LINESTRING_2D()); + variant.AddParameter("tolerance", LogicalType::DOUBLE); + variant.SetReturnType(GeoTypes::LINESTRING_2D()); + + variant.SetFunction(ExecuteLineStringWithTolerance); + }); + + func.SetDescription(DESCRIPTION); + func.SetExample(EXAMPLE); + + func.SetTag("ext", "spatial"); + func.SetTag("category", "construction"); + }); + } +}; + +//====================================================================================================================== +// ST_StartPoint +//====================================================================================================================== + +struct ST_StartPoint { + + //------------------------------------------------------------------------------------------------------------------ + // GEOMETRY + //------------------------------------------------------------------------------------------------------------------ + static void ExecuteGeometry(DataChunk &args, ExpressionState &state, Vector &result) { + auto &lstate = LocalState::ResetAndGet(state); + UnaryExecutor::ExecuteWithNulls( + args.data[0], result, args.size(), [&](const string_t &blob, ValidityMask &mask, const idx_t idx) { + // TODO: Peek without 
deserializing! + const auto geom = lstate.Deserialize(blob); + + if (geom.get_type() != sgl::geometry_type::LINESTRING) { + mask.SetInvalid(idx); + return string_t {}; + } + + if (geom.is_empty()) { + mask.SetInvalid(idx); + return string_t {}; + } + + const auto vertex_data = geom.get_vertex_data(); + auto point = sgl::geometry(sgl::geometry_type::POINT, geom.has_z(), geom.has_m()); + point.set_vertex_data(vertex_data, 1); + + return lstate.Serialize(result, point); + }); + } + + //------------------------------------------------------------------------------------------------------------------ + // LINESTRING_2D + //------------------------------------------------------------------------------------------------------------------ + static void ExecuteLineString(DataChunk &args, ExpressionState &state, Vector &result) { + auto geom_vec = args.data[0]; + auto count = args.size(); + + UnifiedVectorFormat geom_format; + geom_vec.ToUnifiedFormat(count, geom_format); + + auto line_vertex_entries = ListVector::GetData(geom_vec); + auto &line_vertex_vec = ListVector::GetEntry(geom_vec); + auto &line_vertex_vec_children = StructVector::GetEntries(line_vertex_vec); + auto line_x_data = FlatVector::GetData(*line_vertex_vec_children[0]); + auto line_y_data = FlatVector::GetData(*line_vertex_vec_children[1]); + + auto &point_vertex_children = StructVector::GetEntries(result); + auto point_x_data = FlatVector::GetData(*point_vertex_children[0]); + auto point_y_data = FlatVector::GetData(*point_vertex_children[1]); + + for (idx_t out_row_idx = 0; out_row_idx < count; out_row_idx++) { + auto in_row_idx = geom_format.sel->get_index(out_row_idx); + + if (!geom_format.validity.RowIsValid(in_row_idx)) { + FlatVector::SetNull(result, out_row_idx, true); + continue; + } + + auto line = line_vertex_entries[in_row_idx]; + auto line_offset = line.offset; + auto line_length = line.length; + + if (line_length == 0) { + FlatVector::SetNull(result, out_row_idx, true); + continue; + } + + 
point_x_data[out_row_idx] = line_x_data[line_offset]; + point_y_data[out_row_idx] = line_y_data[line_offset]; + } + if (count == 1) { + result.SetVectorType(VectorType::CONSTANT_VECTOR); + } + } + + //------------------------------------------------------------------------------------------------------------------ + // Documentation + //------------------------------------------------------------------------------------------------------------------ + static constexpr auto DESCRIPTION = R"( + Returns the start point of a LINESTRING. + )"; + + // todo: add example + static constexpr auto EXAMPLE = ""; + + //------------------------------------------------------------------------------------------------------------------ + // Register + //------------------------------------------------------------------------------------------------------------------ + static void Register(DatabaseInstance &db) { + FunctionBuilder::RegisterScalar(db, "ST_StartPoint", [](ScalarFunctionBuilder &func) { + func.AddVariant([](ScalarFunctionVariantBuilder &variant) { + variant.AddParameter("geom", GeoTypes::GEOMETRY()); + variant.SetReturnType(GeoTypes::GEOMETRY()); + + variant.SetInit(LocalState::Init); + variant.SetFunction(ExecuteGeometry); + }); + + func.AddVariant([](ScalarFunctionVariantBuilder &variant) { + variant.AddParameter("line", GeoTypes::LINESTRING_2D()); + variant.SetReturnType(GeoTypes::POINT_2D()); + + variant.SetFunction(ExecuteLineString); + }); + + func.SetDescription(DESCRIPTION); + func.SetExample(EXAMPLE); + + func.SetTag("ext", "spatial"); + func.SetTag("category", "property"); + }); + } +}; + +//====================================================================================================================== +// ST_EndPoint +//====================================================================================================================== + +struct ST_EndPoint { + + 
//------------------------------------------------------------------------------------------------------------------ + // GEOMETRY + //------------------------------------------------------------------------------------------------------------------ + static void ExecuteGeometry(DataChunk &args, ExpressionState &state, Vector &result) { + auto &lstate = LocalState::ResetAndGet(state); + UnaryExecutor::ExecuteWithNulls( + args.data[0], result, args.size(), [&](const string_t &blob, ValidityMask &mask, const idx_t idx) { + // TODO: Peek without deserializing! + const auto geom = lstate.Deserialize(blob); + + if (geom.get_type() != sgl::geometry_type::LINESTRING) { + mask.SetInvalid(idx); + return string_t {}; + } + + if (geom.is_empty()) { + mask.SetInvalid(idx); + return string_t {}; + } + + const auto vertex_count = geom.get_count(); + const auto vertex_size = geom.get_vertex_size(); + const auto vertex_data = geom.get_vertex_data(); + + const auto point_data = vertex_data + ((vertex_count - 1) * vertex_size); + + auto point = sgl::geometry(sgl::geometry_type::POINT, geom.has_z(), geom.has_m()); + point.set_vertex_data(point_data, 1); + + return lstate.Serialize(result, point); + }); + } + + //------------------------------------------------------------------------------------------------------------------ + // LINESTRING_2D + //------------------------------------------------------------------------------------------------------------------ + static void ExecuteLineString(DataChunk &args, ExpressionState &state, Vector &result) { + auto geom_vec = args.data[0]; + auto count = args.size(); + + UnifiedVectorFormat geom_format; + geom_vec.ToUnifiedFormat(count, geom_format); + + auto line_vertex_entries = ListVector::GetData(geom_vec); + auto &line_vertex_vec = ListVector::GetEntry(geom_vec); + auto &line_vertex_vec_children = StructVector::GetEntries(line_vertex_vec); + auto line_x_data = FlatVector::GetData(*line_vertex_vec_children[0]); + auto line_y_data = 
FlatVector::GetData(*line_vertex_vec_children[1]); + + auto &point_vertex_children = StructVector::GetEntries(result); + auto point_x_data = FlatVector::GetData(*point_vertex_children[0]); + auto point_y_data = FlatVector::GetData(*point_vertex_children[1]); + + for (idx_t out_row_idx = 0; out_row_idx < count; out_row_idx++) { + auto in_row_idx = geom_format.sel->get_index(out_row_idx); + + if (!geom_format.validity.RowIsValid(in_row_idx)) { + FlatVector::SetNull(result, out_row_idx, true); + continue; + } + + auto line = line_vertex_entries[in_row_idx]; + auto line_offset = line.offset; + auto line_length = line.length; + + if (line_length == 0) { + FlatVector::SetNull(result, out_row_idx, true); + continue; + } + + point_x_data[out_row_idx] = line_x_data[line_offset + line_length - 1]; + point_y_data[out_row_idx] = line_y_data[line_offset + line_length - 1]; + } + if (count == 1) { + result.SetVectorType(VectorType::CONSTANT_VECTOR); + } + } + + //------------------------------------------------------------------------------------------------------------------ + // Documentation + //------------------------------------------------------------------------------------------------------------------ + static constexpr auto DESCRIPTION = R"( + Returns the end point of a LINESTRING. 
+ )"; + + // TODO: add example + static constexpr auto EXAMPLE = ""; + + //------------------------------------------------------------------------------------------------------------------ + // Register + //------------------------------------------------------------------------------------------------------------------ + static void Register(DatabaseInstance &db) { + FunctionBuilder::RegisterScalar(db, "ST_EndPoint", [](ScalarFunctionBuilder &func) { + func.AddVariant([](ScalarFunctionVariantBuilder &variant) { + variant.AddParameter("geom", GeoTypes::GEOMETRY()); + variant.SetReturnType(GeoTypes::GEOMETRY()); + + variant.SetInit(LocalState::Init); + variant.SetFunction(ExecuteGeometry); + }); + + func.AddVariant([](ScalarFunctionVariantBuilder &variant) { + variant.AddParameter("line", GeoTypes::LINESTRING_2D()); + variant.SetReturnType(GeoTypes::POINT_2D()); + + variant.SetFunction(ExecuteLineString); + }); + + func.SetDescription(DESCRIPTION); + func.SetExample(EXAMPLE); + + func.SetTag("ext", "spatial"); + func.SetTag("category", "property"); + }); + } +}; + +//====================================================================================================================== +// ST_Within +//====================================================================================================================== + +struct ST_Within { + + //------------------------------------------------------------------------------------------------------------------ + // POINT_2D -> POLYGON_2D + //------------------------------------------------------------------------------------------------------------------ + static void Execute(DataChunk &args, ExpressionState &state, Vector &result) { + auto &point_in = args.data[0]; + auto &polygon_in = args.data[1]; + + // Just execute ST_Contains, but reversed + ST_Contains::Operation(point_in, polygon_in, result, args.size()); + } + + 
//------------------------------------------------------------------------------------------------------------------ + // Documentation + //------------------------------------------------------------------------------------------------------------------ + // TODO: add example + static constexpr auto DESCRIPTION = ""; + static constexpr auto EXAMPLE = ""; + + //------------------------------------------------------------------------------------------------------------------ + // Register + //------------------------------------------------------------------------------------------------------------------ + static void Register(DatabaseInstance &db) { + // ST_Within is the inverse of ST_Contains + FunctionBuilder::RegisterScalar(db, "ST_Within", [](ScalarFunctionBuilder &func) { + func.AddVariant([](ScalarFunctionVariantBuilder &variant) { + variant.AddParameter("geom1", GeoTypes::POINT_2D()); + variant.AddParameter("geom2", GeoTypes::POLYGON_2D()); + variant.SetReturnType(LogicalType::BOOLEAN); + + variant.SetFunction(Execute); + }); + + func.SetDescription(DESCRIPTION); + func.SetExample(EXAMPLE); + + func.SetTag("ext", "spatial"); + func.SetTag("category", "relation"); + }); + } +}; + +enum class VertexOrdinate { X, Y, Z, M }; + +template +struct PointAccessFunctionBase { + static size_t GetOrdinateOffset(const sgl::geometry &geom) { + switch (OP::ORDINATE) { + case VertexOrdinate::X: + return 0; + case VertexOrdinate::Y: + return 1; + case VertexOrdinate::Z: + return 2; + case VertexOrdinate::M: + return geom.has_z() ? 
3 : 2; + default: + return 0; + } + } + + static void Execute(DataChunk &args, ExpressionState &state, Vector &result) { + auto &lstate = LocalState::ResetAndGet(state); + + UnaryExecutor::ExecuteWithNulls( + args.data[0], result, args.size(), [&](const string_t &blob, ValidityMask &mask, const idx_t idx) { + const auto geom = lstate.Deserialize(blob); + + if (geom.get_type() != sgl::geometry_type::POINT) { + throw InvalidInputException("%s only supports POINT geometries", OP::NAME); + } + + if (geom.is_empty()) { + mask.SetInvalid(idx); + return 0.0; + } + + if (OP::ORDINATE == VertexOrdinate::Z && !geom.has_z()) { + mask.SetInvalid(idx); + return 0.0; + } + + if (OP::ORDINATE == VertexOrdinate::M && !geom.has_m()) { + mask.SetInvalid(idx); + return 0.0; + } + + const auto vertex_data = geom.get_vertex_data(); + const auto offset = GetOrdinateOffset(geom); + + double res = 0.0; + memcpy(&res, vertex_data + offset * sizeof(double), sizeof(double)); + return res; + }); + } + + static void ExecutePoint(DataChunk &args, ExpressionState &state, Vector &result) { + D_ASSERT(args.data.size() == 1); + + // Only defined for X and Y + D_ASSERT(OP::ORDINATE == VertexOrdinate::X || OP::ORDINATE == VertexOrdinate::Y); + + auto &point = args.data[0]; + auto &point_children = StructVector::GetEntries(point); + auto &n_child = point_children[OP::ORDINATE == VertexOrdinate::X ? 
0 : 1]; + result.Reference(*n_child); + } + + static void Register(DatabaseInstance &db) { + FunctionBuilder::RegisterScalar(db, OP::NAME, [](ScalarFunctionBuilder &func) { + func.AddVariant([](ScalarFunctionVariantBuilder &variant) { + variant.AddParameter("geom", GeoTypes::GEOMETRY()); + variant.SetReturnType(LogicalType::DOUBLE); + + variant.SetInit(LocalState::Init); + variant.SetFunction(Execute); + + variant.SetDescription(OP::DESCRIPTION); + variant.SetExample(OP::EXAMPLE); + }); + func.SetTag("ext", "spatial"); + func.SetTag("category", "property"); + }); + + if (OP::ORDINATE == VertexOrdinate::X || OP::ORDINATE == VertexOrdinate::Y) { + FunctionBuilder::RegisterScalar(db, OP::NAME, [](ScalarFunctionBuilder &func) { + func.AddVariant([](ScalarFunctionVariantBuilder &variant) { + variant.AddParameter("point", GeoTypes::POINT_2D()); + variant.SetReturnType(LogicalType::DOUBLE); + + variant.SetFunction(ExecutePoint); + + variant.SetDescription(OP::DESCRIPTION); + variant.SetExample(OP::EXAMPLE); + }); + func.SetTag("ext", "spatial"); + func.SetTag("category", "property"); + }); + } + } +}; + +struct VertexMinAggOp { + static constexpr auto MIN_NOT_MAX = true; + + static double Init() { + return std::numeric_limits::max(); + } + static double Merge(const double a, const double b) { + return std::min(a, b); + } +}; + +struct VertexMaxAggOp { + static constexpr auto MIN_NOT_MAX = false; + + static double Init() { + return std::numeric_limits::lowest(); + } + static double Merge(const double a, const double b) { + return std::max(a, b); + } +}; + +template +struct VertexAggFunctionBase { + static size_t GetOrdinateOffset(const sgl::geometry &geom) { + switch (OP::ORDINATE) { + case VertexOrdinate::X: + return 0; + case VertexOrdinate::Y: + return 1; + case VertexOrdinate::Z: + return 2; + case VertexOrdinate::M: + return geom.has_z() ? 
3 : 2; + default: + return 0; + } + } + + static void Execute(DataChunk &args, ExpressionState &state, Vector &result) { + auto &lstate = LocalState::ResetAndGet(state); + UnaryExecutor::ExecuteWithNulls( + args.data[0], result, args.size(), [&](const string_t &blob, ValidityMask &mask, const idx_t idx) { + const auto geom = lstate.Deserialize(blob); + + if (geom.is_empty()) { + mask.SetInvalid(idx); + return 0.0; + } + if (OP::ORDINATE == VertexOrdinate::Z && !geom.has_z()) { + mask.SetInvalid(idx); + return 0.0; + } + if (OP::ORDINATE == VertexOrdinate::M && !geom.has_m()) { + mask.SetInvalid(idx); + return 0.0; + } + + const auto offset = GetOrdinateOffset(geom); + + double res = AGG::Init(); + + sgl::ops::visit_vertices(&geom, [&](const uint8_t *vertex) { + double val = 0.0; + memcpy(&val, vertex + offset * sizeof(double), sizeof(double)); + + res = AGG::Merge(res, val); + }); + + return res; + }); + } + + static void ExecutePoint(DataChunk &args, ExpressionState &, Vector &result) { + D_ASSERT(args.data.size() == 1); + auto &point = args.data[0]; + auto &point_children = StructVector::GetEntries(point); + + switch (OP::ORDINATE) { + case VertexOrdinate::X: + result.Reference(*point_children[0]); + break; + case VertexOrdinate::Y: + result.Reference(*point_children[1]); + break; + default: + D_ASSERT(false); + break; + } + } + + static void ExecuteLineString(DataChunk &args, ExpressionState &, Vector &result) { + D_ASSERT(args.data.size() == 1); + + auto &line_vec = args.data[0]; + auto &line_coords = ListVector::GetEntry(line_vec); + auto &line_coords_vec = StructVector::GetEntries(line_coords); + + const auto axis = OP::ORDINATE == VertexOrdinate::X ? 
0 : 1; + auto ordinate_data = FlatVector::GetData(*line_coords_vec[axis]); + + UnaryExecutor::ExecuteWithNulls( + line_vec, result, args.size(), [&](const list_entry_t &line, ValidityMask &mask, idx_t idx) { + // Empty line, return NULL + if (line.length == 0) { + mask.SetInvalid(idx); + return 0.0; + } + + auto val = AGG::Init(); + for (idx_t i = line.offset; i < line.offset + line.length; i++) { + auto ordinate = ordinate_data[i]; + val = AGG::Merge(val, ordinate); + } + return val; + }); + + if (line_vec.GetVectorType() == VectorType::CONSTANT_VECTOR) { + result.SetVectorType(VectorType::CONSTANT_VECTOR); + } + } + + static void ExecutePolygon(DataChunk &args, ExpressionState &, Vector &result) { + D_ASSERT(args.data.size() == 1); + + auto input = args.data[0]; + auto count = args.size(); + + UnifiedVectorFormat format; + input.ToUnifiedFormat(count, format); + + auto &ring_vec = ListVector::GetEntry(input); + auto ring_entries = ListVector::GetData(ring_vec); + auto &vertex_vec = ListVector::GetEntry(ring_vec); + auto &vertex_vec_children = StructVector::GetEntries(vertex_vec); + const auto axis = OP::ORDINATE == VertexOrdinate::X ? 0 : 1; + auto ordinate_data = FlatVector::GetData(*vertex_vec_children[axis]); + + UnaryExecutor::ExecuteWithNulls( + input, result, count, [&](const list_entry_t &polygon, ValidityMask &mask, idx_t idx) { + auto polygon_offset = polygon.offset; + + // Empty polygon, return NULL + if (polygon.length == 0) { + mask.SetInvalid(idx); + return 0.0; + } + + // We only have to check the outer shell + auto shell_ring = ring_entries[polygon_offset]; + auto ring_offset = shell_ring.offset; + auto ring_length = shell_ring.length; + + // Polygon is invalid. 
This should never happen but just in case + if (ring_length == 0) { + mask.SetInvalid(idx); + return 0.0; + } + + auto val = AGG::Init(); + for (idx_t coord_idx = ring_offset; coord_idx < ring_offset + ring_length - 1; coord_idx++) { + auto ordinate = ordinate_data[coord_idx]; + val = AGG::Merge(val, ordinate); + } + return val; + }); + } + + static void ExecuteBox(DataChunk &args, ExpressionState &, Vector &result) { + auto &input = args.data[0]; + auto &box_vec = StructVector::GetEntries(input); + + switch (OP::ORDINATE) { + case VertexOrdinate::X: + if (AGG::MIN_NOT_MAX) { + result.Reference(*box_vec[0]); + } else { + result.Reference(*box_vec[2]); + } + break; + case VertexOrdinate::Y: + if (AGG::MIN_NOT_MAX) { + result.Reference(*box_vec[1]); + } else { + result.Reference(*box_vec[3]); + } + break; + default: + D_ASSERT(false); + break; + } + } + + static void Register(DatabaseInstance &db) { + FunctionBuilder::RegisterScalar(db, OP::NAME, [](ScalarFunctionBuilder &func) { + func.AddVariant([](ScalarFunctionVariantBuilder &variant) { + variant.AddParameter("geom", GeoTypes::GEOMETRY()); + variant.SetReturnType(LogicalType::DOUBLE); + + variant.SetInit(LocalState::Init); + variant.SetFunction(Execute); + }); + + // These are only defined for X/Y variants + if (OP::ORDINATE == VertexOrdinate::X || OP::ORDINATE == VertexOrdinate::Y) { + func.AddVariant([](ScalarFunctionVariantBuilder &variant) { + variant.AddParameter("point", GeoTypes::POINT_2D()); + variant.SetReturnType(LogicalType::DOUBLE); + + variant.SetFunction(ExecutePoint); + }); + + func.AddVariant([](ScalarFunctionVariantBuilder &variant) { + variant.AddParameter("line", GeoTypes::LINESTRING_2D()); + variant.SetReturnType(LogicalType::DOUBLE); + + variant.SetFunction(ExecuteLineString); + }); + + func.AddVariant([](ScalarFunctionVariantBuilder &variant) { + variant.AddParameter("polygon", GeoTypes::POLYGON_2D()); + variant.SetReturnType(LogicalType::DOUBLE); + + variant.SetFunction(ExecutePolygon); + 
}); + + func.AddVariant([](ScalarFunctionVariantBuilder &variant) { + variant.AddParameter("box", GeoTypes::BOX_2D()); + variant.SetReturnType(LogicalType::DOUBLE); + + variant.SetFunction(ExecuteBox); + }); + + func.AddVariant([](ScalarFunctionVariantBuilder &variant) { + variant.AddParameter("box", GeoTypes::BOX_2DF()); + variant.SetReturnType(LogicalType::FLOAT); + + variant.SetFunction(ExecuteBox); + }); + } + + func.SetDescription(OP::DESCRIPTION); + func.SetExample(OP::EXAMPLE); + + func.SetTag("ext", "spatial"); + func.SetTag("category", "property"); + }); + } +}; + +struct ST_X : PointAccessFunctionBase { + static constexpr auto NAME = "ST_X"; + static constexpr auto DESCRIPTION = "Returns the X coordinate of a point geometry"; + static constexpr auto EXAMPLE = "SELECT ST_X(ST_Point(1, 2))"; + static constexpr auto ORDINATE = VertexOrdinate::X; +}; + +struct ST_XMax : VertexAggFunctionBase { + static auto constexpr NAME = "ST_XMax"; + static auto constexpr DESCRIPTION = "Returns the maximum X coordinate of a geometry"; + static auto constexpr EXAMPLE = "SELECT ST_XMax(ST_Point(1, 2))"; + static auto constexpr ORDINATE = VertexOrdinate::X; +}; + +struct ST_XMin : VertexAggFunctionBase { + static constexpr auto NAME = "ST_XMin"; + static constexpr auto DESCRIPTION = "Returns the minimum X coordinate of a geometry"; + static constexpr auto EXAMPLE = "SELECT ST_XMin(ST_Point(1, 2))"; + static constexpr auto ORDINATE = VertexOrdinate::X; +}; + +struct ST_Y : PointAccessFunctionBase { + static constexpr auto NAME = "ST_Y"; + static constexpr auto DESCRIPTION = "Returns the Y coordinate of a point geometry"; + static constexpr auto EXAMPLE = "SELECT ST_Y(ST_Point(1, 2))"; + static constexpr auto ORDINATE = VertexOrdinate::Y; +}; + +struct ST_YMax : VertexAggFunctionBase { + static constexpr auto NAME = "ST_YMax"; + static constexpr auto DESCRIPTION = "Returns the maximum Y coordinate of a geometry"; + static constexpr auto EXAMPLE = "SELECT ST_YMax(ST_Point(1, 
2))"; + static constexpr auto ORDINATE = VertexOrdinate::Y; +}; + +struct ST_YMin : VertexAggFunctionBase { + static constexpr auto NAME = "ST_YMin"; + static constexpr auto DESCRIPTION = "Returns the minimum Y coordinate of a geometry"; + static constexpr auto EXAMPLE = "SELECT ST_YMin(ST_Point(1, 2))"; + static constexpr auto ORDINATE = VertexOrdinate::Y; +}; + +struct ST_Z : PointAccessFunctionBase { + static constexpr auto NAME = "ST_Z"; + static constexpr auto DESCRIPTION = "Returns the Z coordinate of a point geometry"; + static constexpr auto EXAMPLE = "SELECT ST_Z(ST_Point(1, 2, 3))"; + static constexpr auto ORDINATE = VertexOrdinate::Z; +}; + +struct ST_ZMax : VertexAggFunctionBase { + static auto constexpr NAME = "ST_ZMax"; + static auto constexpr DESCRIPTION = "Returns the maximum Z coordinate of a geometry"; + static auto constexpr EXAMPLE = "SELECT ST_ZMax(ST_Point(1, 2, 3))"; + static auto constexpr ORDINATE = VertexOrdinate::Z; +}; + +struct ST_ZMin : VertexAggFunctionBase { + static constexpr auto NAME = "ST_ZMin"; + static constexpr auto DESCRIPTION = "Returns the minimum Z coordinate of a geometry"; + static constexpr auto EXAMPLE = "SELECT ST_ZMin(ST_Point(1, 2, 3))"; + static constexpr auto ORDINATE = VertexOrdinate::Z; +}; + +struct ST_M : PointAccessFunctionBase { + static constexpr auto NAME = "ST_M"; + static constexpr auto DESCRIPTION = "Returns the M coordinate of a point geometry"; + static constexpr auto EXAMPLE = "SELECT ST_M(ST_Point(1, 2, 3, 4))"; + static constexpr auto ORDINATE = VertexOrdinate::M; +}; + +struct ST_MMax : VertexAggFunctionBase { + static constexpr auto NAME = "ST_MMax"; + static constexpr auto DESCRIPTION = "Returns the maximum M coordinate of a geometry"; + static constexpr auto EXAMPLE = "SELECT ST_MMax(ST_Point(1, 2, 3, 4))"; + static constexpr auto ORDINATE = VertexOrdinate::M; +}; + +struct ST_MMin : VertexAggFunctionBase { + static constexpr auto NAME = "ST_MMin"; + static constexpr auto DESCRIPTION = "Returns 
the minimum M coordinate of a geometry"; + static constexpr auto EXAMPLE = "SELECT ST_MMin(ST_Point(1, 2, 3, 4))"; + static constexpr auto ORDINATE = VertexOrdinate::M; +}; + +} // namespace + +//###################################################################################################################### +// Register +//###################################################################################################################### + +void RegisterSpatialScalarFunctions(DatabaseInstance &db) { + ST_Area::Register(db); + + // 1 functions to go! + ST_AsGeoJSON::Register(db); + ST_AsText::Register(db); + ST_AsWKB::Register(db); + ST_AsHEXWKB::Register(db); + ST_AsSVG::Register(db); + ST_Centroid::Register(db); + ST_Collect::Register(db); + ST_CollectionExtract::Register(db); + ST_Contains::Register(db); + ST_Dimension::Register(db); + ST_Distance::Register(db); + ST_Dump::Register(db); + ST_EndPoint::Register(db); + ST_Extent::Register(db); + ST_Extent_Approx::Register(db); + ST_ExteriorRing::Register(db); + ST_FlipCoordinates::Register(db); + ST_Force2D::Register(db); + ST_Force3DZ::Register(db); + ST_Force3DM::Register(db); + ST_Force4D::Register(db); + ST_GeometryType::Register(db); + ST_GeomFromHEXWKB::Register(db); + ST_GeomFromGeoJSON::Register(db); + ST_GeomFromText::Register(db); + ST_GeomFromWKB::Register(db); + ST_HasZ::Register(db); + ST_HasM::Register(db); + ST_ZMFlag::Register(db); + ST_Distance_Sphere::Register(db); + ST_Hilbert::Register(db); + ST_Intersects::Register(db); + ST_IntersectsExtent::Register(db); + ST_IsClosed::Register(db); + ST_IsEmpty::Register(db); + ST_Length::Register(db); + ST_MakeEnvelope::Register(db); + ST_MakeLine::Register(db); + ST_MakePolygon::Register(db); + ST_Multi::Register(db); + ST_NGeometries::Register(db); + ST_NInteriorRings::Register(db); + ST_NPoints::Register(db); + ST_Perimeter::Register(db); + ST_Point::Register(db); + ST_PointN::Register(db); + ST_Points::Register(db); + ST_QuadKey::Register(db); + 
ST_RemoveRepeatedPoints::Register(db); + ST_StartPoint::Register(db); + ST_Within::Register(db); + ST_X::Register(db); + ST_XMax::Register(db); + ST_XMin::Register(db); + ST_Y::Register(db); + ST_YMax::Register(db); + ST_YMin::Register(db); + ST_Z::Register(db); + ST_ZMax::Register(db); + ST_ZMin::Register(db); + ST_M::Register(db); + ST_MMax::Register(db); + ST_MMin::Register(db); +} + +} // namespace duckdb \ No newline at end of file diff --git a/src/spatial/modules/main/spatial_functions_table.cpp b/src/spatial/modules/main/spatial_functions_table.cpp new file mode 100644 index 00000000..7a442bd8 --- /dev/null +++ b/src/spatial/modules/main/spatial_functions_table.cpp @@ -0,0 +1,137 @@ +#include "spatial/modules/main/spatial_functions.hpp" +#include "spatial/spatial_types.hpp" +#include "spatial/geometry/bbox.hpp" + +#include "duckdb/main/extension_util.hpp" + +namespace duckdb { + +namespace { + +//###################################################################################################################### +// Table Functions +//###################################################################################################################### + +//====================================================================================================================== +// ST_GeneratePoints +//====================================================================================================================== + +struct ST_GeneratePoints { + + //------------------------------------------------------------------------------------------------------------------ + // Bind + //------------------------------------------------------------------------------------------------------------------ + struct GeneratePointsBindData final : TableFunctionData { + idx_t count = 0; + int64_t seed = -1; + Box2D bbox; + }; + + static unique_ptr Bind(ClientContext &context, TableFunctionBindInput &input, + vector &return_types, vector &names) { + auto result = make_uniq(); + 
+ return_types.push_back(GeoTypes::POINT_2D()); + names.push_back("point"); + + // Extract the bounding box + const auto &box_value = input.inputs[0]; + auto &box_components = StructValue::GetChildren(box_value); + result->bbox.min.x = box_components[0].GetValue(); + result->bbox.min.y = box_components[1].GetValue(); + result->bbox.max.x = box_components[2].GetValue(); + result->bbox.max.y = box_components[3].GetValue(); + + // Extract the count + const auto &count_value = input.inputs[1]; + const auto count = count_value.GetValue(); + if (count < 0) { + throw BinderException("Count must be a non-negative integer"); + } + result->count = UnsafeNumericCast(count); + + // Extract the seed (optional) + if (input.inputs.size() == 3) { + result->seed = input.inputs[2].GetValue(); + } + + return std::move(result); + } + + //------------------------------------------------------------------------------------------------------------------ + // Init + //------------------------------------------------------------------------------------------------------------------ + struct GeneratePointsState final : GlobalTableFunctionState { + RandomEngine rng; + idx_t current_idx; + + explicit GeneratePointsState(const int64_t seed) : rng(seed), current_idx(0) { + } + }; + + static unique_ptr Init(ClientContext &context, TableFunctionInitInput &input) { + auto &bind_data = input.bind_data->Cast(); + auto result = make_uniq(bind_data.seed); + return std::move(result); + } + + //------------------------------------------------------------------------------------------------------------------ + // Execute + //------------------------------------------------------------------------------------------------------------------ + static void Execute(ClientContext &context, TableFunctionInput &data_p, DataChunk &output) { + auto &bind_data = data_p.bind_data->Cast(); + auto &state = data_p.global_state->Cast(); + + const auto &point_vec = StructVector::GetEntries(output.data[0]); + const auto 
&x_data = FlatVector::GetData(*point_vec[0]); + const auto &y_data = FlatVector::GetData(*point_vec[1]); + + const auto chunk_size = MinValue(STANDARD_VECTOR_SIZE, bind_data.count - state.current_idx); + for (idx_t i = 0; i < chunk_size; i++) { + + x_data[i] = state.rng.NextRandom32(bind_data.bbox.min.x, bind_data.bbox.max.x); + y_data[i] = state.rng.NextRandom32(bind_data.bbox.min.y, bind_data.bbox.max.y); + + state.current_idx++; + } + output.SetCardinality(chunk_size); + } + + //------------------------------------------------------------------------------------------------------------------ + // Cardinality + //------------------------------------------------------------------------------------------------------------------ + static unique_ptr Cardinality(ClientContext &context, const FunctionData *bind_data_p) { + auto &bind_data = bind_data_p->Cast(); + return make_uniq(bind_data.count, bind_data.count); + } + + //------------------------------------------------------------------------------------------------------------------ + // Register + //------------------------------------------------------------------------------------------------------------------ + static void Register(DatabaseInstance &db) { + // TODO: Dont overload, make seed named parameter instead + TableFunctionSet set("ST_GeneratePoints"); + + TableFunction generate_points({GeoTypes::BOX_2D(), LogicalType::BIGINT}, Execute, Bind, Init); + generate_points.cardinality = Cardinality; + + // Overload without seed + set.AddFunction(generate_points); + + // Overload with seed + generate_points.arguments = {GeoTypes::BOX_2D(), LogicalType::BIGINT, LogicalType::BIGINT}; + set.AddFunction(generate_points); + ExtensionUtil::RegisterFunction(db, set); + } +}; + +} // namespace + +//###################################################################################################################### +// Register 
+//###################################################################################################################### +void RegisterSpatialTableFunctions(DatabaseInstance &db) { + ST_GeneratePoints::Register(db); +} + +} // namespace duckdb \ No newline at end of file diff --git a/spatial/src/spatial/core/io/CMakeLists.txt b/src/spatial/modules/osm/CMakeLists.txt similarity index 58% rename from spatial/src/spatial/core/io/CMakeLists.txt rename to src/spatial/modules/osm/CMakeLists.txt index 71cb98f4..961cc245 100644 --- a/spatial/src/spatial/core/io/CMakeLists.txt +++ b/src/spatial/modules/osm/CMakeLists.txt @@ -1,7 +1,5 @@ -add_subdirectory(osm) -add_subdirectory(shapefile) - set(EXTENSION_SOURCES ${EXTENSION_SOURCES} + ${CMAKE_CURRENT_SOURCE_DIR}/osm_module.cpp PARENT_SCOPE ) \ No newline at end of file diff --git a/spatial/src/spatial/core/io/osm/st_read_osm.cpp b/src/spatial/modules/osm/osm_module.cpp similarity index 96% rename from spatial/src/spatial/core/io/osm/st_read_osm.cpp rename to src/spatial/modules/osm/osm_module.cpp index 751ed337..397862fc 100644 --- a/spatial/src/spatial/core/io/osm/st_read_osm.cpp +++ b/src/spatial/modules/osm/osm_module.cpp @@ -1,24 +1,25 @@ -#include "duckdb/parser/parsed_data/create_table_function_info.hpp" -#include "duckdb/storage/buffer_manager.hpp" +#include "spatial/modules/osm/osm_module.hpp" + #include "duckdb/function/replacement_scan.hpp" +#include "duckdb/main/database.hpp" +#include "duckdb/main/extension_util.hpp" #include "duckdb/parser/expression/constant_expression.hpp" #include "duckdb/parser/expression/function_expression.hpp" #include "duckdb/parser/tableref/table_function_ref.hpp" - -#include "spatial/common.hpp" -#include "spatial/core/functions/table.hpp" -#include "spatial/core/types.hpp" - +#include "duckdb/storage/buffer_manager.hpp" #include "protozero/pbf_reader.hpp" +#include "spatial/spatial_types.hpp" #include "zlib.h" -namespace spatial { +#include -namespace core { +namespace duckdb { 
+namespace { //------------------------------------------------------------------------------ // Utils //------------------------------------------------------------------------------ + namespace pz = protozero; static int32_t ReadInt32BigEndian(data_ptr_t ptr) { @@ -29,10 +30,10 @@ static int32_t ReadInt32BigEndian(data_ptr_t ptr) { // OSM Table Function //------------------------------------------------------------------------------ -struct BindData : TableFunctionData { +struct BindData final : TableFunctionData { string file_name; - BindData(string file_name) : file_name(file_name) { + explicit BindData(string file_name) : file_name(std::move(file_name)) { } }; @@ -116,7 +117,7 @@ struct FileBlock { static unique_ptr DecompressBlob(ClientContext &context, OsmBlob &blob) { auto &buffer_manager = BufferManager::GetBufferManager(context); - pz::pbf_reader reader((const char *)blob.data.get(), blob.size); + pz::pbf_reader reader(reinterpret_cast(blob.data.get()), blob.size); // TODO: For now we assume they are all zlib compressed reader.next(2); @@ -146,7 +147,7 @@ static unique_ptr DecompressBlob(ClientContext &context, OsmBlob &blo return make_uniq(blob.type, std::move(uncompressed_handle), blob_uncompressed_size, blob.blob_idx); }; -class GlobalState : public GlobalTableFunctionState { +class GlobalState final : public GlobalTableFunctionState { mutex lock; unique_ptr handle; idx_t file_size; @@ -162,7 +163,7 @@ class GlobalState : public GlobalTableFunctionState { max_threads(max_threads) { } - double GetProgress() { + double GetProgress() const { return 100 * ((double)bytes_read / (double)file_size); } @@ -250,7 +251,7 @@ static unique_ptr InitGlobal(ClientContext &context, T return std::move(global_state); } -struct LocalState : LocalTableFunctionState { +struct LocalState final : LocalTableFunctionState { unique_ptr block; vector string_table; int32_t granularity; @@ -327,24 +328,24 @@ struct LocalState : LocalTableFunctionState { case ParseState::Group: if 
(group_reader.next()) { switch (group_reader.tag()) { - // Nodes + // Nodes case 1: { ScanNode(output, index, capacity); } break; - // Dense nodes + // Dense nodes case 2: { PrepareDenseNodes(output, index, capacity); state = ParseState::DenseNodes; } break; - // Way + // Way case 3: { ScanWay(output, index, capacity); } break; - // Relation + // Relation case 4: { ScanRelation(output, index, capacity); } break; - // Changeset + // Changeset case 5: { // Skip for now. group_reader.skip(); @@ -786,10 +787,8 @@ struct LocalState : LocalTableFunctionState { static unique_ptr InitLocal(ExecutionContext &context, TableFunctionInitInput &input, GlobalTableFunctionState *global_state) { - // auto &bind_data = (BindData &)*input.bind_data; - auto &global = (GlobalState &)*global_state; - - auto blob = global.GetNextBlob(context.client); + auto &global = global_state->Cast(); + const auto blob = global.GetNextBlob(context.client); if (blob == nullptr) { return nullptr; } @@ -805,8 +804,8 @@ static void Execute(ClientContext &context, TableFunctionInput &input, DataChunk } // auto &bind_data = (BindData &)*input.bind_data; - auto &global_state = (GlobalState &)*input.global_state; - auto &local_state = (LocalState &)*input.local_state; + auto &global_state = input.global_state->Cast(); + auto &local_state = input.local_state->Cast(); idx_t row_id = 0; idx_t capacity = STANDARD_VECTOR_SIZE; @@ -827,7 +826,7 @@ static void Execute(ClientContext &context, TableFunctionInput &input, DataChunk static double Progress(ClientContext &context, const FunctionData *bind_data, const GlobalTableFunctionState *global_state) { - auto &state = (GlobalState &)*global_state; + const auto &state = global_state->Cast(); return state.GetProgress(); } @@ -858,7 +857,7 @@ static unique_ptr ReadOsmPBFReplacementScan(ClientContext &context, Re // Documentation //------------------------------------------------------------------------------ -static constexpr DocTag DOC_TAGS[] = {{"ext", "spatial"}}; 
+// static constexpr DocTag DOC_TAGS[] = {{"ext", "spatial"}}; static constexpr const char *DOC_DESCRIPTION = R"( The `ST_ReadOsm()` table function enables reading compressed OpenStreetMap data directly from a `.osm.pbf file.` @@ -889,23 +888,23 @@ static constexpr const char *DOC_EXAMPLE = R"( └──────────────────────┴────────┴──────────────────────┴─────────┴────────────────────┴────────────┴───────────┴────────────────────────┘ )"; +} // namespace + //------------------------------------------------------------------------------ // Register //------------------------------------------------------------------------------ -void CoreTableFunctions::RegisterOsmTableFunction(DatabaseInstance &db) { +void RegisterOSMModule(DatabaseInstance &db) { TableFunction read("ST_ReadOSM", {LogicalType::VARCHAR}, Execute, Bind, InitGlobal, InitLocal); read.get_partition_data = GetPartitionData; read.table_scan_progress = Progress; ExtensionUtil::RegisterFunction(db, read); - DocUtil::AddDocumentation(db, "ST_ReadOSM", DOC_DESCRIPTION, DOC_EXAMPLE, DOC_TAGS); + FunctionBuilder::AddTableFunctionDocs(db, "ST_ReadOSM", DOC_DESCRIPTION, DOC_EXAMPLE); // Replacement scan auto &config = DBConfig::GetConfig(db); config.replacement_scans.emplace_back(ReadOsmPBFReplacementScan); } -} // namespace core - -} // namespace spatial \ No newline at end of file +} // namespace duckdb \ No newline at end of file diff --git a/src/spatial/modules/osm/osm_module.hpp b/src/spatial/modules/osm/osm_module.hpp new file mode 100644 index 00000000..2bbbffa5 --- /dev/null +++ b/src/spatial/modules/osm/osm_module.hpp @@ -0,0 +1,9 @@ +#pragma once + +namespace duckdb { + +class DatabaseInstance; + +void RegisterOSMModule(DatabaseInstance &db); + +} // namespace duckdb \ No newline at end of file diff --git a/spatial/src/spatial/proj/CMakeLists.txt b/src/spatial/modules/proj/CMakeLists.txt similarity index 54% rename from spatial/src/spatial/proj/CMakeLists.txt rename to src/spatial/modules/proj/CMakeLists.txt 
index 9a422b58..2658ae84 100644 --- a/spatial/src/spatial/proj/CMakeLists.txt +++ b/src/spatial/modules/proj/CMakeLists.txt @@ -1,8 +1,7 @@ set(EXTENSION_SOURCES - ${CMAKE_CURRENT_SOURCE_DIR}/module.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/proj_module.cpp ${CMAKE_CURRENT_SOURCE_DIR}/proj_db.c - ${CMAKE_CURRENT_SOURCE_DIR}/functions.cpp ${EXTENSION_SOURCES} PARENT_SCOPE -) \ No newline at end of file +) diff --git a/spatial/src/spatial/proj/proj_db.c b/src/spatial/modules/proj/proj_db.c similarity index 100% rename from spatial/src/spatial/proj/proj_db.c rename to src/spatial/modules/proj/proj_db.c diff --git a/src/spatial/modules/proj/proj_module.cpp b/src/spatial/modules/proj/proj_module.cpp new file mode 100644 index 00000000..67222887 --- /dev/null +++ b/src/spatial/modules/proj/proj_module.cpp @@ -0,0 +1,1101 @@ +#include "spatial/modules/proj/proj_module.hpp" +#include "spatial/spatial_types.hpp" +#include "spatial/util/function_builder.hpp" +#include "spatial/geometry/sgl.hpp" +#include "spatial/geometry/geometry_serialization.hpp" + +#include "duckdb/common/vector_operations/generic_executor.hpp" +#include "duckdb/parser/parsed_data/create_table_function_info.hpp" +#include "duckdb/execution/expression_executor.hpp" +#include "duckdb/planner/expression/bound_function_expression.hpp" + +#include "proj.h" +#include "geodesic.h" +#include "sqlite3.h" + +// We embed the whole proj.db in the proj_db.c file, which we then link into the extension binary +// We can then use the sqlite3 "memvfs" (which we also statically link to) to point to the proj.db database in memory +// To genereate the proj_db.c file, we use the following command: +// `xxd -i proj.db > proj_db.c` +// Then rename the array to proj_db and the length to proj_db_len if necessary +// We link these from the proj_db.c file externally instead of #include:ing so our IDE doesnt go haywire +extern "C" unsigned char proj_db[]; +extern "C" unsigned int proj_db_len; +extern "C" int sqlite3_memvfs_init(sqlite3 *, 
char **, const sqlite3_api_routines *); + +// Specialize hash for std::pair so we can use it as a key in an unordered_map +template <> +struct std::hash> { + size_t operator()(pair const &v) const noexcept { + const auto lhs = std::hash {}(v.first); + const auto rhs = std::hash {}(v.second); + // Shift by one so we dont match the hash of the reversed pair + return lhs ^ (rhs << 1); + } +}; + +namespace duckdb { + +namespace { + +//###################################################################################################################### +// PROJ Module & SQLITE VFS Registration +//###################################################################################################################### + +struct ProjModule { + static void RegisterVFS(DatabaseInstance &db); + static PJ_CONTEXT *GetThreadProjContext(); +}; + +PJ_CONTEXT *ProjModule::GetThreadProjContext() { + + const auto ctx = proj_context_create(); + + // We set the default context proj.db path to the one in the binary here + // Otherwise GDAL will try to load the proj.db from the system + // Any PJ_CONTEXT we create after this will inherit these settings + const auto path = StringUtil::Format("file:/proj.db?ptr=%llu&sz=%lu&max=%lu", static_cast(proj_db), + proj_db_len, proj_db_len); + + proj_context_set_sqlite3_vfs_name(ctx, "memvfs"); + const auto ok = proj_context_set_database_path(ctx, path.c_str(), nullptr, nullptr); + if (!ok) { + throw InternalException("Could not set proj.db path"); + } + + // Dont log errors to stderr + proj_log_level(ctx, PJ_LOG_NONE); + + // Dont allow network + proj_context_set_enable_network(ctx, false); + + return ctx; +} + +// IMPORTANT: Make sure this module is loaded before any other modules that use proj (like GDAL) +void ProjModule::RegisterVFS(DatabaseInstance &db) { + // we use the sqlite "memvfs" to store the proj.db database in the extension binary itself + // this way we don't have to worry about the user having the proj.db database installed + // on 
their system. We therefore have to tell proj to use memvfs as the sqlite3 vfs and + // point it to the segment of the binary that contains the proj.db database + + sqlite3_initialize(); + sqlite3_memvfs_init(nullptr, nullptr, nullptr); + const auto vfs = sqlite3_vfs_find("memvfs"); + if (!vfs) { + throw InternalException("Could not find sqlite memvfs extension"); + } + sqlite3_vfs_register(vfs, 0); + + // We set the default context proj.db path to the one in the binary here + // Otherwise GDAL will try to load the proj.db from the system + // Any PJ_CONTEXT we create after this will inherit these settings (on this thread?) + const auto path = StringUtil::Format("file:/proj.db?ptr=%llu&sz=%lu&max=%lu", static_cast(proj_db), + proj_db_len, proj_db_len); + + proj_context_set_sqlite3_vfs_name(nullptr, "memvfs"); + + const auto ok = proj_context_set_database_path(nullptr, path.c_str(), nullptr, nullptr); + if (!ok) { + throw InternalException("Could not set proj.db path"); + } +} + +//###################################################################################################################### +// Coordinate Transformation Functions +//###################################################################################################################### + +//====================================================================================================================== +// Local State +//====================================================================================================================== + +struct ProjCRSDelete { + void operator()(PJ *crs) const { + proj_destroy(crs); + } +}; + +using ProjCRS = unique_ptr; + +struct ProjFunctionLocalState final : FunctionLocalState { + + PJ_CONTEXT *proj_ctx; + ArenaAllocator arena; + GeometryAllocator allocator; + + // Cache for PJ* objects + unordered_map, ProjCRS> crs_cache; + + // Not copyable + ProjFunctionLocalState(const ProjFunctionLocalState &) = delete; + ProjFunctionLocalState &operator=(const 
ProjFunctionLocalState &) = delete; + + // Not movable + ProjFunctionLocalState(ProjFunctionLocalState &&) = delete; + ProjFunctionLocalState &operator=(ProjFunctionLocalState &&) = delete; + + explicit ProjFunctionLocalState(ClientContext &context) + : proj_ctx(ProjModule::GetThreadProjContext()), arena(BufferAllocator::Get(context)), + allocator(arena) { + } + + ~ProjFunctionLocalState() override { + // We need to clear the cache so that the unique_ptrs are destroyed before the context + crs_cache.clear(); + proj_context_destroy(proj_ctx); + } + + sgl::geometry Deserialize(const string_t &blob); + string_t Serialize(Vector &vector, const sgl::geometry &geom); + + static unique_ptr Init(ExpressionState &state, const BoundFunctionExpression &expr, + FunctionData *bind_data) { + auto result = make_uniq(state.GetContext()); + return std::move(result); + } + + static ProjFunctionLocalState &ResetAndGet(ExpressionState &state) { + auto &local_state = ExecuteFunctionState::GetFunctionState(state)->Cast(); + local_state.arena.Reset(); + return local_state; + } + + PJ *GetOrCreateProjection(const string &source, const string &target, bool normalize) { + const auto crs_entry = crs_cache.find({source, target}); + if (crs_entry != crs_cache.end()) { + return crs_entry->second.get(); + } + + auto crs = proj_create_crs_to_crs(proj_ctx, source.c_str(), target.c_str(), nullptr); + if (!crs) { + throw InvalidInputException("Could not create projection: " + source + " -> " + target); + } + + if (normalize) { + const auto normalized_crs = proj_normalize_for_visualization(proj_ctx, crs); + proj_destroy(crs); + if (!normalized_crs) { + throw InvalidInputException("Could not normalize projection: " + source + " -> " + target); + } + crs = normalized_crs; + } + + crs_cache[{source, target}] = ProjCRS(crs); + return crs; + } +}; + +sgl::geometry ProjFunctionLocalState::Deserialize(const string_t &blob) { + sgl::geometry geom; + Serde::Deserialize(geom, arena, blob.GetDataUnsafe(), 
blob.GetSize()); + return geom; +} + +string_t ProjFunctionLocalState::Serialize(Vector &vector, const sgl::geometry &geom) { + const auto size = Serde::GetRequiredSize(geom); + auto blob = StringVector::EmptyString(vector, size); + Serde::Serialize(geom, blob.GetDataWriteable(), size); + blob.Finalize(); + return blob; +} + +//====================================================================================================================== +// ST_Transform +//====================================================================================================================== + +struct ST_Transform { + + //------------------------------------------------------------------------------------------------------------------ + // Bind + //------------------------------------------------------------------------------------------------------------------ + struct BindData final : FunctionData { + bool normalize = false; + + unique_ptr Copy() const override { + auto result = make_uniq(); + result->normalize = normalize; + return std::move(result); + } + + bool Equals(const FunctionData &other) const override { + auto &data = other.Cast(); + return normalize == data.normalize; + } + }; + + static unique_ptr Bind(ClientContext &ctx, ScalarFunction &, vector> &args) { + auto result = make_uniq(); + if (args.size() == 4) { + // Ensure the "always_xy" parameter is a constant + const auto &arg = args[3]; + if (arg->HasParameter()) { + throw InvalidInputException("The 'always_xy' parameter must be a constant"); + } + if (!arg->IsFoldable()) { + throw InvalidInputException("The 'always_xy' parameter must be a constant"); + } + result->normalize = BooleanValue::Get(ExpressionExecutor::EvaluateScalar(ctx, *arg)); + } + return std::move(result); + } + + //------------------------------------------------------------------------------------------------------------------ + // Execute (POINT_2D) + 
//------------------------------------------------------------------------------------------------------------------ + static void ExecutePoint(DataChunk &args, ExpressionState &state, Vector &result) { + using POINT_TYPE = StructTypeBinary; + using PROJ_TYPE = PrimitiveType; + + auto &lstate = ProjFunctionLocalState::ResetAndGet(state); + auto &func_expr = state.expr.Cast(); + const auto &info = func_expr.bind_info->Cast(); + + GenericExecutor::ExecuteTernary( + args.data[0], args.data[1], args.data[2], result, args.size(), + [&](const POINT_TYPE &point_in, const PROJ_TYPE &source, const PROJ_TYPE target) { + const auto source_str = source.val.GetString(); + const auto target_str = target.val.GetString(); + + const auto crs = lstate.GetOrCreateProjection(source_str, target_str, info.normalize); + + POINT_TYPE point_out; + const auto transformed = proj_trans(crs, PJ_FWD, proj_coord(point_in.a_val, point_in.b_val, 0, 0)).xy; + point_out.a_val = transformed.x; + point_out.b_val = transformed.y; + + return point_out; + }); + } + + //------------------------------------------------------------------------------------------------------------------ + // Execute (BOX_2D) + //------------------------------------------------------------------------------------------------------------------ + static void ExecuteBox(DataChunk &args, ExpressionState &state, Vector &result) { + using BOX_TYPE = StructTypeQuaternary; + using PROJ_TYPE = PrimitiveType; + + auto &lstate = ProjFunctionLocalState::ResetAndGet(state); + auto &func_expr = state.expr.Cast(); + const auto &info = func_expr.bind_info->Cast(); + + GenericExecutor::ExecuteTernary( + args.data[0], args.data[1], args.data[2], result, args.size(), + [&](const BOX_TYPE &box_in, const PROJ_TYPE source, const PROJ_TYPE &target) { + const auto source_str = source.val.GetString(); + const auto target_str = target.val.GetString(); + + const auto crs = lstate.GetOrCreateProjection(source_str, target_str, info.normalize); + + // 
TODO: this may be interesting to use, but at that point we can only return a BOX_TYPE + constexpr int densify_pts = 0; + BOX_TYPE box_out; + proj_trans_bounds(lstate.proj_ctx, crs, PJ_FWD, box_in.a_val, box_in.b_val, box_in.c_val, box_in.d_val, + &box_out.a_val, &box_out.b_val, &box_out.c_val, &box_out.d_val, densify_pts); + return box_out; + }); + } + + //------------------------------------------------------------------------------------------------------------------ + // Execute (GEOMETRY) + //------------------------------------------------------------------------------------------------------------------ + static void ExecuteGeometry(DataChunk &args, ExpressionState &state, Vector &result) { + auto &lstate = ProjFunctionLocalState::ResetAndGet(state); + auto &alloc = lstate.allocator; + auto &func_expr = state.expr.Cast(); + const auto &info = func_expr.bind_info->Cast(); + + TernaryExecutor::Execute( + args.data[0], args.data[1], args.data[2], result, args.size(), + [&](const string_t &input_geom, const string_t &source, const string_t &target) { + const auto source_str = source.GetString(); + const auto target_str = target.GetString(); + + const auto crs = lstate.GetOrCreateProjection(source_str, target_str, info.normalize); + + auto geom = lstate.Deserialize(input_geom); + + sgl::ops::replace_vertices(&alloc, &geom, crs, [](void *arg, sgl::vertex_xyzm *vertex) { + const auto crs_ptr = static_cast(arg); + const auto transformed = proj_trans(crs_ptr, PJ_FWD, proj_coord(vertex->x, vertex->y, vertex->zm, 0)).xy; + vertex->x = transformed.x; + vertex->y = transformed.y; + }); + + return lstate.Serialize(result, geom); + }); + } + + //------------------------------------------------------------------------------------------------------------------ + // Documentation + //------------------------------------------------------------------------------------------------------------------ + static constexpr auto DESCRIPTION = R"( + Transforms a geometry between two 
coordinate systems + + The source and target coordinate systems can be specified using any format that the [PROJ library](https://proj.org) supports. + + The third optional `always_xy` parameter can be used to force the input and output geometries to be interpreted as having a [easting, northing] coordinate axis order regardless of what the source and target coordinate system definition says. This is particularly useful when transforming to/from the [WGS84/EPSG:4326](https://en.wikipedia.org/wiki/World_Geodetic_System) coordinate system (what most people think of when they hear "longitude"/"latitude" or "GPS coordinates"), which is defined as having a [latitude, longitude] axis order even though [longitude, latitude] is commonly used in practice (e.g. in [GeoJSON](https://tools.ietf.org/html/rfc7946)). More details available in the [PROJ documentation](https://proj.org/en/9.3/faq.html#why-is-the-axis-ordering-in-proj-not-consistent). + + DuckDB spatial vendors its own static copy of the PROJ database of coordinate systems, so if you have your own installation of PROJ on your system the available coordinate systems may differ to what's available in other GIS software. + )"; + + static constexpr auto EXAMPLE = R"( + -- Transform a geometry from EPSG:4326 to EPSG:3857 (WGS84 to WebMercator) + -- Note that since WGS84 is defined as having a [latitude, longitude] axis order + -- we follow the standard and provide the input geometry using that axis order, + -- but the output will be [easting, northing] because that is what's defined by + -- WebMercator. + + SELECT ST_AsText( + ST_Transform( + st_point(52.373123, 4.892360), + 'EPSG:4326', + 'EPSG:3857' + ) + ); + ---- + POINT (544615.0239773799 6867874.103539125) + + -- Alternatively, let's say we got our input point from e.g. a GeoJSON file, + -- which uses WGS84 but with [longitude, latitude] axis order. 
We can use the + -- `always_xy` parameter to force the input geometry to be interpreted as having + -- a [northing, easting] axis order instead, even though the source coordinate + -- reference system definition (WGS84) says otherwise. + + SELECT ST_AsText( + ST_Transform( + -- note the axis order is reversed here + st_point(4.892360, 52.373123), + 'EPSG:4326', + 'EPSG:3857', + always_xy := true + ) + ); + ---- + POINT (544615.0239773799 6867874.103539125) + )"; + + //------------------------------------------------------------------------------------------------------------------ + // Register + //------------------------------------------------------------------------------------------------------------------ + static void Register(DatabaseInstance &db) { + FunctionBuilder::RegisterScalar(db, "ST_Transform", [](ScalarFunctionBuilder &func) { + func.AddVariant([&](ScalarFunctionVariantBuilder &variant) { + variant.AddParameter("box", GeoTypes::BOX_2D()); + variant.AddParameter("source_crs", LogicalType::VARCHAR); + variant.AddParameter("target_crs", LogicalType::VARCHAR); + variant.SetReturnType(GeoTypes::BOX_2D()); + + variant.SetInit(ProjFunctionLocalState::Init); + variant.SetBind(Bind); + variant.SetFunction(ExecuteBox); + }); + + func.AddVariant([&](ScalarFunctionVariantBuilder &variant) { + variant.AddParameter("box", GeoTypes::BOX_2D()); + variant.AddParameter("source_crs", LogicalType::VARCHAR); + variant.AddParameter("target_crs", LogicalType::VARCHAR); + variant.AddParameter("always_xy", LogicalType::BOOLEAN); + variant.SetReturnType(GeoTypes::BOX_2D()); + + variant.SetInit(ProjFunctionLocalState::Init); + variant.SetBind(Bind); + variant.SetFunction(ExecuteBox); + }); + + func.AddVariant([&](ScalarFunctionVariantBuilder &variant) { + variant.AddParameter("point", GeoTypes::POINT_2D()); + variant.AddParameter("source_crs", LogicalType::VARCHAR); + variant.AddParameter("target_crs", LogicalType::VARCHAR); + variant.SetReturnType(GeoTypes::POINT_2D()); + + 
variant.SetInit(ProjFunctionLocalState::Init); + variant.SetBind(Bind); + variant.SetFunction(ExecutePoint); + }); + + func.AddVariant([&](ScalarFunctionVariantBuilder &variant) { + variant.AddParameter("point", GeoTypes::POINT_2D()); + variant.AddParameter("source_crs", LogicalType::VARCHAR); + variant.AddParameter("target_crs", LogicalType::VARCHAR); + variant.AddParameter("always_xy", LogicalType::BOOLEAN); + variant.SetReturnType(GeoTypes::POINT_2D()); + + variant.SetInit(ProjFunctionLocalState::Init); + variant.SetBind(Bind); + variant.SetFunction(ExecutePoint); + }); + + func.AddVariant([&](ScalarFunctionVariantBuilder &variant) { + variant.AddParameter("geom", GeoTypes::GEOMETRY()); + variant.AddParameter("source_crs", LogicalType::VARCHAR); + variant.AddParameter("target_crs", LogicalType::VARCHAR); + variant.SetReturnType(GeoTypes::GEOMETRY()); + + variant.SetInit(ProjFunctionLocalState::Init); + variant.SetBind(Bind); + variant.SetFunction(ExecuteGeometry); + }); + + func.AddVariant([&](ScalarFunctionVariantBuilder &variant) { + variant.AddParameter("geom", GeoTypes::GEOMETRY()); + variant.AddParameter("source_crs", LogicalType::VARCHAR); + variant.AddParameter("target_crs", LogicalType::VARCHAR); + variant.AddParameter("always_xy", LogicalType::BOOLEAN); + variant.SetReturnType(GeoTypes::GEOMETRY()); + + variant.SetInit(ProjFunctionLocalState::Init); + variant.SetBind(Bind); + variant.SetFunction(ExecuteGeometry); + }); + + func.SetDescription(DESCRIPTION); + func.SetExample(EXAMPLE); + + func.SetTag("ext", "spatial"); + func.SetTag("category", "conversion"); + }); + } +}; + +//###################################################################################################################### +// Geodesic Functions +//###################################################################################################################### + +constexpr auto EARTH_A = 6378137; +constexpr auto EARTH_F = 1 / 298.257223563; + 
+//====================================================================================================================== +// Local State +//====================================================================================================================== + +struct GeodesicLocalState final : FunctionLocalState { + + ArenaAllocator arena; + GeometryAllocator alloc; + geod_geodesic geod = {}; + geod_polygon poly = {}; + double accum = 0; + + explicit GeodesicLocalState(ClientContext &context, bool is_line) + : arena(BufferAllocator::Get(context)), alloc(arena) { + + // Initialize the geodesic object for earth + geod_init(&geod, EARTH_A, EARTH_F); + geod_polygon_init(&poly, is_line ? 1 : 0); + } + + static unique_ptr InitPolygon(ExpressionState &state, const BoundFunctionExpression &expr, + FunctionData *bind_data) { + return make_uniq(state.GetContext(), false); + } + + static unique_ptr InitLine(ExpressionState &state, const BoundFunctionExpression &expr, + FunctionData *bind_data) { + return make_uniq(state.GetContext(), true); + } + + static GeodesicLocalState &ResetAndGet(ExpressionState &state) { + auto &local_state = ExecuteFunctionState::GetFunctionState(state)->Cast(); + local_state.arena.Reset(); + return local_state; + } + + sgl::geometry Deserialize(const string_t &blob) { + sgl::geometry geom; + Serde::Deserialize(geom, arena, blob.GetDataUnsafe(), blob.GetSize()); + return geom; + } +}; + +//====================================================================================================================== +// ST_Area_Spheroid +//====================================================================================================================== + +struct ST_Area_Spheroid { + + //------------------------------------------------------------------------------------------------------------------ + // Execute (POLYGON_2D) + //------------------------------------------------------------------------------------------------------------------ + + static 
void ExecutePolygon(DataChunk &args, ExpressionState &state, Vector &result) { + D_ASSERT(args.data.size() == 1); + + auto &input = args.data[0]; + auto count = args.size(); + + auto &ring_vec = ListVector::GetEntry(input); + auto ring_entries = ListVector::GetData(ring_vec); + auto &coord_vec = ListVector::GetEntry(ring_vec); + auto &coord_vec_children = StructVector::GetEntries(coord_vec); + auto x_data = FlatVector::GetData(*coord_vec_children[0]); + auto y_data = FlatVector::GetData(*coord_vec_children[1]); + + geod_geodesic geod = {}; + geod_init(&geod, EARTH_A, EARTH_F); + + geod_polygon poly = {}; + geod_polygon_init(&poly, 0); + + UnaryExecutor::Execute(input, result, count, [&](list_entry_t polygon) { + const auto polygon_offset = polygon.offset; + const auto polygon_length = polygon.length; + + bool first = true; + double area = 0; + for (idx_t ring_idx = polygon_offset; ring_idx < polygon_offset + polygon_length; ring_idx++) { + const auto ring = ring_entries[ring_idx]; + const auto ring_offset = ring.offset; + const auto ring_length = ring.length; + + geod_polygon_clear(&poly); + // Note: the last point is the same as the first point, but geographiclib doesn't know that, + // so skip it. 
+ for (idx_t coord_idx = ring_offset; coord_idx < ring_offset + ring_length - 1; coord_idx++) { + geod_polygon_addpoint(&geod, &poly, x_data[coord_idx], y_data[coord_idx]); + } + double ring_area; + geod_polygon_compute(&geod, &poly, 0, 1, &ring_area, nullptr); + + if (first) { + // Add outer ring + area += std::abs(ring_area); + first = false; + } else { + // Subtract holes + area -= std::abs(ring_area); + } + } + return std::abs(area); + }); + + if (count == 1) { + result.SetVectorType(VectorType::CONSTANT_VECTOR); + } + } + + //------------------------------------------------------------------------------------------------------------------ + // Execute (GEOMETRY) + //------------------------------------------------------------------------------------------------------------------ + static void Execute(DataChunk &args, ExpressionState &state, Vector &result) { + + auto &lstate = GeodesicLocalState::ResetAndGet(state); + + UnaryExecutor::Execute(args.data[0], result, args.size(), [&](const string_t &input) { + const auto geom = lstate.Deserialize(input); + + // Reset the state + lstate.accum = 0; + + // Visit all polygons + sgl::ops::visit_by_dimension(&geom, 2, &lstate, [](void *arg, const sgl::geometry *part) { + if (part->get_type() != sgl::geometry_type::POLYGON) { + return; + } + + auto &sstate = *static_cast(arg); + + // Calculate the area of the polygon + const auto tail = part->get_last_part(); + auto ring = tail; + if (!ring) { + return; + } + + const auto head = ring->get_next(); + + do { + ring = ring->get_next(); + + const auto vertex_count = ring->get_count(); + if (vertex_count < 4) { + continue; + } + + geod_polygon_clear(&sstate.poly); + + // Dont add the last vertex + for (uint32_t i = 0; i < vertex_count - 1; i++) { + const auto vertex = ring->get_vertex_xy(i); + geod_polygon_addpoint(&sstate.geod, &sstate.poly, vertex.x, vertex.y); + } + + double area = 0; + geod_polygon_compute(&sstate.geod, &sstate.poly, 0, 1, &area, nullptr); + + if (ring == 
head) { + sstate.accum += std::abs(area); + } else { + sstate.accum -= std::abs(area); + } + } while (ring != tail); + }); + + return lstate.accum; + }); + } + + //------------------------------------------------------------------------------------------------------------------ + // Documentation + //------------------------------------------------------------------------------------------------------------------ + static constexpr auto DESCRIPTION = R"( + Returns the area of a geometry in meters, using an ellipsoidal model of the earth + + The input geometry is assumed to be in the [EPSG:4326](https://en.wikipedia.org/wiki/World_Geodetic_System) coordinate system (WGS84), with [latitude, longitude] axis order and the area is returned in square meters. This function uses the [GeographicLib](https://geographiclib.sourceforge.io/) library, calculating the area using an ellipsoidal model of the earth. This is a highly accurate method for calculating the area of a polygon taking the curvature of the earth into account, but is also the slowest. + + Returns `0.0` for any geometry that is not a `POLYGON`, `MULTIPOLYGON` or `GEOMETRYCOLLECTION` containing polygon geometries. 
+ )"; + + // TODO: add example + static constexpr auto EXAMPLE = ""; + + //------------------------------------------------------------------------------------------------------------------ + // Register + //------------------------------------------------------------------------------------------------------------------ + static void Register(DatabaseInstance &db) { + FunctionBuilder::RegisterScalar(db, "ST_Area_Spheroid", [](ScalarFunctionBuilder &func) { + func.AddVariant([](ScalarFunctionVariantBuilder &variant) { + variant.AddParameter("geom", GeoTypes::GEOMETRY()); + variant.SetReturnType(LogicalType::DOUBLE); + + variant.SetInit(GeodesicLocalState::InitPolygon); + variant.SetFunction(Execute); + }); + + func.AddVariant([](ScalarFunctionVariantBuilder &variant) { + variant.AddParameter("poly", GeoTypes::POLYGON_2D()); + variant.SetReturnType(LogicalType::DOUBLE); + variant.SetFunction(ExecutePolygon); + }); + + func.SetExample(EXAMPLE); + func.SetDescription(DESCRIPTION); + + func.SetTag("ext", "spatial"); + func.SetTag("category", "property"); + func.SetTag("category", "spheroid"); + }); + } +}; + +//====================================================================================================================== +// ST_Perimeter_Spheroid +//====================================================================================================================== + +struct ST_Perimeter_Spheroid { + + //------------------------------------------------------------------------------------------------------------------ + // Execute (POLYGON_2D) + //------------------------------------------------------------------------------------------------------------------ + static void ExecutePolygon(DataChunk &args, ExpressionState &state, Vector &result) { + D_ASSERT(args.data.size() == 1); + + auto &input = args.data[0]; + auto count = args.size(); + + auto &ring_vec = ListVector::GetEntry(input); + auto ring_entries = ListVector::GetData(ring_vec); + auto &coord_vec = 
ListVector::GetEntry(ring_vec);
+		auto &coord_vec_children = StructVector::GetEntries(coord_vec);
+		auto x_data = FlatVector::GetData(*coord_vec_children[0]);
+		auto y_data = FlatVector::GetData(*coord_vec_children[1]);
+
+		geod_geodesic geod = {};
+		geod_init(&geod, EARTH_A, EARTH_F);
+
+		geod_polygon poly = {};
+		geod_polygon_init(&poly, 0);
+
+		UnaryExecutor::Execute(input, result, count, [&](list_entry_t polygon) {
+			const auto polygon_offset = polygon.offset;
+			const auto polygon_length = polygon.length;
+			double perimeter = 0;
+			for (idx_t ring_idx = polygon_offset; ring_idx < polygon_offset + polygon_length; ring_idx++) {
+				const auto ring = ring_entries[ring_idx];
+				const auto ring_offset = ring.offset;
+				const auto ring_length = ring.length;
+
+				geod_polygon_clear(&poly);
+				// Note: the last point is the same as the first point, but geographiclib doesn't know that,
+				// so skip it.
+				for (idx_t coord_idx = ring_offset; coord_idx < ring_offset + ring_length - 1; coord_idx++) {
+					geod_polygon_addpoint(&geod, &poly, x_data[coord_idx], y_data[coord_idx]);
+				}
+
+				double ring_perimeter;
+				geod_polygon_compute(&geod, &poly, 0, 1, nullptr, &ring_perimeter);
+
+				perimeter += ring_perimeter;
+			}
+			return perimeter;
+		});
+
+		if (count == 1) {
+			result.SetVectorType(VectorType::CONSTANT_VECTOR);
+		}
+	}
+
+	//------------------------------------------------------------------------------------------------------------------
+	// Execute (GEOMETRY)
+	//------------------------------------------------------------------------------------------------------------------
+	// For every input row: deserialize the geometry, zero `lstate.accum`, then visit the parts selected by
+	// visit_by_dimension(..., 2, ...) and add the geodesic perimeter of every ring of every POLYGON part.
+	// The row result is the accumulated sum, so rows without polygon parts yield 0.
+	static void Execute(DataChunk &args, ExpressionState &state, Vector &result) {
+
+		auto &lstate = GeodesicLocalState::ResetAndGet(state);
+
+		UnaryExecutor::Execute(args.data[0], result, args.size(), [&](const string_t &input) {
+			const auto geom = lstate.Deserialize(input);
+
+			// Reset the state
+			lstate.accum = 0;
+
+			// Visit all polygons
+			sgl::ops::visit_by_dimension(&geom, 2, &lstate, [](void *arg, const sgl::geometry *part) {
+				if (part->get_type() != sgl::geometry_type::POLYGON) {
+					return;
+				}
+
+				// `arg` is the `&lstate` pointer handed to visit_by_dimension above
+				auto &sstate = *static_cast(arg);
+
+				// Calculate the perimeter of the polygon
+				// Rings are walked starting from the last part: each iteration first steps to get_next()
+				// and the loop stops once it has processed `tail` itself.
+				const auto tail = part->get_last_part();
+				auto ring = tail;
+				if (!ring) {
+					return;
+				}
+				do {
+					ring = ring->get_next();
+
+					// Rings with fewer than 4 vertices contribute nothing
+					const auto vertex_count = ring->get_count();
+					if (vertex_count < 4) {
+						continue;
+					}
+
+					geod_polygon_clear(&sstate.poly);
+
+					// Dont add the last vertex
+					for (uint32_t i = 0; i < vertex_count - 1; i++) {
+						const auto vertex = ring->get_vertex_xy(i);
+						geod_polygon_addpoint(&sstate.geod, &sstate.poly, vertex.x, vertex.y);
+					}
+
+					double perimeter = 0;
+					geod_polygon_compute(&sstate.geod, &sstate.poly, 0, 1, nullptr, &perimeter);
+					// Add the perimeter of the ring
+					sstate.accum += perimeter;
+
+				} while (ring != tail);
+			});
+
+			return lstate.accum;
+		});
+	}
+
+	//------------------------------------------------------------------------------------------------------------------
+	// Documentation
+	//------------------------------------------------------------------------------------------------------------------
+	static constexpr auto DESCRIPTION = R"(
+		Returns the length of the perimeter in meters using an ellipsoidal model of the earths surface
+
+		The input geometry is assumed to be in the [EPSG:4326](https://en.wikipedia.org/wiki/World_Geodetic_System) coordinate system (WGS84), with [latitude, longitude] axis order and the length is returned in meters. This function uses the [GeographicLib](https://geographiclib.sourceforge.io/) library, calculating the perimeter using an ellipsoidal model of the earth. This is a highly accurate method for calculating the perimeter of a polygon taking the curvature of the earth into account, but is also the slowest.
+
+		Returns `0.0` for any geometry that is not a `POLYGON`, `MULTIPOLYGON` or `GEOMETRYCOLLECTION` containing polygon geometries.
+ )"; + + // TODO: add example + static constexpr auto EXAMPLE = ""; + + //------------------------------------------------------------------------------------------------------------------ + // Register + //------------------------------------------------------------------------------------------------------------------ + static void Register(DatabaseInstance &db) { + FunctionBuilder::RegisterScalar(db, "ST_Perimeter_Spheroid", [](ScalarFunctionBuilder &func) { + func.AddVariant([](ScalarFunctionVariantBuilder &variant) { + variant.AddParameter("geom", GeoTypes::GEOMETRY()); + variant.SetReturnType(LogicalType::DOUBLE); + + variant.SetInit(GeodesicLocalState::InitPolygon); + variant.SetFunction(Execute); + }); + + func.AddVariant([](ScalarFunctionVariantBuilder &variant) { + variant.AddParameter("poly", GeoTypes::POLYGON_2D()); + variant.SetReturnType(LogicalType::DOUBLE); + variant.SetFunction(ExecutePolygon); + }); + + func.SetExample(EXAMPLE); + func.SetDescription(DESCRIPTION); + + func.SetTag("ext", "spatial"); + func.SetTag("category", "property"); + func.SetTag("category", "spheroid"); + }); + } +}; + +//====================================================================================================================== +// ST_Length_Spheroid +//====================================================================================================================== + +struct ST_Length_Spheroid { + + //------------------------------------------------------------------------------------------------------------------ + // Execute (LINESTRING) + //------------------------------------------------------------------------------------------------------------------ + + static void ExecuteLineString(DataChunk &args, ExpressionState &state, Vector &result) { + D_ASSERT(args.data.size() == 1); + + auto &line_vec = args.data[0]; + auto count = args.size(); + + auto &coord_vec = ListVector::GetEntry(line_vec); + auto &coord_vec_children = 
StructVector::GetEntries(coord_vec); + auto x_data = FlatVector::GetData(*coord_vec_children[0]); + auto y_data = FlatVector::GetData(*coord_vec_children[1]); + + geod_geodesic geod = {}; + geod_init(&geod, EARTH_A, EARTH_F); + + geod_polygon poly = {}; + geod_polygon_init(&poly, 1); + + UnaryExecutor::Execute(line_vec, result, count, [&](list_entry_t line) { + geod_polygon_clear(&poly); + + const auto offset = line.offset; + const auto length = line.length; + // Loop over the segments + for (idx_t j = offset; j < offset + length; j++) { + geod_polygon_addpoint(&geod, &poly, x_data[j], y_data[j]); + } + double linestring_length; + geod_polygon_compute(&geod, &poly, 0, 1, &linestring_length, nullptr); + return linestring_length; + }); + + if (count == 1) { + result.SetVectorType(VectorType::CONSTANT_VECTOR); + } + } + + //------------------------------------------------------------------------------------------------------------------ + // Execute (GEOMETRY) + //------------------------------------------------------------------------------------------------------------------ + static void Execute(DataChunk &args, ExpressionState &state, Vector &result) { + + auto &lstate = GeodesicLocalState::ResetAndGet(state); + + UnaryExecutor::Execute(args.data[0], result, args.size(), [&](const string_t &input) { + const auto geom = lstate.Deserialize(input); + + // Reset the state + lstate.accum = 0; + + // Visit all polygons + sgl::ops::visit_by_dimension(&geom, 1, &lstate, [](void *arg, const sgl::geometry *part) { + if (part->get_type() != sgl::geometry_type::LINESTRING) { + return; + } + + auto &sstate = *static_cast(arg); + + const auto vertex_count = part->get_count(); + if (vertex_count < 2) { + return; + } + + geod_polygon_clear(&sstate.poly); + + for (uint32_t i = 0; i < vertex_count; i++) { + const auto vertex = part->get_vertex_xy(i); + geod_polygon_addpoint(&sstate.geod, &sstate.poly, vertex.x, vertex.y); + } + + // Calculate the length of the linestring + double 
length = 0; + geod_polygon_compute(&sstate.geod, &sstate.poly, 0, 1, nullptr, &length); + + sstate.accum += length; + }); + + return lstate.accum; + }); + } + + //------------------------------------------------------------------------------------------------------------------ + // Documentation + //------------------------------------------------------------------------------------------------------------------ + static constexpr auto DESCRIPTION = R"( + Returns the length of the input geometry in meters, using a ellipsoidal model of the earth + + The input geometry is assumed to be in the [EPSG:4326](https://en.wikipedia.org/wiki/World_Geodetic_System) coordinate system (WGS84), with [latitude, longitude] axis order and the length is returned in square meters. This function uses the [GeographicLib](https://geographiclib.sourceforge.io/) library, calculating the length using an ellipsoidal model of the earth. This is a highly accurate method for calculating the length of a line geometry taking the curvature of the earth into account, but is also the slowest. + + Returns `0.0` for any geometry that is not a `LINESTRING`, `MULTILINESTRING` or `GEOMETRYCOLLECTION` containing line geometries. 
+ )"; + + // TODO: add example + static constexpr auto EXAMPLE = ""; + + //------------------------------------------------------------------------------------------------------------------ + // Register + //------------------------------------------------------------------------------------------------------------------ + static void Register(DatabaseInstance &db) { + FunctionBuilder::RegisterScalar(db, "ST_Length_Spheroid", [](ScalarFunctionBuilder &func) { + func.AddVariant([](ScalarFunctionVariantBuilder &variant) { + variant.AddParameter("geom", GeoTypes::GEOMETRY()); + variant.SetReturnType(LogicalType::DOUBLE); + + variant.SetInit(GeodesicLocalState::InitLine); + variant.SetFunction(Execute); + }); + + func.AddVariant([](ScalarFunctionVariantBuilder &variant) { + variant.AddParameter("line", GeoTypes::LINESTRING_2D()); + variant.SetReturnType(LogicalType::DOUBLE); + variant.SetFunction(ExecuteLineString); + }); + + func.SetExample(EXAMPLE); + func.SetDescription(DESCRIPTION); + + func.SetTag("ext", "spatial"); + func.SetTag("category", "property"); + func.SetTag("category", "spheroid"); + }); + } +}; + +//====================================================================================================================== +// ST_Distance_Spheroid +//====================================================================================================================== + +struct ST_Distance_Spheroid { + + static void Execute(DataChunk &args, ExpressionState &state, Vector &result) { + using POINT_TYPE = StructTypeBinary; + using DISTANCE_TYPE = PrimitiveType; + + geod_geodesic geod = {}; + geod_init(&geod, EARTH_A, EARTH_F); + + GenericExecutor::ExecuteBinary( + args.data[0], args.data[1], result, args.size(), [&](const POINT_TYPE &p1, const POINT_TYPE &p2) { + double distance; + geod_inverse(&geod, p1.a_val, p1.b_val, p2.a_val, p2.b_val, &distance, nullptr, nullptr); + return distance; + }); + } + + static constexpr auto DESCRIPTION = R"( + Returns the 
distance between two geometries in meters using a ellipsoidal model of the earths surface + + The input geometry is assumed to be in the [EPSG:4326](https://en.wikipedia.org/wiki/World_Geodetic_System) coordinate system (WGS84), with [latitude, longitude] axis order and the distance limit is expected to be in meters. This function uses the [GeographicLib](https://geographiclib.sourceforge.io/) library to solve the [inverse geodesic problem](https://en.wikipedia.org/wiki/Geodesics_on_an_ellipsoid#Solution_of_the_direct_and_inverse_problems), calculating the distance between two points using an ellipsoidal model of the earth. This is a highly accurate method for calculating the distance between two arbitrary points taking the curvature of the earths surface into account, but is also the slowest. + )"; + + static constexpr auto EXAMPLE = R"( + -- Note: the coordinates are in WGS84 and [latitude, longitude] axis order + -- Whats the distance between New York and Amsterdam (JFK and AMS airport)? + SELECT st_distance_spheroid( + st_point(40.6446, -73.7797), + st_point(52.3130, 4.7725) + ); + ---- + 5863418.7459356235 + -- Roughly 5863km! 
+ )"; + + static void Register(DatabaseInstance &db) { + FunctionBuilder::RegisterScalar(db, "ST_Distance_Spheroid", [](ScalarFunctionBuilder &func) { + func.AddVariant([](ScalarFunctionVariantBuilder &variant) { + variant.AddParameter("p1", GeoTypes::POINT_2D()); + variant.AddParameter("p2", GeoTypes::POINT_2D()); + variant.SetReturnType(LogicalType::DOUBLE); + + variant.SetFunction(Execute); + }); + + func.SetExample(EXAMPLE); + func.SetDescription(DESCRIPTION); + + func.SetTag("ext", "spatial"); + func.SetTag("category", "relation"); + func.SetTag("category", "spheroid"); + }); + } +}; + +//====================================================================================================================== +// ST_DWithin_Spheroid +//====================================================================================================================== + +struct ST_DWithin_Spheroid { + + static void Execute(DataChunk &args, ExpressionState &state, Vector &result) { + using POINT_TYPE = StructTypeBinary; + using DISTANCE_TYPE = PrimitiveType; + using BOOL_TYPE = PrimitiveType; + + geod_geodesic geod = {}; + geod_init(&geod, EARTH_A, EARTH_F); + + GenericExecutor::ExecuteTernary( + args.data[0], args.data[1], args.data[2], result, args.size(), + [&](const POINT_TYPE &p1, const POINT_TYPE &p2, const DISTANCE_TYPE &limit) { + double distance; + geod_inverse(&geod, p1.a_val, p1.b_val, p2.a_val, p2.b_val, &distance, nullptr, nullptr); + return distance <= limit.val; + }); + } + + static constexpr auto DESCRIPTION = R"( + Returns if two POINT_2D's are within a target distance in meters, using an ellipsoidal model of the earths surface + + The input geometry is assumed to be in the [EPSG:4326](https://en.wikipedia.org/wiki/World_Geodetic_System) coordinate system (WGS84), with [latitude, longitude] axis order and the distance is returned in meters. 
This function uses the [GeographicLib](https://geographiclib.sourceforge.io/) library to solve the [inverse geodesic problem](https://en.wikipedia.org/wiki/Geodesics_on_an_ellipsoid#Solution_of_the_direct_and_inverse_problems), calculating the distance between two points using an ellipsoidal model of the earth. This is a highly accurate method for calculating the distance between two arbitrary points taking the curvature of the earths surface into account, but is also the slowest. + )"; + + // TODO: add example + static constexpr auto EXAMPLE = ""; + + static void Register(DatabaseInstance &db) { + FunctionBuilder::RegisterScalar(db, "ST_DWithin_Spheroid", [](ScalarFunctionBuilder &func) { + func.AddVariant([](ScalarFunctionVariantBuilder &variant) { + variant.AddParameter("p1", GeoTypes::POINT_2D()); + variant.AddParameter("p2", GeoTypes::POINT_2D()); + variant.SetReturnType(LogicalType::DOUBLE); + + variant.SetFunction(Execute); + }); + + func.SetExample(EXAMPLE); + func.SetDescription(DESCRIPTION); + + func.SetTag("ext", "spatial"); + func.SetTag("category", "relation"); + func.SetTag("category", "spheroid"); + + }); + } +}; + +} // namespace + +//###################################################################################################################### +// Module Registration +//###################################################################################################################### +void RegisterProjModule(DatabaseInstance &db) { + + // Register the VFS for the proj.db database + ProjModule::RegisterVFS(db); + + // Coordinate Transform Function + ST_Transform::Register(db); + + // Geodesic Functions + ST_Area_Spheroid::Register(db); + ST_Perimeter_Spheroid::Register(db); + ST_Length_Spheroid::Register(db); + ST_Distance_Spheroid::Register(db); + ST_DWithin_Spheroid::Register(db); +} + +} // namespace duckdb \ No newline at end of file diff --git a/src/spatial/modules/proj/proj_module.hpp b/src/spatial/modules/proj/proj_module.hpp new 
file mode 100644 index 00000000..9292cedc --- /dev/null +++ b/src/spatial/modules/proj/proj_module.hpp @@ -0,0 +1,9 @@ +#pragma once + +namespace duckdb { + +class DatabaseInstance; + +void RegisterProjModule(DatabaseInstance &db); + +} // namespace duckdb \ No newline at end of file diff --git a/spatial/src/spatial/core/io/osm/CMakeLists.txt b/src/spatial/modules/shapefile/CMakeLists.txt similarity index 56% rename from spatial/src/spatial/core/io/osm/CMakeLists.txt rename to src/spatial/modules/shapefile/CMakeLists.txt index 74bf39b4..b1663c12 100644 --- a/spatial/src/spatial/core/io/osm/CMakeLists.txt +++ b/src/spatial/modules/shapefile/CMakeLists.txt @@ -1,5 +1,5 @@ set(EXTENSION_SOURCES ${EXTENSION_SOURCES} - ${CMAKE_CURRENT_SOURCE_DIR}/st_read_osm.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/shapefile_module.cpp PARENT_SCOPE ) \ No newline at end of file diff --git a/src/spatial/modules/shapefile/README.md b/src/spatial/modules/shapefile/README.md new file mode 100644 index 00000000..f7fccd41 --- /dev/null +++ b/src/spatial/modules/shapefile/README.md @@ -0,0 +1,11 @@ + +# Shapefile Module + +## TODO: + +- [ ] Handle Z/M values +- [ ] Cleanup shapefile_meta +- [ ] Test large files +- [ ] Add multifilereader support +- [ ] Add COPY TO support (export) +- [ ] Add COPY FROM support (import) \ No newline at end of file diff --git a/src/spatial/modules/shapefile/shapefile_module.cpp b/src/spatial/modules/shapefile/shapefile_module.cpp new file mode 100644 index 00000000..fb9e9052 --- /dev/null +++ b/src/spatial/modules/shapefile/shapefile_module.cpp @@ -0,0 +1,1085 @@ +#include "spatial/modules/shapefile/shapefile_module.hpp" +#include "spatial/geometry/geometry_serialization.hpp" +#include "spatial/geometry/sgl.hpp" +#include "spatial/spatial_types.hpp" + +#include "duckdb/common/multi_file_reader.hpp" +#include "duckdb/function/replacement_scan.hpp" +#include "duckdb/main/extension_util.hpp" +#include "duckdb/parser/expression/constant_expression.hpp" +#include 
"duckdb/parser/expression/function_expression.hpp" +#include "duckdb/parser/tableref/table_function_ref.hpp" + +#include "utf8proc_wrapper.hpp" + +#include "shapefil.h" + +void SASetupDefaultHooks(SAHooks *hooks) { + // Should never be called, use OpenLL and pass in the hooks + throw duckdb::InternalException("SASetupDefaultHooks"); +} + +namespace duckdb { + +namespace { + +//###################################################################################################################### +// Shapefile Utilities and Wrappers +//###################################################################################################################### + +struct SHPHandleDeleter { + void operator()(SHPInfo *info) const { + if (info) { + SHPClose(info); + } + } +}; +using SHPHandlePtr = unique_ptr; + +struct DBFHandleDeleter { + void operator()(DBFInfo *info) const { + if (info) { + DBFClose(info); + } + } +}; +using DBFHandlePtr = unique_ptr; + +struct SHPObjectDeleter { + void operator()(SHPObject *obj) const { + if (obj) { + SHPDestroyObject(obj); + } + } +}; +using SHPObjectPtr = unique_ptr; + +enum class AttributeEncoding { + UTF8, + LATIN1, + BLOB, +}; + +// TODO: DuckDB can do this natively now. We dont need this. +struct EncodingUtil { + static inline uint8_t GetUTF8ByteLength(data_t first_char) { + if (first_char < 0x80) + return 1; + if (!(first_char & 0x20)) + return 2; + if (!(first_char & 0x10)) + return 3; + if (!(first_char & 0x08)) + return 4; + if (!(first_char & 0x04)) + return 5; + return 6; + } + static inline data_t UTF8ToLatin1Char(const_data_ptr_t ptr) { + auto len = GetUTF8ByteLength(*ptr); + if (len == 1) { + return *ptr; + } + uint32_t res = static_cast(*ptr & (0xff >> (len + 1))) << ((len - 1) * 6); + while (--len) { + res |= (*(++ptr) - 0x80) << ((len - 1) * 6); + } + // TODO: Throw exception instead if character can't be encoded? + return res > 0xff ? '?' 
: static_cast(res); + } + + // Convert UTF-8 to ISO-8859-1 + // out must be at least the size of in + static void UTF8ToLatin1Buffer(const_data_ptr_t in, data_ptr_t out) { + while (*in) { + *out++ = UTF8ToLatin1Char(in); + } + *out = 0; + } + + // convert ISO-8859-1 to UTF-8 + // mind = blown + // out must be at least 2x the size of in + static idx_t LatinToUTF8Buffer(const_data_ptr_t in, data_ptr_t out) { + idx_t len = 0; + while (*in) { + if (*in < 128) { + *out++ = *in++; + len += 1; + } else { + *out++ = 0xc2 + (*in > 0xbf); + *out++ = (*in++ & 0x3f) + 0x80; + len += 2; + } + } + return len; + } +}; + +//====================================================================================================================== +// File System Hooks +//====================================================================================================================== +SAFile DuckDBShapefileOpen(void *userData, const char *filename, const char *access_mode) { + try { + auto &fs = *static_cast(userData); + constexpr auto flags = FileFlags::FILE_FLAGS_READ | FileFlags::FILE_FLAGS_NULL_IF_NOT_EXISTS; + auto file_handle = fs.OpenFile(filename, flags); + if (!file_handle) { + return nullptr; + } + return reinterpret_cast(file_handle.release()); + } catch (...) 
{
+		return nullptr;
+	}
+}
+
+// fread-style hook: reads size * nmemb bytes and returns the number of complete items read
+SAOffset DuckDBShapefileRead(void *p, SAOffset size, SAOffset nmemb, SAFile file) {
+	// Mirror fread(): a zero item size or count transfers nothing (and would otherwise divide by zero below)
+	if (size == 0 || nmemb == 0) {
+		return 0;
+	}
+	const auto handle = reinterpret_cast(file);
+	const auto read_bytes = handle->Read(p, size * nmemb);
+	return read_bytes / size;
+}
+
+// fwrite-style hook: writes size * nmemb bytes and returns the number of complete items written
+SAOffset DuckDBShapefileWrite(const void *p, SAOffset size, SAOffset nmemb, SAFile file) {
+	// Mirror fwrite(): a zero item size or count transfers nothing (and would otherwise divide by zero below)
+	if (size == 0 || nmemb == 0) {
+		return 0;
+	}
+	const auto handle = reinterpret_cast(file);
+	const auto written_bytes = handle->Write(const_cast(p), size * nmemb);
+	return written_bytes / size;
+}
+
+// fseek-style hook: repositions the handle relative to start, current position or end; always returns 0
+SAOffset DuckDBShapefileSeek(SAFile file, SAOffset offset, int whence) {
+	const auto file_handle = reinterpret_cast(file);
+	switch (whence) {
+	case SEEK_SET:
+		file_handle->Seek(offset);
+		break;
+	case SEEK_CUR:
+		file_handle->Seek(file_handle->SeekPosition() + offset);
+		break;
+	case SEEK_END:
+		file_handle->Seek(file_handle->GetFileSize() + offset);
+		break;
+	default:
+		throw InternalException("Unknown seek type");
+	}
+	return 0;
+}
+
+// ftell-style hook: returns the current seek position
+SAOffset DuckDBShapefileTell(SAFile file) {
+	const auto handle = reinterpret_cast(file);
+	return handle->SeekPosition();
+}
+
+// fflush-style hook: returns 0 on success, -1 if syncing threw
+int DuckDBShapefileFlush(SAFile file) {
+	try {
+		const auto handle = reinterpret_cast(file);
+		handle->Sync();
+		return 0;
+	} catch (...) {
+		return -1;
+	}
+}
+
+// fclose-style hook: closes and deletes the handle released by DuckDBShapefileOpen; -1 if closing threw
+int DuckDBShapefileClose(SAFile file) {
+	try {
+		const auto handle = reinterpret_cast(file);
+		handle->Close();
+		delete handle;
+		return 0;
+	} catch (...) {
+		return -1;
+	}
+}
+
+// remove-style hook: deletes the file or directory at `filename`; -1 if it does not exist or removal threw
+int DuckDBShapefileRemove(void *userData, const char *filename) {
+	try {
+		auto &fs = *reinterpret_cast(userData);
+		constexpr auto flags = FileFlags::FILE_FLAGS_WRITE | FileFlags::FILE_FLAGS_NULL_IF_NOT_EXISTS;
+		const auto file = fs.OpenFile(filename, flags);
+		if (!file) {
+			return -1;
+		}
+		const auto file_type = fs.GetFileType(*file);
+		if (file_type == FileType::FILE_TYPE_DIR) {
+			fs.RemoveDirectory(filename);
+		} else {
+			fs.RemoveFile(filename);
+		}
+		return 0;
+	} catch (...)
{
+		return -1;
+	}
+}
+
+void DuckDBShapefileError(const char *message) {
+	// TODO: Fix this?
+	// We cant throw an exception here because the shapefile library is not
+	// exception safe. Instead we should store it somewhere...
+	// Maybe another client context cache?
+
+	// Note that we need to copy the message
+
+	fprintf(stderr, "%s\n", message);
+}
+
+// Builds a shapelib hook table that routes all file access through `fs`.
+// The table stores a pointer to `fs`, so `fs` must outlive every handle opened with it.
+SAHooks GetDuckDBHooks(FileSystem &fs) {
+	SAHooks hooks;
+	hooks.FOpen = DuckDBShapefileOpen;
+	hooks.FRead = DuckDBShapefileRead;
+	hooks.FWrite = DuckDBShapefileWrite;
+	hooks.FSeek = DuckDBShapefileSeek;
+	hooks.FTell = DuckDBShapefileTell;
+	hooks.FFlush = DuckDBShapefileFlush;
+	hooks.FClose = DuckDBShapefileClose;
+	hooks.Remove = DuckDBShapefileRemove;
+
+	hooks.Error = DuckDBShapefileError;
+	hooks.Atof = std::atof;
+	hooks.userData = &fs;
+	return hooks;
+}
+
+// Opens a .dbf file read-only through the DuckDB file system; throws IOException on failure
+DBFHandlePtr OpenDBFFile(FileSystem &fs, const string &filename) {
+	const auto hooks = GetDuckDBHooks(fs);
+	const auto handle = DBFOpenLL(filename.c_str(), "rb", &hooks);
+
+	if (!handle) {
+		throw IOException("Failed to open DBF file %s", filename.c_str());
+	}
+
+	return DBFHandlePtr(handle);
+}
+
+// Opens a .shp file read-only through the DuckDB file system; throws IOException on failure
+SHPHandlePtr OpenSHPFile(FileSystem &fs, const string &filename) {
+	const auto hooks = GetDuckDBHooks(fs);
+	const auto handle = SHPOpenLL(filename.c_str(), "rb", &hooks);
+	if (!handle) {
+		throw IOException("Failed to open SHP file %s", filename);
+	}
+	return SHPHandlePtr(handle);
+}
+
+//######################################################################################################################
+// Table Functions
+//######################################################################################################################
+
+//======================================================================================================================
+// ST_ReadSHP
+//======================================================================================================================
+//
+// TODO: This does not handle Z and M
values
+// TODO: also double check error reporting
+//
+struct ST_ReadSHP {
+
+	//------------------------------------------------------------------------------------------------------------------
+	// Bind
+	//------------------------------------------------------------------------------------------------------------------
+	// Bind-time description of the shapefile: its path, the header info filled in by SHPGetInfo, the attribute
+	// text encoding and the logical type chosen for each DBF attribute column.
+	struct ShapefileBindData final : TableFunctionData {
+		string file_name;
+		int shape_count;
+		int shape_type;
+		double min_bound[4];
+		double max_bound[4];
+		AttributeEncoding attribute_encoding;
+		vector attribute_types;
+
+		explicit ShapefileBindData(string file_name_p)
+			: file_name(std::move(file_name_p)), shape_count(0),
+			shape_type(0), min_bound {0, 0, 0, 0}, max_bound {0, 0, 0, 0},
+			attribute_encoding(AttributeEncoding::LATIN1) {
+		}
+	};
+
+	// Opens the .shp and sibling .dbf file, validates the shape type, resolves the attribute encoding
+	// (default LATIN1, then a .cpg file if present, then the `encoding` named parameter) and produces one
+	// output column per DBF field followed by a trailing `geom` GEOMETRY column.
+	// Throws InvalidInputException for unsupported shape types, encodings or DBF field types.
+	static unique_ptr Bind(ClientContext &context, TableFunctionBindInput &input,
+						   vector &return_types, vector &names) {
+
+		auto file_name = StringValue::Get(input.inputs[0]);
+		auto result = make_uniq(file_name);
+
+		auto &fs = FileSystem::GetFileSystem(context);
+		auto shp_handle = OpenSHPFile(fs, file_name);
+
+		// Get info about the geometry
+		SHPGetInfo(shp_handle.get(), &result->shape_count, &result->shape_type, result->min_bound, result->max_bound);
+
+		// Ensure we have a supported shape type
+		auto valid_types = {SHPT_NULL, SHPT_POINT, SHPT_ARC, SHPT_POLYGON, SHPT_MULTIPOINT};
+		bool is_valid_type = false;
+		for (auto type : valid_types) {
+			if (result->shape_type == type) {
+				is_valid_type = true;
+				break;
+			}
+		}
+		if (!is_valid_type) {
+			throw InvalidInputException("Invalid shape type %d", result->shape_type);
+		}
+
+		auto base_name = file_name.substr(0, file_name.find_last_of('.'));
+
+		// A standards compliant shapefile should use ISO-8859-1 encoding for attributes, but it can be overridden
+		// by a .cpg file. So check if there is a .cpg file, if so use that to determine the encoding
+		auto cpg_file = base_name + ".cpg";
+		if (fs.FileExists(cpg_file)) {
+			auto cpg_handle = fs.OpenFile(cpg_file, FileFlags::FILE_FLAGS_READ);
+			auto cpg_type = StringUtil::Lower(cpg_handle->ReadLine());
+			if (cpg_type == "utf-8") {
+				result->attribute_encoding = AttributeEncoding::UTF8;
+			} else if (cpg_type == "iso-8859-1") {
+				result->attribute_encoding = AttributeEncoding::LATIN1;
+			} else {
+				// Otherwise, parse as blob
+				result->attribute_encoding = AttributeEncoding::BLOB;
+			}
+		}
+
+		// An explicit `encoding` parameter takes precedence over the .cpg file
+		for (auto &kv : input.named_parameters) {
+			if (kv.first == "encoding") {
+				auto encoding = StringUtil::Lower(StringValue::Get(kv.second));
+				if (encoding == "utf-8") {
+					result->attribute_encoding = AttributeEncoding::UTF8;
+				} else if (encoding == "iso-8859-1") {
+					result->attribute_encoding = AttributeEncoding::LATIN1;
+				} else if (encoding == "blob") {
+					// Otherwise, parse as blob
+					result->attribute_encoding = AttributeEncoding::BLOB;
+				} else {
+					// Include the closest valid encodings in the error instead of discarding the suggestion
+					vector candidates = {"utf-8", "iso-8859-1", "blob"};
+					auto msg = StringUtil::CandidatesErrorMessage(candidates, encoding, "Did you mean");
+					throw InvalidInputException("Invalid encoding %s%s", encoding.c_str(), msg.c_str());
+				}
+			}
+			// NOTE(review): the filter box is extracted but never used - looks unfinished, confirm intent
+			if (kv.first == "spatial_filter_box") {
+				auto filter_box = StructValue::GetChildren(kv.second);
+			}
+		}
+
+		// Get info about the attributes
+		// Remove file extension and replace with .dbf
+		auto dbf_handle = OpenDBFFile(fs, base_name + ".dbf");
+
+		// TODO: Try to get the encoding from the dbf if there is no .cpg file
+		// auto code_page = DBFGetCodePage(dbf_handle.get());
+		// if(!has_cpg_file && code_page != 0) { }
+
+		// Then return the attributes
+		auto field_count = DBFGetFieldCount(dbf_handle.get());
+		char field_name[12]; // Max field name length is 11 + null terminator
+		int field_width = 0;
+		int field_precision = 0;
+		memset(field_name, 0, sizeof(field_name));
+
+		for (int i = 0; i < field_count; i++) {
+			auto field_type = DBFGetFieldInfo(dbf_handle.get(), i, field_name, &field_width, &field_precision);
+
+			LogicalType type;
+			switch (field_type) {
+			case FTString:
+				type = result->attribute_encoding == AttributeEncoding::BLOB ? LogicalType::BLOB : LogicalType::VARCHAR;
+				break;
+			case FTInteger:
+				type = LogicalType::INTEGER;
+				break;
+			case FTDouble:
+				// Numeric fields without decimals and narrower than 19 characters are exposed as BIGINT
+				if (field_precision == 0 && field_width < 19) {
+					type = LogicalType::BIGINT;
+				} else {
+					type = LogicalType::DOUBLE;
+				}
+				break;
+			case FTDate:
+				// Dates are stored as 8-char strings
+				// YYYYMMDD
+				type = LogicalType::DATE;
+				break;
+			case FTLogical:
+				type = LogicalType::BOOLEAN;
+				break;
+			default:
+				throw InvalidInputException("DBF field type %d not supported", field_type);
+			}
+			names.emplace_back(field_name);
+			return_types.push_back(type);
+			result->attribute_types.push_back(type);
+		}
+
+		// Always return geometry last
+		return_types.push_back(GeoTypes::GEOMETRY());
+		names.push_back("geom");
+
+		// Deduplicate field names if necessary
+		for (size_t i = 0; i < names.size(); i++) {
+			idx_t count = 1;
+			for (size_t j = i + 1; j < names.size(); j++) {
+				if (names[i] == names[j]) {
+					names[j] += "_" + std::to_string(count++);
+				}
+			}
+		}
+
+		return std::move(result);
+	}
+
+	//------------------------------------------------------------------------------------------------------------------
+	// Init Global
+	//------------------------------------------------------------------------------------------------------------------
+	struct ShapefileGlobalState final : GlobalTableFunctionState {
+		int shape_idx;
+		SHPHandlePtr shp_handle;
+		DBFHandlePtr dbf_handle;
+		ArenaAllocator arena;
+		vector column_ids;
+
+		explicit ShapefileGlobalState(ClientContext &context, const string &file_name, vector column_ids_p)
+			: shape_idx(0), arena(BufferAllocator::Get(context)), column_ids(std::move(column_ids_p)) {
+			auto &fs = FileSystem::GetFileSystem(context);
+
+			shp_handle = OpenSHPFile(fs, file_name);
+
+			// Remove file extension and
replace with .dbf
+			auto dot_idx = file_name.find_last_of('.');
+			auto base_name = file_name.substr(0, dot_idx);
+			dbf_handle = OpenDBFFile(fs, base_name + ".dbf");
+		}
+	};
+
+	// Creates the global scan state from the bound file name and the projected column ids
+	static unique_ptr InitGlobal(ClientContext &context, TableFunctionInitInput &input) {
+		auto &bind_data = input.bind_data->Cast();
+		auto result = make_uniq(context, bind_data.file_name, input.column_ids);
+		return std::move(result);
+	}
+
+	//------------------------------------------------------------------------------------------------------------------
+	// Geometry Conversion
+	//------------------------------------------------------------------------------------------------------------------
+	// Each converter copies the X/Y coordinates of a shapelib object into arena-allocated, interleaved
+	// (x, y) vertex buffers and wraps them in sgl geometries. Z and M values are not copied.
+
+	// Converts the first vertex of the shape into a point
+	struct ConvertPoint {
+		static sgl::geometry Convert(const SHPObjectPtr &shape, ArenaAllocator &arena) {
+
+			// Create a point
+			auto point = sgl::point::make_empty();
+
+			// Allocate memory for the vertex
+			const auto vertex_mem = arena.AllocateAligned(sizeof(double) * 2);
+			const auto vertex_ptr = reinterpret_cast(vertex_mem);
+
+			// Set the vertex data
+			vertex_ptr[0] = shape->padfX[0];
+			vertex_ptr[1] = shape->padfY[0];
+
+			point.set_vertex_data(vertex_mem, 1);
+
+			// Return the point
+			return point;
+		}
+	};
+
+	// Converts a shape with exactly one part into a linestring, any other part count into a multi-linestring
+	struct ConvertLineString {
+		static sgl::geometry Convert(const SHPObjectPtr &shape, ArenaAllocator &arena) {
+			if (shape->nParts == 1) {
+				// Create a line
+				auto line = sgl::linestring::make_empty();
+
+				// Allocate memory for the vertices
+				const auto vertex_mem = arena.AllocateAligned(sizeof(double) * 2 * shape->nVertices);
+				const auto vertex_ptr = reinterpret_cast(vertex_mem);
+
+				// Set the vertex data
+				for (int i = 0; i < shape->nVertices; i++) {
+					vertex_ptr[i * 2] = shape->padfX[i];
+					vertex_ptr[i * 2 + 1] = shape->padfY[i];
+				}
+				line.set_vertex_data(vertex_mem, shape->nVertices);
+
+				// Return the line
+				return line;
+			}
+
+			// Else, create a multi-line
+			auto mline = sgl::multi_linestring::make_empty();
+
+			// Part i covers vertices [panPartStart[i], panPartStart[i + 1]); the last part runs to nVertices
+			auto start = shape->panPartStart[0];
+			for (int i = 0; i < shape->nParts; i++) {
+				const auto end = i == shape->nParts - 1 ? shape->nVertices : shape->panPartStart[i + 1];
+				const auto line_size = end - start;
+
+				// Allocate a new line
+				const auto line_mem = arena.AllocateAligned(sizeof(sgl::geometry));
+				const auto line_ptr = new (line_mem) sgl::geometry(sgl::geometry_type::LINESTRING);
+
+				// Allocate memory for the vertices
+				const auto vertex_mem = arena.AllocateAligned(sizeof(double) * 2 * line_size);
+				const auto vertex_ptr = reinterpret_cast(vertex_mem);
+
+				for (int j = 0; j < line_size; j++) {
+					const auto offset = start + j;
+
+					vertex_ptr[j * 2] = shape->padfX[offset];
+					vertex_ptr[j * 2 + 1] = shape->padfY[offset];
+				}
+
+				// Set the vertex data and append to the multi-line
+				line_ptr->set_vertex_data(vertex_mem, line_size);
+				mline.append_part(line_ptr);
+
+				start = end;
+			}
+
+			return mline;
+		}
+	};
+
+	// Converts a shape into a polygon, or into a multi-polygon when two or more parts have negative signed area
+	struct ConvertPolygon {
+		static sgl::geometry Convert(const SHPObjectPtr &shape, ArenaAllocator &arena) {
+			// First off, check if there are more than one polygon.
+			// Each polygon is identified by a part with clockwise winding order
+			// we calculate the winding order by checking the sign of the area
+			vector polygon_part_starts;
+			for (int i = 0; i < shape->nParts; i++) {
+				const auto start = shape->panPartStart[i];
+				const auto end = i == shape->nParts - 1 ? shape->nVertices : shape->panPartStart[i + 1];
+				double area = 0;
+				for (int j = start; j < end - 1; j++) {
+					area += (shape->padfX[j] * shape->padfY[j + 1]) - (shape->padfX[j + 1] * shape->padfY[j]);
+				}
+				if (area < 0) {
+					polygon_part_starts.push_back(i);
+				}
+			}
+			if (polygon_part_starts.size() < 2) {
+				// Single polygon, every part is an interior ring
+				// Even if the polygon is counter-clockwise (which should not happen for shapefiles).
+				// we still fall back and convert it to a single polygon.
+				auto poly = sgl::polygon::make_empty();
+
+				auto start = shape->panPartStart[0];
+				for (int i = 0; i < shape->nParts; i++) {
+					const auto end = i == shape->nParts - 1 ? shape->nVertices : shape->panPartStart[i + 1];
+
+					const auto ring_size = end - start;
+					const auto ring_mem = arena.AllocateAligned(sizeof(sgl::geometry));
+					const auto ring = new (ring_mem) sgl::geometry(sgl::geometry_type::LINESTRING);
+
+					const auto vertex_mem = arena.AllocateAligned(sizeof(double) * 2 * ring_size);
+					const auto vertex_ptr = reinterpret_cast(vertex_mem);
+
+					for (int j = 0; j < ring_size; j++) {
+						const auto offset = start + j;
+						vertex_ptr[j * 2] = shape->padfX[offset];
+						vertex_ptr[j * 2 + 1] = shape->padfY[offset];
+					}
+
+					ring->set_vertex_data(vertex_mem, ring_size);
+					poly.append_part(ring);
+
+					start = end;
+				}
+
+				return poly;
+			}
+
+			// Else, MultiPolygon
+			// Each polygon owns the parts from its own start index up to the next polygon's start index.
+			// NOTE(review): parts located before the first recorded start index are not visited by this
+			// loop - confirm that a shapefile can never begin with such a part.
+			auto mpoly = sgl::multi_polygon::make_empty();
+			for (size_t polygon_idx = 0; polygon_idx < polygon_part_starts.size(); polygon_idx++) {
+				const auto part_start = polygon_part_starts[polygon_idx];
+				const auto part_end = polygon_idx == polygon_part_starts.size() - 1
+										  ? shape->nParts
+										  : polygon_part_starts[polygon_idx + 1];
+
+				const auto poly_mem = arena.AllocateAligned(sizeof(sgl::geometry));
+				const auto poly_ptr = new (poly_mem) sgl::geometry(sgl::geometry_type::POLYGON);
+
+				for (auto ring_idx = part_start; ring_idx < part_end; ring_idx++) {
+					const auto start = shape->panPartStart[ring_idx];
+					const auto end =
+						ring_idx == shape->nParts - 1 ? shape->nVertices : shape->panPartStart[ring_idx + 1];
+					const auto ring_size = end - start;
+
+					const auto ring_mem = arena.AllocateAligned(sizeof(sgl::geometry));
+					const auto ring_ptr = new (ring_mem) sgl::geometry(sgl::geometry_type::LINESTRING);
+
+					const auto vertex_mem = arena.AllocateAligned(sizeof(double) * 2 * ring_size);
+					const auto vertex_ptr = reinterpret_cast(vertex_mem);
+
+					for (int j = 0; j < ring_size; j++) {
+						const auto offset = start + j;
+						vertex_ptr[j * 2] = shape->padfX[offset];
+						vertex_ptr[j * 2 + 1] = shape->padfY[offset];
+					}
+
+					ring_ptr->set_vertex_data(vertex_mem, ring_size);
+					poly_ptr->append_part(ring_ptr);
+				}
+
+				mpoly.append_part(poly_ptr);
+			}
+
+			return mpoly;
+		}
+	};
+
+	// Converts every vertex of the shape into its own point and collects them in a multi-point
+	struct ConvertMultiPoint {
+		static sgl::geometry Convert(const SHPObjectPtr &shape, ArenaAllocator &arena) {
+			auto mpoint = sgl::multi_point::make_empty();
+
+			for (int i = 0; i < shape->nVertices; i++) {
+				const auto point_mem = arena.AllocateAligned(sizeof(sgl::geometry));
+				const auto point_ptr = new (point_mem) sgl::geometry(sgl::geometry_type::POINT);
+
+				const auto vertex_mem = arena.AllocateAligned(sizeof(double) * 2);
+				const auto vertex_ptr = reinterpret_cast(vertex_mem);
+
+				vertex_ptr[0] = shape->padfX[i];
+				vertex_ptr[1] = shape->padfY[i];
+
+				point_ptr->set_vertex_data(vertex_mem, 1);
+				mpoint.append_part(point_ptr);
+			}
+
+			return mpoint;
+		}
+	};
+
+	template
+	static void ConvertGeomLoop(Vector &result, int record_start, idx_t count, SHPHandle &shp_handle,
+								ArenaAllocator &arena) {
+		for (idx_t result_idx = 0; result_idx < count; result_idx++) {
+			auto shape = SHPObjectPtr(SHPReadObject(shp_handle, record_start++));
+			if (shape->nSHPType == SHPT_NULL) {
+				FlatVector::SetNull(result, result_idx, true);
+				continue;
+			}
+
+			// TODO: Handle Z and M
+			auto geom = OP::Convert(shape, arena);
+
+			// Serialize into a blob
+			const auto size = Serde::GetRequiredSize(geom);
+			auto blob = StringVector::EmptyString(result, size);
Serde::Serialize(geom, blob.GetDataWriteable(), size); + blob.Finalize(); + + // Set the blob in the result vector + FlatVector::GetData(result)[result_idx] = blob; + } + } + + static void ConvertGeometryVector(Vector &result, int record_start, idx_t count, SHPHandle shp_handle, + ArenaAllocator &arena, int geom_type) { + switch (geom_type) { + case SHPT_NULL: + FlatVector::Validity(result).SetAllInvalid(count); + break; + case SHPT_POINT: + ConvertGeomLoop(result, record_start, count, shp_handle, arena); + break; + case SHPT_ARC: + ConvertGeomLoop(result, record_start, count, shp_handle, arena); + break; + case SHPT_POLYGON: + ConvertGeomLoop(result, record_start, count, shp_handle, arena); + break; + case SHPT_MULTIPOINT: + ConvertGeomLoop(result, record_start, count, shp_handle, arena); + break; + default: + throw InvalidInputException("Shape type %d not supported", geom_type); + } + } + + //------------------------------------------------------------------------------------------------------------------ + // Attribute Conversion + //------------------------------------------------------------------------------------------------------------------ + + struct ConvertBlobAttribute { + using TYPE = string_t; + static string_t Convert(Vector &result, DBFHandle dbf_handle, int record_idx, int field_idx) { + auto value = DBFReadStringAttribute(dbf_handle, record_idx, field_idx); + return StringVector::AddString(result, const_char_ptr_cast(value)); + } + }; + + struct ConvertIntegerAttribute { + using TYPE = int32_t; + static int32_t Convert(Vector &, DBFHandle dbf_handle, int record_idx, int field_idx) { + return DBFReadIntegerAttribute(dbf_handle, record_idx, field_idx); + } + }; + + struct ConvertBigIntAttribute { + using TYPE = int64_t; + static int64_t Convert(Vector &, DBFHandle dbf_handle, int record_idx, int field_idx) { + return static_cast(DBFReadDoubleAttribute(dbf_handle, record_idx, field_idx)); + } + }; + + struct ConvertDoubleAttribute { + using TYPE = 
double; + static double Convert(Vector &, DBFHandle dbf_handle, int record_idx, int field_idx) { + return DBFReadDoubleAttribute(dbf_handle, record_idx, field_idx); + } + }; + + struct ConvertDateAttribute { + using TYPE = date_t; + static date_t Convert(Vector &, DBFHandle dbf_handle, int record_idx, int field_idx) { + // XBase stores dates as 8-char strings (without separators) + // but DuckDB expects a date string with separators. + auto value = DBFReadStringAttribute(dbf_handle, record_idx, field_idx); + char date_with_separator[11]; + memcpy(date_with_separator, value, 4); + date_with_separator[4] = '-'; + memcpy(date_with_separator + 5, value + 4, 2); + date_with_separator[7] = '-'; + memcpy(date_with_separator + 8, value + 6, 2); + date_with_separator[10] = '\0'; + return Date::FromString(date_with_separator); + } + }; + + struct ConvertBooleanAttribute { + using TYPE = bool; + static bool Convert(Vector &result, DBFHandle dbf_handle, int record_idx, int field_idx) { + return *DBFReadLogicalAttribute(dbf_handle, record_idx, field_idx) == 'T'; + } + }; + + template + static void ConvertAttributeLoop(Vector &result, int record_start, idx_t count, DBFHandle dbf_handle, + int field_idx) { + int record_idx = record_start; + for (idx_t row_idx = 0; row_idx < count; row_idx++) { + if (DBFIsAttributeNULL(dbf_handle, record_idx, field_idx)) { + FlatVector::SetNull(result, row_idx, true); + } else { + FlatVector::GetData(result)[row_idx] = + OP::Convert(result, dbf_handle, record_idx, field_idx); + } + record_idx++; + } + } + + static void ConvertStringAttributeLoop(Vector &result, int record_start, idx_t count, DBFHandle dbf_handle, + int field_idx, AttributeEncoding attribute_encoding) { + int record_idx = record_start; + vector conversion_buffer; + for (idx_t row_idx = 0; row_idx < count; row_idx++) { + if (DBFIsAttributeNULL(dbf_handle, record_idx, field_idx)) { + FlatVector::SetNull(result, row_idx, true); + } else { + auto string_bytes = 
DBFReadStringAttribute(dbf_handle, record_idx, field_idx); + string_t result_str = {}; + if (attribute_encoding == AttributeEncoding::LATIN1) { + conversion_buffer.resize(strlen(string_bytes) * 2 + 1); // worst case (all non-ascii chars) + auto out_len = + EncodingUtil::LatinToUTF8Buffer(const_data_ptr_cast(string_bytes), conversion_buffer.data()); + result_str = + StringVector::AddString(result, const_char_ptr_cast(conversion_buffer.data()), out_len); + } else { + result_str = StringVector::AddString(result, const_char_ptr_cast(string_bytes)); + } + if (!Utf8Proc::IsValid(result_str.GetDataUnsafe(), result_str.GetSize())) { + throw InvalidInputException("Could not decode VARCHAR field as valid UTF-8, try passing " + "encoding='blob' to skip decoding of string attributes"); + } + FlatVector::GetData(result)[row_idx] = result_str; + } + record_idx++; + } + } + + static void ConvertAttributeVector(Vector &result, int record_start, idx_t count, DBFHandle dbf_handle, + int field_idx, AttributeEncoding attribute_encoding) { + switch (result.GetType().id()) { + case LogicalTypeId::BLOB: + ConvertAttributeLoop(result, record_start, count, dbf_handle, field_idx); + break; + case LogicalTypeId::VARCHAR: + ConvertStringAttributeLoop(result, record_start, count, dbf_handle, field_idx, attribute_encoding); + break; + case LogicalTypeId::INTEGER: + ConvertAttributeLoop(result, record_start, count, dbf_handle, field_idx); + break; + case LogicalTypeId::BIGINT: + ConvertAttributeLoop(result, record_start, count, dbf_handle, field_idx); + break; + case LogicalTypeId::DOUBLE: + ConvertAttributeLoop(result, record_start, count, dbf_handle, field_idx); + break; + case LogicalTypeId::DATE: + ConvertAttributeLoop(result, record_start, count, dbf_handle, field_idx); + break; + case LogicalTypeId::BOOLEAN: + ConvertAttributeLoop(result, record_start, count, dbf_handle, field_idx); + break; + default: + throw InvalidInputException("Attribute type %s not supported", 
result.GetType().ToString()); + } + } + + //------------------------------------------------------------------------------------------------------------------ + // Execute + //------------------------------------------------------------------------------------------------------------------ + + static void Execute(ClientContext &context, TableFunctionInput &input, DataChunk &output) { + auto &bind_data = input.bind_data->Cast(); + auto &gstate = input.global_state->Cast(); + + // Reset the buffer allocator + gstate.arena.Reset(); + + // Calculate how many record we can fit in the output + const auto output_size = std::min(STANDARD_VECTOR_SIZE, bind_data.shape_count - gstate.shape_idx); + const auto record_start = gstate.shape_idx; + for (idx_t col_idx = 0; col_idx < output.ColumnCount(); col_idx++) { + + // Projected column indices + const auto projected_col_idx = gstate.column_ids[col_idx]; + + auto &col_vec = output.data[col_idx]; + if (col_vec.GetType() == GeoTypes::GEOMETRY()) { + ConvertGeometryVector(col_vec, record_start, output_size, gstate.shp_handle.get(), gstate.arena, + bind_data.shape_type); + } else { + // The geometry is always last, so we can use the projected column index directly + const auto field_idx = static_cast(projected_col_idx); + ConvertAttributeVector(col_vec, record_start, output_size, gstate.dbf_handle.get(), field_idx, + bind_data.attribute_encoding); + } + } + // Update the shape index + gstate.shape_idx += output_size; + + // Set the cardinality of the output + output.SetCardinality(output_size); + } + + //------------------------------------------------------------------------------------------------------------------ + // Progress, Cardinality and Replacement Scans + //------------------------------------------------------------------------------------------------------------------ + + static double GetProgress(ClientContext &context, const FunctionData *bind_data_p, + const GlobalTableFunctionState *global_state) { + + auto &gstate 
= global_state->Cast(); + auto &bind_data = bind_data_p->Cast(); + + return static_cast(gstate.shape_idx) / static_cast(bind_data.shape_count); + } + + static unique_ptr GetCardinality(ClientContext &context, const FunctionData *data) { + auto &bind_data = data->Cast(); + auto result = make_uniq(); + + // This is the maximum number of shapes in a single file + result->has_max_cardinality = true; + result->max_cardinality = bind_data.shape_count; + + return result; + } + + static unique_ptr GetReplacementScan(ClientContext &context, ReplacementScanInput &input, + optional_ptr data) { + auto &table_name = input.table_name; + // Check if the table name ends with .shp + if (!StringUtil::EndsWith(StringUtil::Lower(table_name), ".shp")) { + return nullptr; + } + + auto table_function = make_uniq(); + vector> children; + children.push_back(make_uniq(Value(table_name))); + table_function->function = make_uniq("ST_ReadSHP", std::move(children)); + return std::move(table_function); + } + + //------------------------------------------------------------------------------------------------------------------ + // Register + //------------------------------------------------------------------------------------------------------------------ + static void Register(DatabaseInstance &db) { + TableFunction read_func("ST_ReadSHP", {LogicalType::VARCHAR}, Execute, Bind, InitGlobal); + + read_func.named_parameters["encoding"] = LogicalType::VARCHAR; + read_func.table_scan_progress = GetProgress; + read_func.cardinality = GetCardinality; + read_func.projection_pushdown = true; + ExtensionUtil::RegisterFunction(db, read_func); + + // Replacement scan + auto &config = DBConfig::GetConfig(db); + config.replacement_scans.emplace_back(GetReplacementScan); + } +}; + +//====================================================================================================================== +// shapefile_meta 
+//====================================================================================================================== +// +// TODO: This is a bit messy, rework +// + +struct ShapeTypeEntry { + int shp_type; + const char *shp_name; +}; + +constexpr ShapeTypeEntry shape_type_map[] = { + {SHPT_NULL, "NULL"}, + {SHPT_POINT, "POINT"}, + {SHPT_ARC, "LINESTRING"}, + {SHPT_POLYGON, "POLYGON"}, + {SHPT_MULTIPOINT, "MULTIPOINT"}, + {SHPT_POINTZ, "POINTZ"}, + {SHPT_ARCZ, "LINESTRINGZ"}, + {SHPT_POLYGONZ, "POLYGONZ"}, + {SHPT_MULTIPOINTZ, "MULTIPOINTZ"}, + {SHPT_POINTM, "POINTM"}, + {SHPT_ARCM, "LINESTRINGM"}, + {SHPT_POLYGONM, "POLYGONM"}, + {SHPT_MULTIPOINTM, "MULTIPOINTM"}, + {SHPT_MULTIPATCH, "MULTIPATCH"}, +}; + +struct Shapefile_Meta { + + struct ShapeFileMetaBindData final : TableFunctionData { + vector files; + }; + + static unique_ptr Bind(ClientContext &context, TableFunctionBindInput &input, + vector &return_types, vector &names) { + + auto result = make_uniq(); + + auto multi_file_reader = MultiFileReader::Create(input.table_function); + auto file_list = multi_file_reader->CreateFileList(context, input.inputs[0], FileGlobOptions::ALLOW_EMPTY); + + for (auto &file : file_list->Files()) { + if (StringUtil::EndsWith(StringUtil::Lower(file), ".shp")) { + result->files.push_back(file); + } + } + + auto shape_type_count = sizeof(shape_type_map) / sizeof(ShapeTypeEntry); + auto varchar_vector = Vector(LogicalType::VARCHAR, shape_type_count); + auto varchar_data = FlatVector::GetData(varchar_vector); + for (idx_t i = 0; i < shape_type_count; i++) { + auto str = string_t(shape_type_map[i].shp_name); + varchar_data[i] = str.IsInlined() ? 
str : StringVector::AddString(varchar_vector, str); + } + auto shape_type_enum = LogicalType::ENUM("SHAPE_TYPE", varchar_vector, shape_type_count); + shape_type_enum.SetAlias("SHAPE_TYPE"); + + return_types.push_back(LogicalType::VARCHAR); + return_types.push_back(shape_type_enum); + return_types.push_back(GeoTypes::BOX_2D()); + return_types.push_back(LogicalType::INTEGER); + names.push_back("name"); + names.push_back("shape_type"); + names.push_back("bounds"); + names.push_back("count"); + return std::move(result); + } + + struct ShapeFileMetaGlobalState final : GlobalTableFunctionState { + ShapeFileMetaGlobalState() : current_file_idx(0) { + } + idx_t current_file_idx; + vector files; + }; + + static unique_ptr InitGlobal(ClientContext &context, TableFunctionInitInput &input) { + auto &bind_data = input.bind_data->Cast(); + auto result = make_uniq(); + + result->files = bind_data.files; + result->current_file_idx = 0; + + return std::move(result); + } + + static void Execute(ClientContext &context, TableFunctionInput &input, DataChunk &output) { + auto &bind_data = input.bind_data->Cast(); + auto &state = input.global_state->Cast(); + auto &fs = FileSystem::GetFileSystem(context); + + auto &file_name_vector = output.data[0]; + auto file_name_data = FlatVector::GetData(file_name_vector); + auto &shape_type_vector = output.data[1]; + auto shape_type_data = FlatVector::GetData(shape_type_vector); + auto &bounds_vector = output.data[2]; + auto &bounds_vector_children = StructVector::GetEntries(bounds_vector); + auto minx_data = FlatVector::GetData(*bounds_vector_children[0]); + auto miny_data = FlatVector::GetData(*bounds_vector_children[1]); + auto maxx_data = FlatVector::GetData(*bounds_vector_children[2]); + auto maxy_data = FlatVector::GetData(*bounds_vector_children[3]); + auto record_count_vector = output.data[3]; + auto record_count_data = FlatVector::GetData(record_count_vector); + + auto output_count = MinValue(STANDARD_VECTOR_SIZE, bind_data.files.size() - 
state.current_file_idx); + + for (idx_t out_idx = 0; out_idx < output_count; out_idx++) { + auto &file_name = bind_data.files[state.current_file_idx + out_idx]; + + auto file_handle = fs.OpenFile(file_name, FileFlags::FILE_FLAGS_READ); + auto shp_handle = OpenSHPFile(fs, file_name.c_str()); + + double min_bound[4]; + double max_bound[4]; + int shape_type; + int record_count; + SHPGetInfo(shp_handle.get(), &record_count, &shape_type, min_bound, max_bound); + file_name_data[out_idx] = StringVector::AddString(file_name_vector, file_name); + shape_type_data[out_idx] = 0; + for (size_t shape_type_idx = 0; shape_type_idx < sizeof(shape_type_map) / sizeof(ShapeTypeEntry); + shape_type_idx++) { + if (shape_type_map[shape_type_idx].shp_type == shape_type) { + shape_type_data[out_idx] = shape_type_idx; + break; + } + } + minx_data[out_idx] = min_bound[0]; + miny_data[out_idx] = min_bound[1]; + maxx_data[out_idx] = max_bound[0]; + maxy_data[out_idx] = max_bound[1]; + record_count_data[out_idx] = record_count; + } + + state.current_file_idx += output_count; + output.SetCardinality(output_count); + } + + static double GetProgress(ClientContext &context, const FunctionData *bind_data, + const GlobalTableFunctionState *gstate) { + auto &state = gstate->Cast(); + return static_cast(state.current_file_idx) / static_cast(state.files.size()); + } + + static unique_ptr GetCardinality(ClientContext &context, const FunctionData *bind_data_p) { + auto &bind_data = bind_data_p->Cast(); + auto result = make_uniq(); + result->has_max_cardinality = true; + result->max_cardinality = bind_data.files.size(); + result->has_estimated_cardinality = true; + result->estimated_cardinality = bind_data.files.size(); + return result; + } + + static void Register(DatabaseInstance &db) { + TableFunction meta_func("shapefile_meta", {LogicalType::VARCHAR}, Execute, Bind, InitGlobal); + meta_func.table_scan_progress = GetProgress; + meta_func.cardinality = GetCardinality; + 
ExtensionUtil::RegisterFunction(db, MultiFileReader::CreateFunctionSet(meta_func)); + } +}; + +} // namespace + +//###################################################################################################################### +// Module Registration +//###################################################################################################################### + +void RegisterShapefileModule(DatabaseInstance &db) { + ST_ReadSHP::Register(db); + Shapefile_Meta::Register(db); +} + +} // namespace duckdb \ No newline at end of file diff --git a/src/spatial/modules/shapefile/shapefile_module.hpp b/src/spatial/modules/shapefile/shapefile_module.hpp new file mode 100644 index 00000000..36b96d04 --- /dev/null +++ b/src/spatial/modules/shapefile/shapefile_module.hpp @@ -0,0 +1,9 @@ +#pragma once + +namespace duckdb { + +class DatabaseInstance; + +void RegisterShapefileModule(DatabaseInstance &db); + +} // namespace duckdb \ No newline at end of file diff --git a/src/spatial/spatial_extension.cpp b/src/spatial/spatial_extension.cpp new file mode 100644 index 00000000..3cd2a3fd --- /dev/null +++ b/src/spatial/spatial_extension.cpp @@ -0,0 +1,68 @@ +#define DUCKDB_EXTENSION_MAIN + +#include "spatial/spatial_extension.hpp" + +#include "duckdb.hpp" +#include "index/rtree/rtree.hpp" +#include "spatial/index/rtree/rtree_module.hpp" +#include "spatial/modules/gdal/gdal_module.hpp" +#include "spatial/modules/geos/geos_module.hpp" +#include "spatial/modules/main/spatial_functions.hpp" +#include "spatial/modules/osm/osm_module.hpp" +#include "spatial/modules/proj/proj_module.hpp" +#include "spatial/modules/shapefile/shapefile_module.hpp" +#include "spatial/spatial_optimizers.hpp" +#include "spatial/spatial_types.hpp" +#include "spatial/spatial_geoarrow.hpp" + +namespace duckdb { + +static void LoadInternal(DatabaseInstance &instance) { + + // Register the types + GeoTypes::Register(instance); + + RegisterSpatialCastFunctions(instance); + 
RegisterSpatialScalarFunctions(instance); + RegisterSpatialAggregateFunctions(instance); + RegisterSpatialTableFunctions(instance); + RegisterSpatialOptimizers(instance); + GeoArrow::Register(instance); + + RegisterProjModule(instance); + RegisterGDALModule(instance); + RegisterGEOSModule(instance); + RegisterOSMModule(instance); + RegisterShapefileModule(instance); + + RTreeModule::RegisterIndex(instance); + RTreeModule::RegisterIndexPragmas(instance); + RTreeModule::RegisterIndexScan(instance); + RTreeModule::RegisterIndexPlanCreate(instance); + RTreeModule::RegisterIndexPlanScan(instance); +} + +void SpatialExtension::Load(DuckDB &db) { + LoadInternal(*db.instance); +} + +std::string SpatialExtension::Name() { + return "spatial"; +} + +} // namespace duckdb + +extern "C" { + +DUCKDB_EXTENSION_API void spatial_init(duckdb::DatabaseInstance &db) { + LoadInternal(db); +} + +DUCKDB_EXTENSION_API const char *spatial_version() { + return duckdb::DuckDB::LibraryVersion(); +} +} + +#ifndef DUCKDB_EXTENSION_MAIN +#error DUCKDB_EXTENSION_MAIN not defined +#endif diff --git a/spatial/include/spatial_extension.hpp b/src/spatial/spatial_extension.hpp similarity index 75% rename from spatial/include/spatial_extension.hpp rename to src/spatial/spatial_extension.hpp index 620377b5..982bd0bd 100644 --- a/spatial/include/spatial_extension.hpp +++ b/src/spatial/spatial_extension.hpp @@ -4,7 +4,7 @@ namespace duckdb { -class SpatialExtension : public Extension { +class SpatialExtension final : public Extension { public: void Load(DuckDB &db) override; std::string Name() override; diff --git a/spatial/src/spatial/core/geoarrow.cpp b/src/spatial/spatial_geoarrow.cpp similarity index 64% rename from spatial/src/spatial/core/geoarrow.cpp rename to src/spatial/spatial_geoarrow.cpp index e91a62c4..2bebaf78 100644 --- a/spatial/src/spatial/core/geoarrow.cpp +++ b/src/spatial/spatial_geoarrow.cpp @@ -1,17 +1,21 @@ - -#include "spatial/core/geoarrow.hpp" +#include 
"spatial/spatial_geoarrow.hpp" #include "duckdb/common/arrow/arrow_converter.hpp" #include "duckdb/common/arrow/schema_metadata.hpp" #include "duckdb/function/table/arrow/arrow_duck_schema.hpp" -#include "spatial/core/geometry/wkb_reader.hpp" -#include "spatial/core/geometry/wkb_writer.hpp" -#include "spatial/core/types.hpp" +#include "duckdb/function/table_function.hpp" +#include "duckdb/main/database.hpp" +#include "duckdb/main/extension_util.hpp" +#include "geometry/geometry_serialization.hpp" +#include "spatial/geometry/geometry_type.hpp" +#include "spatial/geometry/sgl.hpp" +#include "spatial/geometry/wkb_writer.hpp" +#include "spatial/spatial_types.hpp" #include "yyjson.h" -namespace spatial { +namespace duckdb { -namespace core { +namespace { struct GeoArrowWKB { static unique_ptr GetType(const ArrowSchema &schema, const ArrowSchemaMetadata &schema_metadata) { @@ -68,20 +72,51 @@ struct GeoArrowWKB { } static void ArrowToDuck(ClientContext &context, Vector &source, Vector &result, idx_t count) { - ArenaAllocator arena(BufferAllocator::Get(context)); - WKBReader reader(arena); - - UnaryExecutor::ExecuteWithNulls(source, result, count, - [&](string_t input, ValidityMask &mask, idx_t idx) { - auto geom = reader.Deserialize(input); - return Geometry::Serialize(geom, result); - }); + // Just use the default allocator, invoking the buffer manager on each call is a bit much. 
+ ArenaAllocator arena(Allocator::Get(context)); + GeometryAllocator alloc(arena); + + constexpr auto MAX_STACK_DEPTH = 128; + uint32_t recursion_stack[MAX_STACK_DEPTH]; + + sgl::ops::wkb_reader reader = {}; + reader.copy_vertices = false; + reader.alloc = &alloc; + reader.allow_mixed_zm = true; + reader.nan_as_empty = true; + + reader.stack_buf = recursion_stack; + reader.stack_cap = MAX_STACK_DEPTH; + + UnaryExecutor::ExecuteWithNulls( + source, result, count, [&](const string_t &wkb, ValidityMask &mask, idx_t idx) { + reader.buf = wkb.GetDataUnsafe(); + reader.end = reader.buf + wkb.GetSize(); + + sgl::geometry geom(sgl::geometry_type::INVALID); + if (!sgl::ops::wkb_reader_try_parse(&reader, &geom)) { + const auto error = sgl::ops::wkb_reader_get_error_message(&reader); + throw InvalidInputException("Could not parse WKB input: %s", error); + } + + // We're a bit lenient and allow mixed ZM, but correct it here. + if (reader.has_mixed_zm) { + sgl::ops::force_zm(alloc, &geom, reader.has_any_z, reader.has_any_m, 0, 0); + } + + // Serialize the geometry to the result blob + const auto size = Serde::GetRequiredSize(geom); + auto blob = StringVector::EmptyString(result, size); + Serde::Serialize(geom, blob.GetDataWriteable(), size); + blob.Finalize(); + return blob; + }); } static void DuckToArrow(ClientContext &context, Vector &source, Vector &result, idx_t count) { WKBWriter writer; - UnaryExecutor::Execute(source, result, count, - [&](geometry_t input) { return writer.Write(input, result); }); + UnaryExecutor::Execute( + source, result, count, [&](const geometry_t &input) { return writer.Write(input, result); }); } }; @@ -92,17 +127,15 @@ void RegisterArrowExtensions(DBConfig &config) { GeoArrowWKB::DuckToArrow)}); } -class GeoArrowRegisterFunctionData : public TableFunctionData { +class GeoArrowRegisterFunctionData final : public TableFunctionData { public: GeoArrowRegisterFunctionData() : finished(false) { } bool finished {false}; }; -static inline 
duckdb::unique_ptr GeoArrowRegisterBind(ClientContext &context, - TableFunctionBindInput &input, - vector &return_types, - vector &names) { +unique_ptr GeoArrowRegisterBind(ClientContext &context, TableFunctionBindInput &input, + vector &return_types, vector &names) { names.push_back("registered"); return_types.push_back(LogicalType::BOOLEAN); return make_uniq(); @@ -126,11 +159,11 @@ void GeoArrowRegisterScan(ClientContext &context, TableFunctionInput &data_p, Da data.finished = true; } +} // namespace + void GeoArrow::Register(DatabaseInstance &db) { TableFunction register_func("register_geoarrow_extensions", {}, GeoArrowRegisterScan, GeoArrowRegisterBind); ExtensionUtil::RegisterFunction(db, register_func); } -} // namespace core - -} // namespace spatial +} // namespace duckdb \ No newline at end of file diff --git a/src/spatial/spatial_geoarrow.hpp b/src/spatial/spatial_geoarrow.hpp new file mode 100644 index 00000000..b0069822 --- /dev/null +++ b/src/spatial/spatial_geoarrow.hpp @@ -0,0 +1,11 @@ +#pragma once + +namespace duckdb { + +class DatabaseInstance; + +struct GeoArrow { + static void Register(DatabaseInstance &db); +}; + +} // namespace duckdb \ No newline at end of file diff --git a/src/spatial/spatial_optimizers.cpp b/src/spatial/spatial_optimizers.cpp new file mode 100644 index 00000000..deb0e950 --- /dev/null +++ b/src/spatial/spatial_optimizers.cpp @@ -0,0 +1,312 @@ +#include "spatial/spatial_types.hpp" + +#include "duckdb/catalog/catalog_entry/scalar_function_catalog_entry.hpp" +#include "duckdb/execution/expression_executor.hpp" +#include "duckdb/optimizer/optimizer_extension.hpp" +#include "duckdb/planner/expression/bound_comparison_expression.hpp" +#include "duckdb/planner/expression/bound_conjunction_expression.hpp" +#include "duckdb/planner/expression/bound_function_expression.hpp" +#include "duckdb/planner/logical_operator.hpp" +#include "duckdb/planner/operator/logical_any_join.hpp" +#include 
"duckdb/planner/operator/logical_comparison_join.hpp" +#include "duckdb/planner/operator/logical_filter.hpp" +#include "duckdb/planner/operator/logical_get.hpp" +#include "duckdb/planner/operator/logical_join.hpp" +#include "duckdb/main/connection.hpp" + +namespace duckdb { + +namespace { + +//---------------------------------------------------------------------------------------------------------------------- +// Range Join Spatial Predicate Rewriter +//---------------------------------------------------------------------------------------------------------------------- +// +// Rewrites joins on spatial predicates to range joins on their bounding boxes +// combined with a spatial predicate filter. This turns the joins from a +// blockwise-nested loop join into a inequality join + filter, which is much +// faster. +// +// All spatial predicates (except st_disjoint) imply an intersection of the +// bounding boxes of the two geometries. +// +// TODO: Rewrite this to use the expression/operator matchers + +class RangeJoinSpatialPredicateRewriter : public OptimizerExtension { +public: + RangeJoinSpatialPredicateRewriter() { + optimize_function = RangeJoinSpatialPredicateRewriter::Optimize; + } + + static void AddComparison(unique_ptr &join, unique_ptr left, + unique_ptr right, ExpressionType type) { + JoinCondition cmp; + cmp.comparison = type; + cmp.left = std::move(left); + cmp.right = std::move(right); + join->conditions.push_back(std::move(cmp)); + } + + static bool IsTableRefsDisjoint(unordered_set &left_table_indexes, unordered_set &right_table_indexes, + unordered_set &left_bindings, unordered_set &right_bindings) { + + // Check that all the left-side bindings reference the left-side tables of the join, + // as well as that all the right-side bindings reference the right-side tables of the join. + // and that the left and right side bindings are disjoint. 
+ + for (auto &left_binding : left_bindings) { + if (right_bindings.find(left_binding) != right_bindings.end()) { + // The left side bindings reference the right side tables of the join. + return false; + } + // Also check that the left side bindings are on the left side of the join + if (left_table_indexes.find(left_binding) == left_table_indexes.end()) { + // The left side bindings are not on the left side of the join. + return false; + } + } + + for (auto &right_binding : right_bindings) { + if (left_bindings.find(right_binding) != left_bindings.end()) { + // The right side bindings reference the left side tables of the join. + return false; + } + // Also check that the right side bindings are on the right side of the join + if (right_table_indexes.find(right_binding) == right_table_indexes.end()) { + // The right side bindings are not on the right side of the join. + return false; + } + } + + return true; + } + + static void TryOptimize(ClientContext &context, OptimizerExtensionInfo *info, unique_ptr &plan) { + + auto &op = *plan; + + // Look for ANY_JOIN operators + if (op.type == LogicalOperatorType::LOGICAL_ANY_JOIN) { + auto &any_join = op.Cast(); + + // Check if the join condition is a spatial predicate and the join type is INNER + if (any_join.condition->type == ExpressionType::BOUND_FUNCTION && any_join.join_type == JoinType::INNER) { + auto bound_func_expr = any_join.condition->Copy(); + auto &bound_function = bound_func_expr->Cast(); + + // Note that we cant perform this optimization for st_disjoint as all comparisons have to be AND'd + case_insensitive_set_t predicates = {"st_equals", "st_intersects", "st_touches", "st_crosses", + "st_within", "st_contains", "st_overlaps", "st_covers", + "st_coveredby", "st_containsproperly"}; + + if (predicates.find(bound_function.function.name) == predicates.end()) { + return; + } + + if (bound_function.children.size() != 2) { + return; + } + + // It has to be on raw geometry types + if 
(bound_function.children[0]->return_type != GeoTypes::GEOMETRY() || + bound_function.children[1]->return_type != GeoTypes::GEOMETRY()) { + return; + } + + // Found a spatial predicate we can optimize + + // Convert this into a comparison join on st_xmin, st_xmax, st_ymin, st_ymax of the two input + // geometries + auto left_pred_expr = std::move(bound_function.children[0]); + auto right_pred_expr = std::move(bound_function.children[1]); + + // We need to place the left side of the predicate on the left side of the join + // and the right side of the predicate on the right side of the join + // So look at the table indexes of the left and right side of the predicate + unordered_set left_table_indexes; + LogicalJoin::GetTableReferences(*any_join.children[0], left_table_indexes); + + unordered_set right_table_indexes; + LogicalJoin::GetTableReferences(*any_join.children[1], right_table_indexes); + + unordered_set left_pred_bindings; + LogicalJoin::GetExpressionBindings(*left_pred_expr, left_pred_bindings); + + unordered_set right_pred_bindings; + LogicalJoin::GetExpressionBindings(*right_pred_expr, right_pred_bindings); + + // Check if we can optimize this join + // We need to make sure that the left and right side of the predicate are disjoint + // e.g. + // a JOIN b ON st_intersects(a.geom, b.geom) => OK + // a JOIN b ON st_intersects(b.geom, a.geom) => OK + // a JOIN b ON st_intersects(a.geom, st_union(a.geom, b.geom)) => NOT OK + auto can_split = IsTableRefsDisjoint(left_table_indexes, right_table_indexes, left_pred_bindings, + right_pred_bindings); + if (!can_split) { + // Try again with the left and right side of the predicate swapped + // We can safely swap because the intersection operation we encode with the comparison join + // is symmetric, so the order of the arguments wont matter in the "new" join condition we're + // about to create. 
+ can_split = IsTableRefsDisjoint(left_table_indexes, right_table_indexes, right_pred_bindings, + left_pred_bindings); + if (!can_split) { + // We cant optimize this join + return; + } + // Swap the left and right side of the predicate + std::swap(left_pred_expr, right_pred_expr); + } + + // Lookup the st_xmin, st_xmax, st_ymin, st_ymax functions in the catalog + auto &catalog = Catalog::GetSystemCatalog(context); + + auto &extent_func_set = + catalog.GetEntry(context, CatalogType::SCALAR_FUNCTION_ENTRY, DEFAULT_SCHEMA, "st_extent_approx") + .Cast(); + + auto &xmin_func_set = + catalog.GetEntry(context, CatalogType::SCALAR_FUNCTION_ENTRY, DEFAULT_SCHEMA, "st_xmin") + .Cast(); + auto &xmax_func_set = + catalog.GetEntry(context, CatalogType::SCALAR_FUNCTION_ENTRY, DEFAULT_SCHEMA, "st_xmax") + .Cast(); + auto &ymin_func_set = + catalog.GetEntry(context, CatalogType::SCALAR_FUNCTION_ENTRY, DEFAULT_SCHEMA, "st_ymin") + .Cast(); + auto &ymax_func_set = + catalog.GetEntry(context, CatalogType::SCALAR_FUNCTION_ENTRY, DEFAULT_SCHEMA, "st_ymax") + .Cast(); + + auto &left_arg_type = left_pred_expr->return_type; + auto &right_arg_type = right_pred_expr->return_type; + + auto extent_func_left = extent_func_set.functions.GetFunctionByArguments(context, {left_arg_type}); + auto extent_func_right = extent_func_set.functions.GetFunctionByArguments(context, {right_arg_type}); + + auto xmin_func_left = + xmin_func_set.functions.GetFunctionByArguments(context, {extent_func_left.return_type}); + auto xmax_func_left = + xmax_func_set.functions.GetFunctionByArguments(context, {extent_func_left.return_type}); + auto ymin_func_left = + ymin_func_set.functions.GetFunctionByArguments(context, {extent_func_left.return_type}); + auto ymax_func_left = + ymax_func_set.functions.GetFunctionByArguments(context, {extent_func_left.return_type}); + + auto xmin_func_right = + xmin_func_set.functions.GetFunctionByArguments(context, {extent_func_right.return_type}); + auto xmax_func_right = + 
xmax_func_set.functions.GetFunctionByArguments(context, {extent_func_right.return_type}); + auto ymin_func_right = + ymin_func_set.functions.GetFunctionByArguments(context, {extent_func_right.return_type}); + auto ymax_func_right = + ymax_func_set.functions.GetFunctionByArguments(context, {extent_func_right.return_type}); + + // Create the new join condition + vector> left_extent_args; + left_extent_args.push_back(left_pred_expr->Copy()); + auto left_extent = make_uniq(GeoTypes::BOX_2D(), std::move(extent_func_left), + std::move(left_extent_args), nullptr); + + vector> right_extent_args; + right_extent_args.push_back(right_pred_expr->Copy()); + auto right_extent = make_uniq(GeoTypes::BOX_2D(), std::move(extent_func_right), + std::move(right_extent_args), nullptr); + + // Left + vector> left_xmin_args; + left_xmin_args.push_back(left_extent->Copy()); + auto a_x_min = make_uniq(LogicalType::DOUBLE, std::move(xmin_func_left), + std::move(left_xmin_args), nullptr); + + vector> left_xmax_args; + left_xmax_args.push_back(left_extent->Copy()); + auto a_x_max = make_uniq(LogicalType::DOUBLE, std::move(xmax_func_left), + std::move(left_xmax_args), nullptr); + + vector> left_ymin_args; + left_ymin_args.push_back(left_extent->Copy()); + auto a_y_min = make_uniq(LogicalType::DOUBLE, std::move(ymin_func_left), + std::move(left_ymin_args), nullptr); + + vector> left_ymax_args; + left_ymax_args.push_back(left_extent->Copy()); + auto a_y_max = make_uniq(LogicalType::DOUBLE, std::move(ymax_func_left), + std::move(left_ymax_args), nullptr); + + // Right + vector> right_xmin_args; + right_xmin_args.push_back(right_extent->Copy()); + auto b_x_min = make_uniq(LogicalType::DOUBLE, std::move(xmin_func_right), + std::move(right_xmin_args), nullptr); + + vector> right_xmax_args; + right_xmax_args.push_back(right_extent->Copy()); + auto b_x_max = make_uniq(LogicalType::DOUBLE, std::move(xmax_func_right), + std::move(right_xmax_args), nullptr); + + vector> right_ymin_args; + 
right_ymin_args.push_back(right_extent->Copy()); + auto b_y_min = make_uniq(LogicalType::DOUBLE, std::move(ymin_func_right), + std::move(right_ymin_args), nullptr); + + vector> right_ymax_args; + right_ymax_args.push_back(right_extent->Copy()); + auto b_y_max = make_uniq(LogicalType::DOUBLE, std::move(ymax_func_right), + std::move(right_ymax_args), nullptr); + + // Now create the new join operator + auto new_join = make_uniq(JoinType::INNER); + AddComparison(new_join, std::move(a_x_min), std::move(b_x_max), + ExpressionType::COMPARE_LESSTHANOREQUALTO); + AddComparison(new_join, std::move(a_x_max), std::move(b_x_min), + ExpressionType::COMPARE_GREATERTHANOREQUALTO); + AddComparison(new_join, std::move(a_y_min), std::move(b_y_max), + ExpressionType::COMPARE_LESSTHANOREQUALTO); + AddComparison(new_join, std::move(a_y_max), std::move(b_y_min), + ExpressionType::COMPARE_GREATERTHANOREQUALTO); + + new_join->children = std::move(any_join.children); + if (any_join.has_estimated_cardinality) { + new_join->estimated_cardinality = any_join.estimated_cardinality; + new_join->has_estimated_cardinality = true; + } + + auto filter = make_uniq(std::move(any_join.condition)); + filter->children.push_back(std::move(new_join)); + + plan = std::move(filter); + } + } + } + + static void Optimize(OptimizerExtensionInput &input, unique_ptr &plan) { + + TryOptimize(input.context, input.info.get(), plan); + + // Recursively optimize the children + for (auto &child : plan->children) { + Optimize(input, child); + } + } +}; + +} // namespace + +//---------------------------------------------------------------------------------------------------------------------- +// Register optimizers +//---------------------------------------------------------------------------------------------------------------------- +void RegisterSpatialOptimizers(DatabaseInstance &db) { + Connection con(db); + auto &context = *con.context; + + con.BeginTransaction(); + auto &config = DBConfig::GetConfig(context); + + 
// Register the optimizer rules + config.optimizer_extensions.push_back(RangeJoinSpatialPredicateRewriter()); + + con.Commit(); +} + +} // namespace duckdb \ No newline at end of file diff --git a/src/spatial/spatial_optimizers.hpp b/src/spatial/spatial_optimizers.hpp new file mode 100644 index 00000000..485ab220 --- /dev/null +++ b/src/spatial/spatial_optimizers.hpp @@ -0,0 +1,9 @@ +#pragma once + +namespace duckdb { + +class DatabaseInstance; + +void RegisterSpatialOptimizers(DatabaseInstance &db); + +} // namespace duckdb \ No newline at end of file diff --git a/spatial/src/spatial/core/types.cpp b/src/spatial/spatial_types.cpp similarity index 92% rename from spatial/src/spatial/core/types.cpp rename to src/spatial/spatial_types.cpp index dc44c1c2..9b52e546 100644 --- a/spatial/src/spatial/core/types.cpp +++ b/src/spatial/spatial_types.cpp @@ -1,12 +1,7 @@ -#include "spatial/core/types.hpp" +#include "spatial/spatial_types.hpp" +#include "duckdb/main/extension_util.hpp" -#include "duckdb/parser/parsed_data/create_scalar_function_info.hpp" -#include "duckdb/parser/parsed_data/create_type_info.hpp" -#include "spatial/common.hpp" - -namespace spatial { - -namespace core { +namespace duckdb { LogicalType GeoTypes::POINT_2D() { auto type = LogicalType::STRUCT({{"x", LogicalType::DOUBLE}, {"y", LogicalType::DOUBLE}}); @@ -115,6 +110,4 @@ void GeoTypes::Register(DatabaseInstance &db) { ExtensionUtil::RegisterType(db, "WKB_BLOB", GeoTypes::WKB_BLOB()); } -} // namespace core - -} // namespace spatial \ No newline at end of file +} // namespace duckdb \ No newline at end of file diff --git a/spatial/include/spatial/core/types.hpp b/src/spatial/spatial_types.hpp similarity index 74% rename from spatial/include/spatial/core/types.hpp rename to src/spatial/spatial_types.hpp index e05ddbb4..31b81ea3 100644 --- a/spatial/include/spatial/core/types.hpp +++ b/src/spatial/spatial_types.hpp @@ -1,9 +1,12 @@ #pragma once -#include "spatial/common.hpp" -namespace spatial { 
+#include "duckdb/common/string.hpp" +#include "duckdb/common/vector.hpp" -namespace core { +namespace duckdb { + +class DatabaseInstance; +struct LogicalType; struct GeoTypes { static LogicalType POINT_2D(); @@ -21,6 +24,4 @@ struct GeoTypes { static LogicalType CreateEnumType(const string &name, const vector &members); }; -} // namespace core - -} // namespace spatial \ No newline at end of file +} // namespace duckdb \ No newline at end of file diff --git a/src/spatial/util/CMakeLists.txt b/src/spatial/util/CMakeLists.txt new file mode 100644 index 00000000..3335cc82 --- /dev/null +++ b/src/spatial/util/CMakeLists.txt @@ -0,0 +1,5 @@ +set(EXTENSION_SOURCES + ${EXTENSION_SOURCES} + ${CMAKE_CURRENT_SOURCE_DIR}/function_builder.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/math.cpp +PARENT_SCOPE) diff --git a/src/spatial/util/binary_reader.hpp b/src/spatial/util/binary_reader.hpp new file mode 100644 index 00000000..49ef6f9e --- /dev/null +++ b/src/spatial/util/binary_reader.hpp @@ -0,0 +1,79 @@ +#pragma once + +#include +#include +#include +#include +#include "duckdb/common/exception.hpp" + +namespace duckdb { + +class BinaryReader { +public: + BinaryReader(const char *ptr, const char *end) : beg(ptr), end(end), ptr(ptr) { + } + BinaryReader(const char *buffer, const size_t size) : BinaryReader(buffer, buffer + size) { + } + + template + T Read() { + static_assert(std::is_trivially_copyable::value, "Type must be trivially copyable"); + CheckSize(sizeof(T)); + T value; + memcpy(&value, ptr, sizeof(T)); + ptr += sizeof(T); + return value; + } + + template + T ReadBE() { + static_assert(std::is_trivially_copyable::value, "Type must be trivially copyable"); + CheckSize(sizeof(T)); + + uint8_t in[sizeof(T)]; + uint8_t out[sizeof(T)]; + memcpy(in, ptr, sizeof(T)); + ptr += sizeof(T); + + for (size_t i = 0; i < sizeof(T); i++) { + out[i] = in[sizeof(T) - i - 1]; + } + + T swapped = 0; + memcpy(&swapped, out, sizeof(T)); + return swapped; + } + + const char *Reserve(const size_t 
size) { + CheckSize(size); + const char *result = ptr; + ptr += size; + return result; + } + + void Skip(const size_t size) { + CheckSize(size); + ptr += size; + } + + const char *GetStart() const { + return beg; + } + + const char *GetEnd() const { + return end; + } + +private: + void CheckSize(const size_t size) const { + if (ptr + size > end) { + throw InternalException("Buffer overflow"); + } + } + + const char *beg; + const char *end; + const char *ptr; +}; + +} // namespace duckdb \ No newline at end of file diff --git a/src/spatial/util/binary_writer.hpp b/src/spatial/util/binary_writer.hpp new file mode 100644 index 00000000..036b9c37 --- /dev/null +++ b/src/spatial/util/binary_writer.hpp @@ -0,0 +1,67 @@ +#pragma once + +#include +#include +#include +#include +#include "duckdb/common/exception.hpp" + +namespace duckdb { + +class BinaryWriter { +public: + BinaryWriter(char *ptr, char *end) : beg(ptr), end(end), ptr(ptr) { + } + BinaryWriter(char *buffer, const size_t size) : BinaryWriter(buffer, buffer + size) { + } + + template + void Write(const T &value) { + static_assert(std::is_trivially_copyable::value, "Type must be trivially copyable"); + CheckSize(sizeof(T)); + memcpy(ptr, &value, sizeof(T)); + ptr += sizeof(T); + } + + char *Reserve(const size_t size) { + CheckSize(size); + char *result = ptr; + ptr += size; + return result; + } + + void Skip(const size_t size, const bool zero = false) { + CheckSize(size); + if (zero) { + memset(ptr, 0, size); + } + ptr += size; + } + + void Copy(const char *buffer, const size_t size) { + CheckSize(size); + memcpy(ptr, buffer, size); + ptr += size; + } + + char *GetStart() const { + return beg; + } + + char *GetEnd() const { + return end; + } + +private: + void CheckSize(const size_t size) const { + if (ptr + size > end) { + throw InternalException("Buffer overflow"); + } + } + + char *beg; + char *end; + char *ptr; +}; + +} // namespace duckdb \ No newline at end of file diff --git 
a/spatial/include/spatial/core/util/cursor.hpp b/src/spatial/util/cursor.hpp similarity index 95% rename from spatial/include/spatial/core/util/cursor.hpp rename to src/spatial/util/cursor.hpp index 7bec6db7..3fcaafcc 100644 --- a/spatial/include/spatial/core/util/cursor.hpp +++ b/src/spatial/util/cursor.hpp @@ -1,9 +1,9 @@ #pragma once -#include "spatial/common.hpp" -namespace spatial { +#include "duckdb/common/typedefs.hpp" +#include "duckdb/common/types/string_type.hpp" -namespace core { +namespace duckdb { // TODO: Split this into a read and write cursor. Get rid of bounds checks in release mode class Cursor { @@ -122,6 +122,4 @@ class Cursor { } }; -} // namespace core - -} // namespace spatial \ No newline at end of file +} // namespace duckdb \ No newline at end of file diff --git a/src/spatial/util/function_builder.cpp b/src/spatial/util/function_builder.cpp new file mode 100644 index 00000000..69694e3e --- /dev/null +++ b/src/spatial/util/function_builder.cpp @@ -0,0 +1,140 @@ +#include "spatial/util/function_builder.hpp" + +#include "duckdb/catalog/catalog_entry/function_entry.hpp" +#include "duckdb/main/extension_util.hpp" + +#include + +namespace duckdb { + +string FunctionBuilder::RemoveIndentAndTrailingWhitespace(const char *text) { + string result; + // Skip any empty first newlines if present + while (*text == '\n') { + text++; + } + + // Track indent length + auto indent_start = text; + while (isspace(*text) && *text != '\n') { + text++; + } + auto indent_len = text - indent_start; + while (*text) { + result += *text; + if (*text++ == '\n') { + // Remove all indentation, but only if it matches the first line's indentation + bool matched_indent = true; + for (auto i = 0; i < indent_len; i++) { + if (*text != indent_start[i]) { + matched_indent = false; + break; + } + } + if (matched_indent) { + auto remaining_indent = indent_len; + while (*text && remaining_indent > 0) { + text++; + remaining_indent--; + } + } + } + } + + // Also remove any trailing 
whitespace + result.erase(result.find_last_not_of(" \n\r\t") + 1); + return result; +} + +void FunctionBuilder::Register(DatabaseInstance &db, const char *name, ScalarFunctionBuilder &builder) { + // Register the function + ExtensionUtil::RegisterFunction(db, std::move(builder.set)); + + // Also add the parameter names. We need to access the catalog entry for this. + auto &catalog = Catalog::GetSystemCatalog(db); + auto transaction = CatalogTransaction::GetSystemTransaction(db); + auto &schema = catalog.GetSchema(transaction, DEFAULT_SCHEMA); + auto catalog_entry = schema.GetEntry(transaction, CatalogType::SCALAR_FUNCTION_ENTRY, name); + if (!catalog_entry) { + // This should not happen, we just registered the function + throw InternalException("Function with name \"%s\" not found in FunctionBuilder::AddScalar", name); + } + + auto &func_entry = catalog_entry->Cast(); + + // Insert all descriptions + for (auto &desc : builder.descriptions) { + + // Add default description if none is set + if (desc.description.empty()) { + desc.description = builder.default_description; + } else { + desc.description = RemoveIndentAndTrailingWhitespace(desc.description.c_str()); + } + + // Add default example if none is set + if (desc.examples.empty()) { + desc.examples.push_back(builder.default_example); + } else { + for (auto &ex : desc.examples) { + ex = RemoveIndentAndTrailingWhitespace(ex.c_str()); + } + } + + func_entry.descriptions.push_back(desc); + } + + if (!builder.tags.empty()) { + func_entry.tags = std::move(builder.tags); + } +} + +void FunctionBuilder::Register(DatabaseInstance &db, const char *name, AggregateFunctionBuilder &builder) { + // Register the function + ExtensionUtil::RegisterFunction(db, std::move(builder.set)); + + // Also add the parameter names. We need to access the catalog entry for this. 
+ auto &catalog = Catalog::GetSystemCatalog(db); + auto transaction = CatalogTransaction::GetSystemTransaction(db); + auto &schema = catalog.GetSchema(transaction, DEFAULT_SCHEMA); + auto catalog_entry = schema.GetEntry(transaction, CatalogType::AGGREGATE_FUNCTION_ENTRY, name); + if (!catalog_entry) { + // This should not happen, we just registered the function + throw InternalException("Function with name \"%s\" not found in FunctionBuilder::AddAggregate", name); + } + + auto &func_entry = catalog_entry->Cast(); + + // Insert all descriptions + const auto descr = RemoveIndentAndTrailingWhitespace(builder.description.c_str()); + const auto exampl = RemoveIndentAndTrailingWhitespace(builder.example.c_str()); + FunctionDescription function_description; + function_description.description = descr; + function_description.examples.push_back(exampl); + func_entry.descriptions.push_back(function_description); + + if (!builder.tags.empty()) { + func_entry.tags = std::move(builder.tags); + } +} + +void FunctionBuilder::AddTableFunctionDocs(DatabaseInstance &db, const char *name, const char *desc, + const char *example) { + + auto &catalog = Catalog::GetSystemCatalog(db); + auto transaction = CatalogTransaction::GetSystemTransaction(db); + auto &schema = catalog.GetSchema(transaction, DEFAULT_SCHEMA); + auto catalog_entry = schema.GetEntry(transaction, CatalogType::TABLE_FUNCTION_ENTRY, name); + if (!catalog_entry) { + // This should not happen, we just registered the function + throw InternalException("Function with name \"%s\" not found in FunctionBuilder::AddScalar", name); + } + + auto &func_entry = catalog_entry->Cast(); + FunctionDescription function_description; + function_description.description = RemoveIndentAndTrailingWhitespace(desc); + function_description.examples.push_back(RemoveIndentAndTrailingWhitespace(example)); + func_entry.descriptions.push_back(function_description); +} + +} // namespace duckdb \ No newline at end of file diff --git 
//------------------------------------------------------------------------------
// Function Builder
//------------------------------------------------------------------------------

class ScalarFunctionBuilder;
class AggregateFunctionBuilder;

// Static entry points for registering functions together with their documentation
// (descriptions, examples and tags).
class FunctionBuilder {
public:
	// Registers a scalar function called `name`. `callback` is handed a ScalarFunctionBuilder
	// for that name to populate before the function is registered.
	template <class CALLBACK>
	static void RegisterScalar(DatabaseInstance &db, const char *name, CALLBACK &&callback);

	// Registers an aggregate function called `name`. `callback` is invoked with an
	// AggregateFunctionBuilder for that name, which is then registered.
	template <class CALLBACK>
	static void RegisterAggregate(DatabaseInstance &db, const char *name, CALLBACK &&callback);

	// Attaches a description and an example to the catalog entry of the table function `name`.
	// Throws an InternalException if no such table function entry is found.
	// TODO: no table function builder exists here yet — presumably this is a stopgap; confirm.
	static void AddTableFunctionDocs(DatabaseInstance &db, const char *name, const char *desc, const char *example);

	// Strips leading newlines, the first line's indentation (from the first line and from every
	// later line that starts with it) and trailing whitespace from `str`.
	static string RemoveIndentAndTrailingWhitespace(const char *str);

private:
	// Register the function set held by `builder` and attach its documentation to the catalog entry.
	static void Register(DatabaseInstance &db, const char *name, ScalarFunctionBuilder &builder);
	static void Register(DatabaseInstance &db, const char *name, AggregateFunctionBuilder &builder);
};
ScalarFunctionBuilder(const char *name) : set(name) { @@ -87,10 +111,15 @@ class ScalarFunctionBuilder { // If not set by a variant string default_description; + string default_example; }; inline void ScalarFunctionBuilder::SetDescription(const string &desc) { - default_description = desc; + default_description = FunctionBuilder::RemoveIndentAndTrailingWhitespace(desc.c_str()); +} + +inline void ScalarFunctionBuilder::SetExample(const string &ex) { + default_example = FunctionBuilder::RemoveIndentAndTrailingWhitespace(ex.c_str()); } inline void ScalarFunctionBuilder::SetTag(const string &key, const string &value) { @@ -111,28 +140,50 @@ void ScalarFunctionBuilder::AddVariant(CALLBACK &&callback) { // Add the new variant to the set set.AddFunction(std::move(builder.function)); - // Add the default description if not set by the variant - if (builder.description.description.empty()) { - builder.description.description = default_description; - } - // Add the description descriptions.emplace_back(std::move(builder.description)); } //------------------------------------------------------------------------------ -// Function Builder +// Aggregate //------------------------------------------------------------------------------ -class FunctionBuilder { +class AggregateFunctionBuilder { + friend class FunctionBuilder; + public: - template - static void RegisterScalar(DatabaseInstance &db, const char *name, CALLBACK &&callback); + void SetTag(const string &key, const string &value); + void SetDescription(const string &desc); + void SetExample(const string &ex); + void SetFunction(const AggregateFunction &function); private: - static void Register(DatabaseInstance &db, const char *name, ScalarFunctionBuilder &builder); + explicit AggregateFunctionBuilder(const char *name) : set(name) { + } + string description; + string example; + unordered_map tags; + AggregateFunctionSet set; }; +inline void AggregateFunctionBuilder::SetFunction(const AggregateFunction &function) { + 
set.AddFunction(function); +} + +inline void AggregateFunctionBuilder::SetDescription(const string &desc) { + description = desc; +} +inline void AggregateFunctionBuilder::SetExample(const string &ex) { + example = ex; +} +inline void AggregateFunctionBuilder::SetTag(const string &key, const string &value) { + tags[key] = value; +} + +//------------------------------------------------------------------------------ +// Function Builder Methods +//------------------------------------------------------------------------------ + template void FunctionBuilder::RegisterScalar(DatabaseInstance &db, const char *name, CALLBACK &&callback) { ScalarFunctionBuilder builder(name); @@ -141,6 +192,11 @@ void FunctionBuilder::RegisterScalar(DatabaseInstance &db, const char *name, CAL Register(db, name, builder); } -} // namespace core +template +void FunctionBuilder::RegisterAggregate(DatabaseInstance &db, const char *name, CALLBACK &&callback) { + AggregateFunctionBuilder builder(name); + callback(builder); + Register(db, name, builder); +} -} // namespace spatial \ No newline at end of file +} // namespace duckdb \ No newline at end of file diff --git a/spatial/include/spatial/core/util/managed_collection.hpp b/src/spatial/util/managed_collection.hpp similarity index 94% rename from spatial/include/spatial/core/util/managed_collection.hpp rename to src/spatial/util/managed_collection.hpp index 6ec5a717..2923aeed 100644 --- a/spatial/include/spatial/core/util/managed_collection.hpp +++ b/src/spatial/util/managed_collection.hpp @@ -1,8 +1,11 @@ #pragma once -namespace spatial { +namespace duckdb { -namespace core { +// A ManagedCollection is a buffer-managed collection, capable of storing larger-than memory collections of elements. +// by splitting them over multiple blocks allocated from the BufferManager. +// Similar to a ColumnDataCollection, but for basic POD C++ types. +// The ManagedCollection is append-only and scan-only, and does not support random access. 
struct ManagedCollectionAppendState; struct ManagedCollectionScanState; @@ -219,6 +222,4 @@ T ManagedCollection::Fetch(idx_t idx) { return Load(ptr); } -} // namespace core - -} // namespace spatial \ No newline at end of file +} // namespace duckdb \ No newline at end of file diff --git a/spatial/src/spatial/core/util/math.cpp b/src/spatial/util/math.cpp similarity index 94% rename from spatial/src/spatial/core/util/math.cpp rename to src/spatial/util/math.cpp index e3e42f75..9c9d9eda 100644 --- a/spatial/src/spatial/core/util/math.cpp +++ b/src/spatial/util/math.cpp @@ -1,8 +1,6 @@ -#include "spatial/core/util/math.hpp" +#include "spatial/util/math.hpp" -namespace spatial { - -namespace core { +namespace duckdb { // We've got this exposed upstream, we just need to wait for the next release extern "C" int geos_d2sfixed_buffered_n(double f, uint32_t precision, char *result); @@ -64,6 +62,4 @@ string MathUtil::format_coord(double x, double y, double z, double m) { return string {buf}; } -} // namespace core - -} // namespace spatial \ No newline at end of file +} // namespace duckdb \ No newline at end of file diff --git a/spatial/include/spatial/core/util/math.hpp b/src/spatial/util/math.hpp similarity index 89% rename from spatial/include/spatial/core/util/math.hpp rename to src/spatial/util/math.hpp index 9bfe4123..1c206d25 100644 --- a/spatial/include/spatial/core/util/math.hpp +++ b/src/spatial/util/math.hpp @@ -1,10 +1,14 @@ -#include "spatial/common.hpp" +#pragma once -namespace spatial { +#include +#include +#include "duckdb/common/string.hpp" +#include "duckdb/common/vector.hpp" -namespace core { +namespace duckdb { struct MathUtil { + static string format_coord(double d); static string format_coord(double x, double y); static string format_coord(double x, double y, double z); @@ -43,6 +47,4 @@ struct MathUtil { } }; -} // namespace core - -} // namespace spatial \ No newline at end of file +} // namespace duckdb \ No newline at end of file diff --git 
a/spatial/third_party/protozero/include/protozero/basic_pbf_builder.hpp b/src/third_party/protozero/include/protozero/basic_pbf_builder.hpp similarity index 100% rename from spatial/third_party/protozero/include/protozero/basic_pbf_builder.hpp rename to src/third_party/protozero/include/protozero/basic_pbf_builder.hpp diff --git a/spatial/third_party/protozero/include/protozero/basic_pbf_writer.hpp b/src/third_party/protozero/include/protozero/basic_pbf_writer.hpp similarity index 100% rename from spatial/third_party/protozero/include/protozero/basic_pbf_writer.hpp rename to src/third_party/protozero/include/protozero/basic_pbf_writer.hpp diff --git a/spatial/third_party/protozero/include/protozero/buffer_fixed.hpp b/src/third_party/protozero/include/protozero/buffer_fixed.hpp similarity index 100% rename from spatial/third_party/protozero/include/protozero/buffer_fixed.hpp rename to src/third_party/protozero/include/protozero/buffer_fixed.hpp diff --git a/spatial/third_party/protozero/include/protozero/buffer_string.hpp b/src/third_party/protozero/include/protozero/buffer_string.hpp similarity index 100% rename from spatial/third_party/protozero/include/protozero/buffer_string.hpp rename to src/third_party/protozero/include/protozero/buffer_string.hpp diff --git a/spatial/third_party/protozero/include/protozero/buffer_tmpl.hpp b/src/third_party/protozero/include/protozero/buffer_tmpl.hpp similarity index 100% rename from spatial/third_party/protozero/include/protozero/buffer_tmpl.hpp rename to src/third_party/protozero/include/protozero/buffer_tmpl.hpp diff --git a/spatial/third_party/protozero/include/protozero/buffer_vector.hpp b/src/third_party/protozero/include/protozero/buffer_vector.hpp similarity index 100% rename from spatial/third_party/protozero/include/protozero/buffer_vector.hpp rename to src/third_party/protozero/include/protozero/buffer_vector.hpp diff --git a/spatial/third_party/protozero/include/protozero/byteswap.hpp 
b/src/third_party/protozero/include/protozero/byteswap.hpp similarity index 100% rename from spatial/third_party/protozero/include/protozero/byteswap.hpp rename to src/third_party/protozero/include/protozero/byteswap.hpp diff --git a/spatial/third_party/protozero/include/protozero/config.hpp b/src/third_party/protozero/include/protozero/config.hpp similarity index 100% rename from spatial/third_party/protozero/include/protozero/config.hpp rename to src/third_party/protozero/include/protozero/config.hpp diff --git a/spatial/third_party/protozero/include/protozero/data_view.hpp b/src/third_party/protozero/include/protozero/data_view.hpp similarity index 100% rename from spatial/third_party/protozero/include/protozero/data_view.hpp rename to src/third_party/protozero/include/protozero/data_view.hpp diff --git a/spatial/third_party/protozero/include/protozero/exception.hpp b/src/third_party/protozero/include/protozero/exception.hpp similarity index 100% rename from spatial/third_party/protozero/include/protozero/exception.hpp rename to src/third_party/protozero/include/protozero/exception.hpp diff --git a/spatial/third_party/protozero/include/protozero/iterators.hpp b/src/third_party/protozero/include/protozero/iterators.hpp similarity index 100% rename from spatial/third_party/protozero/include/protozero/iterators.hpp rename to src/third_party/protozero/include/protozero/iterators.hpp diff --git a/spatial/third_party/protozero/include/protozero/pbf_builder.hpp b/src/third_party/protozero/include/protozero/pbf_builder.hpp similarity index 100% rename from spatial/third_party/protozero/include/protozero/pbf_builder.hpp rename to src/third_party/protozero/include/protozero/pbf_builder.hpp diff --git a/spatial/third_party/protozero/include/protozero/pbf_message.hpp b/src/third_party/protozero/include/protozero/pbf_message.hpp similarity index 100% rename from spatial/third_party/protozero/include/protozero/pbf_message.hpp rename to 
src/third_party/protozero/include/protozero/pbf_message.hpp diff --git a/spatial/third_party/protozero/include/protozero/pbf_reader.hpp b/src/third_party/protozero/include/protozero/pbf_reader.hpp similarity index 100% rename from spatial/third_party/protozero/include/protozero/pbf_reader.hpp rename to src/third_party/protozero/include/protozero/pbf_reader.hpp diff --git a/spatial/third_party/protozero/include/protozero/pbf_writer.hpp b/src/third_party/protozero/include/protozero/pbf_writer.hpp similarity index 100% rename from spatial/third_party/protozero/include/protozero/pbf_writer.hpp rename to src/third_party/protozero/include/protozero/pbf_writer.hpp diff --git a/spatial/third_party/protozero/include/protozero/types.hpp b/src/third_party/protozero/include/protozero/types.hpp similarity index 100% rename from spatial/third_party/protozero/include/protozero/types.hpp rename to src/third_party/protozero/include/protozero/types.hpp diff --git a/spatial/third_party/protozero/include/protozero/varint.hpp b/src/third_party/protozero/include/protozero/varint.hpp similarity index 100% rename from spatial/third_party/protozero/include/protozero/varint.hpp rename to src/third_party/protozero/include/protozero/varint.hpp diff --git a/spatial/third_party/protozero/include/protozero/version.hpp b/src/third_party/protozero/include/protozero/version.hpp similarity index 100% rename from spatial/third_party/protozero/include/protozero/version.hpp rename to src/third_party/protozero/include/protozero/version.hpp diff --git a/spatial/third_party/shapelib/AUTHORS b/src/third_party/shapelib/AUTHORS similarity index 100% rename from spatial/third_party/shapelib/AUTHORS rename to src/third_party/shapelib/AUTHORS diff --git a/spatial/third_party/shapelib/CMakeLists.txt b/src/third_party/shapelib/CMakeLists.txt similarity index 100% rename from spatial/third_party/shapelib/CMakeLists.txt rename to src/third_party/shapelib/CMakeLists.txt diff --git 
a/spatial/third_party/shapelib/LICENSE-MIT b/src/third_party/shapelib/LICENSE-MIT similarity index 100% rename from spatial/third_party/shapelib/LICENSE-MIT rename to src/third_party/shapelib/LICENSE-MIT diff --git a/spatial/third_party/shapelib/dbfopen.c b/src/third_party/shapelib/dbfopen.c similarity index 100% rename from spatial/third_party/shapelib/dbfopen.c rename to src/third_party/shapelib/dbfopen.c diff --git a/spatial/third_party/shapelib/safileio.c b/src/third_party/shapelib/safileio.c similarity index 100% rename from spatial/third_party/shapelib/safileio.c rename to src/third_party/shapelib/safileio.c diff --git a/spatial/third_party/shapelib/sbnsearch.c b/src/third_party/shapelib/sbnsearch.c similarity index 100% rename from spatial/third_party/shapelib/sbnsearch.c rename to src/third_party/shapelib/sbnsearch.c diff --git a/spatial/third_party/shapelib/shapefil.h b/src/third_party/shapelib/shapefil.h similarity index 100% rename from spatial/third_party/shapelib/shapefil.h rename to src/third_party/shapelib/shapefil.h diff --git a/spatial/third_party/shapelib/shpopen.c b/src/third_party/shapelib/shpopen.c similarity index 100% rename from spatial/third_party/shapelib/shpopen.c rename to src/third_party/shapelib/shpopen.c diff --git a/spatial/third_party/shapelib/shptree.c b/src/third_party/shapelib/shptree.c similarity index 100% rename from spatial/third_party/shapelib/shptree.c rename to src/third_party/shapelib/shptree.c diff --git a/spatial/third_party/yyjson/CMakeLists.txt b/src/third_party/yyjson/CMakeLists.txt similarity index 100% rename from spatial/third_party/yyjson/CMakeLists.txt rename to src/third_party/yyjson/CMakeLists.txt diff --git a/spatial/third_party/yyjson/LICENSE b/src/third_party/yyjson/LICENSE similarity index 100% rename from spatial/third_party/yyjson/LICENSE rename to src/third_party/yyjson/LICENSE diff --git a/spatial/third_party/yyjson/include/yyjson.h b/src/third_party/yyjson/include/yyjson.h similarity index 100% rename 
from spatial/third_party/yyjson/include/yyjson.h rename to src/third_party/yyjson/include/yyjson.h diff --git a/spatial/third_party/yyjson/yyjson.cpp b/src/third_party/yyjson/yyjson.cpp similarity index 100% rename from spatial/third_party/yyjson/yyjson.cpp rename to src/third_party/yyjson/yyjson.cpp diff --git a/test/sql/geometry/st_ashexwkb.test b/test/sql/geometry/st_ashexwkb.test index 769161ee..562321e9 100644 --- a/test/sql/geometry/st_ashexwkb.test +++ b/test/sql/geometry/st_ashexwkb.test @@ -36,6 +36,12 @@ SELECT ST_GeomFROMHEXWKB('01030000C001000000050000000000000000000000000000000000 ---- POLYGON ZM ((0 0 1 1, 0 1 1 2, 1 1 1 3, 1 0 1 4, 0 0 1 5)) +# Unsupported type +statement error +SELECT ST_GeomFromHEXWKB('010800000000000000'); +---- +Invalid Input Error: Could not parse HEX WKB string: WKB type 'CIRCULARSTRING' is not supported! (type id: 8) + # Test rountrips properly statement ok CREATE TABLE types (geom GEOMETRY); diff --git a/test/sql/geometry/st_geomfromtext.test b/test/sql/geometry/st_geomfromtext.test index 2617d197..3524ff28 100644 --- a/test/sql/geometry/st_geomfromtext.test +++ b/test/sql/geometry/st_geomfromtext.test @@ -21,10 +21,10 @@ POINT (0 1) statement error SELECT ST_AsText(ST_GeomFromText('POINT Z (1 2)')); ---- -Invalid Input Error: WKT Parser: Expected double at position 12 near: 'POINT Z (1 2)'|<--- +Invalid Input Error: Expected number at position '12' near: 'POINT Z (1 2)'|<--- # Mixed dimensionality not allowed statement error SELECT ST_AsText(ST_GeomFromText('GEOMETRYCOLLECTION ZM (POINT Z (1 2 3))')); ---- -Invalid Input Error: WKT Parser: GeometryCollection with mixed Z and M types are not supported, mismatch at position 31 near: 'GEOMETRYCOLLECTION ZM (POINT Z ('|<--- \ No newline at end of file +Invalid Input Error: Mixed Z and M values are not supported at position '31' near: 'GEOMETRYCOLLECTION ZM (POINT Z ('|<--- diff --git a/vcpkg.json b/vcpkg.json index eb412930..e58426c4 100644 --- a/vcpkg.json +++ b/vcpkg.json @@ 
-2,7 +2,6 @@ "dependencies": [ "openssl", "zlib", - "geographiclib", "geos", "expat", {