From 87cecc2be5994e371b1108c383806d2466ee35bb Mon Sep 17 00:00:00 2001 From: KaiGai Kohei Date: Tue, 19 Sep 2023 09:50:35 +0900 Subject: [PATCH] add kvec (vectorized device data type) definitions --- src/codegen.c | 7 +- src/cuda_common.h | 375 ----------------------------------- src/pg_strom.h | 3 +- src/xpu_basetype.cu | 39 ++++ src/xpu_basetype.h | 1 - src/xpu_common.cu | 18 ++ src/xpu_common.h | 472 ++++++++++++++++++++++++++++++++++++++++++++ src/xpu_jsonlib.cu | 25 +++ src/xpu_misclib.cu | 97 ++++++++- src/xpu_misclib.h | 9 +- src/xpu_numeric.cu | 22 +++ src/xpu_numeric.h | 7 + src/xpu_postgis.cu | 66 +++++++ src/xpu_postgis.h | 139 ++----------- src/xpu_textlib.cu | 77 ++++++++ src/xpu_timelib.cu | 87 +++++++- 16 files changed, 941 insertions(+), 503 deletions(-) diff --git a/src/codegen.c b/src/codegen.c index 78b34d854..d5df5b346 100644 --- a/src/codegen.c +++ b/src/codegen.c @@ -29,7 +29,8 @@ static List *devfunc_code_slot[DEVFUNC_INFO_NSLOTS]; /* by FuncOpCode */ DEVKIND__ANY | (FLAGS), \ devtype_##NAME##_hash, \ sizeof(xpu_##NAME##_t), \ - __alignof__(xpu_##NAME##_t) }, + __alignof__(xpu_##NAME##_t), \ + sizeof(kvec_##NAME##_t) }, static struct { const char *type_extension; const char *type_name; @@ -38,10 +39,11 @@ static struct { devtype_hashfunc_f type_hashfunc; int type_sizeof; int type_alignof; + int kvec_sizeof; } devtype_catalog[] = { #include "xpu_opcodes.h" /* alias device data types */ - {NULL, NULL, TypeOpCode__Invalid, 0, NULL, 0} + {NULL, NULL, TypeOpCode__Invalid, 0, NULL, 0, 0} }; static struct { @@ -112,6 +114,7 @@ build_basic_devtype_info(TypeCacheEntry *tcache, const char *ext_name) dtype->type_extension = (ext_name ? pstrdup(ext_name) : NULL); dtype->type_sizeof = devtype_catalog[i].type_sizeof; dtype->type_alignof = devtype_catalog[i].type_alignof; + dtype->kvec_sizeof = devtype_catalog[i].kvec_sizeof; dtype->type_hashfunc = devtype_catalog[i].type_hashfunc; /* type equality functions */ dtype->type_eqfunc = get_opcode(tcache->eq_opr); diff --git a/src/cuda_common.h b/src/cuda_common.h index 76e7622af..0ac2acaa7 100644 --- a/src/cuda_common.h +++ b/src/cuda_common.h @@ -13,54 +13,7 @@ #define CUDA_COMMON_H #include "xpu_common.h" -#define WARPSIZE 32 -#define MAXTHREADS_PER_BLOCK 1024 -#define MAXWARPS_PER_BLOCK (MAXTHREADS_PER_BLOCK / WARPSIZE) -#define CUDA_L1_CACHELINE_SZ 128 - #if defined(__CUDACC__) -/* - * Thread index at CUDA C - */ -#define get_group_id() (blockIdx.x) -#define get_num_groups() (gridDim.x) -#define get_local_id() (threadIdx.x) -#define get_local_size() (blockDim.x) -#define get_global_id() (threadIdx.x + blockIdx.x * blockDim.x) -#define get_global_size() (blockDim.x * gridDim.x) - -/* Dynamic shared memory entrypoint */ -extern __shared__ char __pgstrom_dynamic_shared_workmem[] __MAXALIGNED__; -#define SHARED_WORKMEM(UNITSZ,INDEX) \ - (__pgstrom_dynamic_shared_workmem + (UNITSZ)*(INDEX)) - -INLINE_FUNCTION(uint32_t) LaneId(void) -{ - uint32_t rv; - - asm volatile("mov.u32 %0, %laneid;" : "=r"(rv) ); - - return rv; -} - -INLINE_FUNCTION(uint32_t) DynamicShmemSize(void) -{ - uint32_t rv; - - asm volatile("mov.u32 %0, %dynamic_smem_size;" : "=r"(rv) ); - - return rv; -} - -INLINE_FUNCTION(uint32_t) TotalShmemSize(void) -{ - uint32_t rv; - - asm volatile("mov.u32 %0, %total_smem_size;" : "=r"(rv) ); - - return rv; -} - template INLINE_FUNCTION(T) __reduce_stair_add_sync(T value, T *p_total_sum = NULL) @@ -349,332 +302,4 @@ kern_gpujoin_main(kern_session_info *session, kern_data_extra *kds_extra, kern_data_store *kds_dst); -/* - * Atomic function wrappers - */ -INLINE_FUNCTION(uint32_t) -__atomic_write_uint32(uint32_t *ptr, uint32_t ival) -{ -#ifdef __CUDACC__ - return atomicExch((unsigned int *)ptr, ival); -#else - return __atomic_exchange_n(ptr, ival, __ATOMIC_SEQ_CST); -#endif -} - -INLINE_FUNCTION(uint64_t) -__atomic_write_uint64(uint64_t *ptr, uint64_t ival) -{ -#ifdef __CUDACC__ - return atomicExch((unsigned long long int *)ptr, ival); -#else - return __atomic_exchange_n(ptr, ival, __ATOMIC_SEQ_CST); -#endif -} - -INLINE_FUNCTION(uint32_t) -__atomic_add_uint32(uint32_t *ptr, uint32_t ival) -{ -#ifdef __CUDACC__ - return atomicAdd((unsigned int *)ptr, (unsigned int)ival); -#else - return __atomic_fetch_add(ptr, ival, __ATOMIC_SEQ_CST); -#endif -} - -INLINE_FUNCTION(uint64_t) -__atomic_add_uint64(uint64_t *ptr, uint64_t ival) -{ -#ifdef __CUDACC__ - return atomicAdd((unsigned long long *)ptr, (unsigned long long)ival); -#else - return __atomic_fetch_add(ptr, ival, __ATOMIC_SEQ_CST); -#endif -} - -INLINE_FUNCTION(int64_t) -__atomic_add_int64(int64_t *ptr, int64_t ival) -{ -#ifdef __CUDACC__ - return atomicAdd((unsigned long long int *)ptr, (unsigned long long int)ival); -#else - return __atomic_fetch_add(ptr, ival, __ATOMIC_SEQ_CST); -#endif -} - -INLINE_FUNCTION(float8_t) -__atomic_add_fp64(float8_t *ptr, float8_t fval) -{ -#ifdef __CUDACC__ - return atomicAdd((double *)ptr, (double)fval); -#else - union { - uint64_t ival; - float8_t fval; - } oldval, newval; - - oldval.fval = __volatileRead(ptr); - do { - newval.fval = oldval.fval + fval; - } while (!__atomic_compare_exchange_n((uint64_t *)ptr, - &oldval.ival, - newval.ival, - false, - __ATOMIC_SEQ_CST, - __ATOMIC_SEQ_CST)); - return oldval.fval; -#endif -} - -INLINE_FUNCTION(uint32_t) -__atomic_and_uint32(uint32_t *ptr, uint32_t mask) -{ -#ifdef __CUDACC__ - return atomicAnd((unsigned int *)ptr, (unsigned int)mask); -#else - return __atomic_fetch_and(ptr, mask, __ATOMIC_SEQ_CST); -#endif -} - -INLINE_FUNCTION(uint32_t) -__atomic_or_uint32(uint32_t *ptr, uint32_t mask) -{ -#ifdef __CUDACC__ - return atomicOr((unsigned int *)ptr, (unsigned int)mask); -#else - return __atomic_fetch_or(ptr, mask, __ATOMIC_SEQ_CST); -#endif -} - -INLINE_FUNCTION(uint32_t) -__atomic_max_uint32(uint32_t *ptr, uint32_t ival) -{ -#ifdef __CUDACC__ - return atomicMax((unsigned int *)ptr, (unsigned int)ival); -#else - uint32_t oldval = __volatileRead(ptr); - - while (oldval > ival) - { - if (__atomic_compare_exchange_n(ptr, - &oldval, - ival, - false, - __ATOMIC_SEQ_CST, - __ATOMIC_SEQ_CST)) - break; - } - return oldval; -#endif -} - -INLINE_FUNCTION(int64_t) -__atomic_min_int64(int64_t *ptr, int64_t ival) -{ -#ifdef __CUDACC__ - return atomicMin((long long int *)ptr, (long long int)ival); -#else - int64_t oldval = __volatileRead(ptr); - - while (oldval > ival) - { - if (__atomic_compare_exchange_n(ptr, - &oldval, - ival, - false, - __ATOMIC_SEQ_CST, - __ATOMIC_SEQ_CST)) - break; - } - return oldval; -#endif -} - -INLINE_FUNCTION(int64_t) -__atomic_max_int64(int64_t *ptr, int64_t ival) -{ -#ifdef __CUDACC__ - return atomicMax((long long int *)ptr, (long long int)ival); -#else - int64_t oldval = __volatileRead(ptr); - - while (oldval < ival) - { - if (__atomic_compare_exchange_n(ptr, - &oldval, - ival, - false, - __ATOMIC_SEQ_CST, - __ATOMIC_SEQ_CST)) - break; - } - return oldval; -#endif -} - -INLINE_FUNCTION(float8_t) -__atomic_min_fp64(float8_t *ptr, float8_t fval) -{ -#ifdef __CUDACC__ - union { - unsigned long long ival; - float8_t fval; - } oldval, curval, newval; - - newval.fval = fval; - curval.fval = __volatileRead(ptr); - while (newval.fval < curval.fval) - { - oldval = curval; - curval.ival = atomicCAS((unsigned long long *)ptr, - oldval.ival, - newval.ival); - if (curval.ival == oldval.ival) - break; - } - return curval.fval; -#else - union { - uint64_t ival; - float8_t fval; - } oldval, newval; - - newval.fval = fval; - oldval.fval = __volatileRead(ptr); - while (oldval.fval > newval.fval) - { - if (__atomic_compare_exchange_n((uint64_t *)ptr, - &oldval.ival, - newval.ival, - false, - __ATOMIC_SEQ_CST, - __ATOMIC_SEQ_CST)) - break; - } - return oldval.fval; -#endif -} - -INLINE_FUNCTION(float8_t) -__atomic_max_fp64(float8_t *ptr, float8_t fval) -{ -#ifdef __CUDACC__ - union { - unsigned long long ival; - float8_t fval; - } oldval, curval, newval; - - newval.fval = fval; - curval.fval = __volatileRead(ptr); - while (newval.fval > curval.fval) - { - oldval = curval; - curval.ival = atomicCAS((unsigned long long *)ptr, - oldval.ival, - newval.ival); - if (curval.ival == oldval.ival) - break; - } - return curval.fval; -#else - union { - uint64_t ival; - float8_t fval; - } oldval, newval; - - newval.fval = fval; - oldval.fval = __volatileRead(ptr); - while (oldval.fval > newval.fval) - { - if (__atomic_compare_exchange_n((uint64_t *)ptr, - &oldval.ival, - newval.ival, - false, - __ATOMIC_SEQ_CST, - __ATOMIC_SEQ_CST)) - break; - } - return oldval.fval; -#endif -} - -INLINE_FUNCTION(uint32_t) -__atomic_cas_uint32(uint32_t *ptr, uint32_t comp, uint32_t newval) -{ -#ifdef __CUDACC__ - return atomicCAS((unsigned int *)ptr, - (unsigned int)comp, - (unsigned int)newval); -#else - __atomic_compare_exchange_n(ptr, - &comp, - newval, - false, - __ATOMIC_SEQ_CST, - __ATOMIC_SEQ_CST); - return comp; -#endif -} - -INLINE_FUNCTION(uint64_t) -__atomic_cas_uint64(uint64_t *ptr, uint64_t comp, uint64_t newval) -{ -#ifdef __CUDACC__ - return atomicCAS((unsigned long long int *)ptr, - (unsigned long long int)comp, - (unsigned long long int)newval); -#else - __atomic_compare_exchange_n(ptr, - &comp, - newval, - false, - __ATOMIC_SEQ_CST, - __ATOMIC_SEQ_CST); - return comp; -#endif -} - -/* - * Misc functions - */ -INLINE_FUNCTION(void) -print_kern_data_store(const kern_data_store *kds) -{ - printf("kds %p { length=%lu, nitems=%u, usage=%u, ncols=%u, format=%c, has_varlena=%c, tdhasoid=%c, tdtypeid=%u, tdtypmod=%d, table_oid=%u, hash_nslots=%u, block_offset=%u, block_nloaded=%u, nr_colmeta=%u }\n", - kds, - kds->length, - kds->nitems, - kds->usage, - kds->ncols, - kds->format, - kds->has_varlena ? 't' : 'f', - kds->tdhasoid ? 't' : 'f', - kds->tdtypeid, - kds->tdtypmod, - kds->table_oid, - kds->hash_nslots, - kds->block_offset, - kds->block_nloaded, - kds->nr_colmeta); - for (int j=0; j < kds->nr_colmeta; j++) - { - const kern_colmeta *cmeta = &kds->colmeta[j]; - - printf("cmeta[%d] { attbyval=%c, attalign=%d, attlen=%d, attnum=%d, attcacheoff=%d, atttypid=%u, atttypmod=%d, atttypkind=%c, kds_format=%c, kds_offset=%u, idx_subattrs=%u, num_subattrs=%u, attname='%s' }\n", - j, - cmeta->attbyval ? 't' : 'f', - (int)cmeta->attalign, - (int)cmeta->attlen, - (int)cmeta->attnum, - (int)cmeta->attcacheoff, - cmeta->atttypid, - cmeta->atttypmod, - cmeta->atttypkind, - cmeta->kds_format, - cmeta->kds_offset, - (unsigned int)cmeta->idx_subattrs, - (unsigned int)cmeta->num_subattrs, - cmeta->attname); - } -} #endif /* CUDA_COMMON_H */ diff --git a/src/pg_strom.h b/src/pg_strom.h index c30039e19..16a7b008c 100644 --- a/src/pg_strom.h +++ b/src/pg_strom.h @@ -199,8 +199,9 @@ typedef struct devtype_info const char *type_extension; const char *type_name; Oid type_namespace; - int type_sizeof; + int type_sizeof; /* sizeof(xpu_NAME_t) */ int type_alignof; + int kvec_sizeof; /* sizeof(kvec_NAME_t) */ devtype_hashfunc_f type_hashfunc; /* oid of type related functions */ Oid type_eqfunc; diff --git a/src/xpu_basetype.cu b/src/xpu_basetype.cu index f8301730b..fda47943d 100644 --- a/src/xpu_basetype.cu +++ b/src/xpu_basetype.cu @@ -96,6 +96,19 @@ xpu_bool_datum_comp(kern_context *kcxt, *p_comp = ((int)a->value - (int)b->value); return true; } +STATIC_FUNCTION(bool) +xpu_bool_datum_load_heap(kern_context *kcxt, + kvec_datum_t *__result, + int kvec_id, + const char *addr) +{ + kvec_bool_t *result = (kvec_bool_t *)__result; + + kvec_update_nullmask(&result->nullmask, kvec_id, addr); + if (addr) + result->values[kvec_id] = *((const bool *)addr); + return true; +} PGSTROM_SQLTYPE_OPERATORS(bool,true,1,sizeof(bool)); /* @@ -182,6 +195,19 @@ PGSTROM_SQLTYPE_OPERATORS(bool,true,1,sizeof(bool)); *p_comp = 0; \ return true; \ } \ + STATIC_FUNCTION(bool) \ + xpu_##NAME##_datum_load_heap(kern_context *kcxt, \ + kvec_datum_t *__result, \ + int kvec_id, \ + const char *addr) \ + { \ + kvec_##NAME##_t *result = (kvec_##NAME##_t *)__result; \ + \ + kvec_update_nullmask(&result->nullmask, kvec_id, addr); \ + if (addr) \ + result->values[kvec_id] = *((const BASETYPE *)addr); \ + return true; \ + } \ PGSTROM_SQLTYPE_OPERATORS(NAME,true,sizeof(BASETYPE),sizeof(BASETYPE)) PGSTROM_SIMPLE_INTEGER_TEMPLATE(int1, int8_t, i8); @@ -273,6 +299,19 @@ PGSTROM_SIMPLE_INTEGER_TEMPLATE(int8,int64_t,i64); *p_comp = 0; \ return true; \ } \ + STATIC_FUNCTION(bool) \ + xpu_##NAME##_datum_load_heap(kern_context *kcxt, \ + kvec_datum_t *__result, \ + int kvec_id, \ + const char *addr) \ + { \ + kvec_##NAME##_t *result = (kvec_##NAME##_t *)__result; \ + \ + kvec_update_nullmask(&result->nullmask, kvec_id, addr); \ + if (addr) \ + result->values[kvec_id] = *((const BASETYPE *)addr); \ + return true; \ + } \ PGSTROM_SQLTYPE_OPERATORS(NAME,true,sizeof(BASETYPE),sizeof(BASETYPE)) PGSTROM_SIMPLE_FLOAT_TEMPLATE(float2, float2_t, fp16, Half); diff --git a/src/xpu_basetype.h b/src/xpu_basetype.h index 14cd5a372..f169ef7ac 100644 --- a/src/xpu_basetype.h +++ b/src/xpu_basetype.h @@ -12,7 +12,6 @@ #ifndef XPU_BASETYPE_H #define XPU_BASETYPE_H - #ifndef PG_BOOLOID #define PG_BOOLOID 16 #endif /* PG_BOOLOID */ diff --git a/src/xpu_common.cu b/src/xpu_common.cu index d6bf5fe55..c58e9a46d 100644 --- a/src/xpu_common.cu +++ b/src/xpu_common.cu @@ -2805,6 +2805,15 @@ xpu_array_datum_comp(kern_context *kcxt, STROM_ELOG(kcxt, "xpu_array_datum_comp is not implemented"); return false; } +STATIC_FUNCTION(bool) +xpu_array_datum_load_heap(kern_context *kcxt, + kvec_datum_t *__result, + int kvec_id, + const char *addr) +{ + STROM_ELOG(kcxt, "xpu_composite_datum_heap_load is not implemented"); + return false; +} //MEMO: some array type uses typalign=4. is it ok? PGSTROM_SQLTYPE_OPERATORS(array,false,4,-1); @@ -2857,6 +2866,15 @@ xpu_composite_datum_comp(kern_context *kcxt, STROM_ELOG(kcxt, "xpu_composite_datum_comp is not implemented"); return false; } +STATIC_FUNCTION(bool) +xpu_composite_datum_load_heap(kern_context *kcxt, + kvec_datum_t *__result, + int kvec_id, + const char *addr) +{ + STROM_ELOG(kcxt, "xpu_composite_datum_heap_load is not implemented"); + return false; +} PGSTROM_SQLTYPE_OPERATORS(composite,false,8,-1); /* diff --git a/src/xpu_common.h b/src/xpu_common.h index 447f97b02..5a15793a7 100644 --- a/src/xpu_common.h +++ b/src/xpu_common.h @@ -244,6 +244,60 @@ __strcmp(const char *s1, const char *s2) return c1 - c2; } +/* ---------------------------------------------------------------- + * + * Fundamental CUDA definitions + * + * ---------------------------------------------------------------- + */ +#define WARPSIZE 32 +#define MAXTHREADS_PER_BLOCK 1024 +#define MAXWARPS_PER_BLOCK (MAXTHREADS_PER_BLOCK / WARPSIZE) +#define CUDA_L1_CACHELINE_SZ 128 + +#if defined(__CUDACC__) +/* Thread index at CUDA C++ */ +#define get_group_id() (blockIdx.x) +#define get_num_groups() (gridDim.x) +#define get_local_id() (threadIdx.x) +#define get_local_size() (blockDim.x) +#define get_global_id() (threadIdx.x + blockIdx.x * blockDim.x) +#define get_global_size() (blockDim.x * gridDim.x) + +/* Dynamic shared memory entrypoint */ +extern __shared__ char __pgstrom_dynamic_shared_workmem[] __MAXALIGNED__; +#define SHARED_WORKMEM(UNITSZ,INDEX) \ + (__pgstrom_dynamic_shared_workmem + (UNITSZ)*(INDEX)) + +/* Reference to the special registers */ +INLINE_FUNCTION(uint32_t) LaneId(void) +{ + uint32_t rv; + + asm volatile("mov.u32 %0, %laneid;" : "=r"(rv) ); + + return rv; +} + +INLINE_FUNCTION(uint32_t) DynamicShmemSize(void) +{ + uint32_t rv; + + asm volatile("mov.u32 %0, %dynamic_smem_size;" : "=r"(rv) ); + + return rv; +} + +INLINE_FUNCTION(uint32_t) TotalShmemSize(void) +{ + uint32_t rv; + + asm volatile("mov.u32 %0, %total_smem_size;" : "=r"(rv) ); + + return rv; +} +#endif /* __CUDACC__ */ + /* * TypeOpCode / FuncOpCode */ @@ -1472,6 +1526,58 @@ typedef struct toast_compress_header (((varattrib_4b *)(PTR))->va_4byte.va_header = (((uint32_t) (len)) << 2)) #endif /* POSTGRES_H */ +/* ---------------------------------------------------------------- + * + * Definition of vectorized xPU device data types + * + * ---------------------------------------------------------------- + */ +#define KVEC_UNITSZ 64 +#define KVEC_ALIGN(x) TYPEALIGN(16,(x)) /* 128bit alignment */ + +#define KVEC_DATUM_COMMON_FIELD \ + uint64_t nullmask + +typedef struct kvec_datum_t { + KVEC_DATUM_COMMON_FIELD; +} kvec_datum_t; + +INLINE_FUNCTION(void) +kvec_update_nullmask(uint64_t *p_nullmask, int kvec_id, const char *addr) +{ + uint64_t bits = (1UL << kvec_id); + uint64_t mask __attribute__ ((unused)); + + assert(kvec_id >= 0 && kvec_id < KVEC_UNITSZ); +#ifdef __CUDACC__ + if (LaneId() == 0) + mask = *p_nullmask; + mask = __shfl_sync(__activemask(), mask, 0); + if (!addr) + { + if ((mask & bits) == 0) + bits = 0; /* no change */ + } + else + { + if ((mask & bits) != 0) + bits = 0; /* no change */ + } + bits |= __shfl_xor_sync(__activemask(), bits, 0x01); + bits |= __shfl_xor_sync(__activemask(), bits, 0x02); + bits |= __shfl_xor_sync(__activemask(), bits, 0x04); + bits |= __shfl_xor_sync(__activemask(), bits, 0x08); + bits |= __shfl_xor_sync(__activemask(), bits, 0x10); + if (LaneId() == 0) + *p_nullmask = (mask ^ bits); +#else + if (!addr) + *p_nullmask &= ~bits; + else + *p_nullmask |= bits; +#endif +} + /* ---------------------------------------------------------------- * * Definitions for XPU device data types @@ -1517,21 +1623,51 @@ struct xpu_datum_operators { int *p_comp, /* out */ const xpu_datum_t *a, /* in */ const xpu_datum_t *b); /* in */ + //added for kvec support + bool (*xpu_datum_load_heap)(kern_context *kcxt, + kvec_datum_t *result, + int kvec_id, + const char *addr); + //how to handle arrow format? }; +#define __PGSTROM_SQLTYPE_SIMPLE_DECLARATION(NAME,BASETYPE) \ + typedef struct { \ + XPU_DATUM_COMMON_FIELD; \ + BASETYPE value; \ + } xpu_##NAME##_t; \ + EXTERN_DATA xpu_datum_operators xpu_##NAME##_ops #define PGSTROM_SQLTYPE_SIMPLE_DECLARATION(NAME,BASETYPE) \ + typedef struct { \ + KVEC_DATUM_COMMON_FIELD; \ + BASETYPE values[KVEC_UNITSZ]; \ + } kvec_##NAME##_t; \ typedef struct { \ XPU_DATUM_COMMON_FIELD; \ BASETYPE value; \ } xpu_##NAME##_t; \ EXTERN_DATA xpu_datum_operators xpu_##NAME##_ops + +#define __PGSTROM_SQLTYPE_VARLENA_DECLARATION(NAME) \ + typedef struct { \ + XPU_DATUM_COMMON_FIELD; \ + int length; /* -1, if PG verlena */ \ + const char *value; \ + } xpu_##NAME##_t; \ + EXTERN_DATA xpu_datum_operators xpu_##NAME##_ops #define PGSTROM_SQLTYPE_VARLENA_DECLARATION(NAME) \ + typedef struct { \ + KVEC_DATUM_COMMON_FIELD; \ + int length[KVEC_UNITSZ]; \ + const char *values[KVEC_UNITSZ]; \ + } kvec_##NAME##_t; \ typedef struct { \ XPU_DATUM_COMMON_FIELD; \ int length; /* -1, if PG verlena */ \ const char *value; \ } xpu_##NAME##_t; \ EXTERN_DATA xpu_datum_operators xpu_##NAME##_ops + #define PGSTROM_SQLTYPE_OPERATORS(NAME,TYPBYVAL,TYPALIGN,TYPLENGTH) \ PUBLIC_DATA xpu_datum_operators xpu_##NAME##_ops = { \ .xpu_type_name = #NAME, \ @@ -1546,6 +1682,7 @@ struct xpu_datum_operators { .xpu_datum_write = xpu_##NAME##_datum_write, \ .xpu_datum_hash = xpu_##NAME##_datum_hash, \ .xpu_datum_comp = xpu_##NAME##_datum_comp, \ + .xpu_datum_load_heap = xpu_##NAME##_datum_load_heap, \ } #include "xpu_basetype.h" @@ -2677,4 +2814,339 @@ KERN_MULTIRELS_GIST_INDEX(kern_multirels *kmrels, int dindex) offset = kmrels->chunks[dindex].gist_offset; return (kern_data_store *)(offset == 0 ? NULL : ((char *)kmrels + offset)); } + +/* ---------------------------------------------------------------- + * + * Atomic Operations + * + * ---------------------------------------------------------------- + */ +INLINE_FUNCTION(uint32_t) +__atomic_write_uint32(uint32_t *ptr, uint32_t ival) +{ +#ifdef __CUDACC__ + return atomicExch((unsigned int *)ptr, ival); +#else + return __atomic_exchange_n(ptr, ival, __ATOMIC_SEQ_CST); +#endif +} + +INLINE_FUNCTION(uint64_t) +__atomic_write_uint64(uint64_t *ptr, uint64_t ival) +{ +#ifdef __CUDACC__ + return atomicExch((unsigned long long int *)ptr, ival); +#else + return __atomic_exchange_n(ptr, ival, __ATOMIC_SEQ_CST); +#endif +} + +INLINE_FUNCTION(uint32_t) +__atomic_add_uint32(uint32_t *ptr, uint32_t ival) +{ +#ifdef __CUDACC__ + return atomicAdd((unsigned int *)ptr, (unsigned int)ival); +#else + return __atomic_fetch_add(ptr, ival, __ATOMIC_SEQ_CST); +#endif +} + +INLINE_FUNCTION(uint64_t) +__atomic_add_uint64(uint64_t *ptr, uint64_t ival) +{ +#ifdef __CUDACC__ + return atomicAdd((unsigned long long *)ptr, (unsigned long long)ival); +#else + return __atomic_fetch_add(ptr, ival, __ATOMIC_SEQ_CST); +#endif +} + +INLINE_FUNCTION(int64_t) +__atomic_add_int64(int64_t *ptr, int64_t ival) +{ +#ifdef __CUDACC__ + return atomicAdd((unsigned long long int *)ptr, (unsigned long long int)ival); +#else + return __atomic_fetch_add(ptr, ival, __ATOMIC_SEQ_CST); +#endif +} + +INLINE_FUNCTION(float8_t) +__atomic_add_fp64(float8_t *ptr, float8_t fval) +{ +#ifdef __CUDACC__ + return atomicAdd((double *)ptr, (double)fval); +#else + union { + uint64_t ival; + float8_t fval; + } oldval, newval; + + oldval.fval = __volatileRead(ptr); + do { + newval.fval = oldval.fval + fval; + } while (!__atomic_compare_exchange_n((uint64_t *)ptr, + &oldval.ival, + newval.ival, + false, + __ATOMIC_SEQ_CST, + __ATOMIC_SEQ_CST)); + return oldval.fval; +#endif +} + +INLINE_FUNCTION(uint32_t) +__atomic_and_uint32(uint32_t *ptr, uint32_t mask) +{ +#ifdef __CUDACC__ + return atomicAnd((unsigned int *)ptr, (unsigned int)mask); +#else + return __atomic_fetch_and(ptr, mask, __ATOMIC_SEQ_CST); +#endif +} + +INLINE_FUNCTION(uint32_t) +__atomic_or_uint32(uint32_t *ptr, uint32_t mask) +{ +#ifdef __CUDACC__ + return atomicOr((unsigned int *)ptr, (unsigned int)mask); +#else + return __atomic_fetch_or(ptr, mask, __ATOMIC_SEQ_CST); +#endif +} + +INLINE_FUNCTION(uint32_t) +__atomic_max_uint32(uint32_t *ptr, uint32_t ival) +{ +#ifdef __CUDACC__ + return atomicMax((unsigned int *)ptr, (unsigned int)ival); +#else + uint32_t oldval = __volatileRead(ptr); + + while (oldval > ival) + { + if (__atomic_compare_exchange_n(ptr, + &oldval, + ival, + false, + __ATOMIC_SEQ_CST, + __ATOMIC_SEQ_CST)) + break; + } + return oldval; +#endif +} + +INLINE_FUNCTION(int64_t) +__atomic_min_int64(int64_t *ptr, int64_t ival) +{ +#ifdef __CUDACC__ + return atomicMin((long long int *)ptr, (long long int)ival); +#else + int64_t oldval = __volatileRead(ptr); + + while (oldval > ival) + { + if (__atomic_compare_exchange_n(ptr, + &oldval, + ival, + false, + __ATOMIC_SEQ_CST, + __ATOMIC_SEQ_CST)) + break; + } + return oldval; +#endif +} + +INLINE_FUNCTION(int64_t) +__atomic_max_int64(int64_t *ptr, int64_t ival) +{ +#ifdef __CUDACC__ + return atomicMax((long long int *)ptr, (long long int)ival); +#else + int64_t oldval = __volatileRead(ptr); + + while (oldval < ival) + { + if (__atomic_compare_exchange_n(ptr, + &oldval, + ival, + false, + __ATOMIC_SEQ_CST, + __ATOMIC_SEQ_CST)) + break; + } + return oldval; +#endif +} + +INLINE_FUNCTION(float8_t) +__atomic_min_fp64(float8_t *ptr, float8_t fval) +{ +#ifdef __CUDACC__ + union { + unsigned long long ival; + float8_t fval; + } oldval, curval, newval; + + newval.fval = fval; + curval.fval = __volatileRead(ptr); + while (newval.fval < curval.fval) + { + oldval = curval; + curval.ival = atomicCAS((unsigned long long *)ptr, + oldval.ival, + newval.ival); + if (curval.ival == oldval.ival) + break; + } + return curval.fval; +#else + union { + uint64_t ival; + float8_t fval; + } oldval, newval; + + newval.fval = fval; + oldval.fval = __volatileRead(ptr); + while (oldval.fval > newval.fval) + { + if (__atomic_compare_exchange_n((uint64_t *)ptr, + &oldval.ival, + newval.ival, + false, + __ATOMIC_SEQ_CST, + __ATOMIC_SEQ_CST)) + break; + } + return oldval.fval; +#endif +} + +INLINE_FUNCTION(float8_t) +__atomic_max_fp64(float8_t *ptr, float8_t fval) +{ +#ifdef __CUDACC__ + union { + unsigned long long ival; + float8_t fval; + } oldval, curval, newval; + + newval.fval = fval; + curval.fval = __volatileRead(ptr); + while (newval.fval > curval.fval) + { + oldval = curval; + curval.ival = atomicCAS((unsigned long long *)ptr, + oldval.ival, + newval.ival); + if (curval.ival == oldval.ival) + break; + } + return curval.fval; +#else + union { + uint64_t ival; + float8_t fval; + } oldval, newval; + + newval.fval = fval; + oldval.fval = __volatileRead(ptr); + while (oldval.fval > newval.fval) + { + if (__atomic_compare_exchange_n((uint64_t *)ptr, + &oldval.ival, + newval.ival, + false, + __ATOMIC_SEQ_CST, + __ATOMIC_SEQ_CST)) + break; + } + return oldval.fval; +#endif +} + +INLINE_FUNCTION(uint32_t) +__atomic_cas_uint32(uint32_t *ptr, uint32_t comp, uint32_t newval) +{ +#ifdef __CUDACC__ + return atomicCAS((unsigned int *)ptr, + (unsigned int)comp, + (unsigned int)newval); +#else + __atomic_compare_exchange_n(ptr, + &comp, + newval, + false, + __ATOMIC_SEQ_CST, + __ATOMIC_SEQ_CST); + return comp; +#endif +} + +INLINE_FUNCTION(uint64_t) +__atomic_cas_uint64(uint64_t *ptr, uint64_t comp, uint64_t newval) +{ +#ifdef __CUDACC__ + return atomicCAS((unsigned long long int *)ptr, + (unsigned long long int)comp, + (unsigned long long int)newval); +#else + __atomic_compare_exchange_n(ptr, + &comp, + newval, + false, + __ATOMIC_SEQ_CST, + __ATOMIC_SEQ_CST); + return comp; +#endif +} + +/* ---------------------------------------------------------------- + * + * Misc functions + * + * ---------------------------------------------------------------- + */ +INLINE_FUNCTION(void) +print_kern_data_store(const kern_data_store *kds) +{ + printf("kds %p { length=%lu, nitems=%u, usage=%u, ncols=%u, format=%c, has_varlena=%c, tdhasoid=%c, tdtypeid=%u, tdtypmod=%d, table_oid=%u, hash_nslots=%u, block_offset=%u, block_nloaded=%u, nr_colmeta=%u }\n", + kds, + kds->length, + kds->nitems, + kds->usage, + kds->ncols, + kds->format, + kds->has_varlena ? 't' : 'f', + kds->tdhasoid ? 't' : 'f', + kds->tdtypeid, + kds->tdtypmod, + kds->table_oid, + kds->hash_nslots, + kds->block_offset, + kds->block_nloaded, + kds->nr_colmeta); + for (int j=0; j < kds->nr_colmeta; j++) + { + const kern_colmeta *cmeta = &kds->colmeta[j]; + + printf("cmeta[%d] { attbyval=%c, attalign=%d, attlen=%d, attnum=%d, attcacheoff=%d, atttypid=%u, atttypmod=%d, atttypkind=%c, kds_format=%c, kds_offset=%u, idx_subattrs=%u, num_subattrs=%u, attname='%s' }\n", + j, + cmeta->attbyval ? 't' : 'f', + (int)cmeta->attalign, + (int)cmeta->attlen, + (int)cmeta->attnum, + (int)cmeta->attcacheoff, + cmeta->atttypid, + cmeta->atttypmod, + cmeta->atttypkind, + cmeta->kds_format, + cmeta->kds_offset, + (unsigned int)cmeta->idx_subattrs, + (unsigned int)cmeta->num_subattrs, + cmeta->attname); + } +} #endif /* XPU_COMMON_H */ diff --git a/src/xpu_jsonlib.cu b/src/xpu_jsonlib.cu index 66c937823..7417561c4 100644 --- a/src/xpu_jsonlib.cu +++ b/src/xpu_jsonlib.cu @@ -324,6 +324,31 @@ xpu_jsonb_datum_comp(kern_context*kcxt, STROM_ELOG(kcxt, "device jsonb type has no compare handler"); return false; } + +STATIC_FUNCTION(bool) +xpu_jsonb_datum_load_heap(kern_context *kcxt, + kvec_datum_t *__result, + int kvec_id, + const char *addr) +{ + kvec_jsonb_t *result = (kvec_jsonb_t *)__result; + + kvec_update_nullmask(&result->nullmask, kvec_id, addr); + if (addr) + { + if (VARATT_IS_EXTERNAL(addr) || VARATT_IS_COMPRESSED(addr)) + { + result->values[kvec_id] = addr; + result->length[kvec_id] = -1; + } + else + { + result->values[kvec_id] = VARDATA_ANY(addr); + result->length[kvec_id] = VARSIZE_ANY_EXHDR(addr); + } + } + return true; +} PGSTROM_SQLTYPE_OPERATORS(jsonb,false,4,-1); /* ---------------------------------------------------------------- diff --git a/src/xpu_misclib.cu b/src/xpu_misclib.cu index 7869475bb..080b26b5f 100644 --- a/src/xpu_misclib.cu +++ b/src/xpu_misclib.cu @@ -626,6 +626,20 @@ xpu_money_datum_comp(kern_context *kcxt, *p_comp = 0; return true; } + +STATIC_FUNCTION(bool) +xpu_money_datum_load_heap(kern_context *kcxt, + kvec_datum_t *__result, + int kvec_id, + const char *addr) +{ + kvec_money_t *result = (kvec_money_t *)__result; + + kvec_update_nullmask(&result->nullmask, kvec_id, addr); + if (addr) + result->values[kvec_id] = *((const Cash *)addr); + return true; +} PGSTROM_SQLTYPE_OPERATORS(money, true, 8, sizeof(Cash)); PG_SIMPLE_COMPARE_TEMPLATE(cash_,money,money,) /* @@ -720,9 +734,6 @@ uuid_cmp_internal(const xpu_uuid_t *datum_a, return 0; } - - - STATIC_FUNCTION(bool) xpu_uuid_datum_comp(kern_context *kcxt, int *p_comp, @@ -738,6 +749,20 @@ xpu_uuid_datum_comp(kern_context *kcxt, UUID_LEN); return true; } + +STATIC_FUNCTION(bool) +xpu_uuid_datum_load_heap(kern_context *kcxt, + kvec_datum_t *__result, + int kvec_id, + const char *addr) +{ + kvec_uuid_t *result = (kvec_uuid_t *)__result; + + kvec_update_nullmask(&result->nullmask, kvec_id, addr); + if (addr) + memcpy(&result->values[kvec_id], addr, UUID_LEN); + return true; +} PGSTROM_SQLTYPE_OPERATORS(uuid, false, 1, UUID_LEN); #define PG_UUID_COMPARE_TEMPLATE(NAME,OPER) \ @@ -894,6 +919,20 @@ xpu_macaddr_datum_comp(kern_context *kcxt, *p_comp = macaddr_cmp_internal(a, b); return true; } + +STATIC_FUNCTION(bool) +xpu_macaddr_datum_load_heap(kern_context *kcxt, + kvec_datum_t *__result, + int kvec_id, + const char *addr) +{ + kvec_macaddr_t *result = (kvec_macaddr_t *)__result; + + kvec_update_nullmask(&result->nullmask, kvec_id, addr); + if (addr) + memcpy(&result->values[kvec_id], addr, sizeof(macaddr)); + return true; +} PGSTROM_SQLTYPE_OPERATORS(macaddr, false, 4, sizeof(macaddr)); #define PG_MACADDR_COMPARE_TEMPLATE(NAME,OPER) \ @@ -1174,6 +1213,58 @@ xpu_inet_datum_comp(kern_context *kcxt, *p_comp = inet_cmp_internal(a, b); return true; } + +STATIC_FUNCTION(bool) +xpu_inet_datum_load_heap(kern_context *kcxt, + kvec_datum_t *__result, + int kvec_id, + const char *addr) +{ + kvec_inet_t *result = (kvec_inet_t *)__result; + const inet_struct *in; + int sz; + + kvec_update_nullmask(&result->nullmask, kvec_id, addr); + if (addr) + { + if (VARATT_IS_EXTERNAL(addr) || VARATT_IS_COMPRESSED(addr)) + { + STROM_CPU_FALLBACK(kcxt, "inet value is compressed or toasted"); + return false; + } + in = (const inet_struct *)VARDATA_ANY(addr); + sz = VARSIZE_ANY_EXHDR(addr); + + if (sz == offsetof(inet_struct, ipaddr[4])) + { + if (in->family != PGSQL_AF_INET) + { + STROM_ELOG(kcxt, "inet (ipv4) value corruption"); + return false; + } + result->family[kvec_id] = in->family; + result->bits[kvec_id] = in->bits; + memcpy(&result->ipaddr[16 * kvec_id], in->ipaddr, 4); + } + else if (sz == offsetof(inet_struct, ipaddr[16])) + { + if (in->family != PGSQL_AF_INET6) + { + STROM_ELOG(kcxt, "inet (ipv6) value corruption"); + return false; + } + result->family[kvec_id] = in->family; + result->bits[kvec_id] = in->bits; + memcpy(&result->ipaddr[16 * kvec_id], in->ipaddr, 16); + } + else + { + STROM_ELOG(kcxt, "Bug? inet value is corrupted"); + return false; + } + } + return true; +} PGSTROM_SQLTYPE_OPERATORS(inet, false, 4, -1); #define PG_NETWORK_COMPARE_TEMPLATE(NAME,OPER) \ diff --git a/src/xpu_misclib.h b/src/xpu_misclib.h index 2b5f30a76..753c6239d 100644 --- a/src/xpu_misclib.h +++ b/src/xpu_misclib.h @@ -67,7 +67,14 @@ typedef struct PGSTROM_SQLTYPE_SIMPLE_DECLARATION(money, int64_t); PGSTROM_SQLTYPE_SIMPLE_DECLARATION(uuid, pg_uuid_t); PGSTROM_SQLTYPE_SIMPLE_DECLARATION(macaddr, macaddr); -PGSTROM_SQLTYPE_SIMPLE_DECLARATION(inet, inet_struct); + +typedef struct { + KVEC_DATUM_COMMON_FIELD; + uint8_t family[KVEC_UNITSZ]; + uint8_t bits[KVEC_UNITSZ]; + uint8_t ipaddr[16 * KVEC_UNITSZ]; +} kvec_inet_t; +__PGSTROM_SQLTYPE_SIMPLE_DECLARATION(inet, inet_struct); EXTERN_FUNCTION(int) xpu_interval_write_heap(kern_context *kcxt, diff --git a/src/xpu_numeric.cu b/src/xpu_numeric.cu index 986f3ed9d..eb5f78a1d 100644 --- a/src/xpu_numeric.cu +++ b/src/xpu_numeric.cu @@ -134,6 +134,28 @@ xpu_numeric_datum_comp(kern_context *kcxt, *p_comp = __numeric_compare(a, b); return true; } + + +STATIC_FUNCTION(bool) +xpu_numeric_datum_load_heap(kern_context *kcxt, + kvec_datum_t *__result, + int kvec_id, + const char *addr) +{ + kvec_numeric_t *result = (kvec_numeric_t *)__result; + + kvec_update_nullmask(&result->nullmask, kvec_id, addr); + if (addr) + { + xpu_numeric_t num; + + __xpu_numeric_from_varlena(&num, (const varlena *)addr); + result->kinds[kvec_id] = num.kind; + result->weights[kvec_id] = num.weight; + result->values[kvec_id] = num.value; + } + return true; +} PGSTROM_SQLTYPE_OPERATORS(numeric, false, 4, -1); PUBLIC_FUNCTION(bool) diff --git a/src/xpu_numeric.h b/src/xpu_numeric.h index 54d1283be..89be3f225 100644 --- a/src/xpu_numeric.h +++ b/src/xpu_numeric.h @@ -12,6 +12,13 @@ #ifndef XPU_NUMERIC_H #define XPU_NUMERIC_H +typedef struct { + KVEC_DATUM_COMMON_FIELD; + uint8_t kinds[KVEC_UNITSZ]; + int16_t weights[KVEC_UNITSZ]; + int128_t values[KVEC_UNITSZ]; +} kvec_numeric_t; + typedef struct { XPU_DATUM_COMMON_FIELD; uint8_t kind; /* one of XPU_NUMERIC_KIND__* below */ diff --git a/src/xpu_postgis.cu b/src/xpu_postgis.cu index 99d86ffef..d4657184e 100644 --- a/src/xpu_postgis.cu +++ b/src/xpu_postgis.cu @@ -516,6 +516,51 @@ xpu_geometry_datum_comp(kern_context *kcxt, STROM_ELOG(kcxt, "geometry type has no compare function"); return false; } + +STATIC_FUNCTION(bool) +xpu_geometry_datum_load_heap(kern_context *kcxt, + kvec_datum_t *__result, + int kvec_id, + const char *addr) +{ + kvec_geometry_t *result = (kvec_geometry_t *)__result; + + kvec_update_nullmask(&result->nullmask, kvec_id, addr); + if (addr) + { + if (VARATT_IS_EXTERNAL(addr) || VARATT_IS_COMPRESSED(addr)) + { + result->type[kvec_id] = GEOM_INVALID_VARLENA; + result->rawdata[kvec_id] = addr; + } + else + { + __GSERIALIZED *g = (__GSERIALIZED *)VARDATA_ANY(addr); + int32_t sz = VARSIZE_ANY_EXHDR(addr); + xpu_geometry_t geom; + + if ((g->gflags & G2FLAG_VER_0) != 0) + { + if (!__geometry_datum_ref_v2(kcxt, &geom, g, sz)) + return false; + } + else + { + if (!__geometry_datum_ref_v1(kcxt, &geom, g, sz)) + return false; + } + assert(!XPU_DATUM_ISNULL(&geom)); + result->type[kvec_id] = geom.type; + result->type[kvec_id] = geom.flags; + result->srid[kvec_id] = geom.srid; + result->nitems[kvec_id] = geom.nitems; + result->rawsize[kvec_id] = geom.rawsize; + result->rawdata[kvec_id] = geom.rawdata; + result->bbox[kvec_id] = geom.bbox; + } + } + return true; +} PGSTROM_SQLTYPE_OPERATORS(geometry,false,4,-1); /* ================================================================ @@ -595,6 +640,27 @@ xpu_box2df_datum_comp(kern_context *kcxt, STROM_ELOG(kcxt, "box2df type has no compare function"); return false; } + +STATIC_FUNCTION(bool) +xpu_box2df_datum_load_heap(kern_context *kcxt, + kvec_datum_t *__result, + int kvec_id, + const char *addr) +{ + kvec_box2df_t *result = (kvec_box2df_t *)__result; + + kvec_update_nullmask(&result->nullmask, kvec_id, addr); + if (addr) + { + const geom_bbox_2d *bbox = (const geom_bbox_2d *)addr; + + result->xmin[kvec_id] = bbox->xmin; + result->xmax[kvec_id] = bbox->xmax; + result->ymin[kvec_id] = bbox->ymin; + result->ymax[kvec_id] = bbox->ymax; + } + return true; +} PGSTROM_SQLTYPE_OPERATORS(box2df,false,1,sizeof(geom_bbox_2d)); /* ================================================================ diff --git a/src/xpu_postgis.h b/src/xpu_postgis.h index beb1f087e..b920b0205 100644 --- a/src/xpu_postgis.h +++ b/src/xpu_postgis.h @@ -66,6 +66,7 @@ typedef struct #define GEOM_TRIANGLETYPE 14 #define GEOM_TINTYPE 15 #define GEOM_NUMTYPES 16 +#define GEOM_INVALID_VARLENA 255 #define GEOM_TYPE_IS_VALID(gs_type) ((gs_type) >= 1 && (gs_type) <= GEOM_NUMTYPES) /* see LWFLAG_* in CPU code; at liblwgeom.h */ @@ -118,7 +119,14 @@ typedef union geom_bbox_4d d4; } geom_bbox; -PGSTROM_SQLTYPE_SIMPLE_DECLARATION(box2df, geom_bbox_2d); +typedef struct { + KVEC_DATUM_COMMON_FIELD; + float4_t xmin[KVEC_UNITSZ]; + float4_t xmax[KVEC_UNITSZ]; + float4_t ymin[KVEC_UNITSZ]; + float4_t ymax[KVEC_UNITSZ]; +} kvec_box2df_t; +__PGSTROM_SQLTYPE_SIMPLE_DECLARATION(box2df, geom_bbox_2d); INLINE_FUNCTION(size_t) geometry_bbox_size(uint32_t geom_flags) @@ -134,6 +142,18 @@ geometry_bbox_size(uint32_t geom_flags) #define SRID_MAXIMUM 999999 #define SRID_USER_MAXIMUM 998999 +typedef struct +{ + KVEC_DATUM_COMMON_FIELD; + uint8_t type[KVEC_UNITSZ]; + uint16_t flags[KVEC_UNITSZ]; + int32_t srid[KVEC_UNITSZ]; + uint32_t nitems[KVEC_UNITSZ]; + uint32_t rawsize[KVEC_UNITSZ]; + const char *rawdata[KVEC_UNITSZ]; + const geom_bbox *bbox[KVEC_UNITSZ]; +} kvec_geometry_t; + typedef struct { XPU_DATUM_COMMON_FIELD; @@ -178,121 +198,4 @@ typedef struct double x, y, z, m; } POINT4D; -#if 0 -/* for DATUM_CLASS__GEOMETRY */ -DEVICE_FUNCTION(cl_uint) -pg_geometry_datum_length(kern_context *kcxt, Datum datum); -DEVICE_FUNCTION(cl_uint) -pg_geometry_datum_write(kern_context *kcxt, char *dest, Datum datum); - -/* - * box2df operators & functions - */ -DEVICE_FUNCTION(pg_bool_t) -pgfn_geometry_overlaps(kern_context *kcxt, - const pg_geometry_t &arg1, - const pg_geometry_t &arg2); -DEVICE_FUNCTION(pg_bool_t) -pgfn_box2df_geometry_overlaps(kern_context *kcxt, - const pg_box2df_t &arg1, - const pg_geometry_t &arg2); -DEVICE_FUNCTION(pg_bool_t) -pgfn_geometry_contains(kern_context *kcxt, - const pg_geometry_t &arg1, - const pg_geometry_t &arg2); -DEVICE_FUNCTION(pg_bool_t) -pgfn_box2df_geometry_contains(kern_context *kcxt, - const pg_box2df_t &arg1, - const pg_geometry_t &arg2); -DEVICE_FUNCTION(pg_bool_t) -pgfn_geometry_within(kern_context *kcxt, - const pg_geometry_t &arg1, - const pg_geometry_t &arg2); -DEVICE_FUNCTION(pg_bool_t) -pgfn_box2df_geometry_within(kern_context *kcxt, - const pg_box2df_t &arg1, - const pg_geometry_t &arg2); - -DEVICE_FUNCTION(pg_geometry_t) -pgfn_st_expand(kern_context *kcxt, - const pg_geometry_t &arg1, pg_float8_t arg2); - -/* - * GiST index handlers - */ -DEVICE_FUNCTION(cl_bool) -pgindex_gist_geometry_overlap(kern_context *kcxt, - PageHeaderData *i_page, - const pg_box2df_t &i_var, - const pg_geometry_t &i_arg); -DEVICE_FUNCTION(cl_bool) -pgindex_gist_box2df_overlap(kern_context *kcxt, - PageHeaderData *i_page, - const pg_box2df_t &i_var, - const pg_box2df_t &i_arg); -DEVICE_FUNCTION(cl_bool) -pgindex_gist_geometry_contains(kern_context *kcxt, - PageHeaderData *i_page, - const pg_box2df_t &i_var, - const pg_geometry_t &i_arg); -DEVICE_FUNCTION(cl_bool) -pgindex_gist_box2df_contains(kern_context *kcxt, - PageHeaderData *i_page, - const pg_box2df_t &i_var, - const pg_box2df_t &i_arg); -DEVICE_FUNCTION(cl_bool) -pgindex_gist_geometry_contained(kern_context *kcxt, - PageHeaderData *i_page, - const pg_box2df_t &i_var, - const pg_geometry_t &i_arg); -DEVICE_FUNCTION(cl_bool) -pgindex_gist_box2df_contained(kern_context *kcxt, - PageHeaderData *i_page, - const pg_box2df_t &i_var, - const pg_box2df_t &i_arg); - -/* - * PostGIS functions - */ -DEVICE_FUNCTION(pg_geometry_t) -pgfn_st_setsrid(kern_context *kcxt, - const pg_geometry_t &arg1, pg_int4_t arg2); -DEVICE_FUNCTION(pg_geometry_t) -pgfn_st_makepoint2(kern_context *kcxt, - pg_float8_t x, pg_float8_t y); -DEVICE_FUNCTION(pg_geometry_t) -pgfn_st_makepoint3(kern_context *kcxt, - pg_float8_t x, pg_float8_t y, pg_float8_t z); -DEVICE_FUNCTION(pg_geometry_t) -pgfn_st_makepoint4(kern_context *kcxt, - pg_float8_t x, pg_float8_t y, - pg_float8_t z, pg_float8_t m); -DEVICE_FUNCTION(pg_float8_t) -pgfn_st_distance(kern_context *kcxt, - const pg_geometry_t &arg1, - const pg_geometry_t &arg2); -DEVICE_FUNCTION(pg_bool_t) -pgfn_st_dwithin(kern_context *kcxt, - const pg_geometry_t &arg1, - const pg_geometry_t &arg2, - pg_float8_t arg3); -DEVICE_FUNCTION(pg_int4_t) -pgfn_st_linecrossingdirection(kern_context *kcxt, - const pg_geometry_t &arg1, - const pg_geometry_t &arg2); -DEVICE_FUNCTION(pg_text_t) -pgfn_st_relate(kern_context *kcxt, - const pg_geometry_t &arg1, - const pg_geometry_t &arg2); -DEVICE_FUNCTION(pg_bool_t) -pgfn_st_contains(kern_context *kcxt, - const pg_geometry_t &arg1, - const pg_geometry_t &arg2); -DEVICE_FUNCTION(pg_bool_t) -pgfn_st_crosses(kern_context *kcxt, - const pg_geometry_t &arg1, - const pg_geometry_t &arg2); - -#endif - #endif /* XPU_POSTGIS_H */ diff --git a/src/xpu_textlib.cu b/src/xpu_textlib.cu index 578e0f91d..6440ce2c4 100644 --- a/src/xpu_textlib.cu +++ b/src/xpu_textlib.cu @@ -161,6 +161,33 @@ xpu_bpchar_datum_comp(kern_context *kcxt, *p_comp = comp; return true; } + +STATIC_FUNCTION(bool) +xpu_bpchar_datum_load_heap(kern_context *kcxt, + kvec_datum_t *__result, + int kvec_id, + const char *addr) +{ + kvec_bpchar_t *result = (kvec_bpchar_t *)__result; + + kvec_update_nullmask(&result->nullmask, kvec_id, addr); + if (addr) + { + if (VARATT_IS_EXTERNAL(addr) || VARATT_IS_COMPRESSED(addr)) + { + result->values[kvec_id] = addr; + result->length[kvec_id] = -1; + } + else + { + const char *str = VARDATA_ANY(addr); + + result->values[kvec_id] = str; + result->length[kvec_id] = bpchar_truelen(str, VARSIZE_ANY_EXHDR(addr)); + } + } + return true; +} PGSTROM_SQLTYPE_OPERATORS(bpchar, false, 4, -1); /* @@ -296,6 +323,31 @@ xpu_text_datum_comp(kern_context *kcxt, *p_comp = comp; return true; } + +STATIC_FUNCTION(bool) +xpu_text_datum_load_heap(kern_context *kcxt, + kvec_datum_t *__result, + int kvec_id, + const char *addr) +{ + kvec_text_t *result = (kvec_text_t *)__result; + + kvec_update_nullmask(&result->nullmask, kvec_id, addr); + if (addr) + { + if (VARATT_IS_EXTERNAL(addr) || VARATT_IS_COMPRESSED(addr)) + { + result->values[kvec_id] = addr; + result->length[kvec_id] = -1; + } + else + { + result->values[kvec_id] = VARDATA_ANY(addr); + result->length[kvec_id] = VARSIZE_ANY_EXHDR(addr); + } + } + return true; +} PGSTROM_SQLTYPE_OPERATORS(text, false, 4, -1); /* @@ -424,6 +476,31 @@ xpu_bytea_datum_comp(kern_context *kcxt, *p_comp = comp; return true; } + +STATIC_FUNCTION(bool) +xpu_bytea_datum_load_heap(kern_context *kcxt, + kvec_datum_t *__result, + int kvec_id, + const char *addr) +{ + kvec_bytea_t *result = (kvec_bytea_t *)__result; + + kvec_update_nullmask(&result->nullmask, kvec_id, addr); + if (addr) + { + if (VARATT_IS_EXTERNAL(addr) || VARATT_IS_COMPRESSED(addr)) + { + result->values[kvec_id] = addr; + result->length[kvec_id] = -1; + } + else + { + result->values[kvec_id] = VARDATA_ANY(addr); + result->length[kvec_id] = VARSIZE_ANY_EXHDR(addr); + } + } + return true; +} PGSTROM_SQLTYPE_OPERATORS(bytea, false, 4, -1); /* diff --git a/src/xpu_timelib.cu b/src/xpu_timelib.cu index 12cd02a90..6be231c01 100644 --- a/src/xpu_timelib.cu +++ b/src/xpu_timelib.cu @@ -145,6 +145,20 @@ xpu_date_datum_comp(kern_context *kcxt, *p_comp = 0; return true; } + +STATIC_FUNCTION(bool) +xpu_date_datum_load_heap(kern_context *kcxt, + kvec_datum_t *__result, + int kvec_id, + const char *addr) +{ + kvec_date_t *result = (kvec_date_t *)__result; + + kvec_update_nullmask(&result->nullmask, kvec_id, addr); + if (addr) + result->values[kvec_id] = *((const DateADT *)addr); + return true; +} PGSTROM_SQLTYPE_OPERATORS(date, true, 4, sizeof(DateADT)); /* @@ -235,6 +249,20 @@ xpu_time_datum_comp(kern_context *kcxt, *p_comp = 0; return true; } + +STATIC_FUNCTION(bool) +xpu_time_datum_load_heap(kern_context *kcxt, + kvec_datum_t *__result, + int kvec_id, + const char *addr) +{ + kvec_time_t *result = (kvec_time_t *)__result; + + kvec_update_nullmask(&result->nullmask, kvec_id, addr); + if (addr) + result->values[kvec_id] = *((const TimeADT *)addr); + return true; +} PGSTROM_SQLTYPE_OPERATORS(time, true, 8, sizeof(TimeADT)); /* @@ -315,11 +343,25 @@ STATIC_FUNCTION(bool) xpu_timetz_datum_comp(kern_context *kcxt, int *p_comp, const xpu_datum_t *__a, - const xpu_datum_t *__b) + const xpu_datum_t *__b) { STROM_ELOG(kcxt, "timetz has no compare handler"); return false; } + +STATIC_FUNCTION(bool) +xpu_timetz_datum_load_heap(kern_context *kcxt, + kvec_datum_t *__result, + int kvec_id, + const char *addr) +{ + kvec_timetz_t *result = (kvec_timetz_t *)__result; + + kvec_update_nullmask(&result->nullmask, kvec_id, addr); + if (addr) + memcpy(&result->values[kvec_id], addr, SizeOfTimeTzADT); + return true; +} PGSTROM_SQLTYPE_OPERATORS(timetz, false, 8, SizeOfTimeTzADT); /* @@ -410,6 +452,20 @@ xpu_timestamp_datum_comp(kern_context *kcxt, *p_comp = 0; return true; } + +STATIC_FUNCTION(bool) +xpu_timestamp_datum_load_heap(kern_context *kcxt, + kvec_datum_t *__result, + int kvec_id, + const char *addr) +{ + kvec_timestamp_t *result = (kvec_timestamp_t *)__result; + + kvec_update_nullmask(&result->nullmask, kvec_id, addr); + if (addr) + result->values[kvec_id] = *((const Timestamp *)addr); + return true; +} PGSTROM_SQLTYPE_OPERATORS(timestamp, true, 8, sizeof(Timestamp)); /* @@ -500,6 +556,20 @@ xpu_timestamptz_datum_comp(kern_context *kcxt, *p_comp = 0; return true; } + +STATIC_FUNCTION(bool) +xpu_timestamptz_datum_load_heap(kern_context *kcxt, + kvec_datum_t *__result, + int kvec_id, + const char *addr) +{ + kvec_timestamptz_t *result = (kvec_timestamptz_t *)__result; + + kvec_update_nullmask(&result->nullmask, kvec_id, addr); + if (addr) + result->values[kvec_id] = *((const TimestampTz *)addr); + return true; +} PGSTROM_SQLTYPE_OPERATORS(timestamptz, true, 8, sizeof(TimestampTz)); /* @@ -631,8 +701,21 @@ xpu_interval_datum_comp(kern_context *kcxt, *p_comp = (aval - bval); return true; } -PGSTROM_SQLTYPE_OPERATORS(interval, false, 8, sizeof(Interval)); +STATIC_FUNCTION(bool) +xpu_interval_datum_load_heap(kern_context *kcxt, + kvec_datum_t *__result, + int kvec_id, + const char *addr) +{ + kvec_interval_t *result = (kvec_interval_t *)__result; + + kvec_update_nullmask(&result->nullmask, kvec_id, addr); + if (addr) + memcpy(&result->values[kvec_id], addr, sizeof(Interval)); + return true; +} +PGSTROM_SQLTYPE_OPERATORS(interval, false, 8, sizeof(Interval)); STATIC_FUNCTION(int) date2j(int y, int m, int d)