Skip to content

Commit

Permalink
revised code build infrastructure for more portable GPU code
Browse files Browse the repository at this point in the history
issue at heterodb#696
  • Loading branch information
kaigai committed Mar 1, 2024
1 parent e187ef6 commit 967567b
Show file tree
Hide file tree
Showing 6 changed files with 406 additions and 116 deletions.
41 changes: 19 additions & 22 deletions src/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
# PG-Strom Makefile
#
include ../Makefile.common
PGXS := $(shell $(PG_CONFIG) --pgxs)

#
# Source of PG-Strom host code
Expand All @@ -13,6 +14,7 @@ STROM_OBJS = main.o githash.o extra.o codegen.o misc.o executor.o \
arrow_fdw.o arrow_nodes.o \
pcie.o float2.o tinyint.o aggfuncs.o
GENERATED-HEADERS = gpu_devattrs.h githash.c
STROM_HEADERS = arrow_defs.h arrow_ipc.h float2.h

#
# Githash.c checker
Expand All @@ -23,13 +25,6 @@ GITHASH_CHECKS := $(shell grep -q \"$(PGSTROM_GITHASH)\" githash.c 2>/dev/null |
# Source of NVIDIA GPU device code
#
include Makefile.cuda
__CUDA_OBJS = xpu_common cuda_gpuscan cuda_gpujoin cuda_gpupreagg \
xpu_basetype xpu_numeric xpu_timelib xpu_textlib xpu_misclib \
xpu_jsonlib xpu_postgis
CUDA_HEADERS = cuda_common.h xpu_common.h xpu_opcodes.h xpu_basetype.h \
xpu_numeric.h xpu_textlib.h xpu_timelib.h xpu_misclib.h \
xpu_jsonlib.h xpu_postgis.h
CUDA_OBJS = $(addsuffix .fatbin,$(__CUDA_OBJS))

#
# Installation Scripts
Expand All @@ -54,10 +49,12 @@ endif
ifneq ($(PGSTROM_GITHASH),)
PGSTROM_FLAGS += -DPGSTROM_GITHASH=\"$(PGSTROM_GITHASH)\"
endif
PGSTROM_FLAGS += -DPGSHAREDIR=\"$(shell $(PG_CONFIG) --sharedir)\"
PGSTROM_FLAGS += -DCUDA_MAXREGCOUNT=$(MAXREGCOUNT)
PGSTROM_FLAGS += -DCUDA_BUILTIN_OBJS="\"$(__CUDA_OBJS)\""
PGSTROM_FLAGS += -DNVCC_VERSION=$(NVCC_VERSION)
PGSTROM_FLAGS += -DPGSHAREDIR=\"$(shell $(PG_CONFIG) --sharedir)\" \
-DPGINCLUDEDIR=\"$(shell $(PG_CONFIG) --includedir-server)\" \
-DCUDA_MAXREGCOUNT=$(MAXREGCOUNT) \
-DCUDA_CORE_FILES="\"$(__CUDA_CORE_FILES)\"" \
-DCUDA_CORE_HEADERS="\"$(__CUDA_CORE_HEADERS)\"" \
-DCUDA_TOOLKIT_BASEDIR="\"$(CUDA_PATH)\""
PG_CPPFLAGS := $(PGSTROM_FLAGS) -I $(CUDA_IPATH)
SHLIB_LINK := -L $(CUDA_LPATH) -lcuda

Expand All @@ -66,13 +63,14 @@ SHLIB_LINK := -L $(CUDA_LPATH) -lcuda
#
MODULE_big = pg_strom
MODULEDIR = pg_strom
DATA = $(STROM_SQL) ../LICENSE Makefile.cuda
DATA = $(STROM_SQL) ../LICENSE Makefile.cuda \
$(CUDA_SRCS) $(CUDA_HEADERS) $(STROM_HEADERS)
OBJS = $(STROM_OBJS)
DATA_built = $(CUDA_OBJS)
EXTRA_CLEAN = $(DATA_built) $(GENERATED-HEADERS)
DATA_built = $(CUDA_FATBIN)
EXTRA_CLEAN = $(CUDA_OBJS) $(GENERATED-HEADERS) \
$(shell ls -d pgstrom-gpucode-V*-*.fatbin 2>/dev/null)
EXTENSION = pg_strom

PGXS := $(shell $(PG_CONFIG) --pgxs)
include $(PGXS)

#
Expand All @@ -96,11 +94,10 @@ githash.c:
#
# GPU Device Code
#
%.fatbin: %.cu $(CUDA_HEADERS)
$(NVCC) $(NVCC_FLAGS) -o $@ $<
%.o: %.cu $(CUDA_HEADERS)
$(NVCC) $(NVCC_FLAGS) --device-c -o $@ $<

#
# clean up only host code binary
#
clean-host:
rm -f $(MODULE_big).so $(OBJS)
# Link every GPU object listed in $(CUDA_OBJS) into the single fatbin image
# named by $(CUDA_FATBIN), using nvcc's device-link step.
$(CUDA_FATBIN): $(CUDA_OBJS)
	$(NVCC) $(NVCC_FLAGS) --device-link --fatbin -o $@ $(CUDA_OBJS)

# 'fatbin' is a convenience alias rather than a real file; declaring it phony
# keeps a stray file named 'fatbin' from masking the target.
.PHONY: fatbin
fatbin: $(CUDA_FATBIN)
41 changes: 21 additions & 20 deletions src/Makefile.cuda
Original file line number Diff line number Diff line change
Expand Up @@ -16,34 +16,35 @@ CUDA_PATH := $(shell for x in $(CUDA_PATH_LIST); \
CUDA_IPATH := $(CUDA_PATH)/include
CUDA_BPATH := $(CUDA_PATH)/bin
CUDA_LPATH := $(CUDA_PATH)/lib64
NVCC := $(CUDA_PATH)/bin/nvcc
NVCC := $(CUDA_BPATH)/nvcc
NVCC_VERSION := $(shell $(NVCC) --version | grep ^Cuda | \
grep -o -E 'V[0-9\.]+$$' | \
sed -e 's/V//g' -e 's/\./ /g' | \
awk '{ print $$1 * 1000 + $$2; }')
awk '{ print $$1 * 1000 + $$2 * 10; }')
# Max number of registers per GPU thread for PG-Strom modules
# (64k / 128 = up to 512 threads per SM)
MAXREGCOUNT := 128

# flags to build GPU libraries
__NVCC_FLAGS ?= $(NVCC_FLAGS_CUSTOM)
__NVCC_FLAGS += -I $(shell $(PG_CONFIG) --includedir-server) \
--fatbin \
--maxrregcount=$(MAXREGCOUNT) \
--gpu-architecture=compute_60

# supported device depends on CUDA version
# don't forget to update the logic of target_cc in cuda_program.c
ifeq ($(shell test $(NVCC_VERSION) -ge 11080; echo $$?), 0)
__NVCC_FLAGS += --gpu-code=sm_60,sm_61,sm_70,sm_75,sm_80,sm_86,sm_89,sm_90
__NVCC_NTHREADS := 8
else
#error "CUDA Toolkit version is too old (must be 11.8 or later)"
endif
__NVCC_FLAGS += --threads $(__NVCC_NTHREADS)
__NVCC_FLAGS += --source-in-ptx
__NVCC_FLAGS += -DHAVE_FLOAT2
__NVCC_FLAGS += --relocatable-device-code=true

--source-in-ptx -lineinfo \
-DHAVE_FLOAT2 \
-Xnvlink --suppress-stack-size-warning \
--gpu-architecture=native \
--threads 4
# nvcc flags
NVCC_FLAGS = $(__NVCC_FLAGS) -lineinfo
NVCC_FLAGS = $(__NVCC_FLAGS) $(NVCC_FLAGS_CUSTOM)

# PG-Strom GPU Code
#
# __CUDA_CORE_FILES holds the suffix-less base names of the built-in GPU
# sources; __CUDA_CORE_HEADERS holds the header files listed alongside them.
__CUDA_CORE_FILES = xpu_common cuda_gpuscan cuda_gpujoin cuda_gpupreagg \
xpu_basetype xpu_numeric xpu_timelib xpu_textlib \
xpu_misclib xpu_jsonlib xpu_postgis
__CUDA_CORE_HEADERS = cuda_common.h xpu_common.h xpu_opcodes.h xpu_basetype.h \
xpu_numeric.h xpu_textlib.h xpu_timelib.h xpu_misclib.h \
xpu_jsonlib.h xpu_postgis.h
# Object (.o) and source (.cu) names are derived from the same base names,
# extended by whatever $(CUDA_CUSTOM_FILES) / $(CUDA_CUSTOM_HEADERS) contain
# (neither is defined in the portion of the file visible here).
CUDA_OBJS = $(addsuffix .o, $(__CUDA_CORE_FILES) $(CUDA_CUSTOM_FILES))
CUDA_SRCS = $(addsuffix .cu,$(__CUDA_CORE_FILES) $(CUDA_CUSTOM_FILES))
CUDA_HEADERS = $(__CUDA_CORE_HEADERS) $(CUDA_CUSTOM_HEADERS)
# MD5 digest of the concatenated contents of all headers and sources.
# NOTE(review): this is a recursive '=' assignment, so the shell pipeline is
# re-run on every expansion of CUDA_MD5SUM (and of CUDA_FATBIN below). ':='
# would run it once, but only if CUDA_CUSTOM_* are already set here - confirm.
CUDA_MD5SUM = $(shell cat $(CUDA_HEADERS) $(CUDA_SRCS) | md5sum | awk '{print $$1}')
# The fatbin file name embeds $(NVCC_VERSION) and the digest above.
CUDA_FATBIN = pgstrom-gpucode-V$(NVCC_VERSION)-$(CUDA_MD5SUM).fatbin
Loading

0 comments on commit 967567b

Please sign in to comment.