From a151fd3c5b86a378ea3dde0a724b1d7345d3f170 Mon Sep 17 00:00:00 2001 From: KaiGai Kohei Date: Sat, 2 Mar 2024 00:34:39 +0900 Subject: [PATCH] pgstrom_setup_gpu_fatbin() also checks pre-built fatbin, if any --- src/Makefile | 8 +++++--- src/Makefile.cuda | 26 ++++++++++++++++++-------- src/gpu_device.c | 27 +++++++++++++++++++-------- 3 files changed, 42 insertions(+), 19 deletions(-) diff --git a/src/Makefile b/src/Makefile index 17d0e0b31..05ca84aa6 100644 --- a/src/Makefile +++ b/src/Makefile @@ -2,7 +2,6 @@ # PG-Strom Makefile # include ../Makefile.common -PGXS := $(shell $(PG_CONFIG) --pgxs) # # Source of PG-Strom host code @@ -66,11 +65,14 @@ MODULEDIR = pg_strom DATA = $(STROM_SQL) ../LICENSE Makefile.cuda \ $(CUDA_SRCS) $(CUDA_HEADERS) $(STROM_HEADERS) OBJS = $(STROM_OBJS) +ifeq ($(WITH_FATBIN),1) DATA_built = $(CUDA_FATBIN) +endif EXTRA_CLEAN = $(CUDA_OBJS) $(GENERATED-HEADERS) \ $(shell ls -d pgstrom-gpucode-V*-*.fatbin 2>/dev/null) EXTENSION = pg_strom +PGXS := $(shell $(PG_CONFIG) --pgxs) include $(PGXS) # @@ -95,9 +97,9 @@ githash.c: # GPU Device Code # %.o: %.cu $(CUDA_HEADERS) - $(NVCC) $(NVCC_FLAGS) --device-c -o $@ $< + $(NVCC) $(NVCC_CFLAGS) --device-c -o $@ $< $(CUDA_FATBIN): $(CUDA_OBJS) - $(NVCC) $(NVCC_FLAGS) --device-link --fatbin -o $@ $(CUDA_OBJS) + $(NVCC) $(NVCC_LDFLAGS) --device-link --fatbin -o $@ $(CUDA_OBJS) fatbin: $(CUDA_FATBIN) diff --git a/src/Makefile.cuda b/src/Makefile.cuda index 509f04181..a392c0a76 100644 --- a/src/Makefile.cuda +++ b/src/Makefile.cuda @@ -25,16 +25,26 @@ NVCC_VERSION := $(shell $(NVCC) --version | grep ^Cuda | \ # (64k / 128 = up to 512 threads per SM) MAXREGCOUNT := 128 +__NUM_GPUS := $(shell ls -d /proc/driver/nvidia/gpus/*/information | wc -l) +ifeq ($(__NUM_GPUS),0) +__NVCC_TARGET := --gpu-architecture=compute_60 \ + --gpu-code=sm_60,sm_61,sm_70,sm_75,sm_80,sm_86,sm_89,sm_90 \ + --threads 8 +else +__NVCC_TARGET := --gpu-architecture=native +endif + # flags to build GPU libraries -__NVCC_FLAGS += -I $(shell $(PG_CONFIG) --includedir-server) \ - --maxrregcount=$(MAXREGCOUNT) \ - --source-in-ptx -lineinfo \ - -DHAVE_FLOAT2 \ - -Xnvlink --suppress-stack-size-warning \ - --gpu-architecture=native \ - --threads 4 +__NVCC_CFLAGS += -I $(shell $(PG_CONFIG) --includedir-server) \ + --maxrregcount=$(MAXREGCOUNT) \ + --source-in-ptx -lineinfo \ + -DHAVE_FLOAT2 \ + $(__NVCC_TARGET) +__NVCC_LDFLAGS += -Xnvlink --suppress-stack-size-warning \ + $(__NVCC_TARGET) # nvcc flags -NVCC_FLAGS = $(__NVCC_FLAGS) $(NVCC_FLAGS_CUSTOM) +NVCC_CFLAGS = $(__NVCC_CFLAGS) $(NVCC_FLAGS_CUSTOM) $(NVCC_CFLAGS_CUSTOM) +NVCC_LDFLAGS = $(__NVCC_LDFLAGS) $(NVCC_FLAGS_CUSTOM) $(NVCC_LDFLAGS_CUSTOM) # PG-Strom GPU Code __CUDA_CORE_FILES = xpu_common cuda_gpuscan cuda_gpujoin cuda_gpupreagg \ diff --git a/src/gpu_device.c b/src/gpu_device.c index 0f06f50ac..769300c9a 100644 --- a/src/gpu_device.c +++ b/src/gpu_device.c @@ -592,7 +592,8 @@ __validate_gpu_fatbin_file(const char *fatbin_dir, const char *fatbin_file) * __rebuild_gpu_fatbin_file */ static void -__rebuild_gpu_fatbin_file(const char *fatbin_file) +__rebuild_gpu_fatbin_file(const char *fatbin_dir, + const char *fatbin_file) { StringInfoData cmd; char workdir[200]; @@ -665,8 +666,8 @@ __rebuild_gpu_fatbin_file(const char *fatbin_file) appendStringInfo(&cmd, "mkdir -p '%s'; " "install -m 0644 %s/%s '%s'", - PGSTROM_FATBIN_DIR, - workdir, fatbin_file, PGSTROM_FATBIN_DIR); + fatbin_dir, + workdir, fatbin_file, fatbin_dir); strcpy(namebuf, CUDA_CORE_FILES); for (tok = strtok_r(namebuf, " ", &pos); tok != NULL; @@ -692,13 +693,23 @@ static void pgstrom_setup_gpu_fatbin(void) { const char *fatbin_file = __setup_gpu_fatbin_filename(); + const char *fatbin_dir = PGSHAREDIR "/pg_strom"; char *path; - if (!__validate_gpu_fatbin_file(PGSTROM_FATBIN_DIR, fatbin_file)) - __rebuild_gpu_fatbin_file(fatbin_file); - - path = alloca(strlen(fatbin_file) + 200); - sprintf(path, "%s/%s", PGSTROM_FATBIN_DIR, fatbin_file); + if (!__validate_gpu_fatbin_file(fatbin_dir, + fatbin_file)) + { + fatbin_dir = PGSTROM_FATBIN_DIR; + if (!__validate_gpu_fatbin_file(fatbin_dir, + fatbin_file)) + { + __rebuild_gpu_fatbin_file(fatbin_dir, + fatbin_file); + } + } + path = alloca(strlen(fatbin_dir) + + strlen(fatbin_file) + 100); + sprintf(path, "%s/%s", fatbin_dir, fatbin_file); pgstrom_fatbin_image_filename = strdup(path); if (!pgstrom_fatbin_image_filename) elog(ERROR, "out of memory");