diff --git a/README.md b/README.md index 54c891dd4..e5c66683d 100644 --- a/README.md +++ b/README.md @@ -126,14 +126,10 @@ grbrun ./a.out In more detail, the steps to follow are: -1. Edit the `include/graphblas/base/config.hpp`. In particular, please ensure - that `config::SIMD_SIZE::bytes` defined in that file is set correctly with - respect to the target architecture. - -2. Create an empty directory for building ALP and move into it: +1. Create an empty directory for building ALP and move into it: `mkdir build && cd build`. -3. Invoke the `bootstrap.sh` script located inside the ALP root directory +2. Invoke the `bootstrap.sh` script located inside the ALP root directory `` to generate the build infrastructure via CMake inside the the current directory: @@ -142,9 +138,9 @@ In more detail, the steps to follow are: - note: add `--with-lpf=/path/to/lpf/install/dir` if you have LPF installed and would like to use it. -4. Issue `make -j` to compile the C++11 ALP library for the configured backends. +3. Issue `make -j` to compile the C++11 ALP library for the configured backends. -5. (*Optional*) To later run all unit tests, several datasets must be made +4. (*Optional*) To later run all unit tests, several datasets must be made available. Please run the `/tools/downloadDatasets.sh` script for @@ -152,7 +148,7 @@ In more detail, the steps to follow are: b. the option to automatically download them. -6. (*Optional*) To make the ALP documentation, issue `make userdocs`. This +5. (*Optional*) To make the ALP documentation, issue `make userdocs`. This generates both a. LaTeX in `/docs/user/latex/refman.tex`, and @@ -162,7 +158,7 @@ In more detail, the steps to follow are: To build a PDF from the LaTeX sources, cd into the directory mentioned, and issue `make`. -7. (*Optional*) Issue `make -j smoketests` to run a quick set of functional +6. (*Optional*) Issue `make -j smoketests` to run a quick set of functional tests. Please scan the output for any failed tests. 
If you do this with LPF enabled, and LPF was configured to use an MPI engine (which is the default), and the MPI implementation used is _not_ MPICH, then @@ -171,15 +167,15 @@ In more detail, the steps to follow are: implementation you used, and uncomment the lines directly below each occurrence. -8. (*Optional*) Issue `make -j unittests` to run an exhaustive set of unit +7. (*Optional*) Issue `make -j unittests` to run an exhaustive set of unit tests. Please scan the output for any failed tests. If you do this with LPF enabled, please edit `tests/parse_env.sh` if required as described in step 5. -9. Issue `make -j install` to install ALP into the install directory configured +8. Issue `make -j install` to install ALP into the install directory configured during step 1. -10. (*Optional*) Issue `source /bin/setenv` to make +9. (*Optional*) Issue `source /bin/setenv` to make available the `grbcxx` and `grbrun` compiler wrapper and runner. Congratulations, you are now ready for developing and integrating ALP @@ -230,6 +226,8 @@ and lists technical papers. - [Development in ALP](#development-in-alp) - [Acknowledgements](#acknowledgements) - [Citing ALP, ALP/GraphBLAS, and ALP/Pregel](#citing-alp-alpgraphblas-and-alppregel) + - [ALP and ALP/GraphBLAS](#alp-and-alpgraphblas) + - [ALP/Pregel](#alppregel) # Configuration @@ -259,6 +257,8 @@ classes of backends. The main configuration file is found in 8. type used for indexing nonzeroes, as the `NonzeroIndexType` typedef; 9. index type used for vector coordinates, as the `VectorIndexType` typedef. +The most important parameters are automatically detected during the CMake +configuration (vector size, cache line size, L1 Data cache size). Other configuration values in this file are automatically inferred, are fixed non-configurable settings, or are presently not used by any ALP backend. @@ -418,10 +418,11 @@ large outputs is strongly advisable. 
### Compilation -Our backends auto-vectorise, hence please recall step 1 from the quick start -guide, and make sure the `include/graphblas/base/config.hpp` file reflects the -correct value for `config::SIMD_SIZE::bytes`. This value must be updated prior -to the compilation and installation of ALP. +Our backends auto-vectorise using the information in the +`include/graphblas/base/config.hpp`, especially `config::SIMD_SIZE::bytes`. +This and other values are automatically detected for the CPU the CMake +configuration runs on; the user may however want to set it to a different value +manually prior to the compilation and installation of ALP. When targeting different architectures with differing SIMD widths, different ALP installations for different architectures could be maintained. diff --git a/cmake/AddGRBInstall.cmake b/cmake/AddGRBInstall.cmake index 94bd58f31..f5f3ad178 100644 --- a/cmake/AddGRBInstall.cmake +++ b/cmake/AddGRBInstall.cmake @@ -33,6 +33,9 @@ set( ALP_UTILS_LIBRARY_OUTPUT_NAME "alp_utils" ) set( BINARY_LIBRARIES_INSTALL_DIR "${CMAKE_INSTALL_PREFIX}/lib" ) set( CMAKE_CONFIGS_INSTALL_DIR "${CMAKE_INSTALL_PREFIX}/cmake" ) set( NAMESPACE_NAME "ALPGraphBLAS") +set( ARCH_DETECT_APPS_DIR ${CMAKE_CURRENT_BINARY_DIR}/src/arch_info ) +set( ARCH_DETECT_APPS_INSTALL_DIR ${BIN_INSTALL_DIR}/arch_info ) + # installation export unit for ALL targets install( EXPORT GraphBLASTargets diff --git a/cmake/CompileFlags.cmake b/cmake/CompileFlags.cmake index 4c6c1d862..94be6e012 100644 --- a/cmake/CompileFlags.cmake +++ b/cmake/CompileFlags.cmake @@ -35,6 +35,10 @@ assert_defined_variables( TEST_PERFORMANCE_DEFINITIONS TEST_PERFORMANCE_OPTIONS ) +### ARCH INFO DETECTION +include( DetectArchInfo ) +assert_valid_variables( SIMD_SIZE L1DCACHE_SIZE CACHE_LINE_SIZE ) + # allow only Relase, Debug and Coverage set( CMAKE_CONFIGURATION_TYPES "Release;Debug;Coverage" CACHE STRING "Add the configurations that we need" FORCE @@ -60,10 +64,18 @@ endif() set( COMMON_OPTS "-g" "-Wall" 
"-Wextra" ) +set( arch_defs "_SIMD_SIZE=${SIMD_SIZE};_L1DCACHE_SIZE=${L1DCACHE_SIZE};_CACHE_LINE_SIZE=${CACHE_LINE_SIZE}" ) + # cache variable to allow manual tweaks from CMake cache set_valid_string( COMMON_DEFS_Release "${COMMON_COMPILE_DEFINITIONS}" "" ) set_valid_string( COMMON_DEFS_Debug "${COMMON_COMPILE_DEFINITIONS}" "" ) set_valid_string( COMMON_DEFS_Coverage "${COMMON_COMPILE_DEFINITIONS}" "" ) + +list( PREPEND COMMON_DEFS_Release ${arch_defs} ) +list( PREPEND COMMON_DEFS_Debug ${arch_defs} ) +list( PREPEND COMMON_DEFS_Coverage ${arch_defs} ) + + set_valid_string( COMMON_OPTS_Release "${COMMON_COMPILE_OPTIONS}" "${COMMON_OPTS}" ) diff --git a/cmake/DetectArchInfo.cmake b/cmake/DetectArchInfo.cmake new file mode 100644 index 000000000..3c7128a10 --- /dev/null +++ b/cmake/DetectArchInfo.cmake @@ -0,0 +1,127 @@ +# +# Copyright 2024 Huawei Technologies Co., Ltd. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +#[===================================================================[ +Detect Architectural Info for the system CPU + +Three parameters are detected and used during compilation: +1. maximum supported size of the SIMD vector +2. size of the L1 Data cache (shortly L1D) +3. size of the L1D cache line (typically the same for all caches) + +If any of this information cannot be gathered from hardware, a default is used. 
+#]===================================================================] + +assert_valid_variables( ARCH_DETECT_APPS_DIR ) + +set( _supported_arches "x86_64;aarch64" ) +if( NOT CMAKE_SYSTEM_PROCESSOR IN_LIST _supported_arches ) + message( WARNING "Architecture \"${CMAKE_SYSTEM_PROCESSOR}\" not supported" ) +else() + set( _supported_arch ON ) +endif() + +set( DEFAULT_SIMD_SIZE 64 ) +set( DEFAULT_L1CACHE_SIZE 32768 ) +set( DEFAULT_CACHE_LINE_SIZE 64 ) + +if( CMAKE_VERSION VERSION_LESS "3.25.0" ) + # old CMake versions have a different signature for try_compile() + # https://cmake.org/cmake/help/latest/command/try_run.html#try-compiling-and-running-source-files + set( _dest ${CMAKE_CURRENT_BINARY_DIR} ) +endif() + +# compile executable to detect SIMD ISA and run it +set( ARCH_DETECT_APPS_DIR ${CMAKE_CURRENT_BINARY_DIR}/src/arch_info ) +set( _simd_detect_destination detect_simd_isa ) + +set( SIMD_ISA_DETECT_APP OFF ) +# compile and also copy the file to a known folder in order to use it in the +# installation infrastructure: the grbcxx script needs it +if( _supported_arch ) + try_compile( COMPILED ${_dest} SOURCES ${CMAKE_SOURCE_DIR}/cmake/${CMAKE_SYSTEM_PROCESSOR}_simd_detect.c + COPY_FILE ${ARCH_DETECT_APPS_DIR}/${_simd_detect_destination} + COPY_FILE_ERROR COPY_MSG + ) +endif() +if( COMPILED ) + # attempt to run the compiled app + execute_process( + COMMAND ${ARCH_DETECT_APPS_DIR}/${_simd_detect_destination} + RESULT_VARIABLE RES + OUTPUT_VARIABLE SIMD_ISA + OUTPUT_STRIP_TRAILING_WHITESPACE + ) +endif() + +if( NOT COMPILED OR ( NOT RES STREQUAL "0" ) OR COPY_MSG ) + # if we could not compile or run, set defaults + set( SIMD_SIZE ${DEFAULT_SIMD_SIZE} ) + message( WARNING "Cannot detect SIMD ISA, thus applying default vector size: ${SIMD_SIZE}B" ) +else() + # set vector size based on detected SIMD ISA and warn in case of SVE or SVE2 + # not yet implemented + set( SIMD_ISA_DETECT_APP ${_simd_detect_destination} ) + if( SIMD_ISA STREQUAL "SVE" OR SIMD_ISA STREQUAL "SVE2" 
) + set( SIMD_SIZE ${DEFAULT_SIMD_SIZE} ) + message( WARNING "Detected SIMD ISA ${SIMD_ISA}, whose size is implementation-dependent and currently not detected. Please, consider filing an issue to the authors. Applying default vector size: ${SIMD_SIZE}B" ) + else() + if( SIMD_ISA STREQUAL "AVX512" ) + set( SIMD_SIZE 64 ) + elseif( SIMD_ISA STREQUAL "AVX2" ) + set( SIMD_SIZE 32 ) + elseif( SIMD_ISA STREQUAL "AVX" ) + set( SIMD_SIZE 16 ) + elseif( SIMD_ISA STREQUAL "NEON" OR SIMD_ISA STREQUAL "sse" ) # the x86_64 detector prints lowercase "sse" for SSE-only CPUs + set( SIMD_SIZE 16 ) + endif() + message( "Detected SIMD ISA: ${SIMD_ISA}; vector size : ${SIMD_SIZE}B" ) + endif() +endif() + +set( L1CACHE_DETECT_APP OFF ) +# for L1D information, use a Bash script, so just try to run it +execute_process( + COMMAND ${CMAKE_SOURCE_DIR}/cmake/l1_cache_info.sh + RESULT_VARIABLE RES + OUTPUT_VARIABLE CACHE_DETECT_OUTPUT + OUTPUT_STRIP_TRAILING_WHITESPACE +) +# copy the script to the build infrastructure, for testing and for installation +file( COPY ${CMAKE_SOURCE_DIR}/cmake/l1_cache_info.sh DESTINATION ${ARCH_DETECT_APPS_DIR} ) +if( NOT RES STREQUAL "0" ) + # could not run properly, set defaults (into the same variables the success path sets) + set( L1DCACHE_SIZE ${DEFAULT_L1CACHE_SIZE} ) + set( CACHE_LINE_SIZE ${DEFAULT_CACHE_LINE_SIZE} ) + message( WARNING "Cannot detect L1 cache features, thus applying default settings" ) +else() + set( L1CACHE_DETECT_APP l1_cache_info.sh ) + # parse multi-lines output and get each info; example output: + # TYPE: Data + # SIZE: 32768 + # LINE: 64 + string( REGEX MATCH + "TYPE:[ \t]*(Data|Unified)[ \t\r\n]+SIZE:[ \t]*([0-9]+)[ \t\r\n]+LINE:[ \t]*([0-9]+)[ \t\r\n]*" + MATCH_OUTPUT "${CACHE_DETECT_OUTPUT}" + ) + set( L1DCACHE_TYPE ${CMAKE_MATCH_1} ) + set( L1DCACHE_SIZE ${CMAKE_MATCH_2} ) + set( CACHE_LINE_SIZE ${CMAKE_MATCH_3} ) + if( L1DCACHE_TYPE STREQUAL "Unified" ) + message( WARNING "The L1 cache is Unified, so it may not be possible to effectively utilize its entire size (${L1DCACHE_SIZE}B) for the data." 
) + endif() +endif() +message( "L1 cache size: ${L1DCACHE_SIZE}B; cacheline size: ${CACHE_LINE_SIZE}B" ) diff --git a/cmake/aarch64_simd_detect.c b/cmake/aarch64_simd_detect.c new file mode 100644 index 000000000..094e5880d --- /dev/null +++ b/cmake/aarch64_simd_detect.c @@ -0,0 +1,58 @@ + +/* + * Copyright 2024 Huawei Technologies Co., Ltd. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <stdio.h> +#include <sys/auxv.h> + +/* + * Check the supported SIMD ISA in an ARM architecture, via getauxval(): + * https://man7.org/linux/man-pages/man3/getauxval.3.html + * + * Note that support for SVE2 may be too recent for the kernel/GLIBC version in + * use, hence the #ifdef on HWCAP2_SVE2. + * https://docs.kernel.org/arch/arm64/elf_hwcaps.html + * + * Also note that SVE (and SVE2) has implementation-dependent vector size, whose + * retrieval is currently not implemented; the build infrastructure properly + * warns about this case. + */ + +int main() { + +#ifdef HWCAP2_SVE2 + if( getauxval( AT_HWCAP2 ) & HWCAP2_SVE2 ) { + printf( "SVE2\n" ); + return 0; + } +#endif + + int retval = 0; + const unsigned long flags = getauxval( AT_HWCAP ); +#ifdef HWCAP_SVE + if( flags & HWCAP_SVE ) { + printf("SVE"); + } else +#endif + if ( flags & HWCAP_ASIMD ) { + printf( "NEON" ); + } else { + printf( "no SIMD ISA detected!" 
); + retval = 1; + } + printf( "\n" ); + return retval; +} diff --git a/cmake/l1_cache_info.sh b/cmake/l1_cache_info.sh new file mode 100755 index 000000000..62353aed1 --- /dev/null +++ b/cmake/l1_cache_info.sh @@ -0,0 +1,73 @@ +#!/bin/bash + +# +# Copyright 2024 Huawei Technologies Co., Ltd. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# Detect information about the L1D cache from the SYSFS interface, which is in +# Linux since 2008 + +function symbolic_to_bytes { + local symbolic_size="$1" + local _symbolic_size=${symbolic_size//M/*1024*1024} + local _byte_size=${_symbolic_size//K/*1024} + echo $((_byte_size)) +} + +function test_not_zero { + if [[ "$1" == "0" ]]; then + echo "0 value: makes no sense!" + exit 1 + fi +} + +INFO_ROOT="/sys/devices/system/cpu/cpu0/cache" + +# look for Data cache (Harvard architecture); Unified is also accepted +for f in ${INFO_ROOT}/index*; do + level=$(cat ${f}/level) + if [[ "$?" 
!= "0" ]]; then + echo "error detecting the cache level" + exit 1 + fi + if [[ "${level}" != "1" ]]; then + # if it's not a cache of level 1, skip + continue + fi + type=$(cat ${f}/type) + + if [[ "${type}" == "Data" || "${type}" == "Unified" ]]; then + cache_dir=${f} + break + fi +done + +if [[ -z "${cache_dir}" ]]; then + echo "cannot find cache info" + exit 1 +fi + +echo "TYPE: ${type}" + +cache_symbolic_size=$(cat ${cache_dir}/size) +cache_byte_size=$(symbolic_to_bytes ${cache_symbolic_size}) +test_not_zero "${cache_byte_size}" +echo "SIZE: ${cache_byte_size}" + +symbolic_line_size=$(cat ${cache_dir}/coherency_line_size) +line_byte_size=$(symbolic_to_bytes ${symbolic_line_size}) + +test_not_zero "${line_byte_size}" +echo "LINE: ${line_byte_size}" diff --git a/cmake/x86_64_simd_detect.c b/cmake/x86_64_simd_detect.c new file mode 100644 index 000000000..b25fb97db --- /dev/null +++ b/cmake/x86_64_simd_detect.c @@ -0,0 +1,47 @@ + +/* + * Copyright 2024 Huawei Technologies Co., Ltd. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <stdio.h> + +/* + * This file detects the SIMD ISA for x86 architectures using compiler built-in functionalities + * from https://gcc.gnu.org/onlinedocs/gcc/x86-Built-in-Functions.html#index-_005f_005fbuiltin_005fcpu_005fsupports-1 + * also supported in clang. + * + * Note that the SIMD support can be advertised by the CPU (e.g., via the CPUID + * instruction) despite being disabled by the Operating System. 
+ * The compiler's built-in functions check both conditions. + */ + +int main() { + __builtin_cpu_init (); + int retval = 0; + if (__builtin_cpu_supports( "avx512f" ) ) { + printf( "AVX512" ); + } else if ( __builtin_cpu_supports( "avx2" ) ) { + printf( "AVX2" ); + } else if ( __builtin_cpu_supports( "avx" ) ) { + printf( "AVX" ); + } else if ( __builtin_cpu_supports( "sse" ) ) { + printf( "sse" ); + } else { + printf( "no SIMD ISA detected!" ); + retval = 1; + } + printf( "\n" ); + return retval; +} diff --git a/docs/Build_and_test_infra.md b/docs/Build_and_test_infra.md index 8e28e47cb..701e4f78b 100644 --- a/docs/Build_and_test_infra.md +++ b/docs/Build_and_test_infra.md @@ -75,6 +75,17 @@ The building infrastructure allows users to select which backends are to be built together with the relevant build options (dependencies, additional compilation/optimization flags, ...). +As part of the CMake configuration, several architecture-dependent features are +detected automatically and reported (e.g., SIMD vector size, L1 Data cache size, +...), which are used to optimize the performance of ALP/GraphBLAS-based +applications. +In case of issues (e.g., failed detection of a feature), a warning +is displayed and sensible defaults are used to let users try ALP/GraphBLAS, even +without optimal settings. +Users should check the configuration output and verify the presence of warnings; +they are also invited to check the normal messages of the configuration output +to make sure the reported information is accurate. + There are **two ways to create the building infrastructure**, depending on the level of control you want over the build options. 
diff --git a/include/graphblas/base/config.hpp b/include/graphblas/base/config.hpp index f7796c852..456733ae6 100644 --- a/include/graphblas/base/config.hpp +++ b/include/graphblas/base/config.hpp @@ -28,6 +28,19 @@ #ifndef _H_GRB_CONFIG_BASE #define _H_GRB_CONFIG_BASE +#ifndef _SIMD_SIZE + #error "_SIMD_SIZE is not defined" +#endif + +#ifndef _L1DCACHE_SIZE + #error "_L1DCACHE_SIZE is not defined" +#endif + +#ifndef _CACHE_LINE_SIZE + #error "_CACHE_LINE_SIZE is not defined" +#endif + + #include //size_t #include @@ -91,7 +104,7 @@ namespace grb { * The cache line size in bytes. Update this value at compile time to * reflect the target architecture. */ - static constexpr size_t bytes = 64; + static constexpr size_t bytes = _CACHE_LINE_SIZE; public: @@ -120,7 +133,7 @@ namespace grb { * The SIMD size, in bytes. Update this value at compile time to reflect * the target architecture. */ - static constexpr size_t bytes = 64; + static constexpr size_t bytes = _SIMD_SIZE; public: @@ -235,7 +248,7 @@ namespace grb { /** @returns the private L1 data cache size, in bytes. 
*/ static constexpr size_t l1_cache_size() { - return 32768; + return _L1DCACHE_SIZE; } /** diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 50e731a30..faaf867b6 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -28,9 +28,12 @@ add_subdirectory( transition ) ### BUILD WRAPPER SCRIPTS FOR INSTALLATION assert_valid_variables( AVAILABLE_BACKENDS CMAKE_INSTALL_PREFIX INCLUDE_INSTALL_DIR - VERSION BIN_INSTALL_DIR + VERSION BIN_INSTALL_DIR DEFAULT_SIMD_SIZE DEFAULT_L1CACHE_SIZE + DEFAULT_CACHE_LINE_SIZE ARCH_DETECT_APPS_DIR +) +assert_defined_variables( COMMON_WRAPPER_DEFINITIONS COMMON_WRAPPER_OPTIONS + SIMD_ISA_DETECT_APP L1CACHE_DETECT_APP ) -assert_defined_variables( COMMON_WRAPPER_DEFINITIONS COMMON_WRAPPER_OPTIONS ) if( WITH_BSP1D_BACKEND OR WITH_HYBRID_BACKEND ) assert_valid_variables( LPFRUN_CMD MANUALRUN ) @@ -94,6 +97,16 @@ list( JOIN runenv_list " " BACKEND_RUNENV_SPACED ) list( JOIN runner_list " " BACKEND_RUNNER_SPACED ) configure_file( ${CMAKE_CURRENT_SOURCE_DIR}/grbrun.in ${CMAKE_CURRENT_BINARY_DIR}/grbrun @ONLY ) +# generate script to get architecture-specific flags +configure_file( ${CMAKE_CURRENT_SOURCE_DIR}/get_arch_flags.in ${ARCH_DETECT_APPS_DIR}/get_arch_flags @ONLY ) +# copy the whole directory with all executables to get arch-specific info +install( DIRECTORY ${ARCH_DETECT_APPS_DIR} + DESTINATION ${BIN_INSTALL_DIR} + FILE_PERMISSIONS OWNER_READ OWNER_WRITE OWNER_EXECUTE + GROUP_READ GROUP_EXECUTE + WORLD_READ WORLD_EXECUTE +) + # install them to the install folder with execute permission install( FILES ${CMAKE_CURRENT_BINARY_DIR}/setenv ${CMAKE_CURRENT_BINARY_DIR}/grbcxx @@ -107,7 +120,7 @@ install( FILES ${CMAKE_CURRENT_BINARY_DIR}/setenv ### GENERATE CMAKE INFRASTRUCTURE INSIDE INSTALLATION DIRECTORY -include(CMakePackageConfigHelpers) +include( CMakePackageConfigHelpers ) # write file with version information write_basic_package_version_file( diff --git a/src/get_arch_flags.in b/src/get_arch_flags.in new file mode 100755 
index 000000000..e573778c7 --- /dev/null +++ b/src/get_arch_flags.in @@ -0,0 +1,88 @@ +#!/bin/bash + +# +# Copyright 2024 Huawei Technologies Co., Ltd. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +error_echo() { printf "%s\n" "$*" >&2; } + +SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd ) + +DEFAULT_SIMD_SIZE="@DEFAULT_SIMD_SIZE@" +DEFAULT_L1CACHE_SIZE="@DEFAULT_L1CACHE_SIZE@" +DEFAULT_CACHE_LINE_SIZE="@DEFAULT_CACHE_LINE_SIZE@" + +SIMD_ISA_DETECT_APP="@SIMD_ISA_DETECT_APP@" +L1CACHE_DETECT_APP="@L1CACHE_DETECT_APP@" + +SIMD_ISA_DETECT_APP_PATH="${SCRIPT_DIR}/${SIMD_ISA_DETECT_APP}" +L1CACHE_DETECT_APP_PATH="${SCRIPT_DIR}/${L1CACHE_DETECT_APP}" + +errval="1" +if [[ "${SIMD_ISA_DETECT_APP}" != "OFF" ]]; then + # call app to detect SIMD ISA (path quoted: the install prefix may contain spaces) + simd_isa=$("${SIMD_ISA_DETECT_APP_PATH}") + errval="$?" +fi + +if [[ "${errval}" == "0" ]]; then + # parse simd_isa ("sse" is what the x86_64 detector prints for SSE-only CPUs) + if [[ "${simd_isa}" == "AVX512" ]]; then + SIMD_SIZE=64 + elif [[ "${simd_isa}" == "AVX2" ]]; then + SIMD_SIZE=32 + elif [[ "${simd_isa}" == "AVX" ]]; then + SIMD_SIZE=16 + elif [[ "${simd_isa}" == "NEON" || "${simd_isa}" == "sse" ]]; then + SIMD_SIZE=16 + elif [[ "${simd_isa}" == "SVE" || "${simd_isa}" == "SVE2" ]]; then + error_echo "Detected SIMD ISA is ${simd_isa}: the detection of its implementation-dependent vector size is currently not implemented." 
+ error_echo "Applying therefore a standard vector size of ${DEFAULT_SIMD_SIZE}" + error_echo "Please, consider filing an issue to the authors" + SIMD_SIZE="${DEFAULT_SIMD_SIZE}" + fi +else + SIMD_SIZE="${DEFAULT_SIMD_SIZE}" +fi + +errval="1" +if [[ "${L1CACHE_DETECT_APP}" != "OFF" ]]; then + # call app to detect L1D cache info (path quoted: the install prefix may contain spaces) + cache_info="$("${L1CACHE_DETECT_APP_PATH}")" + errval="$?" +fi + +if [[ "${errval}" == "0" ]]; then + # parse cache_info + regex='[[:space:]]*TYPE:[[:blank:]]*(Data|Unified)[[:space:]]*SIZE:[[:blank:]]*([[:digit:]]+)[[:space:]]*LINE:[[:blank:]]*([[:digit:]]+)[[:blank:]]*' + if [[ "${cache_info}" =~ ${regex} ]]; then + type="${BASH_REMATCH[1]}" + L1DCACHE_SIZE="${BASH_REMATCH[2]}" + CACHE_LINE_SIZE="${BASH_REMATCH[3]}" + else + errval="1" + error_echo "Error while detecting L1D cache parameters: applying defaults..." + fi + if [[ "${type}" == "Unified" ]]; then + error_echo "The L1 cache is Unified, hence it may not be fully available to the application" + fi +fi + +if [[ "${errval}" != "0" ]]; then + L1DCACHE_SIZE="${DEFAULT_L1CACHE_SIZE}" + CACHE_LINE_SIZE="${DEFAULT_CACHE_LINE_SIZE}" +fi + +ARCH_FLAGS="-D_SIMD_SIZE=${SIMD_SIZE:-${DEFAULT_SIMD_SIZE}} -D_L1DCACHE_SIZE=${L1DCACHE_SIZE} -D_CACHE_LINE_SIZE=${CACHE_LINE_SIZE}" diff --git a/src/grbcxx.in b/src/grbcxx.in index 3f30dfb2f..77cd2660a 100644 --- a/src/grbcxx.in +++ b/src/grbcxx.in @@ -1,7 +1,7 @@ #!/bin/bash # -# Copyright 2021 Huawei Technologies Co., Ltd. +# Copyright 2024 Huawei Technologies Co., Ltd. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -88,6 +88,13 @@ then exit 255 fi +# the sourced arch_info/get_arch_flags script MUST populate the ARCH_FLAGS variable with arch-specific flags +. "$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )/arch_info/get_arch_flags" +if [[ -z "${ARCH_FLAGS}" ]]; then + echo "Cannot get architecture-specific flags!" 
+ exit 1 +fi + LFLAGS= if ${LINK} then @@ -96,6 +103,6 @@ fi COMPILER=${BACKENDCOMPILERS[${BACKENDID}]} -CMD="${COMPILER} -std=${CXXSTD} -I'${INCLUDEDIR}' ${COMMONCFLAGS} ${BACKENDCFLAGS[${BACKENDID}]} "${ARGS[@]}" "$@" ${LFLAGS}" +CMD="${COMPILER} -std=${CXXSTD} ${ARCH_FLAGS} -I'${INCLUDEDIR}' ${COMMONCFLAGS} ${BACKENDCFLAGS[${BACKENDID}]} "${ARGS[@]}" "$@" ${LFLAGS}" ${SHOW} "${CMD}"