From 54bec4f33f729caebb47dea06ce1171d9ec7bd7b Mon Sep 17 00:00:00 2001 From: Alberto Scolari Date: Wed, 6 Mar 2024 15:19:25 +0100 Subject: [PATCH 1/8] logic to detect SIMD ISA in x64 and ARM --- cmake/arm_simd_detect.c | 44 ++++++++++++++++++++++++++++++++++++++ cmake/x86_64_simd_detect.c | 39 +++++++++++++++++++++++++++++++++ 2 files changed, 83 insertions(+) create mode 100644 cmake/arm_simd_detect.c create mode 100644 cmake/x86_64_simd_detect.c diff --git a/cmake/arm_simd_detect.c b/cmake/arm_simd_detect.c new file mode 100644 index 000000000..0f0ae0a07 --- /dev/null +++ b/cmake/arm_simd_detect.c @@ -0,0 +1,44 @@ + +/* + * Copyright 2024 Huawei Technologies Co., Ltd. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include + +int main() { + +#ifdef HWCAP2_SVE2 + if( getauxval( AT_HWCAP2 ) & HWCAP2_SVE2 ) { + printf( "SVE2\n" ); + return 0; + } +#endif + + const unsigned long flags = getauxval( AT_HWCAP ); +#ifdef HWCAP_SVE + if( flags & HWCAP_SVE ) { + printf("SVE"); + } else +#endif + if ( flags & HWCAP_ASIMD ) { + printf( "NEON" ); + } else { + printf( "no SIMD ISA detected!\n" ); + return 1; + } + printf( "\n" ); + return 0; +} diff --git a/cmake/x86_64_simd_detect.c b/cmake/x86_64_simd_detect.c new file mode 100644 index 000000000..a46be1b6c --- /dev/null +++ b/cmake/x86_64_simd_detect.c @@ -0,0 +1,39 @@ + +/* + * Copyright 2024 Huawei Technologies Co., Ltd. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include + +// from https://gcc.gnu.org/onlinedocs/gcc/x86-Built-in-Functions.html#index-_005f_005fbuiltin_005fcpu_005fsupports-1 + +int main() { + __builtin_cpu_init (); + + if (__builtin_cpu_supports( "avx512f" ) ) { + printf( "AVX512" ); + } else if ( __builtin_cpu_supports( "avx2" ) ) { + printf( "AVX2" ); + } else if ( __builtin_cpu_supports( "avx" ) ) { + printf( "AVX" ); + } else if ( __builtin_cpu_supports( "sse" ) ) { + printf( "sse" ); + } else { + printf( "no SIMD ISA detected!\n" ); + return 1; + } + printf( "\n" ); + return 0; +} From 8a1858a8bc6c000ae5429ea491d218af309019b7 Mon Sep 17 00:00:00 2001 From: Alberto Scolari Date: Wed, 6 Mar 2024 15:20:23 +0100 Subject: [PATCH 2/8] logic to detect L1 Data cache info --- cmake/l1_cache_info.sh | 69 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 69 insertions(+) create mode 100755 cmake/l1_cache_info.sh diff --git a/cmake/l1_cache_info.sh b/cmake/l1_cache_info.sh new file mode 100755 index 000000000..20893805c --- /dev/null +++ b/cmake/l1_cache_info.sh @@ -0,0 +1,69 @@ +#!/bin/bash + +# +# Copyright 2024 Huawei Technologies Co., Ltd. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +function symbolic_to_bytes { + local symbolic_size="$1" + local _symbolic_size=${symbolic_size//M/*1024*1024} + local _byte_size=${_symbolic_size//K/*1024} + echo $((_byte_size)) +} + +function test_not_zero { + if [[ "$1" == "0" ]]; then + echo "0 value: makes no sense!" + exit 1 + fi +} + +INFO_ROOT="/sys/devices/system/cpu/cpu0/cache" + +# look for Data cache (Harvard architecture); Unified is also accepted +for f in ${INFO_ROOT}/index*; do + level=$(cat ${f}/level) + if [[ "$?" != "0" ]]; then + echo "error detecting the cache level" + exit 1 + fi + if [[ "${level}" != "1" ]]; then + continue + fi + type=$(cat ${f}/type) + + if [[ "${type}" == "Data" || "${type}" == "Unified" ]]; then + cache_dir=${f} + break + fi +done + +if [[ -z "${cache_dir}" ]]; then + echo "cannot find cache info" + exit 1 +fi + +echo "TYPE: ${type}" + +cache_symbolic_size=$(cat ${cache_dir}/size) +cache_byte_size=$(symbolic_to_bytes ${cache_symbolic_size}) +test_not_zero "${cache_byte_size}" +echo "SIZE: ${cache_byte_size}" + +symbolic_line_size=$(cat ${cache_dir}/coherency_line_size) +line_byte_size=$(symbolic_to_bytes ${symbolic_line_size}) + +test_not_zero "${line_byte_size}" +echo "LINE: ${line_byte_size}" From 4c1d76fa65356a9ba6f9924fc119bc69f46f0a4d Mon Sep 17 00:00:00 2001 From: Alberto Scolari Date: Wed, 6 Mar 2024 17:46:14 +0100 Subject: [PATCH 3/8] adding CMake logic to detect arch info --- cmake/CompileFlags.cmake | 12 ++++ cmake/DetectArchInfo.cmake | 94 +++++++++++++++++++++++++++++++ cmake/arm_simd_detect.c | 7 ++- cmake/x86_64_simd_detect.c | 8 +-- 
include/graphblas/base/config.hpp | 19 ++++++- 5 files changed, 130 insertions(+), 10 deletions(-) create mode 100644 cmake/DetectArchInfo.cmake diff --git a/cmake/CompileFlags.cmake b/cmake/CompileFlags.cmake index 4c6c1d862..94be6e012 100644 --- a/cmake/CompileFlags.cmake +++ b/cmake/CompileFlags.cmake @@ -35,6 +35,10 @@ assert_defined_variables( TEST_PERFORMANCE_DEFINITIONS TEST_PERFORMANCE_OPTIONS ) +### ARCH INFO DETECTION +include( DetectArchInfo ) +assert_valid_variables( SIMD_SIZE L1DCACHE_SIZE CACHE_LINE_SIZE ) + # allow only Relase, Debug and Coverage set( CMAKE_CONFIGURATION_TYPES "Release;Debug;Coverage" CACHE STRING "Add the configurations that we need" FORCE @@ -60,10 +64,18 @@ endif() set( COMMON_OPTS "-g" "-Wall" "-Wextra" ) +set( arch_defs "_SIMD_SIZE=${SIMD_SIZE};_L1DCACHE_SIZE=${L1DCACHE_SIZE};_CACHE_LINE_SIZE=${CACHE_LINE_SIZE}" ) + # cache variable to allow manual tweaks from CMake cache set_valid_string( COMMON_DEFS_Release "${COMMON_COMPILE_DEFINITIONS}" "" ) set_valid_string( COMMON_DEFS_Debug "${COMMON_COMPILE_DEFINITIONS}" "" ) set_valid_string( COMMON_DEFS_Coverage "${COMMON_COMPILE_DEFINITIONS}" "" ) + +list( PREPEND COMMON_DEFS_Release ${arch_defs} ) +list( PREPEND COMMON_DEFS_Debug ${arch_defs} ) +list( PREPEND COMMON_DEFS_Coverage ${arch_defs} ) + + set_valid_string( COMMON_OPTS_Release "${COMMON_COMPILE_OPTIONS}" "${COMMON_OPTS}" ) diff --git a/cmake/DetectArchInfo.cmake b/cmake/DetectArchInfo.cmake new file mode 100644 index 000000000..33eca1bb8 --- /dev/null +++ b/cmake/DetectArchInfo.cmake @@ -0,0 +1,94 @@ +# +# Copyright 2021 Huawei Technologies Co., Ltd. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +set( _supported_arches "x86_64;arm" ) +if( NOT CMAKE_SYSTEM_PROCESSOR IN_LIST _supported_arches ) + message( FATAL_ERROR "Architecture \"${CMAKE_SYSTEM_PROCESSOR}\" not supported" ) +endif() + +set( DEFAULT_SIMD_SIZE 64 ) +set( DEFAULT_L1CACHE_SIZE 32768 ) +set( DEFAULT_CACHE_LINE_SIZE 64 ) + +if( CMAKE_VERSION VERSION_LESS "3.25.0" ) + # old CMake versions have a different signature for try_compile() + # https://cmake.org/cmake/help/latest/command/try_run.html#try-compiling-and-running-source-files + set( _dest ${CMAKE_CURRENT_BINARY_DIR} ) +endif() + +set( _arch_detect_folder ${CMAKE_CURRENT_BINARY_DIR}/src/arch_info ) +set( _simd_detect_destination ${_arch_detect_folder}/detect_simd_isa ) + +try_compile( COMPILED ${_dest} SOURCES ${CMAKE_SOURCE_DIR}/cmake/${CMAKE_SYSTEM_PROCESSOR}_simd_detect.c + COPY_FILE ${_simd_detect_destination} + COPY_FILE_ERROR COPY_MSG +) +if( COMPILED ) + execute_process( + COMMAND ${_simd_detect_destination} + RESULT_VARIABLE RES + OUTPUT_VARIABLE SIMD_ISA + OUTPUT_STRIP_TRAILING_WHITESPACE + ) +endif() + +if( NOT COMPILED OR ( NOT RES STREQUAL "0" ) OR COPY_MSG ) + set( SIMD_SIZE ${DEFAULT_SIMD_SIZE} ) + message( WARNING "Cannot detect SIMD ISA, thus applying default vector size: ${SIMD_SIZE}B" ) +else() + set( SIMD_ISA_DETECT_EXE ${_simd_detect_destination} ) + if( SIMD_ISA STREQUAL "SVE" OR SIMD_ISA STREQUAL "SVE2" ) + set( SIMD_SIZE 64 ) + message( WARNING "Detected SIMD ISA ${SIMD_ISA}, whose size is implementation-dependant and currently not detected. Please, consider filing an issue to the authors. 
Applying default vector size: ${SIMD_SIZE}B" ) + else() + if( SIMD_ISA STREQUAL "AVX512" ) + set( SIMD_SIZE 64 ) + elseif( SIMD_ISA STREQUAL "AVX2" ) + set( SIMD_SIZE 32 ) + elseif( SIMD_ISA STREQUAL "AVX" ) + set( SIMD_SIZE 16 ) + elseif( SIMD_ISA STREQUAL "NEON" ) + set( SIMD_SIZE 16 ) + endif() + message( "Detected SIMD ISA: ${SIMD_ISA}; vector size : ${SIMD_SIZE}B" ) + endif() +endif() + +execute_process( + COMMAND ${CMAKE_SOURCE_DIR}/cmake/l1_cache_info.sh + RESULT_VARIABLE RES + OUTPUT_VARIABLE CACHE_DETECT_OUTPUT + OUTPUT_STRIP_TRAILING_WHITESPACE +) +file( COPY ${CMAKE_SOURCE_DIR}/cmake/l1_cache_info.sh DESTINATION ${_arch_detect_folder} ) +if( NOT RES STREQUAL "0" ) + set( L1CACHE_SIZE ${DEFAULT_L1CACHE_SIZE} ) + set( CACHE_LINE_SIZE ${DEFAULT_CACHE_LINE_SIZE} ) + message( WARNING "Cannot detect L1 cache features, thus applying default settigs" ) +else() + set( L1CACHE_RUNTIME_DETECT TRUE ) + string( REGEX MATCHALL + "TYPE:[ \t]*(Data|Unified)[ \t\r\n]+SIZE:[ \t]*([0-9]+)[ \t\r\n]+LINE:[ \t]*([0-9]+)[ \t\r\n]*" + MATCH_OUTPUT "${CACHE_DETECT_OUTPUT}" + ) + set( L1DCACHE_TYPE ${CMAKE_MATCH_1} ) + set( L1DCACHE_SIZE ${CMAKE_MATCH_2} ) + set( CACHE_LINE_SIZE ${CMAKE_MATCH_3} ) + if( L1DCACHE_TYPE STREQUAL "Unified" ) + message( WARNING "The L1 cache is Unified, so it may not be possible to effectively utilize its entire size (${L1DCACHE_SIZE}B) for the data." ) + endif() +endif() +message( "L1 cache size: ${L1DCACHE_SIZE}B; cacheline size: ${CACHE_LINE_SIZE}B" ) diff --git a/cmake/arm_simd_detect.c b/cmake/arm_simd_detect.c index 0f0ae0a07..4b0e435e7 100644 --- a/cmake/arm_simd_detect.c +++ b/cmake/arm_simd_detect.c @@ -27,6 +27,7 @@ int main() { } #endif + int retval = 0; const unsigned long flags = getauxval( AT_HWCAP ); #ifdef HWCAP_SVE if( flags & HWCAP_SVE ) { @@ -36,9 +37,9 @@ int main() { if ( flags & HWCAP_ASIMD ) { printf( "NEON" ); } else { - printf( "no SIMD ISA detected!\n" ); - return 1; + printf( "no SIMD ISA detected!" 
); + retval = 1; } printf( "\n" ); - return 0; + return retval; } diff --git a/cmake/x86_64_simd_detect.c b/cmake/x86_64_simd_detect.c index a46be1b6c..06bd5d473 100644 --- a/cmake/x86_64_simd_detect.c +++ b/cmake/x86_64_simd_detect.c @@ -21,7 +21,7 @@ int main() { __builtin_cpu_init (); - + int retval = 0; if (__builtin_cpu_supports( "avx512f" ) ) { printf( "AVX512" ); } else if ( __builtin_cpu_supports( "avx2" ) ) { @@ -31,9 +31,9 @@ int main() { } else if ( __builtin_cpu_supports( "sse" ) ) { printf( "sse" ); } else { - printf( "no SIMD ISA detected!\n" ); - return 1; + printf( "no SIMD ISA detected!" ); + retval = 1; } printf( "\n" ); - return 0; + return retval; } diff --git a/include/graphblas/base/config.hpp b/include/graphblas/base/config.hpp index f7796c852..456733ae6 100644 --- a/include/graphblas/base/config.hpp +++ b/include/graphblas/base/config.hpp @@ -28,6 +28,19 @@ #ifndef _H_GRB_CONFIG_BASE #define _H_GRB_CONFIG_BASE +#ifndef _SIMD_SIZE + #error "_SIMD_SIZE is not defined" +#endif + +#ifndef _L1DCACHE_SIZE + #error "_L1DCACHE_SIZE is not defined" +#endif + +#ifndef _CACHE_LINE_SIZE + #error "_CACHE_LINE_SIZE is not defined" +#endif + + #include //size_t #include @@ -91,7 +104,7 @@ namespace grb { * The cache line size in bytes. Update this value at compile time to * reflect the target architecture. */ - static constexpr size_t bytes = 64; + static constexpr size_t bytes = _CACHE_LINE_SIZE; public: @@ -120,7 +133,7 @@ namespace grb { * The SIMD size, in bytes. Update this value at compile time to reflect * the target architecture. */ - static constexpr size_t bytes = 64; + static constexpr size_t bytes = _SIMD_SIZE; public: @@ -235,7 +248,7 @@ namespace grb { /** @returns the private L1 data cache size, in bytes. 
*/ static constexpr size_t l1_cache_size() { - return 32768; + return _L1DCACHE_SIZE; } /** From f26be7b6cfaa5aecd5aa1feec9fe14000d876ff4 Mon Sep 17 00:00:00 2001 From: Alberto Scolari Date: Thu, 7 Mar 2024 14:37:58 +0100 Subject: [PATCH 4/8] detecting arch info also in compiler script --- cmake/AddGRBInstall.cmake | 3 ++ cmake/DetectArchInfo.cmake | 20 +++++---- src/CMakeLists.txt | 15 ++++++- src/get_arch_info.in | 86 ++++++++++++++++++++++++++++++++++++++ src/grbcxx.in | 10 ++++- 5 files changed, 122 insertions(+), 12 deletions(-) create mode 100755 src/get_arch_info.in diff --git a/cmake/AddGRBInstall.cmake b/cmake/AddGRBInstall.cmake index 94bd58f31..f5f3ad178 100644 --- a/cmake/AddGRBInstall.cmake +++ b/cmake/AddGRBInstall.cmake @@ -33,6 +33,9 @@ set( ALP_UTILS_LIBRARY_OUTPUT_NAME "alp_utils" ) set( BINARY_LIBRARIES_INSTALL_DIR "${CMAKE_INSTALL_PREFIX}/lib" ) set( CMAKE_CONFIGS_INSTALL_DIR "${CMAKE_INSTALL_PREFIX}/cmake" ) set( NAMESPACE_NAME "ALPGraphBLAS") +set( ARCH_DETECT_APPS_DIR ${CMAKE_CURRENT_BINARY_DIR}/src/arch_info ) +set( ARCH_DETECT_APPS_INSTALL_DIR ${BIN_INSTALL_DIR}/arch_info ) + # installation export unit for ALL targets install( EXPORT GraphBLASTargets diff --git a/cmake/DetectArchInfo.cmake b/cmake/DetectArchInfo.cmake index 33eca1bb8..4b656ca72 100644 --- a/cmake/DetectArchInfo.cmake +++ b/cmake/DetectArchInfo.cmake @@ -1,5 +1,5 @@ # -# Copyright 2021 Huawei Technologies Co., Ltd. +# Copyright 2024 Huawei Technologies Co., Ltd. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -14,6 +14,8 @@ # limitations under the License. 
# +assert_valid_variables( ARCH_DETECT_APPS_DIR ) + set( _supported_arches "x86_64;arm" ) if( NOT CMAKE_SYSTEM_PROCESSOR IN_LIST _supported_arches ) message( FATAL_ERROR "Architecture \"${CMAKE_SYSTEM_PROCESSOR}\" not supported" ) @@ -29,16 +31,17 @@ if( CMAKE_VERSION VERSION_LESS "3.25.0" ) set( _dest ${CMAKE_CURRENT_BINARY_DIR} ) endif() -set( _arch_detect_folder ${CMAKE_CURRENT_BINARY_DIR}/src/arch_info ) -set( _simd_detect_destination ${_arch_detect_folder}/detect_simd_isa ) +set( ARCH_DETECT_APPS_DIR ${CMAKE_CURRENT_BINARY_DIR}/src/arch_info ) +set( _simd_detect_destination detect_simd_isa ) +set( SIMD_ISA_DETECT_APP OFF ) try_compile( COMPILED ${_dest} SOURCES ${CMAKE_SOURCE_DIR}/cmake/${CMAKE_SYSTEM_PROCESSOR}_simd_detect.c - COPY_FILE ${_simd_detect_destination} + COPY_FILE ${ARCH_DETECT_APPS_DIR}/${_simd_detect_destination} COPY_FILE_ERROR COPY_MSG ) if( COMPILED ) execute_process( - COMMAND ${_simd_detect_destination} + COMMAND ${ARCH_DETECT_APPS_DIR}/${_simd_detect_destination} RESULT_VARIABLE RES OUTPUT_VARIABLE SIMD_ISA OUTPUT_STRIP_TRAILING_WHITESPACE @@ -49,7 +52,7 @@ if( NOT COMPILED OR ( NOT RES STREQUAL "0" ) OR COPY_MSG ) set( SIMD_SIZE ${DEFAULT_SIMD_SIZE} ) message( WARNING "Cannot detect SIMD ISA, thus applying default vector size: ${SIMD_SIZE}B" ) else() - set( SIMD_ISA_DETECT_EXE ${_simd_detect_destination} ) + set( SIMD_ISA_DETECT_APP ${_simd_detect_destination} ) if( SIMD_ISA STREQUAL "SVE" OR SIMD_ISA STREQUAL "SVE2" ) set( SIMD_SIZE 64 ) message( WARNING "Detected SIMD ISA ${SIMD_ISA}, whose size is implementation-dependant and currently not detected. Please, consider filing an issue to the authors. 
Applying default vector size: ${SIMD_SIZE}B" ) @@ -67,19 +70,20 @@ else() endif() endif() +set( L1CACHE_DETECT_APP OFF ) execute_process( COMMAND ${CMAKE_SOURCE_DIR}/cmake/l1_cache_info.sh RESULT_VARIABLE RES OUTPUT_VARIABLE CACHE_DETECT_OUTPUT OUTPUT_STRIP_TRAILING_WHITESPACE ) -file( COPY ${CMAKE_SOURCE_DIR}/cmake/l1_cache_info.sh DESTINATION ${_arch_detect_folder} ) +file( COPY ${CMAKE_SOURCE_DIR}/cmake/l1_cache_info.sh DESTINATION ${ARCH_DETECT_APPS_DIR} ) if( NOT RES STREQUAL "0" ) set( L1CACHE_SIZE ${DEFAULT_L1CACHE_SIZE} ) set( CACHE_LINE_SIZE ${DEFAULT_CACHE_LINE_SIZE} ) message( WARNING "Cannot detect L1 cache features, thus applying default settigs" ) else() - set( L1CACHE_RUNTIME_DETECT TRUE ) + set( L1CACHE_DETECT_APP l1_cache_info.sh ) string( REGEX MATCHALL "TYPE:[ \t]*(Data|Unified)[ \t\r\n]+SIZE:[ \t]*([0-9]+)[ \t\r\n]+LINE:[ \t]*([0-9]+)[ \t\r\n]*" MATCH_OUTPUT "${CACHE_DETECT_OUTPUT}" diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 50e731a30..4e56f279a 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -28,9 +28,12 @@ add_subdirectory( transition ) ### BUILD WRAPPER SCRIPTS FOR INSTALLATION assert_valid_variables( AVAILABLE_BACKENDS CMAKE_INSTALL_PREFIX INCLUDE_INSTALL_DIR - VERSION BIN_INSTALL_DIR + VERSION BIN_INSTALL_DIR DEFAULT_SIMD_SIZE DEFAULT_L1CACHE_SIZE + DEFAULT_CACHE_LINE_SIZE ARCH_DETECT_APPS_DIR +) +assert_defined_variables( COMMON_WRAPPER_DEFINITIONS COMMON_WRAPPER_OPTIONS + SIMD_ISA_DETECT_APP L1CACHE_DETECT_APP ) -assert_defined_variables( COMMON_WRAPPER_DEFINITIONS COMMON_WRAPPER_OPTIONS ) if( WITH_BSP1D_BACKEND OR WITH_HYBRID_BACKEND ) assert_valid_variables( LPFRUN_CMD MANUALRUN ) @@ -94,6 +97,14 @@ list( JOIN runenv_list " " BACKEND_RUNENV_SPACED ) list( JOIN runner_list " " BACKEND_RUNNER_SPACED ) configure_file( ${CMAKE_CURRENT_SOURCE_DIR}/grbrun.in ${CMAKE_CURRENT_BINARY_DIR}/grbrun @ONLY ) +configure_file( ${CMAKE_CURRENT_SOURCE_DIR}/get_arch_info.in ${ARCH_DETECT_APPS_DIR}/get_arch_info @ONLY ) 
+install( DIRECTORY ${ARCH_DETECT_APPS_DIR} + DESTINATION ${BIN_INSTALL_DIR} + FILE_PERMISSIONS OWNER_READ OWNER_WRITE OWNER_EXECUTE + GROUP_READ GROUP_EXECUTE + WORLD_READ WORLD_EXECUTE +) + # install them to the install folder with execute permission install( FILES ${CMAKE_CURRENT_BINARY_DIR}/setenv ${CMAKE_CURRENT_BINARY_DIR}/grbcxx diff --git a/src/get_arch_info.in b/src/get_arch_info.in new file mode 100755 index 000000000..c2f9c70b8 --- /dev/null +++ b/src/get_arch_info.in @@ -0,0 +1,86 @@ +#!/bin/bash + +# +# Copyright 2024 Huawei Technologies Co., Ltd. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +error_echo() { printf "%s\n" "$*" >&2; } + +SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd ) + +DEFAULT_SIMD_SIZE="@DEFAULT_SIMD_SIZE@" +DEFAULT_L1CACHE_SIZE="@DEFAULT_L1CACHE_SIZE@" +DEFAULT_CACHE_LINE_SIZE="@DEFAULT_CACHE_LINE_SIZE@" + +SIMD_ISA_DETECT_APP="@SIMD_ISA_DETECT_APP@" +L1CACHE_DETECT_APP="@L1CACHE_DETECT_APP@" + +SIMD_ISA_DETECT_APP_PATH="${SCRIPT_DIR}/${SIMD_ISA_DETECT_APP}" +L1CACHE_DETECT_APP_PATH="${SCRIPT_DIR}/${L1CACHE_DETECT_APP}" + +errval="1" +if [[ "${SIMD_ISA_DETECT_APP}" != "OFF" ]]; then + simd_isa=$(${SIMD_ISA_DETECT_APP_PATH}) + errval="$?" 
+fi + +if [[ "${errval}" == "0" ]]; then + # parse simd_isa + if [[ "${simd_isa}" == "AVX512" ]]; then + SIMD_SIZE=64 + elif [[ "${simd_isa}" == "AVX2" ]]; then + SIMD_SIZE=32 + elif [[ "${simd_isa}" == "AVX" ]]; then + SIMD_SIZE=16 + elif [[ "${simd_isa}" == "NEON" ]]; then + SIMD_SIZE=16 + elif [[ "${simd_isa}" == "SVE" || "${simd_isa}" == "SVE2" ]]; then + error_echo "Detected SIMD ISA is ${simd_isa}": the detection of its implementation-dependant vector size is currently not implemented. + error_echo "Applying therefore a standard vector size of ${DEFAULT_SIMD_SIZE}" + error_echo "Please, consider filing an issue to the authors" + SIMD_SIZE="${DEFAULT_SIMD_SIZE}" + fi +else + SIMD_SIZE="${DEFAULT_SIMD_SIZE}" +fi + +errval="1" +if [[ "${L1CACHE_DETECT_APP}" != "OFF" ]]; then + cache_info="$(${L1CACHE_DETECT_APP_PATH})" + errval="$?" +fi + +if [[ "${errval}" == "0" ]]; then + # parse cache_info + regex='[[:space:]]*TYPE:[[:blank:]]*(Data|Unified)[[:space:]]*SIZE:[[:blank:]]*([[:digit:]]+)[[:space:]]*LINE:[[:blank:]]*([[:digit:]]+)[[:blank:]]*' + if [[ "${cache_info}" =~ ${regex} ]]; then + type="${BASH_REMATCH[1]}" + L1DCACHE_SIZE="${BASH_REMATCH[2]}" + CACHE_LINE_SIZE="${BASH_REMATCH[3]}" + else + errval="1" + error_echo "Error while detecting L1D cache parameters: applying defaults..." + fi + if [[ "${type}" == "Unified" ]]; then + echo "The L1 cache is Unified, hence it may not be fully available to the application" + fi +fi + +if [[ "${errval}" != "0" ]]; then + L1DCACHE_SIZE="${DEFAULT_L1CACHE_SIZE}" + CACHE_LINE_SIZE="${DEFAULT_CACHE_LINE_SIZE}" +fi + +ARCH_FLAGS="-D_SIMD_SIZE=${SIMD_SIZE} -D_L1DCACHE_SIZE=${L1DCACHE_SIZE} -D_CACHE_LINE_SIZE=${CACHE_LINE_SIZE}" diff --git a/src/grbcxx.in b/src/grbcxx.in index 3f30dfb2f..ea701bf6c 100644 --- a/src/grbcxx.in +++ b/src/grbcxx.in @@ -1,7 +1,7 @@ #!/bin/bash # -# Copyright 2021 Huawei Technologies Co., Ltd. +# Copyright 2024 Huawei Technologies Co., Ltd. 
# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -88,6 +88,12 @@ then exit 255 fi +. $( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )/arch_info/get_arch_info +if [[ -z "${ARCH_FLAGS}" ]]; then + echo "Cannot get architecture-specific flags!" + exit 1 +fi + LFLAGS= if ${LINK} then @@ -96,6 +102,6 @@ fi COMPILER=${BACKENDCOMPILERS[${BACKENDID}]} -CMD="${COMPILER} -std=${CXXSTD} -I'${INCLUDEDIR}' ${COMMONCFLAGS} ${BACKENDCFLAGS[${BACKENDID}]} "${ARGS[@]}" "$@" ${LFLAGS}" +CMD="${COMPILER} -std=${CXXSTD} ${ARCH_FLAGS} -I'${INCLUDEDIR}' ${COMMONCFLAGS} ${BACKENDCFLAGS[${BACKENDID}]} "${ARGS[@]}" "$@" ${LFLAGS}" ${SHOW} "${CMD}" From a5a67fb37c341f5cd637f4b6a716ec994bf7917f Mon Sep 17 00:00:00 2001 From: Alberto Scolari Date: Thu, 7 Mar 2024 17:19:13 +0100 Subject: [PATCH 5/8] adding documentation --- cmake/DetectArchInfo.cmake | 25 +++++++++++++++++++++ cmake/arm_simd_detect.c | 11 +++++++++ cmake/l1_cache_info.sh | 4 ++++ cmake/x86_64_simd_detect.c | 10 ++++++++- src/CMakeLists.txt | 6 +++-- src/{get_arch_info.in => get_arch_flags.in} | 2 ++ src/grbcxx.in | 3 ++- 7 files changed, 57 insertions(+), 4 deletions(-) rename src/{get_arch_info.in => get_arch_flags.in} (97%) diff --git a/cmake/DetectArchInfo.cmake b/cmake/DetectArchInfo.cmake index 4b656ca72..e39882e17 100644 --- a/cmake/DetectArchInfo.cmake +++ b/cmake/DetectArchInfo.cmake @@ -14,6 +14,17 @@ # limitations under the License. # +#[===================================================================[ +Detect Architectural Info for the system CPU + +Three parameters are detected and used during compilation: +1. maximum supported size of the SIMD vector +2. size of the L1 Data cache (shortly L1D) +3. size of the L1D cache line (typically the same for all caches) + +If any of this information cannot be gathered from hardware, a default is used. 
+#]===================================================================] + assert_valid_variables( ARCH_DETECT_APPS_DIR ) set( _supported_arches "x86_64;arm" ) @@ -31,15 +42,19 @@ if( CMAKE_VERSION VERSION_LESS "3.25.0" ) set( _dest ${CMAKE_CURRENT_BINARY_DIR} ) endif() +# compile executable to detect SIMD ISA and run it set( ARCH_DETECT_APPS_DIR ${CMAKE_CURRENT_BINARY_DIR}/src/arch_info ) set( _simd_detect_destination detect_simd_isa ) set( SIMD_ISA_DETECT_APP OFF ) +# compile and also copy the file to a known folder in order to use it in the +# installation infrastructure: the grbcxx script needs it try_compile( COMPILED ${_dest} SOURCES ${CMAKE_SOURCE_DIR}/cmake/${CMAKE_SYSTEM_PROCESSOR}_simd_detect.c COPY_FILE ${ARCH_DETECT_APPS_DIR}/${_simd_detect_destination} COPY_FILE_ERROR COPY_MSG ) if( COMPILED ) + # attemtp to run the compiled app execute_process( COMMAND ${ARCH_DETECT_APPS_DIR}/${_simd_detect_destination} RESULT_VARIABLE RES @@ -49,9 +64,12 @@ if( COMPILED ) endif() if( NOT COMPILED OR ( NOT RES STREQUAL "0" ) OR COPY_MSG ) + # if we could not compile or run, set defaults set( SIMD_SIZE ${DEFAULT_SIMD_SIZE} ) message( WARNING "Cannot detect SIMD ISA, thus applying default vector size: ${SIMD_SIZE}B" ) else() + # set vector size based on detected SIMD ISA and wanr in case of SVE or SVE2 + # not yet implemented set( SIMD_ISA_DETECT_APP ${_simd_detect_destination} ) if( SIMD_ISA STREQUAL "SVE" OR SIMD_ISA STREQUAL "SVE2" ) set( SIMD_SIZE 64 ) @@ -71,19 +89,26 @@ else() endif() set( L1CACHE_DETECT_APP OFF ) +# for L1D information, use a Bash script, so just try to run it execute_process( COMMAND ${CMAKE_SOURCE_DIR}/cmake/l1_cache_info.sh RESULT_VARIABLE RES OUTPUT_VARIABLE CACHE_DETECT_OUTPUT OUTPUT_STRIP_TRAILING_WHITESPACE ) +# copy the script to the build infrastructure, for testing and for installation file( COPY ${CMAKE_SOURCE_DIR}/cmake/l1_cache_info.sh DESTINATION ${ARCH_DETECT_APPS_DIR} ) if( NOT RES STREQUAL "0" ) + # could not run properly, set 
defaults set( L1CACHE_SIZE ${DEFAULT_L1CACHE_SIZE} ) set( CACHE_LINE_SIZE ${DEFAULT_CACHE_LINE_SIZE} ) message( WARNING "Cannot detect L1 cache features, thus applying default settigs" ) else() set( L1CACHE_DETECT_APP l1_cache_info.sh ) + # parse multi-lines output and get each info; example output: + # TYPE: Data + # SIZE: 32768 + # LINE: 64 string( REGEX MATCHALL "TYPE:[ \t]*(Data|Unified)[ \t\r\n]+SIZE:[ \t]*([0-9]+)[ \t\r\n]+LINE:[ \t]*([0-9]+)[ \t\r\n]*" MATCH_OUTPUT "${CACHE_DETECT_OUTPUT}" diff --git a/cmake/arm_simd_detect.c b/cmake/arm_simd_detect.c index 4b0e435e7..68fa7661e 100644 --- a/cmake/arm_simd_detect.c +++ b/cmake/arm_simd_detect.c @@ -18,6 +18,17 @@ #include #include +/* + * Check the supported SIMD ISA in an ARM architecture, via getauxval(): + * https://man7.org/linux/man-pages/man3/getauxval.3.html + * + * Note that support for SVE2 may be too recent for the kernel/GLIBC version in + * use, hence the #ifdef on HWCAP2_SVE2. + * Also note that SVE (and SVE2) has implementation-dependant vector size, whose + * retrieval is currently not implemented; the build infrastructure properly + * warns about this case. + */ + int main() { #ifdef HWCAP2_SVE2 diff --git a/cmake/l1_cache_info.sh b/cmake/l1_cache_info.sh index 20893805c..62353aed1 100755 --- a/cmake/l1_cache_info.sh +++ b/cmake/l1_cache_info.sh @@ -16,6 +16,9 @@ # limitations under the License. 
# +# Detect information about the L1D cache from the SYSFS interface, which is in +# Linux since 2008 + function symbolic_to_bytes { local symbolic_size="$1" local _symbolic_size=${symbolic_size//M/*1024*1024} @@ -40,6 +43,7 @@ for f in ${INFO_ROOT}/index*; do exit 1 fi if [[ "${level}" != "1" ]]; then + # if it's not a cache of level 1, skip continue fi type=$(cat ${f}/type) diff --git a/cmake/x86_64_simd_detect.c b/cmake/x86_64_simd_detect.c index 06bd5d473..b25fb97db 100644 --- a/cmake/x86_64_simd_detect.c +++ b/cmake/x86_64_simd_detect.c @@ -17,7 +17,15 @@ #include -// from https://gcc.gnu.org/onlinedocs/gcc/x86-Built-in-Functions.html#index-_005f_005fbuiltin_005fcpu_005fsupports-1 +/* + * This file detects the SIMD ISA for x86 architectures using compiler built-in functionalities + * from https://gcc.gnu.org/onlinedocs/gcc/x86-Built-in-Functions.html#index-_005f_005fbuiltin_005fcpu_005fsupports-1 + * also supported in clang. + * + * Note that the SIMD support can be advertised by the CPU (e.g., via the CPUID + * instruction) despite being disabled by the Operating System. + * The compiler's built-in functions check both conditions. 
+ */ int main() { __builtin_cpu_init (); diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 4e56f279a..faaf867b6 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -97,7 +97,9 @@ list( JOIN runenv_list " " BACKEND_RUNENV_SPACED ) list( JOIN runner_list " " BACKEND_RUNNER_SPACED ) configure_file( ${CMAKE_CURRENT_SOURCE_DIR}/grbrun.in ${CMAKE_CURRENT_BINARY_DIR}/grbrun @ONLY ) -configure_file( ${CMAKE_CURRENT_SOURCE_DIR}/get_arch_info.in ${ARCH_DETECT_APPS_DIR}/get_arch_info @ONLY ) +# generate script to get architecture-specific flags +configure_file( ${CMAKE_CURRENT_SOURCE_DIR}/get_arch_flags.in ${ARCH_DETECT_APPS_DIR}/get_arch_flags @ONLY ) +# copy the whole directory with all executables to get arch-specific info install( DIRECTORY ${ARCH_DETECT_APPS_DIR} DESTINATION ${BIN_INSTALL_DIR} FILE_PERMISSIONS OWNER_READ OWNER_WRITE OWNER_EXECUTE @@ -118,7 +120,7 @@ install( FILES ${CMAKE_CURRENT_BINARY_DIR}/setenv ### GENERATE CMAKE INFRASTRUCTURE INSIDE INSTALLATION DIRECTORY -include(CMakePackageConfigHelpers) +include( CMakePackageConfigHelpers ) # write file with version information write_basic_package_version_file( diff --git a/src/get_arch_info.in b/src/get_arch_flags.in similarity index 97% rename from src/get_arch_info.in rename to src/get_arch_flags.in index c2f9c70b8..cc72ef59e 100755 --- a/src/get_arch_info.in +++ b/src/get_arch_flags.in @@ -32,6 +32,7 @@ L1CACHE_DETECT_APP_PATH="${SCRIPT_DIR}/${L1CACHE_DETECT_APP}" errval="1" if [[ "${SIMD_ISA_DETECT_APP}" != "OFF" ]]; then + # call app to detect SIMD ISA simd_isa=$(${SIMD_ISA_DETECT_APP_PATH}) errval="$?" fi @@ -58,6 +59,7 @@ fi errval="1" if [[ "${L1CACHE_DETECT_APP}" != "OFF" ]]; then + # call app to detect L1D cache info cache_info="$(${L1CACHE_DETECT_APP_PATH})" errval="$?" fi diff --git a/src/grbcxx.in b/src/grbcxx.in index ea701bf6c..77cd2660a 100644 --- a/src/grbcxx.in +++ b/src/grbcxx.in @@ -88,7 +88,8 @@ then exit 255 fi -. 
$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )/arch_info/get_arch_info +# the get_arch_flags script MUST populate the ARCH_FLAGS variable with arch-specific flags +. $( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )/arch_info/get_arch_flags if [[ -z "${ARCH_FLAGS}" ]]; then echo "Cannot get architecture-specific flags!" exit 1 From b977746da383053b9ac72d83cf655f3058b39927 Mon Sep 17 00:00:00 2001 From: Alberto Scolari Date: Thu, 7 Mar 2024 17:28:22 +0100 Subject: [PATCH 6/8] fix for aarch64 architecture --- cmake/DetectArchInfo.cmake | 16 ++++++++++------ .../{arm_simd_detect.c => aarch64_simd_detect.c} | 0 2 files changed, 10 insertions(+), 6 deletions(-) rename cmake/{arm_simd_detect.c => aarch64_simd_detect.c} (100%) diff --git a/cmake/DetectArchInfo.cmake b/cmake/DetectArchInfo.cmake index e39882e17..e260f8b2b 100644 --- a/cmake/DetectArchInfo.cmake +++ b/cmake/DetectArchInfo.cmake @@ -27,9 +27,11 @@ If any of this information cannot be gathered from hardware, a default is used. 
assert_valid_variables( ARCH_DETECT_APPS_DIR ) -set( _supported_arches "x86_64;arm" ) +set( _supported_arches "x86_64;aarch64" ) if( NOT CMAKE_SYSTEM_PROCESSOR IN_LIST _supported_arches ) - message( FATAL_ERROR "Architecture \"${CMAKE_SYSTEM_PROCESSOR}\" not supported" ) + message( WARNING "Architecture \"${CMAKE_SYSTEM_PROCESSOR}\" not supported" ) +else() + set( _supported_arch ON ) endif() set( DEFAULT_SIMD_SIZE 64 ) @@ -49,10 +51,12 @@ set( _simd_detect_destination detect_simd_isa ) set( SIMD_ISA_DETECT_APP OFF ) # compile and also copy the file to a known folder in order to use it in the # installation infrastructure: the grbcxx script needs it -try_compile( COMPILED ${_dest} SOURCES ${CMAKE_SOURCE_DIR}/cmake/${CMAKE_SYSTEM_PROCESSOR}_simd_detect.c - COPY_FILE ${ARCH_DETECT_APPS_DIR}/${_simd_detect_destination} - COPY_FILE_ERROR COPY_MSG -) +if( _supported_arch ) + try_compile( COMPILED ${_dest} SOURCES ${CMAKE_SOURCE_DIR}/cmake/${CMAKE_SYSTEM_PROCESSOR}_simd_detect.c + COPY_FILE ${ARCH_DETECT_APPS_DIR}/${_simd_detect_destination} + COPY_FILE_ERROR COPY_MSG + ) +endif() if( COMPILED ) # attemtp to run the compiled app execute_process( diff --git a/cmake/arm_simd_detect.c b/cmake/aarch64_simd_detect.c similarity index 100% rename from cmake/arm_simd_detect.c rename to cmake/aarch64_simd_detect.c From f245828806b27444f60bb3f84adafe832f595755 Mon Sep 17 00:00:00 2001 From: Alberto Scolari Date: Thu, 7 Mar 2024 17:32:42 +0100 Subject: [PATCH 7/8] add reference to kernel docs for HWCAP --- cmake/aarch64_simd_detect.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/cmake/aarch64_simd_detect.c b/cmake/aarch64_simd_detect.c index 68fa7661e..f21bfccff 100644 --- a/cmake/aarch64_simd_detect.c +++ b/cmake/aarch64_simd_detect.c @@ -24,6 +24,8 @@ * * Note that support for SVE2 may be too recent for the kernel/GLIBC version in * use, hence the #ifdef on HWCAP2_SVE2. 
+ * https://docs.kernel.org/arch/arm64/elf_hwcaps.html + * * Also note that SVE (and SVE2) has implementation-dependant vector size, whose * retrieval is currently not implemented; the build infrastructure properly * warns about this case. From e7d7f7202753c2ee962e5cc24ebcf8ee1328ac3c Mon Sep 17 00:00:00 2001 From: Alberto Scolari Date: Thu, 7 Mar 2024 18:04:02 +0100 Subject: [PATCH 8/8] update user docs --- README.md | 35 ++++++++++++++++++----------------- cmake/DetectArchInfo.cmake | 2 +- cmake/aarch64_simd_detect.c | 2 +- docs/Build_and_test_infra.md | 11 +++++++++++ src/get_arch_flags.in | 2 +- 5 files changed, 32 insertions(+), 20 deletions(-) diff --git a/README.md b/README.md index 54c891dd4..e5c66683d 100644 --- a/README.md +++ b/README.md @@ -126,14 +126,10 @@ grbrun ./a.out In more detail, the steps to follow are: -1. Edit the `include/graphblas/base/config.hpp`. In particular, please ensure - that `config::SIMD_SIZE::bytes` defined in that file is set correctly with - respect to the target architecture. - -2. Create an empty directory for building ALP and move into it: +1. Create an empty directory for building ALP and move into it: `mkdir build && cd build`. -3. Invoke the `bootstrap.sh` script located inside the ALP root directory +2. Invoke the `bootstrap.sh` script located inside the ALP root directory `` to generate the build infrastructure via CMake inside the the current directory: @@ -142,9 +138,9 @@ In more detail, the steps to follow are: - note: add `--with-lpf=/path/to/lpf/install/dir` if you have LPF installed and would like to use it. -4. Issue `make -j` to compile the C++11 ALP library for the configured backends. +3. Issue `make -j` to compile the C++11 ALP library for the configured backends. -5. (*Optional*) To later run all unit tests, several datasets must be made +4. (*Optional*) To later run all unit tests, several datasets must be made available. 
Please run the `/tools/downloadDatasets.sh` script for @@ -152,7 +148,7 @@ In more detail, the steps to follow are: b. the option to automatically download them. -6. (*Optional*) To make the ALP documentation, issue `make userdocs`. This +5. (*Optional*) To make the ALP documentation, issue `make userdocs`. This generates both a. LaTeX in `/docs/user/latex/refman.tex`, and @@ -162,7 +158,7 @@ In more detail, the steps to follow are: To build a PDF from the LaTeX sources, cd into the directory mentioned, and issue `make`. -7. (*Optional*) Issue `make -j smoketests` to run a quick set of functional +6. (*Optional*) Issue `make -j smoketests` to run a quick set of functional tests. Please scan the output for any failed tests. If you do this with LPF enabled, and LPF was configured to use an MPI engine (which is the default), and the MPI implementation used is _not_ MPICH, then @@ -171,15 +167,15 @@ In more detail, the steps to follow are: implementation you used, and uncomment the lines directly below each occurrence. -8. (*Optional*) Issue `make -j unittests` to run an exhaustive set of unit +7. (*Optional*) Issue `make -j unittests` to run an exhaustive set of unit tests. Please scan the output for any failed tests. If you do this with LPF enabled, please edit `tests/parse_env.sh` if required as described in step 5. -9. Issue `make -j install` to install ALP into the install directory configured +8. Issue `make -j install` to install ALP into the install directory configured during step 1. -10. (*Optional*) Issue `source /bin/setenv` to make +9. (*Optional*) Issue `source /bin/setenv` to make available the `grbcxx` and `grbrun` compiler wrapper and runner. Congratulations, you are now ready for developing and integrating ALP @@ -230,6 +226,8 @@ and lists technical papers. 
- [Development in ALP](#development-in-alp) - [Acknowledgements](#acknowledgements) - [Citing ALP, ALP/GraphBLAS, and ALP/Pregel](#citing-alp-alpgraphblas-and-alppregel) + - [ALP and ALP/GraphBLAS](#alp-and-alpgraphblas) + - [ALP/Pregel](#alppregel) # Configuration @@ -259,6 +257,8 @@ classes of backends. The main configuration file is found in 8. type used for indexing nonzeroes, as the `NonzeroIndexType` typedef; 9. index type used for vector coordinates, as the `VectorIndexType` typedef. +The most important parameters are automatically detected during the CMake +configuration (vector size, cache line size, L1 Data cache size). Other configuration values in this file are automatically inferred, are fixed non-configurable settings, or are presently not used by any ALP backend. @@ -418,10 +418,11 @@ large outputs is strongly advisable. ### Compilation -Our backends auto-vectorise, hence please recall step 1 from the quick start -guide, and make sure the `include/graphblas/base/config.hpp` file reflects the -correct value for `config::SIMD_SIZE::bytes`. This value must be updated prior -to the compilation and installation of ALP. +Our backends auto-vectorise using the information in +`include/graphblas/base/config.hpp`, especially `config::SIMD_SIZE::bytes`. +This and other values are automatically detected for the CPU the CMake +configuration runs on; the user may however want to set them to different values +manually prior to the compilation and installation of ALP. When targeting different architectures with differing SIMD widths, different ALP installations for different architectures could be maintained.
diff --git a/cmake/DetectArchInfo.cmake b/cmake/DetectArchInfo.cmake index e260f8b2b..3c7128a10 100644 --- a/cmake/DetectArchInfo.cmake +++ b/cmake/DetectArchInfo.cmake @@ -77,7 +77,7 @@ else() set( SIMD_ISA_DETECT_APP ${_simd_detect_destination} ) if( SIMD_ISA STREQUAL "SVE" OR SIMD_ISA STREQUAL "SVE2" ) set( SIMD_SIZE 64 ) - message( WARNING "Detected SIMD ISA ${SIMD_ISA}, whose size is implementation-dependant and currently not detected. Please, consider filing an issue to the authors. Applying default vector size: ${SIMD_SIZE}B" ) + message( WARNING "Detected SIMD ISA ${SIMD_ISA}, whose size is implementation-dependent and currently not detected. Please, consider filing an issue to the authors. Applying default vector size: ${SIMD_SIZE}B" ) else() if( SIMD_ISA STREQUAL "AVX512" ) set( SIMD_SIZE 64 ) diff --git a/cmake/aarch64_simd_detect.c b/cmake/aarch64_simd_detect.c index f21bfccff..094e5880d 100644 --- a/cmake/aarch64_simd_detect.c +++ b/cmake/aarch64_simd_detect.c @@ -26,7 +26,7 @@ * use, hence the #ifdef on HWCAP2_SVE2. * https://docs.kernel.org/arch/arm64/elf_hwcaps.html * - * Also note that SVE (and SVE2) has implementation-dependant vector size, whose + * Also note that SVE (and SVE2) has implementation-dependent vector size, whose * retrieval is currently not implemented; the build infrastructure properly * warns about this case. */ diff --git a/docs/Build_and_test_infra.md b/docs/Build_and_test_infra.md index 8e28e47cb..701e4f78b 100644 --- a/docs/Build_and_test_infra.md +++ b/docs/Build_and_test_infra.md @@ -75,6 +75,17 @@ The building infrastructure allows users to select which backends are to be built together with the relevant build options (dependencies, additional compilation/optimization flags, ...). 
+As part of the CMake configuration, several architecture-dependent features are +detected automatically and reported (e.g., SIMD vector size, L1 Data cache size, +...), which are used to optimize the performance of ALP/GraphBLAS-based +applications. +In case of issues (e.g., failed detection of a feature), a warning +is displayed and sensible defaults are used to let users try ALP/GraphBLAS, even +without optimal settings. +Users should check the configuration output and verify the presence of warnings; +they are also invited to check the normal messages of the configuration output +to make sure the reported information is accurate. + There are **two ways to create the building infrastructure**, depending on the level of control you want over the build options. diff --git a/src/get_arch_flags.in b/src/get_arch_flags.in index cc72ef59e..e573778c7 100755 --- a/src/get_arch_flags.in +++ b/src/get_arch_flags.in @@ -48,7 +48,7 @@ if [[ "${errval}" == "0" ]]; then elif [[ "${simd_isa}" == "NEON" ]]; then SIMD_SIZE=16 elif [[ "${simd_isa}" == "SVE" || "${simd_isa}" == "SVE2" ]]; then - error_echo "Detected SIMD ISA is ${simd_isa}": the detection of its implementation-dependant vector size is currently not implemented. + error_echo "Detected SIMD ISA is ${simd_isa}": the detection of its implementation-dependent vector size is currently not implemented. error_echo "Applying therefore a standard vector size of ${DEFAULT_SIMD_SIZE}" error_echo "Please, consider filing an issue to the authors" SIMD_SIZE="${DEFAULT_SIMD_SIZE}"