From 83386471224da56316d3de126587377a95d54c69 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Thu, 21 Mar 2019 17:33:20 +0000 Subject: [PATCH 01/38] [X86] Use the CPUKind enum from PROC_ALIAS to directly get the CPUKind in fillValidCPUList. We were using getCPUKind which translates the string to the enum also using PROC_ALIAS. This just cuts out the string compares. git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@356686 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Basic/Targets/X86.cpp | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/lib/Basic/Targets/X86.cpp b/lib/Basic/Targets/X86.cpp index 87a954fcfd3..fa5ffd5e801 100644 --- a/lib/Basic/Targets/X86.cpp +++ b/lib/Basic/Targets/X86.cpp @@ -1819,10 +1819,9 @@ void X86TargetInfo::fillValidCPUList(SmallVectorImpl &Values) const { #define PROC(ENUM, STRING, IS64BIT) \ if (IS64BIT || getTriple().getArch() == llvm::Triple::x86) \ Values.emplace_back(STRING); - // Go through CPUKind checking to ensure that the alias is de-aliased and - // 64 bit-ness is checked. + // For aliases we need to lookup the CPUKind to check get the 64-bit ness. #define PROC_ALIAS(ENUM, ALIAS) \ - if (checkCPUKind(getCPUKind(ALIAS))) \ + if (checkCPUKind(CK_##ENUM)) \ Values.emplace_back(ALIAS); #include "clang/Basic/X86Target.def" } From 2f63b89563b3b38799e89c2897eea594314a37bd Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Thu, 21 Mar 2019 17:43:53 +0000 Subject: [PATCH 02/38] [X86] Add __popcntd and __popcntq to ia32intrin.h to match gcc and icc. Remove popcnt feature flag from _popcnt32/_popcnt64 and move to ia32intrin.h to match gcc gcc and icc both implement popcntd and popcntq which we did not. gcc doesn't seem to require a feature flag for the _popcnt32/_popcnt64 spelling and will use a libcall if its not supported. Differential Revision: https://reviews.llvm.org/D59567 git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@356689 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Headers/ia32intrin.h | 42 ++++++++++++++++++++++++++++++++++ lib/Headers/popcntintrin.h | 32 -------------------------- test/CodeGen/popcnt-builtins.c | 31 ++++++++++++++++++------- 3 files changed, 65 insertions(+), 40 deletions(-) diff --git a/lib/Headers/ia32intrin.h b/lib/Headers/ia32intrin.h index dcbb1e0ab51..837ea369cd6 100644 --- a/lib/Headers/ia32intrin.h +++ b/lib/Headers/ia32intrin.h @@ -28,6 +28,48 @@ #ifndef __IA32INTRIN_H #define __IA32INTRIN_H +/** Counts the number of bits in the source operand having a value of 1. + * + * \headerfile + * + * This intrinsic corresponds to the POPCNT instruction or a + * a sequence of arithmetic and logic ops to calculate it. + * + * \param __A + * An unsigned 32-bit integer operand. + * \returns A 32-bit integer containing the number of bits with value 1 in the + * source operand. + */ +static __inline__ int __attribute__((__always_inline__, __nodebug__)) +__popcntd(unsigned int __A) +{ + return __builtin_popcount(__A); +} + +#define _popcnt32(A) __popcntd((A)) + +#ifdef __x86_64__ +/** Counts the number of bits in the source operand having a value of 1. + * + * \headerfile + * + * This intrinsic corresponds to the POPCNT instruction or a + * a sequence of arithmetic and logic ops to calculate it. + * + * \param __A + * An unsigned 64-bit integer operand. + * \returns A 64-bit integer containing the number of bits with value 1 in the + * source operand. + */ +static __inline__ long long __attribute__((__always_inline__, __nodebug__)) +__popcntq(unsigned long long __A) +{ + return __builtin_popcountll(__A); +} + +#define _popcnt64(A) __popcntq((A)) +#endif /* __x86_64__ */ + #ifdef __x86_64__ static __inline__ unsigned long long __attribute__((__always_inline__, __nodebug__)) __readeflags(void) diff --git a/lib/Headers/popcntintrin.h b/lib/Headers/popcntintrin.h index 75ceab9e150..dc4a8bd260d 100644 --- a/lib/Headers/popcntintrin.h +++ b/lib/Headers/popcntintrin.h @@ -43,22 +43,6 @@ _mm_popcnt_u32(unsigned int __A) return __builtin_popcount(__A); } -/// Counts the number of bits in the source operand having a value of 1. -/// -/// \headerfile -/// -/// This intrinsic corresponds to the POPCNT instruction. -/// -/// \param __A -/// A signed 32-bit integer operand. -/// \returns A 32-bit integer containing the number of bits with value 1 in the -/// source operand. -static __inline__ int __DEFAULT_FN_ATTRS -_popcnt32(int __A) -{ - return __builtin_popcount(__A); -} - #ifdef __x86_64__ /// Counts the number of bits in the source operand having a value of 1. /// @@ -75,22 +59,6 @@ _mm_popcnt_u64(unsigned long long __A) { return __builtin_popcountll(__A); } - -/// Counts the number of bits in the source operand having a value of 1. -/// -/// \headerfile -/// -/// This intrinsic corresponds to the POPCNT instruction. -/// -/// \param __A -/// A signed 64-bit integer operand. -/// \returns A 64-bit integer containing the number of bits with value 1 in the -/// source operand. -static __inline__ long long __DEFAULT_FN_ATTRS -_popcnt64(long long __A) -{ - return __builtin_popcountll(__A); -} #endif /* __x86_64__ */ #undef __DEFAULT_FN_ATTRS diff --git a/test/CodeGen/popcnt-builtins.c b/test/CodeGen/popcnt-builtins.c index 1fdb43339a8..800e759bba1 100644 --- a/test/CodeGen/popcnt-builtins.c +++ b/test/CodeGen/popcnt-builtins.c @@ -1,24 +1,39 @@ -// RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +popcnt -emit-llvm -o - | FileCheck %s +// RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +popcnt -emit-llvm -o - | FileCheck %s --check-prefixes=CHECK,CHECK-POPCNT +// RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-apple-darwin -emit-llvm -o - | FileCheck %s -#include +#include -unsigned int test_mm_popcnt_u32(unsigned int __X) { - //CHECK: call i32 @llvm.ctpop.i32 +#ifdef __POPCNT__ +int test_mm_popcnt_u32(unsigned int __X) { + //CHECK-POPCNT: call i32 @llvm.ctpop.i32 return _mm_popcnt_u32(__X); } +#endif -unsigned int test_popcnt_32(int __X) { +int test_popcnt32(unsigned int __X) { //CHECK: call i32 @llvm.ctpop.i32 return _popcnt32(__X); } -unsigned long long test_mm_popcnt_u64(unsigned long long __X) { - //CHECK: call i64 @llvm.ctpop.i64 +int test__popcntd(unsigned int __X) { + //CHECK: call i32 @llvm.ctpop.i32 + return __popcntd(__X); +} + +#ifdef __POPCNT__ +long long test_mm_popcnt_u64(unsigned long long __X) { + //CHECK-POPCNT: call i64 @llvm.ctpop.i64 return _mm_popcnt_u64(__X); } +#endif -unsigned long long test_popcnt_64(long long __X) { +long long test_popcnt64(unsigned long long __X) { //CHECK: call i64 @llvm.ctpop.i64 return _popcnt64(__X); } + +long long test__popcntq(unsigned long long __X) { + //CHECK: call i64 @llvm.ctpop.i64 + return __popcntq(__X); +} From cd6203d849e5085306ca6ea9be7beeb189726b70 Mon Sep 17 00:00:00 2001 From: Alexey Bataev Date: Thu, 21 Mar 2019 19:05:07 +0000 Subject: [PATCH 03/38] [OPENMP]Simplify the check for the predefined allocators, NFC. Previously implemented check required the reevaluation of the already evaluated predefined allocator kind for the global variables. Patch simplifies this evaluation and removes extra code. git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@356699 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Sema/SemaOpenMP.cpp | 46 +++++++---------------------------------- 1 file changed, 7 insertions(+), 39 deletions(-) diff --git a/lib/Sema/SemaOpenMP.cpp b/lib/Sema/SemaOpenMP.cpp index 5d7ad8c5be6..e54652650ed 100644 --- a/lib/Sema/SemaOpenMP.cpp +++ b/lib/Sema/SemaOpenMP.cpp @@ -2219,35 +2219,22 @@ getAllocatorKind(Sema &S, DSAStackTy *Stack, Expr *Allocator) { return OMPAllocateDeclAttr::OMPDefaultMemAlloc; if (Allocator->isTypeDependent() || Allocator->isValueDependent() || Allocator->isInstantiationDependent() || - Allocator->containsUnexpandedParameterPack() || - !Allocator->isEvaluatable(S.getASTContext())) + Allocator->containsUnexpandedParameterPack()) return OMPAllocateDeclAttr::OMPUserDefinedMemAlloc; - bool Suppress = S.getDiagnostics().getSuppressAllDiagnostics(); - S.getDiagnostics().setSuppressAllDiagnostics(/*Val=*/true); auto AllocatorKindRes = OMPAllocateDeclAttr::OMPUserDefinedMemAlloc; for (int I = OMPAllocateDeclAttr::OMPDefaultMemAlloc; I < OMPAllocateDeclAttr::OMPUserDefinedMemAlloc; ++I) { auto AllocatorKind = static_cast(I); Expr *DefAllocator = Stack->getAllocator(AllocatorKind); - // Compare allocator with the predefined allocator and if true - return - // predefined allocator kind. - ExprResult DefAllocRes = S.DefaultLvalueConversion(DefAllocator); - ExprResult AllocRes = S.DefaultLvalueConversion(Allocator); - ExprResult CompareRes = S.CreateBuiltinBinOp( - Allocator->getExprLoc(), BO_EQ, DefAllocRes.get(), AllocRes.get()); - if (!CompareRes.isUsable()) - continue; - bool Result; - if (!CompareRes.get()->EvaluateAsBooleanCondition(Result, - S.getASTContext())) - continue; - if (Result) { + const Expr *AE = Allocator->IgnoreParenImpCasts(); + llvm::FoldingSetNodeID AEId, DAEId; + AE->Profile(AEId, S.getASTContext(), /*Canonical=*/true); + DefAllocator->Profile(DAEId, S.getASTContext(), /*Canonical=*/true); + if (AEId == DAEId) { AllocatorKindRes = AllocatorKind; break; } - } - S.getDiagnostics().setSuppressAllDiagnostics(Suppress); return AllocatorKindRes; } @@ -2325,26 +2312,7 @@ Sema::DeclGroupPtrTy Sema::ActOnOpenMPAllocateDirective( // allocator clause must be a constant expression that evaluates to one of // the predefined memory allocator values. if (Allocator && VD->hasGlobalStorage()) { - bool IsPredefinedAllocator = false; - if (const auto *DRE = - dyn_cast(Allocator->IgnoreParenImpCasts())) { - if (DRE->getType().isConstant(getASTContext())) { - DeclarationName DN = DRE->getDecl()->getDeclName(); - if (DN.isIdentifier()) { - StringRef PredefinedAllocators[] = { - "omp_default_mem_alloc", "omp_large_cap_mem_alloc", - "omp_const_mem_alloc", "omp_high_bw_mem_alloc", - "omp_low_lat_mem_alloc", "omp_cgroup_mem_alloc", - "omp_pteam_mem_alloc", "omp_thread_mem_alloc", - }; - IsPredefinedAllocator = - llvm::any_of(PredefinedAllocators, [&DN](StringRef S) { - return DN.getAsIdentifierInfo()->isStr(S); - }); - } - } - } - if (!IsPredefinedAllocator) { + if (AllocatorKind == OMPAllocateDeclAttr::OMPUserDefinedMemAlloc) { Diag(Allocator->getExprLoc(), diag::err_omp_expected_predefined_allocator) << Allocator->getSourceRange(); From aa23c03a84ca290bab40b6da61aea9b1821d9c44 Mon Sep 17 00:00:00 2001 From: Jordan Rupprecht Date: Thu, 21 Mar 2019 19:13:22 +0000 Subject: [PATCH 04/38] [clang][OpenMP] Fix another test when using libgomp. Similarly to r356614, -fopenmp=libomp needs to be used for some omp-related AST matching. git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@356700 91177308-0d34-0410-b5e6-96231b3b80d8 --- unittests/ASTMatchers/ASTMatchersTest.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/unittests/ASTMatchers/ASTMatchersTest.h b/unittests/ASTMatchers/ASTMatchersTest.h index 3f3118c2ef4..78c551f806f 100644 --- a/unittests/ASTMatchers/ASTMatchersTest.h +++ b/unittests/ASTMatchers/ASTMatchersTest.h @@ -237,13 +237,13 @@ testing::AssertionResult notMatchesWithCuda(const std::string &Code, template testing::AssertionResult matchesWithOpenMP(const std::string &Code, const T &AMatcher) { - return matchesConditionally(Code, AMatcher, true, "-fopenmp"); + return matchesConditionally(Code, AMatcher, true, "-fopenmp=libomp"); } template testing::AssertionResult notMatchesWithOpenMP(const std::string &Code, const T &AMatcher) { - return matchesConditionally(Code, AMatcher, false, "-fopenmp"); + return matchesConditionally(Code, AMatcher, false, "-fopenmp=libomp"); } template From bf77be7a1ebf54d2f3b6260bb905ed04e9314049 Mon Sep 17 00:00:00 2001 From: Alexey Bataev Date: Thu, 21 Mar 2019 19:35:27 +0000 Subject: [PATCH 05/38] [OPENMP]Codegen support for allocate directive on global variables. For the global variables the allocate directive must specify only the predefined allocator. This allocator must be translated into the correct form of the address space for the targets that support different address spaces. git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@356702 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/CGOpenMPRuntime.cpp | 24 +++++++++ lib/CodeGen/CGOpenMPRuntime.h | 5 ++ lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp | 28 ++++++++++ lib/CodeGen/CGOpenMPRuntimeNVPTX.h | 5 ++ lib/CodeGen/CodeGenModule.cpp | 5 ++ test/OpenMP/nvptx_allocate_codegen.cpp | 71 ++++++++++++++++++++++++++ 6 files changed, 138 insertions(+) create mode 100644 test/OpenMP/nvptx_allocate_codegen.cpp diff --git a/lib/CodeGen/CGOpenMPRuntime.cpp b/lib/CodeGen/CGOpenMPRuntime.cpp index 6dbc244bb29..d3736b7244d 100644 --- a/lib/CodeGen/CGOpenMPRuntime.cpp +++ b/lib/CodeGen/CGOpenMPRuntime.cpp @@ -8928,6 +8928,30 @@ void CGOpenMPRuntime::adjustTargetSpecificDataForLambdas( " Expected target-based directive."); } +bool CGOpenMPRuntime::hasAllocateAttributeForGlobalVar(const VarDecl *VD, + LangAS &AS) { + if (!VD || !VD->hasAttr()) + return false; + const auto *A = VD->getAttr(); + switch(A->getAllocatorType()) { + case OMPAllocateDeclAttr::OMPDefaultMemAlloc: + // Not supported, fallback to the default mem space. + case OMPAllocateDeclAttr::OMPLargeCapMemAlloc: + case OMPAllocateDeclAttr::OMPCGroupMemAlloc: + case OMPAllocateDeclAttr::OMPHighBWMemAlloc: + case OMPAllocateDeclAttr::OMPLowLatMemAlloc: + case OMPAllocateDeclAttr::OMPThreadMemAlloc: + case OMPAllocateDeclAttr::OMPConstMemAlloc: + case OMPAllocateDeclAttr::OMPPTeamMemAlloc: + AS = LangAS::Default; + return true; + case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc: + llvm_unreachable("Expected predefined allocator for the variables with the " + "static storage."); + } + return false; +} + CGOpenMPRuntime::DisableAutoDeclareTargetRAII::DisableAutoDeclareTargetRAII( CodeGenModule &CGM) : CGM(CGM) { diff --git a/lib/CodeGen/CGOpenMPRuntime.h b/lib/CodeGen/CGOpenMPRuntime.h index 2896a659b98..7b2c0f1b914 100644 --- a/lib/CodeGen/CGOpenMPRuntime.h +++ b/lib/CodeGen/CGOpenMPRuntime.h @@ -1598,6 +1598,11 @@ class CGOpenMPRuntime { /// Perform check on requires decl to ensure that target architecture /// supports unified addressing virtual void checkArchForUnifiedAddressing(const OMPRequiresDecl *D) const {} + + /// Checks if the variable has associated OMPAllocateDeclAttr attribute with + /// the predefined allocator and translates it into the corresponding address + /// space. + virtual bool hasAllocateAttributeForGlobalVar(const VarDecl *VD, LangAS &AS); }; /// Class supports emissionof SIMD-only code. diff --git a/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp b/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp index 59066e8813d..7de16032269 100644 --- a/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp +++ b/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp @@ -4840,6 +4840,34 @@ unsigned CGOpenMPRuntimeNVPTX::getDefaultFirstprivateAddressSpace() const { return CGM.getContext().getTargetAddressSpace(LangAS::cuda_constant); } +bool CGOpenMPRuntimeNVPTX::hasAllocateAttributeForGlobalVar(const VarDecl *VD, + LangAS &AS) { + if (!VD || !VD->hasAttr()) + return false; + const auto *A = VD->getAttr(); + switch(A->getAllocatorType()) { + case OMPAllocateDeclAttr::OMPDefaultMemAlloc: + // Not supported, fallback to the default mem space. + case OMPAllocateDeclAttr::OMPThreadMemAlloc: + case OMPAllocateDeclAttr::OMPLargeCapMemAlloc: + case OMPAllocateDeclAttr::OMPCGroupMemAlloc: + case OMPAllocateDeclAttr::OMPHighBWMemAlloc: + case OMPAllocateDeclAttr::OMPLowLatMemAlloc: + AS = LangAS::Default; + return true; + case OMPAllocateDeclAttr::OMPConstMemAlloc: + AS = LangAS::cuda_constant; + return true; + case OMPAllocateDeclAttr::OMPPTeamMemAlloc: + AS = LangAS::cuda_shared; + return true; + case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc: + llvm_unreachable("Expected predefined allocator for the variables with the " + "static storage."); + } + return false; +} + // Get current CudaArch and ignore any unknown values static CudaArch getCudaArch(CodeGenModule &CGM) { if (!CGM.getTarget().hasFeature("ptx")) diff --git a/lib/CodeGen/CGOpenMPRuntimeNVPTX.h b/lib/CodeGen/CGOpenMPRuntimeNVPTX.h index 8a92c500b8f..6709ae322a6 100644 --- a/lib/CodeGen/CGOpenMPRuntimeNVPTX.h +++ b/lib/CodeGen/CGOpenMPRuntimeNVPTX.h @@ -389,6 +389,11 @@ class CGOpenMPRuntimeNVPTX : public CGOpenMPRuntime { /// address space by default. unsigned getDefaultFirstprivateAddressSpace() const override; + /// Checks if the variable has associated OMPAllocateDeclAttr attribute with + /// the predefined allocator and translates it into the corresponding address + /// space. + bool hasAllocateAttributeForGlobalVar(const VarDecl *VD, LangAS &AS) override; + private: /// Track the execution mode when codegening directives within a target /// region. The appropriate mode (SPMD/NON-SPMD) is set on entry to the diff --git a/lib/CodeGen/CodeGenModule.cpp b/lib/CodeGen/CodeGenModule.cpp index 3a9df23a495..b9d4ee9f8c4 100644 --- a/lib/CodeGen/CodeGenModule.cpp +++ b/lib/CodeGen/CodeGenModule.cpp @@ -3387,6 +3387,11 @@ LangAS CodeGenModule::GetGlobalVarAddressSpace(const VarDecl *D) { return LangAS::cuda_device; } + if (LangOpts.OpenMP) { + LangAS AS; + if (OpenMPRuntime->hasAllocateAttributeForGlobalVar(D, AS)) + return AS; + } return getTargetCodeGenInfo().getGlobalVarAddressSpace(*this, D); } diff --git a/test/OpenMP/nvptx_allocate_codegen.cpp b/test/OpenMP/nvptx_allocate_codegen.cpp new file mode 100644 index 00000000000..e9b9509334e --- /dev/null +++ b/test/OpenMP/nvptx_allocate_codegen.cpp @@ -0,0 +1,71 @@ +// RUN: %clang_cc1 -verify -fopenmp -triple x86_64-apple-darwin10.6.0 -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc -o %t-host.bc %s +// RUN: %clang_cc1 -verify -fopenmp -triple nvptx64-nvidia-cuda -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-host.bc -o - | FileCheck %s +// expected-no-diagnostics + +#ifndef HEADER +#define HEADER + +#pragma omp declare target +typedef void **omp_allocator_handle_t; +extern const omp_allocator_handle_t omp_default_mem_alloc; +extern const omp_allocator_handle_t omp_large_cap_mem_alloc; +extern const omp_allocator_handle_t omp_const_mem_alloc; +extern const omp_allocator_handle_t omp_high_bw_mem_alloc; +extern const omp_allocator_handle_t omp_low_lat_mem_alloc; +extern const omp_allocator_handle_t omp_cgroup_mem_alloc; +extern const omp_allocator_handle_t omp_pteam_mem_alloc; +extern const omp_allocator_handle_t omp_thread_mem_alloc; + +// CHECK-DAG: @{{.+}}St1{{.+}}b{{.+}} = external global i32, +// CHECK-DAG: @a = global i32 0, +// CHECK-DAG: @b = addrspace(4) global i32 0, +// CHECK-DAG: @c = global i32 0, +// CHECK-DAG: @d = global %struct.St1 zeroinitializer, +// CHECK-DAG: @{{.+}}ns{{.+}}a{{.+}} = addrspace(3) global i32 0, +// CHECK-DAG: @{{.+}}main{{.+}}a{{.*}} = internal global i32 0, +// CHECK-DAG: @{{.+}}ST{{.+}}m{{.+}} = external global i32, +struct St{ + int a; +}; + +struct St1{ + int a; + static int b; +#pragma omp allocate(b) allocator(omp_default_mem_alloc) +} d; + +int a, b, c; +#pragma omp allocate(a) allocator(omp_large_cap_mem_alloc) +#pragma omp allocate(b) allocator(omp_const_mem_alloc) +#pragma omp allocate(d, c) allocator(omp_high_bw_mem_alloc) + +template +struct ST { + static T m; + #pragma omp allocate(m) allocator(omp_low_lat_mem_alloc) +}; + +template T foo() { + T v; + #pragma omp allocate(v) allocator(omp_cgroup_mem_alloc) + v = ST::m; + return v; +} + +namespace ns{ + int a; +} +#pragma omp allocate(ns::a) allocator(omp_pteam_mem_alloc) + +int main () { + static int a; +#pragma omp allocate(a) allocator(omp_thread_mem_alloc) + a=2; + double b = 3; +#pragma omp allocate(b) + return (foo()); +} + +extern template int ST::m; +#pragma omp end declare target +#endif From 40a2645eeea93efdd50b577577fd0db5ce95a90a Mon Sep 17 00:00:00 2001 From: Richard Smith Date: Thu, 21 Mar 2019 19:44:17 +0000 Subject: [PATCH 06/38] Refactor handling of #include directives to cleanly separate the "skipped header because it should be imported as a module" cases from the "skipped header because of some other reason" cases. git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@356704 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Lex/PPDirectives.cpp | 82 ++++++++++++++++++++++++---------------- 1 file changed, 49 insertions(+), 33 deletions(-) diff --git a/lib/Lex/PPDirectives.cpp b/lib/Lex/PPDirectives.cpp index 8d4b9f06569..179b0cbf696 100644 --- a/lib/Lex/PPDirectives.cpp +++ b/lib/Lex/PPDirectives.cpp @@ -1813,26 +1813,26 @@ void Preprocessor::HandleIncludeDirective(SourceLocation HashLoc, return; } - // Should we enter the source file? Set to false if either the source file is + // Should we enter the source file? Set to Skip if either the source file is // known to have no effect beyond its effect on module visibility -- that is, - // if it's got an include guard that is already defined or is a modular header - // we've imported or already built. - bool ShouldEnter = true; + // if it's got an include guard that is already defined, set to Import if it + // is a modular header we've already built and should import. + enum { Enter, Import, Skip, IncludeLimitReached } Action = Enter; if (PPOpts->SingleFileParseMode) - ShouldEnter = false; + Action = IncludeLimitReached; // If we've reached the max allowed include depth, it is usually due to an // include cycle. Don't enter already processed files again as it can lead to // reaching the max allowed include depth again. - if (ShouldEnter && HasReachedMaxIncludeDepth && File && + if (Action == Enter && HasReachedMaxIncludeDepth && File && HeaderInfo.getFileInfo(File).NumIncludes) - ShouldEnter = false; + Action = IncludeLimitReached; // Determine whether we should try to import the module for this #include, if // there is one. Don't do so if precompiled module support is disabled or we // are processing this module textually (because we're building the module). - if (ShouldEnter && File && SuggestedModule && getLangOpts().Modules && + if (Action == Enter && File && SuggestedModule && getLangOpts().Modules && !isForModuleBuilding(SuggestedModule.getModule(), getLangOpts().CurrentModule, getLangOpts().ModuleName)) { @@ -1872,9 +1872,9 @@ void Preprocessor::HandleIncludeDirective(SourceLocation HashLoc, assert((Imported == nullptr || Imported == SuggestedModule.getModule()) && "the imported module is different than the suggested one"); - if (Imported) - ShouldEnter = false; - else if (Imported.isMissingExpected()) { + if (Imported) { + Action = Import; + } else if (Imported.isMissingExpected()) { // We failed to find a submodule that we assumed would exist (because it // was in the directory of an umbrella header, for instance), but no // actual module containing it exists (because the umbrella header is @@ -1907,13 +1907,18 @@ void Preprocessor::HandleIncludeDirective(SourceLocation HashLoc, // Ask HeaderInfo if we should enter this #include file. If not, #including // this file will have no effect. - bool SkipHeader = false; - if (ShouldEnter && File && + if (Action == Enter && File && !HeaderInfo.ShouldEnterIncludeFile(*this, File, isImport, getLangOpts().Modules, SuggestedModule.getModule())) { - ShouldEnter = false; - SkipHeader = true; + // Even if we've already preprocessed this header once and know that we + // don't need to see its contents again, we still need to import it if it's + // modular because we might not have imported it from this submodule before. + // + // FIXME: We don't do this when compiling a PCH because the AST + // serialization layer can't cope with it. This means we get local + // submodule visibility semantics wrong in that case. + Action = (SuggestedModule && !getLangOpts().CompilingPCH) ? Import : Skip; } if (Callbacks) { @@ -1922,8 +1927,9 @@ void Preprocessor::HandleIncludeDirective(SourceLocation HashLoc, HashLoc, IncludeTok, LangOpts.MSVCCompat ? NormalizedPath.c_str() : Filename, isAngled, FilenameRange, File, SearchPath, RelativePath, - ShouldEnter ? nullptr : SuggestedModule.getModule(), FileCharacter); - if (SkipHeader && !SuggestedModule.getModule()) + Action == Import ? SuggestedModule.getModule() : nullptr, + FileCharacter); + if (Action == Skip) Callbacks->FileSkipped(*File, FilenameTok, FileCharacter); } @@ -1968,28 +1974,33 @@ void Preprocessor::HandleIncludeDirective(SourceLocation HashLoc, } } - // If we don't need to enter the file, stop now. - if (!ShouldEnter) { + switch (Action) { + case Skip: + // If we don't need to enter the file, stop now. + return; + + case IncludeLimitReached: + // If we reached our include limit and don't want to enter any more files, + // don't go any further. + return; + + case Import: { // If this is a module import, make it visible if needed. - if (auto *M = SuggestedModule.getModule()) { - // When building a pch, -fmodule-name tells the compiler to textually - // include headers in the specified module. But it is possible that - // ShouldEnter is false because we are skipping the header. In that - // case, We are not importing the specified module. - if (SkipHeader && getLangOpts().CompilingPCH && - isForModuleBuilding(M, getLangOpts().CurrentModule, - getLangOpts().ModuleName)) - return; + Module *M = SuggestedModule.getModule(); + assert(M && "no module to import"); - makeModuleVisible(M, HashLoc); + makeModuleVisible(M, HashLoc); - if (IncludeTok.getIdentifierInfo()->getPPKeywordID() != - tok::pp___include_macros) - EnterAnnotationToken(DirectiveRange, tok::annot_module_include, M); - } + if (IncludeTok.getIdentifierInfo()->getPPKeywordID() != + tok::pp___include_macros) + EnterAnnotationToken(DirectiveRange, tok::annot_module_include, M); return; } + case Enter: + break; + } + // Check that we don't have infinite #include recursion. if (IncludeMacroStack.size() == MaxAllowedIncludeStackDepth-1) { Diag(FilenameTok, diag::err_pp_include_too_deep); @@ -2024,6 +2035,11 @@ void Preprocessor::HandleIncludeDirective(SourceLocation HashLoc, // When building a pch, -fmodule-name tells the compiler to textually // include headers in the specified module. We are not building the // specified module. + // + // FIXME: This is the wrong way to handle this. We should produce a PCH + // that behaves the same as the header would behave in a compilation using + // that PCH, which means we should enter the submodule. We need to teach + // the AST serialization layer to deal with the resulting AST. if (getLangOpts().CompilingPCH && isForModuleBuilding(M, getLangOpts().CurrentModule, getLangOpts().ModuleName)) From 4fe8e717896ca103376be6a3cb30d648ef359e6a Mon Sep 17 00:00:00 2001 From: Akira Hatanaka Date: Thu, 21 Mar 2019 19:59:49 +0000 Subject: [PATCH 07/38] [CodeGen][ObjC] Annotate calls to objc_retainAutoreleasedReturnValue with notail on x86-64. On x86-64, the epilogue code inserted before the tail jump blocks the autoreleased return optimization. rdar://problem/38675807 Differential Revision: https://reviews.llvm.org/D59656 git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@356705 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/CGObjC.cpp | 27 +++++++----- lib/CodeGen/TargetInfo.cpp | 6 +++ lib/CodeGen/TargetInfo.h | 6 +++ test/CodeGenObjC/arc-blocks.m | 8 ++-- test/CodeGenObjC/arc-foreach.m | 2 +- test/CodeGenObjC/arc-literals.m | 6 +-- test/CodeGenObjC/arc-precise-lifetime.m | 16 +++---- test/CodeGenObjC/arc-property.m | 2 +- test/CodeGenObjC/arc-related-result-type.m | 2 +- test/CodeGenObjC/arc-ternary-op.m | 2 +- test/CodeGenObjC/arc-unsafeclaim.m | 10 ++--- test/CodeGenObjC/arc-with-atthrow.m | 2 +- test/CodeGenObjC/arc.m | 44 +++++++++---------- .../objc-arc-container-subscripting.m | 2 +- test/CodeGenObjC/os_log.m | 4 +- .../arc-forwarded-lambda-call.mm | 4 +- test/CodeGenObjCXX/arc.mm | 6 +-- .../inheriting-constructor-cleanup.mm | 2 +- test/CodeGenObjCXX/literals.mm | 8 ++-- 19 files changed, 88 insertions(+), 71 deletions(-) diff --git a/lib/CodeGen/CGObjC.cpp b/lib/CodeGen/CGObjC.cpp index 561f21afdb1..69ced587957 100644 --- a/lib/CodeGen/CGObjC.cpp +++ b/lib/CodeGen/CGObjC.cpp @@ -1958,10 +1958,10 @@ static void setARCRuntimeFunctionLinkage(CodeGenModule &CGM, /// Perform an operation having the signature /// i8* (i8*) /// where a null input causes a no-op and returns null. -static llvm::Value * -emitARCValueOperation(CodeGenFunction &CGF, llvm::Value *value, - llvm::Type *returnType, llvm::Function *&fn, - llvm::Intrinsic::ID IntID, bool isTailCall = false) { +static llvm::Value *emitARCValueOperation( + CodeGenFunction &CGF, llvm::Value *value, llvm::Type *returnType, + llvm::Function *&fn, llvm::Intrinsic::ID IntID, + llvm::CallInst::TailCallKind tailKind = llvm::CallInst::TCK_None) { if (isa(value)) return value; @@ -1976,8 +1976,7 @@ emitARCValueOperation(CodeGenFunction &CGF, llvm::Value *value, // Call the function. llvm::CallInst *call = CGF.EmitNounwindRuntimeCall(fn, value); - if (isTailCall) - call->setTailCall(); + call->setTailCallKind(tailKind); // Cast the result back to the original type. return CGF.Builder.CreateBitCast(call, origType); @@ -2187,9 +2186,15 @@ static void emitAutoreleasedReturnValueMarker(CodeGenFunction &CGF) { llvm::Value * CodeGenFunction::EmitARCRetainAutoreleasedReturnValue(llvm::Value *value) { emitAutoreleasedReturnValueMarker(*this); - return emitARCValueOperation(*this, value, nullptr, - CGM.getObjCEntrypoints().objc_retainAutoreleasedReturnValue, - llvm::Intrinsic::objc_retainAutoreleasedReturnValue); + llvm::CallInst::TailCallKind tailKind = + CGM.getTargetCodeGenInfo() + .shouldSuppressTailCallsOfRetainAutoreleasedReturnValue() + ? llvm::CallInst::TCK_NoTail + : llvm::CallInst::TCK_None; + return emitARCValueOperation( + *this, value, nullptr, + CGM.getObjCEntrypoints().objc_retainAutoreleasedReturnValue, + llvm::Intrinsic::objc_retainAutoreleasedReturnValue, tailKind); } /// Claim a possibly-autoreleased return value at +0. This is only @@ -2326,7 +2331,7 @@ CodeGenFunction::EmitARCAutoreleaseReturnValue(llvm::Value *value) { return emitARCValueOperation(*this, value, nullptr, CGM.getObjCEntrypoints().objc_autoreleaseReturnValue, llvm::Intrinsic::objc_autoreleaseReturnValue, - /*isTailCall*/ true); + llvm::CallInst::TCK_Tail); } /// Do a fused retain/autorelease of the given object. @@ -2336,7 +2341,7 @@ CodeGenFunction::EmitARCRetainAutoreleaseReturnValue(llvm::Value *value) { return emitARCValueOperation(*this, value, nullptr, CGM.getObjCEntrypoints().objc_retainAutoreleaseReturnValue, llvm::Intrinsic::objc_retainAutoreleaseReturnValue, - /*isTailCall*/ true); + llvm::CallInst::TCK_Tail); } /// Do a fused retain/autorelease of the given object. diff --git a/lib/CodeGen/TargetInfo.cpp b/lib/CodeGen/TargetInfo.cpp index 63e991c9370..f48f19966e5 100644 --- a/lib/CodeGen/TargetInfo.cpp +++ b/lib/CodeGen/TargetInfo.cpp @@ -2268,6 +2268,12 @@ class X86_64TargetCodeGenInfo : public TargetCodeGenInfo { return static_cast(TargetCodeGenInfo::getABIInfo()); } + /// Disable tail call on x86-64. The epilogue code before the tail jump blocks + /// the autoreleaseRV/retainRV optimization. + bool shouldSuppressTailCallsOfRetainAutoreleasedReturnValue() const override { + return true; + } + int getDwarfEHStackPointer(CodeGen::CodeGenModule &CGM) const override { return 7; } diff --git a/lib/CodeGen/TargetInfo.h b/lib/CodeGen/TargetInfo.h index b5d5c1fb300..8a4154030ce 100644 --- a/lib/CodeGen/TargetInfo.h +++ b/lib/CodeGen/TargetInfo.h @@ -156,6 +156,12 @@ class TargetCodeGenInfo { return ""; } + /// Determine whether a call to objc_retainAutoreleasedReturnValue should be + /// marked as 'notail'. + virtual bool shouldSuppressTailCallsOfRetainAutoreleasedReturnValue() const { + return false; + } + /// Return a constant used by UBSan as a signature to identify functions /// possessing type information, or 0 if the platform is unsupported. virtual llvm::Constant * diff --git a/test/CodeGenObjC/arc-blocks.m b/test/CodeGenObjC/arc-blocks.m index 47e2723b511..e64a7e4c2fe 100644 --- a/test/CodeGenObjC/arc-blocks.m +++ b/test/CodeGenObjC/arc-blocks.m @@ -127,7 +127,7 @@ void test4(void) { // CHECK-NEXT: store i32 838860800, i32* [[T0]] // CHECK: [[SLOT:%.*]] = getelementptr inbounds [[BYREF_T]], [[BYREF_T]]* [[VAR]], i32 0, i32 6 // CHECK-NEXT: [[T0:%.*]] = call i8* @test4_source() - // CHECK-NEXT: [[T1:%.*]] = call i8* @llvm.objc.retainAutoreleasedReturnValue(i8* [[T0]]) + // CHECK-NEXT: [[T1:%.*]] = notail call i8* @llvm.objc.retainAutoreleasedReturnValue(i8* [[T0]]) // CHECK-NEXT: store i8* [[T1]], i8** [[SLOT]] // CHECK-NEXT: [[SLOT:%.*]] = getelementptr inbounds [[BYREF_T]], [[BYREF_T]]* [[VAR]], i32 0, i32 6 // 0x42800000 - has signature, copy/dispose helpers, as well as BLOCK_HAS_EXTENDED_LAYOUT @@ -181,7 +181,7 @@ void test5(void) { // CHECK-NEXT: [[VARPTR1:%.*]] = bitcast i8** [[VAR]] to i8* // CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 8, i8* [[VARPTR1]]) // CHECK: [[T0:%.*]] = call i8* @test5_source() - // CHECK-NEXT: [[T1:%.*]] = call i8* @llvm.objc.retainAutoreleasedReturnValue(i8* [[T0]]) + // CHECK-NEXT: [[T1:%.*]] = notail call i8* @llvm.objc.retainAutoreleasedReturnValue(i8* [[T0]]) // CHECK-NEXT: store i8* [[T1]], i8** [[VAR]], // CHECK-NEXT: call void @llvm.objc.release(i8* [[T1]]) // 0x40800000 - has signature but no copy/dispose, as well as BLOCK_HAS_EXTENDED_LAYOUT @@ -212,7 +212,7 @@ void test6(void) { // CHECK-NEXT: store i32 1107296256, i32* [[T0]] // CHECK: [[SLOT:%.*]] = getelementptr inbounds [[BYREF_T]], [[BYREF_T]]* [[VAR]], i32 0, i32 6 // CHECK-NEXT: [[T0:%.*]] = call i8* @test6_source() - // CHECK-NEXT: [[T1:%.*]] = call i8* @llvm.objc.retainAutoreleasedReturnValue(i8* [[T0]]) + // CHECK-NEXT: [[T1:%.*]] = notail call i8* @llvm.objc.retainAutoreleasedReturnValue(i8* [[T0]]) // CHECK-NEXT: call i8* @llvm.objc.initWeak(i8** [[SLOT]], i8* [[T1]]) // CHECK-NEXT: call void @llvm.objc.release(i8* [[T1]]) // CHECK-NEXT: [[SLOT:%.*]] = getelementptr inbounds [[BYREF_T]], [[BYREF_T]]* [[VAR]], i32 0, i32 6 @@ -258,7 +258,7 @@ void test7(void) { // CHECK: [[VAR:%.*]] = alloca i8*, // CHECK-NEXT: [[BLOCK:%.*]] = alloca [[BLOCK_T:<{.*}>]], // CHECK: [[T0:%.*]] = call i8* @test7_source() - // CHECK-NEXT: [[T1:%.*]] = call i8* @llvm.objc.retainAutoreleasedReturnValue(i8* [[T0]]) + // CHECK-NEXT: [[T1:%.*]] = notail call i8* @llvm.objc.retainAutoreleasedReturnValue(i8* [[T0]]) // CHECK-NEXT: call i8* @llvm.objc.initWeak(i8** [[VAR]], i8* [[T1]]) // CHECK-NEXT: call void @llvm.objc.release(i8* [[T1]]) // 0x42800000 - has signature, copy/dispose helpers, as well as BLOCK_HAS_EXTENDED_LAYOUT diff --git a/test/CodeGenObjC/arc-foreach.m b/test/CodeGenObjC/arc-foreach.m index c8c7120bae7..575bb0b711e 100644 --- a/test/CodeGenObjC/arc-foreach.m +++ b/test/CodeGenObjC/arc-foreach.m @@ -139,7 +139,7 @@ void test2(Test2 *a) { // CHECK-LP64-LABEL: define void @test2( // CHECK-LP64: [[T0:%.*]] = call [[ARRAY_T]]* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to [[ARRAY_T]]* (i8*, i8*)*)( // CHECK-LP64-NEXT: [[T1:%.*]] = bitcast [[ARRAY_T]]* [[T0]] to i8* -// CHECK-LP64-NEXT: [[T2:%.*]] = call i8* @llvm.objc.retainAutoreleasedReturnValue(i8* [[T1]]) +// CHECK-LP64-NEXT: [[T2:%.*]] = notail call i8* @llvm.objc.retainAutoreleasedReturnValue(i8* [[T1]]) // CHECK-LP64-NEXT: [[COLL:%.*]] = bitcast i8* [[T2]] to [[ARRAY_T]]* // Make sure it's not immediately released before starting the iteration. diff --git a/test/CodeGenObjC/arc-literals.m b/test/CodeGenObjC/arc-literals.m index a9cb7691380..2e613401d1c 100644 --- a/test/CodeGenObjC/arc-literals.m +++ b/test/CodeGenObjC/arc-literals.m @@ -59,7 +59,7 @@ void test_array(id a, id b) { // CHECK-NEXT: [[T1:%.*]] = bitcast [[CLASS_T]]* [[T0]] to i8* // CHECK-NEXT: [[T2:%.*]] = bitcast [2 x i8*]* [[OBJECTS]] to i8** // CHECK-NEXT: [[T3:%.*]] = call i8* bitcast ({{.*@objc_msgSend.*}})(i8* [[T1]], i8* [[SEL]], i8** [[T2]], i64 2) - // CHECK-NEXT: [[T4:%.*]] = call i8* @llvm.objc.retainAutoreleasedReturnValue(i8* [[T3]]) + // CHECK-NEXT: [[T4:%.*]] = notail call i8* @llvm.objc.retainAutoreleasedReturnValue(i8* [[T3]]) // CHECK: call void (...) @llvm.objc.clang.arc.use(i8* [[V0]], i8* [[V1]]) id arr = @[a, b]; @@ -103,7 +103,7 @@ void test_dictionary(id k1, id o1, id k2, id o2) { // CHECK-NEXT: [[T2:%.*]] = bitcast [2 x i8*]* [[OBJECTS]] to i8** // CHECK-NEXT: [[T3:%.*]] = bitcast [2 x i8*]* [[KEYS]] to i8** // CHECK-NEXT: [[T4:%.*]] = call i8* bitcast ({{.*@objc_msgSend.*}})(i8* [[T1]], i8* [[SEL]], i8** [[T2]], i8** [[T3]], i64 2) - // CHECK-NEXT: [[T5:%.*]] = call i8* @llvm.objc.retainAutoreleasedReturnValue(i8* [[T4]]) + // CHECK-NEXT: [[T5:%.*]] = notail call i8* @llvm.objc.retainAutoreleasedReturnValue(i8* [[T4]]) // CHECK-NEXT: call void (...) @llvm.objc.clang.arc.use(i8* [[V0]], i8* [[V1]], i8* [[V2]], i8* [[V3]]) id dict = @{ k1 : o1, k2 : o2 }; @@ -135,7 +135,7 @@ void test_property(B *b) { // CHECK-NEXT: [[T1:%.*]] = bitcast // CHECK-NEXT: [[T2:%.*]] = call [[B:%.*]]* bitcast ({{.*}} @objc_msgSend to {{.*}})(i8* [[T1]], i8* [[SEL]]) // CHECK-NEXT: [[T3:%.*]] = bitcast [[B]]* [[T2]] to i8* - // CHECK-NEXT: [[T4:%.*]] = call i8* @llvm.objc.retainAutoreleasedReturnValue(i8* [[T3]]) + // CHECK-NEXT: [[T4:%.*]] = notail call i8* @llvm.objc.retainAutoreleasedReturnValue(i8* [[T3]]) // CHECK-NEXT: [[V0:%.*]] = bitcast i8* [[T4]] to [[B]]* // CHECK-NEXT: [[V1:%.*]] = bitcast [[B]]* [[V0]] to i8* diff --git a/test/CodeGenObjC/arc-precise-lifetime.m b/test/CodeGenObjC/arc-precise-lifetime.m index 4c563c5b844..bbde18ee6ca 100644 --- a/test/CodeGenObjC/arc-precise-lifetime.m +++ b/test/CodeGenObjC/arc-precise-lifetime.m @@ -43,7 +43,7 @@ void test1a_message(void) { // CHECK: call void @llvm.lifetime.start.p0i8(i64 8, i8* [[PTRPTR1]]) // CHECK: [[T0:%.*]] = call [[TEST1:%.*]]* @test1_helper() // CHECK-NEXT: [[T1:%.*]] = bitcast [[TEST1]]* [[T0]] to i8* - // CHECK-NEXT: [[T2:%.*]] = call i8* @llvm.objc.retainAutoreleasedReturnValue(i8* [[T1]]) + // CHECK-NEXT: [[T2:%.*]] = notail call i8* @llvm.objc.retainAutoreleasedReturnValue(i8* [[T1]]) // CHECK-NEXT: [[T3:%.*]] = bitcast i8* [[T2]] to [[TEST1]]* // CHECK-NEXT: store [[TEST1]]* [[T3]] // CHECK-NEXT: [[CPTR1:%.*]] = bitcast i8** [[C]] to i8* @@ -77,7 +77,7 @@ void test1a_property(void) { // CHECK: call void @llvm.lifetime.start.p0i8(i64 8, i8* [[PTRPTR1]]) // CHECK: [[T0:%.*]] = call [[TEST1:%.*]]* @test1_helper() // CHECK-NEXT: [[T1:%.*]] = bitcast [[TEST1]]* [[T0]] to i8* - // CHECK-NEXT: [[T2:%.*]] = call i8* @llvm.objc.retainAutoreleasedReturnValue(i8* [[T1]]) + // CHECK-NEXT: [[T2:%.*]] = notail call i8* @llvm.objc.retainAutoreleasedReturnValue(i8* [[T1]]) // CHECK-NEXT: [[T3:%.*]] = bitcast i8* [[T2]] to [[TEST1]]* // CHECK-NEXT: store [[TEST1]]* [[T3]] // CHECK-NEXT: [[CPTR1:%.*]] = bitcast i8** [[C]] to i8* @@ -111,7 +111,7 @@ void test1b_message(void) { // CHECK: call void @llvm.lifetime.start.p0i8(i64 8, i8* [[PTRPTR1]]) // CHECK: [[T0:%.*]] = call [[TEST1:%.*]]* @test1_helper() // CHECK-NEXT: [[T1:%.*]] = bitcast [[TEST1]]* [[T0]] to i8* - // CHECK-NEXT: [[T2:%.*]] = call i8* @llvm.objc.retainAutoreleasedReturnValue(i8* [[T1]]) + // CHECK-NEXT: [[T2:%.*]] = notail call i8* @llvm.objc.retainAutoreleasedReturnValue(i8* [[T1]]) // CHECK-NEXT: [[T3:%.*]] = bitcast i8* [[T2]] to [[TEST1]]* // CHECK-NEXT: store [[TEST1]]* [[T3]] // CHECK-NEXT: [[CPTR1:%.*]] = bitcast i8** [[C]] to i8* @@ -142,7 +142,7 @@ void test1b_property(void) { // CHECK: call void @llvm.lifetime.start.p0i8(i64 8, i8* [[PTRPTR1]]) // CHECK: [[T0:%.*]] = call [[TEST1:%.*]]* @test1_helper() // CHECK-NEXT: [[T1:%.*]] = bitcast [[TEST1]]* [[T0]] to i8* - // CHECK-NEXT: [[T2:%.*]] = call i8* @llvm.objc.retainAutoreleasedReturnValue(i8* [[T1]]) + // CHECK-NEXT: [[T2:%.*]] = notail call i8* @llvm.objc.retainAutoreleasedReturnValue(i8* [[T1]]) // CHECK-NEXT: [[T3:%.*]] = bitcast i8* [[T2]] to [[TEST1]]* // CHECK-NEXT: store [[TEST1]]* [[T3]] // CHECK-NEXT: [[CPTR1:%.*]] = bitcast i8** [[C]] to i8* @@ -173,7 +173,7 @@ void test1c_message(void) { // CHECK: call void @llvm.lifetime.start.p0i8(i64 8, i8* [[PTRPTR1]]) // CHECK: [[T0:%.*]] = call [[TEST1:%.*]]* @test1_helper() // CHECK-NEXT: [[T1:%.*]] = bitcast [[TEST1]]* [[T0]] to i8* - // CHECK-NEXT: [[T2:%.*]] = call i8* @llvm.objc.retainAutoreleasedReturnValue(i8* [[T1]]) + // CHECK-NEXT: [[T2:%.*]] = notail call i8* @llvm.objc.retainAutoreleasedReturnValue(i8* [[T1]]) // CHECK-NEXT: [[T3:%.*]] = bitcast i8* [[T2]] to [[TEST1]]* // CHECK-NEXT: store [[TEST1]]* [[T3]] // CHECK-NEXT: [[PCPTR1:%.*]] = bitcast i8** [[PC]] to i8* @@ -206,7 +206,7 @@ void test1c_property(void) { // CHECK: call void @llvm.lifetime.start.p0i8(i64 8, i8* [[PTRPTR1]]) // CHECK: [[T0:%.*]] = call [[TEST1:%.*]]* @test1_helper() // CHECK-NEXT: [[T1:%.*]] = bitcast [[TEST1]]* [[T0]] to i8* - // CHECK-NEXT: [[T2:%.*]] = call i8* @llvm.objc.retainAutoreleasedReturnValue(i8* [[T1]]) + // CHECK-NEXT: [[T2:%.*]] = notail call i8* @llvm.objc.retainAutoreleasedReturnValue(i8* [[T1]]) // CHECK-NEXT: [[T3:%.*]] = bitcast i8* [[T2]] to [[TEST1]]* // CHECK-NEXT: store [[TEST1]]* [[T3]] // CHECK-NEXT: [[PCPTR1:%.*]] = bitcast i8** [[PC]] to i8* @@ -239,7 +239,7 @@ void test1d_message(void) { // CHECK: call void @llvm.lifetime.start.p0i8(i64 8, i8* [[PTRPTR1]]) // CHECK: [[T0:%.*]] = call [[TEST1:%.*]]* @test1_helper() // CHECK-NEXT: [[T1:%.*]] = bitcast [[TEST1]]* [[T0]] to i8* - // CHECK-NEXT: [[T2:%.*]] = call i8* @llvm.objc.retainAutoreleasedReturnValue(i8* [[T1]]) + // CHECK-NEXT: [[T2:%.*]] = notail call i8* @llvm.objc.retainAutoreleasedReturnValue(i8* [[T1]]) // CHECK-NEXT: [[T3:%.*]] = bitcast i8* [[T2]] to [[TEST1]]* // CHECK-NEXT: store [[TEST1]]* [[T3]] // CHECK-NEXT: [[PCPTR1:%.*]] = bitcast i8** [[PC]] to i8* @@ -269,7 +269,7 @@ void test1d_property(void) { // CHECK: call void @llvm.lifetime.start.p0i8(i64 8, i8* [[PTRPTR1]]) // CHECK: [[T0:%.*]] = call [[TEST1:%.*]]* @test1_helper() // CHECK-NEXT: [[T1:%.*]] = bitcast [[TEST1]]* [[T0]] to i8* - // CHECK-NEXT: [[T2:%.*]] = call i8* @llvm.objc.retainAutoreleasedReturnValue(i8* [[T1]]) + // CHECK-NEXT: [[T2:%.*]] = notail call i8* @llvm.objc.retainAutoreleasedReturnValue(i8* [[T1]]) // CHECK-NEXT: [[T3:%.*]] = bitcast i8* [[T2]] to [[TEST1]]* // CHECK-NEXT: store [[TEST1]]* [[T3]] // CHECK-NEXT: [[PCPTR1:%.*]] = bitcast i8** [[PC]] to i8* diff --git a/test/CodeGenObjC/arc-property.m b/test/CodeGenObjC/arc-property.m index b417f9505e1..6e4cbb5944e 100644 --- a/test/CodeGenObjC/arc-property.m +++ b/test/CodeGenObjC/arc-property.m @@ -126,7 +126,7 @@ - (id) copyMachine { } // CHECK: define internal i8* @"\01-[Test3 copyMachine]"( // CHECK: [[T0:%.*]] = call i8* @test3_helper() -// CHECK-NEXT: [[T1:%.*]] = call i8* @llvm.objc.retainAutoreleasedReturnValue(i8* [[T0]]) +// CHECK-NEXT: [[T1:%.*]] = notail call i8* @llvm.objc.retainAutoreleasedReturnValue(i8* [[T0]]) // CHECK-NEXT: ret i8* [[T1]] - (void) setCopyMachine: (id) x {} @end diff --git a/test/CodeGenObjC/arc-related-result-type.m b/test/CodeGenObjC/arc-related-result-type.m index ac69e0d9f64..1d3805bf9bb 100644 --- a/test/CodeGenObjC/arc-related-result-type.m +++ b/test/CodeGenObjC/arc-related-result-type.m @@ -17,7 +17,7 @@ void test0(Test0 *val) { // CHECK-NEXT: load // CHECK-NEXT: bitcast // CHECK-NEXT: [[T0:%.*]] = call i8* bitcast ( -// CHECK-NEXT: [[T1:%.*]] = call i8* @llvm.objc.retainAutoreleasedReturnValue(i8* [[T0]]) +// CHECK-NEXT: [[T1:%.*]] = notail call i8* @llvm.objc.retainAutoreleasedReturnValue(i8* [[T0]]) // CHECK-NEXT: [[T2:%.*]] = bitcast i8* [[T1]] to [[TEST0]]* // CHECK-NEXT: store [[TEST0]]* [[T2]], [[TEST0]]** [[X]] // CHECK-NEXT: [[T0:%.*]] = bitcast [[TEST0]]** [[X]] to i8** diff --git a/test/CodeGenObjC/arc-ternary-op.m b/test/CodeGenObjC/arc-ternary-op.m index 2be04628464..4883143791a 100644 --- a/test/CodeGenObjC/arc-ternary-op.m +++ b/test/CodeGenObjC/arc-ternary-op.m @@ -130,7 +130,7 @@ void test2(int cond) { // CHECK-NEXT: br i1 // Within true branch, cleanup enabled. // CHECK: [[T0:%.*]] = call i8* @test2_producer() - // CHECK-NEXT: [[T1:%.*]] = call i8* @llvm.objc.retainAutoreleasedReturnValue(i8* [[T0]]) + // CHECK-NEXT: [[T1:%.*]] = notail call i8* @llvm.objc.retainAutoreleasedReturnValue(i8* [[T0]]) // CHECK-NEXT: store i8* [[T1]], i8** [[CLEANUP_SAVE]] // CHECK-NEXT: store i1 true, i1* [[RUN_CLEANUP]] // CHECK-NEXT: br label diff --git a/test/CodeGenObjC/arc-unsafeclaim.m b/test/CodeGenObjC/arc-unsafeclaim.m index fd063bfccf9..f5982ddc04d 100644 --- a/test/CodeGenObjC/arc-unsafeclaim.m +++ b/test/CodeGenObjC/arc-unsafeclaim.m @@ -41,7 +41,7 @@ void test_assign() { // DISABLED: [[T0:%.*]] = call [[A:.*]]* @makeA() // DISABLED-MARKED-NEXT: call void asm sideeffect // DISABLED-NEXT: [[T1:%.*]] = bitcast [[A]]* [[T0]] to i8* -// DISABLED-NEXT: [[T2:%.*]] = call i8* @llvm.objc.retainAutoreleasedReturnValue(i8* [[T1]]) +// DISABLED-NEXT: [[T2:%.*]] = {{.*}}call i8* @llvm.objc.retainAutoreleasedReturnValue(i8* [[T1]]) void test_assign_assign() { __unsafe_unretained id x, y; @@ -75,7 +75,7 @@ void test_strong_assign_assign() { // CHECK: [[T0:%.*]] = call [[A]]* @makeA() // CHECK-MARKED-NEXT: call void asm sideeffect // CHECK-NEXT: [[T1:%.*]] = bitcast [[A]]* [[T0]] to i8* -// CHECK-NEXT: [[T2:%.*]] = call i8* @llvm.objc.retainAutoreleasedReturnValue(i8* [[T1]]) +// CHECK-NEXT: [[T2:%.*]] = {{.*}}call i8* @llvm.objc.retainAutoreleasedReturnValue(i8* [[T1]]) // CHECK-NEXT: [[T3:%.*]] = bitcast i8* [[T2]] to [[A]]* // CHECK-NEXT: [[T4:%.*]] = bitcast [[A]]* [[T3]] to i8* // CHECK-NEXT: store i8* [[T4]], i8** [[Y]] @@ -102,7 +102,7 @@ void test_assign_strong_assign() { // CHECK: [[T0:%.*]] = call [[A]]* @makeA() // CHECK-MARKED-NEXT: call void asm sideeffect // CHECK-NEXT: [[T1:%.*]] = bitcast [[A]]* [[T0]] to i8* -// CHECK-NEXT: [[T2:%.*]] = call i8* @llvm.objc.retainAutoreleasedReturnValue(i8* [[T1]]) +// CHECK-NEXT: [[T2:%.*]] = {{.*}}call i8* @llvm.objc.retainAutoreleasedReturnValue(i8* [[T1]]) // CHECK-NEXT: [[T3:%.*]] = bitcast i8* [[T2]] to [[A]]* // CHECK-NEXT: [[T4:%.*]] = bitcast [[A]]* [[T3]] to i8* // CHECK-NEXT: [[OLD:%.*]] = load i8*, i8** [[Y]] @@ -165,7 +165,7 @@ void test_strong_init_assignment() { // CHECK: [[T0:%.*]] = call [[A]]* @makeA() // CHECK-MARKED-NEXT: call void asm sideeffect // CHECK-NEXT: [[T1:%.*]] = bitcast [[A]]* [[T0]] to i8* -// CHECK-NEXT: [[T2:%.*]] = call i8* @llvm.objc.retainAutoreleasedReturnValue(i8* [[T1]]) +// CHECK-NEXT: [[T2:%.*]] = {{.*}}call i8* @llvm.objc.retainAutoreleasedReturnValue(i8* [[T1]]) // CHECK-NEXT: [[T3:%.*]] = bitcast i8* [[T2]] to [[A]]* // CHECK-NEXT: [[T4:%.*]] = bitcast [[A]]* [[T3]] to i8* // CHECK-NEXT: store i8* [[T4]], i8** [[X]] @@ -189,7 +189,7 @@ void test_init_strong_assignment() { // CHECK: [[T0:%.*]] = call [[A]]* @makeA() // CHECK-MARKED-NEXT: call void asm sideeffect // CHECK-NEXT: [[T1:%.*]] = bitcast [[A]]* [[T0]] to i8* -// CHECK-NEXT: [[T2:%.*]] = call i8* @llvm.objc.retainAutoreleasedReturnValue(i8* [[T1]]) +// CHECK-NEXT: [[T2:%.*]] = {{.*}}call i8* @llvm.objc.retainAutoreleasedReturnValue(i8* [[T1]]) // CHECK-NEXT: [[T3:%.*]] = bitcast i8* [[T2]] to [[A]]* // CHECK-NEXT: [[T4:%.*]] = bitcast [[A]]* [[T3]] to i8* // CHECK-NEXT: [[OLD:%.*]] = load i8*, i8** [[X]] diff --git a/test/CodeGenObjC/arc-with-atthrow.m b/test/CodeGenObjC/arc-with-atthrow.m index 93fa228c8b0..c5e9dc92beb 100644 --- a/test/CodeGenObjC/arc-with-atthrow.m +++ b/test/CodeGenObjC/arc-with-atthrow.m @@ -11,7 +11,7 @@ void test() { // CHECK-LABEL: define void @test() // CHECK: [[T0:%.*]] = call i8* @make() -// CHECK-NEXT: [[T1:%.*]] = call i8* @llvm.objc.retainAutoreleasedReturnValue(i8* [[T0]]) +// CHECK-NEXT: [[T1:%.*]] = notail call i8* @llvm.objc.retainAutoreleasedReturnValue(i8* [[T0]]) // CHECK-NEXT: [[T2:%.*]] = call i8* @llvm.objc.autorelease(i8* [[T1]]) // CHECK-NEXT: call void @objc_exception_throw(i8* [[T2]]) [[NR:#[0-9]+]] // CHECK-NEXT: unreachable diff --git a/test/CodeGenObjC/arc.m b/test/CodeGenObjC/arc.m index cbdb03205a6..bfabfb9349a 100644 --- a/test/CodeGenObjC/arc.m +++ b/test/CodeGenObjC/arc.m @@ -318,13 +318,13 @@ void test10() { // CHECK-NEXT: bitcast // CHECK-NEXT: [[T0:%.*]] = call [[TEST10]]* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend // CHECK-NEXT: [[T1:%.*]] = bitcast [[TEST10]]* [[T0]] to i8* - // CHECK-NEXT: [[T2:%.*]] = call i8* @llvm.objc.retainAutoreleasedReturnValue(i8* [[T1]]) + // CHECK-NEXT: [[T2:%.*]] = notail call i8* @llvm.objc.retainAutoreleasedReturnValue(i8* [[T1]]) // CHECK-NEXT: [[V:%.*]] = bitcast i8* [[T2]] to [[TEST10]]* // CHECK-NEXT: load i8*, i8** @OBJC_SELECTOR_REFERENCES_{{[0-9]*}} // CHECK-NEXT: bitcast // CHECK-NEXT: [[T0:%.*]] = call [[TEST10]]* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend // CHECK-NEXT: [[T1:%.*]] = bitcast [[TEST10]]* [[T0]] to i8* - // CHECK-NEXT: [[T2:%.*]] = call i8* @llvm.objc.retainAutoreleasedReturnValue(i8* [[T1]]) + // CHECK-NEXT: [[T2:%.*]] = notail call i8* @llvm.objc.retainAutoreleasedReturnValue(i8* [[T1]]) // CHECK-NEXT: [[T3:%.*]] = bitcast i8* [[T2]] to [[TEST10]]* // CHECK-NEXT: [[T4:%.*]] = bitcast [[TEST10]]* [[T3]] to i8* // CHECK-NEXT: store i8* [[T4]], i8** [[Y]] @@ -371,13 +371,13 @@ void test12(void) { // CHECK-NEXT: [[XPTR1:%.*]] = bitcast i8** [[X]] to i8* // CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 8, i8* [[XPTR1]]) // CHECK-NEXT: [[T0:%.*]] = call i8* @test12_helper() - // CHECK-NEXT: [[T1:%.*]] = call i8* @llvm.objc.retainAutoreleasedReturnValue(i8* [[T0]]) + // CHECK-NEXT: [[T1:%.*]] = notail call i8* @llvm.objc.retainAutoreleasedReturnValue(i8* [[T0]]) // CHECK-NEXT: call i8* @llvm.objc.initWeak(i8** [[X]], i8* [[T1]]) // CHECK-NEXT: call void @llvm.objc.release(i8* [[T1]]) x = test12_helper(); // CHECK-NEXT: [[T0:%.*]] = call i8* @test12_helper() - // CHECK-NEXT: [[T1:%.*]] = call i8* @llvm.objc.retainAutoreleasedReturnValue(i8* [[T0]]) + // CHECK-NEXT: [[T1:%.*]] = notail call i8* @llvm.objc.retainAutoreleasedReturnValue(i8* [[T0]]) // CHECK-NEXT: call i8* @llvm.objc.storeWeak(i8** [[X]], i8* [[T1]]) // CHECK-NEXT: call void @llvm.objc.release(i8* [[T1]]) @@ -514,7 +514,7 @@ void test19() { x[2] = test19_helper(); // CHECK-NEXT: [[CALL:%.*]] = call i8* @test19_helper() - // CHECK-NEXT: [[T1:%.*]] = call i8* @llvm.objc.retainAutoreleasedReturnValue(i8* [[CALL]]) [[NUW]] + // CHECK-NEXT: [[T1:%.*]] = notail call i8* @llvm.objc.retainAutoreleasedReturnValue(i8* [[CALL]]) [[NUW]] // CHECK-NEXT: [[SLOT:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[X]], i64 0, i64 2 // CHECK-NEXT: [[T0:%.*]] = load i8*, i8** [[SLOT]] // CHECK-NEXT: store i8* [[T1]], i8** [[SLOT]] @@ -876,7 +876,7 @@ - (Test30_helper*) initHelper { __attribute__((ns_returns_retained)) id test32(void) { // CHECK-LABEL: define i8* @test32() // CHECK: [[CALL:%.*]] = call i8* @test32_helper() -// CHECK-NEXT: [[T0:%.*]] = call i8* @llvm.objc.retainAutoreleasedReturnValue(i8* [[CALL]]) +// CHECK-NEXT: [[T0:%.*]] = notail call i8* @llvm.objc.retainAutoreleasedReturnValue(i8* [[CALL]]) // CHECK-NEXT: ret i8* [[T0]] extern id test32_helper(void); return test32_helper(); @@ -1045,7 +1045,7 @@ - (id) test __attribute__((ns_returns_retained)) { extern id test43_produce(void); return test43_produce(); // CHECK: call i8* @test43_produce() - // CHECK-NEXT: call i8* @llvm.objc.retainAutoreleasedReturnValue( + // CHECK-NEXT: notail call i8* @llvm.objc.retainAutoreleasedReturnValue( // CHECK-NEXT: ret } @end @@ -1067,7 +1067,7 @@ void test46(__weak id *wp, __weak volatile id *wvp) { // TODO: this is sub-optimal, we should retain at the actual call site. // CHECK: [[T0:%.*]] = call i8* @test46_helper() - // CHECK-NEXT: [[T1:%.*]] = call i8* @llvm.objc.retainAutoreleasedReturnValue(i8* [[T0]]) + // CHECK-NEXT: [[T1:%.*]] = notail call i8* @llvm.objc.retainAutoreleasedReturnValue(i8* [[T0]]) // CHECK-NEXT: [[T2:%.*]] = load i8**, i8*** {{%.*}}, align 8 // CHECK-NEXT: [[T3:%.*]] = call i8* @llvm.objc.storeWeak(i8** [[T2]], i8* [[T1]]) // CHECK-NEXT: [[T4:%.*]] = call i8* @llvm.objc.retain(i8* [[T3]]) @@ -1076,7 +1076,7 @@ void test46(__weak id *wp, __weak volatile id *wvp) { id x = *wp = test46_helper(); // CHECK: [[T0:%.*]] = call i8* @test46_helper() - // CHECK-NEXT: [[T1:%.*]] = call i8* @llvm.objc.retainAutoreleasedReturnValue(i8* [[T0]]) + // CHECK-NEXT: [[T1:%.*]] = notail call i8* @llvm.objc.retainAutoreleasedReturnValue(i8* [[T0]]) // CHECK-NEXT: [[T2:%.*]] = load i8**, i8*** {{%.*}}, align 8 // CHECK-NEXT: [[T3:%.*]] = call i8* @llvm.objc.storeWeak(i8** [[T2]], i8* [[T1]]) // CHECK-NEXT: [[T4:%.*]] = call i8* @llvm.objc.retain(i8* [[T3]]) @@ -1096,7 +1096,7 @@ void test47(void) { // CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 8, i8* [[XPTR1]]) // CHECK-NEXT: store i8* null, i8** [[X]] // CHECK-NEXT: [[CALL:%.*]] = call i8* @test47_helper() - // CHECK-NEXT: [[T0:%.*]] = call i8* @llvm.objc.retainAutoreleasedReturnValue(i8* [[CALL]]) + // CHECK-NEXT: [[T0:%.*]] = notail call i8* @llvm.objc.retainAutoreleasedReturnValue(i8* [[CALL]]) // CHECK-NEXT: [[T1:%.*]] = load i8*, i8** [[X]] // CHECK-NEXT: store i8* [[T0]], i8** [[X]] // CHECK-NEXT: call void @llvm.objc.release(i8* [[T1]]) @@ -1120,7 +1120,7 @@ void test48(void) { // CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 8, i8* [[XPTR1]]) // CHECK-NEXT: [[T0:%.*]] = call i8* @llvm.objc.initWeak(i8** [[X]], i8* null) // CHECK-NEXT: [[T1:%.*]] = call i8* @test48_helper() - // CHECK-NEXT: [[T2:%.*]] = call i8* @llvm.objc.retainAutoreleasedReturnValue(i8* [[T1]]) + // CHECK-NEXT: [[T2:%.*]] = notail call i8* @llvm.objc.retainAutoreleasedReturnValue(i8* [[T1]]) // CHECK-NEXT: [[T3:%.*]] = call i8* @llvm.objc.storeWeak(i8** [[X]], i8* [[T2]]) // CHECK-NEXT: [[T4:%.*]] = call i8* @llvm.objc.storeWeak(i8** [[X]], i8* [[T3]]) // CHECK-NEXT: call void @llvm.objc.release(i8* [[T2]]) @@ -1139,7 +1139,7 @@ void test49(void) { // CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 8, i8* [[XPTR1]]) // CHECK-NEXT: store i8* null, i8** [[X]] // CHECK-NEXT: [[CALL:%.*]] = call i8* @test49_helper() - // CHECK-NEXT: [[T0:%.*]] = call i8* @llvm.objc.retainAutoreleasedReturnValue(i8* [[CALL]]) + // CHECK-NEXT: [[T0:%.*]] = notail call i8* @llvm.objc.retainAutoreleasedReturnValue(i8* [[CALL]]) // CHECK-NEXT: [[T1:%.*]] = call i8* @llvm.objc.autorelease(i8* [[T0]]) // CHECK-NEXT: store i8* [[T2]], i8** [[X]] // CHECK-NEXT: [[T3:%.*]] = call i8* @llvm.objc.retainAutorelease(i8* [[T1]]) @@ -1208,7 +1208,7 @@ void test53(void) { // CHECK-NEXT: [[YPTR1:%.*]] = bitcast i8** [[Y]] to i8* // CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 8, i8* [[YPTR1]]) // CHECK-NEXT: [[T0:%.*]] = call i8* @test53_helper() -// CHECK-NEXT: [[T1:%.*]] = call i8* @llvm.objc.retainAutoreleasedReturnValue(i8* [[T0]]) +// CHECK-NEXT: [[T1:%.*]] = notail call i8* @llvm.objc.retainAutoreleasedReturnValue(i8* [[T0]]) // CHECK-NEXT: store i8* [[T1]], i8** [[Y]], // CHECK-NEXT: [[T0:%.*]] = load i8*, i8** [[Y]], // CHECK-NEXT: [[T1:%.*]] = call i8* @llvm.objc.retain(i8* [[T0]]) @@ -1261,7 +1261,7 @@ @implementation Test56 + (id) make { extern id test56_helper(void); // CHECK: [[T0:%.*]] = call i8* @test56_helper() - // CHECK-NEXT: [[T1:%.*]] = call i8* @llvm.objc.retainAutoreleasedReturnValue(i8* [[T0]]) + // CHECK-NEXT: [[T1:%.*]] = notail call i8* @llvm.objc.retainAutoreleasedReturnValue(i8* [[T0]]) // CHECK-NEXT: ret i8* [[T1]] return test56_helper(); } @@ -1328,7 +1328,7 @@ void test59(void) { // CHECK-LABEL: define void @test59() // CHECK: [[T0:%.*]] = call i8* @test59_getlock() - // CHECK-NEXT: [[T1:%.*]] = call i8* @llvm.objc.retainAutoreleasedReturnValue(i8* [[T0]]) + // CHECK-NEXT: [[T1:%.*]] = notail call i8* @llvm.objc.retainAutoreleasedReturnValue(i8* [[T0]]) // CHECK-NEXT: call i32 @objc_sync_enter(i8* [[T1]]) // CHECK-NEXT: call void @test59_body() // CHECK-NEXT: call i32 @objc_sync_exit(i8* [[T1]]) @@ -1350,7 +1350,7 @@ void test61(void) { extern id test61_make(void); // CHECK-NEXT: [[T0:%.*]] = call i8* @test61_make() - // CHECK-NEXT: [[T1:%.*]] = call i8* @llvm.objc.retainAutoreleasedReturnValue(i8* [[T0]]) + // CHECK-NEXT: [[T1:%.*]] = notail call i8* @llvm.objc.retainAutoreleasedReturnValue(i8* [[T0]]) // CHECK-NEXT: [[T2:%.*]] = load i8*, i8** @OBJC_SELECTOR_REFERENCES_ // CHECK-NEXT: [[T3:%.*]] = load i8*, i8** @OBJC_SELECTOR_REFERENCES_ // CHECK-NEXT: [[T4:%.*]] = call i8* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i8* (i8*, i8*, i8*)*)(i8* [[T1]], i8* [[T3]], i8* [[T2]]) @@ -1360,11 +1360,11 @@ void test61(void) { // CHECK-NEXT: [[YPTR1:%.*]] = bitcast i8** [[Y]] to i8* // CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 8, i8* [[YPTR1]]) // CHECK-NEXT: [[T0:%.*]] = call i8* @test61_make() - // CHECK-NEXT: [[T1:%.*]] = call i8* @llvm.objc.retainAutoreleasedReturnValue(i8* [[T0]]) + // CHECK-NEXT: [[T1:%.*]] = notail call i8* @llvm.objc.retainAutoreleasedReturnValue(i8* [[T0]]) // CHECK-NEXT: [[T2:%.*]] = load i8*, i8** @OBJC_SELECTOR_REFERENCES_ // CHECK-NEXT: [[T3:%.*]] = load i8*, i8** @OBJC_SELECTOR_REFERENCES_ // CHECK-NEXT: [[T4:%.*]] = call i8* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i8* (i8*, i8*, i8*)*)(i8* [[T1]], i8* [[T3]], i8* [[T2]]) - // CHECK-NEXT: [[T5:%.*]] = call i8* @llvm.objc.retainAutoreleasedReturnValue(i8* [[T4]]) + // CHECK-NEXT: [[T5:%.*]] = notail call i8* @llvm.objc.retainAutoreleasedReturnValue(i8* [[T4]]) // CHECK-NEXT: store i8* [[T5]], i8** [[Y]] // CHECK-NEXT: call void @llvm.objc.release(i8* [[T1]]) id y = [test61_make() performSelector: @selector(test61_id)]; @@ -1400,7 +1400,7 @@ void test62(void) { // CHECK-NEXT: store i1 false, i1* [[CLEANUP_REQUIRED]] // CHECK-NEXT: br i1 [[T1]], // CHECK: [[T0:%.*]] = call i8* @test62_make() - // CHECK-NEXT: [[T1:%.*]] = call i8* @llvm.objc.retainAutoreleasedReturnValue(i8* [[T0]]) + // CHECK-NEXT: [[T1:%.*]] = notail call i8* @llvm.objc.retainAutoreleasedReturnValue(i8* [[T0]]) // CHECK-NEXT: store i8* [[T1]], i8** [[CLEANUP_VALUE]] // CHECK-NEXT: store i1 true, i1* [[CLEANUP_REQUIRED]] // CHECK-NEXT: [[T2:%.*]] = icmp ne i8* [[T1]], null @@ -1455,10 +1455,10 @@ void test66(void) { // CHECK-LABEL: define void @test66() // CHECK: [[T0:%.*]] = call [[TEST66:%.*]]* @test66_receiver() // CHECK-NEXT: [[T1:%.*]] = bitcast [[TEST66]]* [[T0]] to i8* -// CHECK-NEXT: [[T2:%.*]] = call i8* @llvm.objc.retainAutoreleasedReturnValue(i8* [[T1]]) +// CHECK-NEXT: [[T2:%.*]] = notail call i8* @llvm.objc.retainAutoreleasedReturnValue(i8* [[T1]]) // CHECK-NEXT: [[T3:%.*]] = bitcast i8* [[T2]] to [[TEST66]]* // CHECK-NEXT: [[T4:%.*]] = call i8* @test66_arg() -// CHECK-NEXT: [[T5:%.*]] = call i8* @llvm.objc.retainAutoreleasedReturnValue(i8* [[T4]]) +// CHECK-NEXT: [[T5:%.*]] = notail call i8* @llvm.objc.retainAutoreleasedReturnValue(i8* [[T4]]) // CHECK-NEXT: [[T6:%.*]] = load i8*, i8** @OBJC_SELECTOR_REFERENCES // CHECK-NEXT: [[T7:%.*]] = bitcast [[TEST66]]* [[T3]] to i8* // CHECK-NEXT: [[SIX:%.*]] = icmp eq i8* [[T7]], null @@ -1495,7 +1495,7 @@ void test68(void) { // CHECK-NEXT: [[CLPTR1:%.*]] = bitcast i8** [[CL]] to i8* // CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 8, i8* [[CLPTR1]]) // CHECK-NEXT: [[T0:%.*]] = call i8* @test67_helper() -// CHECK-NEXT: [[T1:%.*]] = call i8* @llvm.objc.retainAutoreleasedReturnValue(i8* [[T0]]) +// CHECK-NEXT: [[T1:%.*]] = notail call i8* @llvm.objc.retainAutoreleasedReturnValue(i8* [[T0]]) // CHECK-NEXT: store i8* [[T1]], i8** [[CL]], align 8 // CHECK-NEXT: [[T2:%.*]] = load i8*, i8** [[CL]] // CHECK-NEXT: call void @llvm.objc.release(i8* [[T2]]) diff --git a/test/CodeGenObjC/objc-arc-container-subscripting.m b/test/CodeGenObjC/objc-arc-container-subscripting.m index 339415e3c05..2f6062d7ce6 100644 --- a/test/CodeGenObjC/objc-arc-container-subscripting.m +++ b/test/CodeGenObjC/objc-arc-container-subscripting.m @@ -12,7 +12,7 @@ id func() { } // CHECK: [[call:%.*]] = call i8* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend -// CHECK: [[SIX:%.*]] = call i8* @llvm.objc.retainAutoreleasedReturnValue(i8* [[call]]) [[NUW:#[0-9]+]] +// CHECK: [[SIX:%.*]] = notail call i8* @llvm.objc.retainAutoreleasedReturnValue(i8* [[call]]) [[NUW:#[0-9]+]] // CHECK: [[ARRAY_CASTED:%.*]] = bitcast %0** {{%.*}} to i8** // CHECK: call void @llvm.objc.storeStrong(i8** [[ARRAY_CASTED]], i8* null) // CHECK: [[EIGHT:%.*]] = tail call i8* @llvm.objc.autoreleaseReturnValue(i8* [[SIX]]) [[NUW]] diff --git a/test/CodeGenObjC/os_log.m b/test/CodeGenObjC/os_log.m index 150987b0820..15999c6315d 100644 --- a/test/CodeGenObjC/os_log.m +++ b/test/CodeGenObjC/os_log.m @@ -21,7 +21,7 @@ // CHECK: %[[CALL:.*]] = tail call %[[TY0:.*]]* (...) @GenString() // CHECK: %[[V0:.*]] = bitcast %[[TY0]]* %[[CALL]] to i8* - // CHECK: %[[V1:.*]] = tail call i8* @llvm.objc.retainAutoreleasedReturnValue(i8* %[[V0]]) + // CHECK: %[[V1:.*]] = notail call i8* @llvm.objc.retainAutoreleasedReturnValue(i8* %[[V0]]) // CHECK: %[[V2:.*]] = ptrtoint %[[TY0]]* %[[CALL]] to i64 // CHECK: store i8 2, i8* %[[BUF]], align 1 // CHECK: %[[NUMARGS_I:.*]] = getelementptr i8, i8* %[[BUF]], i64 1 @@ -45,7 +45,7 @@ // CHECK-O0: %[[V0:.*]] = load i8*, i8** %[[BUF_ADDR]], align 8 // CHECK-O0: %[[CALL:.*]] = call %[[TY0:.*]]* (...) @GenString() // CHECK-O0: %[[V1:.*]] = bitcast %[[TY0]]* %[[CALL]] to i8* - // CHECK-O0: %[[V2:.*]] = call i8* @llvm.objc.retainAutoreleasedReturnValue(i8* %[[V1]]) + // CHECK-O0: %[[V2:.*]] = notail call i8* @llvm.objc.retainAutoreleasedReturnValue(i8* %[[V1]]) // CHECK-O0: %[[V3:.*]] = bitcast i8* %[[V2]] to %[[TY0]]* // CHECK-O0: %[[V4:.*]] = ptrtoint %[[TY0]]* %[[V3]] to i64 // CHECK-O0: call void @__os_log_helper_1_2_1_8_64(i8* %[[V0]], i64 %[[V4]]) diff --git a/test/CodeGenObjCXX/arc-forwarded-lambda-call.mm b/test/CodeGenObjCXX/arc-forwarded-lambda-call.mm index 5a5cb42067d..37a68136dd5 100644 --- a/test/CodeGenObjCXX/arc-forwarded-lambda-call.mm +++ b/test/CodeGenObjCXX/arc-forwarded-lambda-call.mm @@ -5,7 +5,7 @@ void test0(id x) { test0_helper([=]() { return x; }); // CHECK-LABEL: define internal i8* @___Z5test0P11objc_object_block_invoke // CHECK: [[T0:%.*]] = call i8* @"_ZZ5test0P11objc_objectENK3$_0clEv" - // CHECK-NEXT: [[T1:%.*]] = call i8* @llvm.objc.retainAutoreleasedReturnValue(i8* [[T0]]) + // CHECK-NEXT: [[T1:%.*]] = notail call i8* @llvm.objc.retainAutoreleasedReturnValue(i8* [[T0]]) // CHECK-NEXT: [[T2:%.*]] = tail call i8* @llvm.objc.autoreleaseReturnValue(i8* [[T1]]) // CHECK-NEXT: ret i8* [[T2]] } @@ -28,7 +28,7 @@ void test1() { test1_helper([](){ return test1_rv; }); // CHECK-LABEL: define internal i8* @"_ZZ5test1vEN3$_18__invokeEv" // CHECK: [[T0:%.*]] = call i8* @"_ZZ5test1vENK3$_1clEv" - // CHECK-NEXT: [[T1:%.*]] = call i8* @llvm.objc.retainAutoreleasedReturnValue(i8* [[T0]]) + // CHECK-NEXT: [[T1:%.*]] = notail call i8* @llvm.objc.retainAutoreleasedReturnValue(i8* [[T0]]) // CHECK-NEXT: [[T2:%.*]] = tail call i8* @llvm.objc.autoreleaseReturnValue(i8* [[T1]]) // CHECK-NEXT: ret i8* [[T2]] } diff --git a/test/CodeGenObjCXX/arc.mm b/test/CodeGenObjCXX/arc.mm index e32c1f89215..f351ff6089c 100644 --- a/test/CodeGenObjCXX/arc.mm +++ b/test/CodeGenObjCXX/arc.mm @@ -20,7 +20,7 @@ void test0(__weak id *wp, __weak volatile id *wvp) { // TODO: in the non-volatile case, we do not need to be reloading. // CHECK: [[T0:%.*]] = call i8* @_Z12test0_helperv() - // CHECK-NEXT: [[T1:%.*]] = call i8* @llvm.objc.retainAutoreleasedReturnValue(i8* [[T0]]) + // CHECK-NEXT: [[T1:%.*]] = notail call i8* @llvm.objc.retainAutoreleasedReturnValue(i8* [[T0]]) // CHECK-NEXT: [[T2:%.*]] = load i8**, i8*** {{%.*}}, align 8 // CHECK-NEXT: [[T3:%.*]] = call i8* @llvm.objc.storeWeak(i8** [[T2]], i8* [[T1]]) // CHECK-NEXT: [[T4:%.*]] = call i8* @llvm.objc.retain(i8* [[T3]]) @@ -29,7 +29,7 @@ void test0(__weak id *wp, __weak volatile id *wvp) { id x = *wp = test0_helper(); // CHECK: [[T0:%.*]] = call i8* @_Z12test0_helperv() - // CHECK-NEXT: [[T1:%.*]] = call i8* @llvm.objc.retainAutoreleasedReturnValue(i8* [[T0]]) + // CHECK-NEXT: [[T1:%.*]] = notail call i8* @llvm.objc.retainAutoreleasedReturnValue(i8* [[T0]]) // CHECK-NEXT: [[T2:%.*]] = load i8**, i8*** {{%.*}}, align 8 // CHECK-NEXT: [[T3:%.*]] = call i8* @llvm.objc.storeWeak(i8** [[T2]], i8* [[T1]]) // CHECK-NEXT: [[T4:%.*]] = call i8* @llvm.objc.loadWeakRetained(i8** [[T2]]) @@ -224,7 +224,7 @@ - (NSArray *) array; // CHECK-LABEL: define weak_odr void @_Z6test37I6Test37EvPT_( // CHECK: [[T0:%.*]] = call [[NSARRAY]]* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to [[NSARRAY]]* (i8*, i8*)*)( // CHECK-NEXT: [[T1:%.*]] = bitcast [[NSARRAY]]* [[T0]] to i8* -// CHECK-NEXT: [[T2:%.*]] = call i8* @llvm.objc.retainAutoreleasedReturnValue(i8* [[T1]]) +// CHECK-NEXT: [[T2:%.*]] = notail call i8* @llvm.objc.retainAutoreleasedReturnValue(i8* [[T1]]) // CHECK-NEXT: [[COLL:%.*]] = bitcast i8* [[T2]] to [[NSARRAY]]* // Make sure it's not immediately released before starting the iteration. diff --git a/test/CodeGenObjCXX/inheriting-constructor-cleanup.mm b/test/CodeGenObjCXX/inheriting-constructor-cleanup.mm index 229e84a0f56..a7770e07e42 100644 --- a/test/CodeGenObjCXX/inheriting-constructor-cleanup.mm +++ b/test/CodeGenObjCXX/inheriting-constructor-cleanup.mm @@ -23,7 +23,7 @@ void f() { } // CHECK-LABEL: define void @_Z1fv // CHECK: %[[TMP:.*]] = call i8* @_Z1gv() -// CHECK: {{.*}} = call i8* @llvm.objc.retainAutoreleasedReturnValue(i8* %[[TMP]]) +// CHECK: {{.*}} = notail call i8* @llvm.objc.retainAutoreleasedReturnValue(i8* %[[TMP]]) // CHECK: call void (%struct.Base*, i8*, ...) @_ZN4BaseC2E6Strongz(%struct.Base* {{.*}}, i8* {{.*}}) // CHECK-NEXT: call void @_ZN9InheritorD1Ev(%struct.Inheritor* {{.*}}) diff --git a/test/CodeGenObjCXX/literals.mm b/test/CodeGenObjCXX/literals.mm index 0a14d330bdb..612d12dd135 100644 --- a/test/CodeGenObjCXX/literals.mm +++ b/test/CodeGenObjCXX/literals.mm @@ -29,7 +29,7 @@ void test_array() { // CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 1, i8* [[TMP_CAST]]) // CHECK-NEXT: call void @_ZN1XC1Ev({{.*}} [[TMPX]]) // CHECK-NEXT: [[OBJECT0:%[a-zA-Z0-9.]+]] = invoke i8* @_ZNK1XcvP11objc_objectEv - // CHECK: [[RET0:%[a-zA-Z0-9.]+]] = call i8* @llvm.objc.retainAutoreleasedReturnValue(i8* [[OBJECT0]]) + // CHECK: [[RET0:%[a-zA-Z0-9.]+]] = notail call i8* @llvm.objc.retainAutoreleasedReturnValue(i8* [[OBJECT0]]) // CHECK: store i8* [[RET0]], i8** [[ELEMENT0]] // Initializing the second element @@ -38,7 +38,7 @@ void test_array() { // CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 1, i8* [[TMP_CAST]]) // CHECK-NEXT: invoke void @_ZN1YC1Ev({{.*}} [[TMPY]]) // CHECK: [[OBJECT1:%[a-zA-Z0-9.]+]] = invoke i8* @_ZNK1YcvP11objc_objectEv - // CHECK: [[RET1:%[a-zA-Z0-9.]+]] = call i8* @llvm.objc.retainAutoreleasedReturnValue(i8* [[OBJECT1]]) + // CHECK: [[RET1:%[a-zA-Z0-9.]+]] = notail call i8* @llvm.objc.retainAutoreleasedReturnValue(i8* [[OBJECT1]]) // CHECK: store i8* [[RET1]], i8** [[ELEMENT1]] // Build the array @@ -83,14 +83,14 @@ void test_array_instantiation() { // CHECK: [[ELEMENT0:%[a-zA-Z0-9.]+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[OBJECTS]], i64 0, i64 0 // CHECK: call void @_ZN1XC1Ev // CHECK-NEXT: [[OBJECT0:%[a-zA-Z0-9.]+]] = invoke i8* @_ZNK1XcvP11objc_objectEv - // CHECK: [[RET0:%[a-zA-Z0-9.]+]] = call i8* @llvm.objc.retainAutoreleasedReturnValue(i8* [[OBJECT0]]) + // CHECK: [[RET0:%[a-zA-Z0-9.]+]] = notail call i8* @llvm.objc.retainAutoreleasedReturnValue(i8* [[OBJECT0]]) // CHECK: store i8* [[RET0]], i8** [[ELEMENT0]] // Initializing the second element // CHECK: [[ELEMENT1:%[a-zA-Z0-9.]+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[OBJECTS]], i64 0, i64 1 // CHECK: invoke void @_ZN1YC1Ev // CHECK: [[OBJECT1:%[a-zA-Z0-9.]+]] = invoke i8* @_ZNK1YcvP11objc_objectEv - // CHECK: [[RET1:%[a-zA-Z0-9.]+]] = call i8* @llvm.objc.retainAutoreleasedReturnValue(i8* [[OBJECT1]]) + // CHECK: [[RET1:%[a-zA-Z0-9.]+]] = notail call i8* @llvm.objc.retainAutoreleasedReturnValue(i8* [[OBJECT1]]) // CHECK: store i8* [[RET1]], i8** [[ELEMENT1]] // Build the array From 10535f02fa4221be9866d5b5572b622025e149bb Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Thu, 21 Mar 2019 20:07:24 +0000 Subject: [PATCH 08/38] [Driver] Pass -malign-double from the driver to the cc1 command line -malign-double is currently only implemented in the -cc1 interface. But its declared in Options.td so it is a driver option too. But you try to use it with the driver you'll get a message about the option being unused. This patch teaches the driver to pass the option through to cc1 so it won't be unused. The Options.td says the option is x86 only but I didn't see any x86 specific code in its impementation in cc1 so not sure if the documentation is wrong or if I should only pass this option through the driver on x86 targets. Differential Revision: https://reviews.llvm.org/D59624 git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@356706 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Driver/ToolChains/Clang.cpp | 1 + test/Driver/malign_double.c | 5 +++++ 2 files changed, 6 insertions(+) create mode 100644 test/Driver/malign_double.c diff --git a/lib/Driver/ToolChains/Clang.cpp b/lib/Driver/ToolChains/Clang.cpp index bf9ab9dd107..20f55dff5ab 100644 --- a/lib/Driver/ToolChains/Clang.cpp +++ b/lib/Driver/ToolChains/Clang.cpp @@ -4547,6 +4547,7 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, Args.AddLastArg(CmdArgs, options::OPT_fdiagnostics_parseable_fixits); Args.AddLastArg(CmdArgs, options::OPT_ftime_report); Args.AddLastArg(CmdArgs, options::OPT_ftrapv); + Args.AddLastArg(CmdArgs, options::OPT_malign_double); if (Arg *A = Args.getLastArg(options::OPT_ftrapv_handler_EQ)) { CmdArgs.push_back("-ftrapv-handler"); diff --git a/test/Driver/malign_double.c b/test/Driver/malign_double.c new file mode 100644 index 00000000000..2c5cc35ea19 --- /dev/null +++ b/test/Driver/malign_double.c @@ -0,0 +1,5 @@ +// RUN: %clang -### -malign-double %s 2>&1 | FileCheck %s + +// Make sure -malign-double is passed through the driver. + +// CHECK: "-malign-double" From d311fb0d8ab0cbd7d5abb3da427188dd42b04583 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Thu, 21 Mar 2019 20:36:08 +0000 Subject: [PATCH 09/38] [X86] Correct the value of MaxAtomicInlineWidth for pre-586 cpus Use the new cx8 feature flag that was added to the backend to represent support for cmpxchg8b. Use this flag to set the MaxAtomicInlineWidth. This also assumes all the cmpxchg instructions are enabled for CK_Generic which is what cc1 defaults to when nothing is specified. Differential Revision: https://reviews.llvm.org/D59566 git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@356709 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Basic/Targets/X86.cpp | 12 ++++- lib/Basic/Targets/X86.h | 11 +++-- test/CodeGen/attr-cpuspecific.c | 6 +-- test/CodeGen/attr-target-x86-mmx.c | 2 +- test/CodeGen/attr-target-x86.c | 16 +++---- test/CodeGen/attr-target-x87-softfp.c | 4 +- test/Preprocessor/init.c | 56 +++++++++++++++++++++-- test/Preprocessor/predefined-win-macros.c | 12 ++++- 8 files changed, 94 insertions(+), 25 deletions(-) diff --git a/lib/Basic/Targets/X86.cpp b/lib/Basic/Targets/X86.cpp index fa5ffd5e801..87535824adf 100644 --- a/lib/Basic/Targets/X86.cpp +++ b/lib/Basic/Targets/X86.cpp @@ -115,6 +115,11 @@ bool X86TargetInfo::initFeatureMap( if (Kind != CK_Lakemont) setFeatureEnabledImpl(Features, "x87", true); + // Enable cmpxchg8 for i586 and greater CPUs. Include generic for backwards + // compatibility. + if (Kind >= CK_i586 || Kind == CK_Generic) + setFeatureEnabledImpl(Features, "cx8", true); + switch (Kind) { case CK_Generic: case CK_i386: @@ -777,6 +782,8 @@ bool X86TargetInfo::handleTargetFeatures(std::vector &Features, HasMOVBE = true; } else if (Feature == "+sgx") { HasSGX = true; + } else if (Feature == "+cx8") { + HasCX8 = true; } else if (Feature == "+cx16") { HasCX16 = true; } else if (Feature == "+fxsr") { @@ -1275,12 +1282,12 @@ void X86TargetInfo::getTargetDefines(const LangOptions &Opts, break; } - if (CPU >= CK_i486) { + if (CPU >= CK_i486 || CPU == CK_Generic) { Builder.defineMacro("__GCC_HAVE_SYNC_COMPARE_AND_SWAP_1"); Builder.defineMacro("__GCC_HAVE_SYNC_COMPARE_AND_SWAP_2"); Builder.defineMacro("__GCC_HAVE_SYNC_COMPARE_AND_SWAP_4"); } - if (CPU >= CK_i586) + if (HasCX8) Builder.defineMacro("__GCC_HAVE_SYNC_COMPARE_AND_SWAP_8"); if (HasCX16 && getTriple().getArch() == llvm::Triple::x86_64) Builder.defineMacro("__GCC_HAVE_SYNC_COMPARE_AND_SWAP_16"); @@ -1394,6 +1401,7 @@ bool X86TargetInfo::hasFeature(StringRef Feature) const { .Case("clflushopt", HasCLFLUSHOPT) .Case("clwb", HasCLWB) .Case("clzero", HasCLZERO) + .Case("cx8", HasCX8) .Case("cx16", HasCX16) .Case("f16c", HasF16C) .Case("fma", HasFMA) diff --git a/lib/Basic/Targets/X86.h b/lib/Basic/Targets/X86.h index 313bf28aa1f..9f285d8d50d 100644 --- a/lib/Basic/Targets/X86.h +++ b/lib/Basic/Targets/X86.h @@ -81,6 +81,7 @@ class LLVM_LIBRARY_VISIBILITY X86TargetInfo : public TargetInfo { bool HasMPX = false; bool HasSHSTK = false; bool HasSGX = false; + bool HasCX8 = false; bool HasCX16 = false; bool HasFXSR = false; bool HasXSAVE = false; @@ -344,9 +345,8 @@ class LLVM_LIBRARY_VISIBILITY X86_32TargetInfo : public X86TargetInfo { (1 << TargetInfo::LongDouble)); // x86-32 has atomics up to 8 bytes - // FIXME: Check that we actually have cmpxchg8b before setting - // MaxAtomicInlineWidth. (cmpxchg8b is an i586 instruction.) - MaxAtomicPromoteWidth = MaxAtomicInlineWidth = 64; + MaxAtomicPromoteWidth = 64; + MaxAtomicInlineWidth = 32; } BuiltinVaListKind getBuiltinVaListKind() const override { @@ -382,6 +382,11 @@ class LLVM_LIBRARY_VISIBILITY X86_32TargetInfo : public X86TargetInfo { return X86TargetInfo::validateOperandSize(Constraint, Size); } + void setMaxAtomicWidth() override { + if (hasFeature("cx8")) + MaxAtomicInlineWidth = 64; + } + ArrayRef getTargetBuiltins() const override; }; diff --git a/test/CodeGen/attr-cpuspecific.c b/test/CodeGen/attr-cpuspecific.c index d6c99648cb7..2c5e411ce3a 100644 --- a/test/CodeGen/attr-cpuspecific.c +++ b/test/CodeGen/attr-cpuspecific.c @@ -254,6 +254,6 @@ int DispatchFirst(void) {return 1;} // WINDOWS: define dso_local i32 @DispatchFirst.B // WINDOWS: ret i32 1 -// CHECK: attributes #[[S]] = {{.*}}"target-features"="+avx,+cmov,+f16c,+mmx,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave" -// CHECK: attributes #[[K]] = {{.*}}"target-features"="+adx,+avx,+avx2,+avx512cd,+avx512er,+avx512f,+avx512pf,+bmi,+cmov,+f16c,+fma,+lzcnt,+mmx,+movbe,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave" -// CHECK: attributes #[[O]] = {{.*}}"target-features"="+cmov,+mmx,+movbe,+sse,+sse2,+sse3,+ssse3,+x87" +// CHECK: attributes #[[S]] = {{.*}}"target-features"="+avx,+cmov,+cx8,+f16c,+mmx,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave" +// CHECK: attributes #[[K]] = {{.*}}"target-features"="+adx,+avx,+avx2,+avx512cd,+avx512er,+avx512f,+avx512pf,+bmi,+cmov,+cx8,+f16c,+fma,+lzcnt,+mmx,+movbe,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave" +// CHECK: attributes #[[O]] = {{.*}}"target-features"="+cmov,+cx8,+mmx,+movbe,+sse,+sse2,+sse3,+ssse3,+x87" diff --git a/test/CodeGen/attr-target-x86-mmx.c b/test/CodeGen/attr-target-x86-mmx.c index 412e8e93af9..01663766d98 100644 --- a/test/CodeGen/attr-target-x86-mmx.c +++ b/test/CodeGen/attr-target-x86-mmx.c @@ -19,4 +19,4 @@ void __attribute__((target("sse"))) shift(__m64 a, __m64 b, int c) { _mm_srai_pi32(a, c); } -// CHECK: "target-features"="+mmx,+sse,+x87" +// CHECK: "target-features"="+cx8,+mmx,+sse,+x87" diff --git a/test/CodeGen/attr-target-x86.c b/test/CodeGen/attr-target-x86.c index 153cdb3e94c..56ccaf98ea4 100644 --- a/test/CodeGen/attr-target-x86.c +++ b/test/CodeGen/attr-target-x86.c @@ -48,11 +48,11 @@ int __attribute__((target("arch=lakemont,mmx"))) use_before_def(void) { // CHECK: qq{{.*}} #6 // CHECK: lake{{.*}} #7 // CHECK: use_before_def{{.*}} #7 -// CHECK: #0 = {{.*}}"target-cpu"="i686" "target-features"="+x87" -// CHECK: #1 = {{.*}}"target-cpu"="ivybridge" "target-features"="+avx,+cx16,+f16c,+fsgsbase,+fxsr,+mmx,+pclmul,+popcnt,+rdrnd,+sahf,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsaveopt" -// CHECK: #2 = {{.*}}"target-cpu"="i686" "target-features"="+x87,-aes,-avx,-avx2,-avx512bitalg,-avx512bw,-avx512cd,-avx512dq,-avx512er,-avx512f,-avx512ifma,-avx512pf,-avx512vbmi,-avx512vbmi2,-avx512vl,-avx512vnni,-avx512vpopcntdq,-f16c,-fma,-fma4,-gfni,-pclmul,-sha,-sse2,-sse3,-sse4.1,-sse4.2,-sse4a,-ssse3,-vaes,-vpclmulqdq,-xop,-xsave,-xsaveopt" -// CHECK: #3 = {{.*}}"target-cpu"="i686" "target-features"="+mmx,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87" -// CHECK: #4 = {{.*}}"target-cpu"="i686" "target-features"="+x87,-avx,-avx2,-avx512bitalg,-avx512bw,-avx512cd,-avx512dq,-avx512er,-avx512f,-avx512ifma,-avx512pf,-avx512vbmi,-avx512vbmi2,-avx512vl,-avx512vnni,-avx512vpopcntdq,-f16c,-fma,-fma4,-sse4.1,-sse4.2,-vaes,-vpclmulqdq,-xop,-xsave,-xsaveopt" -// CHECK: #5 = {{.*}}"target-cpu"="ivybridge" "target-features"="+avx,+cx16,+f16c,+fsgsbase,+fxsr,+mmx,+pclmul,+popcnt,+rdrnd,+sahf,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsaveopt,-aes,-vaes" -// CHECK: #6 = {{.*}}"target-cpu"="i686" "target-features"="+x87,-3dnow,-3dnowa,-mmx" -// CHECK: #7 = {{.*}}"target-cpu"="lakemont" "target-features"="+mmx" +// CHECK: #0 = {{.*}}"target-cpu"="i686" "target-features"="+cx8,+x87" +// CHECK: #1 = {{.*}}"target-cpu"="ivybridge" "target-features"="+avx,+cx16,+cx8,+f16c,+fsgsbase,+fxsr,+mmx,+pclmul,+popcnt,+rdrnd,+sahf,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsaveopt" +// CHECK: #2 = {{.*}}"target-cpu"="i686" "target-features"="+cx8,+x87,-aes,-avx,-avx2,-avx512bitalg,-avx512bw,-avx512cd,-avx512dq,-avx512er,-avx512f,-avx512ifma,-avx512pf,-avx512vbmi,-avx512vbmi2,-avx512vl,-avx512vnni,-avx512vpopcntdq,-f16c,-fma,-fma4,-gfni,-pclmul,-sha,-sse2,-sse3,-sse4.1,-sse4.2,-sse4a,-ssse3,-vaes,-vpclmulqdq,-xop,-xsave,-xsaveopt" +// CHECK: #3 = {{.*}}"target-cpu"="i686" "target-features"="+cx8,+mmx,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87" +// CHECK: #4 = {{.*}}"target-cpu"="i686" "target-features"="+cx8,+x87,-avx,-avx2,-avx512bitalg,-avx512bw,-avx512cd,-avx512dq,-avx512er,-avx512f,-avx512ifma,-avx512pf,-avx512vbmi,-avx512vbmi2,-avx512vl,-avx512vnni,-avx512vpopcntdq,-f16c,-fma,-fma4,-sse4.1,-sse4.2,-vaes,-vpclmulqdq,-xop,-xsave,-xsaveopt" +// CHECK: #5 = {{.*}}"target-cpu"="ivybridge" "target-features"="+avx,+cx16,+cx8,+f16c,+fsgsbase,+fxsr,+mmx,+pclmul,+popcnt,+rdrnd,+sahf,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsaveopt,-aes,-vaes" +// CHECK: #6 = {{.*}}"target-cpu"="i686" "target-features"="+cx8,+x87,-3dnow,-3dnowa,-mmx" +// CHECK: #7 = {{.*}}"target-cpu"="lakemont" "target-features"="+cx8,+mmx" diff --git a/test/CodeGen/attr-target-x87-softfp.c b/test/CodeGen/attr-target-x87-softfp.c index 16b7cfe8277..0d26dab74ec 100644 --- a/test/CodeGen/attr-target-x87-softfp.c +++ b/test/CodeGen/attr-target-x87-softfp.c @@ -7,10 +7,10 @@ int __attribute__((target("no-x87"))) bar(int a) { return 4; } // CHECK: foo{{.*}} #0 // CHECK: bar{{.*}} #1 -// CHECK: #0 = {{.*}}"target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" +// CHECK: #0 = {{.*}}"target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" // HARD: "use-soft-float"="false" // SOFT: "use-soft-float"="true" -// CHECK: #1 = {{.*}}"target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,-x87" +// CHECK: #1 = {{.*}}"target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,-x87" // HARD: "use-soft-float"="false" // SOFT: "use-soft-float"="true" diff --git a/test/Preprocessor/init.c b/test/Preprocessor/init.c index dffcdf7f4d8..641d1006499 100644 --- a/test/Preprocessor/init.c +++ b/test/Preprocessor/init.c @@ -2845,8 +2845,9 @@ // I386:#define __i386__ 1 // I386:#define i386 1 // -// RUN: %clang_cc1 -E -dM -ffreestanding -triple=i386-pc-linux-gnu -target-cpu pentium4 < /dev/null | FileCheck -match-full-lines -check-prefix I386-LINUX %s -// RUN: %clang_cc1 -x c++ -E -dM -ffreestanding -triple=i386-pc-linux-gnu -target-cpu pentium4 < /dev/null | FileCheck -match-full-lines -check-prefix I386-LINUX -check-prefix I386-LINUX-CXX %s +// RUN: %clang_cc1 -E -dM -ffreestanding -triple=i386-pc-linux-gnu -target-cpu pentium4 < /dev/null | FileCheck -match-full-lines -check-prefix I386-LINUX -check-prefix I386-LINUX-ALIGN32 %s +// RUN: %clang_cc1 -x c++ -E -dM -ffreestanding -triple=i386-pc-linux-gnu -target-cpu pentium4 < /dev/null | FileCheck -match-full-lines -check-prefix I386-LINUX -check-prefix I386-LINUX-CXX -check-prefix I386-LINUX-ALIGN32 %s +// RUN: %clang_cc1 -E -dM -ffreestanding -triple=i386-pc-linux-gnu -target-cpu pentium4 -malign-double < /dev/null | FileCheck -match-full-lines -check-prefix I386-LINUX -check-prefix I386-LINUX-ALIGN64 %s // // I386-LINUX-NOT:#define _LP64 // I386-LINUX:#define __BIGGEST_ALIGNMENT__ 16 @@ -2883,6 +2884,18 @@ // I386-LINUX:#define __FLT_MIN_EXP__ (-125) // I386-LINUX:#define __FLT_MIN__ 1.17549435e-38F // I386-LINUX:#define __FLT_RADIX__ 2 +// I386-LINUX:#define __GCC_ATOMIC_BOOL_LOCK_FREE 2 +// I386-LINUX:#define __GCC_ATOMIC_CHAR16_T_LOCK_FREE 2 +// I386-LINUX:#define __GCC_ATOMIC_CHAR32_T_LOCK_FREE 2 +// I386-LINUX:#define __GCC_ATOMIC_CHAR_LOCK_FREE 2 +// I386-LINUX:#define __GCC_ATOMIC_INT_LOCK_FREE 2 +// I386-LINUX-ALIGN32:#define __GCC_ATOMIC_LLONG_LOCK_FREE 1 +// I386-LINUX-ALIGN64:#define __GCC_ATOMIC_LLONG_LOCK_FREE 2 +// I386-LINUX:#define __GCC_ATOMIC_LONG_LOCK_FREE 2 +// I386-LINUX:#define __GCC_ATOMIC_POINTER_LOCK_FREE 2 +// I386-LINUX:#define __GCC_ATOMIC_SHORT_LOCK_FREE 2 +// I386-LINUX:#define __GCC_ATOMIC_TEST_AND_SET_TRUEVAL 1 +// I386-LINUX:#define __GCC_ATOMIC_WCHAR_T_LOCK_FREE 2 // I386-LINUX:#define __INT16_C_SUFFIX__ // I386-LINUX:#define __INT16_FMTd__ "hd" // I386-LINUX:#define __INT16_FMTi__ "hi" @@ -3034,8 +3047,10 @@ // I386-LINUX:#define __i386__ 1 // I386-LINUX:#define i386 1 // -// RUN: %clang_cc1 -E -dM -ffreestanding -triple=i386-netbsd < /dev/null | FileCheck -match-full-lines -check-prefix I386-NETBSD %s -// RUN: %clang_cc1 -x c++ -E -dM -ffreestanding -triple=i386-netbsd < /dev/null | FileCheck -match-full-lines -check-prefix I386-NETBSD -check-prefix I386-NETBSD-CXX %s +// RUN: %clang_cc1 -E -dM -ffreestanding -triple=i386-netbsd -target-cpu i486 < /dev/null | FileCheck -match-full-lines -check-prefix I386-NETBSD %s +// RUN: %clang_cc1 -x c++ -E -dM -ffreestanding -triple=i386-netbsd -target-cpu i486 < /dev/null | FileCheck -match-full-lines -check-prefix I386-NETBSD -check-prefix I386-NETBSD-CXX %s +// RUN: %clang_cc1 -E -dM -ffreestanding -triple=i386-netbsd -target-cpu i486 -malign-double < /dev/null | FileCheck -match-full-lines -check-prefix I386-NETBSD %s +// // // I386-NETBSD-NOT:#define _LP64 // I386-NETBSD:#define __BIGGEST_ALIGNMENT__ 16 @@ -3072,6 +3087,17 @@ // I386-NETBSD:#define __FLT_MIN_EXP__ (-125) // I386-NETBSD:#define __FLT_MIN__ 1.17549435e-38F // I386-NETBSD:#define __FLT_RADIX__ 2 +// I386-NETBSD:#define __GCC_ATOMIC_BOOL_LOCK_FREE 2 +// I386-NETBSD:#define __GCC_ATOMIC_CHAR16_T_LOCK_FREE 2 +// I386-NETBSD:#define __GCC_ATOMIC_CHAR32_T_LOCK_FREE 2 +// I386-NETBSD:#define __GCC_ATOMIC_CHAR_LOCK_FREE 2 +// I386-NETBSD:#define __GCC_ATOMIC_INT_LOCK_FREE 2 +// I386-NETBSD:#define __GCC_ATOMIC_LLONG_LOCK_FREE 1 +// I386-NETBSD:#define __GCC_ATOMIC_LONG_LOCK_FREE 2 +// I386-NETBSD:#define __GCC_ATOMIC_POINTER_LOCK_FREE 2 +// I386-NETBSD:#define __GCC_ATOMIC_SHORT_LOCK_FREE 2 +// I386-NETBSD:#define __GCC_ATOMIC_TEST_AND_SET_TRUEVAL 1 +// I386-NETBSD:#define __GCC_ATOMIC_WCHAR_T_LOCK_FREE 2 // I386-NETBSD:#define __INT16_C_SUFFIX__ // I386-NETBSD:#define __INT16_FMTd__ "hd" // I386-NETBSD:#define __INT16_FMTi__ "hi" @@ -8947,6 +8973,17 @@ // X86_64-LINUX:#define __FLT_MIN_EXP__ (-125) // X86_64-LINUX:#define __FLT_MIN__ 1.17549435e-38F // X86_64-LINUX:#define __FLT_RADIX__ 2 +// X86_64-LINUX:#define __GCC_ATOMIC_BOOL_LOCK_FREE 2 +// X86_64-LINUX:#define __GCC_ATOMIC_CHAR16_T_LOCK_FREE 2 +// X86_64-LINUX:#define __GCC_ATOMIC_CHAR32_T_LOCK_FREE 2 +// X86_64-LINUX:#define __GCC_ATOMIC_CHAR_LOCK_FREE 2 +// X86_64-LINUX:#define __GCC_ATOMIC_INT_LOCK_FREE 2 +// X86_64-LINUX:#define __GCC_ATOMIC_LLONG_LOCK_FREE 2 +// X86_64-LINUX:#define __GCC_ATOMIC_LONG_LOCK_FREE 2 +// X86_64-LINUX:#define __GCC_ATOMIC_POINTER_LOCK_FREE 2 +// X86_64-LINUX:#define __GCC_ATOMIC_SHORT_LOCK_FREE 2 +// X86_64-LINUX:#define __GCC_ATOMIC_TEST_AND_SET_TRUEVAL 1 +// X86_64-LINUX:#define __GCC_ATOMIC_WCHAR_T_LOCK_FREE 2 // X86_64-LINUX:#define __INT16_C_SUFFIX__ // X86_64-LINUX:#define __INT16_FMTd__ "hd" // X86_64-LINUX:#define __INT16_FMTi__ "hi" @@ -9149,6 +9186,17 @@ // X86_64-NETBSD:#define __FLT_MIN_EXP__ (-125) // X86_64-NETBSD:#define __FLT_MIN__ 1.17549435e-38F // X86_64-NETBSD:#define __FLT_RADIX__ 2 +// X86_64-NETBSD:#define __GCC_ATOMIC_BOOL_LOCK_FREE 2 +// X86_64-NETBSD:#define __GCC_ATOMIC_CHAR16_T_LOCK_FREE 2 +// X86_64-NETBSD:#define __GCC_ATOMIC_CHAR32_T_LOCK_FREE 2 +// X86_64-NETBSD:#define __GCC_ATOMIC_CHAR_LOCK_FREE 2 +// X86_64-NETBSD:#define __GCC_ATOMIC_INT_LOCK_FREE 2 +// X86_64-NETBSD:#define __GCC_ATOMIC_LLONG_LOCK_FREE 2 +// X86_64-NETBSD:#define __GCC_ATOMIC_LONG_LOCK_FREE 2 +// X86_64-NETBSD:#define __GCC_ATOMIC_POINTER_LOCK_FREE 2 +// X86_64-NETBSD:#define __GCC_ATOMIC_SHORT_LOCK_FREE 2 +// X86_64-NETBSD:#define __GCC_ATOMIC_TEST_AND_SET_TRUEVAL 1 +// X86_64-NETBSD:#define __GCC_ATOMIC_WCHAR_T_LOCK_FREE 2 // X86_64-NETBSD:#define __INT16_C_SUFFIX__ // X86_64-NETBSD:#define __INT16_FMTd__ "hd" // X86_64-NETBSD:#define __INT16_FMTi__ "hi" diff --git a/test/Preprocessor/predefined-win-macros.c b/test/Preprocessor/predefined-win-macros.c index 5e5f1e82f9b..6034c085024 100644 --- a/test/Preprocessor/predefined-win-macros.c +++ b/test/Preprocessor/predefined-win-macros.c @@ -3,7 +3,7 @@ // RUN: %clang_cc1 %s -x c++ -E -dM -triple x86_64-pc-win32 -fms-extensions -fms-compatibility \ // RUN: -fms-compatibility-version=19.00 -std=c++14 -o - | FileCheck -match-full-lines %s --check-prefix=CHECK-MS64 // RUN: %clang_cc1 %s -x c++ -E -dM -triple x86_64-pc-win32 -fms-extensions -fms-compatibility \ -// RUN: -fms-compatibility-version=19.00 -std=c++14 -o - | grep GCC | count 1 +// RUN: -fms-compatibility-version=19.00 -std=c++14 -o - | grep GCC | count 5 // CHECK-MS64: #define _INTEGRAL_MAX_BITS 64 // CHECK-MS64: #define _MSC_EXTENSIONS 1 // CHECK-MS64: #define _MSC_VER 1900 @@ -15,13 +15,17 @@ // CHECK-MS64-NOT: GNU // CHECK-MS64-NOT: GXX // CHECK-MS64: #define __GCC_ASM_FLAG_OUTPUTS__ 1 +// CHECK-MS64: #define __GCC_HAVE_SYNC_COMPARE_AND_SWAP_1 1 +// CHECK-MS64: #define __GCC_HAVE_SYNC_COMPARE_AND_SWAP_2 1 +// CHECK-MS64: #define __GCC_HAVE_SYNC_COMPARE_AND_SWAP_4 1 +// CHECK-MS64: #define __GCC_HAVE_SYNC_COMPARE_AND_SWAP_8 1 // CHECK-MS64-NOT: GNU // CHECK-MS64-NOT: GXX // RUN: %clang_cc1 %s -x c++ -E -dM -triple i686-pc-win32 -fms-extensions -fms-compatibility \ // RUN: -fms-compatibility-version=19.00 -std=c++17 -o - | FileCheck -match-full-lines %s --check-prefix=CHECK-MS // RUN: %clang_cc1 %s -x c++ -E -dM -triple i686-pc-win32 -fms-extensions -fms-compatibility \ -// RUN: -fms-compatibility-version=19.00 -std=c++17 -o - | grep GCC | count 1 +// RUN: -fms-compatibility-version=19.00 -std=c++17 -o - | grep GCC | count 5 // CHECK-MS: #define _INTEGRAL_MAX_BITS 64 // CHECK-MS: #define _MSC_EXTENSIONS 1 // CHECK-MS: #define _MSC_VER 1900 @@ -33,6 +37,10 @@ // CHECK-MS-NOT: GNU // CHECK-MS-NOT: GXX // CHECK-MS: #define __GCC_ASM_FLAG_OUTPUTS__ 1 +// CHECK-MS: #define __GCC_HAVE_SYNC_COMPARE_AND_SWAP_1 1 +// CHECK-MS: #define __GCC_HAVE_SYNC_COMPARE_AND_SWAP_2 1 +// CHECK-MS: #define __GCC_HAVE_SYNC_COMPARE_AND_SWAP_4 1 +// CHECK-MS: #define __GCC_HAVE_SYNC_COMPARE_AND_SWAP_8 1 // CHECK-MS-NOT: GNU // CHECK-MS-NOT: GXX From a91bba1be08af6be68b577cd0e2a237c728f1a03 Mon Sep 17 00:00:00 2001 From: Alexey Bataev Date: Thu, 21 Mar 2019 20:36:16 +0000 Subject: [PATCH 10/38] [OPENMP] Simplify codegen for allocate directive on local variables. Simplified codegen for the allocate directive for local variables, initial implementation of the codegen for NVPTX target. git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@356710 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/CGOpenMPRuntime.cpp | 86 ++++++++++++-------------- lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp | 24 +++++++ test/OpenMP/allocate_codegen.cpp | 14 +---- test/OpenMP/nvptx_allocate_codegen.cpp | 5 ++ 4 files changed, 72 insertions(+), 57 deletions(-) diff --git a/lib/CodeGen/CGOpenMPRuntime.cpp b/lib/CodeGen/CGOpenMPRuntime.cpp index d3736b7244d..a8af23b63ac 100644 --- a/lib/CodeGen/CGOpenMPRuntime.cpp +++ b/lib/CodeGen/CGOpenMPRuntime.cpp @@ -9745,54 +9745,50 @@ class OMPAllocateCleanupTy final : public EHScopeStack::Cleanup { Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF, const VarDecl *VD) { + if (!VD) + return Address::invalid(); const VarDecl *CVD = VD->getCanonicalDecl(); if (!CVD->hasAttr()) return Address::invalid(); - for (const Attr *A: CVD->getAttrs()) { - if (const auto *AA = dyn_cast(A)) { - auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); - if (!Elem.second.ServiceInsertPt) - setLocThreadIdInsertPt(CGF); - CGBuilderTy::InsertPointGuard IPG(CGF.Builder); - CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt); - llvm::Value *Size; - CharUnits Align = CGM.getContext().getDeclAlign(CVD); - if (CVD->getType()->isVariablyModifiedType()) { - Size = CGF.getTypeSize(CVD->getType()); - Align = CGM.getContext().getTypeAlignInChars(CVD->getType()); - } else { - CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType()); - Align = CGM.getContext().getDeclAlign(CVD); - Size = CGM.getSize(Sz.alignTo(Align)); - } - llvm::Value *ThreadID = getThreadID(CGF, CVD->getBeginLoc()); - llvm::Value *Allocator; - if (const Expr *AllocExpr = AA->getAllocator()) { - Allocator = CGF.EmitScalarExpr(AllocExpr); - } else { - // Default allocator in libomp is nullptr. - Allocator = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy); - } - llvm::Value *Args[] = {ThreadID, Size, Allocator}; - - llvm::Value *Addr = - CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_alloc), Args, - CVD->getName() + ".void.addr"); - llvm::Value *FiniArgs[OMPAllocateCleanupTy::CleanupArgs] = { - ThreadID, Addr, Allocator}; - llvm::FunctionCallee FiniRTLFn = createRuntimeFunction(OMPRTL__kmpc_free); - - CGF.EHStack.pushCleanup( - NormalAndEHCleanup, FiniRTLFn, llvm::makeArrayRef(FiniArgs)); - Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( - Addr, - CGF.ConvertTypeForMem( - CGM.getContext().getPointerType(CVD->getType())), - CVD->getName() + ".addr"); - return Address(Addr, Align); - } - } - return Address::invalid(); + const auto *AA = CVD->getAttr(); + // Use the default allocation. + if (AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc) + return Address::invalid(); + auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); + if (!Elem.second.ServiceInsertPt) + setLocThreadIdInsertPt(CGF); + CGBuilderTy::InsertPointGuard IPG(CGF.Builder); + CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt); + llvm::Value *Size; + CharUnits Align = CGM.getContext().getDeclAlign(CVD); + if (CVD->getType()->isVariablyModifiedType()) { + Size = CGF.getTypeSize(CVD->getType()); + Align = CGM.getContext().getTypeAlignInChars(CVD->getType()); + } else { + CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType()); + Align = CGM.getContext().getDeclAlign(CVD); + Size = CGM.getSize(Sz.alignTo(Align)); + } + llvm::Value *ThreadID = getThreadID(CGF, CVD->getBeginLoc()); + assert(AA->getAllocator() && + "Expected allocator expression for non-default allocator."); + llvm::Value *Allocator = CGF.EmitScalarExpr(AA->getAllocator()); + llvm::Value *Args[] = {ThreadID, Size, Allocator}; + + llvm::Value *Addr = + CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_alloc), Args, + CVD->getName() + ".void.addr"); + llvm::Value *FiniArgs[OMPAllocateCleanupTy::CleanupArgs] = {ThreadID, Addr, + Allocator}; + llvm::FunctionCallee FiniRTLFn = createRuntimeFunction(OMPRTL__kmpc_free); + + CGF.EHStack.pushCleanup(NormalAndEHCleanup, FiniRTLFn, + llvm::makeArrayRef(FiniArgs)); + Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( + Addr, + CGF.ConvertTypeForMem(CGM.getContext().getPointerType(CVD->getType())), + CVD->getName() + ".addr"); + return Address(Addr, Align); } llvm::Function *CGOpenMPSIMDRuntime::emitParallelOutlinedFunction( diff --git a/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp b/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp index 7de16032269..fd294dab640 100644 --- a/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp +++ b/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp @@ -4725,6 +4725,28 @@ void CGOpenMPRuntimeNVPTX::emitFunctionProlog(CodeGenFunction &CGF, Address CGOpenMPRuntimeNVPTX::getAddressOfLocalVariable(CodeGenFunction &CGF, const VarDecl *VD) { + bool UseDefaultAllocator = true; + if (VD && VD->hasAttr()) { + const auto *A = VD->getAttr(); + switch (A->getAllocatorType()) { + // Use the default allocator here as by default local vars are + // threadlocal. + case OMPAllocateDeclAttr::OMPDefaultMemAlloc: + case OMPAllocateDeclAttr::OMPThreadMemAlloc: + // Just pass-through to check if the globalization is required. + break; + case OMPAllocateDeclAttr::OMPLargeCapMemAlloc: + case OMPAllocateDeclAttr::OMPCGroupMemAlloc: + case OMPAllocateDeclAttr::OMPHighBWMemAlloc: + case OMPAllocateDeclAttr::OMPLowLatMemAlloc: + case OMPAllocateDeclAttr::OMPConstMemAlloc: + case OMPAllocateDeclAttr::OMPPTeamMemAlloc: + case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc: + UseDefaultAllocator = false; + break; + } + } + if (getDataSharingMode(CGM) != CGOpenMPRuntimeNVPTX::Generic) return Address::invalid(); @@ -4746,7 +4768,9 @@ Address CGOpenMPRuntimeNVPTX::getAddressOfLocalVariable(CodeGenFunction &CGF, return VDI->second.PrivateAddr; } } + // TODO: replace it with return + // UseDefaultAllocator ? Address::invalid : // CGOpenMPRuntime::getAddressOfLocalVariable(CGF, VD); when NVPTX libomp // supports __kmpc_alloc|__kmpc_free. return Address::invalid(); diff --git a/test/OpenMP/allocate_codegen.cpp b/test/OpenMP/allocate_codegen.cpp index 6239ded455d..daad9353368 100644 --- a/test/OpenMP/allocate_codegen.cpp +++ b/test/OpenMP/allocate_codegen.cpp @@ -67,25 +67,15 @@ int main () { static int a; #pragma omp allocate(a) allocator(omp_thread_mem_alloc) a=2; - // CHECK: [[GTID:%.+]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @{{.+}}) - // CHECK-NEXT: [[B_VOID_ADDR:%.+]] = call i8* @__kmpc_alloc(i32 [[GTID]], i64 8, i8** null) - // CHECK-NEXT: [[B_ADDR:%.+]] = bitcast i8* [[B_VOID_ADDR]] to double* // CHECK-NOT: {{__kmpc_alloc|__kmpc_free}} - // CHECK: store double 3.000000e+00, double* [[B_ADDR]], - // CHECK: [[RES:%.+]] = call i32 [[FOO:@.+]]() - // CHECK: store i32 [[RES]], i32* [[RET:%.+]], - // CHECK-NEXT: call void @__kmpc_free(i32 [[GTID]], i8* [[B_VOID_ADDR]], i8** null) + // CHECK: alloca double, // CHECK-NOT: {{__kmpc_alloc|__kmpc_free}} double b = 3; #pragma omp allocate(b) - // CHECK: [[RETVAL:%.+]] = load i32, i32* [[RET]], - // CHECK: ret i32 [[RETVAL]] return (foo()); } -// CHECK-NOT: call {{.+}} {{__kmpc_alloc|__kmpc_free}} - -// CHECK: define {{.*}}i32 [[FOO]]() +// CHECK: define {{.*}}i32 @{{.+}}foo{{.+}}() // CHECK: [[GTID:%.+]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @{{.+}}) // CHECK-NEXT: [[OMP_CGROUP_MEM_ALLOC:%.+]] = load i8**, i8*** @omp_cgroup_mem_alloc, // CHECK-NEXT: [[V_VOID_ADDR:%.+]] = call i8* @__kmpc_alloc(i32 [[GTID]], i64 4, i8** [[OMP_CGROUP_MEM_ALLOC]]) diff --git a/test/OpenMP/nvptx_allocate_codegen.cpp b/test/OpenMP/nvptx_allocate_codegen.cpp index e9b9509334e..df6a727c7ac 100644 --- a/test/OpenMP/nvptx_allocate_codegen.cpp +++ b/test/OpenMP/nvptx_allocate_codegen.cpp @@ -57,7 +57,9 @@ namespace ns{ } #pragma omp allocate(ns::a) allocator(omp_pteam_mem_alloc) +// CHECK-LABEL: @main int main () { + // CHECK: alloca double, static int a; #pragma omp allocate(a) allocator(omp_thread_mem_alloc) a=2; @@ -66,6 +68,9 @@ int main () { return (foo()); } +// CHECK: define {{.*}}i32 @{{.+}}foo{{.+}}() +// CHECK: alloca i32, + extern template int ST::m; #pragma omp end declare target #endif From 8d5dd692a0d082c292de64a3a093d9901c8238e2 Mon Sep 17 00:00:00 2001 From: Richard Smith Date: Thu, 21 Mar 2019 20:42:13 +0000 Subject: [PATCH 11/38] Improve the diagnostic for #include_next occurring in a file not found in the include path. Instead of making the incorrect claim that the included file has an absolute path, describe the actual problem: the including file was found either by absolute path, or relative to such a file, or relative to the primary source file. git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@356712 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/clang/Basic/DiagnosticLexKinds.td | 6 ++-- lib/Lex/PPDirectives.cpp | 4 +++ test/Preprocessor/Inputs/include-next-1/bar.h | 1 + test/Preprocessor/Inputs/include-next-1/foo.h | 1 + test/Preprocessor/Inputs/include-next-2/bar.h | 1 + test/Preprocessor/include-next.c | 29 +++++++++++++++++++ 6 files changed, 40 insertions(+), 2 deletions(-) create mode 100644 test/Preprocessor/Inputs/include-next-1/bar.h create mode 100644 test/Preprocessor/Inputs/include-next-1/foo.h create mode 100644 test/Preprocessor/Inputs/include-next-2/bar.h create mode 100644 test/Preprocessor/include-next.c diff --git a/include/clang/Basic/DiagnosticLexKinds.td b/include/clang/Basic/DiagnosticLexKinds.td index 45e50c9a8cd..48d9551d686 100644 --- a/include/clang/Basic/DiagnosticLexKinds.td +++ b/include/clang/Basic/DiagnosticLexKinds.td @@ -268,12 +268,14 @@ def err_pp_hash_error : Error<"%0">; } def pp_include_next_in_primary : Warning< - "#include_next in primary source file">, + "#include_next in primary source file; " + "will search from start of include path">, InGroup>; def pp_include_macros_out_of_predefines : Error< "the #__include_macros directive is only for internal use by -imacros">; def pp_include_next_absolute_path : Warning< - "#include_next with absolute path">, + "#include_next in file found relative to primary source file or found by " + "absolute path; will search from start of include path">, InGroup>; def ext_c99_whitespace_required_after_macro_name : ExtWarn< "ISO C99 requires whitespace after the macro name">, InGroup; diff --git a/lib/Lex/PPDirectives.cpp b/lib/Lex/PPDirectives.cpp index 179b0cbf696..1850af9cfa4 100644 --- a/lib/Lex/PPDirectives.cpp +++ b/lib/Lex/PPDirectives.cpp @@ -2085,6 +2085,10 @@ void Preprocessor::HandleIncludeNextDirective(SourceLocation HashLoc, LookupFromFile = CurPPLexer->getFileEntry(); Lookup = nullptr; } else if (!Lookup) { + // The current file was not found by walking the include path. Either it + // is the primary file (handled above), or it was found by absolute path, + // or it was found relative to such a file. + // FIXME: Track enough information so we know which case we're in. Diag(IncludeNextTok, diag::pp_include_next_absolute_path); } else { // Start looking up in the next directory. diff --git a/test/Preprocessor/Inputs/include-next-1/bar.h b/test/Preprocessor/Inputs/include-next-1/bar.h new file mode 100644 index 00000000000..1cf97aeb50c --- /dev/null +++ b/test/Preprocessor/Inputs/include-next-1/bar.h @@ -0,0 +1 @@ +#define BAR 1 diff --git a/test/Preprocessor/Inputs/include-next-1/foo.h b/test/Preprocessor/Inputs/include-next-1/foo.h new file mode 100644 index 00000000000..7d2753c4e4d --- /dev/null +++ b/test/Preprocessor/Inputs/include-next-1/foo.h @@ -0,0 +1 @@ +#include_next "bar.h" diff --git a/test/Preprocessor/Inputs/include-next-2/bar.h b/test/Preprocessor/Inputs/include-next-2/bar.h new file mode 100644 index 00000000000..3ac8411a18d --- /dev/null +++ b/test/Preprocessor/Inputs/include-next-2/bar.h @@ -0,0 +1 @@ +#define BAR 2 diff --git a/test/Preprocessor/include-next.c b/test/Preprocessor/include-next.c new file mode 100644 index 00000000000..4b9a0e870ce --- /dev/null +++ b/test/Preprocessor/include-next.c @@ -0,0 +1,29 @@ +// RUN: %clang_cc1 -verify %s -E -o /dev/null -I%S/Inputs/include-next-1 -I%S/Inputs/include-next-2 -DTEST=1 +// RUN: %clang_cc1 -verify %s -E -o /dev/null -I%S/Inputs/include-next-1 -I%S/Inputs/include-next-2 -DTEST=2 +// RUN: %clang_cc1 -verify %s -E -o /dev/null -I%S/Inputs/include-next-1 -I%S/Inputs/include-next-2 -DTEST=3 + +#if TEST == 1 +// expected-warning@+1 {{#include_next in primary source file}} +#include_next "bar.h" +#if BAR != 1 +#error wrong bar +#endif + +#elif TEST == 2 +// expected-no-diagnostics +#include "foo.h" +#if BAR != 2 +#error wrong bar +#endif + +#elif TEST == 3 +// expected-warning@foo.h:1 {{#include_next in file found relative to primary source file or found by absolute path}} +#include "Inputs/include-next-1/foo.h" +#if BAR != 1 +#error wrong bar +#endif +#undef BAR + +#else +#error unknown test +#endif From d489988e53d97d625e8dfe02ca57f39bb36d97d9 Mon Sep 17 00:00:00 2001 From: Alexey Bataev Date: Thu, 21 Mar 2019 20:52:04 +0000 Subject: [PATCH 12/38] [OPENMP]Fix a warning about unused variable, NFC. git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@356715 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp b/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp index fd294dab640..46b1b0faaee 100644 --- a/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp +++ b/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp @@ -4773,6 +4773,7 @@ Address CGOpenMPRuntimeNVPTX::getAddressOfLocalVariable(CodeGenFunction &CGF, // UseDefaultAllocator ? Address::invalid : // CGOpenMPRuntime::getAddressOfLocalVariable(CGF, VD); when NVPTX libomp // supports __kmpc_alloc|__kmpc_free. + (void)UseDefaultAllocator; // Prevent a warning. return Address::invalid(); } From 28647f65ca9348a7b245c0ca4684c3ee2fa4d56b Mon Sep 17 00:00:00 2001 From: Amara Emerson Date: Thu, 21 Mar 2019 22:31:37 +0000 Subject: [PATCH 13/38] [AArch64] Split the neon.addp intrinsic into integer and fp variants. This is the result of discussions on the list about how to deal with intrinsics which require codegen to disambiguate them via only the integer/fp overloads. It causes problems for GlobalISel as some of that information is lost during translation, while with other operations like IR instructions the information is encoded into the instruction opcode. This patch changes clang to emit the new faddp intrinsic if the vector operands to the builtin have FP element types. LLVM IR AutoUpgrade has been taught to upgrade existing calls to aarch64.neon.addp with fp vector arguments, and we remove the workarounds introduced for GlobalISel in r355865. This is a more permanent solution to PR40968. Differential Revision: https://reviews.llvm.org/D59655 git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@356722 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/CGBuiltin.cpp | 7 +++++++ test/CodeGen/aarch64-neon-intrinsics.c | 6 +++--- test/CodeGen/aarch64-v8.2a-neon-intrinsics.c | 4 ++-- 3 files changed, 12 insertions(+), 5 deletions(-) diff --git a/lib/CodeGen/CGBuiltin.cpp b/lib/CodeGen/CGBuiltin.cpp index 4d9c76ae7fc..5042d09227b 100644 --- a/lib/CodeGen/CGBuiltin.cpp +++ b/lib/CodeGen/CGBuiltin.cpp @@ -5095,6 +5095,13 @@ Value *CodeGenFunction::EmitCommonNeonBuiltinExpr( switch (BuiltinID) { default: break; + case NEON::BI__builtin_neon_vpadd_v: + case NEON::BI__builtin_neon_vpaddq_v: + // We don't allow fp/int overloading of intrinsics. + if (VTy->getElementType()->isFloatingPointTy() && + Int == Intrinsic::aarch64_neon_addp) + Int = Intrinsic::aarch64_neon_faddp; + break; case NEON::BI__builtin_neon_vabs_v: case NEON::BI__builtin_neon_vabsq_v: if (VTy->getElementType()->isFloatingPointTy()) diff --git a/test/CodeGen/aarch64-neon-intrinsics.c b/test/CodeGen/aarch64-neon-intrinsics.c index 40e39912be9..9a5b3a9f18b 100644 --- a/test/CodeGen/aarch64-neon-intrinsics.c +++ b/test/CodeGen/aarch64-neon-intrinsics.c @@ -4411,7 +4411,7 @@ uint32x2_t test_vpadd_u32(uint32x2_t a, uint32x2_t b) { // CHECK-LABEL: @test_vpadd_f32( // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8> -// CHECK: [[VPADD_V2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.addp.v2f32(<2 x float> %a, <2 x float> %b) +// CHECK: [[VPADD_V2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.faddp.v2f32(<2 x float> %a, <2 x float> %b) // CHECK: [[VPADD_V3_I:%.*]] = bitcast <2 x float> [[VPADD_V2_I]] to <8 x i8> // CHECK: ret <2 x float> [[VPADD_V2_I]] float32x2_t test_vpadd_f32(float32x2_t a, float32x2_t b) { @@ -4475,7 +4475,7 @@ uint32x4_t test_vpaddq_u32(uint32x4_t a, uint32x4_t b) { // CHECK-LABEL: @test_vpaddq_f32( // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8> -// CHECK: [[VPADDQ_V2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.addp.v4f32(<4 x float> %a, <4 x float> %b) +// CHECK: [[VPADDQ_V2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.faddp.v4f32(<4 x float> %a, <4 x float> %b) // CHECK: [[VPADDQ_V3_I:%.*]] = bitcast <4 x float> [[VPADDQ_V2_I]] to <16 x i8> // CHECK: ret <4 x float> [[VPADDQ_V2_I]] float32x4_t test_vpaddq_f32(float32x4_t a, float32x4_t b) { @@ -4485,7 +4485,7 @@ float32x4_t test_vpaddq_f32(float32x4_t a, float32x4_t b) { // CHECK-LABEL: @test_vpaddq_f64( // CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x double> %b to <16 x i8> -// CHECK: [[VPADDQ_V2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.addp.v2f64(<2 x double> %a, <2 x double> %b) +// CHECK: [[VPADDQ_V2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.faddp.v2f64(<2 x double> %a, <2 x double> %b) // CHECK: [[VPADDQ_V3_I:%.*]] = bitcast <2 x double> [[VPADDQ_V2_I]] to <16 x i8> // CHECK: ret <2 x double> [[VPADDQ_V2_I]] float64x2_t test_vpaddq_f64(float64x2_t a, float64x2_t b) { diff --git a/test/CodeGen/aarch64-v8.2a-neon-intrinsics.c b/test/CodeGen/aarch64-v8.2a-neon-intrinsics.c index e1a2e3fb92d..a4bf8753363 100644 --- a/test/CodeGen/aarch64-v8.2a-neon-intrinsics.c +++ b/test/CodeGen/aarch64-v8.2a-neon-intrinsics.c @@ -736,14 +736,14 @@ float16x8_t test_vmulxq_f16(float16x8_t a, float16x8_t b) { } // CHECK-LABEL: test_vpadd_f16 -// CHECK: [[ADD:%.*]] = call <4 x half> @llvm.aarch64.neon.addp.v4f16(<4 x half> %a, <4 x half> %b) +// CHECK: [[ADD:%.*]] = call <4 x half> @llvm.aarch64.neon.faddp.v4f16(<4 x half> %a, <4 x half> %b) // CHECK: ret <4 x half> [[ADD]] float16x4_t test_vpadd_f16(float16x4_t a, float16x4_t b) { return vpadd_f16(a, b); } // CHECK-LABEL: test_vpaddq_f16 -// CHECK: [[ADD:%.*]] = call <8 x half> @llvm.aarch64.neon.addp.v8f16(<8 x half> %a, <8 x half> %b) +// CHECK: [[ADD:%.*]] = call <8 x half> @llvm.aarch64.neon.faddp.v8f16(<8 x half> %a, <8 x half> %b) // CHECK: ret <8 x half> [[ADD]] float16x8_t test_vpaddq_f16(float16x8_t a, float16x8_t b) { return vpaddq_f16(a, b); From 249cf79bade5af443c2a997b5cf645aa1a9e2843 Mon Sep 17 00:00:00 2001 From: Sterling Augustine Date: Thu, 21 Mar 2019 23:30:50 +0000 Subject: [PATCH 14/38] This test assumes that -rtlib defaults to libgcc. But that isn't true in the face of -DCLANG_DEFAULT_RTLIB=compiler-rt. Subscribers: dberris, jdoerfert, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D59674 git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@356724 91177308-0d34-0410-b5e6-96231b3b80d8 --- test/Driver/compiler-rt-unwind.c | 7 ------- 1 file changed, 7 deletions(-) diff --git a/test/Driver/compiler-rt-unwind.c b/test/Driver/compiler-rt-unwind.c index 00024dfa7ed..0ec067cbfc6 100644 --- a/test/Driver/compiler-rt-unwind.c +++ b/test/Driver/compiler-rt-unwind.c @@ -2,13 +2,6 @@ // --unwindlib=XXX properly. // // RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \ -// RUN: --target=x86_64-unknown-linux \ -// RUN: --gcc-toolchain="" \ -// RUN: | FileCheck --check-prefix=RTLIB-EMPTY %s -// RTLIB-EMPTY: "{{.*}}lgcc" -// RTLIB-EMPTY: "{{.*}}-lgcc_s" -// -// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \ // RUN: --target=x86_64-unknown-linux -rtlib=libgcc \ // RUN: --gcc-toolchain="" \ // RUN: | FileCheck --check-prefix=RTLIB-GCC %s From 8b92bc6733da59f4c1bd433ea54e2b0a2f1cfb0e Mon Sep 17 00:00:00 2001 From: Yaxun Sam Liu Date: Thu, 21 Mar 2019 11:14:21 -0400 Subject: [PATCH 15/38] [HIP-Clang][WA]SWDEV-183909 Warn shared var initialization Emit a warning for shared variable initialization instead of error. https://reviews.llvm.org/D59647 Change-Id: Ice72c306fee92237c6029ad259f4a42df17ffb06 --- include/clang/Basic/DiagnosticSemaKinds.td | 5 +++-- lib/Sema/SemaCUDA.cpp | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/include/clang/Basic/DiagnosticSemaKinds.td b/include/clang/Basic/DiagnosticSemaKinds.td index 8b5ce427713..e2c1fe2abd5 100644 --- a/include/clang/Basic/DiagnosticSemaKinds.td +++ b/include/clang/Basic/DiagnosticSemaKinds.td @@ -7198,8 +7198,9 @@ def err_cuda_device_exceptions : Error< def err_dynamic_var_init : Error< "dynamic initialization is not supported for " "__device__, __constant__, and __shared__ variables.">; -def err_shared_var_init : Error< - "initialization is not supported for __shared__ variables.">; +def warn_shared_var_init : Warning< + "initialization is not supported for __shared__ variables.">, + InGroup>, DefaultError; def err_device_static_local_var : Error< "within a %select{__device__|__global__|__host__|__host__ __device__}0 " "function, only __shared__ variables or const variables without device " diff --git a/lib/Sema/SemaCUDA.cpp b/lib/Sema/SemaCUDA.cpp index d062e8b201a..4c7626f27d8 100644 --- a/lib/Sema/SemaCUDA.cpp +++ b/lib/Sema/SemaCUDA.cpp @@ -499,7 +499,7 @@ void Sema::checkAllowedCUDAInitializer(VarDecl *VD) { if (!AllowedInit) { Diag(VD->getLocation(), VD->hasAttr() - ? diag::err_shared_var_init + ? diag::warn_shared_var_init : diag::err_dynamic_var_init) << Init->getSourceRange(); VD->setInvalidDecl(); From 8e93e6dc62856e1611157707ac99f59570ea66ed Mon Sep 17 00:00:00 2001 From: Luke Cheeseman Date: Fri, 22 Mar 2019 10:58:15 +0000 Subject: [PATCH 16/38] [ARM] Add Cortex-M35P Support - Add clang frontend testing for Cortex-M35P Differential Revision: https://reviews.llvm.org/D57765 git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@356742 91177308-0d34-0410-b5e6-96231b3b80d8 --- test/Driver/arm-cortex-cpus.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/test/Driver/arm-cortex-cpus.c b/test/Driver/arm-cortex-cpus.c index cfb569312e0..880e2346d77 100644 --- a/test/Driver/arm-cortex-cpus.c +++ b/test/Driver/arm-cortex-cpus.c @@ -822,8 +822,10 @@ // RUN: %clang -target arm -mcpu=cortex-m23 -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-CPUV8MBASE %s // CHECK-CPUV8MBASE: "-cc1"{{.*}} "-triple" "thumbv8m.base- -// RUN: %clang -target arm -mcpu=cortex-m33 -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-CPUV8MMAIN %s -// CHECK-CPUV8MMAIN: "-cc1"{{.*}} "-triple" "thumbv8m.main- +// RUN: %clang -target arm -mcpu=cortex-m33 -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-CORTEX-M33 %s +// RUN: %clang -target arm -mcpu=cortex-m35p -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-CORTEX-M35P %s +// CHECK-CORTEX-M33: "-cc1"{{.*}} "-triple" "thumbv8m.main-{{.*}} "-target-cpu" "cortex-m33" +// CHECK-CORTEX-M35P: "-cc1"{{.*}} "-triple" "thumbv8m.main-{{.*}} "-target-cpu" "cortex-m35p" // ================== Check whether -mcpu accepts mixed-case values. // RUN: %clang -target arm-linux-gnueabi -mcpu=Cortex-a5 -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-CASE-INSENSITIVE-CPUV7A %s From e3172cb65ff2f5bfb5d664a2ba4e97effc100c44 Mon Sep 17 00:00:00 2001 From: Ilya Biryukov Date: Fri, 22 Mar 2019 11:01:13 +0000 Subject: [PATCH 17/38] [Tooling] Avoid working-dir races in AllTUsToolExecutor Reviewers: ioeric Reviewed By: ioeric Subscribers: jdoerfert, cfe-commits Tags: #clang Differential Revision: https://reviews.llvm.org/D59683 git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@356743 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Tooling/AllTUsExecution.cpp | 21 +++++++-------------- 1 file changed, 7 insertions(+), 14 deletions(-) diff --git a/lib/Tooling/AllTUsExecution.cpp b/lib/Tooling/AllTUsExecution.cpp index bc50412cf4f..ca9db7a561b 100644 --- a/lib/Tooling/AllTUsExecution.cpp +++ b/lib/Tooling/AllTUsExecution.cpp @@ -9,6 +9,7 @@ #include "clang/Tooling/AllTUsExecution.h" #include "clang/Tooling/ToolExecutorPluginRegistry.h" #include "llvm/Support/ThreadPool.h" +#include "llvm/Support/VirtualFileSystem.h" namespace clang { namespace tooling { @@ -114,25 +115,22 @@ llvm::Error AllTUsToolExecutor::execute( { llvm::ThreadPool Pool(ThreadCount == 0 ? llvm::hardware_concurrency() : ThreadCount); - llvm::SmallString<128> InitialWorkingDir; - if (auto EC = llvm::sys::fs::current_path(InitialWorkingDir)) { - InitialWorkingDir = ""; - llvm::errs() << "Error while getting current working directory: " - << EC.message() << "\n"; - } for (std::string File : Files) { Pool.async( [&](std::string Path) { Log("[" + std::to_string(Count()) + "/" + TotalNumStr + "] Processing file " + Path); - ClangTool Tool(Compilations, {Path}); + // Each thread gets an indepent copy of a VFS to allow different + // concurrent working directories. + IntrusiveRefCntPtr FS = + llvm::vfs::createPhysicalFileSystem().release(); + ClangTool Tool(Compilations, {Path}, + std::make_shared(), FS); Tool.appendArgumentsAdjuster(Action.second); Tool.appendArgumentsAdjuster(getDefaultArgumentsAdjusters()); for (const auto &FileAndContent : OverlayFiles) Tool.mapVirtualFile(FileAndContent.first(), FileAndContent.second); - // Do not restore working dir from multiple threads to avoid races. - Tool.setRestoreWorkingDir(false); if (Tool.run(Action.first.get())) AppendError(llvm::Twine("Failed to run action on ") + Path + "\n"); @@ -141,11 +139,6 @@ llvm::Error AllTUsToolExecutor::execute( } // Make sure all tasks have finished before resetting the working directory. Pool.wait(); - if (!InitialWorkingDir.empty()) { - if (auto EC = llvm::sys::fs::set_current_path(InitialWorkingDir)) - llvm::errs() << "Error while restoring working directory: " - << EC.message() << "\n"; - } } if (!ErrorMsg.empty()) From 990ffcb8042af8e83c4c742c3fa0410b8267cb1f Mon Sep 17 00:00:00 2001 From: Roman Lebedev Date: Fri, 22 Mar 2019 13:40:36 +0000 Subject: [PATCH 18/38] [AST] OMPStructuredBlockTest: avoid using multiline string literals in macros That is what i have been doing elsewhere in these tests, maybe that's it? Maybe this helps with failing builds: http://lab.llvm.org:8011/builders/clang-cmake-aarch64-quick/builds/17921 http://lab.llvm.org:8011/builders/clang-cmake-aarch64-global-isel/builds/10248 git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@356749 91177308-0d34-0410-b5e6-96231b3b80d8 --- unittests/AST/OMPStructuredBlockTest.cpp | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/unittests/AST/OMPStructuredBlockTest.cpp b/unittests/AST/OMPStructuredBlockTest.cpp index 623bed388c0..f4a3fad4a1b 100644 --- a/unittests/AST/OMPStructuredBlockTest.cpp +++ b/unittests/AST/OMPStructuredBlockTest.cpp @@ -102,11 +102,12 @@ void test() { #pragma omp cancel parallel } })"; - ASSERT_TRUE( - PrintedOMPStmtMatches(Source, OMPInnermostStructuredBlockMatcher(), R"({ + const char *Expected = R"({ #pragma omp cancel parallel } -)")); +)"; + ASSERT_TRUE(PrintedOMPStmtMatches( + Source, OMPInnermostStructuredBlockMatcher(), Expected)); ASSERT_TRUE(PrintedOMPStmtMatches(Source, OMPStandaloneDirectiveMatcher(), "#pragma omp cancel parallel\n")); } @@ -117,14 +118,15 @@ TEST(OMPStructuredBlock, TestCancellationPoint) { void test() { #pragma omp parallel { -#pragma omp cancellation point parallel + #pragma omp cancellation point parallel } })"; - ASSERT_TRUE( - PrintedOMPStmtMatches(Source, OMPInnermostStructuredBlockMatcher(), R"({ + const char *Expected = R"({ #pragma omp cancellation point parallel } -)")); +)"; + ASSERT_TRUE(PrintedOMPStmtMatches( + Source, OMPInnermostStructuredBlockMatcher(), Expected)); ASSERT_TRUE( PrintedOMPStmtMatches(Source, OMPStandaloneDirectiveMatcher(), "#pragma omp cancellation point parallel\n")); From c1e4e8b66b8cc9ef54bc6484cb724e664798ae57 Mon Sep 17 00:00:00 2001 From: Alexey Bataev Date: Fri, 22 Mar 2019 14:41:39 +0000 Subject: [PATCH 19/38] [OPENMP]Emit error message for allocate directive without allocator clause in target region. According to the OpenMP 5.0, 2.11.3 allocate Directive, Restrictions, allocate directives that appear in a target region must specify an allocator clause unless a requires directive with the dynamic_allocators clause is present in the same compilation unit. git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@356752 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/clang/Basic/DiagnosticSemaKinds.td | 3 + lib/Sema/SemaOpenMP.cpp | 6 +- test/OpenMP/nvptx_allocate_codegen.cpp | 2 +- test/OpenMP/nvptx_allocate_messages.cpp | 80 ++++++++++++++++++++++ 4 files changed, 89 insertions(+), 2 deletions(-) create mode 100644 test/OpenMP/nvptx_allocate_messages.cpp diff --git a/include/clang/Basic/DiagnosticSemaKinds.td b/include/clang/Basic/DiagnosticSemaKinds.td index 8b5ce427713..a8776d9b433 100644 --- a/include/clang/Basic/DiagnosticSemaKinds.td +++ b/include/clang/Basic/DiagnosticSemaKinds.td @@ -9152,6 +9152,9 @@ def warn_omp_used_different_allocator : Warning< InGroup; def note_omp_previous_allocator : Note< "previous allocator is specified here">; +def err_expected_allocator_clause : Error<"expected an 'allocator' clause " + "inside of the target region; provide an 'allocator' clause or use 'requires'" + " directive with the 'dynamic_allocators' clause">; } // end of OpenMP category let CategoryName = "Related Result Type Issue" in { diff --git a/lib/Sema/SemaOpenMP.cpp b/lib/Sema/SemaOpenMP.cpp index e54652650ed..9232ab9d6e8 100644 --- a/lib/Sema/SemaOpenMP.cpp +++ b/lib/Sema/SemaOpenMP.cpp @@ -2243,8 +2243,12 @@ Sema::DeclGroupPtrTy Sema::ActOnOpenMPAllocateDirective( ArrayRef Clauses, DeclContext *Owner) { assert(Clauses.size() <= 1 && "Expected at most one clause."); Expr *Allocator = nullptr; - if (!Clauses.empty()) + if (Clauses.empty()) { + if (LangOpts.OpenMPIsDevice) + targetDiag(Loc, diag::err_expected_allocator_clause); + } else { Allocator = cast(Clauses.back())->getAllocator(); + } OMPAllocateDeclAttr::AllocatorTypeTy AllocatorKind = getAllocatorKind(*this, DSAStack, Allocator); SmallVector Vars; diff --git a/test/OpenMP/nvptx_allocate_codegen.cpp b/test/OpenMP/nvptx_allocate_codegen.cpp index df6a727c7ac..ec1faff4265 100644 --- a/test/OpenMP/nvptx_allocate_codegen.cpp +++ b/test/OpenMP/nvptx_allocate_codegen.cpp @@ -64,7 +64,7 @@ int main () { #pragma omp allocate(a) allocator(omp_thread_mem_alloc) a=2; double b = 3; -#pragma omp allocate(b) +#pragma omp allocate(b) allocator(omp_default_mem_alloc) return (foo()); } diff --git a/test/OpenMP/nvptx_allocate_messages.cpp b/test/OpenMP/nvptx_allocate_messages.cpp new file mode 100644 index 00000000000..99aef4338d8 --- /dev/null +++ b/test/OpenMP/nvptx_allocate_messages.cpp @@ -0,0 +1,80 @@ +// RUN: %clang_cc1 -verify -fopenmp -triple x86_64-apple-darwin10.6.0 -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc -o %t-host.bc %s +// RUN: %clang_cc1 -verify -DDEVICE -fopenmp -triple nvptx64-nvidia-cuda -fopenmp-targets=nvptx64-nvidia-cuda -fsyntax-only %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-host.bc +#ifndef DEVICE +// expected-no-diagnostics +#endif // DEVICE + +#ifndef HEADER +#define HEADER + +int bar() { + int res = 0; +#ifdef DEVICE +// expected-error@+2 {{expected an 'allocator' clause inside of the target region; provide an 'allocator' clause or use 'requires' directive with the 'dynamic_allocators' clause}} +#endif // DEVICE +#pragma omp allocate(res) + return 0; +} + +#pragma omp declare target +typedef void **omp_allocator_handle_t; +extern const omp_allocator_handle_t omp_default_mem_alloc; +extern const omp_allocator_handle_t omp_large_cap_mem_alloc; +extern const omp_allocator_handle_t omp_const_mem_alloc; +extern const omp_allocator_handle_t omp_high_bw_mem_alloc; +extern const omp_allocator_handle_t omp_low_lat_mem_alloc; +extern const omp_allocator_handle_t omp_cgroup_mem_alloc; +extern const omp_allocator_handle_t omp_pteam_mem_alloc; +extern const omp_allocator_handle_t omp_thread_mem_alloc; + +struct St{ + int a; +}; + +struct St1{ + int a; + static int b; +#pragma omp allocate(b) allocator(omp_default_mem_alloc) +} d; + +int a, b, c; +#pragma omp allocate(a) allocator(omp_large_cap_mem_alloc) +#pragma omp allocate(b) allocator(omp_const_mem_alloc) +#pragma omp allocate(d, c) allocator(omp_high_bw_mem_alloc) + +template +struct ST { + static T m; + #pragma omp allocate(m) allocator(omp_low_lat_mem_alloc) +}; + +template T foo() { + T v; + #pragma omp allocate(v) allocator(omp_cgroup_mem_alloc) + v = ST::m; + return v; +} + +namespace ns{ + int a; +} +#pragma omp allocate(ns::a) allocator(omp_pteam_mem_alloc) + +int main () { + static int a; +#pragma omp allocate(a) allocator(omp_thread_mem_alloc) + a=2; + double b = 3; +#ifdef DEVICE +// expected-error@+2 {{expected an 'allocator' clause inside of the target region; provide an 'allocator' clause or use 'requires' directive with the 'dynamic_allocators' clause}} +#endif // DEVICE +#pragma omp allocate(b) +#ifdef DEVICE +// expected-note@+2 {{called by 'main'}} +#endif // DEVICE + return (foo() + bar()); +} + +extern template int ST::m; +#pragma omp end declare target +#endif From 2bfaf970c6ec8691877d16a7e7a83b41bd7282c9 Mon Sep 17 00:00:00 2001 From: Alexey Bataev Date: Fri, 22 Mar 2019 15:25:12 +0000 Subject: [PATCH 20/38] [OPENMP]Allow no allocator clause in target regions with requires dynamic_allocators. According to the OpenMP 5.0, 2.11.3 allocate Directive, Restrictions, allocate directives that appear in a target region must specify an allocator clause unless a requires directive with the dynamic_allocators clause is present in the same compilation unit. Patch adds a check for a presence of the requires directive with the dynamic_allocators clause. git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@356758 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Sema/SemaOpenMP.cpp | 13 ++++++++++++- test/OpenMP/nvptx_allocate_messages.cpp | 19 ++++++++++++------- 2 files changed, 24 insertions(+), 8 deletions(-) diff --git a/lib/Sema/SemaOpenMP.cpp b/lib/Sema/SemaOpenMP.cpp index 9232ab9d6e8..51e0dafbbc5 100644 --- a/lib/Sema/SemaOpenMP.cpp +++ b/lib/Sema/SemaOpenMP.cpp @@ -422,6 +422,16 @@ class DSAStackTy { RequiresDecls.push_back(RD); } + /// Checks if the defined 'requires' directive has specified type of clause. + template + bool hasRequiresDeclWithClause() { + return llvm::any_of(RequiresDecls, [](const OMPRequiresDecl *D) { + return llvm::any_of(D->clauselists(), [](const OMPClause *C) { + return isa(C); + }); + }); + } + /// Checks for a duplicate clause amongst previously declared requires /// directives bool hasDuplicateRequiresClause(ArrayRef ClauseList) const { @@ -2244,7 +2254,8 @@ Sema::DeclGroupPtrTy Sema::ActOnOpenMPAllocateDirective( assert(Clauses.size() <= 1 && "Expected at most one clause."); Expr *Allocator = nullptr; if (Clauses.empty()) { - if (LangOpts.OpenMPIsDevice) + if (LangOpts.OpenMPIsDevice && + !DSAStack->hasRequiresDeclWithClause()) targetDiag(Loc, diag::err_expected_allocator_clause); } else { Allocator = cast(Clauses.back())->getAllocator(); diff --git a/test/OpenMP/nvptx_allocate_messages.cpp b/test/OpenMP/nvptx_allocate_messages.cpp index 99aef4338d8..5bb94222270 100644 --- a/test/OpenMP/nvptx_allocate_messages.cpp +++ b/test/OpenMP/nvptx_allocate_messages.cpp @@ -1,17 +1,22 @@ // RUN: %clang_cc1 -verify -fopenmp -triple x86_64-apple-darwin10.6.0 -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc -o %t-host.bc %s // RUN: %clang_cc1 -verify -DDEVICE -fopenmp -triple nvptx64-nvidia-cuda -fopenmp-targets=nvptx64-nvidia-cuda -fsyntax-only %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-host.bc -#ifndef DEVICE +// RUN: %clang_cc1 -verify -DDEVICE -DREQUIRES -fopenmp -triple nvptx64-nvidia-cuda -fopenmp-targets=nvptx64-nvidia-cuda -fsyntax-only %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-host.bc +#if !defined(DEVICE) || defined(REQUIRES) // expected-no-diagnostics #endif // DEVICE #ifndef HEADER #define HEADER +#if defined(REQUIRES) && defined(DEVICE) +#pragma omp requires dynamic_allocators +#endif // REQUIRES && DEVICE + int bar() { int res = 0; -#ifdef DEVICE +#if defined(DEVICE) && !defined(REQUIRES) // expected-error@+2 {{expected an 'allocator' clause inside of the target region; provide an 'allocator' clause or use 'requires' directive with the 'dynamic_allocators' clause}} -#endif // DEVICE +#endif // DEVICE && !REQUIRES #pragma omp allocate(res) return 0; } @@ -65,13 +70,13 @@ int main () { #pragma omp allocate(a) allocator(omp_thread_mem_alloc) a=2; double b = 3; -#ifdef DEVICE +#if defined(DEVICE) && !defined(REQUIRES) // expected-error@+2 {{expected an 'allocator' clause inside of the target region; provide an 'allocator' clause or use 'requires' directive with the 'dynamic_allocators' clause}} -#endif // DEVICE +#endif // DEVICE && !REQUIRES #pragma omp allocate(b) -#ifdef DEVICE +#if defined(DEVICE) && !defined(REQUIRES) // expected-note@+2 {{called by 'main'}} -#endif // DEVICE +#endif // DEVICE && !REQUIRES return (foo() + bar()); } From 29f42bb2962f052b4bf94caa9ead47a30f9911d7 Mon Sep 17 00:00:00 2001 From: Alexey Bataev Date: Fri, 22 Mar 2019 15:32:02 +0000 Subject: [PATCH 21/38] [OPENMP]Add missing comment, NFC. git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@356759 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Sema/SemaOpenMP.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/lib/Sema/SemaOpenMP.cpp b/lib/Sema/SemaOpenMP.cpp index 51e0dafbbc5..f541d75ce5e 100644 --- a/lib/Sema/SemaOpenMP.cpp +++ b/lib/Sema/SemaOpenMP.cpp @@ -2254,6 +2254,10 @@ Sema::DeclGroupPtrTy Sema::ActOnOpenMPAllocateDirective( assert(Clauses.size() <= 1 && "Expected at most one clause."); Expr *Allocator = nullptr; if (Clauses.empty()) { + // OpenMP 5.0, 2.11.3 allocate Directive, Restrictions. + // allocate directives that appear in a target region must specify an + // allocator clause unless a requires directive with the dynamic_allocators + // clause is present in the same compilation unit. if (LangOpts.OpenMPIsDevice && !DSAStack->hasRequiresDeclWithClause()) targetDiag(Loc, diag::err_expected_allocator_clause); From 55c482d7c1862a534afabee4bec00995226e5c89 Mon Sep 17 00:00:00 2001 From: Brian Gesiak Date: Fri, 22 Mar 2019 16:08:29 +0000 Subject: [PATCH 22/38] Revert "[coroutines][PR40978] Emit error for co_yield within catch block" The commit https://reviews.llvm.org/rC356296 is causing a regression in nested catch scopes, https://bugs.llvm.org/show_bug.cgi?id=41171. Revert this change for now in order to un-break that problem report. git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@356774 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/clang/Basic/DiagnosticSemaKinds.td | 2 - include/clang/Sema/Scope.h | 3 - lib/Parse/ParseStmt.cpp | 8 +-- lib/Sema/Scope.cpp | 4 +- lib/Sema/SemaCoroutine.cpp | 64 +++++----------------- test/SemaCXX/coroutines.cpp | 63 ++------------------- 6 files changed, 25 insertions(+), 119 deletions(-) diff --git a/include/clang/Basic/DiagnosticSemaKinds.td b/include/clang/Basic/DiagnosticSemaKinds.td index a8776d9b433..5467293ea64 100644 --- a/include/clang/Basic/DiagnosticSemaKinds.td +++ b/include/clang/Basic/DiagnosticSemaKinds.td @@ -9271,8 +9271,6 @@ def err_coroutine_objc_method : Error< "Objective-C methods as coroutines are not yet supported">; def err_coroutine_unevaluated_context : Error< "'%0' cannot be used in an unevaluated context">; -def err_coroutine_within_handler : Error< - "'%0' cannot be used in the handler of a try block">; def err_coroutine_outside_function : Error< "'%0' cannot be used outside a function">; def err_coroutine_invalid_func_context : Error< diff --git a/include/clang/Sema/Scope.h b/include/clang/Sema/Scope.h index 7848df8f70d..a1b451c85e9 100644 --- a/include/clang/Sema/Scope.h +++ b/include/clang/Sema/Scope.h @@ -131,9 +131,6 @@ class Scope { /// We are between inheritance colon and the real class/struct definition scope. ClassInheritanceScope = 0x800000, - - /// This is the scope of a C++ catch statement. - CatchScope = 0x1000000, }; private: diff --git a/lib/Parse/ParseStmt.cpp b/lib/Parse/ParseStmt.cpp index fde19b9ad87..267003b5189 100644 --- a/lib/Parse/ParseStmt.cpp +++ b/lib/Parse/ParseStmt.cpp @@ -2260,10 +2260,8 @@ StmtResult Parser::ParseCXXCatchBlock(bool FnCatch) { // C++ 3.3.2p3: // The name in a catch exception-declaration is local to the handler and // shall not be redeclared in the outermost block of the handler. - unsigned ScopeFlags = Scope::DeclScope | Scope::ControlScope | - Scope::CatchScope | - (FnCatch ? Scope::FnTryCatchScope : 0); - ParseScope CatchScope(this, ScopeFlags); + ParseScope CatchScope(this, Scope::DeclScope | Scope::ControlScope | + (FnCatch ? Scope::FnTryCatchScope : 0)); // exception-declaration is equivalent to '...' or a parameter-declaration // without default arguments. @@ -2292,7 +2290,7 @@ StmtResult Parser::ParseCXXCatchBlock(bool FnCatch) { return StmtError(Diag(Tok, diag::err_expected) << tok::l_brace); // FIXME: Possible draft standard bug: attribute-specifier should be allowed? - StmtResult Block(ParseCompoundStatement(/*isStmtExpr=*/false, ScopeFlags)); + StmtResult Block(ParseCompoundStatement()); if (Block.isInvalid()) return Block; diff --git a/lib/Sema/Scope.cpp b/lib/Sema/Scope.cpp index 51b0b24e57b..09940688efa 100644 --- a/lib/Sema/Scope.cpp +++ b/lib/Sema/Scope.cpp @@ -166,9 +166,7 @@ void Scope::dumpImpl(raw_ostream &OS) const { {SEHExceptScope, "SEHExceptScope"}, {SEHFilterScope, "SEHFilterScope"}, {CompoundStmtScope, "CompoundStmtScope"}, - {ClassInheritanceScope, "ClassInheritanceScope"}, - {CatchScope, "CatchScope"}, - }; + {ClassInheritanceScope, "ClassInheritanceScope"}}; for (auto Info : FlagInfo) { if (Flags & Info.first) { diff --git a/lib/Sema/SemaCoroutine.cpp b/lib/Sema/SemaCoroutine.cpp index 9d328f4926a..22a12801da6 100644 --- a/lib/Sema/SemaCoroutine.cpp +++ b/lib/Sema/SemaCoroutine.cpp @@ -185,8 +185,21 @@ static QualType lookupCoroutineHandleType(Sema &S, QualType PromiseType, static bool isValidCoroutineContext(Sema &S, SourceLocation Loc, StringRef Keyword) { - // [expr.await]p2 dictates that 'co_await' and 'co_yield' must be used within - // a function body. + // 'co_await' and 'co_yield' are not permitted in unevaluated operands, + // such as subexpressions of \c sizeof. + // + // [expr.await]p2, emphasis added: "An await-expression shall appear only in + // a *potentially evaluated* expression within the compound-statement of a + // function-body outside of a handler [...] A context within a function where + // an await-expression can appear is called a suspension context of the + // function." And per [expr.yield]p1: "A yield-expression shall appear only + // within a suspension context of a function." + if (S.isUnevaluatedContext()) { + S.Diag(Loc, diag::err_coroutine_unevaluated_context) << Keyword; + return false; + } + + // Per [expr.await]p2, any other usage must be within a function. // FIXME: This also covers [expr.await]p2: "An await-expression shall not // appear in a default argument." But the diagnostic QoI here could be // improved to inform the user that default arguments specifically are not @@ -655,57 +668,12 @@ bool Sema::ActOnCoroutineBodyStart(Scope *SC, SourceLocation KWLoc, return true; } -// Recursively walks up the scope hierarchy until either a 'catch' or a function -// scope is found, whichever comes first. -static bool isWithinCatchScope(Scope *S) { - // 'co_await' and 'co_yield' keywords are disallowed within catch blocks, but - // lambdas that use 'co_await' are allowed. The loop below ends when a - // function scope is found in order to ensure the following behavior: - // - // void foo() { // <- function scope - // try { // - // co_await x; // <- 'co_await' is OK within a function scope - // } catch { // <- catch scope - // co_await x; // <- 'co_await' is not OK within a catch scope - // []() { // <- function scope - // co_await x; // <- 'co_await' is OK within a function scope - // }(); - // } - // } - while (S && !(S->getFlags() & Scope::FnScope)) { - if (S->getFlags() & Scope::CatchScope) - return true; - S = S->getParent(); - } - return false; -} - -// [expr.await]p2, emphasis added: "An await-expression shall appear only in -// a *potentially evaluated* expression within the compound-statement of a -// function-body *outside of a handler* [...] A context within a function -// where an await-expression can appear is called a suspension context of the -// function." -static void checkSuspensionContext(Sema &S, SourceLocation Loc, - StringRef Keyword) { - // First emphasis of [expr.await]p2: must be a potentially evaluated context. - // That is, 'co_await' and 'co_yield' cannot appear in subexpressions of - // \c sizeof. - if (S.isUnevaluatedContext()) - S.Diag(Loc, diag::err_coroutine_unevaluated_context) << Keyword; - - // Second emphasis of [expr.await]p2: must be outside of an exception handler. - if (isWithinCatchScope(S.getCurScope())) - S.Diag(Loc, diag::err_coroutine_within_handler) << Keyword; -} - ExprResult Sema::ActOnCoawaitExpr(Scope *S, SourceLocation Loc, Expr *E) { if (!ActOnCoroutineBodyStart(S, Loc, "co_await")) { CorrectDelayedTyposInExpr(E); return ExprError(); } - checkSuspensionContext(*this, Loc, "co_await"); - if (E->getType()->isPlaceholderType()) { ExprResult R = CheckPlaceholderExpr(E); if (R.isInvalid()) return ExprError(); @@ -803,8 +771,6 @@ ExprResult Sema::ActOnCoyieldExpr(Scope *S, SourceLocation Loc, Expr *E) { return ExprError(); } - checkSuspensionContext(*this, Loc, "co_yield"); - // Build yield_value call. ExprResult Awaitable = buildPromiseCall( *this, getCurFunction()->CoroutinePromise, Loc, "yield_value", E); diff --git a/test/SemaCXX/coroutines.cpp b/test/SemaCXX/coroutines.cpp index 99964ef6bcb..5e1ff34c403 100644 --- a/test/SemaCXX/coroutines.cpp +++ b/test/SemaCXX/coroutines.cpp @@ -314,23 +314,13 @@ struct CtorDtor { } }; -namespace std { class type_info; } - void unevaluated() { - decltype(co_await a); // expected-error {{'co_await' cannot be used in an unevaluated context}} - // expected-warning@-1 {{declaration does not declare anything}} - sizeof(co_await a); // expected-error {{'co_await' cannot be used in an unevaluated context}} - // expected-error@-1 {{invalid application of 'sizeof' to an incomplete type 'void'}} - typeid(co_await a); // expected-error {{'co_await' cannot be used in an unevaluated context}} - // expected-warning@-1 {{expression with side effects has no effect in an unevaluated context}} - // expected-warning@-2 {{expression result unused}} - decltype(co_yield 1); // expected-error {{'co_yield' cannot be used in an unevaluated context}} - // expected-warning@-1 {{declaration does not declare anything}} - sizeof(co_yield 2); // expected-error {{'co_yield' cannot be used in an unevaluated context}} - // expected-error@-1 {{invalid application of 'sizeof' to an incomplete type 'void'}} - typeid(co_yield 3); // expected-error {{'co_yield' cannot be used in an unevaluated context}} - // expected-warning@-1 {{expression with side effects has no effect in an unevaluated context}} - // expected-warning@-2 {{expression result unused}} + decltype(co_await a); // expected-error {{cannot be used in an unevaluated context}} + sizeof(co_await a); // expected-error {{cannot be used in an unevaluated context}} + typeid(co_await a); // expected-error {{cannot be used in an unevaluated context}} + decltype(co_yield a); // expected-error {{cannot be used in an unevaluated context}} + sizeof(co_yield a); // expected-error {{cannot be used in an unevaluated context}} + typeid(co_yield a); // expected-error {{cannot be used in an unevaluated context}} } // [expr.await]p2: "An await-expression shall not appear in a default argument." @@ -338,47 +328,6 @@ void unevaluated() { // not allowed. A user may not understand that this is "outside a function." void default_argument(int arg = co_await 0) {} // expected-error {{'co_await' cannot be used outside a function}} -void await_in_catch_coroutine() { - try { - } catch (...) { // FIXME: Emit a note diagnostic pointing out the try handler on this line. - []() -> void { co_await a; }(); // OK - co_await a; // expected-error {{'co_await' cannot be used in the handler of a try block}} - } -} - -void await_nested_in_catch_coroutine() { - try { - } catch (...) { // FIXME: Emit a note diagnostic pointing out the try handler on this line. - try { - co_await a; // expected-error {{'co_await' cannot be used in the handler of a try block}} - []() -> void { co_await a; }(); // OK - } catch (...) { - co_return 123; - } - } -} - -void await_in_lambda_in_catch_coroutine() { - try { - } catch (...) { - []() -> void { co_await a; }(); // OK - } -} - -void yield_in_catch_coroutine() { - try { - } catch (...) { - co_yield 1; // expected-error {{'co_yield' cannot be used in the handler of a try block}} - } -} - -void return_in_catch_coroutine() { - try { - } catch (...) { - co_return 123; // OK - } -} - constexpr auto constexpr_deduced_return_coroutine() { co_yield 0; // expected-error {{'co_yield' cannot be used in a constexpr function}} // expected-error@-1 {{'co_yield' cannot be used in a function with a deduced return type}} From f69b9c5a06575236f75d3e543efe43db22de4c3c Mon Sep 17 00:00:00 2001 From: Carey Williams Date: Fri, 22 Mar 2019 16:20:45 +0000 Subject: [PATCH 23/38] [ARM] Fix bug 39982 - pcs("aapcs-vfp") is not consistent Correctly handle homogeneous aggregates when a function's ABI is specified via the pcs attribute. Bug: https://bugs.llvm.org/show_bug.cgi?id=39982 Differential Revision: https://reviews.llvm.org/D59094 git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@356776 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/TargetInfo.cpp | 52 +++++++++++++++++++++++++------------ test/CodeGenCXX/arm-pcs.cpp | 51 ++++++++++++++++++++++++++++++++++++ 2 files changed, 87 insertions(+), 16 deletions(-) create mode 100644 test/CodeGenCXX/arm-pcs.cpp diff --git a/lib/CodeGen/TargetInfo.cpp b/lib/CodeGen/TargetInfo.cpp index f48f19966e5..d9bbf594ed6 100644 --- a/lib/CodeGen/TargetInfo.cpp +++ b/lib/CodeGen/TargetInfo.cpp @@ -5597,8 +5597,10 @@ class ARMABIInfo : public SwiftABIInfo { ABIKind getABIKind() const { return Kind; } private: - ABIArgInfo classifyReturnType(QualType RetTy, bool isVariadic) const; - ABIArgInfo classifyArgumentType(QualType RetTy, bool isVariadic) const; + ABIArgInfo classifyReturnType(QualType RetTy, bool isVariadic, + unsigned functionCallConv) const; + ABIArgInfo classifyArgumentType(QualType RetTy, bool isVariadic, + unsigned functionCallConv) const; ABIArgInfo classifyHomogeneousAggregate(QualType Ty, const Type *Base, uint64_t Members) const; ABIArgInfo coerceIllegalVector(QualType Ty) const; @@ -5608,6 +5610,8 @@ class ARMABIInfo : public SwiftABIInfo { bool isHomogeneousAggregateSmallEnough(const Type *Ty, uint64_t Members) const override; + bool isEffectivelyAAPCS_VFP(unsigned callConvention, bool acceptHalf) const; + void computeInfo(CGFunctionInfo &FI) const override; Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, @@ -5728,11 +5732,13 @@ void WindowsARMTargetCodeGenInfo::setTargetAttributes( void ARMABIInfo::computeInfo(CGFunctionInfo &FI) const { if (!::classifyReturnType(getCXXABI(), FI, *this)) - FI.getReturnInfo() = - classifyReturnType(FI.getReturnType(), FI.isVariadic()); + FI.getReturnInfo() = classifyReturnType(FI.getReturnType(), FI.isVariadic(), + FI.getCallingConvention()); for (auto &I : FI.arguments()) - I.info = classifyArgumentType(I.type, FI.isVariadic()); + I.info = classifyArgumentType(I.type, FI.isVariadic(), + FI.getCallingConvention()); + // Always honor user-specified calling convention. if (FI.getCallingConvention() != llvm::CallingConv::C) @@ -5811,8 +5817,8 @@ ABIArgInfo ARMABIInfo::classifyHomogeneousAggregate(QualType Ty, return ABIArgInfo::getDirect(nullptr, 0, nullptr, false); } -ABIArgInfo ARMABIInfo::classifyArgumentType(QualType Ty, - bool isVariadic) const { +ABIArgInfo ARMABIInfo::classifyArgumentType(QualType Ty, bool isVariadic, + unsigned functionCallConv) const { // 6.1.2.1 The following argument types are VFP CPRCs: // A single-precision floating-point type (including promoted // half-precision types); A double-precision floating-point type; @@ -5820,7 +5826,9 @@ ABIArgInfo ARMABIInfo::classifyArgumentType(QualType Ty, // with a Base Type of a single- or double-precision floating-point type, // 64-bit containerized vectors or 128-bit containerized vectors with one // to four Elements. - bool IsEffectivelyAAPCS_VFP = getABIKind() == AAPCS_VFP && !isVariadic; + // Variadic functions should always marshal to the base standard. + bool IsAAPCS_VFP = + !isVariadic && isEffectivelyAAPCS_VFP(functionCallConv, /* AAPCS16 */ false); Ty = useFirstFieldIfTransparentUnion(Ty); @@ -5833,7 +5841,7 @@ ABIArgInfo ARMABIInfo::classifyArgumentType(QualType Ty, // half type natively, and does not need to interwork with AAPCS code. if ((Ty->isFloat16Type() || Ty->isHalfType()) && !getContext().getLangOpts().NativeHalfArgsAndReturns) { - llvm::Type *ResType = IsEffectivelyAAPCS_VFP ? + llvm::Type *ResType = IsAAPCS_VFP ? llvm::Type::getFloatTy(getVMContext()) : llvm::Type::getInt32Ty(getVMContext()); return ABIArgInfo::getDirect(ResType); @@ -5857,7 +5865,7 @@ ABIArgInfo ARMABIInfo::classifyArgumentType(QualType Ty, if (isEmptyRecord(getContext(), Ty, true)) return ABIArgInfo::getIgnore(); - if (IsEffectivelyAAPCS_VFP) { + if (IsAAPCS_VFP) { // Homogeneous Aggregates need to be expanded when we can fit the aggregate // into VFP registers. const Type *Base = nullptr; @@ -6014,10 +6022,12 @@ static bool isIntegerLikeType(QualType Ty, ASTContext &Context, return true; } -ABIArgInfo ARMABIInfo::classifyReturnType(QualType RetTy, - bool isVariadic) const { - bool IsEffectivelyAAPCS_VFP = - (getABIKind() == AAPCS_VFP || getABIKind() == AAPCS16_VFP) && !isVariadic; +ABIArgInfo ARMABIInfo::classifyReturnType(QualType RetTy, bool isVariadic, + unsigned functionCallConv) const { + + // Variadic functions should always marshal to the base standard. + bool IsAAPCS_VFP = + !isVariadic && isEffectivelyAAPCS_VFP(functionCallConv, /* AAPCS16 */ true); if (RetTy->isVoidType()) return ABIArgInfo::getIgnore(); @@ -6038,7 +6048,7 @@ ABIArgInfo ARMABIInfo::classifyReturnType(QualType RetTy, // half type natively, and does not need to interwork with AAPCS code. if ((RetTy->isFloat16Type() || RetTy->isHalfType()) && !getContext().getLangOpts().NativeHalfArgsAndReturns) { - llvm::Type *ResType = IsEffectivelyAAPCS_VFP ? + llvm::Type *ResType = IsAAPCS_VFP ? llvm::Type::getFloatTy(getVMContext()) : llvm::Type::getInt32Ty(getVMContext()); return ABIArgInfo::getDirect(ResType); @@ -6087,7 +6097,7 @@ ABIArgInfo ARMABIInfo::classifyReturnType(QualType RetTy, return ABIArgInfo::getIgnore(); // Check for homogeneous aggregates with AAPCS-VFP. - if (IsEffectivelyAAPCS_VFP) { + if (IsAAPCS_VFP) { const Type *Base = nullptr; uint64_t Members = 0; if (isHomogeneousAggregate(RetTy, Base, Members)) @@ -6192,6 +6202,16 @@ bool ARMABIInfo::isHomogeneousAggregateSmallEnough(const Type *Base, return Members <= 4; } +bool ARMABIInfo::isEffectivelyAAPCS_VFP(unsigned callConvention, + bool acceptHalf) const { + // Give precedence to user-specified calling conventions. + if (callConvention != llvm::CallingConv::C) + return (callConvention == llvm::CallingConv::ARM_AAPCS_VFP); + else + return (getABIKind() == AAPCS_VFP) || + (acceptHalf && (getABIKind() == AAPCS16_VFP)); +} + Address ARMABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, QualType Ty) const { CharUnits SlotSize = CharUnits::fromQuantity(4); diff --git a/test/CodeGenCXX/arm-pcs.cpp b/test/CodeGenCXX/arm-pcs.cpp new file mode 100644 index 00000000000..1d327d794b7 --- /dev/null +++ b/test/CodeGenCXX/arm-pcs.cpp @@ -0,0 +1,51 @@ +// Covers a bug fix for ABI selection with homogenous aggregates: +// See: https://bugs.llvm.org/show_bug.cgi?id=39982 + +// REQUIRES: arm-registered-target +// RUN: %clang -mfloat-abi=hard --target=armv7-unknown-linux-gnueabi -O3 -S -o - %s | FileCheck %s -check-prefixes=HARD,CHECK +// RUN: %clang -mfloat-abi=softfp --target=armv7-unknown-linux-gnueabi -O3 -S -o - %s | FileCheck %s -check-prefixes=SOFTFP,CHECK +// RUN: %clang -mfloat-abi=soft --target=armv7-unknown-linux-gnueabi -O3 -S -o - %s | FileCheck %s -check-prefixes=SOFT,CHECK + +struct S { + float f; + float d; + float c; + float t; +}; + +// Variadic functions should always marshal for the base standard. +// See section 5.5 (Parameter Passing) of the AAPCS. +float __attribute__((pcs("aapcs-vfp"))) variadic(S s, ...) { + // CHECK-NOT: vmov s{{[0-9]+}}, s{{[0-9]+}} + // CHECK: mov r{{[0-9]+}}, r{{[0-9]+}} + return s.d; +} + +float no_attribute(S s) { + // SOFT: mov r{{[0-9]+}}, r{{[0-9]+}} + // SOFTFP: mov r{{[0-9]+}}, r{{[0-9]+}} + // HARD: vmov.f32 s{{[0-9]+}}, s{{[0-9]+}} + return s.d; +} + +float __attribute__((pcs("aapcs-vfp"))) baz(float x, float y) { + // CHECK-NOT: mov s{{[0-9]+}}, r{{[0-9]+}} + // SOFT: mov r{{[0-9]+}}, r{{[0-9]+}} + // SOFTFP: vmov.f32 s{{[0-9]+}}, s{{[0-9]+}} + // HARD: vmov.f32 s{{[0-9]+}}, s{{[0-9]+}} + return y; +} + +float __attribute__((pcs("aapcs-vfp"))) foo(S s) { + // CHECK-NOT: mov s{{[0-9]+}}, r{{[0-9]+}} + // SOFT: mov r{{[0-9]+}}, r{{[0-9]+}} + // SOFTFP: vmov.f32 s{{[0-9]+}}, s{{[0-9]+}} + // HARD: vmov.f32 s{{[0-9]+}}, s{{[0-9]+}} + return s.d; +} + +float __attribute__((pcs("aapcs"))) bar(S s) { + // CHECK-NOT: vmov.f32 s{{[0-9]+}}, s{{[0-9]+}} + // CHECK: mov r{{[0-9]+}}, r{{[0-9]+}} + return s.d; +} From 210219ba269639a9ac04214d5ca0184f3bcf62c2 Mon Sep 17 00:00:00 2001 From: James Y Knight Date: Fri, 22 Mar 2019 18:27:13 +0000 Subject: [PATCH 24/38] IR: Support parsing numeric block ids, and emit them in textual output. Just as as llvm IR supports explicitly specifying numeric value ids for instructions, and emits them by default in textual output, now do the same for blocks. This is a slightly incompatible change in the textual IR format. Previously, llvm would parse numeric labels as string names. E.g. define void @f() { br label %"55" 55: ret void } defined a label *named* "55", even without needing to be quoted, while the reference required quoting. Now, if you intend a block label which looks like a value number to be a name, you must quote it in the definition too (e.g. `"55":`). Previously, llvm would print nameless blocks only as a comment, and would omit it if there was no predecessor. This could cause confusion for readers of the IR, just as unnamed instructions did prior to the addition of "%5 = " syntax, back in 2008 (PR2480). Now, it will always print a label for an unnamed block, with the exception of the entry block. (IMO it may be better to print it for the entry-block as well. However, that requires updating many more tests.) Thus, the following is supported, and is the canonical printing: define i32 @f(i32, i32) { %3 = add i32 %0, %1 br label %4 4: ret i32 %3 } New test cases covering this behavior are added, and other tests updated as required. Differential Revision: https://reviews.llvm.org/D58548 git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@356789 91177308-0d34-0410-b5e6-96231b3b80d8 --- test/CodeGenCXX/discard-name-values.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/CodeGenCXX/discard-name-values.cpp b/test/CodeGenCXX/discard-name-values.cpp index d4d7527c285..aa30dae7501 100644 --- a/test/CodeGenCXX/discard-name-values.cpp +++ b/test/CodeGenCXX/discard-name-values.cpp @@ -10,7 +10,7 @@ bool test(bool pred) { // CHECK: br i1 %pred, label %if.then, label %if.end if (pred) { - // DISCARDVALUE: ;