Add LLVM patches for bugs introducing illegal ptrtoint
The two patches are:
rL323946 [LSR] Don't force bases of foldable formulae to the final type.
D49832   [SCEV] Don't expand Wrap predicate using inttoptr in ni addrspaces
Keno committed Jul 27, 2018
1 parent ec2f2e7 commit df45146
Showing 3 changed files with 234 additions and 0 deletions.
2 changes: 2 additions & 0 deletions deps/llvm.mk
@@ -500,6 +500,8 @@ $(eval $(call LLVM_PATCH,llvm-rL327898)) # remove for 7.0
$(eval $(call LLVM_PATCH,llvm-6.0-DISABLE_ABI_CHECKS))
$(eval $(call LLVM_PATCH,llvm-OProfile-line-num))
$(eval $(call LLVM_PATCH,llvm-D44892-Perf-integration))
$(eval $(call LLVM_PATCH,llvm-D49832-SCEVPred))
$(eval $(call LLVM_PATCH,llvm-rL323946-LSRTy)) # Remove for 7.0
endif # LLVM_VER

# Remove hardcoded OS X requirements in compiler-rt cmake build
187 changes: 187 additions & 0 deletions deps/patches/llvm-D49832-SCEVPred.patch
@@ -0,0 +1,187 @@
commit 98592fcc61307968f7df1362771534595a1e1c21
Author: Keno Fischer <[email protected]>
Date: Wed Jul 25 19:29:02 2018 -0400

[SCEV] Don't expand Wrap predicate using inttoptr in ni addrspaces

Summary:
In non-integral address spaces, we're not allowed to introduce inttoptr/ptrtoint
intrinsics. Instead, we need to expand any pointer arithmetic as geps on the
base pointer. Luckily this is a common task for SCEV, so all we have to do here
is hook up the corresponding helper function and add a test case.

Fixes PR38290

Reviewers: reames, sanjoy

Subscribers: javed.absar, llvm-commits

Differential Revision: https://reviews.llvm.org/D49832
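
As a rough illustration (not part of the patch), the overflow check for a pointer-typed {Start,+,Step} recurrence in a non-integral address space can now be expanded along the following lines; the function name, arguments, and the ni:13 datalayout in this sketch are made up for illustration:

; Hand-written sketch only -- not taken from the patch.
; Address space 13 is declared non-integral, as in the test below.
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128-ni:13"

define i1 @wrap_check_sketch(i32 addrspace(13)* %start, i64 %step, i64 %neg.step) {
entry:
  ; Start + |Step|*Backedge and Start - |Step|*Backedge, expanded as GEPs on
  ; the base pointer instead of add/sub on a ptrtoint'd integer.
  %end.pos = getelementptr i32, i32 addrspace(13)* %start, i64 %step
  %end.neg = getelementptr i32, i32 addrspace(13)* %start, i64 %neg.step
  ; The predicate trips if either end wrapped around the start pointer.
  %cmp.neg = icmp ugt i32 addrspace(13)* %end.neg, %start
  %cmp.pos = icmp ult i32 addrspace(13)* %end.pos, %start
  %wrapped = or i1 %cmp.neg, %cmp.pos
  ret i1 %wrapped
}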

diff --git a/lib/Analysis/ScalarEvolutionExpander.cpp b/lib/Analysis/ScalarEvolutionExpander.cpp
index 7f76f057216..f441a3647fb 100644
--- a/lib/Analysis/ScalarEvolutionExpander.cpp
+++ b/lib/Analysis/ScalarEvolutionExpander.cpp
@@ -2157,8 +2157,9 @@ Value *SCEVExpander::generateOverflowCheck(const SCEVAddRecExpr *AR,
const SCEV *Step = AR->getStepRecurrence(SE);
const SCEV *Start = AR->getStart();

+ Type *ARTy = AR->getType();
unsigned SrcBits = SE.getTypeSizeInBits(ExitCount->getType());
- unsigned DstBits = SE.getTypeSizeInBits(AR->getType());
+ unsigned DstBits = SE.getTypeSizeInBits(ARTy);

// The expression {Start,+,Step} has nusw/nssw if
// Step < 0, Start - |Step| * Backedge <= Start
@@ -2170,11 +2171,12 @@ Value *SCEVExpander::generateOverflowCheck(const SCEVAddRecExpr *AR,
Value *TripCountVal = expandCodeFor(ExitCount, CountTy, Loc);

IntegerType *Ty =
- IntegerType::get(Loc->getContext(), SE.getTypeSizeInBits(AR->getType()));
+ IntegerType::get(Loc->getContext(), SE.getTypeSizeInBits(ARTy));
+ Type *ARExpandTy = DL.isNonIntegralPointerType(ARTy) ? ARTy : Ty;

Value *StepValue = expandCodeFor(Step, Ty, Loc);
Value *NegStepValue = expandCodeFor(SE.getNegativeSCEV(Step), Ty, Loc);
- Value *StartValue = expandCodeFor(Start, Ty, Loc);
+ Value *StartValue = expandCodeFor(Start, ARExpandTy, Loc);

ConstantInt *Zero =
ConstantInt::get(Loc->getContext(), APInt::getNullValue(DstBits));
@@ -2197,8 +2199,21 @@ Value *SCEVExpander::generateOverflowCheck(const SCEVAddRecExpr *AR,
// Compute:
// Start + |Step| * Backedge < Start
// Start - |Step| * Backedge > Start
- Value *Add = Builder.CreateAdd(StartValue, MulV);
- Value *Sub = Builder.CreateSub(StartValue, MulV);
+ Value *Add = nullptr, *Sub = nullptr;
+ if (ARExpandTy->isPointerTy()) {
+ PointerType *ARPtrTy = cast<PointerType>(ARExpandTy);
+ const SCEV *MulS = SE.getSCEV(MulV);
+ const SCEV *const StepArray[2] = {MulS, SE.getNegativeSCEV(MulS)};
+ Add = Builder.CreateBitCast(
+ expandAddToGEP(&StepArray[0], &StepArray[1], ARPtrTy, Ty, StartValue),
+ ARPtrTy);
+ Sub = Builder.CreateBitCast(
+ expandAddToGEP(&StepArray[1], &StepArray[2], ARPtrTy, Ty, StartValue),
+ ARPtrTy);
+ } else {
+ Add = Builder.CreateAdd(StartValue, MulV);
+ Sub = Builder.CreateSub(StartValue, MulV);
+ }

Value *EndCompareGT = Builder.CreateICmp(
Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT, Sub, StartValue);
diff --git a/test/Analysis/LoopAccessAnalysis/wrapping-pointer-ni.ll b/test/Analysis/LoopAccessAnalysis/wrapping-pointer-ni.ll
new file mode 100644
index 00000000000..ddcf5e1a195
--- /dev/null
+++ b/test/Analysis/LoopAccessAnalysis/wrapping-pointer-ni.ll
@@ -0,0 +1,73 @@
+; RUN: opt -loop-versioning -S < %s | FileCheck %s -check-prefix=LV
+
+; NB: addrspaces 10-13 are non-integral
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128-ni:10:11:12:13"
+
+; This matches the test case from PR38290
+; Check that we expand the SCEV predicate check using GEP, rather
+; than ptrtoint.
+
+%jl_value_t = type opaque
+%jl_array_t = type { i8 addrspace(13)*, i64, i16, i16, i32 }
+
+declare i64 @julia_steprange_last_4949()
+
+define void @"japi1_align!_9477"(%jl_value_t addrspace(10)**) #0 {
+; LV-LABEL: L26.lver.check
+; LV: [[OFMul:%[^ ]*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 4, i64 [[Step:%[^ ]*]])
+; LV-NEXT: [[OFMulResult:%[^ ]*]] = extractvalue { i64, i1 } [[OFMul]], 0
+; LV-NEXT: [[OFMulOverflow:%[^ ]*]] = extractvalue { i64, i1 } [[OFMul]], 1
+; LV-NEXT: [[PosGEP:%[^ ]*]] = getelementptr i32, i32 addrspace(13)* [[Base:%[^ ]*]], i64 [[Step]]
+; LV-NEXT: [[NegGEP:%[^ ]*]] = getelementptr i32, i32 addrspace(13)* [[Base]], i64 [[NegStep:%[^ ]*]]
+; LV-NEXT: icmp ugt i32 addrspace(13)* [[NegGEP]], [[Base]]
+; LV-NEXT: icmp ult i32 addrspace(13)* [[PosGEP]], [[Base]]
+; LV-NOT: inttoptr
+; LV-NOT: ptrtoint
+top:
+ %1 = load %jl_value_t addrspace(10)*, %jl_value_t addrspace(10)** %0, align 8, !nonnull !1, !dereferenceable !2, !align !3
+ %2 = load i32, i32* inttoptr (i64 12 to i32*), align 4, !tbaa !4
+ %3 = sub i32 0, %2
+ %4 = call i64 @julia_steprange_last_4949()
+ %5 = addrspacecast %jl_value_t addrspace(10)* %1 to %jl_value_t addrspace(11)*
+ %6 = bitcast %jl_value_t addrspace(11)* %5 to %jl_value_t addrspace(10)* addrspace(11)*
+ %7 = load %jl_value_t addrspace(10)*, %jl_value_t addrspace(10)* addrspace(11)* %6, align 8, !tbaa !4, !nonnull !1, !dereferenceable !9, !align !2
+ %8 = addrspacecast %jl_value_t addrspace(10)* %7 to %jl_value_t addrspace(11)*
+ %9 = bitcast %jl_value_t addrspace(11)* %8 to i32 addrspace(13)* addrspace(11)*
+ %10 = load i32 addrspace(13)*, i32 addrspace(13)* addrspace(11)* %9, align 8, !tbaa !10, !nonnull !1
+ %11 = sext i32 %3 to i64
+ br label %L26
+
+L26: ; preds = %L26, %top
+ %value_phi3 = phi i64 [ 0, %top ], [ %12, %L26 ]
+ %12 = add i64 %value_phi3, -1
+ %13 = getelementptr inbounds i32, i32 addrspace(13)* %10, i64 %12
+ %14 = load i32, i32 addrspace(13)* %13, align 4, !tbaa !13
+ %15 = add i64 %12, %11
+ %16 = getelementptr inbounds i32, i32 addrspace(13)* %10, i64 %15
+ store i32 %14, i32 addrspace(13)* %16, align 4, !tbaa !13
+ %17 = icmp eq i64 %value_phi3, %4
+ br i1 %17, label %L45, label %L26
+
+L45: ; preds = %L26
+ ret void
+}
+
+attributes #0 = { "thunk" }
+
+!llvm.module.flags = !{!0}
+
+!0 = !{i32 1, !"Debug Info Version", i32 3}
+!1 = !{}
+!2 = !{i64 16}
+!3 = !{i64 8}
+!4 = !{!5, !5, i64 0}
+!5 = !{!"jtbaa_mutab", !6, i64 0}
+!6 = !{!"jtbaa_value", !7, i64 0}
+!7 = !{!"jtbaa_data", !8, i64 0}
+!8 = !{!"jtbaa"}
+!9 = !{i64 40}
+!10 = !{!11, !11, i64 0}
+!11 = !{!"jtbaa_arrayptr", !12, i64 0}
+!12 = !{!"jtbaa_array", !8, i64 0}
+!13 = !{!14, !14, i64 0}
+!14 = !{!"jtbaa_arraybuf", !7, i64 0}
diff --git a/test/Analysis/LoopAccessAnalysis/wrapping-pointer-versioning.ll b/test/Analysis/LoopAccessAnalysis/wrapping-pointer-versioning.ll
index a7e5bce7445..fa6fccecbf1 100644
--- a/test/Analysis/LoopAccessAnalysis/wrapping-pointer-versioning.ll
+++ b/test/Analysis/LoopAccessAnalysis/wrapping-pointer-versioning.ll
@@ -58,10 +58,10 @@ target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
; LV-NEXT: [[OFMul1:%[^ ]*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 4, i64 [[BE]])
; LV-NEXT: [[OFMulResult1:%[^ ]*]] = extractvalue { i64, i1 } [[OFMul1]], 0
; LV-NEXT: [[OFMulOverflow1:%[^ ]*]] = extractvalue { i64, i1 } [[OFMul1]], 1
-; LV-NEXT: [[AddEnd1:%[^ ]*]] = add i64 %a2, [[OFMulResult1]]
-; LV-NEXT: [[SubEnd1:%[^ ]*]] = sub i64 %a2, [[OFMulResult1]]
-; LV-NEXT: [[CmpNeg1:%[^ ]*]] = icmp ugt i64 [[SubEnd1]], %a2
-; LV-NEXT: [[CmpPos1:%[^ ]*]] = icmp ult i64 [[AddEnd1]], %a2
+; LV-NEXT: [[AddEnd1:%[^ ]*]] = add i64 [[A0:%[^ ]*]], [[OFMulResult1]]
+; LV-NEXT: [[SubEnd1:%[^ ]*]] = sub i64 [[A0]], [[OFMulResult1]]
+; LV-NEXT: [[CmpNeg1:%[^ ]*]] = icmp ugt i64 [[SubEnd1]], [[A0]]
+; LV-NEXT: [[CmpPos1:%[^ ]*]] = icmp ult i64 [[AddEnd1]], [[A0]]
; LV-NEXT: [[Cmp:%[^ ]*]] = select i1 false, i1 [[CmpNeg1]], i1 [[CmpPos1]]
; LV-NEXT: [[PredCheck1:%[^ ]*]] = or i1 [[Cmp]], [[OFMulOverflow1]]

@@ -233,10 +233,10 @@ for.end: ; preds = %for.body
; LV: [[OFMul1:%[^ ]*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 4, i64 [[BE:%[^ ]*]])
; LV-NEXT: [[OFMulResult1:%[^ ]*]] = extractvalue { i64, i1 } [[OFMul1]], 0
; LV-NEXT: [[OFMulOverflow1:%[^ ]*]] = extractvalue { i64, i1 } [[OFMul1]], 1
-; LV-NEXT: [[AddEnd1:%[^ ]*]] = add i64 %a2, [[OFMulResult1]]
-; LV-NEXT: [[SubEnd1:%[^ ]*]] = sub i64 %a2, [[OFMulResult1]]
-; LV-NEXT: [[CmpNeg1:%[^ ]*]] = icmp ugt i64 [[SubEnd1]], %a2
-; LV-NEXT: [[CmpPos1:%[^ ]*]] = icmp ult i64 [[AddEnd1]], %a2
+; LV-NEXT: [[AddEnd1:%[^ ]*]] = add i64 [[A0:%[^ ]*]], [[OFMulResult1]]
+; LV-NEXT: [[SubEnd1:%[^ ]*]] = sub i64 [[A0]], [[OFMulResult1]]
+; LV-NEXT: [[CmpNeg1:%[^ ]*]] = icmp ugt i64 [[SubEnd1]], [[A0]]
+; LV-NEXT: [[CmpPos1:%[^ ]*]] = icmp ult i64 [[AddEnd1]], [[A0]]
; LV-NEXT: [[Cmp:%[^ ]*]] = select i1 false, i1 [[CmpNeg1]], i1 [[CmpPos1]]
; LV-NEXT: [[PredCheck1:%[^ ]*]] = or i1 [[Cmp]], [[OFMulOverflow1]]

45 changes: 45 additions & 0 deletions deps/patches/llvm-rL323946-LSRTy.patch
@@ -0,0 +1,45 @@
commit ab60b05a472e8651cbe53c19513b7e62b9ff32df
Author: Mikael Holmen <[email protected]>
Date: Thu Feb 1 06:38:34 2018 +0000

[LSR] Don't force bases of foldable formulae to the final type.

Summary:
Before emitting code for scaled registers, we prevent
SCEVExpander from hoisting any scaled addressing mode
by emitting all the bases first. However, these bases
are being forced to the final type, resulting in some
odd code.

For example, if the type of the base is an integer and
the final type is a pointer, we will emit an inttoptr
for the base, a ptrtoint for the scale, and then a
'reverse' GEP where the GEP pointer is actually the base
integer and the index is the pointer. It's more intuitive
to use the pointer as a pointer and the integer as index.

Patch by: Bevin Hansson

Reviewers: atrick, qcolombet, sanjoy

Reviewed By: qcolombet

Subscribers: llvm-commits

Differential Revision: https://reviews.llvm.org/D42103

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@323946 91177308-0d34-0410-b5e6-96231b3b80d8
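
As a rough illustration (not part of the patch), the 'reverse' GEP described above and the more natural shape look roughly like this; the function names and the i8 element type in this sketch are made up for illustration:

; Hand-written sketch only -- not taken from the patch.
; Odd code before the change: the integer base is forced to the final pointer
; type, so it becomes the GEP pointer while the real pointer goes through
; ptrtoint and is used as the index.
define i8* @reverse_gep_sketch(i64 %base, i8* %p) {
entry:
  %base.ptr = inttoptr i64 %base to i8*
  %idx = ptrtoint i8* %p to i64
  %addr = getelementptr i8, i8* %base.ptr, i64 %idx
  ret i8* %addr
}

; Natural shape after the change: the pointer stays the GEP base and the
; integer stays the index, so no inttoptr/ptrtoint pair is needed.
define i8* @natural_gep_sketch(i8* %p, i64 %off) {
entry:
  %addr = getelementptr i8, i8* %p, i64 %off
  ret i8* %addr
}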

diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
index 332c074a1df..4b8e2286ed9 100644
--- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -4993,7 +4993,7 @@ Value *LSRInstance::Expand(const LSRUse &LU, const LSRFixup &LF,
// Unless the addressing mode will not be folded.
if (!Ops.empty() && LU.Kind == LSRUse::Address &&
isAMCompletelyFolded(TTI, LU, F)) {
- Value *FullV = Rewriter.expandCodeFor(SE.getAddExpr(Ops), Ty);
+ Value *FullV = Rewriter.expandCodeFor(SE.getAddExpr(Ops), nullptr);
Ops.clear();
Ops.push_back(SE.getUnknown(FullV));
}
