Add LLVM patches for bugs introducing illegal ptrtoint
The two patches are:
rL323946 [LSR] Don't force bases of foldable formulae to the final type.
D49832   [SCEV] Don't expand Wrap predicate using inttoptr in ni addrspaces
Keno committed Jul 27, 2018
1 parent ec2f2e7 commit df45146
Showing 3 changed files with 234 additions and 0 deletions.
2 changes: 2 additions & 0 deletions deps/llvm.mk
@@ -500,6 +500,8 @@ $(eval $(call LLVM_PATCH,llvm-rL327898)) # remove for 7.0
$(eval $(call LLVM_PATCH,llvm-6.0-DISABLE_ABI_CHECKS))
$(eval $(call LLVM_PATCH,llvm-OProfile-line-num))
$(eval $(call LLVM_PATCH,llvm-D44892-Perf-integration))
$(eval $(call LLVM_PATCH,llvm-D49832-SCEVPred))
$(eval $(call LLVM_PATCH,llvm-rL323946-LSRTy)) # Remove for 7.0
endif # LLVM_VER

# Remove hardcoded OS X requirements in compiler-rt cmake build
187 changes: 187 additions & 0 deletions deps/patches/llvm-D49832-SCEVPred.patch
@@ -0,0 +1,187 @@
commit 98592fcc61307968f7df1362771534595a1e1c21
Author: Keno Fischer <[email protected]>
Date: Wed Jul 25 19:29:02 2018 -0400

[SCEV] Don't expand Wrap predicate using inttoptr in ni addrspaces

Summary:
In non-integral address spaces, we're not allowed to introduce inttoptr/ptrtoint
intrinsics. Instead, we need to expand any pointer arithmetic as geps on the
base pointer. Luckily this is a common task for SCEV, so all we have to do here
is hook up the corresponding helper function and add a test case.

Fixes PR38290

Reviewers: reames, sanjoy

Subscribers: javed.absar, llvm-commits

Differential Revision: https://reviews.llvm.org/D49832
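
As a rough illustration (not part of the patch), the overflow check for a pointer-typed {Start,+,Step} recurrence in a non-integral address space can now be expanded along the following lines; the function name, arguments, and the ni:13 datalayout in this sketch are made up for illustration:

; Hand-written sketch only -- not taken from the patch.
; Address space 13 is declared non-integral, as in the test below.
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128-ni:13"

define i1 @wrap_check_sketch(i32 addrspace(13)* %start, i64 %step, i64 %neg.step) {
entry:
  ; Start + |Step|*Backedge and Start - |Step|*Backedge, expanded as GEPs on
  ; the base pointer instead of add/sub on a ptrtoint'd integer.
  %end.pos = getelementptr i32, i32 addrspace(13)* %start, i64 %step
  %end.neg = getelementptr i32, i32 addrspace(13)* %start, i64 %neg.step
  ; The predicate trips if either end wrapped around the start pointer.
  %cmp.neg = icmp ugt i32 addrspace(13)* %end.neg, %start
  %cmp.pos = icmp ult i32 addrspace(13)* %end.pos, %start
  %wrapped = or i1 %cmp.neg, %cmp.pos
  ret i1 %wrapped
}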

diff --git a/lib/Analysis/ScalarEvolutionExpander.cpp b/lib/Analysis/ScalarEvolutionExpander.cpp
index 7f76f057216..f441a3647fb 100644
--- a/lib/Analysis/ScalarEvolutionExpander.cpp
+++ b/lib/Analysis/ScalarEvolutionExpander.cpp
@@ -2157,8 +2157,9 @@ Value *SCEVExpander::generateOverflowCheck(const SCEVAddRecExpr *AR,
const SCEV *Step = AR->getStepRecurrence(SE);
const SCEV *Start = AR->getStart();

+ Type *ARTy = AR->getType();
unsigned SrcBits = SE.getTypeSizeInBits(ExitCount->getType());
- unsigned DstBits = SE.getTypeSizeInBits(AR->getType());
+ unsigned DstBits = SE.getTypeSizeInBits(ARTy);

// The expression {Start,+,Step} has nusw/nssw if
// Step < 0, Start - |Step| * Backedge <= Start
@@ -2170,11 +2171,12 @@ Value *SCEVExpander::generateOverflowCheck(const SCEVAddRecExpr *AR,
Value *TripCountVal = expandCodeFor(ExitCount, CountTy, Loc);

IntegerType *Ty =
- IntegerType::get(Loc->getContext(), SE.getTypeSizeInBits(AR->getType()));
+ IntegerType::get(Loc->getContext(), SE.getTypeSizeInBits(ARTy));
+ Type *ARExpandTy = DL.isNonIntegralPointerType(ARTy) ? ARTy : Ty;

Value *StepValue = expandCodeFor(Step, Ty, Loc);
Value *NegStepValue = expandCodeFor(SE.getNegativeSCEV(Step), Ty, Loc);
- Value *StartValue = expandCodeFor(Start, Ty, Loc);
+ Value *StartValue = expandCodeFor(Start, ARExpandTy, Loc);

ConstantInt *Zero =
ConstantInt::get(Loc->getContext(), APInt::getNullValue(DstBits));
@@ -2197,8 +2199,21 @@ Value *SCEVExpander::generateOverflowCheck(const SCEVAddRecExpr *AR,
// Compute:
// Start + |Step| * Backedge < Start
// Start - |Step| * Backedge > Start
- Value *Add = Builder.CreateAdd(StartValue, MulV);
- Value *Sub = Builder.CreateSub(StartValue, MulV);
+ Value *Add = nullptr, *Sub = nullptr;
+ if (ARExpandTy->isPointerTy()) {
+ PointerType *ARPtrTy = cast<PointerType>(ARExpandTy);
+ const SCEV *MulS = SE.getSCEV(MulV);
+ const SCEV *const StepArray[2] = {MulS, SE.getNegativeSCEV(MulS)};
+ Add = Builder.CreateBitCast(
+ expandAddToGEP(&StepArray[0], &StepArray[1], ARPtrTy, Ty, StartValue),
+ ARPtrTy);
+ Sub = Builder.CreateBitCast(
+ expandAddToGEP(&StepArray[1], &StepArray[2], ARPtrTy, Ty, StartValue),
+ ARPtrTy);
+ } else {
+ Add = Builder.CreateAdd(StartValue, MulV);
+ Sub = Builder.CreateSub(StartValue, MulV);
+ }

Value *EndCompareGT = Builder.CreateICmp(
Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT, Sub, StartValue);
diff --git a/test/Analysis/LoopAccessAnalysis/wrapping-pointer-ni.ll b/test/Analysis/LoopAccessAnalysis/wrapping-pointer-ni.ll
new file mode 100644
index 00000000000..ddcf5e1a195
--- /dev/null
+++ b/test/Analysis/LoopAccessAnalysis/wrapping-pointer-ni.ll
@@ -0,0 +1,73 @@
+; RUN: opt -loop-versioning -S < %s | FileCheck %s -check-prefix=LV
+
+; NB: addrspaces 10-13 are non-integral
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128-ni:10:11:12:13"
+
+; This matches the test case from PR38290
+; Check that we expand the SCEV predicate check using GEP, rather
+; than ptrtoint.
+
+%jl_value_t = type opaque
+%jl_array_t = type { i8 addrspace(13)*, i64, i16, i16, i32 }
+
+declare i64 @julia_steprange_last_4949()
+
+define void @"japi1_align!_9477"(%jl_value_t addrspace(10)**) #0 {
+; LV-LABEL: L26.lver.check
+; LV: [[OFMul:%[^ ]*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 4, i64 [[Step:%[^ ]*]])
+; LV-NEXT: [[OFMulResult:%[^ ]*]] = extractvalue { i64, i1 } [[OFMul]], 0
+; LV-NEXT: [[OFMulOverflow:%[^ ]*]] = extractvalue { i64, i1 } [[OFMul]], 1
+; LV-NEXT: [[PosGEP:%[^ ]*]] = getelementptr i32, i32 addrspace(13)* [[Base:%[^ ]*]], i64 [[Step]]
+; LV-NEXT: [[NegGEP:%[^ ]*]] = getelementptr i32, i32 addrspace(13)* [[Base]], i64 [[NegStep:%[^ ]*]]
+; LV-NEXT: icmp ugt i32 addrspace(13)* [[NegGEP]], [[Base]]
+; LV-NEXT: icmp ult i32 addrspace(13)* [[PosGEP]], [[Base]]
+; LV-NOT: inttoptr
+; LV-NOT: ptrtoint
+top:
+ %1 = load %jl_value_t addrspace(10)*, %jl_value_t addrspace(10)** %0, align 8, !nonnull !1, !dereferenceable !2, !align !3
+ %2 = load i32, i32* inttoptr (i64 12 to i32*), align 4, !tbaa !4
+ %3 = sub i32 0, %2
+ %4 = call i64 @julia_steprange_last_4949()
+ %5 = addrspacecast %jl_value_t addrspace(10)* %1 to %jl_value_t addrspace(11)*
+ %6 = bitcast %jl_value_t addrspace(11)* %5 to %jl_value_t addrspace(10)* addrspace(11)*
+ %7 = load %jl_value_t addrspace(10)*, %jl_value_t addrspace(10)* addrspace(11)* %6, align 8, !tbaa !4, !nonnull !1, !dereferenceable !9, !align !2
+ %8 = addrspacecast %jl_value_t addrspace(10)* %7 to %jl_value_t addrspace(11)*
+ %9 = bitcast %jl_value_t addrspace(11)* %8 to i32 addrspace(13)* addrspace(11)*
+ %10 = load i32 addrspace(13)*, i32 addrspace(13)* addrspace(11)* %9, align 8, !tbaa !10, !nonnull !1
+ %11 = sext i32 %3 to i64
+ br label %L26
+
+L26: ; preds = %L26, %top
+ %value_phi3 = phi i64 [ 0, %top ], [ %12, %L26 ]
+ %12 = add i64 %value_phi3, -1
+ %13 = getelementptr inbounds i32, i32 addrspace(13)* %10, i64 %12
+ %14 = load i32, i32 addrspace(13)* %13, align 4, !tbaa !13
+ %15 = add i64 %12, %11
+ %16 = getelementptr inbounds i32, i32 addrspace(13)* %10, i64 %15
+ store i32 %14, i32 addrspace(13)* %16, align 4, !tbaa !13
+ %17 = icmp eq i64 %value_phi3, %4
+ br i1 %17, label %L45, label %L26
+
+L45: ; preds = %L26
+ ret void
+}
+
+attributes #0 = { "thunk" }
+
+!llvm.module.flags = !{!0}
+
+!0 = !{i32 1, !"Debug Info Version", i32 3}
+!1 = !{}
+!2 = !{i64 16}
+!3 = !{i64 8}
+!4 = !{!5, !5, i64 0}
+!5 = !{!"jtbaa_mutab", !6, i64 0}
+!6 = !{!"jtbaa_value", !7, i64 0}
+!7 = !{!"jtbaa_data", !8, i64 0}
+!8 = !{!"jtbaa"}
+!9 = !{i64 40}
+!10 = !{!11, !11, i64 0}
+!11 = !{!"jtbaa_arrayptr", !12, i64 0}
+!12 = !{!"jtbaa_array", !8, i64 0}
+!13 = !{!14, !14, i64 0}
+!14 = !{!"jtbaa_arraybuf", !7, i64 0}
diff --git a/test/Analysis/LoopAccessAnalysis/wrapping-pointer-versioning.ll b/test/Analysis/LoopAccessAnalysis/wrapping-pointer-versioning.ll
index a7e5bce7445..fa6fccecbf1 100644
--- a/test/Analysis/LoopAccessAnalysis/wrapping-pointer-versioning.ll
+++ b/test/Analysis/LoopAccessAnalysis/wrapping-pointer-versioning.ll
@@ -58,10 +58,10 @@ target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
; LV-NEXT: [[OFMul1:%[^ ]*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 4, i64 [[BE]])
; LV-NEXT: [[OFMulResult1:%[^ ]*]] = extractvalue { i64, i1 } [[OFMul1]], 0
; LV-NEXT: [[OFMulOverflow1:%[^ ]*]] = extractvalue { i64, i1 } [[OFMul1]], 1
-; LV-NEXT: [[AddEnd1:%[^ ]*]] = add i64 %a2, [[OFMulResult1]]
-; LV-NEXT: [[SubEnd1:%[^ ]*]] = sub i64 %a2, [[OFMulResult1]]
-; LV-NEXT: [[CmpNeg1:%[^ ]*]] = icmp ugt i64 [[SubEnd1]], %a2
-; LV-NEXT: [[CmpPos1:%[^ ]*]] = icmp ult i64 [[AddEnd1]], %a2
+; LV-NEXT: [[AddEnd1:%[^ ]*]] = add i64 [[A0:%[^ ]*]], [[OFMulResult1]]
+; LV-NEXT: [[SubEnd1:%[^ ]*]] = sub i64 [[A0]], [[OFMulResult1]]
+; LV-NEXT: [[CmpNeg1:%[^ ]*]] = icmp ugt i64 [[SubEnd1]], [[A0]]
+; LV-NEXT: [[CmpPos1:%[^ ]*]] = icmp ult i64 [[AddEnd1]], [[A0]]
; LV-NEXT: [[Cmp:%[^ ]*]] = select i1 false, i1 [[CmpNeg1]], i1 [[CmpPos1]]
; LV-NEXT: [[PredCheck1:%[^ ]*]] = or i1 [[Cmp]], [[OFMulOverflow1]]

@@ -233,10 +233,10 @@ for.end: ; preds = %for.body
; LV: [[OFMul1:%[^ ]*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 4, i64 [[BE:%[^ ]*]])
; LV-NEXT: [[OFMulResult1:%[^ ]*]] = extractvalue { i64, i1 } [[OFMul1]], 0
; LV-NEXT: [[OFMulOverflow1:%[^ ]*]] = extractvalue { i64, i1 } [[OFMul1]], 1
-; LV-NEXT: [[AddEnd1:%[^ ]*]] = add i64 %a2, [[OFMulResult1]]
-; LV-NEXT: [[SubEnd1:%[^ ]*]] = sub i64 %a2, [[OFMulResult1]]
-; LV-NEXT: [[CmpNeg1:%[^ ]*]] = icmp ugt i64 [[SubEnd1]], %a2
-; LV-NEXT: [[CmpPos1:%[^ ]*]] = icmp ult i64 [[AddEnd1]], %a2
+; LV-NEXT: [[AddEnd1:%[^ ]*]] = add i64 [[A0:%[^ ]*]], [[OFMulResult1]]
+; LV-NEXT: [[SubEnd1:%[^ ]*]] = sub i64 [[A0]], [[OFMulResult1]]
+; LV-NEXT: [[CmpNeg1:%[^ ]*]] = icmp ugt i64 [[SubEnd1]], [[A0]]
+; LV-NEXT: [[CmpPos1:%[^ ]*]] = icmp ult i64 [[AddEnd1]], [[A0]]
; LV-NEXT: [[Cmp:%[^ ]*]] = select i1 false, i1 [[CmpNeg1]], i1 [[CmpPos1]]
; LV-NEXT: [[PredCheck1:%[^ ]*]] = or i1 [[Cmp]], [[OFMulOverflow1]]

45 changes: 45 additions & 0 deletions deps/patches/llvm-rL323946-LSRTy.patch
@@ -0,0 +1,45 @@
commit ab60b05a472e8651cbe53c19513b7e62b9ff32df
Author: Mikael Holmen <[email protected]>
Date: Thu Feb 1 06:38:34 2018 +0000

[LSR] Don't force bases of foldable formulae to the final type.

Summary:
Before emitting code for scaled registers, we prevent
SCEVExpander from hoisting any scaled addressing mode
by emitting all the bases first. However, these bases
are being forced to the final type, resulting in some
odd code.

For example, if the type of the base is an integer and
the final type is a pointer, we will emit an inttoptr
for the base, a ptrtoint for the scale, and then a
'reverse' GEP where the GEP pointer is actually the base
integer and the index is the pointer. It's more intuitive
to use the pointer as a pointer and the integer as index.

Patch by: Bevin Hansson

Reviewers: atrick, qcolombet, sanjoy

Reviewed By: qcolombet

Subscribers: llvm-commits

Differential Revision: https://reviews.llvm.org/D42103

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@323946 91177308-0d34-0410-b5e6-96231b3b80d8
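
As a rough illustration (not part of the patch), the 'reverse' GEP described above and the more natural shape look roughly like this; the function names and the i8 element type in this sketch are made up for illustration:

; Hand-written sketch only -- not taken from the patch.
; Odd code before the change: the integer base is forced to the final pointer
; type, so it becomes the GEP pointer while the real pointer goes through
; ptrtoint and is used as the index.
define i8* @reverse_gep_sketch(i64 %base, i8* %p) {
entry:
  %base.ptr = inttoptr i64 %base to i8*
  %idx = ptrtoint i8* %p to i64
  %addr = getelementptr i8, i8* %base.ptr, i64 %idx
  ret i8* %addr
}

; Natural shape after the change: the pointer stays the GEP base and the
; integer stays the index, so no inttoptr/ptrtoint pair is needed.
define i8* @natural_gep_sketch(i8* %p, i64 %off) {
entry:
  %addr = getelementptr i8, i8* %p, i64 %off
  ret i8* %addr
}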

diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
index 332c074a1df..4b8e2286ed9 100644
--- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -4993,7 +4993,7 @@ Value *LSRInstance::Expand(const LSRUse &LU, const LSRFixup &LF,
// Unless the addressing mode will not be folded.
if (!Ops.empty() && LU.Kind == LSRUse::Address &&
isAMCompletelyFolded(TTI, LU, F)) {
- Value *FullV = Rewriter.expandCodeFor(SE.getAddExpr(Ops), Ty);
+ Value *FullV = Rewriter.expandCodeFor(SE.getAddExpr(Ops), nullptr);
Ops.clear();
Ops.push_back(SE.getUnknown(FullV));
}
