Skip to content

Commit

Permalink
Don't assume parallel memory accesses in simd macro
Browse files Browse the repository at this point in the history
If your code can gurantee that parallel memory accesses are legal you can
use `@simd ivdep` to give the compiler extra leeway. For generic code this
is an unsafe assumption.

Co-authored-by: Matt Bauman <[email protected]>
  • Loading branch information
vchuravy and mbauman committed Jun 22, 2018
1 parent 665b1de commit ae76ee4
Show file tree
Hide file tree
Showing 7 changed files with 97 additions and 17 deletions.
2 changes: 1 addition & 1 deletion base/compiler/validation.jl
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ const VALID_EXPR_HEADS = IdDict{Any,Any}(
:foreigncall => 3:typemax(Int),
:cfunction => 5:5,
:isdefined => 1:1,
:simdloop => 0:0,
:simdloop => 1:1,
:gc_preserve_begin => 0:typemax(Int),
:gc_preserve_end => 0:typemax(Int),
:thunk => 1:1,
Expand Down
14 changes: 11 additions & 3 deletions base/simdloop.jl
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ simd_outer_range(r) = 0:0
@inline simd_index(r,j::Int,i) = (@inbounds ret = r[i+firstindex(r)]; ret)

# Compile Expr x in context of @simd.
function compile(x)
function compile(x, ivdep)
(isa(x, Expr) && x.head == :for) || throw(SimdError("for loop expected"))
length(x.args) == 2 || throw(SimdError("1D for loop expected"))
check_body!(x)
Expand All @@ -72,7 +72,7 @@ function compile(x)
local $var = Base.simd_index($r,$j,$i)
$(x.args[2]) # Body of loop
$i += 1
$(Expr(:simdloop)) # Mark loop as SIMD loop
$(Expr(:simdloop, ivdep)) # Mark loop as SIMD loop
end
end
# Set index to last value just like a regular for loop would
Expand All @@ -86,7 +86,15 @@ function compile(x)
end

macro simd(forloop)
esc(compile(forloop))
esc(compile(forloop, false))
end

macro simd(ivdep, forloop)
if ivdep == :ivdep
esc(compile(forloop, true))
else
throw(SimdError("Only ivdep is valid as the first argument to @simd"))
end
end

end # module SimdLoop
16 changes: 15 additions & 1 deletion src/codegen.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -302,6 +302,7 @@ static Function *jl_alloc_obj_func;
static Function *jl_newbits_func;
static Function *jl_typeof_func;
static Function *jl_simdloop_marker_func;
static Function *jl_simdivdep_marker_func;
static Function *jl_write_barrier_func;
static Function *jlisa_func;
static Function *jlsubtype_func;
Expand Down Expand Up @@ -4067,7 +4068,13 @@ static jl_cgval_t emit_expr(jl_codectx_t &ctx, jl_value_t *expr, ssize_t ssaval)
maybe_decay_untracked(boxed(ctx, ast))), true, jl_expr_type);
}
else if (head == simdloop_sym) {
ctx.builder.CreateCall(prepare_call(jl_simdloop_marker_func));
jl_value_t *ivdep = args[0];
assert(jl_expr_nargs(ex) == 1 && jl_is_bool(ivdep));
if (ivdep == jl_false) {
ctx.builder.CreateCall(prepare_call(jl_simdloop_marker_func));
} else {
ctx.builder.CreateCall(prepare_call(jl_simdivdep_marker_func));
}
return jl_cgval_t();
}
else if (head == goto_ifnot_sym) {
Expand Down Expand Up @@ -7276,6 +7283,13 @@ static void init_julia_llvm_env(Module *m)
jl_simdloop_marker_func->addFnAttr(Attribute::NoRecurse);
jl_simdloop_marker_func->addFnAttr(Attribute::InaccessibleMemOnly);

jl_simdivdep_marker_func = Function::Create(FunctionType::get(T_void, {}, false),
Function::ExternalLinkage,
"julia.simdivdep_marker");
jl_simdivdep_marker_func->addFnAttr(Attribute::NoUnwind);
jl_simdivdep_marker_func->addFnAttr(Attribute::NoRecurse);
jl_simdivdep_marker_func->addFnAttr(Attribute::InaccessibleMemOnly);

jl_typeof_func = Function::Create(FunctionType::get(T_prjlvalue, {T_prjlvalue}, false),
Function::ExternalLinkage,
"julia.typeof");
Expand Down
39 changes: 28 additions & 11 deletions src/llvm-simdloop.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,8 @@ struct LowerSIMDLoop : public ModulePass {
private:
bool runOnModule(Module &M) override;

bool markSIMDLoop(Module &M, Function *marker, bool ivdep);

/// Check if loop has "simd_loop" annotation.
/// If present, the annotation is an MDNode attached to an instruction in the loop's latch.
bool hasSIMDLoopMetadata( Loop *L) const;
Expand Down Expand Up @@ -173,13 +175,23 @@ void LowerSIMDLoop::enableUnsafeAlgebraIfReduction(PHINode *Phi, Loop *L) const
bool LowerSIMDLoop::runOnModule(Module &M)
{
Function *simdloop_marker = M.getFunction("julia.simdloop_marker");
Function *simdivdep_marker = M.getFunction("julia.simdivdep_marker");

bool Changed = false;
if (simdloop_marker)
Changed |= markSIMDLoop(M, simdloop_marker, false);

if (simdivdep_marker)
Changed |= markSIMDLoop(M, simdivdep_marker, true);

if (!simdloop_marker)
return false;
return Changed;
}

bool LowerSIMDLoop::markSIMDLoop(Module &M, Function *marker, bool ivdep)
{
bool Changed = false;
std::vector<Instruction*> ToDelete;
for (User *U : simdloop_marker->users()) {
for (User *U : marker->users()) {
Instruction *I = cast<Instruction>(U);
ToDelete.push_back(I);
LoopInfo &LI = getAnalysis<LoopInfoWrapperPass>(*I->getParent()->getParent()).getLoopInfo();
Expand All @@ -189,6 +201,7 @@ bool LowerSIMDLoop::runOnModule(Module &M)
continue;

DEBUG(dbgs() << "LSL: simd_loop found\n");
DEBUG(dbgs() << "LSL: ivdep is: " << ivdep << "\n");
BasicBlock *Lh = L->getHeader();
DEBUG(dbgs() << "LSL: loop header: " << *Lh << "\n");
MDNode *n = L->getLoopID();
Expand All @@ -203,15 +216,19 @@ bool LowerSIMDLoop::runOnModule(Module &M)

MDNode *m = MDNode::get(Lh->getContext(), ArrayRef<Metadata *>(n));

// Mark memory references so that Loop::isAnnotatedParallel will return true for this loop.
for (BasicBlock *BB : L->blocks()) {
for (Instruction &I : *BB) {
if (I.mayReadOrWriteMemory()) {
I.setMetadata(LLVMContext::MD_mem_parallel_loop_access, m);
}
// If ivdep is true we assume that there is no memory dependecy between loop iterations
// This is a fairly strong assumption and does often not hold true for generic code.
if (ivdep) {
// Mark memory references so that Loop::isAnnotatedParallel will return true for this loop.
for (BasicBlock *BB : L->blocks()) {
for (Instruction &I : *BB) {
if (I.mayReadOrWriteMemory()) {
I.setMetadata(LLVMContext::MD_mem_parallel_loop_access, m);
}
}
}
assert(L->isAnnotatedParallel());
}
assert(L->isAnnotatedParallel());

// Mark floating-point reductions as okay to reassociate/commute.
for (BasicBlock::iterator I = Lh->begin(), E = Lh->end(); I != E; ++I) {
Expand All @@ -226,7 +243,7 @@ bool LowerSIMDLoop::runOnModule(Module &M)

for (Instruction *I : ToDelete)
I->deleteValue();
simdloop_marker->eraseFromParent();
marker->eraseFromParent();

return Changed;
}
Expand Down
10 changes: 10 additions & 0 deletions test/bitarray.jl
Original file line number Diff line number Diff line change
Expand Up @@ -1575,3 +1575,13 @@ end
end
end
end

@testset "SIMD violations (issue #27482)" begin
@test all(any!(falses(10), trues(10, 10)))
@check_bit_operation any!(falses(10), trues(10, 10))
@check_bit_operation any!(falses(100), trues(100, 100))
@check_bit_operation any!(falses(1000), trues(1000, 100))
@check_bit_operation all!(falses(10), trues(10, 10))
@check_bit_operation all!(falses(100), trues(100, 100))
@check_bit_operation all!(falses(1000), trues(1000, 100))
end
21 changes: 20 additions & 1 deletion test/llvmpasses/simdloop.ll
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
; RUN: opt -load libjulia%shlibext -LowerSIMDLoop -S %s | FileCheck %s

declare void @julia.simdivdep_marker()
declare void @julia.simdloop_marker()

define void @simd_test(double *%a, double *%b) {
Expand All @@ -15,7 +16,7 @@ loop:
%cval = fadd double %aval, %bval
store double %cval, double *%bptr
%nexti = add i64 %i, 1
call void @julia.simdloop_marker()
call void @julia.simdivdep_marker()
%done = icmp sgt i64 %nexti, 500
br i1 %done, label %loopdone, label %loop
loopdone:
Expand All @@ -32,6 +33,24 @@ loop:
; CHECK: llvm.mem.parallel_loop_access
%aval = load double, double *%aptr
%nextv = fsub double %v, %aval
; CHECK: fsub fast double %v, %aval
%nexti = add i64 %i, 1
call void @julia.simdivdep_marker()
%done = icmp sgt i64 %nexti, 500
br i1 %done, label %loopdone, label %loop
loopdone:
ret double %nextv
}

define double @simd_test_sub2(double *%a) {
top:
br label %loop
loop:
%i = phi i64 [0, %top], [%nexti, %loop]
%v = phi double [0.000000e+00, %top], [%nextv, %loop]
%aptr = getelementptr double, double *%a, i64 %i
%aval = load double, double *%aptr
%nextv = fsub double %v, %aval
; CHECK: fsub fast double %v, %aval
%nexti = add i64 %i, 1
call void @julia.simdloop_marker()
Expand Down
12 changes: 12 additions & 0 deletions test/simdloop.jl
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,13 @@ function simd_loop_example_from_manual(x, y, z)
s
end

function simd_loop_axpy!(a, X, Y)
@simd ivdep for i in eachindex(X)
@inbounds Y[i] += a*X[i]
end
return Y
end

function simd_loop_with_multiple_reductions(x, y, z)
# Use non-zero initial value to make sure reduction values include it.
(s,t) = (one(eltype(x)),one(eltype(y)))
Expand Down Expand Up @@ -42,6 +49,11 @@ for T in [Int32,Int64,Float32,Float64]
(s,t) = simd_loop_with_multiple_reductions(a,b,c)
@test s==sum(a)+sum(c)+1
@test t==2*sum(b)+1

X = ones(T, n)
Y = zeros(T, n)
simd_loop_axpy!(T(2), X, Y)
@test all(y->y==T(2), Y)
end
end

Expand Down

0 comments on commit ae76ee4

Please sign in to comment.