diff --git a/base/compiler/validation.jl b/base/compiler/validation.jl index 1d12dc8e3d333..1b5ba3abf1ccf 100644 --- a/base/compiler/validation.jl +++ b/base/compiler/validation.jl @@ -24,7 +24,7 @@ const VALID_EXPR_HEADS = IdDict{Any,Any}( :foreigncall => 3:typemax(Int), :cfunction => 5:5, :isdefined => 1:1, - :simdloop => 0:0, + :simdloop => 1:1, :gc_preserve_begin => 0:typemax(Int), :gc_preserve_end => 0:typemax(Int), :thunk => 1:1, diff --git a/base/simdloop.jl b/base/simdloop.jl index 7493fbbd585a5..c5cccfddcbc7c 100644 --- a/base/simdloop.jl +++ b/base/simdloop.jl @@ -50,7 +50,7 @@ simd_outer_range(r) = 0:0 @inline simd_index(r,j::Int,i) = (@inbounds ret = r[i+firstindex(r)]; ret) # Compile Expr x in context of @simd. -function compile(x) +function compile(x, ivdep) (isa(x, Expr) && x.head == :for) || throw(SimdError("for loop expected")) length(x.args) == 2 || throw(SimdError("1D for loop expected")) check_body!(x) @@ -72,7 +72,7 @@ function compile(x) local $var = Base.simd_index($r,$j,$i) $(x.args[2]) # Body of loop $i += 1 - $(Expr(:simdloop)) # Mark loop as SIMD loop + $(Expr(:simdloop, ivdep)) # Mark loop as SIMD loop end end # Set index to last value just like a regular for loop would @@ -86,7 +86,15 @@ function compile(x) end macro simd(forloop) - esc(compile(forloop)) + esc(compile(forloop, false)) +end + +macro simd(ivdep, forloop) + if ivdep == :ivdep + esc(compile(forloop, true)) + else + throw(SimdError("Only ivdep is valid as the first argument to @simd")) + end end end # module SimdLoop diff --git a/src/codegen.cpp b/src/codegen.cpp index 05978c785ea98..d15ef31fe0326 100644 --- a/src/codegen.cpp +++ b/src/codegen.cpp @@ -302,6 +302,7 @@ static Function *jl_alloc_obj_func; static Function *jl_newbits_func; static Function *jl_typeof_func; static Function *jl_simdloop_marker_func; +static Function *jl_simdivdep_marker_func; static Function *jl_write_barrier_func; static Function *jlisa_func; static Function *jlsubtype_func; @@ -4067,7 +4068,13 @@ static jl_cgval_t emit_expr(jl_codectx_t &ctx, jl_value_t *expr, ssize_t ssaval) maybe_decay_untracked(boxed(ctx, ast))), true, jl_expr_type); } else if (head == simdloop_sym) { - ctx.builder.CreateCall(prepare_call(jl_simdloop_marker_func)); + jl_value_t *ivdep = args[0]; + assert(jl_expr_nargs(ex) == 1 && jl_is_bool(ivdep)); + if (ivdep == jl_false) { + ctx.builder.CreateCall(prepare_call(jl_simdloop_marker_func)); + } else { + ctx.builder.CreateCall(prepare_call(jl_simdivdep_marker_func)); + } return jl_cgval_t(); } else if (head == goto_ifnot_sym) { @@ -7276,6 +7283,13 @@ static void init_julia_llvm_env(Module *m) jl_simdloop_marker_func->addFnAttr(Attribute::NoRecurse); jl_simdloop_marker_func->addFnAttr(Attribute::InaccessibleMemOnly); + jl_simdivdep_marker_func = Function::Create(FunctionType::get(T_void, {}, false), + Function::ExternalLinkage, + "julia.simdivdep_marker"); + jl_simdivdep_marker_func->addFnAttr(Attribute::NoUnwind); + jl_simdivdep_marker_func->addFnAttr(Attribute::NoRecurse); + jl_simdivdep_marker_func->addFnAttr(Attribute::InaccessibleMemOnly); + jl_typeof_func = Function::Create(FunctionType::get(T_prjlvalue, {T_prjlvalue}, false), Function::ExternalLinkage, "julia.typeof"); diff --git a/src/llvm-simdloop.cpp b/src/llvm-simdloop.cpp index 8061495bbadb7..de40b0c9901ff 100644 --- a/src/llvm-simdloop.cpp +++ b/src/llvm-simdloop.cpp @@ -76,6 +76,8 @@ struct LowerSIMDLoop : public ModulePass { private: bool runOnModule(Module &M) override; + bool markSIMDLoop(Module &M, Function *marker, bool ivdep); + /// Check if loop has "simd_loop" annotation. /// If present, the annotation is an MDNode attached to an instruction in the loop's latch. bool hasSIMDLoopMetadata( Loop *L) const; @@ -173,13 +175,23 @@ void LowerSIMDLoop::enableUnsafeAlgebraIfReduction(PHINode *Phi, Loop *L) const bool LowerSIMDLoop::runOnModule(Module &M) { Function *simdloop_marker = M.getFunction("julia.simdloop_marker"); + Function *simdivdep_marker = M.getFunction("julia.simdivdep_marker"); + + bool Changed = false; + if (simdloop_marker) + Changed |= markSIMDLoop(M, simdloop_marker, false); + + if (simdivdep_marker) + Changed |= markSIMDLoop(M, simdivdep_marker, true); - if (!simdloop_marker) - return false; + return Changed; +} +bool LowerSIMDLoop::markSIMDLoop(Module &M, Function *marker, bool ivdep) +{ bool Changed = false; std::vector ToDelete; - for (User *U : simdloop_marker->users()) { + for (User *U : marker->users()) { Instruction *I = cast(U); ToDelete.push_back(I); LoopInfo &LI = getAnalysis(*I->getParent()->getParent()).getLoopInfo(); @@ -189,6 +201,7 @@ bool LowerSIMDLoop::runOnModule(Module &M) continue; DEBUG(dbgs() << "LSL: simd_loop found\n"); + DEBUG(dbgs() << "LSL: ivdep is: " << ivdep << "\n"); BasicBlock *Lh = L->getHeader(); DEBUG(dbgs() << "LSL: loop header: " << *Lh << "\n"); MDNode *n = L->getLoopID(); @@ -203,15 +216,19 @@ bool LowerSIMDLoop::runOnModule(Module &M) MDNode *m = MDNode::get(Lh->getContext(), ArrayRef(n)); - // Mark memory references so that Loop::isAnnotatedParallel will return true for this loop. - for (BasicBlock *BB : L->blocks()) { - for (Instruction &I : *BB) { - if (I.mayReadOrWriteMemory()) { - I.setMetadata(LLVMContext::MD_mem_parallel_loop_access, m); - } + // If ivdep is true we assume that there is no memory dependecy between loop iterations + // This is a fairly strong assumption and does often not hold true for generic code. + if (ivdep) { + // Mark memory references so that Loop::isAnnotatedParallel will return true for this loop. + for (BasicBlock *BB : L->blocks()) { + for (Instruction &I : *BB) { + if (I.mayReadOrWriteMemory()) { + I.setMetadata(LLVMContext::MD_mem_parallel_loop_access, m); + } + } } + assert(L->isAnnotatedParallel()); } - assert(L->isAnnotatedParallel()); // Mark floating-point reductions as okay to reassociate/commute. for (BasicBlock::iterator I = Lh->begin(), E = Lh->end(); I != E; ++I) { @@ -226,7 +243,7 @@ bool LowerSIMDLoop::runOnModule(Module &M) for (Instruction *I : ToDelete) I->deleteValue(); - simdloop_marker->eraseFromParent(); + marker->eraseFromParent(); return Changed; } diff --git a/test/bitarray.jl b/test/bitarray.jl index 997ae0b1edc6a..ef5e1910f64aa 100644 --- a/test/bitarray.jl +++ b/test/bitarray.jl @@ -1575,3 +1575,13 @@ end end end end + +@testset "SIMD violations (issue #27482)" begin + @test all(any!(falses(10), trues(10, 10))) + @check_bit_operation any!(falses(10), trues(10, 10)) + @check_bit_operation any!(falses(100), trues(100, 100)) + @check_bit_operation any!(falses(1000), trues(1000, 100)) + @check_bit_operation all!(falses(10), trues(10, 10)) + @check_bit_operation all!(falses(100), trues(100, 100)) + @check_bit_operation all!(falses(1000), trues(1000, 100)) +end diff --git a/test/llvmpasses/simdloop.ll b/test/llvmpasses/simdloop.ll index 9c44570ff0425..c427f318fe99a 100644 --- a/test/llvmpasses/simdloop.ll +++ b/test/llvmpasses/simdloop.ll @@ -1,5 +1,6 @@ ; RUN: opt -load libjulia%shlibext -LowerSIMDLoop -S %s | FileCheck %s +declare void @julia.simdivdep_marker() declare void @julia.simdloop_marker() define void @simd_test(double *%a, double *%b) { @@ -15,7 +16,7 @@ loop: %cval = fadd double %aval, %bval store double %cval, double *%bptr %nexti = add i64 %i, 1 - call void @julia.simdloop_marker() + call void @julia.simdivdep_marker() %done = icmp sgt i64 %nexti, 500 br i1 %done, label %loopdone, label %loop loopdone: @@ -32,6 +33,24 @@ loop: ; CHECK: llvm.mem.parallel_loop_access %aval = load double, double *%aptr %nextv = fsub double %v, %aval +; CHECK: fsub fast double %v, %aval + %nexti = add i64 %i, 1 + call void @julia.simdivdep_marker() + %done = icmp sgt i64 %nexti, 500 + br i1 %done, label %loopdone, label %loop +loopdone: + ret double %nextv +} + +define double @simd_test_sub2(double *%a) { +top: + br label %loop +loop: + %i = phi i64 [0, %top], [%nexti, %loop] + %v = phi double [0.000000e+00, %top], [%nextv, %loop] + %aptr = getelementptr double, double *%a, i64 %i + %aval = load double, double *%aptr + %nextv = fsub double %v, %aval ; CHECK: fsub fast double %v, %aval %nexti = add i64 %i, 1 call void @julia.simdloop_marker() diff --git a/test/simdloop.jl b/test/simdloop.jl index eae5e4e14f6af..97755be1c01a2 100644 --- a/test/simdloop.jl +++ b/test/simdloop.jl @@ -12,6 +12,13 @@ function simd_loop_example_from_manual(x, y, z) s end +function simd_loop_axpy!(a, X, Y) + @simd ivdep for i in eachindex(X) + @inbounds Y[i] += a*X[i] + end + return Y +end + function simd_loop_with_multiple_reductions(x, y, z) # Use non-zero initial value to make sure reduction values include it. (s,t) = (one(eltype(x)),one(eltype(y))) @@ -42,6 +49,11 @@ for T in [Int32,Int64,Float32,Float64] (s,t) = simd_loop_with_multiple_reductions(a,b,c) @test s==sum(a)+sum(c)+1 @test t==2*sum(b)+1 + + X = ones(T, n) + Y = zeros(T, n) + simd_loop_axpy!(T(2), X, Y) + @test all(y->y==T(2), Y) end end