Remove a few more OS and CPU ifdefs
pchintalapudi committed Mar 6, 2023
1 parent 6d5775c commit 7bc670f
Showing 7 changed files with 128 additions and 132 deletions.
64 changes: 32 additions & 32 deletions src/aotcompile.cpp
@@ -436,15 +436,16 @@ void *jl_create_native_impl(jl_array_t *methods, LLVMOrcThreadSafeModuleRef llvm
CreateNativeGlobals += gvars.size();

//Safe b/c context is locked by params
#if defined(_OS_WINDOWS_) && defined(_CPU_X86_64_)
// setting the function personality enables stack unwinding and catching exceptions
// so make sure everything has something set
Type *T_int32 = Type::getInt32Ty(clone.getModuleUnlocked()->getContext());
Function *juliapersonality_func =
Function::Create(FunctionType::get(T_int32, true),
Function::ExternalLinkage, "__julia_personality", clone.getModuleUnlocked());
juliapersonality_func->setDLLStorageClass(GlobalValue::DLLImportStorageClass);
#endif
auto TT = Triple(clone.getModuleUnlocked()->getTargetTriple());
Function *juliapersonality_func = nullptr;
if (TT.isOSWindows() && TT.getArch() == Triple::x86_64) {
// setting the function personality enables stack unwinding and catching exceptions
// so make sure everything has something set
Type *T_int32 = Type::getInt32Ty(clone.getModuleUnlocked()->getContext());
juliapersonality_func = Function::Create(FunctionType::get(T_int32, true),
Function::ExternalLinkage, "__julia_personality", clone.getModuleUnlocked());
juliapersonality_func->setDLLStorageClass(GlobalValue::DLLImportStorageClass);
}

// move everything inside, now that we've merged everything
// (before adding the exported headers)
@@ -455,11 +456,11 @@ void *jl_create_native_impl(jl_array_t *methods, LLVMOrcThreadSafeModuleRef llvm
G.setLinkage(GlobalValue::ExternalLinkage);
G.setVisibility(GlobalValue::HiddenVisibility);
makeSafeName(G);
#if defined(_OS_WINDOWS_) && defined(_CPU_X86_64_)
// Add unwind exception personalities to functions to handle async exceptions
if (Function *F = dyn_cast<Function>(&G))
F->setPersonalityFn(juliapersonality_func);
#endif
if (TT.isOSWindows() && TT.getArch() == Triple::x86_64) {
// Add unwind exception personalities to functions to handle async exceptions
if (Function *F = dyn_cast<Function>(&G))
F->setPersonalityFn(juliapersonality_func);
}
}
}
}
@@ -1446,30 +1447,29 @@ void jl_dump_native_impl(void *native_code,
// want less optimizations there.
Triple TheTriple = Triple(jl_ExecutionEngine->getTargetTriple());
// make sure to emit the native object format, even if FORCE_ELF was set in codegen
#if defined(_OS_WINDOWS_)
TheTriple.setObjectFormat(Triple::COFF);
#elif defined(_OS_DARWIN_)
TheTriple.setObjectFormat(Triple::MachO);
TheTriple.setOS(llvm::Triple::MacOSX);
#endif
if (TheTriple.isOSWindows()) {
TheTriple.setObjectFormat(Triple::COFF);
} else if (TheTriple.isOSDarwin()) {
TheTriple.setObjectFormat(Triple::MachO);
TheTriple.setOS(llvm::Triple::MacOSX);
}
Optional<Reloc::Model> RelocModel;
if (TheTriple.isOSLinux() || TheTriple.isOSFreeBSD()) {
RelocModel = Reloc::PIC_;
}
CodeModel::Model CMModel = CodeModel::Small;
if (TheTriple.isPPC()) {
// On PPC the small model is limited to 16bit offsets
CMModel = CodeModel::Medium;
}
std::unique_ptr<TargetMachine> SourceTM(
jl_ExecutionEngine->getTarget().createTargetMachine(
TheTriple.getTriple(),
jl_ExecutionEngine->getTargetCPU(),
jl_ExecutionEngine->getTargetFeatureString(),
jl_ExecutionEngine->getTargetOptions(),
#if defined(_OS_LINUX_) || defined(_OS_FREEBSD_)
Reloc::PIC_,
#else
Optional<Reloc::Model>(),
#endif
#if defined(_CPU_PPC_) || defined(_CPU_PPC64_)
// On PPC the small model is limited to 16bit offsets
CodeModel::Medium,
#else
// Use small model so that we can use signed 32bits offset in the function and GV tables
CodeModel::Small,
#endif
RelocModel,
CMModel,
CodeGenOpt::Aggressive // -O3 TODO: respect command -O0 flag?
));

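The core pattern in this file — replacing _OS_WINDOWS_/_CPU_X86_64_ preprocessor checks with runtime queries on the module's target triple — can be sketched in isolation. The helper below is illustrative only (it is not part of this commit) and assumes LLVM 14-era headers, where Triple lives in llvm/ADT/Triple.h:

    #include <llvm/ADT/Triple.h>
    #include <llvm/IR/Module.h>

    using namespace llvm;

    // Hypothetical helper: decide a target-specific codegen detail from the
    // module's triple instead of host-side _OS_*/_CPU_* macros, so the same
    // binary can also drive cross-compilation for other targets.
    static bool needsWin64Personality(const Module &M) {
        Triple TT(M.getTargetTriple());
        return TT.isOSWindows() && TT.getArch() == Triple::x86_64;
    }

The same idea lets jl_dump_native_impl above pick Reloc::PIC_ and the code model from the triple it is emitting for, rather than from the host build configuration.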
15 changes: 11 additions & 4 deletions src/codegen.cpp
@@ -3,9 +3,6 @@
#undef DEBUG
#include "llvm-version.h"
#include "platform.h"
#if defined(_CPU_X86_)
#define JL_NEED_FLOATTEMP_VAR 1
#endif

#ifndef __STDC_LIMIT_MACROS
#define __STDC_LIMIT_MACROS
@@ -86,6 +83,9 @@
using namespace llvm;

static bool jl_fpo_disabled(const Triple &TT) {
#ifdef JL_DISABLE_FPO
return true;
#endif
#ifdef _COMPILER_MSAN_ENABLED_
// MSAN doesn't support FPO
return true;
@@ -96,6 +96,13 @@ static bool jl_fpo_disabled(const Triple &TT) {
return false;
}

static bool jl_floattemp_var_needed(const Triple &TT) {
#ifdef JL_NEED_FLOATTEMP_VAR
return true;
#endif
return TT.getArch() == Triple::x86;
}

//Drag some useful type functions into our namespace
//to reduce verbosity of our code
auto getInt1Ty(LLVMContext &ctxt) {
@@ -2920,7 +2927,7 @@ static Value *emit_bits_compare(jl_codectx_t &ctx, jl_cgval_t arg1, jl_cgval_t a
return ConstantInt::get(getInt1Ty(ctx.builder.getContext()), 1);

if (at->isIntegerTy() || at->isPointerTy() || at->isFloatingPointTy()) {
Type *at_int = INTT(at);
Type *at_int = INTT(at, ctx.emission_context.DL);
Value *varg1 = emit_unbox(ctx, at_int, arg1, arg1.typ);
Value *varg2 = emit_unbox(ctx, at_int, arg2, arg2.typ);
return ctx.builder.CreateICmpEQ(varg1, varg2);
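The new jl_fpo_disabled / jl_floattemp_var_needed predicates keep a compile-time override (JL_DISABLE_FPO, JL_NEED_FLOATTEMP_VAR) but otherwise decide from the triple. A hypothetical call site — not taken from this commit — might consume such a predicate through LLVM's standard "frame-pointer" function attribute:

    #include <llvm/IR/Function.h>

    using namespace llvm;

    // Illustrative only: fpoDisabled would come from jl_fpo_disabled(TT)
    // in the real code. When frame-pointer omission is disallowed, keep
    // frame pointers everywhere; otherwise let the backend drop them.
    static void applyFramePointerPolicy(Function &F, bool fpoDisabled) {
        F.addFnAttr("frame-pointer", fpoDisabled ? "all" : "none");
    }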
48 changes: 24 additions & 24 deletions src/intrinsics.cpp
@@ -154,13 +154,13 @@ static Type *FLOATT(Type *t)
}

// convert an llvm type to same-size int type
static Type *INTT(Type *t)
static Type *INTT(Type *t, const DataLayout &DL)
{
auto &ctxt = t->getContext();
if (t->isIntegerTy())
return t;
if (t->isPointerTy())
return getSizeTy(ctxt);
return DL.getIntPtrType(t);
if (t == getDoubleTy(ctxt))
return getInt64Ty(ctxt);
if (t == getFloatTy(ctxt))
@@ -343,22 +343,19 @@ static Value *emit_unboxed_coercion(jl_codectx_t &ctx, Type *to, Value *unboxed)
unboxed = emit_bitcast(ctx, unboxed, to);
}
else if (!ty->isIntOrPtrTy() && !ty->isFloatingPointTy()) {
#ifndef JL_NDEBUG
const DataLayout &DL = jl_Module->getDataLayout();
#endif
assert(DL.getTypeSizeInBits(ty) == DL.getTypeSizeInBits(to));
AllocaInst *cast = ctx.builder.CreateAlloca(ty);
ctx.builder.CreateStore(unboxed, cast);
unboxed = ctx.builder.CreateLoad(to, ctx.builder.CreateBitCast(cast, to->getPointerTo()));
}
else if (frompointer) {
Type *INTT_to = INTT(to);
Type *INTT_to = INTT(to, DL);
unboxed = ctx.builder.CreatePtrToInt(unboxed, INTT_to);
if (INTT_to != to)
unboxed = ctx.builder.CreateBitCast(unboxed, to);
}
else if (topointer) {
Type *INTT_to = INTT(to);
Type *INTT_to = INTT(to, DL);
if (to != INTT_to)
unboxed = ctx.builder.CreateBitCast(unboxed, INTT_to);
unboxed = emit_inttoptr(ctx, unboxed, to);
@@ -584,6 +581,8 @@ static jl_cgval_t generic_cast(
intrinsic f, Instruction::CastOps Op,
const jl_cgval_t *argv, bool toint, bool fromint)
{
auto &TT = ctx.emission_context.TargetTriple;
auto &DL = ctx.emission_context.DL;
const jl_cgval_t &targ = argv[0];
const jl_cgval_t &v = argv[1];
jl_datatype_t *jlto = staticeval_bitstype(targ);
@@ -593,11 +592,11 @@
Type *to = bitstype_to_llvm((jl_value_t*)jlto, ctx.builder.getContext(), true);
Type *vt = bitstype_to_llvm(v.typ, ctx.builder.getContext(), true);
if (toint)
to = INTT(to);
to = INTT(to, DL);
else
to = FLOATT(to);
if (fromint)
vt = INTT(vt);
vt = INTT(vt, DL);
else
vt = FLOATT(vt);
if (!to || !vt)
@@ -606,17 +605,17 @@
if (!CastInst::castIsValid(Op, from, to))
return emit_runtime_call(ctx, f, argv, 2);
if (Op == Instruction::FPExt) {
#ifdef JL_NEED_FLOATTEMP_VAR
// Target platform might carry extra precision.
// Force rounding to single precision first. The reason is that it's
// fine to keep working in extended precision as long as it's
// understood that everything is implicitly rounded to 23 bits,
// but if we start looking at more bits we need to actually do the
// rounding first instead of carrying around incorrect low bits.
Value *jlfloattemp_var = emit_static_alloca(ctx, from->getType());
ctx.builder.CreateStore(from, jlfloattemp_var);
from = ctx.builder.CreateLoad(from->getType(), jlfloattemp_var, /*force this to load from the stack*/true);
#endif
if (jl_floattemp_var_needed(TT)) {
// Target platform might carry extra precision.
// Force rounding to single precision first. The reason is that it's
// fine to keep working in extended precision as long as it's
// understood that everything is implicitly rounded to 23 bits,
// but if we start looking at more bits we need to actually do the
// rounding first instead of carrying around incorrect low bits.
Value *jlfloattemp_var = emit_static_alloca(ctx, from->getType());
ctx.builder.CreateStore(from, jlfloattemp_var);
from = ctx.builder.CreateLoad(from->getType(), jlfloattemp_var, /*force this to load from the stack*/true);
}
}
Value *ans = ctx.builder.CreateCast(Op, from, to);
if (f == fptosi || f == fptoui)
@@ -1126,6 +1125,7 @@ static jl_cgval_t emit_ifelse(jl_codectx_t &ctx, jl_cgval_t c, jl_cgval_t x, jl_

static jl_cgval_t emit_intrinsic(jl_codectx_t &ctx, intrinsic f, jl_value_t **args, size_t nargs)
{
auto &DL = ctx.emission_context.DL;
assert(f < num_intrinsics);
if (f == cglobal && nargs == 1)
f = cglobal_auto;
@@ -1231,7 +1231,7 @@ static jl_cgval_t emit_intrinsic(jl_codectx_t &ctx, intrinsic f, jl_value_t **ar
const jl_cgval_t &x = argv[0];
if (!jl_is_primitivetype(x.typ))
return emit_runtime_call(ctx, f, argv, nargs);
Type *xt = INTT(bitstype_to_llvm(x.typ, ctx.builder.getContext(), true));
Type *xt = INTT(bitstype_to_llvm(x.typ, ctx.builder.getContext(), true), DL);
Value *from = emit_unbox(ctx, xt, x, x.typ);
Value *ans = ctx.builder.CreateNot(from);
return mark_julia_type(ctx, ans, false, x.typ);
@@ -1270,7 +1270,7 @@ static jl_cgval_t emit_intrinsic(jl_codectx_t &ctx, intrinsic f, jl_value_t **ar
if (float_func()[f])
xtyp = FLOATT(xtyp);
else
xtyp = INTT(xtyp);
xtyp = INTT(xtyp, DL);
if (!xtyp)
return emit_runtime_call(ctx, f, argv, nargs);
////Bool are required to be in the range [0,1]
@@ -1289,7 +1289,7 @@ static jl_cgval_t emit_intrinsic(jl_codectx_t &ctx, intrinsic f, jl_value_t **ar
if (f == shl_int || f == lshr_int || f == ashr_int) {
if (!jl_is_primitivetype(argv[1].typ))
return emit_runtime_call(ctx, f, argv, nargs);
argt[1] = INTT(bitstype_to_llvm(argv[1].typ, ctx.builder.getContext(), true));
argt[1] = INTT(bitstype_to_llvm(argv[1].typ, ctx.builder.getContext(), true), DL);
}
else {
for (size_t i = 1; i < nargs; ++i) {
@@ -1465,7 +1465,7 @@ static Value *emit_untyped_intrinsic(jl_codectx_t &ctx, intrinsic f, Value **arg

case fpiseq: {
*newtyp = jl_bool_type;
Type *it = INTT(t);
Type *it = INTT(t, ctx.emission_context.DL);
Value *xi = ctx.builder.CreateBitCast(x, it);
Value *yi = ctx.builder.CreateBitCast(y, it);
return ctx.builder.CreateOr(ctx.builder.CreateAnd(ctx.builder.CreateFCmpUNO(x, x),
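The INTT(t, DL) signature change is what removes the host-size assumption: a pointer's same-size integer type is now taken from the module's DataLayout instead of from getSizeTy (the host's size_t). A minimal sketch of that lookup, with an illustrative helper name that is not part of this commit:

    #include <cassert>
    #include <llvm/IR/DataLayout.h>
    #include <llvm/IR/Type.h>

    using namespace llvm;

    // The integer type matching a pointer's width is a property of the target
    // data layout (i64 on x86_64, i32 on i686/wasm32, ...), so it has to be
    // looked up per module rather than hard-coded for the host.
    static Type *pointerSizedInt(Type *T, const DataLayout &DL) {
        assert(T->isPointerTy() && "expected a pointer type");
        return DL.getIntPtrType(T);
    }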
37 changes: 18 additions & 19 deletions src/llvm-cpufeatures.cpp
@@ -38,20 +38,18 @@ STATISTIC(LoweredWithoutFMA, "Number of have_fma's that were lowered to false");
extern JuliaOJIT *jl_ExecutionEngine;

// whether this platform unconditionally (i.e. without needing multiversioning) supports FMA
Optional<bool> always_have_fma(Function &intr) JL_NOTSAFEPOINT {
auto intr_name = intr.getName();
auto typ = intr_name.substr(strlen("julia.cpu.have_fma."));

#if defined(_CPU_AARCH64_)
return typ == "f32" || typ == "f64";
#else
(void)typ;
return {};
#endif
Optional<bool> always_have_fma(Function &intr, const Triple &TT) JL_NOTSAFEPOINT {
if (TT.isAArch64()) {
auto intr_name = intr.getName();
auto typ = intr_name.substr(strlen("julia.cpu.have_fma."));
return typ == "f32" || typ == "f64";
} else {
return {};
}
}

bool have_fma(Function &intr, Function &caller) JL_NOTSAFEPOINT {
auto unconditional = always_have_fma(intr);
static bool have_fma(Function &intr, Function &caller, const Triple &TT) JL_NOTSAFEPOINT {
auto unconditional = always_have_fma(intr, TT);
if (unconditional.hasValue())
return unconditional.getValue();

Expand All @@ -65,21 +63,21 @@ bool have_fma(Function &intr, Function &caller) JL_NOTSAFEPOINT {
SmallVector<StringRef, 6> Features;
FS.split(Features, ',');
for (StringRef Feature : Features)
#if defined _CPU_ARM_
if (TT.isARM()) {
if (Feature == "+vfp4")
return typ == "f32" || typ == "f64";
return typ == "f32" || typ == "f64";
else if (Feature == "+vfp4sp")
return typ == "f32";
#else
} else {
if (Feature == "+fma" || Feature == "+fma4")
return typ == "f32" || typ == "f64";
#endif
}

return false;
}

void lowerHaveFMA(Function &intr, Function &caller, CallInst *I) JL_NOTSAFEPOINT {
if (have_fma(intr, caller)) {
void lowerHaveFMA(Function &intr, Function &caller, const Triple &TT, CallInst *I) JL_NOTSAFEPOINT {
if (have_fma(intr, caller, TT)) {
++LoweredWithFMA;
I->replaceAllUsesWith(ConstantInt::get(I->getType(), 1));
} else {
@@ -91,6 +89,7 @@ void lowerHaveFMA(Function &intr, Function &caller, CallInst *I) JL_NOTSAFEPOINT

bool lowerCPUFeatures(Module &M) JL_NOTSAFEPOINT
{
auto TT = Triple(M.getTargetTriple());
SmallVector<Instruction*,6> Materialized;

for (auto &F: M.functions()) {
@@ -100,7 +99,7 @@ bool lowerCPUFeatures(Module &M) JL_NOTSAFEPOINT
for (Use &U: F.uses()) {
User *RU = U.getUser();
CallInst *I = cast<CallInst>(RU);
lowerHaveFMA(F, *I->getParent()->getParent(), I);
lowerHaveFMA(F, *I->getParent()->getParent(), TT, I);
Materialized.push_back(I);
}
}
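have_fma looks the answer up in the function's target-features string whenever the triple alone cannot decide. A stand-alone sketch of that lookup, using the same attribute-splitting approach as above (hypothetical helper, not part of this commit):

    #include <llvm/ADT/SmallVector.h>
    #include <llvm/ADT/StringRef.h>
    #include <llvm/IR/Attributes.h>
    #include <llvm/IR/Function.h>

    using namespace llvm;

    // Illustrative only: check whether a function carries a given "+feature"
    // entry in its target-features attribute.
    static bool hasTargetFeature(const Function &F, StringRef Feat) {
        Attribute A = F.getFnAttribute("target-features");
        if (!A.isValid())
            return false; // the pass above instead falls back to the JIT's feature string
        SmallVector<StringRef, 8> Features;
        A.getValueAsString().split(Features, ',');
        for (StringRef Feature : Features)
            if (Feature == Feat)
                return true;
        return false;
    }

For example, hasTargetFeature(F, "+fma") corresponds to the non-ARM branch of have_fma above.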
22 changes: 11 additions & 11 deletions src/llvm-demote-float16.cpp
@@ -49,26 +49,26 @@ extern JuliaOJIT *jl_ExecutionEngine;

namespace {

bool have_fp16(Function &caller) {
static bool have_fp16(Function &caller, const Triple &TT) {
Attribute FSAttr = caller.getFnAttribute("target-features");
StringRef FS =
FSAttr.isValid() ? FSAttr.getValueAsString() : jl_ExecutionEngine->getTargetFeatureString();
#if defined(_CPU_AARCH64_)
if (FS.find("+fp16fml") != llvm::StringRef::npos || FS.find("+fullfp16") != llvm::StringRef::npos){
return true;
}
#elif defined(_CPU_X86_64_)
if (FS.find("+avx512fp16") != llvm::StringRef::npos){
return true;
if (TT.isAArch64()) {
if (FS.find("+fp16fml") != llvm::StringRef::npos || FS.find("+fullfp16") != llvm::StringRef::npos){
return true;
}
} else if (TT.getArch() == Triple::x86_64) {
if (FS.find("+avx512fp16") != llvm::StringRef::npos){
return true;
}
}
#endif
(void)FS;
return false;
}

static bool demoteFloat16(Function &F)
{
if (have_fp16(F))
auto TT = Triple(F.getParent()->getTargetTriple());
if (have_fp16(F, TT))
return false;

auto &ctx = F.getContext();
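Because have_fp16 now keys off the module's triple instead of the host's _CPU_* macros, the same build can answer differently per compilation target. A tiny self-contained illustration of that distinction (not from this commit):

    #include <cassert>
    #include <llvm/ADT/Triple.h>

    using namespace llvm;

    int main() {
        // Host and cross-compilation targets now take different branches in
        // have_fp16, even within a single running process.
        Triple host("x86_64-unknown-linux-gnu");
        Triple cross("aarch64-unknown-linux-gnu");
        assert(host.getArch() == Triple::x86_64 && !host.isAArch64());
        assert(cross.isAArch64());
        return 0;
    }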
