Remove a few more OS and CPU ifdefs
pchintalapudi committed Mar 6, 2023
1 parent 6d5775c commit 7bc670f
Showing 7 changed files with 128 additions and 132 deletions.
64 changes: 32 additions & 32 deletions src/aotcompile.cpp
@@ -436,15 +436,16 @@ void *jl_create_native_impl(jl_array_t *methods, LLVMOrcThreadSafeModuleRef llvm
CreateNativeGlobals += gvars.size();

//Safe b/c context is locked by params
#if defined(_OS_WINDOWS_) && defined(_CPU_X86_64_)
// setting the function personality enables stack unwinding and catching exceptions
// so make sure everything has something set
Type *T_int32 = Type::getInt32Ty(clone.getModuleUnlocked()->getContext());
Function *juliapersonality_func =
Function::Create(FunctionType::get(T_int32, true),
Function::ExternalLinkage, "__julia_personality", clone.getModuleUnlocked());
juliapersonality_func->setDLLStorageClass(GlobalValue::DLLImportStorageClass);
#endif
auto TT = Triple(clone.getModuleUnlocked()->getTargetTriple());
Function *juliapersonality_func = nullptr;
if (TT.isOSWindows() && TT.getArch() == Triple::x86_64) {
// setting the function personality enables stack unwinding and catching exceptions
// so make sure everything has something set
Type *T_int32 = Type::getInt32Ty(clone.getModuleUnlocked()->getContext());
juliapersonality_func = Function::Create(FunctionType::get(T_int32, true),
Function::ExternalLinkage, "__julia_personality", clone.getModuleUnlocked());
juliapersonality_func->setDLLStorageClass(GlobalValue::DLLImportStorageClass);
}

// move everything inside, now that we've merged everything
// (before adding the exported headers)
@@ -455,11 +456,11 @@ void *jl_create_native_impl(jl_array_t *methods, LLVMOrcThreadSafeModuleRef llvm
G.setLinkage(GlobalValue::ExternalLinkage);
G.setVisibility(GlobalValue::HiddenVisibility);
makeSafeName(G);
#if defined(_OS_WINDOWS_) && defined(_CPU_X86_64_)
// Add unwind exception personalities to functions to handle async exceptions
if (Function *F = dyn_cast<Function>(&G))
F->setPersonalityFn(juliapersonality_func);
#endif
if (TT.isOSWindows() && TT.getArch() == Triple::x86_64) {
// Add unwind exception personalities to functions to handle async exceptions
if (Function *F = dyn_cast<Function>(&G))
F->setPersonalityFn(juliapersonality_func);
}
}
}
}
@@ -1446,30 +1447,29 @@ void jl_dump_native_impl(void *native_code,
// want less optimizations there.
Triple TheTriple = Triple(jl_ExecutionEngine->getTargetTriple());
// make sure to emit the native object format, even if FORCE_ELF was set in codegen
#if defined(_OS_WINDOWS_)
TheTriple.setObjectFormat(Triple::COFF);
#elif defined(_OS_DARWIN_)
TheTriple.setObjectFormat(Triple::MachO);
TheTriple.setOS(llvm::Triple::MacOSX);
#endif
if (TheTriple.isOSWindows()) {
TheTriple.setObjectFormat(Triple::COFF);
} else if (TheTriple.isOSDarwin()) {
TheTriple.setObjectFormat(Triple::MachO);
TheTriple.setOS(llvm::Triple::MacOSX);
}
Optional<Reloc::Model> RelocModel;
if (TheTriple.isOSLinux() || TheTriple.isOSFreeBSD()) {
RelocModel = Reloc::PIC_;
}
CodeModel::Model CMModel = CodeModel::Small;
if (TheTriple.isPPC()) {
// On PPC the small model is limited to 16bit offsets
CMModel = CodeModel::Medium;
}
std::unique_ptr<TargetMachine> SourceTM(
jl_ExecutionEngine->getTarget().createTargetMachine(
TheTriple.getTriple(),
jl_ExecutionEngine->getTargetCPU(),
jl_ExecutionEngine->getTargetFeatureString(),
jl_ExecutionEngine->getTargetOptions(),
#if defined(_OS_LINUX_) || defined(_OS_FREEBSD_)
Reloc::PIC_,
#else
Optional<Reloc::Model>(),
#endif
#if defined(_CPU_PPC_) || defined(_CPU_PPC64_)
// On PPC the small model is limited to 16bit offsets
CodeModel::Medium,
#else
// Use small model so that we can use signed 32bits offset in the function and GV tables
CodeModel::Small,
#endif
RelocModel,
CMModel,
CodeGenOpt::Aggressive // -O3 TODO: respect command -O0 flag?
));

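The core pattern in this file — replacing _OS_WINDOWS_/_CPU_X86_64_ preprocessor checks with runtime queries on the module's target triple — can be sketched in isolation. The helper below is illustrative only (it is not part of this commit) and assumes LLVM 14-era headers, where Triple lives in llvm/ADT/Triple.h:

    #include <llvm/ADT/Triple.h>
    #include <llvm/IR/Module.h>

    using namespace llvm;

    // Hypothetical helper: decide a target-specific codegen detail from the
    // module's triple instead of host-side _OS_*/_CPU_* macros, so the same
    // binary can also drive cross-compilation for other targets.
    static bool needsWin64Personality(const Module &M) {
        Triple TT(M.getTargetTriple());
        return TT.isOSWindows() && TT.getArch() == Triple::x86_64;
    }

The same idea lets jl_dump_native_impl above pick Reloc::PIC_ and the code model from the triple it is emitting for, rather than from the host build configuration.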
15 changes: 11 additions & 4 deletions src/codegen.cpp
@@ -3,9 +3,6 @@
#undef DEBUG
#include "llvm-version.h"
#include "platform.h"
#if defined(_CPU_X86_)
#define JL_NEED_FLOATTEMP_VAR 1
#endif

#ifndef __STDC_LIMIT_MACROS
#define __STDC_LIMIT_MACROS
@@ -86,6 +83,9 @@
using namespace llvm;

static bool jl_fpo_disabled(const Triple &TT) {
#ifdef JL_DISABLE_FPO
return true;
#endif
#ifdef _COMPILER_MSAN_ENABLED_
// MSAN doesn't support FPO
return true;
@@ -96,6 +96,13 @@ static bool jl_fpo_disabled(const Triple &TT) {
return false;
}

static bool jl_floattemp_var_needed(const Triple &TT) {
#ifdef JL_NEED_FLOATTEMP_VAR
return true;
#endif
return TT.getArch() == Triple::x86;
}

//Drag some useful type functions into our namespace
//to reduce verbosity of our code
auto getInt1Ty(LLVMContext &ctxt) {
@@ -2920,7 +2927,7 @@ static Value *emit_bits_compare(jl_codectx_t &ctx, jl_cgval_t arg1, jl_cgval_t a
return ConstantInt::get(getInt1Ty(ctx.builder.getContext()), 1);

if (at->isIntegerTy() || at->isPointerTy() || at->isFloatingPointTy()) {
Type *at_int = INTT(at);
Type *at_int = INTT(at, ctx.emission_context.DL);
Value *varg1 = emit_unbox(ctx, at_int, arg1, arg1.typ);
Value *varg2 = emit_unbox(ctx, at_int, arg2, arg2.typ);
return ctx.builder.CreateICmpEQ(varg1, varg2);
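The new jl_fpo_disabled / jl_floattemp_var_needed predicates keep a compile-time override (JL_DISABLE_FPO, JL_NEED_FLOATTEMP_VAR) but otherwise decide from the triple. A hypothetical call site — not taken from this commit — might consume such a predicate through LLVM's standard "frame-pointer" function attribute:

    #include <llvm/IR/Function.h>

    using namespace llvm;

    // Illustrative only: fpoDisabled would come from jl_fpo_disabled(TT)
    // in the real code. When frame-pointer omission is disallowed, keep
    // frame pointers everywhere; otherwise let the backend drop them.
    static void applyFramePointerPolicy(Function &F, bool fpoDisabled) {
        F.addFnAttr("frame-pointer", fpoDisabled ? "all" : "none");
    }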
48 changes: 24 additions & 24 deletions src/intrinsics.cpp
@@ -154,13 +154,13 @@ static Type *FLOATT(Type *t)
}

// convert an llvm type to same-size int type
static Type *INTT(Type *t)
static Type *INTT(Type *t, const DataLayout &DL)
{
auto &ctxt = t->getContext();
if (t->isIntegerTy())
return t;
if (t->isPointerTy())
return getSizeTy(ctxt);
return DL.getIntPtrType(t);
if (t == getDoubleTy(ctxt))
return getInt64Ty(ctxt);
if (t == getFloatTy(ctxt))
@@ -343,22 +343,19 @@ static Value *emit_unboxed_coercion(jl_codectx_t &ctx, Type *to, Value *unboxed)
unboxed = emit_bitcast(ctx, unboxed, to);
}
else if (!ty->isIntOrPtrTy() && !ty->isFloatingPointTy()) {
#ifndef JL_NDEBUG
const DataLayout &DL = jl_Module->getDataLayout();
#endif
assert(DL.getTypeSizeInBits(ty) == DL.getTypeSizeInBits(to));
AllocaInst *cast = ctx.builder.CreateAlloca(ty);
ctx.builder.CreateStore(unboxed, cast);
unboxed = ctx.builder.CreateLoad(to, ctx.builder.CreateBitCast(cast, to->getPointerTo()));
}
else if (frompointer) {
Type *INTT_to = INTT(to);
Type *INTT_to = INTT(to, DL);
unboxed = ctx.builder.CreatePtrToInt(unboxed, INTT_to);
if (INTT_to != to)
unboxed = ctx.builder.CreateBitCast(unboxed, to);
}
else if (topointer) {
Type *INTT_to = INTT(to);
Type *INTT_to = INTT(to, DL);
if (to != INTT_to)
unboxed = ctx.builder.CreateBitCast(unboxed, INTT_to);
unboxed = emit_inttoptr(ctx, unboxed, to);
@@ -584,6 +581,8 @@ static jl_cgval_t generic_cast(
intrinsic f, Instruction::CastOps Op,
const jl_cgval_t *argv, bool toint, bool fromint)
{
auto &TT = ctx.emission_context.TargetTriple;
auto &DL = ctx.emission_context.DL;
const jl_cgval_t &targ = argv[0];
const jl_cgval_t &v = argv[1];
jl_datatype_t *jlto = staticeval_bitstype(targ);
@@ -593,11 +592,11 @@
Type *to = bitstype_to_llvm((jl_value_t*)jlto, ctx.builder.getContext(), true);
Type *vt = bitstype_to_llvm(v.typ, ctx.builder.getContext(), true);
if (toint)
to = INTT(to);
to = INTT(to, DL);
else
to = FLOATT(to);
if (fromint)
vt = INTT(vt);
vt = INTT(vt, DL);
else
vt = FLOATT(vt);
if (!to || !vt)
@@ -606,17 +605,17 @@
if (!CastInst::castIsValid(Op, from, to))
return emit_runtime_call(ctx, f, argv, 2);
if (Op == Instruction::FPExt) {
#ifdef JL_NEED_FLOATTEMP_VAR
// Target platform might carry extra precision.
// Force rounding to single precision first. The reason is that it's
// fine to keep working in extended precision as long as it's
// understood that everything is implicitly rounded to 23 bits,
// but if we start looking at more bits we need to actually do the
// rounding first instead of carrying around incorrect low bits.
Value *jlfloattemp_var = emit_static_alloca(ctx, from->getType());
ctx.builder.CreateStore(from, jlfloattemp_var);
from = ctx.builder.CreateLoad(from->getType(), jlfloattemp_var, /*force this to load from the stack*/true);
#endif
if (jl_floattemp_var_needed(TT)) {
// Target platform might carry extra precision.
// Force rounding to single precision first. The reason is that it's
// fine to keep working in extended precision as long as it's
// understood that everything is implicitly rounded to 23 bits,
// but if we start looking at more bits we need to actually do the
// rounding first instead of carrying around incorrect low bits.
Value *jlfloattemp_var = emit_static_alloca(ctx, from->getType());
ctx.builder.CreateStore(from, jlfloattemp_var);
from = ctx.builder.CreateLoad(from->getType(), jlfloattemp_var, /*force this to load from the stack*/true);
}
}
Value *ans = ctx.builder.CreateCast(Op, from, to);
if (f == fptosi || f == fptoui)
@@ -1126,6 +1125,7 @@ static jl_cgval_t emit_ifelse(jl_codectx_t &ctx, jl_cgval_t c, jl_cgval_t x, jl_

static jl_cgval_t emit_intrinsic(jl_codectx_t &ctx, intrinsic f, jl_value_t **args, size_t nargs)
{
auto &DL = ctx.emission_context.DL;
assert(f < num_intrinsics);
if (f == cglobal && nargs == 1)
f = cglobal_auto;
@@ -1231,7 +1231,7 @@ static jl_cgval_t emit_intrinsic(jl_codectx_t &ctx, intrinsic f, jl_value_t **ar
const jl_cgval_t &x = argv[0];
if (!jl_is_primitivetype(x.typ))
return emit_runtime_call(ctx, f, argv, nargs);
Type *xt = INTT(bitstype_to_llvm(x.typ, ctx.builder.getContext(), true));
Type *xt = INTT(bitstype_to_llvm(x.typ, ctx.builder.getContext(), true), DL);
Value *from = emit_unbox(ctx, xt, x, x.typ);
Value *ans = ctx.builder.CreateNot(from);
return mark_julia_type(ctx, ans, false, x.typ);
@@ -1270,7 +1270,7 @@ static jl_cgval_t emit_intrinsic(jl_codectx_t &ctx, intrinsic f, jl_value_t **ar
if (float_func()[f])
xtyp = FLOATT(xtyp);
else
xtyp = INTT(xtyp);
xtyp = INTT(xtyp, DL);
if (!xtyp)
return emit_runtime_call(ctx, f, argv, nargs);
////Bool are required to be in the range [0,1]
@@ -1289,7 +1289,7 @@ static jl_cgval_t emit_intrinsic(jl_codectx_t &ctx, intrinsic f, jl_value_t **ar
if (f == shl_int || f == lshr_int || f == ashr_int) {
if (!jl_is_primitivetype(argv[1].typ))
return emit_runtime_call(ctx, f, argv, nargs);
argt[1] = INTT(bitstype_to_llvm(argv[1].typ, ctx.builder.getContext(), true));
argt[1] = INTT(bitstype_to_llvm(argv[1].typ, ctx.builder.getContext(), true), DL);
}
else {
for (size_t i = 1; i < nargs; ++i) {
@@ -1465,7 +1465,7 @@ static Value *emit_untyped_intrinsic(jl_codectx_t &ctx, intrinsic f, Value **arg

case fpiseq: {
*newtyp = jl_bool_type;
Type *it = INTT(t);
Type *it = INTT(t, ctx.emission_context.DL);
Value *xi = ctx.builder.CreateBitCast(x, it);
Value *yi = ctx.builder.CreateBitCast(y, it);
return ctx.builder.CreateOr(ctx.builder.CreateAnd(ctx.builder.CreateFCmpUNO(x, x),
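The INTT(t, DL) signature change is what removes the host-size assumption: a pointer's same-size integer type is now taken from the module's DataLayout instead of from getSizeTy (the host's size_t). A minimal sketch of that lookup, with an illustrative helper name that is not part of this commit:

    #include <cassert>
    #include <llvm/IR/DataLayout.h>
    #include <llvm/IR/Type.h>

    using namespace llvm;

    // The integer type matching a pointer's width is a property of the target
    // data layout (i64 on x86_64, i32 on i686/wasm32, ...), so it has to be
    // looked up per module rather than hard-coded for the host.
    static Type *pointerSizedInt(Type *T, const DataLayout &DL) {
        assert(T->isPointerTy() && "expected a pointer type");
        return DL.getIntPtrType(T);
    }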
37 changes: 18 additions & 19 deletions src/llvm-cpufeatures.cpp
@@ -38,20 +38,18 @@ STATISTIC(LoweredWithoutFMA, "Number of have_fma's that were lowered to false");
extern JuliaOJIT *jl_ExecutionEngine;

// whether this platform unconditionally (i.e. without needing multiversioning) supports FMA
Optional<bool> always_have_fma(Function &intr) JL_NOTSAFEPOINT {
auto intr_name = intr.getName();
auto typ = intr_name.substr(strlen("julia.cpu.have_fma."));

#if defined(_CPU_AARCH64_)
return typ == "f32" || typ == "f64";
#else
(void)typ;
return {};
#endif
Optional<bool> always_have_fma(Function &intr, const Triple &TT) JL_NOTSAFEPOINT {
if (TT.isAArch64()) {
auto intr_name = intr.getName();
auto typ = intr_name.substr(strlen("julia.cpu.have_fma."));
return typ == "f32" || typ == "f64";
} else {
return {};
}
}

bool have_fma(Function &intr, Function &caller) JL_NOTSAFEPOINT {
auto unconditional = always_have_fma(intr);
static bool have_fma(Function &intr, Function &caller, const Triple &TT) JL_NOTSAFEPOINT {
auto unconditional = always_have_fma(intr, TT);
if (unconditional.hasValue())
return unconditional.getValue();

Expand All @@ -65,21 +63,21 @@ bool have_fma(Function &intr, Function &caller) JL_NOTSAFEPOINT {
SmallVector<StringRef, 6> Features;
FS.split(Features, ',');
for (StringRef Feature : Features)
#if defined _CPU_ARM_
if (TT.isARM()) {
if (Feature == "+vfp4")
return typ == "f32" || typ == "f64";
return typ == "f32" || typ == "f64";
else if (Feature == "+vfp4sp")
return typ == "f32";
#else
} else {
if (Feature == "+fma" || Feature == "+fma4")
return typ == "f32" || typ == "f64";
#endif
}

return false;
}

void lowerHaveFMA(Function &intr, Function &caller, CallInst *I) JL_NOTSAFEPOINT {
if (have_fma(intr, caller)) {
void lowerHaveFMA(Function &intr, Function &caller, const Triple &TT, CallInst *I) JL_NOTSAFEPOINT {
if (have_fma(intr, caller, TT)) {
++LoweredWithFMA;
I->replaceAllUsesWith(ConstantInt::get(I->getType(), 1));
} else {
@@ -91,6 +89,7 @@ void lowerHaveFMA(Function &intr, Function &caller, CallInst *I) JL_NOTSAFEPOINT

bool lowerCPUFeatures(Module &M) JL_NOTSAFEPOINT
{
auto TT = Triple(M.getTargetTriple());
SmallVector<Instruction*,6> Materialized;

for (auto &F: M.functions()) {
@@ -100,7 +99,7 @@ bool lowerCPUFeatures(Module &M) JL_NOTSAFEPOINT
for (Use &U: F.uses()) {
User *RU = U.getUser();
CallInst *I = cast<CallInst>(RU);
lowerHaveFMA(F, *I->getParent()->getParent(), I);
lowerHaveFMA(F, *I->getParent()->getParent(), TT, I);
Materialized.push_back(I);
}
}
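have_fma looks the answer up in the function's target-features string whenever the triple alone cannot decide. A stand-alone sketch of that lookup, using the same attribute-splitting approach as above (hypothetical helper, not part of this commit):

    #include <llvm/ADT/SmallVector.h>
    #include <llvm/ADT/StringRef.h>
    #include <llvm/IR/Attributes.h>
    #include <llvm/IR/Function.h>

    using namespace llvm;

    // Illustrative only: check whether a function carries a given "+feature"
    // entry in its target-features attribute.
    static bool hasTargetFeature(const Function &F, StringRef Feat) {
        Attribute A = F.getFnAttribute("target-features");
        if (!A.isValid())
            return false; // the pass above instead falls back to the JIT's feature string
        SmallVector<StringRef, 8> Features;
        A.getValueAsString().split(Features, ',');
        for (StringRef Feature : Features)
            if (Feature == Feat)
                return true;
        return false;
    }

For example, hasTargetFeature(F, "+fma") corresponds to the non-ARM branch of have_fma above.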
22 changes: 11 additions & 11 deletions src/llvm-demote-float16.cpp
@@ -49,26 +49,26 @@ extern JuliaOJIT *jl_ExecutionEngine;

namespace {

bool have_fp16(Function &caller) {
static bool have_fp16(Function &caller, const Triple &TT) {
Attribute FSAttr = caller.getFnAttribute("target-features");
StringRef FS =
FSAttr.isValid() ? FSAttr.getValueAsString() : jl_ExecutionEngine->getTargetFeatureString();
#if defined(_CPU_AARCH64_)
if (FS.find("+fp16fml") != llvm::StringRef::npos || FS.find("+fullfp16") != llvm::StringRef::npos){
return true;
}
#elif defined(_CPU_X86_64_)
if (FS.find("+avx512fp16") != llvm::StringRef::npos){
return true;
if (TT.isAArch64()) {
if (FS.find("+fp16fml") != llvm::StringRef::npos || FS.find("+fullfp16") != llvm::StringRef::npos){
return true;
}
} else if (TT.getArch() == Triple::x86_64) {
if (FS.find("+avx512fp16") != llvm::StringRef::npos){
return true;
}
}
#endif
(void)FS;
return false;
}

static bool demoteFloat16(Function &F)
{
if (have_fp16(F))
auto TT = Triple(F.getParent()->getTargetTriple());
if (have_fp16(F, TT))
return false;

auto &ctx = F.getContext();
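Because have_fp16 now keys off the module's triple instead of the host's _CPU_* macros, the same build can answer differently per compilation target. A tiny self-contained illustration of that distinction (not from this commit):

    #include <cassert>
    #include <llvm/ADT/Triple.h>

    using namespace llvm;

    int main() {
        // Host and cross-compilation targets now take different branches in
        // have_fp16, even within a single running process.
        Triple host("x86_64-unknown-linux-gnu");
        Triple cross("aarch64-unknown-linux-gnu");
        assert(host.getArch() == Triple::x86_64 && !host.isAArch64());
        assert(cross.isAArch64());
        return 0;
    }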
