# Commentary only: patch tools skip any text that precedes the first
# "diff --git" header, so nothing in this preamble is applied to the tree.
#
# NOTE(review): this patch arrived with its line breaks and indentation
# collapsed. The layout below was rebuilt from the hunk line counts, assuming
# 4-space indentation and placing blank context lines where the counts
# require them -- confirm against the target tree before applying.
#
# src/init.c
#   * Includes "processor.h".
#   * Adds the file-local helper restore_fp_env(). It calls
#     jl_set_zero_subnormals(0) and, if that returned 0,
#     jl_set_default_nans(0). A non-zero result from either call is reported
#     through jl_error("Failed to configure floating point environment").
#   * _julia_init() now calls restore_fp_env() right after init_stdio() and
#     before restore_signals().
#
# src/processor.h
#   * Declares jl_set_zero_subnormals(), jl_get_zero_subnormals(),
#     jl_set_default_nans() and jl_get_default_nans() as JL_DLLEXPORT.
#
# src/processor_arm.cpp
#   * _CPU_AARCH64_ branch: adds fpcr_dn_mask (1 << 25, labelled in the code
#     as FPCR DN, bit [25]) next to the existing fpcr_fz_mask.
#     jl_get_default_nans() returns 1 when that bit is set in the value read
#     by get_fpcr_aarch64(), otherwise 0. jl_set_default_nans() sets the bit
#     when isDefault is non-zero, clears it otherwise, writes the value back
#     with set_fpcr_aarch64() and returns 0.
#   * #else branch: jl_get_default_nans() returns 0 and jl_set_default_nans()
#     returns its argument unchanged.
#
# src/processor_fallback.cpp, src/processor_x86.cpp
#   * Same stubs as the #else branch above: the getter returns 0, the setter
#     returns isDefault.
#
# Since restore_fp_env() treats a non-zero setter result as failure, the stub
# setters report failure only when called with a non-zero argument; the
# start-up call passes 0 and therefore gets 0 back from them.
diff --git a/src/init.c b/src/init.c
index 3f0161dcaf1f1..43905933f391a 100644
--- a/src/init.c
+++ b/src/init.c
@@ -28,6 +28,7 @@
 #undef DEFINE_BUILTIN_GLOBALS
 #include "threading.h"
 #include "julia_assert.h"
+#include "processor.h"
 
 #ifdef __cplusplus
 extern "C" {
@@ -618,6 +619,13 @@ static void jl_set_io_wait(int v)
 
 extern jl_mutex_t jl_modules_mutex;
 
+static void restore_fp_env(void)
+{
+    if (jl_set_zero_subnormals(0) || jl_set_default_nans(0)) {
+        jl_error("Failed to configure floating point environment");
+    }
+}
+
 void _julia_init(JL_IMAGE_SEARCH rel)
 {
     jl_init_timing();
@@ -634,6 +642,7 @@ void _julia_init(JL_IMAGE_SEARCH rel)
     // best to call this first, since it also initializes libuv
     jl_init_uv();
     init_stdio();
+    restore_fp_env();
     restore_signals();
 
     jl_page_size = jl_getpagesize();
diff --git a/src/processor.h b/src/processor.h
index 6c95a0b6003b6..a1509180ba24a 100644
--- a/src/processor.h
+++ b/src/processor.h
@@ -166,6 +166,10 @@ JL_DLLEXPORT jl_value_t *jl_get_cpu_name(void);
 
 // For debugging only
 JL_DLLEXPORT void jl_dump_host_cpu(void);
+JL_DLLEXPORT int32_t jl_set_zero_subnormals(int8_t isZero);
+JL_DLLEXPORT int32_t jl_get_zero_subnormals(void);
+JL_DLLEXPORT int32_t jl_set_default_nans(int8_t isDefault);
+JL_DLLEXPORT int32_t jl_get_default_nans(void);
 
 #ifdef __cplusplus
 }
diff --git a/src/processor_arm.cpp b/src/processor_arm.cpp
index 560d9f54539f8..f15f205118773 100644
--- a/src/processor_arm.cpp
+++ b/src/processor_arm.cpp
@@ -1808,8 +1808,10 @@ extern "C" int jl_test_cpu_feature(jl_cpu_feature_t feature)
 }
 
 #ifdef _CPU_AARCH64_
-// FZ, bit [24]
+// FPCR FZ, bit [24]
 static constexpr uint32_t fpcr_fz_mask = 1 << 24;
+// FPCR DN, bit [25]
+static constexpr uint32_t fpcr_dn_mask = 1 << 25;
 
 static inline uint32_t get_fpcr_aarch64(void)
 {
@@ -1835,6 +1837,19 @@ extern "C" JL_DLLEXPORT int32_t jl_set_zero_subnormals(int8_t isZero)
     set_fpcr_aarch64(fpcr);
     return 0;
 }
+
+extern "C" JL_DLLEXPORT int32_t jl_get_default_nans(void)
+{
+    return (get_fpcr_aarch64() & fpcr_dn_mask) != 0;
+}
+
+extern "C" JL_DLLEXPORT int32_t jl_set_default_nans(int8_t isDefault)
+{
+    uint32_t fpcr = get_fpcr_aarch64();
+    fpcr = isDefault ? (fpcr | fpcr_dn_mask) : (fpcr & ~fpcr_dn_mask);
+    set_fpcr_aarch64(fpcr);
+    return 0;
+}
 #else
 extern "C" JL_DLLEXPORT int32_t jl_get_zero_subnormals(void)
 {
@@ -1845,4 +1860,14 @@ extern "C" JL_DLLEXPORT int32_t jl_set_zero_subnormals(int8_t isZero)
 {
     return isZero;
 }
+
+extern "C" JL_DLLEXPORT int32_t jl_get_default_nans(void)
+{
+    return 0;
+}
+
+extern "C" JL_DLLEXPORT int32_t jl_set_default_nans(int8_t isDefault)
+{
+    return isDefault;
+}
 #endif
diff --git a/src/processor_fallback.cpp b/src/processor_fallback.cpp
index 416f1dd211a2b..43962eeb6930c 100644
--- a/src/processor_fallback.cpp
+++ b/src/processor_fallback.cpp
@@ -160,3 +160,13 @@ extern "C" JL_DLLEXPORT int32_t jl_set_zero_subnormals(int8_t isZero)
 {
     return isZero;
 }
+
+extern "C" JL_DLLEXPORT int32_t jl_get_default_nans(void)
+{
+    return 0;
+}
+
+extern "C" JL_DLLEXPORT int32_t jl_set_default_nans(int8_t isDefault)
+{
+    return isDefault;
+}
diff --git a/src/processor_x86.cpp b/src/processor_x86.cpp
index 81986a1f2c5f8..eab2c77ad91dc 100644
--- a/src/processor_x86.cpp
+++ b/src/processor_x86.cpp
@@ -1102,3 +1102,14 @@ extern "C" JL_DLLEXPORT int32_t jl_set_zero_subnormals(int8_t isZero)
         return isZero;
     }
 }
+
+// X86 does not support default NaNs
+extern "C" JL_DLLEXPORT int32_t jl_get_default_nans(void)
+{
+    return 0;
+}
+
+extern "C" JL_DLLEXPORT int32_t jl_set_default_nans(int8_t isDefault)
+{
+    return isDefault;
+}