Skip to content

Commit

Permalink
Merge pull request #18578 from JuliaLang/yyc/arm/denormal
Browse files Browse the repository at this point in the history
Support set/get_zero_subnormals on AArch64
  • Loading branch information
yuyichao committed Sep 20, 2016
2 parents a6a8946 + 9a8001d commit 1621d7e
Showing 1 changed file with 32 additions and 2 deletions.
34 changes: 32 additions & 2 deletions src/sys.c
Original file line number Diff line number Diff line change
Expand Up @@ -505,7 +505,7 @@ static int32_t get_subnormal_flags(void)
}

// Returns non-zero if subnormals go to 0; zero otherwise.
JL_DLLEXPORT int32_t jl_get_zero_subnormals(int8_t isZero)
JL_DLLEXPORT int32_t jl_get_zero_subnormals(void)
{
uint32_t flags = get_subnormal_flags();
return _mm_getcsr() & flags;
Expand All @@ -530,9 +530,39 @@ JL_DLLEXPORT int32_t jl_set_zero_subnormals(int8_t isZero)
}
}

#elif defined(_CPU_AARCH64_)

// Mask selecting the FZ flag, which is bit [24] of the FPCR value
// handled by the helpers in this section.
static const uint32_t fpcr_fz_mask = UINT32_C(1) << 24;

// Read the AArch64 floating-point control register (FPCR).
//
// `mrs` moves a system register into a 64-bit general-purpose (X)
// register, so the asm output operand is a 64-bit variable; binding a
// 32-bit variable to it mismatches the register size the instruction
// uses. The result is narrowed to its low 32 bits, which contain the FZ
// flag (bit 24) that this file inspects.
static inline uint32_t get_fpcr_aarch64(void)
{
    uint64_t fpcr;
    asm volatile("mrs %0, fpcr" : "=r"(fpcr));
    return (uint32_t)fpcr;
}

// Write `fpcr` to the AArch64 floating-point control register (FPCR).
//
// `msr` takes its source from a 64-bit general-purpose (X) register.
// The value is zero-extended into a 64-bit variable first so that the
// upper half of the register handed to the instruction is well defined
// rather than whatever happened to be left there.
static inline void set_fpcr_aarch64(uint32_t fpcr)
{
    uint64_t fpcr64 = fpcr;
    asm volatile("msr fpcr, %0" :: "r"(fpcr64));
}

// Report whether the FZ flag is currently set in FPCR.
// Returns 1 when the flag is set and 0 when it is clear.
JL_DLLEXPORT int32_t jl_get_zero_subnormals(void)
{
    const uint32_t fpcr = get_fpcr_aarch64();
    if (fpcr & fpcr_fz_mask)
        return 1;
    return 0;
}

// Set (isZero != 0) or clear (isZero == 0) the FZ flag in FPCR, leaving
// every other bit of the register value as it was read.
// Always returns 0.
JL_DLLEXPORT int32_t jl_set_zero_subnormals(int8_t isZero)
{
    uint32_t updated = get_fpcr_aarch64();
    if (isZero)
        updated |= fpcr_fz_mask;
    else
        updated &= ~fpcr_fz_mask;
    set_fpcr_aarch64(updated);
    return 0;
}

#else

JL_DLLEXPORT int32_t jl_get_zero_subnormals(int8_t isZero)
JL_DLLEXPORT int32_t jl_get_zero_subnormals(void)
{
return 0;
}
Expand Down

2 comments on commit 1621d7e

@nanosoldier
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Executing the daily benchmark build, I will reply here when finished:

@nanosoldier runbenchmarks(ALL, isdaily = true)

@nanosoldier
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Your benchmark job has completed - possible performance regressions were detected. A full report can be found here. cc @jrevels

Please sign in to comment.