Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

WIP: Backports 1.6-beta #38949

Merged
merged 40 commits into from
Jan 6, 2021
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
40 commits
Select commit Hold shift + click to select a range
06171f6
Switch back to LLVM ORC v2, take two (#38804)
vtjnash Dec 17, 2020
264d855
fix #38888, pessimistic sparam inference with concrete upper bound (#…
JeffBezanson Dec 18, 2020
c678cef
Carry MBedTLS patch that works around CMake 3.18.2 bug. (#38933) (#38…
Sacha0 Dec 25, 2020
0372159
Source tarballs should contain `StdlibArtifacts.toml` files (#38962) …
Sacha0 Dec 25, 2020
715eb60
minor inferrability tweak for `manifest_deps_get` (#38954)
aviatesk Dec 21, 2020
f676019
Repair jl_init. (#38950)
GunnarFarneback Dec 21, 2020
e550e87
[Artifacts] Note the platform we're looking for in our error message …
staticfloat Dec 22, 2020
a67b662
Assert that _artifact_str returns a String (#38975)
timholy Dec 24, 2020
f190c53
Construct constant LLVMPtr correctly (#38958)
vchuravy Dec 25, 2020
9475802
Fix trampoline on PPC (#38980)
vchuravy Dec 25, 2020
f42b28e
Update the AST docs on `nothing` `:method` expressions (#38496)
timholy Dec 26, 2020
8d41bf7
[loader] Must invalidate `.o` files when `VERSION` changes (#38800)
staticfloat Dec 26, 2020
14a646c
build: pack checksums into fewer files by target (#38963)
vtjnash Dec 22, 2020
30ce7da
win: Set correct folder permissions after folder creation (#38942)
musm Dec 27, 2020
d73f866
Export jl_n_threads from the public libjulia library [#38925]
imciner2 Dec 18, 2020
a881030
[cli/trampolines]: Fix `aarch64-apple-darwin` trampoline ASM syntax
staticfloat Dec 19, 2020
dd7d025
[cli/trampolines]: Fix `i686-w64-mingw32` name mangling
staticfloat Dec 19, 2020
8984d13
Define `jl_n_threads` in only `libjulia`
staticfloat Dec 19, 2020
3be9b23
Fix visibility of jl_n_threads
imciner2 Dec 20, 2020
22a907b
Allow libjulia to contain non-pointer variables
imciner2 Dec 20, 2020
ac9c203
Remove second declaration of jl_n_threads
imciner2 Dec 20, 2020
c32c796
Use proper interprocedural register on aarch64
staticfloat Dec 21, 2020
b2484a7
Adapt Windows `cglobal()` search for `libjulia-internal`
staticfloat Dec 22, 2020
ea9d2bb
Revert "win: Set correct folder permissions after folder creation (#3…
KristofferC Dec 28, 2020
7d6594d
[libuv] Bump to include `DELETE` win ACL patch (#39038)
staticfloat Dec 30, 2020
c992850
Bump Documenter to 0.26.1 (#39097)
mortenpi Jan 5, 2021
bacbb97
Improve consistency of SONAME usage, fix debug installation
staticfloat Dec 26, 2020
3b809f7
Fix lack of `JL_MAJOR_SHLIB_EXT` on Windows
staticfloat Dec 27, 2020
b4f2c66
Fixes for non-Int based lengths (#37741)
dlfivefifty Jan 4, 2021
616cb5d
[cli/trampolines] Fix section directive for windows trampolines
staticfloat Dec 30, 2020
eb1efe5
[cli/loader]: Don't allow initialization to run more than once
staticfloat Dec 30, 2020
b6590d4
FileWatching: Dump open file descriptors on failure
Keno Apr 21, 2020
f452173
Don't detach rr workers in SharedArrays tests
Keno Apr 22, 2020
0d21397
Disable thread affinity test under rr
Keno Apr 22, 2020
2ed26a0
Exclude `threads` test from rr tracing
Keno Dec 16, 2020
c70397e
Use universal LIBUV_INC instead of build_includedir
vchuravy Sep 23, 2020
c8029bb
stage flisp.boot into host
vchuravy Sep 23, 2020
327f298
use BUILD_EXE for flisp
vchuravy Sep 24, 2020
55154b3
Measure compile time only when using time macros pt.2: handling when code under test throws and make compile timing thread-local (#38915)
IanButterworth Jan 6, 2021
f34a4d8
fix Meta.partially_inline! again (#39112)
simeonschaub Jan 6, 2021
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Measure compile time only when using time macros pt.2: handling when code under test throws and make compile timing thread-local (#38915)

* ensure compile timing disables in time & timev macros

* make comp measurement switching threadsafe

(cherry picked from commit 158e22f)
  • Loading branch information
IanButterworth authored and KristofferC committed Jan 6, 2021
commit 55154b30e88c9ac3eb810c06e59774960aefe2d9
78 changes: 43 additions & 35 deletions base/timing.jl
Original file line number Diff line number Diff line change
Expand Up @@ -115,41 +115,44 @@ function format_bytes(bytes) # also used by InteractiveUtils
end
end

function time_print(elapsedtime, bytes=0, gctime=0, allocs=0, compile_time=0)
function time_print(elapsedtime, bytes=0, gctime=0, allocs=0, compile_time=0, newline=false)
timestr = Ryu.writefixed(Float64(elapsedtime/1e9), 6)
length(timestr) < 10 && print(" "^(10 - length(timestr)))
print(timestr, " seconds")
parens = bytes != 0 || allocs != 0 || gctime > 0 || compile_time > 0
parens && print(" (")
if bytes != 0 || allocs != 0
allocs, ma = prettyprint_getunits(allocs, length(_cnt_units), Int64(1000))
if ma == 1
print(Int(allocs), _cnt_units[ma], allocs==1 ? " allocation: " : " allocations: ")
else
print(Ryu.writefixed(Float64(allocs), 2), _cnt_units[ma], " allocations: ")
end
print(format_bytes(bytes))
end
if gctime > 0
str = sprint() do io
print(io, length(timestr) < 10 ? (" "^(10 - length(timestr))) : "")
print(io, timestr, " seconds")
parens = bytes != 0 || allocs != 0 || gctime > 0 || compile_time > 0
parens && print(io, " (")
if bytes != 0 || allocs != 0
print(", ")
allocs, ma = prettyprint_getunits(allocs, length(_cnt_units), Int64(1000))
if ma == 1
print(io, Int(allocs), _cnt_units[ma], allocs==1 ? " allocation: " : " allocations: ")
else
print(io, Ryu.writefixed(Float64(allocs), 2), _cnt_units[ma], " allocations: ")
end
print(io, format_bytes(bytes))
end
print(Ryu.writefixed(Float64(100*gctime/elapsedtime), 2), "% gc time")
end
if compile_time > 0
if bytes != 0 || allocs != 0 || gctime > 0
print(", ")
if gctime > 0
if bytes != 0 || allocs != 0
print(io, ", ")
end
print(io, Ryu.writefixed(Float64(100*gctime/elapsedtime), 2), "% gc time")
end
print(Ryu.writefixed(Float64(100*compile_time/elapsedtime), 2), "% compilation time")
if compile_time > 0
if bytes != 0 || allocs != 0 || gctime > 0
print(io, ", ")
end
print(io, Ryu.writefixed(Float64(100*compile_time/elapsedtime), 2), "% compilation time")
end
parens && print(io, ")")
end
parens && print(")")
newline ? println(str) : print(str)
nothing
end

function timev_print(elapsedtime, diff::GC_Diff, compile_time)
allocs = gc_alloc_count(diff)
time_print(elapsedtime, diff.allocd, diff.total_time, allocs, compile_time)
print("\nelapsed time (ns): $elapsedtime\n")
time_print(elapsedtime, diff.allocd, diff.total_time, allocs, compile_time, true)
print("elapsed time (ns): $elapsedtime\n")
padded_nonzero_print(diff.total_time, "gc time (ns)")
padded_nonzero_print(diff.allocd, "bytes allocated")
padded_nonzero_print(diff.poolalloc, "pool allocs")
Expand Down Expand Up @@ -200,13 +203,14 @@ macro time(ex)
local stats = gc_num()
local compile_elapsedtime = cumulative_compile_time_ns_before()
local elapsedtime = time_ns()
local val = $(esc(ex))
elapsedtime = time_ns() - elapsedtime
compile_elapsedtime = cumulative_compile_time_ns_after() - compile_elapsedtime
local val = try
$(esc(ex))
finally
elapsedtime = time_ns() - elapsedtime
compile_elapsedtime = cumulative_compile_time_ns_after() - compile_elapsedtime
end
local diff = GC_Diff(gc_num(), stats)
time_print(elapsedtime, diff.allocd, diff.total_time,
gc_alloc_count(diff), compile_elapsedtime)
println()
time_print(elapsedtime, diff.allocd, diff.total_time, gc_alloc_count(diff), compile_elapsedtime, true)
val
end
end
Expand Down Expand Up @@ -248,10 +252,14 @@ macro timev(ex)
local stats = gc_num()
local compile_elapsedtime = cumulative_compile_time_ns_before()
local elapsedtime = time_ns()
local val = $(esc(ex))
elapsedtime = time_ns() - elapsedtime
compile_elapsedtime = cumulative_compile_time_ns_after() - compile_elapsedtime
timev_print(elapsedtime, GC_Diff(gc_num(), stats), compile_elapsedtime)
local val = try
$(esc(ex))
finally
elapsedtime = time_ns() - elapsedtime
compile_elapsedtime = cumulative_compile_time_ns_after() - compile_elapsedtime
end
local diff = GC_Diff(gc_num(), stats)
timev_print(elapsedtime, diff, compile_elapsedtime)
val
end
end
Expand Down
14 changes: 8 additions & 6 deletions src/aotcompile.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -293,7 +293,8 @@ void *jl_create_native(jl_array_t *methods, const jl_cgparams_t cgparams, int _p
JL_GC_PUSH1(&src);
JL_LOCK(&codegen_lock);
uint64_t compiler_start_time = 0;
if (jl_measure_compile_time)
int tid = jl_threadid();
if (jl_measure_compile_time[tid])
compiler_start_time = jl_hrtime();

CompilationPolicy policy = (CompilationPolicy) _policy;
Expand Down Expand Up @@ -417,8 +418,8 @@ void *jl_create_native(jl_array_t *methods, const jl_cgparams_t cgparams, int _p
}

data->M = std::move(clone);
if (jl_measure_compile_time)
jl_cumulative_compile_time += (jl_hrtime() - compiler_start_time);
if (jl_measure_compile_time[tid])
jl_cumulative_compile_time[tid] += (jl_hrtime() - compiler_start_time);
JL_UNLOCK(&codegen_lock); // Might GC
return (void*)data;
}
Expand Down Expand Up @@ -896,7 +897,8 @@ void *jl_get_llvmf_defn(jl_method_instance_t *mi, size_t world, char getwrapper,
jl_llvm_functions_t decls;
JL_LOCK(&codegen_lock);
uint64_t compiler_start_time = 0;
if (jl_measure_compile_time)
int tid = jl_threadid();
if (jl_measure_compile_time[tid])
compiler_start_time = jl_hrtime();
std::tie(m, decls) = jl_emit_code(mi, src, jlrettype, output);

Expand All @@ -921,8 +923,8 @@ void *jl_get_llvmf_defn(jl_method_instance_t *mi, size_t world, char getwrapper,
m.release(); // the return object `llvmf` will be the owning pointer
}
JL_GC_POP();
if (jl_measure_compile_time)
jl_cumulative_compile_time += (jl_hrtime() - compiler_start_time);
if (jl_measure_compile_time[tid])
jl_cumulative_compile_time[tid] += (jl_hrtime() - compiler_start_time);
JL_UNLOCK(&codegen_lock); // Might GC
if (F)
return F;
Expand Down
9 changes: 4 additions & 5 deletions src/gf.c
Original file line number Diff line number Diff line change
Expand Up @@ -3139,21 +3139,20 @@ int jl_has_concrete_subtype(jl_value_t *typ)
//static jl_mutex_t typeinf_lock;
#define typeinf_lock codegen_lock

uint8_t jl_measure_compile_time = 0;
uint64_t jl_cumulative_compile_time = 0;
static uint64_t inference_start_time = 0;

JL_DLLEXPORT void jl_typeinf_begin(void)
{
JL_LOCK(&typeinf_lock);
if (jl_measure_compile_time)
if (jl_measure_compile_time[jl_threadid()])
inference_start_time = jl_hrtime();
}

JL_DLLEXPORT void jl_typeinf_end(void)
{
if (typeinf_lock.count == 1 && jl_measure_compile_time)
jl_cumulative_compile_time += (jl_hrtime() - inference_start_time);
int tid = jl_threadid();
if (typeinf_lock.count == 1 && jl_measure_compile_time[tid])
jl_cumulative_compile_time[tid] += (jl_hrtime() - inference_start_time);
JL_UNLOCK(&typeinf_lock);
}

Expand Down
38 changes: 22 additions & 16 deletions src/jitlayers.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -78,14 +78,16 @@ void jl_jit_globals(std::map<void *, GlobalVariable*> &globals)
extern "C" JL_DLLEXPORT
uint64_t jl_cumulative_compile_time_ns_before()
{
jl_measure_compile_time = 1;
return jl_cumulative_compile_time;
int tid = jl_threadid();
jl_measure_compile_time[tid] = 1;
return jl_cumulative_compile_time[tid];
}
extern "C" JL_DLLEXPORT
uint64_t jl_cumulative_compile_time_ns_after()
{
jl_measure_compile_time = 0;
return jl_cumulative_compile_time;
int tid = jl_threadid();
jl_measure_compile_time[tid] = 0;
return jl_cumulative_compile_time[tid];
}

// this generates llvm code for the lambda info
Expand Down Expand Up @@ -231,7 +233,8 @@ int jl_compile_extern_c(void *llvmmod, void *p, void *sysimg, jl_value_t *declrt
{
JL_LOCK(&codegen_lock);
uint64_t compiler_start_time = 0;
if (jl_measure_compile_time)
int tid = jl_threadid();
if (jl_measure_compile_time[tid])
compiler_start_time = jl_hrtime();
jl_codegen_params_t params;
jl_codegen_params_t *pparams = (jl_codegen_params_t*)p;
Expand All @@ -255,8 +258,8 @@ int jl_compile_extern_c(void *llvmmod, void *p, void *sysimg, jl_value_t *declrt
if (success && llvmmod == NULL)
jl_add_to_ee(std::unique_ptr<Module>(into));
}
if (codegen_lock.count == 1 && jl_measure_compile_time)
jl_cumulative_compile_time += (jl_hrtime() - compiler_start_time);
if (codegen_lock.count == 1 && jl_measure_compile_time[tid])
jl_cumulative_compile_time[tid] += (jl_hrtime() - compiler_start_time);
JL_UNLOCK(&codegen_lock);
return success;
}
Expand Down Expand Up @@ -314,7 +317,8 @@ jl_code_instance_t *jl_generate_fptr(jl_method_instance_t *mi JL_PROPAGATES_ROOT
{
JL_LOCK(&codegen_lock); // also disables finalizers, to prevent any unexpected recursion
uint64_t compiler_start_time = 0;
if (jl_measure_compile_time)
int tid = jl_threadid();
if (jl_measure_compile_time[tid])
compiler_start_time = jl_hrtime();
// if we don't have any decls already, try to generate it now
jl_code_info_t *src = NULL;
Expand Down Expand Up @@ -352,8 +356,8 @@ jl_code_instance_t *jl_generate_fptr(jl_method_instance_t *mi JL_PROPAGATES_ROOT
else {
codeinst = NULL;
}
if (codegen_lock.count == 1 && jl_measure_compile_time)
jl_cumulative_compile_time += (jl_hrtime() - compiler_start_time);
if (codegen_lock.count == 1 && jl_measure_compile_time[tid])
jl_cumulative_compile_time[tid] += (jl_hrtime() - compiler_start_time);
JL_UNLOCK(&codegen_lock);
JL_GC_POP();
return codeinst;
Expand All @@ -367,7 +371,8 @@ void jl_generate_fptr_for_unspecialized(jl_code_instance_t *unspec)
}
JL_LOCK(&codegen_lock);
uint64_t compiler_start_time = 0;
if (jl_measure_compile_time)
int tid = jl_threadid();
if (jl_measure_compile_time[tid])
compiler_start_time = jl_hrtime();
if (unspec->invoke == NULL) {
jl_code_info_t *src = NULL;
Expand Down Expand Up @@ -395,8 +400,8 @@ void jl_generate_fptr_for_unspecialized(jl_code_instance_t *unspec)
}
JL_GC_POP();
}
if (codegen_lock.count == 1 && jl_measure_compile_time)
jl_cumulative_compile_time += (jl_hrtime() - compiler_start_time);
if (codegen_lock.count == 1 && jl_measure_compile_time[tid])
jl_cumulative_compile_time[tid] += (jl_hrtime() - compiler_start_time);
JL_UNLOCK(&codegen_lock); // Might GC
}

Expand All @@ -419,7 +424,8 @@ jl_value_t *jl_dump_method_asm(jl_method_instance_t *mi, size_t world,
// so create an exception here so we can print pretty our lies
JL_LOCK(&codegen_lock); // also disables finalizers, to prevent any unexpected recursion
uint64_t compiler_start_time = 0;
if (jl_measure_compile_time)
int tid = jl_threadid();
if (jl_measure_compile_time[tid])
compiler_start_time = jl_hrtime();
specfptr = (uintptr_t)codeinst->specptr.fptr;
if (specfptr == 0) {
Expand All @@ -444,8 +450,8 @@ jl_value_t *jl_dump_method_asm(jl_method_instance_t *mi, size_t world,
}
JL_GC_POP();
}
if (jl_measure_compile_time)
jl_cumulative_compile_time += (jl_hrtime() - compiler_start_time);
if (jl_measure_compile_time[tid])
jl_cumulative_compile_time[tid] += (jl_hrtime() - compiler_start_time);
JL_UNLOCK(&codegen_lock);
}
if (specfptr != 0)
Expand Down
4 changes: 2 additions & 2 deletions src/julia_internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -124,8 +124,8 @@ static inline uint64_t cycleclock(void)

#include "timing.h"

extern uint8_t jl_measure_compile_time;
extern uint64_t jl_cumulative_compile_time;
extern uint8_t *jl_measure_compile_time;
extern uint64_t *jl_cumulative_compile_time;

#ifdef _COMPILER_MICROSOFT_
# define jl_return_address() ((uintptr_t)_ReturnAddress())
Expand Down
4 changes: 4 additions & 0 deletions src/threading.c
Original file line number Diff line number Diff line change
Expand Up @@ -222,6 +222,8 @@ jl_get_ptls_states_func jl_get_ptls_states_getter(void)
#endif

jl_ptls_t *jl_all_tls_states JL_GLOBALLY_ROOTED;
uint8_t *jl_measure_compile_time = NULL;
uint64_t *jl_cumulative_compile_time = NULL;

// return calling thread's ID
// Also update the suspended_threads list in signals-mach when changing the
Expand Down Expand Up @@ -399,6 +401,8 @@ void jl_init_threading(void)
jl_n_threads = (uint64_t)strtol(cp, NULL, 10);
if (jl_n_threads <= 0)
jl_n_threads = 1;
jl_measure_compile_time = realloc( jl_measure_compile_time, jl_n_threads * sizeof *jl_measure_compile_time );
jl_cumulative_compile_time = realloc( jl_cumulative_compile_time, jl_n_threads * sizeof *jl_cumulative_compile_time );
#ifndef __clang_analyzer__
jl_all_tls_states = (jl_ptls_t*)calloc(jl_n_threads, sizeof(void*));
#endif
Expand Down