From 55a2ddd880d322da5f00559ad2ef5a11b141885c Mon Sep 17 00:00:00 2001 From: Jameson Nash Date: Wed, 15 Nov 2017 20:05:51 -0500 Subject: [PATCH 1/2] runtime-intrinsics: fix definition of shifts The compiled version zero-extends (zext) the shift amount, whereas the runtime version was sign-extending (sext) it. --- src/runtime_intrinsics.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/runtime_intrinsics.c b/src/runtime_intrinsics.c index 92f437c5f5f89..f8bf4b2dff2ba 100644 --- a/src/runtime_intrinsics.c +++ b/src/runtime_intrinsics.c @@ -544,7 +544,7 @@ jl_value_t *jl_iintrinsic_2(jl_value_t *a, jl_value_t *b, const char *name, void *pa = jl_data_ptr(a), *pb = jl_data_ptr(b); unsigned sz = jl_datatype_size(ty); unsigned sz2 = next_power_of_two(sz); - unsigned szb = jl_datatype_size(tyb); + unsigned szb = cvtb ? jl_datatype_size(tyb) : sz; if (sz2 > sz) { /* round type up to the appropriate c-type and set/clear the unused bits */ void *pa2 = alloca(sz2); @@ -553,10 +553,12 @@ jl_value_t *jl_iintrinsic_2(jl_value_t *a, jl_value_t *b, const char *name, pa = pa2; } if (sz2 > szb) { - /* round type up to the appropriate c-type and set/clear/truncate the unused bits */ + /* round type up to the appropriate c-type and set/clear/truncate the unused bits + * (zero-extend if cvtb is set, since in that case b is unsigned while the sign of a comes from the op) + */ void *pb2 = alloca(sz2); memcpy(pb2, pb, szb); - memset((char*)pb2 + szb, getsign(pb, sz), sz2 - szb); + memset((char*)pb2 + szb, cvtb ? 
0 : getsign(pb, szb), sz2 - szb); pb = pb2; } jl_value_t *newv = lambda2(ty, pa, pb, sz, sz2, list); From 3fee8a37d9b025a301d90e26a274ef5c0cbf83de Mon Sep 17 00:00:00 2001 From: Jameson Nash Date: Wed, 22 Nov 2017 17:53:08 -0500 Subject: [PATCH 2/2] runtime-intrinsics: reduce allocations for common types --- src/datatype.c | 21 ++++++++------- src/runtime_intrinsics.c | 58 +++++++++++++++++----------------------- 2 files changed, 37 insertions(+), 42 deletions(-) diff --git a/src/datatype.c b/src/datatype.c index 33181b90ad873..41f5cdb62ac70 100644 --- a/src/datatype.c +++ b/src/datatype.c @@ -486,21 +486,24 @@ JL_DLLEXPORT jl_datatype_t *jl_new_primitivetype(jl_value_t *name, jl_module_t * JL_DLLEXPORT jl_value_t *jl_new_bits(jl_value_t *dt, void *data) { - // data may not have the alignment required by the data type. + // data may not have the alignment required by the size + // but will always have the alignment required by the datatype jl_ptls_t ptls = jl_get_ptls_states(); assert(jl_is_datatype(dt)); jl_datatype_t *bt = (jl_datatype_t*)dt; size_t nb = jl_datatype_size(bt); + // some types have special pools to minimize allocations if (nb == 0) return jl_new_struct_uninit(bt); // returns bt->instance + if (bt == jl_bool_type) return (1 & *(int8_t*)data) ? jl_true : jl_false; if (bt == jl_uint8_type) return jl_box_uint8(*(uint8_t*)data); - if (bt == jl_int64_type) return jl_box_int64(jl_load_unaligned_i64(data)); - if (bt == jl_bool_type) return (*(int8_t*)data) ? 
jl_true : jl_false; - if (bt == jl_int32_type) return jl_box_int32(jl_load_unaligned_i32(data)); - if (bt == jl_float64_type) { - double f; - memcpy(&f, data, 8); - return jl_box_float64(f); - } + if (bt == jl_int64_type) return jl_box_int64(*(int64_t*)data); + if (bt == jl_int32_type) return jl_box_int32(*(int32_t*)data); + if (bt == jl_int8_type) return jl_box_int8(*(int8_t*)data); + if (bt == jl_int16_type) return jl_box_int16(*(int16_t*)data); + if (bt == jl_uint64_type) return jl_box_uint64(*(uint64_t*)data); + if (bt == jl_uint32_type) return jl_box_uint32(*(uint32_t*)data); + if (bt == jl_uint16_type) return jl_box_uint16(*(uint16_t*)data); + if (bt == jl_char_type) return jl_box_char(*(uint32_t*)data); jl_value_t *v = jl_gc_alloc(ptls, nb, bt); switch (nb) { diff --git a/src/runtime_intrinsics.c b/src/runtime_intrinsics.c index f8bf4b2dff2ba..33ed4c9907fa0 100644 --- a/src/runtime_intrinsics.c +++ b/src/runtime_intrinsics.c @@ -349,27 +349,26 @@ jl_value_t *jl_iintrinsic_1(jl_value_t *ty, jl_value_t *a, const char *name, static inline jl_value_t *jl_intrinsiclambda_ty1(jl_value_t *ty, void *pa, unsigned osize, unsigned osize2, const void *voidlist) { - jl_ptls_t ptls = jl_get_ptls_states(); - jl_value_t *newv = jl_gc_alloc(ptls, jl_datatype_size(ty), ty); intrinsic_1_t op = select_intrinsic_1(osize2, (const intrinsic_1_t*)voidlist); - op(osize * host_char_bit, pa, jl_data_ptr(newv)); - return newv; + void *pr = alloca(osize2); + op(osize * host_char_bit, pa, pr); + return jl_new_bits(ty, pr); } static inline jl_value_t *jl_intrinsiclambda_u1(jl_value_t *ty, void *pa, unsigned osize, unsigned osize2, const void *voidlist) { jl_ptls_t ptls = jl_get_ptls_states(); - jl_value_t *newv = jl_gc_alloc(ptls, jl_datatype_size(ty), ty); intrinsic_u1_t op = select_intrinsic_u1(osize2, (const intrinsic_u1_t*)voidlist); - unsigned cnt = op(osize * host_char_bit, pa); - // TODO: the following memset/memcpy assumes little-endian + uint64_t cnt = op(osize * host_char_bit, 
pa); + // TODO: the following assume little-endian // for big-endian, need to copy from the other end of cnt - if (osize > sizeof(unsigned)) { - // perform zext, if needed - memset((char*)jl_data_ptr(newv) + sizeof(unsigned), 0, osize - sizeof(unsigned)); - osize = sizeof(unsigned); + if (osize <= sizeof(cnt)) { + return jl_new_bits(ty, &cnt); } - memcpy(jl_data_ptr(newv), &cnt, osize); + jl_value_t *newv = jl_gc_alloc(ptls, osize, ty); + // perform zext, if needed + memset((char*)jl_data_ptr(newv) + sizeof(cnt), 0, osize - sizeof(cnt)); + memcpy(jl_data_ptr(newv), &cnt, sizeof(cnt)); return newv; } @@ -385,7 +384,6 @@ JL_DLLEXPORT jl_value_t *jl_##name(jl_value_t *ty, jl_value_t *a) \ static inline jl_value_t *jl_intrinsic_cvt(jl_value_t *ty, jl_value_t *a, const char *name, intrinsic_cvt_t op) { - jl_ptls_t ptls = jl_get_ptls_states(); jl_value_t *aty = jl_typeof(a); if (!jl_is_primitivetype(aty)) jl_errorf("%s: value is not a primitive type", name); @@ -394,12 +392,13 @@ static inline jl_value_t *jl_intrinsic_cvt(jl_value_t *ty, jl_value_t *a, const void *pa = jl_data_ptr(a); unsigned isize = jl_datatype_size(aty); unsigned osize = jl_datatype_size(ty); - jl_value_t *newv = jl_gc_alloc(ptls, jl_datatype_size(ty), ty); - op(aty == (jl_value_t*)jl_bool_type ? 1 : isize * host_char_bit, pa, - osize * host_char_bit, jl_data_ptr(newv)); - if (ty == (jl_value_t*)jl_bool_type) - return *(uint8_t*)jl_data_ptr(newv) & 1 ? 
jl_true : jl_false; - return newv; + void *pr = alloca(osize); + unsigned isize_bits = isize * host_char_bit; + unsigned osize_bits = osize * host_char_bit; + if (aty == (jl_value_t*)jl_bool_type) + isize_bits = 1; + op(isize_bits, pa, osize_bits, pr); + return jl_new_bits(ty, pr); } // floating point @@ -567,13 +566,10 @@ jl_value_t *jl_iintrinsic_2(jl_value_t *a, jl_value_t *b, const char *name, static inline jl_value_t *jl_intrinsiclambda_2(jl_value_t *ty, void *pa, void *pb, unsigned sz, unsigned sz2, const void *voidlist) { - jl_ptls_t ptls = jl_get_ptls_states(); - jl_value_t *newv = jl_gc_alloc(ptls, jl_datatype_size(ty), ty); + void *pr = alloca(sz2); intrinsic_2_t op = select_intrinsic_2(sz2, (const intrinsic_2_t*)voidlist); - op(sz * host_char_bit, pa, pb, jl_data_ptr(newv)); - if (ty == (jl_value_t*)jl_bool_type) - return *(uint8_t*)jl_data_ptr(newv) & 1 ? jl_true : jl_false; - return newv; + op(sz * host_char_bit, pa, pb, pr); + return jl_new_bits(ty, pr); } static inline jl_value_t *jl_intrinsiclambda_cmp(jl_value_t *ty, void *pa, void *pb, unsigned sz, unsigned sz2, const void *voidlist) @@ -588,7 +584,7 @@ static inline jl_value_t *jl_intrinsiclambda_checked(jl_value_t *ty, void *pa, v jl_value_t *params[2]; params[0] = ty; params[1] = (jl_value_t*)jl_bool_type; - jl_datatype_t *tuptyp = jl_apply_tuple_type_v(params,2); + jl_datatype_t *tuptyp = jl_apply_tuple_type_v(params, 2); jl_ptls_t ptls = jl_get_ptls_states(); jl_value_t *newv = jl_gc_alloc(ptls, ((jl_datatype_t*)tuptyp)->size, tuptyp); @@ -601,16 +597,12 @@ static inline jl_value_t *jl_intrinsiclambda_checked(jl_value_t *ty, void *pa, v } static inline jl_value_t *jl_intrinsiclambda_checkeddiv(jl_value_t *ty, void *pa, void *pb, unsigned sz, unsigned sz2, const void *voidlist) { - jl_ptls_t ptls = jl_get_ptls_states(); - jl_value_t *newv = jl_gc_alloc(ptls, jl_datatype_size(ty), ty); + void *pr = alloca(sz2); intrinsic_checked_t op = select_intrinsic_checked(sz2, (const 
intrinsic_checked_t*)voidlist); - int ovflw = op(sz * host_char_bit, pa, pb, jl_data_ptr(newv)); + int ovflw = op(sz * host_char_bit, pa, pb, pr); if (ovflw) jl_throw(jl_diverror_exception); - if (ty == (jl_value_t*)jl_bool_type) - return *(uint8_t*)jl_data_ptr(newv) & 1 ? jl_true : jl_false; - - return newv; + return jl_new_bits(ty, pr); } // floating point