Skip to content

Commit

Permalink
static-show: improve accuracy of some printings (#52799)
Browse files: browse the repository at this point in the history
- Show strings with escaping, rather than trying to output the text
unmodified.
- Show symbols with the same formatting as Strings
- Avoid accidentally defining a broken Core.show method for NamedTuple
  • Loading branch information
vtjnash committed Jan 9, 2024
1 parent 486f434 commit bd3eab6
Show file tree
Hide file tree
Showing 10 changed files with 126 additions and 78 deletions.
6 changes: 3 additions & 3 deletions base/namedtuple.jl
Original file line number Diff line number Diff line change
Expand Up @@ -196,9 +196,8 @@ function convert(::Type{NT}, nt::NamedTuple{names}) where {names, NT<:NamedTuple
end

if nameof(@__MODULE__) === :Base
Tuple(nt::NamedTuple) = (nt...,)
(::Type{T})(nt::NamedTuple) where {T <: Tuple} = (t = Tuple(nt); t isa T ? t : convert(T, t)::T)
end
Tuple(nt::NamedTuple) = (nt...,)
(::Type{T})(nt::NamedTuple) where {T <: Tuple} = (t = Tuple(nt); t isa T ? t : convert(T, t)::T)

function show(io::IO, t::NamedTuple)
n = nfields(t)
Expand Down Expand Up @@ -232,6 +231,7 @@ function show(io::IO, t::NamedTuple)
print(io, ")")
end
end
end

eltype(::Type{T}) where T<:NamedTuple = nteltype(T)
nteltype(::Type) = Any
Expand Down
2 changes: 1 addition & 1 deletion base/show.jl
Original file line number Diff line number Diff line change
Expand Up @@ -1792,7 +1792,7 @@ function show_sym(io::IO, sym::Symbol; allow_macroname=false)
print(io, '@')
show_sym(io, Symbol(sym_str[2:end]))
else
print(io, "var", repr(string(sym)))
print(io, "var", repr(string(sym))) # TODO: this is not quite right, since repr uses String escaping rules, and Symbol uses raw string rules
end
end

Expand Down
10 changes: 5 additions & 5 deletions src/ast.c
Original file line number Diff line number Diff line change
Expand Up @@ -990,7 +990,7 @@ JL_DLLEXPORT jl_value_t *jl_copy_ast(jl_value_t *expr)
return expr;
}

JL_DLLEXPORT int jl_is_operator(char *sym)
JL_DLLEXPORT int jl_is_operator(const char *sym)
{
jl_ast_context_t *ctx = jl_ast_ctx_enter(NULL);
fl_context_t *fl_ctx = &ctx->fl;
Expand All @@ -999,7 +999,7 @@ JL_DLLEXPORT int jl_is_operator(char *sym)
return res;
}

JL_DLLEXPORT int jl_is_unary_operator(char *sym)
JL_DLLEXPORT int jl_is_unary_operator(const char *sym)
{
jl_ast_context_t *ctx = jl_ast_ctx_enter(NULL);
fl_context_t *fl_ctx = &ctx->fl;
Expand All @@ -1008,7 +1008,7 @@ JL_DLLEXPORT int jl_is_unary_operator(char *sym)
return res;
}

JL_DLLEXPORT int jl_is_unary_and_binary_operator(char *sym)
JL_DLLEXPORT int jl_is_unary_and_binary_operator(const char *sym)
{
jl_ast_context_t *ctx = jl_ast_ctx_enter(NULL);
fl_context_t *fl_ctx = &ctx->fl;
Expand All @@ -1017,7 +1017,7 @@ JL_DLLEXPORT int jl_is_unary_and_binary_operator(char *sym)
return res;
}

JL_DLLEXPORT int jl_is_syntactic_operator(char *sym)
JL_DLLEXPORT int jl_is_syntactic_operator(const char *sym)
{
jl_ast_context_t *ctx = jl_ast_ctx_enter(NULL);
fl_context_t *fl_ctx = &ctx->fl;
Expand All @@ -1026,7 +1026,7 @@ JL_DLLEXPORT int jl_is_syntactic_operator(char *sym)
return res;
}

JL_DLLEXPORT int jl_operator_precedence(char *sym)
JL_DLLEXPORT int jl_operator_precedence(const char *sym)
{
jl_ast_context_t *ctx = jl_ast_ctx_enter(NULL);
fl_context_t *fl_ctx = &ctx->fl;
Expand Down
2 changes: 1 addition & 1 deletion src/flisp/print.c
Original file line number Diff line number Diff line change
Expand Up @@ -518,7 +518,7 @@ static void print_string(fl_context_t *fl_ctx, ios_t *f, char *str, size_t sz)
}
else {
while (i < sz) {
size_t n = u8_escape(buf, sizeof(buf), str, &i, sz, 1, 0);
size_t n = u8_escape(buf, sizeof(buf), str, &i, sz, "\"", 0);
outsn(fl_ctx, buf, f, n-1);
}
}
Expand Down
10 changes: 5 additions & 5 deletions src/julia.h
Original file line number Diff line number Diff line change
Expand Up @@ -2125,11 +2125,11 @@ JL_DLLEXPORT jl_array_t *jl_uncompress_argnames(jl_value_t *syms);
JL_DLLEXPORT jl_value_t *jl_uncompress_argname_n(jl_value_t *syms, size_t i);


JL_DLLEXPORT int jl_is_operator(char *sym);
JL_DLLEXPORT int jl_is_unary_operator(char *sym);
JL_DLLEXPORT int jl_is_unary_and_binary_operator(char *sym);
JL_DLLEXPORT int jl_is_syntactic_operator(char *sym);
JL_DLLEXPORT int jl_operator_precedence(char *sym);
JL_DLLEXPORT int jl_is_operator(const char *sym);
JL_DLLEXPORT int jl_is_unary_operator(const char *sym);
JL_DLLEXPORT int jl_is_unary_and_binary_operator(const char *sym);
JL_DLLEXPORT int jl_is_syntactic_operator(const char *sym);
JL_DLLEXPORT int jl_operator_precedence(const char *sym);

STATIC_INLINE int jl_vinfo_sa(uint8_t vi)
{
Expand Down
137 changes: 90 additions & 47 deletions src/rtutils.c
Original file line number Diff line number Diff line change
Expand Up @@ -591,7 +591,7 @@ static size_t jl_show_svec(JL_STREAM *out, jl_svec_t *t, const char *head, const
JL_DLLEXPORT int jl_id_start_char(uint32_t wc) JL_NOTSAFEPOINT;
JL_DLLEXPORT int jl_id_char(uint32_t wc) JL_NOTSAFEPOINT;

JL_DLLEXPORT int jl_is_identifier(char *str) JL_NOTSAFEPOINT
JL_DLLEXPORT int jl_is_identifier(const char *str) JL_NOTSAFEPOINT
{
size_t i = 0;
uint32_t wc = u8_nextchar(str, &i);
Expand Down Expand Up @@ -674,22 +674,64 @@ static int is_globfunction(jl_value_t *v, jl_datatype_t *dv, jl_sym_t **globname
return 0;
}

static size_t jl_static_show_x_sym_escaped(JL_STREAM *out, jl_sym_t *name) JL_NOTSAFEPOINT
static size_t jl_static_show_string(JL_STREAM *out, const char *str, size_t len, int wrap) JL_NOTSAFEPOINT
{
size_t n = 0;

char *sn = jl_symbol_name(name);
int hidden = 0;
if (!(jl_is_identifier(sn) || jl_is_operator(sn))) {
hidden = 1;
if (wrap)
n += jl_printf(out, "\"");
if (!u8_isvalid(str, len)) {
// alternate print algorithm that preserves data if it's not UTF-8
static const char hexdig[] = "0123456789abcdef";
for (size_t i = 0; i < len; i++) {
uint8_t c = str[i];
if (c == '\\' || c == '"' || c == '$')
n += jl_printf(out, "\\%c", c);
else if (c >= 32 && c < 0x7f)
n += jl_printf(out, "%c", c);
else
n += jl_printf(out, "\\x%c%c", hexdig[c>>4], hexdig[c&0xf]);
}
}

if (hidden) {
n += jl_printf(out, "var\"");
else {
int special = 0;
for (size_t i = 0; i < len; i++) {
uint8_t c = str[i];
if (c < 32 || c == 0x7f || c == '\\' || c == '"' || c == '$') {
special = 1;
break;
}
}
if (!special) {
jl_uv_puts(out, str, len);
n += len;
}
else {
char buf[512];
size_t i = 0;
while (i < len) {
size_t r = u8_escape(buf, sizeof(buf), str, &i, len, "\"$", 0);
jl_uv_puts(out, buf, r - 1);
n += r - 1;
}
}
}
n += jl_printf(out, "%s", sn);
if (hidden) {
if (wrap)
n += jl_printf(out, "\"");
return n;
}

static size_t jl_static_show_symbol(JL_STREAM *out, jl_sym_t *name) JL_NOTSAFEPOINT
{
size_t n = 0;
const char *sn = jl_symbol_name(name);
int quoted = !jl_is_identifier(sn) && !jl_is_operator(sn);
if (quoted) {
n += jl_printf(out, "var");
// TODO: this is not quite right, since jl_static_show_string applies String escaping rules, while the var"..." syntax follows raw string rules
n += jl_static_show_string(out, sn, strlen(sn), 1);
}
else {
n += jl_printf(out, "%s", sn);
}
return n;
}
Expand Down Expand Up @@ -807,11 +849,6 @@ static size_t jl_static_show_x_(JL_STREAM *out, jl_value_t *v, jl_datatype_t *vt
// Types are printed as a fully qualified name, with parameters, e.g.
// `Base.Set{Int}`, and function types are printed as e.g. `typeof(Main.f)`
jl_datatype_t *dv = (jl_datatype_t*)v;
jl_sym_t *globname;
int globfunc = is_globname_binding(v, dv) && is_globfunction(v, dv, &globname);
jl_sym_t *sym = globfunc ? globname : dv->name->name;
char *sn = jl_symbol_name(sym);
size_t quote = 0;
if (dv->name == jl_tuple_typename) {
if (dv == jl_tuple_type)
return jl_printf(out, "Tuple");
Expand Down Expand Up @@ -844,8 +881,13 @@ static size_t jl_static_show_x_(JL_STREAM *out, jl_value_t *v, jl_datatype_t *vt
return n;
}
if (ctx.quiet) {
return jl_printf(out, "%s", jl_symbol_name(dv->name->name));
return jl_static_show_symbol(out, dv->name->name);
}
jl_sym_t *globname;
int globfunc = is_globname_binding(v, dv) && is_globfunction(v, dv, &globname);
jl_sym_t *sym = globfunc ? globname : dv->name->name;
char *sn = jl_symbol_name(sym);
size_t quote = 0;
if (globfunc) {
n += jl_printf(out, "typeof(");
}
Expand All @@ -858,7 +900,7 @@ static size_t jl_static_show_x_(JL_STREAM *out, jl_value_t *v, jl_datatype_t *vt
quote = 1;
}
}
n += jl_static_show_x_sym_escaped(out, sym);
n += jl_static_show_symbol(out, sym);
if (globfunc) {
n += jl_printf(out, ")");
if (quote) {
Expand Down Expand Up @@ -927,9 +969,7 @@ static size_t jl_static_show_x_(JL_STREAM *out, jl_value_t *v, jl_datatype_t *vt
n += jl_printf(out, "nothing");
}
else if (vt == jl_string_type) {
n += jl_printf(out, "\"");
jl_uv_puts(out, jl_string_data(v), jl_string_len(v)); n += jl_string_len(v);
n += jl_printf(out, "\"");
n += jl_static_show_string(out, jl_string_data(v), jl_string_len(v), 1);
}
else if (v == jl_bottom_type) {
n += jl_printf(out, "Union{}");
Expand Down Expand Up @@ -978,7 +1018,7 @@ static size_t jl_static_show_x_(JL_STREAM *out, jl_value_t *v, jl_datatype_t *vt
n += jl_printf(out, ")");
n += jl_printf(out, "<:");
}
n += jl_static_show_x_sym_escaped(out, var->name);
n += jl_static_show_symbol(out, var->name);
if (showbounds && (ub != (jl_value_t*)jl_any_type || lb != jl_bottom_type)) {
// show type-var upper bound if it is defined, or if we showed the lower bound
int ua = jl_is_unionall(ub);
Expand All @@ -996,27 +1036,24 @@ static size_t jl_static_show_x_(JL_STREAM *out, jl_value_t *v, jl_datatype_t *vt
n += jl_static_show_x(out, (jl_value_t*)m->parent, depth, ctx);
n += jl_printf(out, ".");
}
n += jl_printf(out, "%s", jl_symbol_name(m->name));
n += jl_static_show_symbol(out, m->name);
}
else if (vt == jl_symbol_type) {
char *sn = jl_symbol_name((jl_sym_t*)v);
int quoted = !jl_is_identifier(sn) && jl_operator_precedence(sn) == 0;
if (quoted)
n += jl_printf(out, "Symbol(\"");
else
n += jl_printf(out, ":");
n += jl_printf(out, "%s", sn);
if (quoted)
n += jl_printf(out, "\")");
n += jl_printf(out, ":");
n += jl_static_show_symbol(out, (jl_sym_t*)v);
}
else if (vt == jl_ssavalue_type) {
n += jl_printf(out, "SSAValue(%" PRIuPTR ")",
(uintptr_t)((jl_ssavalue_t*)v)->id);
}
else if (vt == jl_globalref_type) {
n += jl_static_show_x(out, (jl_value_t*)jl_globalref_mod(v), depth, ctx);
char *name = jl_symbol_name(jl_globalref_name(v));
n += jl_printf(out, jl_is_identifier(name) ? ".%s" : ".:(%s)", name);
jl_sym_t *name = jl_globalref_name(v);
n += jl_printf(out, ".");
if (jl_is_operator(jl_symbol_name(name)))
n += jl_printf(out, ":(%s)", jl_symbol_name(name));
else
n += jl_static_show_symbol(out, name);
}
else if (vt == jl_gotonode_type) {
n += jl_printf(out, "goto %" PRIuPTR, jl_gotonode_label(v));
Expand Down Expand Up @@ -1050,17 +1087,17 @@ static size_t jl_static_show_x_(JL_STREAM *out, jl_value_t *v, jl_datatype_t *vt
else if (vt == jl_expr_type) {
jl_expr_t *e = (jl_expr_t*)v;
if (e->head == jl_assign_sym && jl_array_nrows(e->args) == 2) {
n += jl_static_show_x(out, jl_exprarg(e,0), depth, ctx);
n += jl_static_show_x(out, jl_exprarg(e, 0), depth, ctx);
n += jl_printf(out, " = ");
n += jl_static_show_x(out, jl_exprarg(e,1), depth, ctx);
n += jl_static_show_x(out, jl_exprarg(e, 1), depth, ctx);
}
else {
char sep = ' ';
n += jl_printf(out, "Expr(:%s", jl_symbol_name(e->head));
n += jl_printf(out, "Expr(");
n += jl_static_show_x(out, (jl_value_t*)e->head, depth, ctx);
size_t i, len = jl_array_nrows(e->args);
for (i = 0; i < len; i++) {
n += jl_printf(out, ",%c", sep);
n += jl_static_show_x(out, jl_exprarg(e,i), depth, ctx);
n += jl_printf(out, ", ");
n += jl_static_show_x(out, jl_exprarg(e, i), depth, ctx);
}
n += jl_printf(out, ")");
}
Expand Down Expand Up @@ -1195,7 +1232,7 @@ static size_t jl_static_show_x_(JL_STREAM *out, jl_value_t *v, jl_datatype_t *vt
}
}

n += jl_static_show_x_sym_escaped(out, sym);
n += jl_static_show_symbol(out, sym);

if (globfunc) {
if (quote) {
Expand Down Expand Up @@ -1231,8 +1268,14 @@ static size_t jl_static_show_x_(JL_STREAM *out, jl_value_t *v, jl_datatype_t *vt
jl_value_t *names = isnamedtuple ? jl_tparam0(vt) : (jl_value_t*)jl_field_names(vt);
for (; i < tlen; i++) {
if (!istuple) {
jl_value_t *fname = isnamedtuple ? jl_fieldref_noalloc(names, i) : jl_svecref(names, i);
n += jl_printf(out, "%s=", jl_symbol_name((jl_sym_t*)fname));
jl_sym_t *fname = (jl_sym_t*)(isnamedtuple ? jl_fieldref_noalloc(names, i) : jl_svecref(names, i));
if (fname == NULL || !jl_is_symbol(fname))
n += jl_static_show_x(out, (jl_value_t*)fname, depth, ctx);
else if (jl_is_operator(jl_symbol_name(fname)))
n += jl_printf(out, "(%s)", jl_symbol_name(fname));
else
n += jl_static_show_symbol(out, fname);
n += jl_printf(out, "=");
}
size_t offs = jl_field_offset(vt, i);
char *fld_ptr = (char*)v + offs;
Expand Down Expand Up @@ -1367,7 +1410,7 @@ size_t jl_static_show_func_sig_(JL_STREAM *s, jl_value_t *type, jl_static_show_c
if ((jl_nparams(ftype) == 0 || ftype == ((jl_datatype_t*)ftype)->name->wrapper) &&
((jl_datatype_t*)ftype)->name->mt != jl_type_type_mt &&
((jl_datatype_t*)ftype)->name->mt != jl_nonfunction_mt) {
n += jl_printf(s, "%s", jl_symbol_name(((jl_datatype_t*)ftype)->name->mt->name));
n += jl_static_show_symbol(s, ((jl_datatype_t*)ftype)->name->mt->name);
}
else {
n += jl_printf(s, "(::");
Expand Down Expand Up @@ -1466,10 +1509,10 @@ void jl_log(int level, jl_value_t *module, jl_value_t *group, jl_value_t *id,
}
jl_printf(str, "\n@ ");
if (jl_is_string(file)) {
jl_uv_puts(str, jl_string_data(file), jl_string_len(file));
jl_static_show_string(str, jl_string_data(file), jl_string_len(file), 0);
}
else if (jl_is_symbol(file)) {
jl_printf(str, "%s", jl_symbol_name((jl_sym_t*)file));
jl_static_show_string(str, jl_symbol_name((jl_sym_t*)file), strlen(jl_symbol_name((jl_sym_t*)file)), 0);
}
jl_printf(str, ":");
jl_static_show(str, line);
Expand Down
15 changes: 6 additions & 9 deletions src/support/utf8.c
Original file line number Diff line number Diff line change
Expand Up @@ -410,7 +410,7 @@ int u8_escape_wchar(char *buf, size_t sz, uint32_t ch)
}

size_t u8_escape(char *buf, size_t sz, const char *src, size_t *pi, size_t end,
int escape_quotes, int ascii)
const char *escapes, int ascii)
{
size_t i = *pi, i0;
uint32_t ch;
Expand All @@ -420,12 +420,9 @@ size_t u8_escape(char *buf, size_t sz, const char *src, size_t *pi, size_t end,

while (i<end && buf<blim) {
// sz-11: leaves room for longest escape sequence
if (escape_quotes && src[i] == '"') {
buf += buf_put2c(buf, "\\\"");
i++;
}
else if (src[i] == '\\') {
buf += buf_put2c(buf, "\\\\");
if ((src[i] == '\\') || (escapes && strchr(escapes, src[i]))) {
*buf++ = '\\';
*buf++ = src[i];
i++;
}
else {
Expand Down Expand Up @@ -571,8 +568,8 @@ int u8_isvalid(const char *str, size_t len)
return 0;
// Check for surrogate chars
if (byt == 0xed && *pnt > 0x9f) return 0;
// Check for overlong encoding
if (byt == 0xe0 && *pnt < 0xa0) return 0;
// Check for overlong encoding
if (byt == 0xe0 && *pnt < 0xa0) return 0;
pnt += 2;
} else { // 4-byte sequence
// Must have 3 valid continuation characters
Expand Down
Loading

0 comments on commit bd3eab6

Please sign in to comment.