Skip to content

Commit

Permalink
AST: parse a.b as (. a b)
Browse files Browse the repository at this point in the history
I was working a bit on macro expansion - particularly `quote`
(quasiquote) expansion with `$` interpolations - and I've found that
it's weird and inconvenient that we parse `a.b` into `(. a (quote b))`.

Specifically, the part that's weird here is that we emit `(quote b)` for
the field name even though this is "not quote syntax": this should not
yield a syntax literal during lowering, and is thus a semantic mismatch
with actual quote syntax of the form `:(a + b)` or `quote a+b end`.

* Why is this a problem? It means we need special rules to distinguish
  actual syntax literals from field names.
* But can we really change this? Surely this AST form had a purpose?
  Yes! A long time ago Julia supported `a.(b)` syntax to mean
  `getfield(a, b)`, which would naturally have been parsed as `(. a b)`.
  However this was deprecated as part of adding broadcast syntax in
  JuliaLang/julia#15032

Here we simplify by parsing `a.b` as `(. a b)` instead, with the second
argument implied to be a field name.
  • Loading branch information
c42f committed Jul 8, 2023
1 parent 5aad812 commit 50a4403
Show file tree
Hide file tree
Showing 5 changed files with 81 additions and 74 deletions.
1 change: 1 addition & 0 deletions docs/src/reference.md
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@ class of tokenization errors and lets the parser deal with them.

### Improvements for AST inconsistencies

* Field access syntax like `a.b` is parsed as `(. a b)` rather than `(. a (quote b))` to avoid the inconsistency between this and actual quoted syntax literals like `:(b)` and `quote b end` ([#324](https://github.com/JuliaLang/JuliaSyntax.jl/issues/324))
* Dotted call syntax like `f.(a,b)` and `a .+ b` has been made consistent with the `K"dotcall"` head (#90)
* Standalone dotted operators are always parsed as `(. op)`. For example `.*(x,y)` is parsed as `(call (. *) x y)` (#240)
* The `K"="` kind is used for keyword syntax rather than `kw`, to avoid various inconsistencies and ambiguities (#103)
Expand Down
15 changes: 11 additions & 4 deletions src/expr.jl
Original file line number Diff line number Diff line change
Expand Up @@ -259,10 +259,17 @@ function _internal_node_to_Expr(source, srcrange, head, childranges, childheads,
args[1] = Symbol(".", args[1])
end
end
elseif k == K"." && length(args) == 1 && is_operator(childheads[1])
# Hack: Here we preserve the head of the operator to determine whether
# we need to coalesce it with the dot into a single symbol later on.
args[1] = (childheads[1], args[1])
elseif k == K"."
if length(args) == 2
a2 = args[2]
if !@isexpr(a2, :quote) && !(a2 isa QuoteNode)
args[2] = QuoteNode(a2)
end
elseif length(args) == 1 && is_operator(childheads[1])
# Hack: Here we preserve the head of the operator to determine whether
# we need to coalesce it with the dot into a single symbol later on.
args[1] = (childheads[1], args[1])
end
elseif k == K"ref" || k == K"curly"
# Move parameters blocks to args[2]
_reorder_parameters!(args, 2)
Expand Down
66 changes: 29 additions & 37 deletions src/parser.jl
Original file line number Diff line number Diff line change
Expand Up @@ -1437,7 +1437,7 @@ end
# * Adjoint suffix like a'
# * String macros like a"str" b"""str""" c`str` d```str```
#
# f(a).g(b) ==> (call (. (call f a) (quote g)) b)
# f(a).g(b) ==> (call (. (call f a) g) b)
#
# flisp: parse-call-chain, parse-call-with-initial-ex
function parse_call_chain(ps::ParseState, mark, is_macrocall=false)
Expand All @@ -1448,7 +1448,7 @@ function parse_call_chain(ps::ParseState, mark, is_macrocall=false)
end
# source range of the @-prefixed part of a macro
macro_atname_range = nothing
# $A.@x ==> (macrocall (. ($ A) (quote @x)))
# $A.@x ==> (macrocall (. ($ A) @x))
maybe_strmac = true
# We record the last component of chains of dot-separated identifiers so we
# know which identifier was the macro name.
Expand All @@ -1470,22 +1470,22 @@ function parse_call_chain(ps::ParseState, mark, is_macrocall=false)
# [@foo x] ==> (vect (macrocall @foo x))
# [@foo] ==> (vect (macrocall @foo))
# @var"#" a ==> (macrocall (var @#) a)
# A.@x y ==> (macrocall (. A (quote @x)) y)
# A.@var"#" a ==> (macrocall (. A (quote (var @#))) a)
# A.@x y ==> (macrocall (. A @x) y)
# A.@var"#" a ==> (macrocall (. A (var @#)) a)
# @+x y ==> (macrocall @+ x y)
# A.@.x ==> (macrocall (. A (quote @.)) x)
# A.@.x ==> (macrocall (. A @.) x)
fix_macro_name_kind!(ps, macro_name_position)
let ps = with_space_sensitive(ps)
# Space separated macro arguments
# A.@foo a b ==> (macrocall (. A (quote @foo)) a b)
# @A.foo a b ==> (macrocall (. A (quote @foo)) a b)
# A.@foo a b ==> (macrocall (. A @foo) a b)
# @A.foo a b ==> (macrocall (. A @foo) a b)
n_args = parse_space_separated_exprs(ps)
is_doc_macro = peek_behind(ps, macro_name_position).orig_kind == K"doc"
if is_doc_macro && n_args == 1
# Parse extended @doc args on next line
# @doc x\ny ==> (macrocall @doc x y)
# A.@doc x\ny ==> (macrocall (. A (quote @doc)) doc x y)
# @A.doc x\ny ==> (macrocall (. A (quote @doc)) doc x y)
# A.@doc x\ny ==> (macrocall (. A @doc) x y)
# @A.doc x\ny ==> (macrocall (. A @doc) x y)
# @doc x y\nz ==> (macrocall @doc x y)
#
# Excluded cases
Expand Down Expand Up @@ -1518,8 +1518,8 @@ function parse_call_chain(ps::ParseState, mark, is_macrocall=false)
end
if is_macrocall
# @x(a, b) ==> (macrocall-p @x a b)
# A.@x(y) ==> (macrocall-p (. A (quote @x)) y)
# A.@x(y).z ==> (. (macrocall-p (. A (quote @x)) y) (quote z))
# A.@x(y) ==> (macrocall-p (. A @x) y)
# A.@x(y).z ==> (. (macrocall-p (. A @x) y) z)
fix_macro_name_kind!(ps, macro_name_position)
is_macrocall = false
macro_atname_range = nothing
Expand All @@ -1535,8 +1535,8 @@ function parse_call_chain(ps::ParseState, mark, is_macrocall=false)
# @S[a,b] ==> (macrocall @S (vect a b))
# @S[a b] ==> (macrocall @S (hcat a b))
# @S[a; b] ==> (macrocall @S (vcat a b))
# A.@S[a] ==> (macrocall (. A (quote @S)) (vect a))
# @S[a].b ==> (. (macrocall @S (vect a)) (quote b))
# A.@S[a] ==> (macrocall (. A @S) (vect a))
# @S[a].b ==> (. (macrocall @S (vect a)) b)
#v1.7: @S[a ;; b] ==> (macrocall @S (ncat-2 a b))
#v1.6: @S[a ;; b] ==> (macrocall @S (error (ncat-2 a b)))
fix_macro_name_kind!(ps, macro_name_position)
Expand Down Expand Up @@ -1565,14 +1565,14 @@ function parse_call_chain(ps::ParseState, mark, is_macrocall=false)
check_ncat_compat(ps, mark, ckind)
end
elseif k == K"."
# x .y ==> (. x (error-t) (quote y))
# x .y ==> (. x (error-t) y)
bump_disallowed_space(ps)
emark = position(ps)
if !isnothing(macro_atname_range)
# Allow `@` in macrocall only in first and last position
# A.B.@x ==> (macrocall (. (. A (quote B)) (quote @x)))
# @A.B.x ==> (macrocall (. (. A (quote B)) (quote @x)))
# A.@B.x ==> (macrocall (. (. A (error-t) B) (quote @x)))
# A.B.@x ==> (macrocall (. (. A B) @x))
# @A.B.x ==> (macrocall (. (. A B) @x))
# A.@B.x ==> (macrocall (. (. A B) (error-t) @x))
emit_diagnostic(ps, macro_atname_range...,
error="`@` must appear on first or last macro name component")
bump(ps, TRIVIA_FLAG, error="Unexpected `.` after macro name")
Expand Down Expand Up @@ -1603,28 +1603,23 @@ function parse_call_chain(ps::ParseState, mark, is_macrocall=false)
emit(ps, m, K"quote", COLON_QUOTE)
emit(ps, mark, K".")
elseif k == K"$"
# f.$x ==> (. f (inert ($ x)))
# f.$(x+y) ==> (. f (inert ($ (call + x y))))
# A.$B.@x ==> (macrocall (. (. A (inert ($ B))) (quote @x)))
# @A.$x a ==> (macrocall (. A (inert (error x))) a)
# f.$x ==> (. f ($ x))
# f.$(x+y) ==> (. f ($ (parens (call-i x + y))))
# A.$B.@x ==> (macrocall (. (. A ($ B)) @x))
# @A.$x a ==> (macrocall (. A (error x)) a)
m = position(ps)
bump(ps, TRIVIA_FLAG)
parse_atom(ps)
emit(ps, m, K"$")
macro_name_position = position(ps)
# We need `inert` rather than `quote` here for subtle reasons:
# We need the expression expander to "see through" the quote
# around the `$x` in `:(f.$x)`, so that the `$x` is expanded
# even though it's double quoted.
emit(ps, m, K"inert")
emit(ps, mark, K".")
elseif k == K"@"
# A macro call after some prefix A has been consumed
# A.@x ==> (macrocall (. A (quote @x)))
# A.@x a ==> (macrocall (. A (quote @x)) a)
# A.@x ==> (macrocall (. A @x))
# A.@x a ==> (macrocall (. A @x) a)
m = position(ps)
if is_macrocall
# @A.B.@x a ==> (macrocall (. (. A (quote B)) (quote (error-t) @x)) a)
# @A.B.@x a ==> (macrocall (. (. A B) (error-t) @x) a)
bump(ps, TRIVIA_FLAG, error="repeated `@` in macro module path")
else
bump(ps, TRIVIA_FLAG)
Expand All @@ -1633,7 +1628,6 @@ function parse_call_chain(ps::ParseState, mark, is_macrocall=false)
parse_macro_name(ps)
macro_name_position = position(ps)
macro_atname_range = (m, position(ps))
emit(ps, m, K"quote")
emit(ps, mark, K".")
elseif k == K"'"
# TODO: Reclaim dotted postfix operators :-)
Expand All @@ -1643,12 +1637,10 @@ function parse_call_chain(ps::ParseState, mark, is_macrocall=false)
error="the .' operator for transpose is discontinued")
else
# Field/property syntax
# f.x.y ==> (. (. f (quote x)) (quote y))
m = position(ps)
# f.x.y ==> (. (. f x) y)
parse_atom(ps, false)
macro_name_position = position(ps)
maybe_strmac_1 = true
emit(ps, m, K"quote")
emit(ps, mark, K".")
end
elseif k == K"'" && !preceding_whitespace(t)
Expand All @@ -1665,8 +1657,8 @@ function parse_call_chain(ps::ParseState, mark, is_macrocall=false)
parse_call_arglist(ps, K"}")
if is_macrocall
# @S{a,b} ==> (macrocall @S (braces a b))
# A.@S{a} ==> (macrocall (. A (quote @S)) (braces a))
# @S{a}.b ==> (. (macrocall @S (braces a)) (quote b))
# A.@S{a} ==> (macrocall (. A @S) (braces a))
# @S{a}.b ==> (. (macrocall @S (braces a)) b)
fix_macro_name_kind!(ps, macro_name_position)
emit(ps, m, K"braces")
emit(ps, mark, K"macrocall")
Expand Down Expand Up @@ -2118,7 +2110,7 @@ function parse_function_signature(ps::ParseState, is_function::Bool)
# function ()(x) end ==> (function (call (tuple-p) x) (block))
emit(ps, mark, K"tuple", PARENS_FLAG)
else
# function (A).f() end ==> (function (call (. (parens A) (quote f))) (block))
# function (A).f() end ==> (function (call (. (parens A) f)) (block))
# function (:)() end ==> (function (call (parens :)) (block))
# function (x::T)() end ==> (function (call (parens (::-i x T))) (block))
# function (::T)() end ==> (function (call (parens (::-pre T))) (block))
Expand Down Expand Up @@ -2147,7 +2139,7 @@ function parse_function_signature(ps::ParseState, is_function::Bool)
# Parse function argument list
# function f(x,y) end ==> (function (call f x y) (block))
# function f{T}() end ==> (function (call (curly f T)) (block))
# function A.f() end ==> (function (call (. A (quote f))) (block))
# function A.f() end ==> (function (call (. A f)) (block))
parse_call_chain(ps, mark)
if peek_behind(ps).kind != K"call"
# function f body end ==> (function (error f) (block body))
Expand Down
13 changes: 10 additions & 3 deletions test/expr.jl
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,6 @@
# Compatibility hack for VERSION >= v"1.4"
# https://github.com/JuliaLang/julia/pull/34077
@test parseatom(":true") == Expr(:quote, true)

# Handling of K"inert"
@test parsestmt("a.\$b") == Expr(:., :a, QuoteNode(Expr(:$, :b)))
end

@testset "Line numbers" begin
Expand Down Expand Up @@ -386,6 +383,16 @@
Expr(:tuple, Expr(:parameters, Expr(:kw, :a, 1)))
end

@testset "Field access syntax" begin
    # Each form of field name below is expected to convert to
    # `Expr(:., :a, QuoteNode(name))`, whatever the parser's own tree shape is.
    dotexpr(name) = Expr(:., :a, QuoteNode(name))

    # Plain identifier, `$` interpolation and explicit `:` quoting.
    @test parsestmt("a.b") == dotexpr(:b)
    @test parsestmt("a.\$b") == dotexpr(Expr(:$, :b))
    @test parsestmt("a.:b") == dotexpr(:b)

    # Module-qualified macro call: the macro name sits in the dotted path,
    # followed by a line number node and then the macro arguments.
    qualified_name = dotexpr(Symbol("@b"))
    @test parsestmt("a.@b x") ==
        Expr(:macrocall, qualified_name, LineNumberNode(1), :x)
end

@testset "dotcall / dotted operators" begin
@test parsestmt("f.(x,y)") == Expr(:., :f, Expr(:tuple, :x, :y))
@test parsestmt("f.(x=1)") == Expr(:., :f, Expr(:tuple, Expr(:kw, :x, 1)))
Expand Down
60 changes: 30 additions & 30 deletions test/parser.jl
Original file line number Diff line number Diff line change
Expand Up @@ -312,8 +312,8 @@ tests = [
"\$f(x)" => "(call (\$ f) x)"
".&(x,y)" => "(call (. &) x y)"
# parse_call_chain
"f(a).g(b)" => "(call (. (call f a) (quote g)) b)"
"\$A.@x" => "(macrocall (. (\$ A) (quote @x)))"
"f(a).g(b)" => "(call (. (call f a) g) b)"
"\$A.@x" => "(macrocall (. (\$ A) @x))"

# non-errors in space sensitive contexts
"[f (x)]" => "(hcat f (parens x))"
Expand All @@ -322,25 +322,25 @@ tests = [
"@foo a b" => "(macrocall @foo a b)"
"@foo (x)" => "(macrocall @foo (parens x))"
"@foo (x,y)" => "(macrocall @foo (tuple-p x y))"
"A.@foo a b" => "(macrocall (. A (quote @foo)) a b)"
"@A.foo a b" => "(macrocall (. A (quote @foo)) a b)"
"A.@foo a b" => "(macrocall (. A @foo) a b)"
"@A.foo a b" => "(macrocall (. A @foo) a b)"
"[@foo x]" => "(vect (macrocall @foo x))"
"[@foo]" => "(vect (macrocall @foo))"
"@var\"#\" a" => "(macrocall (var @#) a)"
"@(A) x" => "(macrocall (parens @A) x)"
"A.@x y" => "(macrocall (. A (quote @x)) y)"
"A.@var\"#\" a"=> "(macrocall (. A (quote (var @#))) a)"
"A.@x y" => "(macrocall (. A @x) y)"
"A.@var\"#\" a"=> "(macrocall (. A (var @#)) a)"
"@+x y" => "(macrocall @+ x y)"
"[email protected]" => "(macrocall (. A (quote @.)) x)"
"[email protected]" => "(macrocall (. A @.) x)"
# Macro names
"@! x" => "(macrocall @! x)"
"@.. x" => "(macrocall @.. x)"
"@\$ y" => "(macrocall @\$ y)"
"@[x] y z" => "(macrocall (error (vect x)) y z)"
# Special @doc parsing rules
"@doc x\ny" => "(macrocall @doc x y)"
"A.@doc x\ny" => "(macrocall (. A (quote @doc)) x y)"
"@A.doc x\ny" => "(macrocall (. A (quote @doc)) x y)"
"A.@doc x\ny" => "(macrocall (. A @doc) x y)"
"@A.doc x\ny" => "(macrocall (. A @doc) x y)"
"@doc x y\nz" => "(macrocall @doc x y)"
"@doc x\n\ny" => "(macrocall @doc x)"
"@doc x\nend" => "(macrocall @doc x)"
Expand All @@ -352,8 +352,8 @@ tests = [
"(a=1)()" => "(call (parens (= a 1)))"
"f (a)" => "(call f (error-t) a)"
"@x(a, b)" => "(macrocall-p @x a b)"
"A.@x(y)" => "(macrocall-p (. A (quote @x)) y)"
"A.@x(y).z" => "(. (macrocall-p (. A (quote @x)) y) (quote z))"
"A.@x(y)" => "(macrocall-p (. A @x) y)"
"A.@x(y).z" => "(. (macrocall-p (. A @x) y) z)"
# do
"f() do\nend" => "(do (call f) (tuple) (block))"
"f() do ; body end" => "(do (call f) (tuple) (block body))"
Expand All @@ -364,8 +364,8 @@ tests = [
"@S[a,b]" => "(macrocall @S (vect a b))"
"@S[a b]" => "(macrocall @S (hcat a b))"
"@S[a; b]" => "(macrocall @S (vcat a b))"
"A.@S[a]" => "(macrocall (. A (quote @S)) (vect a))"
"@S[a].b" => "(. (macrocall @S (vect a)) (quote b))"
"A.@S[a]" => "(macrocall (. A @S) (vect a))"
"@S[a].b" => "(. (macrocall @S (vect a)) b)"
((v=v"1.7",), "@S[a ;; b]") => "(macrocall @S (ncat-2 a b))"
((v=v"1.6",), "@S[a ;; b]") => "(macrocall @S (error (ncat-2 a b)))"
"a[i]" => "(ref a i)"
Expand All @@ -383,9 +383,9 @@ tests = [

# Dotted forms
# Allow `@` in macrocall only in first and last position
"A.B.@x" => "(macrocall (. (. A (quote B)) (quote @x)))"
"@A.B.x" => "(macrocall (. (. A (quote B)) (quote @x)))"
"[email protected]" => "(macrocall (. (. A (quote B)) (error-t) (quote @x)))"
"A.B.@x" => "(macrocall (. (. A B) @x))"
"@A.B.x" => "(macrocall (. (. A B) @x))"
"[email protected]" => "(macrocall (. (. A B) (error-t) @x))"
"@M.(x)" => "(macrocall (dotcall @M (error-t) x))"
"f.(a,b)" => "(dotcall f a b)"
"f.(a=1; b=2)" => "(dotcall f (= a 1) (parameters (= b 2)))"
Expand All @@ -395,27 +395,27 @@ tests = [
"A.:+" => "(. A (quote-: +))"
"A.:.+" => "(. A (quote-: (. +)))"
"A.: +" => "(. A (quote-: (error-t) +))"
"f.\$x" => "(. f (inert (\$ x)))"
"f.\$(x+y)" => "(. f (inert (\$ (parens (call-i x + y)))))"
"A.\$B.@x" => "(macrocall (. (. A (inert (\$ B))) (quote @x)))"
"@A.\$x a" => "(macrocall (. A (inert (error x))) a)"
"A.@x" => "(macrocall (. A (quote @x)))"
"A.@x a" => "(macrocall (. A (quote @x)) a)"
"@A.B.@x a" => "(macrocall (. (. A (quote B)) (quote (error-t) @x)) a)"
"f.\$x" => "(. f (\$ x))"
"f.\$(x+y)" => "(. f (\$ (parens (call-i x + y))))"
"A.\$B.@x" => "(macrocall (. (. A (\$ B)) @x))"
"@A.\$x a" => "(macrocall (. A (error x)) a)"
"A.@x" => "(macrocall (. A @x))"
"A.@x a" => "(macrocall (. A @x) a)"
"@A.B.@x a" => "(macrocall (. (. A B) (error-t) @x) a)"
# .' discontinued
"f.'" => "(wrapper f (error-t '))"
# Field/property syntax
"f.x.y" => "(. (. f (quote x)) (quote y))"
"x .y" => "(. x (error-t) (quote y))"
"f.x.y" => "(. (. f x) y)"
"x .y" => "(. x (error-t) y)"
# Adjoint
"f'" => "(call-post f ')"
"f'ᵀ" => "(call-post f 'ᵀ)"
# Curly calls
"S {a}" => "(curly S (error-t) a)"
"A.@S{a}" => "(macrocall (. A (quote @S)) (braces a))"
"A.@S{a}" => "(macrocall (. A @S) (braces a))"
"@S{a,b}" => "(macrocall @S (braces a b))"
"A.@S{a}" => "(macrocall (. A (quote @S)) (braces a))"
"@S{a}.b" => "(. (macrocall @S (braces a)) (quote b))"
"A.@S{a}" => "(macrocall (. A @S) (braces a))"
"@S{a}.b" => "(. (macrocall @S (braces a)) b)"
"S{a,b}" => "(curly S a b)"
# String macros
"x\"str\"" => """(macrocall @x_str (string-r "str"))"""
Expand Down Expand Up @@ -554,7 +554,7 @@ tests = [
"function (x=1) end" => "(function (tuple-p (= x 1)) (block))"
"function (;x=1) end" => "(function (tuple-p (parameters (= x 1))) (block))"
"function ()(x) end" => "(function (call (tuple-p) x) (block))"
"function (A).f() end" => "(function (call (. (parens A) (quote f))) (block))"
"function (A).f() end" => "(function (call (. (parens A) f)) (block))"
"function (:)() end" => "(function (call (parens :)) (block))"
"function (x::T)() end"=> "(function (call (parens (::-i x T))) (block))"
"function (::g(x))() end" => "(function (call (parens (::-pre (call g x)))) (block))"
Expand All @@ -575,7 +575,7 @@ tests = [
# Function argument list
"function f(x,y) end" => "(function (call f x y) (block))"
"function f{T}() end" => "(function (call (curly f T)) (block))"
"function A.f() end" => "(function (call (. A (quote f))) (block))"
"function A.f() end" => "(function (call (. A f)) (block))"
"function f body end" => "(function (error f) (block body))"
"function f()::T end" => "(function (::-i (call f) T) (block))"
"function f()::g(T) end" => "(function (::-i (call f) (call g T)) (block))"
Expand Down

0 comments on commit 50a4403

Please sign in to comment.