Skip to content

Commit

Permalink
feat[lex]: Implement comments, line continuations.
Browse files Browse the repository at this point in the history
  • Loading branch information
watcol committed Nov 22, 2021
1 parent 7b62d01 commit ef4ba9b
Show file tree
Hide file tree
Showing 2 changed files with 50 additions and 57 deletions.
12 changes: 5 additions & 7 deletions docs/language.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@ syntax and semantics.

## Statement
Statement is a base unit of Walnut, categorized into these types:
- [Empty Statement](#empty-statement)
- [Value Binding](#value-binding)
- [Table Header](#table-header)
- [Function Definition](#function-definition)
Expand All @@ -34,7 +33,8 @@ an empty text).
# All of these are statements.
stmt = "foo"
stmt2 = "bar"
stmt3 =
stmt3 = \
# Empty lines in line continuations are allowed.
"Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do" + \
"eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad" + \
"minim veniam, quis nostrud exercitation ullamco laboris nisi ut" + \
Expand All @@ -60,10 +60,6 @@ key = "value" # This is also a comment
key2 = "# This is not a comment"
```

### Empty statement
An empty statement is a statement containing nothing but [whitespaces](#terms).
An empty statement has no effect on the semantics.

### Value Binding
Value Binding is a statement which registers key/value pairs to the current
[scope](#scope) using [patterns](#pattern). Patterns are on the left of the
Expand Down Expand Up @@ -537,7 +533,9 @@ key = "foo"

## Terms
- "Whitespace" means tab (`U+0009`) or space (`U+0020`).
- "Newline" means line feed (`U+000A`) or carriage return (`U+000D`).
- "Newline" means a string sequence starts with line feed (`U+000A`) or
carriage return (`U+000D`), and contains only tabs, spaces, line feeds,
carriage returns or comments.
- "Parenthesis" means left and right of round brackets (`()`), curly brackets
(`{}`), or square brackets (`[]`).
- "Render" means processing and converting the walnut file to other data
Expand Down
95 changes: 45 additions & 50 deletions src/lex.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,6 @@ peg::parser! { grammar lexer() for str {
/ ">=" { Symbol::GreaterThanEquals }
/ "<<" { Symbol::LeftShift }
/ ">>" { Symbol::RightShift }
/ "\n" { Symbol::NewLine }
/ "\r" { Symbol::NewLine }
/ "=" { Symbol::Assign }
/ "+" { Symbol::Plus }
/ "-" { Symbol::Minus }
Expand All @@ -37,15 +35,20 @@ peg::parser! { grammar lexer() for str {
/ "_" { Symbol::UnderLine }
/ "@" { Symbol::At }

rule _ = [' '|'\t']*
rule comment() = "#" [^ '\n'|'\r']*
rule _ = ([' '|'\t'] / ("\\" [' '|'\t']* __))*
rule __ = comment()? ['\n'|'\r'] ([' '|'\t'|'\n'|'\r'] / comment())*

rule token(file_id: usize) -> PosToken
= s:position!()
t:(sym:symbol() { Token::Symbol(sym) })
e:position!() { PosToken{ file_id, pos: s..e, token: t } }

pub rule tokens(file_id: usize) -> Vec<PosToken>
= _ ts:(token(file_id) ** _) _ { ts }
rule statement(file_id: usize) -> Vec<PosToken>
= _ ts:(token(file_id) ++ _) _ { ts }

pub rule tokens(file_id: usize) -> Vec<Vec<PosToken>>
= __? s:(statement(file_id) ** __) __? { s }
}}

#[derive(Clone, Debug, PartialEq)]
Expand All @@ -67,7 +70,6 @@ pub enum Token {

#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum Symbol {
NewLine,
Assign,
Plus,
Minus,
Expand Down Expand Up @@ -107,172 +109,165 @@ mod tests {
#[test]
fn symbols() {
let code = indoc::indoc! {"
\t \r = + - * / % ** == != < > <= >=
! & | ^ << >> ( ) { } [ ] , . : _ @ "};
# Comment Line
\t \r = + - * / % ** == != < > <= >= \\ # After Comment
! & | ^ << >> ( ) { } [ ] , . : _ @
"};
assert_eq!(
lex(code, 0),
Ok(vec![
Ok(vec![vec![
PosToken {
file_id: 0,
pos: 4..5,
token: Token::Symbol(Symbol::NewLine)
},
PosToken {
file_id: 0,
pos: 6..7,
pos: 21..22,
token: Token::Symbol(Symbol::Assign)
},
PosToken {
file_id: 0,
pos: 8..9,
pos: 23..24,
token: Token::Symbol(Symbol::Plus)
},
PosToken {
file_id: 0,
pos: 10..11,
pos: 25..26,
token: Token::Symbol(Symbol::Minus)
},
PosToken {
file_id: 0,
pos: 12..13,
pos: 27..28,
token: Token::Symbol(Symbol::Multiply)
},
PosToken {
file_id: 0,
pos: 14..15,
pos: 29..30,
token: Token::Symbol(Symbol::Divide),
},
PosToken {
file_id: 0,
pos: 16..17,
pos: 31..32,
token: Token::Symbol(Symbol::Remains),
},
PosToken {
file_id: 0,
pos: 18..20,
pos: 33..35,
token: Token::Symbol(Symbol::Exponent),
},
PosToken {
file_id: 0,
pos: 21..23,
pos: 36..38,
token: Token::Symbol(Symbol::Equals),
},
PosToken {
file_id: 0,
pos: 24..26,
pos: 39..41,
token: Token::Symbol(Symbol::NotEquals),
},
PosToken {
file_id: 0,
pos: 27..28,
pos: 42..43,
token: Token::Symbol(Symbol::LessThan),
},
PosToken {
file_id: 0,
pos: 29..30,
pos: 44..45,
token: Token::Symbol(Symbol::GreaterThan),
},
PosToken {
file_id: 0,
pos: 31..33,
pos: 46..48,
token: Token::Symbol(Symbol::LessThanEquals),
},
PosToken {
file_id: 0,
pos: 34..36,
pos: 49..51,
token: Token::Symbol(Symbol::GreaterThanEquals),
},
PosToken {
file_id: 0,
pos: 36..37,
token: Token::Symbol(Symbol::NewLine),
},
PosToken {
file_id: 0,
pos: 37..38,
pos: 71..72,
token: Token::Symbol(Symbol::Not),
},
PosToken {
file_id: 0,
pos: 39..40,
pos: 73..74,
token: Token::Symbol(Symbol::And),
},
PosToken {
file_id: 0,
pos: 41..42,
pos: 75..76,
token: Token::Symbol(Symbol::Or),
},
PosToken {
file_id: 0,
pos: 43..44,
pos: 77..78,
token: Token::Symbol(Symbol::Xor),
},
PosToken {
file_id: 0,
pos: 45..47,
pos: 79..81,
token: Token::Symbol(Symbol::LeftShift),
},
PosToken {
file_id: 0,
pos: 48..50,
pos: 82..84,
token: Token::Symbol(Symbol::RightShift),
},
PosToken {
file_id: 0,
pos: 51..52,
pos: 85..86,
token: Token::Symbol(Symbol::LeftParenthesis),
},
PosToken {
file_id: 0,
pos: 53..54,
pos: 87..88,
token: Token::Symbol(Symbol::RightParenthesis),
},
PosToken {
file_id: 0,
pos: 55..56,
pos: 89..90,
token: Token::Symbol(Symbol::LeftBrace),
},
PosToken {
file_id: 0,
pos: 57..58,
pos: 91..92,
token: Token::Symbol(Symbol::RightBrace),
},
PosToken {
file_id: 0,
pos: 59..60,
pos: 93..94,
token: Token::Symbol(Symbol::LeftBracket),
},
PosToken {
file_id: 0,
pos: 61..62,
pos: 95..96,
token: Token::Symbol(Symbol::RightBracket),
},
PosToken {
file_id: 0,
pos: 63..64,
pos: 97..98,
token: Token::Symbol(Symbol::Comma),
},
PosToken {
file_id: 0,
pos: 65..66,
pos: 99..100,
token: Token::Symbol(Symbol::Dot),
},
PosToken {
file_id: 0,
pos: 67..68,
pos: 101..102,
token: Token::Symbol(Symbol::Colon),
},
PosToken {
file_id: 0,
pos: 69..70,
pos: 103..104,
token: Token::Symbol(Symbol::UnderLine),
},
PosToken {
file_id: 0,
pos: 71..72,
pos: 105..106,
token: Token::Symbol(Symbol::At),
},
])
]])
);
}
}

0 comments on commit ef4ba9b

Please sign in to comment.