Skip to content

Commit

Permalink
First commit -- the parser and the Rule, Sym classes
Browse files Browse the repository at this point in the history
The parser implements the Earley top-down chart parser and uses
depth-first search to construct the parse tree.
The Rule class is a simple Array extension that contains a 'lhs'
property.
The Sym class is just an object with the 'name' attribute. Nothing
fancy.
  • Loading branch information
patgrasso committed Aug 2, 2016
0 parents commit 2b963f2
Show file tree
Hide file tree
Showing 4 changed files with 381 additions and 0 deletions.
245 changes: 245 additions & 0 deletions lib/parser.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,245 @@
/*eslint no-console:0*/
'use strict';

const rules = require('./rules');
const interp = require('./interp');
const tokenize = require('./tokenizer');

/**
 * Parses a sentence with an Earley chart parser, then walks the chart with
 * a depth-first search to build the parse tree.
 *
 * @param {string} sent - Sentence to parse.
 * @param {Function} [tokenizer] - Optional function taking the sentence and
 *   returning an array of token strings; defaults to the module-level
 *   `tokenize`.
 * @returns {Object} Parse tree of the form {item, children} (see dfs()).
 * @throws {SyntaxError} If the sentence cannot be fully parsed.
 */
function parse(sent, tokenizer) {
  const tokens = (tokenizer || tokenize)(sent);
  // One chart state per gap between tokens (tokens.length + 1 total).
  const states = Array.from({ length: tokens.length + 1 }, () => []);

  // Seed state 0 with every grammar rule, dot at position 0.
  states[0].push(...rules.rules.map((rule) => ({
    name    : rule.lhs.name,
    rule    : rule,
    position: 0,
    origin  : 0
  })));

  for (let i = 0; i <= tokens.length; i += 1) {
    // states[i] grows while we iterate, so re-check its length each pass.
    for (let j = 0; j < states[i].length; j += 1) {
      predict(tokens, states, i, j);
      scan(tokens, states, i, j);
      complete(tokens, states, i, j);
    }
  }

  return dfs(swap(removeUnfinishedItems(states)), tokens);
}


/**
 * Earley prediction step: when the symbol after the dot in the current item
 * is a non-terminal, add a fresh item (dot at 0, origin i) for every grammar
 * rule producing that non-terminal.
 *
 * Fixes over the original:
 *  - only rules whose lhs matches the expected non-terminal are predicted
 *    (the original predicted every rule in the grammar);
 *  - the duplicate check compares against position 0, where predicted items
 *    actually sit (the original compared against curr.position, which let
 *    duplicates accumulate whenever curr.position !== 0 — the old `> 100`
 *    debug throw guarded against exactly that blow-up).
 *
 * @param {string[]} tokens - Token stream (unused; uniform signature with
 *   scan/complete).
 * @param {Array[]} states - Chart of earley item lists.
 * @param {number} i - Index of the current state.
 * @param {number} j - Index of the current item within states[i].
 */
function predict(tokens, states, i, j) {
  let curr = states[i][j];
  let expected = curr.rule[curr.position];

  if (!(expected instanceof rules.Sym)) {
    return;
  }

  rules.rules.forEach((rule) => {
    // Only predict rules that actually produce the expected non-terminal.
    if (rule.lhs !== expected) {
      return;
    }

    let stateHasItem = states[i].some((earleyItem) => {
      return earleyItem.rule === rule && earleyItem.position === 0;
    });

    if (!stateHasItem) {
      states[i].push({
        name    : rule.lhs.name,
        rule    : rule,
        position: 0,
        origin  : i
      });
    }
  });
}


/**
 * Earley scan step: if the symbol after the dot in the current item is a
 * terminal (RegExp or literal string) matching the token at position i,
 * push a copy of the item with the dot advanced into the next state.
 *
 * @param {string[]} tokens - Token stream.
 * @param {Array[]} states - Chart of earley item lists
 *   (length tokens.length + 1).
 * @param {number} i - Index of the current state / token.
 * @param {number} j - Index of the current item within states[i].
 */
function scan(tokens, states, i, j) {
  let curr = states[i][j];
  let expected = curr.rule[curr.position];

  // Nothing to scan past the last token. The original guard
  // (i < states.length) was off by one: at i === tokens.length a regex
  // could match the string "undefined" and push into the nonexistent
  // states[i + 1].
  if (i >= tokens.length) {
    return;
  }

  let matches =
    (expected instanceof RegExp && expected.test(tokens[i])) ||
    (typeof expected === 'string' && expected === tokens[i]);

  if (matches) {
    let newItem = Object.assign({}, curr);
    newItem.position += 1;
    states[i + 1].push(newItem);
  }
}

/**
 * Earley completion step: when the current item's dot has reached the end of
 * its rule, advance every item in its origin state that was waiting on this
 * rule's left-hand side.
 *
 * @param {string[]} tokens - Token stream (unused; uniform signature).
 * @param {Array[]} states - Chart of earley item lists.
 * @param {number} i - Index of the current state.
 * @param {number} j - Index of the current item within states[i].
 */
function complete(tokens, states, i, j) {
  const finished = states[i][j];

  // Check the position first, since the dot may be past the end of the rule.
  if (finished.position < finished.rule.length) {
    return;
  }

  states[finished.origin].forEach((waiting) => {
    if (waiting.rule[waiting.position] !== finished.rule.lhs) {
      return;
    }

    // Don't add the advanced item twice.
    const alreadyPresent = states[i].some((item) =>
      item.rule === waiting.rule && item.position === waiting.position + 1
    );

    if (!alreadyPresent) {
      const advanced = Object.assign({}, waiting);
      advanced.position += 1;
      states[i].push(advanced);
    }
  });
}

/**
 * Filters each state down to completed items only (dot at the end of the
 * rule), returning a new chart of the same length.
 *
 * @param {Array[]} states - Chart of earley item lists.
 * @returns {Array[]} New chart containing only finished items.
 */
function removeUnfinishedItems(states) {
  const isFinished = (item) => item.position >= item.rule.length;
  return states.map((state) => state.filter(isFinished));
}

/**
 * Re-indexes the chart so each item is stored at its origin state, while
 * each item's `origin` field is overwritten with the index of the state in
 * which the item was completed. Mutates the items in place.
 *
 * @param {Array[]} states - Chart of finished earley items.
 * @returns {Array[]} The re-indexed chart.
 */
function swap(states) {
  const byOrigin = Array.from({ length: states.length }, () => []);

  states.forEach((state, completedAt) => {
    for (const item of state) {
      byOrigin[item.origin].push(item);
      // After the move, `origin` records where the item finished.
      item.origin = completedAt;
    }
  });

  return byOrigin;
}

/**
 * Picks the best root item in state 0 (the finished item spanning the most
 * tokens) and reconstructs the parse tree from it.
 *
 * @param {Array[]} states - Chart produced by swap(removeUnfinishedItems(...)).
 * @param {string[]} tokens - Token stream.
 * @returns {Object} Parse tree: {item: <rule name>, children: [...]}.
 * @throws {SyntaxError} If state 0 is empty or no item spans the whole input.
 */
function dfs(states, tokens) {
  let root = states[0].reduce((best, curr) => {
    if (best == null || curr.origin > best.origin) {
      return curr;
    }
    return best;
  }, null);

  // Guard: an empty state 0 previously fell through to a TypeError on
  // `root.origin`; report it as a parse failure instead.
  if (root == null) {
    throw new SyntaxError(`Parsing error near '${tokens[0]}' `);
  }

  if (root.origin !== tokens.length) {
    throw new SyntaxError(`Parsing error near '${tokens[root.origin]}' `);
  }

  return {
    item : root.name,
    children: dfsHelper(states, root, 0, 0, tokens)
  };
}


/**
 * Depth-first walk of the re-indexed chart that reconstructs the children
 * of `root`, matching each symbol of root's rule against the input starting
 * at token index `state`.
 *
 * @param {Array[]} states - Chart indexed by origin (see swap()); after
 *   swap(), each item's `origin` is the state where the item finished.
 * @param {Object} root - Earley item whose rule is being expanded.
 * @param {number} state - Current token/state index.
 * @param {number} depth - Index into root.rule of the symbol to match next.
 * @param {string[]} tokens - Token stream.
 * @returns {?Array} Child list mixing token strings and {item, children}
 *   subtrees, or null if no expansion matches from here.
 */
function dfsHelper(states, root, state, depth, tokens) {
  var edges;

  // Base case: we finished the root rule
  if (state === root.origin && depth === root.rule.length) {
    return [];
  }

  // If the current production symbol is a terminal
  if (root.rule[depth] instanceof RegExp) {
    // Regex terminal: consume one token and recurse on the rest of the rule.
    if (root.rule[depth].test(tokens[state])) {
      let subMatch = dfsHelper(states, root, state + 1, depth + 1, tokens);

      if (subMatch) {
        return [tokens[state]].concat(subMatch);
      }
    }
    return null;
  } else if (typeof root.rule[depth] === 'string') {
    // Literal terminal: same as above but with exact string equality.
    if (root.rule[depth] === tokens[state]) {
      let subMatch = dfsHelper(states, root, state + 1, depth + 1, tokens);

      if (subMatch) {
        return [tokens[state]].concat(subMatch);
      }
    }
    return null;
  }

  // Otherwise, it must be a non-terminal: try every finished item at this
  // state whose lhs matches, keeping only expansions where the remainder of
  // root's rule can also be matched.
  edges = states[state]
    .filter((item) => item.rule.lhs === root.rule[depth])
    .map((item) => {
      // item.origin is the state where `item` ended (post-swap), so the
      // rest of root's rule resumes there.
      let subMatch = dfsHelper(states, root, item.origin, depth + 1, tokens);

      if (subMatch) {
        // NOTE(review): child nodes store the whole earley item here, while
        // dfs() stores just `root.name` at the top level — confirm that
        // interp.valueOf handles both shapes.
        return [{
          item : item,
          children: dfsHelper(states, item, state, 0, tokens)
        }].concat(subMatch);
      }
      return null;
    })
    .filter((list) => list);

  // If several distinct expansions survived, the grammar is ambiguous for
  // this span; log it but still return the first expansion found.
  if (edges.length > 1) {
    let diffs = edges.filter(
      (tree) => JSON.stringify(tree) !== JSON.stringify(edges[0])
    );

    if (diffs.length > 0) {
      console.log('Ambiguity\n' + JSON.stringify(edges, null, 2));
    }
  }

  return edges[0];
}


/**
 * Evaluates a parse tree bottom-up: leaves (token strings) and null are
 * returned as-is; for internal nodes, the non-null values of the children
 * are handed to interp.valueOf along with the node's item.
 *
 * @param {Object|string|null} parseTree - Tree from dfs(), a token, or null.
 * @returns {*} The computed value for the subtree.
 */
function interpret(parseTree) {
  // Leaves and missing subtrees evaluate to themselves.
  if (parseTree == null || typeof parseTree === 'string') {
    return parseTree;
  }

  const values = [];
  for (const child of parseTree.children) {
    const value = interpret(child);
    if (value != null) {
      values.push(value);
    }
  }

  return interp.valueOf(parseTree.item, values);
}



// Sample inputs for manual testing:
//let sentence = '23 + ( 32 * 46 )';
//let sentence = '( 23 + 32 ) * 46';
//let sentence = '23 + 32 * 46';
//let sentence = '( ( 12 ) )';
//let sentence = '1 * 2 + 3 * 4 + 5';
//let sentence = '1 + 2 + 3';
let sentence = '1^3 + 2 * 3(3)';

// parse() already returns the finished parse tree (it runs dfs internally),
// so use its result directly. The original demo passed this tree back into
// dfs() as if it were a chart of states, and also called the tokenizer
// module object as a function.
let tree = parse(sentence);

console.log('\n\n--FINAL--');
console.log('\n');
console.log('input:', sentence);
console.log('=================');

console.log('\n~~ dfs ~~');
console.log(JSON.stringify(tree, null, 2));

console.log(interpret(tree));

module.exports.parse = parse;
module.exports.interpret = interpret;
93 changes: 93 additions & 0 deletions lib/rules.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@

/**
 * A grammar production rule. Behaves like an array of right-hand-side
 * symbols (strings, RegExps, or Syms) with an `lhs` property naming the
 * non-terminal it produces.
 *
 * @param {Sym} lhs - Left-hand-side non-terminal.
 * @param {Array} rhs - Right-hand-side symbols; must be non-empty.
 * @param {Function} [valuator] - Optional semantic action computing the
 *   rule's value from its children's values. Every rule definition below
 *   passes one, but the original constructor silently dropped it.
 * @returns {Rule} An array-like Rule instance (works without `new`).
 * @throws {Error} If rhs is missing or empty.
 */
function Rule(lhs, rhs, valuator) {
  let arr = [];

  if (!rhs || rhs.length === 0) {
    throw new Error('Rule does not produce anything');
  }
  arr.push.apply(arr, rhs);
  arr.lhs = lhs;
  // NOTE(review): stored as `fn` so an interpreter can evaluate rules;
  // confirm this matches what lib/interp expects.
  arr.fn = valuator;
  // Object.setPrototypeOf instead of the deprecated __proto__ setter.
  Object.setPrototypeOf(arr, Rule.prototype);

  return arr;
}
Object.setPrototypeOf(Rule.prototype, Array.prototype);


/**
 * A named non-terminal grammar symbol. Works with or without `new`.
 *
 * @param {string} name - The symbol's name.
 * @returns {Sym} A Sym instance carrying `name`.
 */
function Sym(name) {
  // Object.create avoids reassigning __proto__ after construction.
  const symbol = Object.create(Sym.prototype);
  symbol.name = name;
  return symbol;
}

// Non-terminal symbols of the arithmetic grammar.
//const expr = new Sym('expr');
//const addexp = new Sym('addexp');
const sum = new Sym('sum');
const prod = new Sym('prod');
const factor = new Sym('factor');
const exp = new Sym('exp');
//const number = new Sym('number');

/*
let rules = {
  multiply: [expr, '*', expr],
  add     : [expr, '+', expr],
  //subtract: [expr, '-', expr],
  //divide  : [expr, '/', expr],
  group   : ['(', expr, ')'],
  number  : [/^\d+$/]
};
*/

/*
let rules = {
  multiply: new Rule(expr, [expr, '*', expr]),
  add     : new Rule(expr, [expr, '+', expr]),
  divide  : new Rule(expr, [expr, '/', expr]),
  subtract: new Rule(expr, [expr, '-', expr]),
  group   : new Rule(expr, ['(', expr, ')']),
  number  : new Rule(expr, [/^\d+$/])
};
*/

// Production rules. Operator precedence is encoded structurally:
// sum -> prod -> exp -> factor, with parentheses resetting to sum.
// The third argument of each Rule is a semantic action for evaluation.
// NOTE(review): verify the semantic action (third argument) is actually
// retained by the Rule constructor and consumed by lib/interp.
let rules = [
  // sum
  new Rule(sum , [sum, '+', prod] , (x, _, y) => x + y),
  new Rule(sum , [prod] , (x) => x),

  // product
  new Rule(prod , [prod, '*', exp] , (x, _, y) => x * y),
  new Rule(prod , [exp, '*', prod] , (x, _, y) => x * y),
  //new Rule(prod , [prod, '*', prod] , (x, _, y) => x * y),
  new Rule(prod , [exp] , (x) => x),
  //new Rule(prod , [factor]),

  // distributive product, e.g. "2(3)" and "(2)3"
  new Rule(prod , [exp, '(', exp, ')'] , (x, _, y) => x * y),
  new Rule(prod , ['(', exp, ')', exp] , (_, x, __, y) => x * y),

  // exponent
  new Rule(exp , [factor, '^', factor] , (x, _, y) => Math.pow(x, y)),
  new Rule(exp , [factor] , (x) => x),

  // factor
  new Rule(factor , ['(', sum, ')'] , (_, x) => x),
  new Rule(factor , [/\d+/] , (n) => parseFloat(n))
];

/**
 * Collects every terminal (literal string or RegExp) appearing on the
 * right-hand side of any rule in the grammar.
 *
 * @returns {Array} Flat list of terminals (may contain duplicates).
 */
function getTokens() {
  const terminals = [];

  for (const rule of rules) {
    for (const sym of rule) {
      if (typeof sym === 'string' || sym instanceof RegExp) {
        terminals.push(sym);
      }
    }
  }
  return terminals;
}

module.exports = {
rules : rules,
Rule : Rule,
Sym : Sym,
getTokens : getTokens
};

17 changes: 17 additions & 0 deletions lib/tokenizer.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@

/**
 * Splits a sentence into tokens using the given terminal symbols as
 * delimiters, keeping the delimiters themselves as tokens.
 *
 * @param {string} sent - Sentence to tokenize.
 * @param {Array} [terminals] - Literal strings and RegExps to split on.
 *   When omitted, falls back to splitting on whitespace, since callers
 *   (lib/parser.js) invoke the tokenizer with only the sentence.
 * @returns {string[]} Trimmed, non-empty tokens.
 */
function tokenize(sent, terminals) {
  // Fallback: no terminal list means plain whitespace tokenization.
  // NOTE(review): ideally callers pass rules.getTokens() — confirm.
  if (terminals == null) {
    return sent.split(/\s+/).filter((item) => item !== '');
  }

  const escaped = terminals.map((token) => {
    if (typeof token === 'string') {
      // Escape regex metacharacters in literal terminals.
      return token.replace(/[\-\[\]\/\{\}\(\)\*\+\?\.\\\^\$\|]/g, '\\$&');
    }
    return token.source;
  });
  // Capturing group keeps the delimiters in the split output.
  const delims = RegExp('(' + escaped.join('|') + ')');

  return sent
    .split(delims)
    .map((item) => item.trim())
    .filter((item) => item !== '');
}

// Export the function itself so `require('./tokenizer')` is directly
// callable (lib/parser.js does exactly that), while keeping the original
// `.tokenize` property for compatibility.
module.exports = tokenize;
module.exports.tokenize = tokenize;

26 changes: 26 additions & 0 deletions package.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
{
"name": "parsey",
"version": "0.0.0",
"description": "Parser for context-free grammars",
"main": "parser.js",
"scripts": {
"test": "jasmine"
},
"repository": {
"type": "git",
"url": "git+https://github.com/patgrasso/parsey.git"
},
"keywords": [
"parse",
"grammar",
"cfg",
"ast",
"earley"
],
"author": "Pat Grasso",
"license": "MIT",
"bugs": {
"url": "https://github.com/patgrasso/parsey/issues"
},
"homepage": "https://github.com/patgrasso/parsey#readme"
}

0 comments on commit 2b963f2

Please sign in to comment.