Skip to content

Commit

Permalink
Add specs/tests for lib/tokenizer and lib/parser
Browse files Browse the repository at this point in the history
Test all public functions of lib/parser.js and lib/tokenizer.js.
Ambiguity is currently arbitrated, but should be handled more elegantly.
  • Loading branch information
patgrasso committed Aug 3, 2016
1 parent ef0c869 commit 5b51cae
Show file tree
Hide file tree
Showing 4 changed files with 282 additions and 1 deletion.
224 changes: 224 additions & 0 deletions spec/parser-spec.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,224 @@
/*global describe, it, expect, beforeAll, jasmine*/
'use strict';

const Sym = require('../lib/rules').Sym;
const Rule = require('../lib/rules').Rule;
const parse = require('../lib/parser').parse;
const earley = require('../lib/parser').earley;
const dfs = require('../lib/parser').dfs;
const tokenize = require('../lib/tokenizer');

var sum, prod, factor, gram, f, tokens, states, tree;

describe('parser', () => {

// Builds the arithmetic grammar shared by the specs in this file.
beforeAll(() => {
  sum = new Sym('sum');
  prod = new Sym('prod');
  factor = new Sym('factor');

  // Each rule is named here, but the specs address them by index in `gram`
  // (gram[1], gram[2], gram[3], gram[5]), so the array order below is fixed.
  const addition = new Rule(sum, [sum, '+', prod], (left, _, right) => left + right);
  const sumFromProd = new Rule(sum, [prod], (value) => value);
  const multiplication = new Rule(prod, [prod, '*', factor], (left, _, right) => left * right);
  const prodFromFactor = new Rule(prod, [factor], (value) => value);
  const grouping = new Rule(factor, ['(', sum, ')'], (_, inner) => inner);
  const number = new Rule(factor, [/\d+/], (digits) => parseFloat(digits));

  gram = [
    addition,        // [0] sum    -> sum + prod
    sumFromProd,     // [1] sum    -> prod
    multiplication,  // [2] prod   -> prod * factor
    prodFromFactor,  // [3] prod   -> factor
    grouping,        // [4] factor -> ( sum )
    number           // [5] factor -> /\d+/
  ];
});

// Specs for earley(): inspect the items found in the first returned state set.
describe('earley()', () => {

  // Plain-object shape the specs look for inside a state set.
  const item = (name, rule, position, origin) => ({ name, rule, position, origin });

  // Tokenizes `sentence` with the shared grammar, runs earley() on the tokens,
  // and stores both results in the file-level `tokens` / `states` variables.
  const recognize = (sentence) => {
    tokens = tokenize(sentence, gram);
    states = earley(tokens, gram);
  };

  it('cascades rule completion once a predicted rule successfully parses', () => {
    recognize('2 * 3');

    // 0-1 '2' : factor -> /\d+/
    expect(states[0]).toContain(item('factor', gram[5], 1, 1));

    // 0-1 '2' : prod -> factor
    expect(states[0]).toContain(item('prod', gram[3], 1, 1));

    // 0-1 '2' : sum -> prod
    expect(states[0]).toContain(item('sum', gram[1], 1, 1));
  });

  it('returns an earley item in [0] whose origin = tokens.length', () => {
    recognize('2 * 3');

    // prod -> prod * factor, with origin 3 for the three tokens '2', '*', '3'
    expect(states[0]).toContain(item('prod', gram[2], 3, 3));
  });

  it('does not return a whole-expr earley item when the parse fails', () => {
    recognize('2 * 3 *');

    // No item of any name/rule/position may have origin 4, the value the spec
    // uses for this four-token input.
    expect(states[0]).not.toContain(
      item(jasmine.any(String), jasmine.any(Rule), jasmine.any(Number), 4)
    );
  });

});

// Specs for dfs(): it receives the states returned by earley() together with
// the tokens, and is expected to return a tree of { item, children } nodes or
// throw a SyntaxError when the input could not be fully parsed.
describe('dfs()', () => {

  it('properly parses a sentence', () => {
    tokens = tokenize('2 * 3', gram);
    states = earley(tokens, gram);
    tree = dfs(states, tokens);

    // Root node.
    expect(tree).toEqual(
      jasmine.objectContaining({ item: gram[2] }) // prod -> prod * factor
    );

    // Left operand: prod -> factor -> '2'.
    expect(tree.children[0]).toEqual(
      jasmine.objectContaining({ item: gram[3] }) // prod -> factor
    );
    expect(tree.children[0].children[0]).toEqual(
      jasmine.objectContaining({ item: gram[5] }) // factor -> /\d+/
    );
    expect(tree.children[0].children[0].children[0]).toBe('2');

    // Terminals appear in the tree as the raw token string.
    expect(tree.children[1]).toBe('*');

    // Right operand: factor -> '3'.
    expect(tree.children[2]).toEqual(
      jasmine.objectContaining({ item: gram[5] }) // factor -> /\d+/
    );
    expect(tree.children[2].children[0]).toBe('3');
  });

  // '2 * 3 *' ends with an operator that has no right-hand operand.
  it('throws an error if the parse did not finish due to a trailing operator', () => {
    tokens = tokenize('2 * 3 *', gram);
    states = earley(tokens, gram);
    f = () => dfs(states, tokens);

    expect(f).toThrowError(SyntaxError);
  });

  // '* 2 * 3' starts with an operator that has no left-hand operand.
  it('throws an error if the parse did not finish', () => {
    tokens = tokenize('* 2 * 3', gram);
    states = earley(tokens, gram);
    f = () => dfs(states, tokens);

    expect(f).toThrowError(SyntaxError);
  });

});

// Specs for parse(): the sentence-in, tree-out entry point.
describe('parse()', () => {

  it('properly parses a sentence', () => {
    tree = parse('2 * 3', gram);

    // Root: prod -> prod * factor
    expect(tree).toEqual(jasmine.objectContaining({ item: gram[2] }));

    // Left operand: prod -> factor
    const leftOperand = tree.children[0];
    expect(leftOperand).toEqual(jasmine.objectContaining({ item: gram[3] }));

    // ... which wraps factor -> /\d+/ over the token '2'
    const leftFactor = leftOperand.children[0];
    expect(leftFactor).toEqual(jasmine.objectContaining({ item: gram[5] }));
    expect(leftFactor.children[0]).toBe('2');

    // Operator terminal
    expect(tree.children[1]).toBe('*');

    // Right operand: factor -> /\d+/ over the token '3'
    const rightOperand = tree.children[2];
    expect(rightOperand).toEqual(jasmine.objectContaining({ item: gram[5] }));
    expect(rightOperand.children[0]).toBe('3');
  });

  it('accepts an optional tokenizer function', () => {
    // Splitting on spaces yields ['2', '*', '3'], which the grammar accepts.
    f = () => parse('2 * 3', gram, (sent) => sent.split(' '));
    expect(f).not.toThrowError();

    // Splitting on '*' discards the operator, leaving ['2', '3'].
    f = () => parse('2*3', gram, (sent) => sent.split('*'));
    expect(f).toThrowError(SyntaxError);
  });

  // Sentences that must parse without throwing under the shared grammar.
  // One spec is registered per sentence, in this order.
  [
    '23 + (32 * 46)',
    '(23 + 32) * 46',
    '23 + 32 * 46',
    '((12))',
    '1 * 2 + 3 * 4 + 5',
    '1 + 2 + 3'
  ].forEach((sentence) => {
    it(`parses ${sentence} given a rule set (see parser-spec.js)`, () => {
      f = () => parse(sentence, gram);
      expect(f).not.toThrow();
    });
  });

  // TODO: resolve ambiguity by spawning a separate parse tree
  it('arbitrates on an ambiguous parse', () => {
    // Temporarily add sum -> prod + sum next to the existing sum -> sum + prod,
    // then remove it again so later specs see the original grammar.
    gram.push(new Rule(sum, [prod, '+', sum]));
    f = () => parse('1 + 2 * 3 + 4', gram);
    expect(f).not.toThrow();
    gram.pop();
  });

  it('allows multiple of same rule in same earley state, ' +
      'as long as they have differing position and origin', () => {
    const rules = [
      new Rule(factor, [factor, factor]),
      new Rule(factor, [factor, '+']),
      new Rule(factor, [/\d+/])
    ];
    f = () => parse('1 + 2 3', rules);
    expect(f).not.toThrow();
  });

  it('does not sub-match a regex that does not match the current token', () => {
    // Same grammar as the previous spec, but '+' is given as a regex terminal.
    const rules = [
      new Rule(factor, [factor, factor]),
      new Rule(factor, [factor, /\+/]),
      new Rule(factor, [/\d+/])
    ];
    f = () => parse('1 + 2 3', rules);
    expect(f).not.toThrow();
  });

});

});
6 changes: 6 additions & 0 deletions spec/rules-spec.js
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,12 @@ describe('Rule', () => {
expect(r.evaluate instanceof Function).toBe(true);
});

// A rule whose right-hand side is a single non-terminal (sum -> prod) must
// keep its left-hand symbol and contain the right-hand symbol.
it('accepts a single non-terminal on the RHS', () => {
r = new Rule(sum, [prod], (x) => x);
expect(r.lhs).toBe(sum);
// NOTE(review): toContain() on the rule itself presumably relies on Rule
// being array-like over its RHS symbols; confirm against lib/rules.
expect(r).toContain(prod);
});

});

describe('evaluate()', () => {
Expand Down
51 changes: 51 additions & 0 deletions spec/tokenizer-spec.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
/*global describe, it, expect, beforeAll*/
'use strict';

const Sym = require('../lib/rules').Sym;
const Rule = require('../lib/rules').Rule;
const tokenize = require('../lib/tokenizer');

var sum, prod, factor, gram, f, tokens;

// Specs for the tokenizer: tokenize(sentence, grammar) is expected to return
// an array of token strings derived from the terminals in `grammar`.
describe('tokenizer()', () => {

  // Same arithmetic grammar as the parser specs. Its terminals are the
  // strings '+', '*', '(' and ')' plus the regex /\d+/.
  beforeAll(() => {
    sum = new Sym('sum');
    prod = new Sym('prod');
    factor = new Sym('factor');

    gram = [
      new Rule(sum, [sum, '+', prod], (x, _, y) => x + y),
      new Rule(sum, [prod], (x) => x),
      new Rule(prod, [prod, '*', factor], (x, _, y) => x * y),
      // The comma after `prod` is required: without it, `prod [factor]` is
      // parsed as the property access prod[factor] and the arguments shift.
      new Rule(prod, [factor], (x) => x),
      new Rule(factor, ['(', sum, ')'], (_, x) => x),
      new Rule(factor, [/\d+/], (n) => parseFloat(n))
    ];
  });

  it('throws an error when no grammar is given', () => {
    f = () => tokenize('2 * 3');
    expect(f).toThrowError();
  });

  it('splits a string by each terminal character in the grammar', () => {
    tokens = tokenize('(2*3)+(4*5)', gram);
    expect(tokens).toEqual([
      '(', '2', '*', '3', ')',
      '+',
      '(', '4', '*', '5', ')'
    ]);
  });

  it('strips whitespace around each token', () => {
    tokens = tokenize('2 * 3', gram);
    expect(tokens).toEqual(['2', '*', '3']);
  });

  it('treats regex matches as own tokens', () => {
    tokens = tokenize('1 23 456', gram);
    expect(tokens).toEqual(['1', '23', '456']);
  });

});
2 changes: 1 addition & 1 deletion test.js
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ function printTree(tree, level) {
console.log(`${Array(level).join(' ')})`);
return;
}
return console.log(`${Array(level).join(' ')}${tree}`);
return console.log(`${Array(level).join(' ')}'${tree}'`);
}


Expand Down

0 comments on commit 5b51cae

Please sign in to comment.