// Copyright (C) 2007 Chris Double.
// 
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
// 
// 1. Redistributions of source code must retain the above copyright notice,
//    this list of conditions and the following disclaimer.
// 
// 2. Redistributions in binary form must reproduce the above copyright notice,
//    this list of conditions and the following disclaimer in the documentation
//    and/or other materials provided with the distribution.
// 
// THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,
// INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
// FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
// DEVELOPERS AND CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
// OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
// WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
// OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
// ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
function identity(x) {
    return x;
}

function foldl(f, initial, seq) {
    for(var i=0; i< seq.length; ++i) 
	initial = f(initial, seq[i]);
    return initial;
}

var memoize = true;

function ParseState(input, index) {
    this.input = input;
    this.index = index || 0;
    this.length = input.length - this.index;
    this.cache = { };
    return this;
}

ParseState.prototype.from = function(index) {
    var r = new ParseState(this.input, this.index + index);
    r.cache = this.cache;
    r.length = this.length - index;
    return r;
}

ParseState.prototype.substring = function(start, end) {
    return this.input.substring(start + this.index, (end || this.length) + this.index);
}

ParseState.prototype.trimLeft = function() {
    var s = this.substring(0);
    var m = s.match(/^\s+/);
    return m ? this.from(m[0].length) : this;
}

ParseState.prototype.at = function(index) {
    return this.input[this.index + index];
}

ParseState.prototype.toString = function() {
    return 'PS"' + this.substring(0) + '"'; 
}

ParseState.prototype.getCached = function(pid) {
    if(!memoize)
	return false;

    var p = this.cache[pid];
    if(p) 
	return p[this.index];
    else
	return false;
}

ParseState.prototype.putCached = function(pid, cached) {
    if(!memoize)
	return false;

    var p = this.cache[pid];
    if(p)
	p[this.index] = cached;
    else {
	p = this.cache[pid] = { };
	p[this.index] = cached;
    }
}

function ps(str) {
    return new ParseState(str);
}

// 'r' is the remaining string to be parsed.
// 'matched' is the portion of the string that
// was successfully matched by the parser.
// 'ast' is the AST returned by the successfull parse.
function make_result(r, matched, ast) {
	return { remaining: r, matched: matched, ast: ast };
}

var parser_id = 0;
		
// 'token' is a parser combinator that given a string, returns a parser
// that parses that string value. The AST contains the string that was parsed.
function token(s) {
    var pid = parser_id++;
    return function(state) {
	var savedState = state;
	var cached = savedState.getCached(pid);
	if(cached)
	    return cached;

	var r = state.length >= s.length && state.substring(0,s.length) == s;
	if(r) 
	    cached = { remaining: state.from(s.length), matched: s, ast: s };
	else
	    cached = false;
	savedState.putCached(pid, cached);
	return cached;
    };
}

// Like 'token' but for a single character. Returns a parser that given a string
// containing a single character, parses that character value.
function ch(c) {
    var pid = parser_id++;
    return function(state) {
	var savedState = state;
	var cached = savedState.getCached(pid);
	if(cached)
	    return cached;
	var r = state.length >= 1 && state.at(0) == c;
	if(r) 
	    cached = { remaining: state.from(1), matched: c, ast: c };
	else
	    cached = false;
	savedState.putCached(pid, cached);
	return cached;
    };
}

// 'range' is a parser combinator that returns a single character parser
// (similar to 'ch'). It parses single characters that are in the inclusive
// range of the 'lower' and 'upper' bounds ("a" to "z" for example).
function range(lower, upper) {
    var pid = parser_id++;
    return function(state) {
	var savedState = state;
	var cached = savedState.getCached(pid);
	if(cached)
	    return cached;

	if(state.length < 1) 
	    cached = false;
	else {
	    var ch = state.at(0);
	    if(ch >= lower && ch <= upper) 
		cached = { remaining: state.from(1), matched: ch, ast: ch };
	    else
		cached = false;
	}
	savedState.putCached(pid, cached);
	return cached;
    };
}

// Helper function to convert string literals to token parsers
// and perform other implicit parser conversions.
function toParser(p) {
    return (typeof(p) == "string") ? token(p) : p;
}

// Parser combinator that returns a parser that
// skips whitespace before applying parser.
function whitespace(p) {
    var p = toParser(p);
    var pid = parser_id++;
    return function(state) {
	var savedState = state;
	var cached = savedState.getCached(pid);
	if(cached)
	    return cached;

	cached = p(state.trimLeft());
	savedState.putCached(pid, cached);
	return cached;
    };
}

// Parser combinator that passes the AST generated from the parser 'p' 
// to the function 'f'. The result of 'f' is used as the AST in the result.
function action(p, f) {
    var p = toParser(p);
    var pid = parser_id++;
    return function(state) {
	var savedState = state;
	var cached = savedState.getCached(pid);
	if(cached) 
	    return cached;

	var x = p(state);
	if(x) {
	    x.ast = f(x.ast);
	    cached = x;
	}
	else {
	    cached = false;
	}
	savedState.putCached(pid, cached);
	return cached;
    };
}

// Given a parser that produces an array as an ast, returns a
// parser that produces an ast with the array joined by a separator.
function join_action(p, sep) {
    return action(p, function(ast) { return ast.join(sep); });
}

// Given an ast of the form [ Expression, [ a, b, ...] ], convert to
// [ [ [ Expression [ a ] ] b ] ... ]
// This is used for handling left recursive entries in the grammar. e.g.
// MemberExpression:
//   PrimaryExpression
//   FunctionExpression
//   MemberExpression [ Expression ]
//   MemberExpression . Identifier
//   new MemberExpression Arguments 
function left_factor(ast) {
    return foldl(function(v, action) { 
		     return [ v, action ]; 
		 }, 
		 ast[0], 
		 ast[1]);
}

// Return a parser that left factors the ast result of the original
// parser.
function left_factor_action(p) {
    return action(p, left_factor);
}

// 'negate' will negate a single character parser. So given 'ch("a")' it will successfully
// parse any character except for 'a'. Or 'negate(range("a", "z"))' will successfully parse
// anything except the lowercase characters a-z.
function negate(p) {
    var p = toParser(p);
    var pid = parser_id++;
    return function(state) {
	var savedState = state;
	var cached = savedState.getCached(pid);
	if(cached)
	    return cached;

	if(state.length >= 1) {
	    var r = p(state);
	    if(!r) 
		cached =  make_result(state.from(1), state.at(0), state.at(0));
	    else
		cached = false;
	}
	else {
	    cached = false;
	}
	savedState.putCached(pid, cached);
	return cached;
    };
}

// 'end_p' is a parser that is successful if the input string is empty (ie. end of parse).
function end_p(state) {
    if(state.length == 0) 
	return make_result(state, undefined, undefined);
    else
	return false;
}

// 'nothing_p' is a parser that always fails.
function nothing_p(state) {
    return false;
}

// 'sequence' is a parser combinator that processes a number of parsers in sequence.
// It can take any number of arguments, each one being a parser. The parser that 'sequence'
// returns succeeds if all the parsers in the sequence succeeds. It fails if any of them fail.
function sequence() {
    var parsers = [];
    for(var i = 0; i < arguments.length; ++i) 
	parsers.push(toParser(arguments[i]));	    
    var pid = parser_id++;
    return function(state) {
	var savedState = state;
	var cached = savedState.getCached(pid);
	if(cached) {
	    return cached;
	}

	var ast = [];
	var matched = "";
	var i;
	for(i=0; i< parsers.length; ++i) {
	    var parser = parsers[i];	
	    var result = parser(state);
	    if(result) {
		state = result.remaining;
		if(result.ast != undefined) {
		    ast.push(result.ast);
		    matched = matched + result.matched;
		}
	    }
	    else {
		break;
	    }
	}
	if(i == parsers.length) {
	    cached = make_result(state, matched, ast);
	}
	else 
	    cached = false;
	savedState.putCached(pid, cached);
	return cached;
    };
}

// Like sequence, but ignores whitespace between individual parsers.
function wsequence() {
    var parsers = [];
    for(var i=0; i < arguments.length; ++i) {
	parsers.push(whitespace(toParser(arguments[i])));
    }
    return sequence.apply(null, parsers);       
}

// 'choice' is a parser combinator that provides a choice between other parsers.
// It takes any number of parsers as arguments and returns a parser that will try
// each of the given parsers in order. The first one that succeeds results in a 
// successfull parse. It fails if all parsers fail.
function choice() {
    var parsers = [];
    for(var i = 0; i < arguments.length; ++i) 
	parsers.push(toParser(arguments[i]));	    
    var pid = parser_id++;
    return function(state) {
	var savedState = state;
	var cached = savedState.getCached(pid);
	if(cached) {
	    return cached;
	}
	var i;
	for(i=0; i< parsers.length; ++i) {
	    var parser=parsers[i];
	    var result = parser(state);
	    if(result) {
		break;
	    }
	}	
	if(i == parsers.length)
	    cached = false;
	else
	    cached = result;
	savedState.putCached(pid, cached);
	return cached;
    }
}

// 'butnot' is a parser combinator that takes two parsers, 'p1' and 'p2'. 
// It returns a parser that succeeds if 'p1' matches and 'p2' does not, or
// 'p1' matches and the matched text is longer that p2's.
// Useful for things like: butnot(IdentifierName, ReservedWord)
function butnot(p1,p2) {
    var p1 = toParser(p1);
    var p2 = toParser(p2);
    var pid = parser_id++;
    
    // match a but not b. if both match and b's matched text is shorter
    // than a's, a failed match is made
    return function(state) {
	var savedState = state;
	var cached = savedState.getCached(pid);
	if(cached)
	    return cached;
	
	var br = p2(state);
	if(!br) {
	    cached = p1(state);
	} else {
	    var ar = p1(state);
	    if(ar.matched.length > br.matched.length)
		cached = ar;
	    else
		cached = false;
	}
	savedState.putCached(pid, cached);
	return cached;
    }
}

// 'difference' is a parser combinator that takes two parsers, 'p1' and 'p2'. 
// It returns a parser that succeeds if 'p1' matches and 'p2' does not. If
// both match then if p2's matched text is shorter than p1's it is successfull.
function difference(p1,p2) {
    var p1 = toParser(p1);
    var p2 = toParser(p2);
    var pid = parser_id++;
    
    // match a but not b. if both match and b's matched text is shorter
    // than a's, a successfull match is made
    return function(state) {
	var savedState = sate;
	var cached = savedState.getCached(pid);
	if(cached)
	    return cached;

	var br = p2(state);
	if(!br) {
	    cached = p1(state);
	} else {
	    var ar = p1(state);
	    if(ar.matched.length >= br.matched.length)
		cached = br;
	    else
		cached = ar;
	}
	savedState.putCached(pid, cached);
	return cached;
    }
}


// 'xor' is a parser combinator that takes two parsers, 'p1' and 'p2'. 
// It returns a parser that succeeds if 'p1' or 'p2' match but fails if
// they both match.
function xor(p1, p2) {
    var p1 = toParser(p1);
    var p2 = toParser(p2);
    var pid = parser_id++;

    // match a or b but not both
    return function(state) {
	var savedState = state;
	var cached = savedState.getCached(pid);
	if(cached)
	    return cached;

	var ar = p1(state);
	var br = p2(state);
	if(ar && br)
	    cached = false;
	else
	    cached = ar || br;
	savedState.putCached(pid, cached);
	return cached;
    }
}

// A parser combinator that takes one parser. It returns a parser that
// looks for zero or more matches of the original parser.
function repeat0(p) {
    var p = toParser(p);
    var pid = parser_id++;
    
    return function(state) {
	var savedState = state;
	var cached = savedState.getCached(pid);
	if(cached) {
	    return cached;
	}

	var ast = [];
	var matched = "";
	var result;
	while(result = p(state)) {
	    ast.push(result.ast);
	    matched = matched + result.matched;
	    if(result.remaining.index == state.index)
		break;
	    state = result.remaining;			
	}		
	cached = make_result(state, matched, ast);		
	savedState.putCached(pid, cached);
	return cached;
    }
}

// A parser combinator that takes one parser. It returns a parser that
// looks for one or more matches of the original parser.
function repeat1(p) {
    var p = toParser(p);
    var pid = parser_id++;

    return function(state) {
	var savedState = state;
	var cached = savedState.getCached(pid);
	if(cached)
	    return cached;

	var ast = [];
	var matched = "";
	var result= p(state);
	if(!result) 
	    cached = false;
	else {	
	    while(result) {
		ast.push(result.ast);
		matched = matched + result.matched;
		if(result.remaining.index == state.index)
		    break;
		state = result.remaining;			
		result = p(state);
	    }		
	    cached = make_result(state, matched, ast);		
	}
	savedState.putCached(pid, cached);
	return cached;
    }
}

// A parser combinator that takes one parser. It returns a parser that
// matches zero or one matches of the original parser.
function optional(p) {
    var p = toParser(p);
    var pid = parser_id++;
    return function(state) {
	var savedState = state;
	var cached = savedState.getCached(pid);
	if(cached)
	    return cached;
	var r = p(state);
	cached = r || make_result(state, "", false);				
	savedState.putCached(pid, cached);
	return cached;
    }
}

// A parser combinator that ensures that the given parser succeeds but
// ignores its result. This can be useful for parsing literals that you
// don't want to appear in the ast. eg:
// sequence(expect("("), Number, expect(")")) => ast: Number
function expect(p) {
    return action(p, function(ast) { return undefined; });
}

function chain(p, s, f) {
    var p = toParser(p);

    return action(sequence(p, repeat0(action(sequence(s, p), f))),
		  function(ast) { return [ast[0]].concat(ast[1]); });
}

// A parser combinator to do left chaining and evaluation. Like 'chain', it expects a parser
// for an item and for a seperator. The seperator parser's AST result should be a function
// of the form: function(lhs,rhs) { return x; }
// Where 'x' is the result of applying some operation to the lhs and rhs AST's from the item
// parser.
function chainl(p, s) {
    var p = toParser(p);
    return action(sequence(p, repeat0(sequence(s, p))),
		  function(ast) {
		      return foldl(function(v, action) { return action[0](v, action[1]); }, ast[0], ast[1]);
		  });
}

// A parser combinator that returns a parser that matches lists of things. The parser to 
// match the list item and the parser to match the seperator need to 
// be provided. The AST is the array of matched items.
function list(p, s) {
    return chain(p, s, function(ast) { return ast[1]; });
}

// Like list, but ignores whitespace between individual parsers.
function wlist() {
    var parsers = [];
    for(var i=0; i < arguments.length; ++i) {
	parsers.push(whitespace(arguments[i]));
    }
    return list.apply(null, parsers);       
}

// A parser that always returns a zero length match
function epsilon_p(state) {
    return make_result(state, "", undefined);
}

// Allows attaching of a function anywhere in the grammer. If the function returns
// true then parse succeeds otherwise it fails. Can be used for testing if a symbol
// is in the symbol table, etc.
function semantic(f) {
    var pid = parser_id++;
    return function(state) {
	var savedState = state;
	var cached = savedState.getCached(pid);
	if(cached)
	    return cached;       
	cached = f() ? make_result(state, "", undefined) : false;
	savedState.putCached(pid, cached);
	return cached;
    }
}

// The and predicate asserts that a certain conditional
// syntax is satisfied before evaluating another production. Eg:
// sequence(and("0"), oct_p)
// (if a leading zero, then parse octal)
// It succeeds if 'p' succeeds and fails if 'p' fails. It never 
// consume any input however, and doesn't put anything in the resulting
// AST.
function and(p) {
    var p = toParser(p);
    var pid = parser_id++;
    return function(state) {
	var savedState = state;
	var cached = savedState.getCached(pid);
	if(cached)
	    return cached;       
	var r = p(state);
	cached = r ? make_result(state, "", undefined) : false;
	savedState.putCached(pid, cached);
	return cached;
    }
}
			
// The opposite of 'and'. It fails if 'p' succeeds and succeeds if
// 'p' fails. It never consumes any input. This combined with 'and' can
// be used for 'lookahead' and disambiguation of cases.
//
// Compare:
// sequence("a",choice("+","++"),"b")
//   parses a+b
//   but not a++b because the + matches the first part and peg's don't
//   backtrack to other choice options if they succeed but later things fail.
//
// sequence("a",choice(sequence("+", not("+")),"++"),"b")
//    parses a+b
//    parses a++b
//
function not(p) {
    var p = toParser(p);
    var pid = parser_id++;
    return function(state) {
	var savedState = state;
	var cached = savedState.getCached(pid);
	if(cached)
	    return cached;       
	cached = p(state) ? false : make_result(state, "", undefined);
	savedState.putCached(pid, cached);
	return cached;
    }
}
			


