/**
 * This file is part of InterpreterDemo. Use it for any purpose you like.
 * Consider this code public domain.
 *
 * \author Christian Rehn
 */

#ifndef _PARSER_H
#define	_PARSER_H

#include <stdexcept>

#include "Tokenizer.h"
#include "Interpreter.h"

using std::string;

namespace InterpreterDemo
{

	/**
	 * \brief LL(1)-Parser which parses arithmetical expressions in infix
	 * notation.
	 *
	 * The grammar being parsed by Parser in EBNF:
	 * <pre>
	 * expression = firstPartOfExpression {('+'|'-') term} ;
	 * firstPartOfExpression = ['+'|'-'] term ;
	 * term = factor {('/'|'*') factor} ;
	 * factor = 'number' | parenthesis ;
	 * parenthesis = '('expression')' ;
	 * </pre>
	 *
	 * 'number' is actually a terminal symbol constructed by Tokenizer in
	 * order to have an LL(1) language, which simplifies the parser. The
	 * following production shows the structure of 'number':
	 * <pre>
	 * number = '0'|'1'|...|'9' {'0'|'1'|...|'9'} ;
	 * </pre>
	 *
	 * Every production has its own protected virtual parse method, so a
	 * derived class can override individual productions.
	 */
	class Parser
	{
	public:

		/**
		 * \brief Constructs a new Parser using a copy of the given Tokenizer.
		 *
		 * \param tokenizer the tokenizer the parser should use; the instance is
		 * copied in order to avoid aliasing problems.
		 */
		Parser(Tokenizer tokenizer);

		/**
		 * \brief Destroys the parser.
		 *
		 * Virtual because the class exposes virtual parse methods and is
		 * therefore meant to be derived from; without it, deleting a derived
		 * parser through a Parser pointer would be undefined behaviour.
		 */
		virtual ~Parser() {}

		/**
		 * \brief Parses the given string returning a tree of Expressions.
		 *
		 * If no argument is given, the string, which was passed to tokenizer,
		 * is used. Because the default argument is the empty string, explicitly
		 * passing "" cannot be distinguished from omitting the argument.
		 *
		 * \param str the string to be parsed. If omitted, the string, which was
		 * given to the tokenizer will be used.
		 * \return pointer to the root of the resulting expression tree.
		 * NOTE(review): ownership of the returned raw pointer is not visible
		 * from this header; presumably the caller has to release it - confirm
		 * against the implementation.
		 */
		Expression* parse(const std::string& str = "");
	protected:
		/** The Tokenizer which is used to get the tokens */
		Tokenizer _tokenizer;

		/** The last token returned by the tokenizer. */
		Token _lookahead;

		/**
		 * \brief Parses the non-terminal expression "expression".
		 * <pre>
		 * expression = firstPartOfExpression {('+'|'-') term} ;
		 * </pre>
		 */
		virtual Expression* parseExpression();

		/**
		 * \brief Parses the non-terminal expression "firstPartOfExpression".
		 * <pre>
		 * firstPartOfExpression = ['+'|'-'] term ;
		 * </pre>
		 */
		virtual Expression* parseFirstPartOfExpression();

		/**
		 * \brief Parses the non-terminal expression "term".
		 * <pre>
		 * term = factor {('/'|'*') factor} ;
		 * </pre>
		 */
		virtual Expression* parseTerm();

		/**
		 * \brief Parses the non-terminal expression "factor".
		 * <pre>
		 * factor = 'number' | parenthesis ;
		 * </pre>
		 */
		virtual Expression* parseFactor();

		/**
		 * \brief Parses the non-terminal expression "parenthesis".
		 * <pre>
		 * parenthesis = '('expression')' ;
		 * </pre>
		 */
		virtual Expression* parseParenthesis();

		/**
		 * \brief Parses the terminal expression "number".
		 * <pre>
		 * number = '0'|'1'|...|'9' {'0'|'1'|...|'9'} ;
		 * </pre>
		 *
		 * Note that number is actually a terminal symbol constructed by
		 * Tokenizer in order to have an LL(1) language, which simplifies the
		 * parser.
		 */
		virtual NumberExpression* parseNumber();

		/**
		 * \brief Gets the next token from the tokenizer.
		 */
		virtual void nextToken();

	};

	/**
	 * \brief Exception class used for handling syntax errors detected by the
	 * parser.
	 *
	 * Derives from std::runtime_error, so the message is available through
	 * what() and the exception can also be caught as std::runtime_error or
	 * std::exception.
	 *
	 * Could be easily enhanced for storing additional information such as line
	 * and column numbers.
	 */
	class ParserException : public std::runtime_error
	{
	public:
		/**
		 * \brief Constructs the exception with the given error message.
		 *
		 * Deliberately declared without an exception specification: the
		 * std::runtime_error constructor may throw (e.g. std::bad_alloc while
		 * storing the message), and a non-throwing specification would turn
		 * that into program termination instead of a propagating exception.
		 *
		 * \param msg description of the syntax error; reported by what().
		 */
		ParserException(const std::string& msg) : std::runtime_error(msg) {}
	};

}

#endif	/* _PARSER_H */

