/**
 * This file is part of InterpreterDemo. Use it for any purpose you like.
 * Consider this code as public domain.
 *
 * \author Christian Rehn
 */

#ifndef _TOKENIZER_H
#define	_TOKENIZER_H

#include <string>
#include <stdexcept>

using std::string;

namespace InterpreterDemo 
{

	/**
	 * \brief The type used to identify a kind of token.
	 *
	 * A string is used rather than an enum so that subclasses of Tokenizer
	 * can introduce additional token kinds; an enum cannot be extended
	 * after it has been defined.
	 */
	typedef std::string Token;

	/**
	 * \brief A simple tokenizer, which takes a string and scans it for tokens.
	 *
	 * The following tokens are recognized:
	 * <pre>
	 * Number = '0'|..|'9' {'0'|..|'9'}
	 * Plus = '+'
	 * Minus = '-'
	 * Times = '*'
	 * Divide = '/'
	 * LeftParenthesis = '('
	 * RightParenthesis = ')'
	 * </pre>
	 */
	class Tokenizer {
	public:

		/**
		 * \brief Constructs a new Tokenizer, which scans through the given string.
		 *
		 * If the argument is omitted, you have to set the string later using
		 * the method setStr().
		 *
		 * \param str The string to scan
		 */
		Tokenizer(const string& str = "");

		/**
		 * \brief Virtual destructor.
		 *
		 * Tokenizer is meant to be subclassed (the read*() methods are
		 * virtual hooks), so a derived tokenizer may be destroyed through a
		 * pointer or reference to Tokenizer. Without a virtual destructor
		 * that would be undefined behaviour.
		 */
		virtual ~Tokenizer() {}

		/**
		 * \brief Returns the string the tokenizer scans through.
		 */
		string getStr() const;

		/**
		 * \brief Sets the string the tokenizer scans through.
		 *
		 * \param value The new string to scan
		 */
		void setStr(const string& value);

		/**
		 * \brief Determines if there is more text to scan and thus more tokens.
		 */
		bool hasNext() const;

		/**
		 * \brief Scans further through the string and returns the next token.
		 *
		 * \throws TokenizerException if a syntax error is detected, i.e. an
		 * unknown symbol is encountered.
		 */
		Token getNextToken();

		/**
		 * \brief Returns the token which was returned by getNextToken()
		 * most recently.
		 *
		 * If getNextToken() has not been invoked yet, an empty
		 * token is returned.
		 */
		Token getCurrentToken() const;

		/**
		 * \brief If the most recently returned token was a number token, this
		 * method returns the integer value actually recognized.
		 */
		int getCurrentNumberTokenValue() const;

		/**
		 * \brief Returns a string representation of the current token suitable
		 * for user output.
		 */
		string getCurrentTokenAsStr() const;
	protected:

		/** The string to be scanned through. */
		string _str;

		/** The position at which the tokenizer is currently reading. */
		int _pos;

		/** The token which was read most recently. */
		Token _currentToken;

		/** If _currentToken is a number token, this variable holds the actual
		 * value which was read.
		 */
		int _currentNumberTokenValue;

		/**
		 * \brief Reads and returns a "Number"-token.
		 *
		 * Override this method in order to read numbers differently, e.g. if
		 * you want to read real numbers instead of integers.
		 */
		virtual Token readNumber();

		/**
		 * \brief Reads and returns a "Plus"-token.
		 */
		virtual Token readPlus();

		/**
		 * \brief Reads and returns a "Minus"-token.
		 */
		virtual Token readMinus();

		/**
		 * \brief Reads and returns a "Times"-token.
		 */
		virtual Token readTimes();

		/**
		 * \brief Reads and returns a "Divide"-token.
		 */
		virtual Token readDivide();

		/**
		 * \brief Reads and returns a "LeftParenthesis"-token.
		 */
		virtual Token readLeftParenthesis();

		/**
		 * \brief Reads and returns a "RightParenthesis"-token.
		 */
		virtual Token readRightParenthesis();

		/**
		 * \brief Hook method for scanning custom tokens.
		 *
		 * This method is empty by default. Override it in order to scan for
		 * custom tokens, e.g. for variables. It should return a unique string
		 * for each token type.
		 */
		virtual Token readCustomTokens();
	};

	/**
	 * \brief Exception class used for handling syntax errors detected by the
	 * tokenizer.
	 *
	 * Could be easily enhanced for storing additional information such as line
	 * and column numbers.
	 */
	class TokenizerException : public std::runtime_error
	{
	public:
		/**
		 * \brief Creates an exception carrying the given error message.
		 *
		 * Deliberately has no exception specification: std::runtime_error
		 * stores a copy of the message, and making that copy may throw
		 * (e.g. std::bad_alloc). A non-throwing specification would turn
		 * such a failure into program termination.
		 *
		 * \param msg Description of the syntax error; available via what()
		 */
		TokenizerException(const std::string& msg) : std::runtime_error(msg) {}
	};

}

#endif	/* _TOKENIZER_H */

