import java.util.Vector;
import rebelsky.compiler.lexer.Token;
import rebelsky.compiler.lexer.TokenStream;
import rebelsky.compiler.misc.Symbol;
import rebelsky.compiler.parser.Node;
import rebelsky.compiler.parser.Parser;
import rebelsky.compiler.parser.ParseException;
import rebelsky.compiler.pascal.PascalIdentifier;
import rebelsky.compiler.pascal.PascalInteger;
import rebelsky.compiler.pascal.PascalNonterminals;
import rebelsky.compiler.pascal.PascalReal;
import rebelsky.compiler.pascal.PascalString;
import rebelsky.compiler.pascal.PascalTokenizer;
import rebelsky.compiler.pascal.PascalTokens;

/**
 * A simple parser for Stupid, Sam's Totally Unintelligible
 * Programming Instruction Doohickey.  Programs in Stupid
 * consist of variable declarations, function declarations,
 * and a body.
 *
 * Here's a simple BNF grammar for the language with the nonterminals
 * presented in alphabetical order for easy reference.  program is the
 * start symbol.
 *
 *   adding_operator ::= PLUS
 *   adding_operator ::= MINUS
 *   adding_operator ::= OR
 *
 *   assignment ::= variable ASSIGN expression
 *
 *   conditional ::= IF expression THEN statement ELSE statement
 *
 *   expression ::= string
 *   expression ::= expression relational_operator expression
 *   expression ::= simple_expression
 *
 *   factor ::= ID
 *   factor ::= unsigned_number
 *   factor ::= OPENPAREN expression CLOSEPAREN
 *   factor ::= ID OPENPAREN expression CLOSEPAREN
 *
 *   function_declaration ::= FUNCTION ID OPENPAREN ID COLON ID CLOSEPAREN
 *                            COLON ID statement_list SEMI
 *
 *   function_declarations ::= { function_declaration }
 *
 *   multiplying_operator ::= TIMES
 *   multiplying_operator ::= DIVIDE
 *   multiplying_operator ::= DIV
 *   multiplying_operator ::= MOD
 *   multiplying_operator ::= AND
 *
 *   program ::= variable_declarations function_declarations statement_list
 *
 *   relational_operator ::= EQUALS
 *   relational_operator ::= NOTEQUALS
 *   relational_operator ::= LESSTHAN
 *   relational_operator ::= LESSEQUAL
 *   relational_operator ::= GREATERTHAN
 *   relational_operator ::= GREATEREQUAL
 *
 *   read_statement ::= READ OPENPAREN ID CLOSEPAREN
 *
 *   simple_expression ::= term { adding_operator term }
 *
 *   statement ::= assignment
 *   statement ::= conditional
 *   statement ::= empty_statement
 *   statement ::= read_statement
 *   statement ::= while_loop
 *   statement ::= write_statement
 *   statement ::= statement_list
 *
 *   statement_list ::= BEGIN statement SEMI { statement SEMI } END
 *
 *   term ::= factor { multiplying_operator factor }
 *
 *   unsigned_number ::= UINT
 *   unsigned_number ::= UREAL
 *
 *   variable ::= ID
 *
 *   variable_declaration ::= ID { COMMA ID } COLON ID
 *
 *   variable_declarations ::= VAR variable_declaration SEMI
 *                             { variable_declaration SEMI }
 *   variable_declarations ::= epsilon
 *
 *   while_loop ::= WHILE expression DO statement
 *
 *   write_statement ::= WRITE OPENPAREN expression CLOSEPAREN
 *    
 * @author Samuel A. Rebelsky
 * @version 1.0 of December 2002
 */
public class StupidParser
  implements Parser
{
  // +----------------------+----------------------------------------------
  // | Implementation Notes |
  // +----------------------+
/*
  I've implemented the language with a simple predictive parser.

  The parse tree I generate does not include many of the useless nodes
  (e.g., the terminals used only for parsing; terms, factors, and any 
  other nonterminals used for disambiguating; nonterminals whose primary
  purpose is to build lists).

  For nonterminals, such as variable_declarations, that can have a
  sequence of "children", I make only one node with lots of children,
  rather than a sequence of nodes.  Hence, the arity of something
  like variable_declarations is the number of declarations.
 */

  // +--------+------------------------------------------------------------
  // | Fields |
  // +--------+

  // +--------------+------------------------------------------------------
  // | Constructors |
  // +--------------+

  // +---------+-----------------------------------------------------------
  // | Methods |
  // +---------+

  /**
   * Parse a stream of tokens, advancing that stream as necessary.
   *
   * @exception ParseException
   *   If parsing fails.
   */
  public Node parse(TokenStream tokens)
    throws ParseException
  {
    if (!(tokens instanceof PascalTokenizer))
      boom("Can only parse with Pascal tokens.");
    try {
      return parseProgram((PascalTokenizer) tokens);
    }
    // The following few lines are something of a hack.  I
    // pass along parse exceptions and turn other exceptions
    // (e.g., TokenExceptions) to ParseExceptions.
    catch (ParseException e) { throw e; }
    catch (Exception e) { throw new ParseException(e.toString()); }
  } // parse(TokenStream)


  // +------------------------+--------------------------------------------
  // | Private Helper Methods |
  // +------------------------+

  /**
   * My favorite way of reporting errors.  Essentially, a shorthand
   * for "throw new ParseException(...)".
   */
  private void boom(String message)
    throws ParseException
  {
    throw new ParseException(message);
  } // boom(String)

  /**
   * Consume a particular token.
   */
  private void consume(Token desired, PascalTokenizer tokens)
    throws ParseException,Exception
  {
    if (tokens.peek() == desired)
      tokens.next();
    else
      wrongToken(desired, tokens.peek());
  } // consume(Token,PascalTokenizer)
  
  /**
   * Consume an identifier from a token stream or throw an exception.
   */
  private Node getID(PascalTokenizer tokens) 
    throws ParseException,Exception
  {
    if (tokens.peek() instanceof PascalIdentifier)
      return new Node(tokens.next(), null);
    else
      throw new ParseException("Expected IDENTIFIER, found " 
                               + tokens.peek().toString());
 } // getID(PascalTokenizer)
  
  /**
   * Determine if a token is an additive operator.
   */
  private boolean isAddOp(Token tok) {
    return ( (tok == PascalTokens.TPLUS) 
             || (tok == PascalTokens.TMINUS) 
             || (tok == PascalTokens.TOR) );
  } // isAddOp(Token)

  /**
   * Determine if a token is a multiplicative operator.
   */
  private boolean isMulOp(Token tok) {
    return ( (tok == PascalTokens.TTIMES) 
             || (tok == PascalTokens.TDIVIDE) 
             || (tok == PascalTokens.TDIV) 
             || (tok == PascalTokens.TMOD) 
             || (tok == PascalTokens.TAND) );
  } // isMulOp(Token)

  /**
   * Determine if a token is a relational operator.
   */
  private boolean isRelOp(Token tok) {
    return ( (tok == PascalTokens.TEQUALS) 
             || (tok == PascalTokens.TNOTEQUALS) 
             || (tok == PascalTokens.TLESSTHAN) 
             || (tok == PascalTokens.TLESSEQ) 
             || (tok == PascalTokens.TGREATERTHAN) 
             || (tok == PascalTokens.TGREATEREQ) );
  } // isRelOp(Token)

  /**
   * Give up because the wrong token was found when expecting
   * something particular (a nonterminal or a terminal).
   */
  private Node wrongToken(Symbol expected, Token found)
    throws ParseException
  {
    throw new ParseException("Expected: " + expected.toString() + 
                             "; Found: " + found.toString());
  } // wrongToken(Symbol, Token)


  // +-----------------------+---------------------------------------------
  // | Private Parse Methods |
  // +-----------------------+

  /*
     Each of these procedures throws a ParseException for obvious
     reasons (that is, parsing failed).  Each can also throw a
     host of other exceptions, typically when tokenizing fails.
   */

  private Node parseAddingOperator(PascalTokenizer tokens) 
    throws ParseException,Exception
  {
    Token tok = tokens.peek();
    if (isAddOp(tokens.peek())) {
      return new Node(tokens.next(), null);
    }
    else {
      return wrongToken(PascalNonterminals.ADDING_OPERATOR, tok);
    }
  }  // parseAddingOperator(PascalTokenizer)

  private Node parseAssignment(PascalTokenizer tokens)
    throws ParseException,Exception
  {
    Vector children = new Vector();
    // Parse the variable
    children.add(parseVariable(tokens));
    // Check for the assignment symbol
    if (tokens.peek() != PascalTokens.TASSIGN)
      return wrongToken(PascalNonterminals.ASSIGNMENT, tokens.peek());
    tokens.next();
    // Parse the expression
    children.add(parseExpression(tokens));
    // Build and return the assignment statement 
    return new Node(PascalNonterminals.ASSIGNMENT, children);
  } // parseAssignment(PascalTokenizer)

  private Node parseCompoundStatement(PascalTokenizer tokens)
    throws ParseException,Exception
  {
    // Sanity check: Compound statements begin with begin.
    if (tokens.peek() != PascalTokens.TBEGIN) 
      return wrongToken(PascalNonterminals.COMPOUND_STATEMENT, tokens.peek());
    // Skip over the begin.
    tokens.next();
    // Prepare to build a list of statements.
    Vector statements = new Vector();
    // Keep reading statements until we hit the end of the compound statement.
    while (tokens.peek() != PascalTokens.TEND) {
      statements.add(parseStatement(tokens));
      consume(PascalTokens.TSEMI, tokens);
    } // while
    // Sanity check: Do we have the end symbol?
    if (tokens.peek() != PascalTokens.TEND)
      return wrongToken(PascalTokens.TEND, tokens.peek());
    // Read over that symbol
    tokens.next();
    // Build the nice new list
    return new Node(PascalNonterminals.COMPOUND_STATEMENT, statements);
  } // parseCompoundStatement(PascalTokenizer)
  
  private Node parseConditional(PascalTokenizer tokens)
    throws ParseException,Exception
  {
    Vector children = new Vector();
    consume(PascalTokens.TIF, tokens);
    children.add(parseExpression(tokens));
    consume(PascalTokens.TTHEN, tokens);
    children.add(parseStatement(tokens));
    consume(PascalTokens.TELSE, tokens);
    children.add(parseStatement(tokens));
    return new Node(PascalNonterminals.CONDITIONAL, children);
  } // parseConditional(PascalTokenizer)

  private Node parseExpression(PascalTokenizer tokens)
    throws ParseException,Exception
  {
    // Special case: Strings
    if (tokens.peek() instanceof PascalString)
      return new Node(tokens.next(), null);
    // Other expressions need to begin with simple expressions
    Node primary = parseSimpleExpression(tokens);
    // Expressions can sometimes have relational operators
    if (isRelOp(tokens.peek())) {
      Vector children = new Vector();
      children.add(primary);
      children.add(parseRelationalOperator(tokens));
      children.add(parseSimpleExpression(tokens));
      return new Node(PascalNonterminals.EXPRESSION, children);
    }
    // If there's no relational operator, stick with the current
    // expression
    else {
      return primary;
    }
  } // parseExpression(PascalTokenizer)

  private Node parseFactor(PascalTokenizer tokens)
    throws ParseException,Exception
  {
    Token tok = tokens.peek();
    if ( (tok instanceof PascalInteger) || (tok instanceof PascalReal) ) {
      return new Node(tokens.next(), null);
    }
    else if (tok instanceof PascalIdentifier) {
      if (tokens.peek() == PascalTokens.TOPENPAREN) {
        Vector children = new Vector();
        children.add(getID(tokens));
        consume(PascalTokens.TOPENPAREN, tokens);
        children.add(parseExpression(tokens));
        consume(PascalTokens.TCLOSEPAREN, tokens);
        return new Node(PascalNonterminals.FUNCTION_CALL, children);
      } // function call
      else {
        return new Node(tokens.next(), null);
      } // just an identifier
    } // things that start with identifiers
    else if (tok == PascalTokens.TOPENPAREN) {
      // Consume the open paren
      tokens.next();
      // Get the subexpression
      Node result = parseExpression(tokens);
      // Make sure the end paren is there.
      consume(PascalTokens.TCLOSEPAREN, tokens);
      // Don't forget to return the intermediate result
      return result;
    } // OPENPAREN
    else {
      // Whoops, something's wrong
      return wrongToken(PascalNonterminals.FACTOR, tokens.peek());
    }
  } // parseFactor(PascalTokenizer)

  // Returns FUNCTION_DECLARATION node with children
  //   0: Function name (identifier)
  //   1: Parameter name (identifier)
  //   2: Parameter type (identifier)
  //   3: Return type (identifier)
  //   4: Body (compound_statement)
  private Node parseFunctionDeclaration(PascalTokenizer tokens)
    throws ParseException,Exception
  {
    Vector children = new Vector();

    consume(PascalTokens.TFUNCTION, tokens);
    children.add(getID(tokens));
    consume(PascalTokens.TOPENPAREN, tokens);
    children.add(getID(tokens));
    consume(PascalTokens.TCOLON, tokens);
    children.add(getID(tokens));
    consume(PascalTokens.TCLOSEPAREN, tokens);
    consume(PascalTokens.TCOLON, tokens);
    children.add(getID(tokens));
    children.add(parseCompoundStatement(tokens));
    consume(PascalTokens.TSEMI, tokens);
    
    return new Node(PascalNonterminals.FUNCTION_DECLARATION, children);
  } // parseFunctionDeclaration(PascalTokenizer)
  
  private Node parseFunctionDeclarations(PascalTokenizer tokens)
    throws ParseException,Exception
  {
    Vector functions = new Vector();
    while (tokens.peek() == PascalTokens.TFUNCTION) 
      functions.add(parseFunctionDeclaration(tokens));
    return new Node(PascalNonterminals.FUNCTION_DECLARATIONS, functions);
  } // parseFunctionDeclarations(PascalTokenizer)
  
  private Node parseMultiplyingOperator(PascalTokenizer tokens) 
    throws ParseException,Exception
  {
    if (isMulOp(tokens.peek())) 
      return new Node(tokens.next(), null);
    else 
      return wrongToken(PascalNonterminals.MULTIPLYING_OPERATOR, tokens.peek());
  }  // parseMultiplyingOperator(PascalTokenizer)

  private Node parseProgram(PascalTokenizer tokens)
    throws ParseException,Exception
  {
    Vector children = new Vector();
    children.add(parseVariableDeclarations(tokens));
    children.add(parseFunctionDeclarations(tokens));
    children.add(parseCompoundStatement(tokens));
    return new Node(PascalNonterminals.PROGRAM, children);
  } // parseProgram(PascalTokenizer)

  
  private Node parseReadStatement(PascalTokenizer tokens)
    throws ParseException,Exception
  {
    Vector children = new Vector();
    consume(StupidTokens.TREAD, tokens);
    children.add(new Node(StupidTokens.TREAD, null));
    consume(PascalTokens.TOPENPAREN, tokens);
    children.add(parseExpression(tokens));
    consume(PascalTokens.TCLOSEPAREN, tokens);
    return new Node(PascalNonterminals.PROCEDURE_CALL, children);
  } // parseReadStatement(PascalTokenizer)

  private Node parseRelationalOperator(PascalTokenizer tokens) 
    throws ParseException,Exception
  {
    if (isRelOp(tokens.peek())) 
      return new Node(tokens.next(), null);
    else 
      return wrongToken(PascalNonterminals.RELATIONAL_OPERATOR, tokens.peek());
  }  // parseRelationalOperator(PascalTokenizer)

  private Node parseSimpleExpression(PascalTokenizer tokens) 
    throws ParseException,Exception
  {
    // Every expression begins with a term, so get it.
    Node result = parseTerm(tokens);
    // If the next symbol is an adding operator, there should be
    // that operator and another term, which together make a
    // a compound simple expression.
    while (isAddOp(tokens.peek())) {
      // Three part expression, so build the children
      Vector children = new Vector();
      children.add(result);
      children.add(parseAddingOperator(tokens));
      children.add(parseTerm(tokens));
      // Update the result
      result = new Node(PascalNonterminals.EXPRESSION, children);
    } // while
    // That's it, we're done.
    return result;
  } // parseSimpleExpression(PascalTokenizer)

  private Node parseStatement(PascalTokenizer tokens)
    throws ParseException,Exception
  {
    Token tok = tokens.peek();
    if (tok instanceof PascalIdentifier) {
      return parseAssignment(tokens);
    }
    else if (tok == PascalTokens.TBEGIN) {
      return parseCompoundStatement(tokens);
    }
    else if (tok == PascalTokens.TIF) {
      return parseConditional(tokens);
    }
    else if (tok == PascalTokens.TSEMI) {
      tokens.next();
      return null;
    }
    else if (tok == StupidTokens.TREAD) {
      return parseReadStatement(tokens);
    }
    else if (tok == PascalTokens.TWHILE) {
      return parseWhileLoop(tokens);
    }
    else if (tok == StupidTokens.TWRITE) {
      return parseWriteStatement(tokens);
    }
    else
      return wrongToken(PascalNonterminals.STATEMENT, tok);
  } // parseStatement(PascalTokenizer)
  
  private Node parseTerm(PascalTokenizer tokens) 
    throws ParseException,Exception
  {
    // Every term begins with a factor, so get it.
    Node result = parseFactor(tokens);
    // If the next symbol is a multiplying operator, there should be
    // the operator and another factor, which together make a 
    // compound term.
    while (isMulOp(tokens.peek())) {
      // Three part expression, so build the children
      Vector children = new Vector();
      children.add(result);
      children.add(parseMultiplyingOperator(tokens));
      children.add(parseFactor(tokens));
      // Update the result
      result = new Node(PascalNonterminals.EXPRESSION, children);
    } // while
    // That's it, we're done.
    return result;
  } // parseTerm(PascalTokenizer)

  private Node parseVariable(PascalTokenizer tokens)
    throws ParseException,Exception
  {
    // Take the next identifier and shove it in a node.
    if (tokens.peek() instanceof PascalIdentifier)
      return new Node(tokens.next(), null);
    else
      return wrongToken(PascalNonterminals.VARIABLE, tokens.peek());
  } // parseVariable(PascalTokenizer)

  private Node parseVariableDeclaration(PascalTokenizer tokens)
    throws ParseException,Exception
  {
    Vector children = new Vector();
    // Make sure we start with an identifier
    if (!(tokens.peek() instanceof PascalIdentifier))
      return wrongToken(PascalNonterminals.VARIABLE_DECLARATION, tokens.peek());
    // Add the identifier
    children.add(new Node(tokens.next(), null));
    // Are there more?
    while (tokens.peek() == PascalTokens.TCOMMA) {
      tokens.next();
      // Make sure we start with an identifier
      if (!(tokens.peek() instanceof PascalIdentifier))
        return wrongToken(PascalNonterminals.VARIABLE_DECLARATION, tokens.peek());
      // Add the identifier
      children.add(new Node(tokens.next(), null));
    }
    // Ready for the colon
    if (tokens.peek() != PascalTokens.TCOLON) 
      return wrongToken(PascalTokens.TCOLON, tokens.peek());
    tokens.next();
    // Ready for the type
    if (!(tokens.peek() instanceof PascalIdentifier))
      return wrongToken(PascalNonterminals.VARIABLE_DECLARATION, tokens.peek());
    children.add(new Node(tokens.next(), null));
    // We're done
    return new Node(PascalNonterminals.VARIABLE_DECLARATION, children);
  } // parseVariableDeclaration(PascalTokenizer)

  private Node parseVariableDeclarations(PascalTokenizer tokens)
    throws ParseException,Exception
  {
    Vector children = new Vector();
    // Variable declarations are not required, so check for empty declarations.
    if (tokens.peek() != PascalTokens.TVAR)
      return new Node(PascalNonterminals.VARIABLE_DECLARATIONS, null);
    // Skip over the VARIABLE token.
    tokens.next();
    // Keep reading the variable declarations until we run out.
    while (tokens.peek() instanceof PascalIdentifier) {
      children.add(parseVariableDeclaration(tokens));
      if (tokens.peek() != PascalTokens.TSEMI)
        return wrongToken(PascalTokens.TSEMI, tokens.peek());
      tokens.next();
    } // while
    return new Node(PascalNonterminals.VARIABLE_DECLARATIONS, children);
  } // parseVariableDeclarations(PascalTokenizer)

  private Node parseWhileLoop(PascalTokenizer tokens)
    throws ParseException,Exception
  {
    Vector children = new Vector();
    consume(PascalTokens.TWHILE, tokens);
    children.add(parseExpression(tokens));
    consume(PascalTokens.TDO, tokens);
    children.add(parseStatement(tokens));
    return new Node(PascalNonterminals.WHILE_LOOP, children);
  } // parseWhileLoop(PascalTokenizer)
  
  private Node parseWriteStatement(PascalTokenizer tokens)
    throws ParseException,Exception
  {
    Vector children = new Vector();
    consume(StupidTokens.TWRITE, tokens);
    children.add(new Node(StupidTokens.TWRITE, null));
    consume(PascalTokens.TOPENPAREN, tokens);
    children.add(parseExpression(tokens));
    consume(PascalTokens.TCLOSEPAREN, tokens);
    return new Node(PascalNonterminals.PROCEDURE_CALL, children);
  } // parseWriteStatement(PascalTokenizer)
} // class StupidParser

