IFEM/3rdparty/expreval/parser.cpp

// File:    parser.cpp
// Author:  Brian Vanderburg II
// Purpose: Parser object to help parse expression
//------------------------------------------------------------------------------


// Includes
#include <new>
#include <memory>
#include <cstdlib>

#include "autodiff/reverse/var/var.hpp"
#include "defs.h"
#include "parser.h"
#include "node.h"
#include "except.h"
#include "funclist.h"
#include "expr.h"

using namespace std;
using namespace ExprEval;

// Private functions to solve locale problems
// ------------------------------------------
namespace
{
    // True when c lies in 'a'..'z' or 'A'..'Z'.  Plain range checks are
    // used so the result never depends on the current locale.
    bool expreval_isalpha(char c)
    {
        const bool lower = ('a' <= c && c <= 'z');
        const bool upper = ('A' <= c && c <= 'Z');
        return lower || upper;
    }

    // True when c lies in '0'..'9'.
    bool expreval_isdigit(char c)
    {
        return !(c < '0' || c > '9');
    }

    // True when c is a letter or a digit as defined by the helpers above.
    bool expreval_isalnum(char c)
    {
        if(expreval_isdigit(c))
            return true;

        return expreval_isalpha(c);
    }

    // True for space, tab, carriage return and line feed only.
    bool expreval_isspace(char c)
    {
        switch(c)
        {
            case ' ':
            case '\t':
            case '\r':
            case '\n':
                return true;

            default:
                return false;
        }
    }

    // Disabled: hand-written, locale-independent string-to-double conversion.
    // It has no exponent handling; the tokenizer in this file calls atof.
/*    double expreval_atof(const char* str)
    {
        bool negative = false;
        long double value = 0.0;

        // Skip space
        while(expreval_isspace(*str))
            str++;

        // Check for sign
        if(*str == '-')
        {
            negative = true;
            str++;
        }
        else if(*str == '+')
        {
            str++;
        }

        // The part before the decimal
        while(*str >= '0' && *str <= '9')
        {
            value = (value * 10.0) + (double)(*str - '0');
            str++;
        }

        // Decimal, if any
        if(*str == '.')
        {
            long double divisor = 1.0;
            str++;

            // Part after the decimal, if any
            while(*str >= '0' && *str <= '9')
            {
                value = (value * 10.0) + (double)(*str - '0');
                divisor *= 10.0;
                str++;
            }

        value /= divisor;
        }

        // The result
        return negative ? -value : value;
    }
    */
}


// Token
//------------------------------------------------------------------------------

// Constructor for tokens that carry only a type (operators, parentheses,
// separators).  start and end are stored as given; the tokenizer in this
// file passes inclusive character offsets.
// m_value is set to 0.0 so GetValue() never reads an indeterminate double
// for a token that is not a value.
Token::Token(TokenType type, string::size_type start, string::size_type end) :
        m_type(type),
        m_value(0.0),
        m_start(start),
        m_end(end)
{
}

// Construct identifier token.  The type is fixed to TypeIdentifier and the
// name is copied into the token.
// m_value is set to 0.0 so GetValue() never reads an indeterminate double
// for a token that is not a value.
Token::Token(const string &ident, string::size_type start, string::size_type end) :
        m_type(Token::TypeIdentifier),
        m_ident(ident),
        m_value(0.0),
        m_start(start),
        m_end(end)
{
}

// Construct value token.  The type is fixed to TypeValue; the identifier
// string is left default-constructed.
Token::Token(double value, string::size_type start, string::size_type end)
    : m_type(TypeValue), m_value(value), m_start(start), m_end(end)
{
}

// Get type: returns the token kind stored at construction.
Token::TokenType Token::GetType() const
{
    return this->m_type;
}

// Get identifier: returns a reference to the stored name.  Only the
// identifier constructor fills it in.
const string &Token::GetIdentifier() const
{
    return this->m_ident;
}

// Get value
double Token::GetValue() const
{
    return m_value;
}

// Get start: returns the start position given at construction.
string::size_type Token::GetStart() const
{
    return this->m_start;
}

// Get end: returns the end position given at construction.
string::size_type Token::GetEnd() const
{
    return this->m_end;
}

// Parser
//------------------------------------------------------------------------------

// Constructor: remembers the expression object this parser works for.
// Throws NullPointerException when expr is a null pointer.
template<class Value>
Parser<Value>::Parser(Expression<Value>* expr) : m_expr(expr)
{
    if(!expr)
    {
        throw(NullPointerException("Parser::Parser"));
    }
}

// Destructor: nothing to do beyond destroying the members.
template<class Value>
Parser<Value>::~Parser() = default;

// Parse an expression string: tokenize exstr, then parse the complete
// token range and return the resulting node.
// Throws EmptyExpressionException when tokenizing produced no tokens.
template<class Value>
Node<Value> *Parser<Value>::Parse(const string &exstr)
{
    BuildTokens(exstr);

    // The string may have held nothing but white space or comments
    const size_type count = m_tokens.size();
    if(count == 0)
    {
        throw(EmptyExpressionException());
    }

    // Parse everything from the first to the last token
    return ParseRegion(0, count - 1);
}

// Parse a region of tokens
//
// start and end are inclusive indices into m_tokens.  The region is scanned
// once to record where operators appear outside of any parentheses, then a
// node is created for the first construct that matches, tested in this order:
//   1. ';'  at level 0              -> MultiNode
//   2. first '=' at level 0         -> AssignNode
//   3. last binary '+'/'-'          -> AddNode / SubtractNode
//   4. last '*' or '/'              -> MultiplyNode / DivideNode
//   5. sign at the region start     -> NegateNode, or the rest for '+'
//   6. last '^'                     -> ExponentNode
//   7. "( ... )" spanning region    -> contents parsed recursively
//   8. "name( ... )" spanning region-> node created by the function list
//   9. single token                 -> VariableNode / ValueNode
// The created node is handed the region through its own Parse().
//
// Returns the new node as a raw pointer released from its aptr holder.
// Throws SyntaxException, UnmatchedParenthesisException,
// NoFunctionListException or NotFoundException on malformed input.
template<class Value>
Node<Value> *Parser<Value>::ParseRegion(Parser::size_type start, Parser::size_type end)
{
    // Sentinel meaning "no such token recorded"
    const size_type none = static_cast<size_type>(-1);

    size_type pos;
    size_type fgopen = none;      // '(' that opens the first top-level group
    size_type fgclose = none;     // ')' that first brings the level back to 0
    size_type assignindex = none; // First '=' at level 0
    size_type addsubindex = none; // Last binary '+'/'-' at level 0
    size_type muldivindex = none; // Last '*' or '/' at level 0
    size_type posnegindex = none; // First unary '+'/'-' at level 0
    size_type expindex = none;    // Last '^' at level 0
    bool multiexpr = false;       // A ';' was seen at level 0
    int plevel = 0;               // Current parenthesis nesting depth

    // Check simple syntax
    if(start > end)
        throw(SyntaxException());

    // Scan through tokens
    for(pos = start; pos <= end; pos++)
    {
        switch(m_tokens[pos].GetType())
        {
            case Token::TypeOpenParenthesis:
            {
                plevel++;

                // Opening of first group?
                if(plevel == 1 && fgopen == none)
                    fgopen = pos;

                break;
            }

            case Token::TypeCloseParenthesis:
            {
                plevel--;

                // First group closed?
                if(plevel == 0 && fgclose == none)
                    fgclose = pos;

                // More closing than opening parentheses so far
                if(plevel < 0)
                {
                    UnmatchedParenthesisException e;

                    e.SetStart(m_tokens[pos].GetStart());
                    e.SetEnd(m_tokens[pos].GetEnd());
                    throw(e);
                }

                break;
            }

            case Token::TypeEqual:
            {
                // Only the first assignment is remembered
                if(plevel == 0 && assignindex == none)
                    assignindex = pos;

                break;
            }

            case Token::TypeAsterisk:
            case Token::TypeForwardSlash:
            {
                // Every hit overwrites, so the last one wins
                if(plevel == 0)
                    muldivindex = pos;

                break;
            }

            case Token::TypeHat:
            {
                // Every hit overwrites, so the last one wins
                if(plevel == 0)
                    expindex = pos;

                break;
            }

            case Token::TypePlus:
            case Token::TypeHyphen:
            {
                if(plevel == 0)
                {
                    if(pos == start)
                    {
                        // Positive or negative sign
                        if(posnegindex == none)
                            posnegindex = pos;
                    }
                    else
                    {
                        // What is before us
                        switch(m_tokens[pos - 1].GetType())
                        {
                            case Token::TypeEqual:
                            case Token::TypePlus:
                            case Token::TypeHyphen:
                            case Token::TypeAsterisk:
                            case Token::TypeForwardSlash:
                            case Token::TypeHat:
                                // After any of these, we are a positive/negative
                                if(posnegindex == none)
                                    posnegindex = pos;
                                break;

                            default:
                                // After any other, we are addition/subtraction
                                addsubindex = pos;
                                break;
                        }
                    }
                }

                break;
            }

            case Token::TypeSemicolon:
            {
                if(plevel == 0)
                    multiexpr = true;

                break;
            }

            default:
                break;
        }
    }

    // plevel should be 0
    if(plevel != 0)
    {
        UnmatchedParenthesisException e;

        // Report the character span of the last token of the region, the
        // same unit used by every other exception raised here (previously
        // the token index itself was passed).
        e.SetStart(m_tokens[end].GetStart());
        e.SetEnd(m_tokens[end].GetEnd());
        throw(e);
    }

    // Parse in certain order to maintain order of operators

    // Multi-expression first
    if(multiexpr)
    {
        aptr(Node<Value>) n(new MultiNode<Value>(m_expr));
        n->Parse(*this, start, end);
        return n.release();
    }
    else if(assignindex != none)
    {
        // Assignment next
        aptr(Node<Value>) n(new AssignNode<Value>(m_expr));
        n->Parse(*this, start, end, assignindex);
        return n.release();
    }
    else if(addsubindex != none)
    {
        // Addition/subtraction next
        if(m_tokens[addsubindex].GetType() == Token::TypePlus)
        {
            // Addition
            aptr(Node<Value>) n(new AddNode<Value>(m_expr));
            n->Parse(*this, start, end, addsubindex);
            return n.release();
        }
        else
        {
            // Subtraction
            aptr(Node<Value>) n(new SubtractNode<Value>(m_expr));
            n->Parse(*this, start, end, addsubindex);
            return n.release();
        }
    }
    else if(muldivindex != none)
    {
        // Multiplication/division next
        if(m_tokens[muldivindex].GetType() == Token::TypeAsterisk)
        {
            // Multiplication
            aptr(Node<Value>) n(new MultiplyNode<Value>(m_expr));
            n->Parse(*this, start, end, muldivindex);
            return n.release();
        }
        else
        {
            // Division
            aptr(Node<Value>) n(new DivideNode<Value>(m_expr));
            n->Parse(*this, start, end, muldivindex);
            return n.release();
        }
    }
    else if(posnegindex == start)
    {
        // Positive/negative next, must be at start and check before exponent
        if(m_tokens[posnegindex].GetType() == Token::TypePlus)
        {
            // Positive: the sign changes nothing, parse what follows it
            return ParseRegion(posnegindex + 1, end);
        }
        else
        {
            // Negative
            aptr(Node<Value>) n(new NegateNode<Value>(m_expr));
            n->Parse(*this, start, end, posnegindex);
            return n.release();
        }
    }
    else if(expindex != none)
    {
        // Exponent
        aptr(Node<Value>) n(new ExponentNode<Value>(m_expr));
        n->Parse(*this, start, end, expindex);
        return n.release();
    }
    else if(posnegindex != none)
    {
        // Check pos/neg again.  After testing for exponent, a pos/neg
        // at plevel 0 is syntax error
        SyntaxException e;

        e.SetStart(m_tokens[posnegindex].GetStart());
        e.SetEnd(m_tokens[posnegindex].GetEnd());
        throw(e);
    }
    else if(fgopen == start)
    {
        // Group parenthesis, make sure something in between them
        if(fgclose == end && fgclose > fgopen + 1)
        {
            return ParseRegion(fgopen + 1, fgclose - 1);
        }
        else
        {
            SyntaxException e;

            e.SetStart(m_tokens[fgopen].GetStart());
            if(fgclose == none)
                e.SetEnd(m_tokens[fgopen].GetEnd());
            else
                e.SetEnd(m_tokens[fgclose].GetEnd());

            throw(e);
        }
    }
    else if(fgopen == start + 1)
    {
        // Function: one token followed by a group
        if(fgclose == end)
        {
            // Find function list
            FunctionList<Value> *flist = m_expr->GetFunctionList();

            if(flist == 0)
            {
                NoFunctionListException e;

                e.SetStart(m_tokens[start].GetStart());
                e.SetEnd(m_tokens[start].GetEnd());
                throw(e);
            }

            // Get name
            string ident = m_tokens[start].GetIdentifier();

            // Create function node
            aptr(FunctionNode<Value>) n(flist->Create(ident, m_expr));

            if(n.get())
            {
                n->Parse(*this, fgopen, fgclose);
            }
            else
            {
                // The function list did not produce a node for this name
                NotFoundException e(ident);

                e.SetStart(m_tokens[start].GetStart());
                e.SetEnd(m_tokens[start].GetEnd());
                throw(e);
            }

            return n.release();
        }
        else
        {
            SyntaxException e;

            e.SetStart(m_tokens[fgopen].GetStart());
            if(fgclose == none)
                e.SetEnd(m_tokens[fgopen].GetEnd());
            else
                e.SetEnd(m_tokens[fgclose].GetEnd());

            throw(e);
        }
    }
    else if(start == end)
    {
        // Value, variable, or constant
        if(m_tokens[start].GetType() == Token::TypeIdentifier)
        {
            // Variable/constant
            aptr(Node<Value>) n(new VariableNode<Value>(m_expr));
            n->Parse(*this, start, end);
            return n.release();
        }
        else
        {
            // Value
            aptr(Node<Value>) n(new ValueNode<Value>(m_expr));
            n->Parse(*this, start, end);
            return n.release();
        }
    }
    else
    {
        // Unknown, syntax.  Report the whole region: the scan loop above
        // leaves pos at end + 1, which is past the last token whenever the
        // region reaches the end of the token list, so pos must not be used
        // as an index here.
        SyntaxException e;

        e.SetStart(m_tokens[start].GetStart());
        e.SetEnd(m_tokens[end].GetEnd());

        throw(e);
    }
}

// Get a token: unchecked access to the token stored at index pos.
template<class Value>
const Token &Parser<Value>::operator[] (Parser::size_type pos) const
{
    const Token &token = m_tokens[pos];
    return token;
}

// Build tokens
//
// Replaces the contents of m_tokens with the tokens found in exstr.  Each
// token records the inclusive character offsets it was read from.
//   - '#' starts a comment that runs until the next '\r' or '\n'.
//   - ( ) = + - * / ^ & , ; each become a single-character token.
//   - A digit or '.' starts a value token, converted with atof.
//   - A letter or '_' starts an identifier; identifiers may consist of
//     several parts joined by periods.
//   - Space and tab are skipped but still separate tokens.
// Throws EmptyExpressionException for a zero-length string and
// UnknownTokenException for any other character.
//
// The scans below may read exstr[exstr.length()]; for a const string that
// yields '\0', which matches none of the tests and ends the scan.
//
// NOTE(review): atof uses the current C locale's decimal point, so values
// such as "1.5" depend on the locale being compatible - confirm whether
// callers guarantee this.
template<class Value>
void Parser<Value>::BuildTokens(const string &exstr)
{
    m_tokens.clear();

    // Test zero-length expression
    if(exstr.empty())
    {
        throw(EmptyExpressionException());
    }

    bool comment = false;

    for(string::size_type pos = 0; pos < exstr.length(); pos++)
    {
        const char c = exstr[pos];

        // Comment start
        if(c == '#')
        {
            comment = true;
            continue;
        }

        // Newline ends comment
        if(c == '\r' || c == '\n')
        {
            comment = false;
            continue;
        }

        // Everything else inside a comment is ignored
        if(comment)
            continue;

        switch(c)
        {
            // Open parenthesis
            case '(':
                m_tokens.push_back(Token(Token::TypeOpenParenthesis, pos, pos));
                break;

            // Close parenthesis
            case ')':
                m_tokens.push_back(Token(Token::TypeCloseParenthesis, pos, pos));
                break;

            // Equal
            case '=':
                m_tokens.push_back(Token(Token::TypeEqual, pos, pos));
                break;

            // Plus
            case '+':
                m_tokens.push_back(Token(Token::TypePlus, pos, pos));
                break;

            // Hyphen
            case '-':
                m_tokens.push_back(Token(Token::TypeHyphen, pos, pos));
                break;

            // Asterisk
            case '*':
                m_tokens.push_back(Token(Token::TypeAsterisk, pos, pos));
                break;

            // Forward slash
            case '/':
                m_tokens.push_back(Token(Token::TypeForwardSlash, pos, pos));
                break;

            // Hat (exponent)
            case '^':
                m_tokens.push_back(Token(Token::TypeHat, pos, pos));
                break;

            // Ampersand
            case '&':
                m_tokens.push_back(Token(Token::TypeAmpersand, pos, pos));
                break;

            // Comma
            case ',':
                m_tokens.push_back(Token(Token::TypeComma, pos, pos));
                break;

            // Semicolon
            case ';':
                m_tokens.push_back(Token(Token::TypeSemicolon, pos, pos));
                break;

            // None of the above, but it may be an identifier or value
            default:
            {
                // First, test for value
                if(c == '.' || expreval_isdigit(c))
                {
                    // We are a value
                    const string::size_type start = pos;

                    // Digits before period
                    while(expreval_isdigit(exstr[pos]))
                        pos++;

                    // Period
                    if(exstr[pos] == '.')
                        pos++;

                    // Digits after period and exponent part.  The exponent
                    // marker is compared directly instead of via toupper:
                    // toupper is locale dependent and undefined for negative
                    // char values.  pos is past start here, so pos - 1 is
                    // a valid index.  The scan is permissive; atof converts
                    // the longest valid prefix of what was collected.
                    while(expreval_isdigit(exstr[pos]) ||
                          exstr[pos] == 'e' || exstr[pos] == 'E' ||
                          ((exstr[pos] == '+' || exstr[pos] == '-') &&
                           (exstr[pos - 1] == 'e' || exstr[pos - 1] == 'E')))
                        pos++;

                    // Create token
                    const string text = exstr.substr(start, pos - start);
                    m_tokens.push_back(Token(atof(text.c_str()), start, pos - 1));

                    // Move pos back so pos++ will set it right
                    pos--;
                }
                else if(c == '_' || expreval_isalpha(c))
                {
                    // We are an identifier
                    const string::size_type start = pos;
                    bool foundname = true; // Found name part

                    // An identifier can be multiple parts.  Each part
                    // is formed as an identifier and separated by a period.
                    // An identifier can not end in a period
                    //
                    // color1.red : 1 identifier token
                    // color1. : color1 is identifier, . begins new token
                    // color1.1red : Not value (part 2 is not right)
                    while(foundname)
                    {
                        // Part before period
                        while(exstr[pos] == '_' || expreval_isalnum(exstr[pos]))
                            pos++;

                        // Another part follows only when the period is
                        // directly followed by the start of a name.  When
                        // exstr[pos] is '.', pos + 1 is at most length().
                        foundname = (exstr[pos] == '.') &&
                                    (exstr[pos + 1] == '_' ||
                                     expreval_isalpha(exstr[pos + 1]));

                        // Step over the period that joins the parts
                        if(foundname)
                            pos++;
                    }

                    // Create token
                    m_tokens.push_back(Token(exstr.substr(start, pos - start), start, pos - 1));

                    // Move pos back so pos++ will set it right
                    pos--;
                }
                else if(expreval_isspace(c))
                {
                    // Do nothing, just ignore white space, but it still
                    // separates tokens
                }
                else
                {
                    // Unknown token
                    UnknownTokenException e;
                    e.SetStart(pos);
                    e.SetEnd(pos);

                    throw(e);
                }

                break;
            }
        }
    }
}


// Explicit instantiations.  The Parser member templates are defined in this
// source file, so the supported value types are instantiated here by name
// (presumably so other translation units can link against them).
namespace ExprEval {
template class Parser<double>;
template class Parser<autodiff::var>;
}