Files
IFEM/3rdparty/expreval/parser.cpp
Arne Morten Kvarving 327a855d1b expreval: make all classes a template over a scalar
instance for double and autodiff::Variable<double>
2023-10-11 08:52:55 +02:00

790 lines
22 KiB
C++

// File: parser.cpp
// Author: Brian Vanderburg II
// Purpose: Parser object to help parse expression
//------------------------------------------------------------------------------
// Includes
#include <new>
#include <memory>
#include <cstdlib>
#include "autodiff/reverse/var/var.hpp"
#include "defs.h"
#include "parser.h"
#include "node.h"
#include "except.h"
#include "funclist.h"
#include "expr.h"
using namespace std;
using namespace ExprEval;
// Private functions to solve locale problems
// ------------------------------------------
namespace
{
bool expreval_isalpha(char c)
{
return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z');
}
bool expreval_isdigit(char c)
{
return (c >= '0' && c <= '9');
}
bool expreval_isalnum(char c)
{
return expreval_isalpha(c) || expreval_isdigit(c);
}
bool expreval_isspace(char c)
{
return (c == ' ') || (c == '\t') || (c == '\r') || (c == '\n');
}
/* double expreval_atof(const char* str)
{
bool negative = false;
long double value = 0.0;
// Skip space
while(expreval_isspace(*str))
str++;
// Check for sign
if(*str == '-')
{
negative = true;
str++;
}
else if(*str == '+')
{
str++;
}
// The part before the decimal
while(*str >= '0' && *str <= '9')
{
value = (value * 10.0) + (double)(*str - '0');
str++;
}
// Decimal, if any
if(*str == '.')
{
long double divisor = 1.0;
str++;
// Part after the decimal, if any
while(*str >= '0' && *str <= '9')
{
value = (value * 10.0) + (double)(*str - '0');
divisor *= 10.0;
str++;
}
value /= divisor;
}
// The result
return negative ? -value : value;
}
*/
};
// Token
//------------------------------------------------------------------------------
// Constructor
Token::Token(TokenType type, string::size_type start, string::size_type end) :
m_type(type),
m_start(start),
m_end(end)
{
}
// Construct identifier token
Token::Token(const string &ident, string::size_type start, string::size_type end) :
m_type(Token::TypeIdentifier),
m_ident(ident),
m_start(start),
m_end(end)
{
}
// Construct value token
Token::Token(double value, string::size_type start, string::size_type end) :
m_type(Token::TypeValue),
m_value(value),
m_start(start),
m_end(end)
{
}
// Get type
Token::TokenType Token::GetType() const
{
return m_type;
}
// Get identifier
const string &Token::GetIdentifier() const
{
return m_ident;
}
// Get value
double Token::GetValue() const
{
return m_value;
}
// Get start
string::size_type Token::GetStart() const
{
return m_start;
}
string::size_type Token::GetEnd() const
{
return m_end;
}
// Parser
//------------------------------------------------------------------------------
// Constructor
template<class Value>
Parser<Value>::Parser(Expression<Value>* expr) : m_expr(expr)
{
if(expr == 0)
throw(NullPointerException("Parser::Parser"));
}
// Destructor
template<class Value>
Parser<Value>::~Parser()
{
}
// Parse an expression string
template<class Value>
Node<Value> *Parser<Value>::Parse(const string &exstr)
{
BuildTokens(exstr);
// Make sure it is not still empty
if(m_tokens.size() == 0)
throw(EmptyExpressionException());
// Parse the range
return ParseRegion(0, m_tokens.size() - 1);
}
// Parse a region of tokens
template<class Value>
Node<Value> *Parser<Value>::ParseRegion(Parser::size_type start, Parser::size_type end)
{
size_type pos;
size_type fgopen = (size_type)-1;
size_type fgclose = (size_type)-1;
size_type assignindex = (size_type)-1;
size_type addsubindex = (size_type)-1;
size_type muldivindex = (size_type)-1;
size_type posnegindex = (size_type)-1;
size_type expindex = (size_type)-1;
bool multiexpr = false;
int plevel = 0;
// Check simple syntax
if(start > end)
throw(SyntaxException());
// Scan through tokens
for(pos = start; pos <= end; pos++)
{
switch(m_tokens[pos].GetType())
{
case Token::TypeOpenParenthesis:
{
plevel++;
// Opening of first group?
if(plevel == 1 && fgopen == (size_type)-1)
fgopen = pos;
break;
};
case Token::TypeCloseParenthesis:
{
plevel--;
// First group closed?
if(plevel == 0 && fgclose == (size_type)-1)
fgclose = pos;
if(plevel < 0)
{
UnmatchedParenthesisException e;
e.SetStart(m_tokens[pos].GetStart());
e.SetEnd(m_tokens[pos].GetEnd());
throw(e);
}
break;
}
case Token::TypeEqual:
{
if(plevel == 0)
{
if(assignindex == (size_type)-1)
assignindex = pos;
}
break;
}
case Token::TypeAsterisk:
case Token::TypeForwardSlash:
{
if(plevel == 0)
{
muldivindex = pos;
}
break;
}
case Token::TypeHat:
{
if(plevel == 0)
{
expindex = pos;
}
break;
}
case Token::TypePlus:
case Token::TypeHyphen:
{
if(plevel == 0)
{
if(pos == start)
{
// Positive or negative sign
if(posnegindex == (size_type)-1)
posnegindex = pos;
}
else
{
// What is before us
switch(m_tokens[pos - 1].GetType())
{
case Token::TypeEqual:
case Token::TypePlus:
case Token::TypeHyphen:
case Token::TypeAsterisk:
case Token::TypeForwardSlash:
case Token::TypeHat:
// After any of these, we are a positive/negative
if(posnegindex == (size_type)-1)
posnegindex = pos;
break;
default:
// After any other, we are addition/subtration
addsubindex = pos;
break;
}
}
}
break;
}
case Token::TypeSemicolon:
{
if(plevel == 0)
{
multiexpr = true;
}
break;
}
default:
break;
}
}
// plevel should be 0
if(plevel != 0)
{
UnmatchedParenthesisException e;
e.SetStart(end);
e.SetEnd(end);
throw(e);
}
// Parse in certain order to maintain order of operators
// Multi-expression first
if(multiexpr)
{
aptr(Node<Value>) n(new MultiNode<Value>(m_expr));
n->Parse(*this, start, end);
return n.release();
}
else if(assignindex != (size_type)-1)
{
// Assignment next
aptr(Node<Value>) n(new AssignNode<Value>(m_expr));
n->Parse(*this, start, end, assignindex);
return n.release();
}
else if(addsubindex != (size_type)-1)
{
// Addition/subtraction next
if(m_tokens[addsubindex].GetType() == Token::TypePlus)
{
// Addition
aptr(Node<Value>) n(new AddNode<Value>(m_expr));
n->Parse(*this, start, end, addsubindex);
return n.release();
}
else
{
// Subtraction
aptr(Node<Value>) n(new SubtractNode<Value>(m_expr));
n->Parse(*this, start, end, addsubindex);
return n.release();
}
}
else if(muldivindex != (size_type)-1)
{
// Multiplication/division next
if(m_tokens[muldivindex].GetType() == Token::TypeAsterisk)
{
// Multiplication
aptr(Node<Value>) n(new MultiplyNode<Value>(m_expr));
n->Parse(*this, start, end, muldivindex);
return n.release();
}
else
{
// Division
aptr(Node<Value>) n(new DivideNode<Value>(m_expr));
n->Parse(*this, start, end, muldivindex);
return n.release();
}
}
else if(posnegindex == start)
{
// Positive/negative next, must be at start and check before exponent
if(m_tokens[posnegindex].GetType() == Token::TypePlus)
{
// Positive
return ParseRegion(posnegindex + 1, end);
}
else
{
aptr(Node<Value>) n(new NegateNode<Value>(m_expr));
n->Parse(*this, start, end, posnegindex);
return n.release();
}
}
else if(expindex != (size_type)-1)
{
// Exponent
aptr(Node<Value>) n(new ExponentNode<Value>(m_expr));
n->Parse(*this, start, end, expindex);
return n.release();
}
else if(posnegindex != (size_type)-1)
{
// Check pos/neg again. After testing for exponent, a pos/neg
// at plevel 0 is syntax error
SyntaxException e;
e.SetStart(m_tokens[posnegindex].GetStart());
e.SetEnd(m_tokens[posnegindex].GetEnd());
throw(e);
}
else if(fgopen == start)
{
// Group parenthesis, make sure something in between them
if(fgclose == end && fgclose > fgopen + 1)
{
return ParseRegion(fgopen + 1, fgclose - 1);
}
else
{
SyntaxException e;
e.SetStart(m_tokens[fgopen].GetStart());
if(fgclose == (size_type)-1)
e.SetEnd(m_tokens[fgopen].GetEnd());
else
e.SetEnd(m_tokens[fgclose].GetEnd());
throw(e);
}
}
else if(fgopen == start + 1)
{
// Function
if(fgclose == end)
{
// Find function list
FunctionList<Value> *flist = m_expr->GetFunctionList();
if(flist == 0)
{
NoFunctionListException e;
e.SetStart(m_tokens[start].GetStart());
e.SetEnd(m_tokens[start].GetEnd());
throw(e);
}
// Get name
string ident = m_tokens[start].GetIdentifier();
// Create function node
aptr(FunctionNode<Value>) n(flist->Create(ident, m_expr));
if(n.get())
{
n->Parse(*this, fgopen, fgclose);
}
else
{
NotFoundException e(ident);
e.SetStart(m_tokens[start].GetStart());
e.SetEnd(m_tokens[start].GetEnd());
throw(e);
}
return n.release();
}
else
{
SyntaxException e;
e.SetStart(m_tokens[fgopen].GetStart());
if(fgclose == (size_type)-1)
e.SetEnd(m_tokens[fgopen].GetEnd());
else
e.SetEnd(m_tokens[fgclose].GetEnd());
throw(e);
}
}
else if(start == end)
{
// Value, variable, or constant
if(m_tokens[start].GetType() == Token::TypeIdentifier)
{
// Variable/constant
aptr(Node<Value>) n(new VariableNode<Value>(m_expr));
n->Parse(*this, start, end);
return n.release();
}
else
{
// Value
aptr(Node<Value>) n(new ValueNode<Value>(m_expr));
n->Parse(*this, start, end);
return n.release();
}
}
else
{
// Unknown, syntax
SyntaxException e;
e.SetStart(m_tokens[pos].GetStart());
e.SetEnd(m_tokens[pos].GetEnd());
throw(e);
}
}
// Get a token
template<class Value>
const Token &Parser<Value>::operator[] (Parser::size_type pos) const
{
return m_tokens[pos];
}
// Build tokens
template<class Value>
void Parser<Value>::BuildTokens(const string &exstr)
{
m_tokens.clear();
// Test zero-length expression
if(exstr.length() == 0)
{
throw(EmptyExpressionException());
}
// Search through list
string::size_type pos;
bool comment = false;
for(pos = 0; pos < exstr.length(); pos++)
{
// Take action based on character
switch(exstr[pos])
{
// Comment
case '#':
{
comment = true;
break;
}
// Newline ends comment
case '\r':
case '\n':
{
comment = false;
break;
}
// Open parenthesis
case '(':
{
if(comment == false)
{
m_tokens.push_back(Token(Token::TypeOpenParenthesis, pos, pos));
}
break;
}
// Close parenthesis
case ')':
{
if(comment == false)
{
m_tokens.push_back(Token(Token::TypeCloseParenthesis, pos, pos));
}
break;
}
// Equal
case '=':
{
if(comment == false)
{
m_tokens.push_back(Token(Token::TypeEqual, pos, pos));
}
break;
}
// Plus
case '+':
{
if(comment == false)
{
m_tokens.push_back(Token(Token::TypePlus, pos, pos));
}
break;
}
// Hyphen
case '-':
{
if(comment == false)
{
m_tokens.push_back(Token(Token::TypeHyphen, pos, pos));
}
break;
}
// Asterisk
case '*':
{
if(comment == false)
{
m_tokens.push_back(Token(Token::TypeAsterisk, pos, pos));
}
break;
}
// Forward slash
case '/':
{
if(comment == false)
{
m_tokens.push_back(Token(Token::TypeForwardSlash, pos, pos));
}
break;
}
// Hat (exponent)
case '^':
{
if(comment == false)
{
m_tokens.push_back(Token(Token::TypeHat, pos, pos));
}
break;
}
// Ampersand
case '&':
{
if(comment == false)
{
m_tokens.push_back(Token(Token::TypeAmpersand, pos, pos));
}
break;
}
// Comma
case ',':
{
if(comment == false)
{
m_tokens.push_back(Token(Token::TypeComma, pos, pos));
}
break;
}
// Semicolon
case ';':
{
if(comment == false)
{
m_tokens.push_back(Token(Token::TypeSemicolon, pos, pos));
}
break;
}
// None of the above, but it may be an identifier or value
default:
{
if(comment == false)
{
// First, test for value
if(exstr[pos] == '.' || expreval_isdigit(exstr[pos]))
{
// We are a value
string::size_type start = pos;
// Digits before period
while(expreval_isdigit(exstr[pos]))
pos++;
// Period
if(exstr[pos] == '.')
pos++;
// Digits after period
while(expreval_isdigit(exstr[pos]) ||
toupper(exstr[pos]) == 'E' ||
((exstr[pos] == '+' || exstr[pos] == '-') &&
toupper(exstr[pos-1]) =='E'))
pos++;
// Create token
string ident = exstr.substr(start, pos - start);
m_tokens.push_back(Token(atof(ident.c_str()), start, pos - 1));
// Move pos back so pos++ will set it right
pos--;
}
else if(exstr[pos] == '_' || expreval_isalpha(exstr[pos]))
{
// We are an identifier
string::size_type start = pos;
bool foundname = true; // Found name part
// Search for name, then period, etc
// An identifier can be multiple parts. Each part
// is formed as an identifier and seperated by a period,
// An identifier can not end in a period
//
// color1.red : 1 identifier token
// color1. : color1 is identifier, . begins new token
// color1.1red : Not value (part 2 is not right)
while(foundname)
{
// Part before period
while(exstr[pos] == '_' || expreval_isalnum(exstr[pos]))
pos++;
// Is there a period
if(exstr[pos] == '.')
{
pos++;
// There is a period, look for the name again
if(exstr[pos] == '_' || expreval_isalpha(exstr[pos]))
{
foundname = true;
}
else
{
// No name after period
foundname = false;
// Remove period from identifier
pos--;
}
}
else
{
// No period after name, so no new name
foundname = false;
}
}
// Create token
m_tokens.push_back(Token(exstr.substr(start, pos - start), start, pos - 1));
// Move pos back so pos++ will set it right
pos--;
}
else if(expreval_isspace(exstr[pos]))
{
// Do nothing, just ignore white space, but it still
// seperates tokens
}
else
{
// Unknown token
UnknownTokenException e;
e.SetStart(pos);
e.SetEnd(pos);
throw(e);
}
}
break;
}
}
}
}
namespace ExprEval {
template class Parser<double>;
template class Parser<autodiff::var>;
}