kicad/common/libeval_compiler/libeval_compiler.cpp

841 lines
21 KiB
C++
Raw Normal View History

2020-06-04 11:04:03 +00:00
/*
This file is part of libeval, a simple math expression evaluator
Copyright (C) 2017 Michael Geselbracht, mgeselbracht3@gmail.com
Copyright (C) 2019-2020 KiCad Developers, see AUTHORS.txt for contributors.
2020-06-04 11:04:03 +00:00
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
#include <set>
#include <vector>
#ifdef DEBUG
#include <stdarg.h>
#endif
#include <libeval_compiler/libeval_compiler.h>
2020-06-04 11:04:03 +00:00
/* The (generated) lemon parser is written in C.
* To keep its symbols out of the global namespace, the parser code is included
* inside a C++ namespace.
*/
namespace LIBEVAL
{
#ifdef __GNUC__
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wunused-variable"
#pragma GCC diagnostic ignored "-Wsign-compare"
#endif
#include "grammar.c"
#include "grammar.h"
#ifdef __GNUC__
#pragma GCC diagnostic pop
#endif
/**
 * Debug trace helper taking a printf-style format string.
 *
 * Compiles to a no-op unless DEBUG is defined.  In DEBUG builds the message is written to
 * stderr, prefixed with "libeval: ", but only when @a level is below -10.
 *
 * @param level verbosity level of the message.
 * @param fmt   printf-style format string, followed by its arguments.
 */
static void libeval_dbg( int level, const char* fmt, ... )
{
#ifdef DEBUG
    // fixme: tom's debugging.
    if( level >= -10 )
        return;

    va_list args;

    va_start( args, fmt );
    fprintf( stderr, "libeval: " );
    vfprintf( stderr, fmt, args );
    va_end( args );
#endif
}
/**
 * Translate an operator opcode into its mnemonic for debug dumps.
 *
 * @param op opcode to look up.
 * @return the mnemonic of the matching table entry, or "???" when the opcode is not listed.
 */
static const std::string formatOpName( int op )
{
    struct OP_NAME
    {
        int         op;
        std::string mnemonic;
    };

    // The table is terminated by an entry with a negative opcode.
    static const OP_NAME opNames[] = {
        { TR_OP_MUL, "MUL" },
        { TR_OP_DIV, "DIV" },
        { TR_OP_ADD, "ADD" },
        { TR_OP_SUB, "SUB" },
        { TR_OP_LESS, "LESS" },
        { TR_OP_GREATER, "GREATER" },
        { TR_OP_LESS_EQUAL, "LESS_EQUAL" },
        { TR_OP_GREATER_EQUAL, "GREATER_EQUAL" },
        { TR_OP_EQUAL, "EQUAL" },
        { TR_OP_NOT_EQUAL, "NEQUAL" },
        { TR_OP_BOOL_AND, "AND" },
        { TR_OP_BOOL_OR, "OR" },
        { TR_OP_BOOL_NOT, "NOT" },
        { -1, "" }
    };

    for( const OP_NAME* entry = opNames; entry->op >= 0; ++entry )
    {
        if( entry->op == op )
            return entry->mnemonic;
    }

    return "???";
}
std::string UOP::Format() const
2020-06-04 11:04:03 +00:00
{
char str[1024];
switch( m_op )
{
case TR_UOP_PUSH_VAR:
sprintf( str, "PUSH VAR [%p]", m_arg );
break;
case TR_UOP_PUSH_VALUE:
{
auto val = reinterpret_cast<VALUE*>( m_arg );
if( val->GetType() == VT_NUMERIC )
sprintf( str, "PUSH NUM [%.10f]", val->AsDouble() );
else
sprintf( str, "PUSH STR [%s]", val->AsString().c_str() );
break;
}
case TR_OP_METHOD_CALL:
sprintf(str, "MCALL" );
break;
case TR_OP_FUNC_CALL:
sprintf(str, "FCALL" );
break;
2020-06-04 11:04:03 +00:00
default:
sprintf( str, "%s %d", formatOpName( m_op ).c_str(), m_op );
2020-06-04 11:04:03 +00:00
break;
}
return str;
}
/**
 * Build a textual listing of the micro-code: the Format() output of every micro-op,
 * each followed by a newline.
 *
 * @return the listing (empty when there are no micro-ops).
 */
std::string UCODE::Dump() const
{
    std::string listing;

    for( const auto& uop : m_ucode )
        listing.append( uop->Format() ).append( "\n" );

    return listing;
}
/**
 * Collect the run of characters starting at the current position for which @a cond holds.
 * The tokenizer position itself is not advanced.
 *
 * @param cond predicate evaluated for each character in turn; collection stops at the
 *             first character it rejects or at the end of the input.
 * @return the collected characters (possibly empty).
 */
std::string TOKENIZER::GetChars( std::function<bool( int )> cond ) const
{
    std::string collected;

    for( size_t i = m_pos; i < m_str.length(); ++i )
    {
        if( !cond( m_str[i] ) )
            break;

        collected += m_str[i];
    }

    return collected;
}
/**
 * Test whether the input at the current position starts with @a match and the match is
 * properly delimited.
 *
 * @param match    text expected at the current position.
 * @param stopCond predicate applied to the character right after the match; it is not
 *                 consulted when the match reaches the end of the input.
 * @return true when @a match is found and is either at the end of the input or followed by
 *         a character accepted by @a stopCond.
 */
bool TOKENIZER::MatchAhead( std::string match, std::function<bool( int )> stopCond ) const
{
    const int wanted = (int) match.length();
    const int available = m_str.length() - m_pos;

    if( available < wanted )
        return false;

    if( m_str.compare( m_pos, match.length(), match ) != 0 )
        return false;

    // A match that consumes the rest of the input needs no delimiter.
    if( available == wanted )
        return true;

    return stopCond( m_str[m_pos + match.length()] );
}
/**
 * Set up a compiler with no pending error, '.' as decimal separator, a fresh unit resolver
 * and a newly allocated lemon parser.
 */
COMPILER::COMPILER()
{
    m_parseFinished = false;
    m_localeDecimalSeparator = '.';
    m_errorStatus.pendingError = false;

    m_unitResolver.reset( new UNIT_RESOLVER );

    // The parser state is allocated with malloc and released with free in the destructor.
    m_parser = LIBEVAL::ParseAlloc( malloc );
}
/**
 * Release the lemon parser and reset the tokenizer.
 */
COMPILER::~COMPILER()
{
    auto parser = m_parser;

    // Allow explicit call to destructor: forget the parser before it is released.
    m_parser = nullptr;
    LIBEVAL::ParseFree( parser, free );

    Clear();
}
/**
 * Reset the compiler's input state by clearing the tokenizer.
 */
void COMPILER::Clear()
{
    m_tokenizer.Clear();
}
/**
 * Record a parse error: flags the error status as pending and stores the message.
 *
 * @param s error message text.
 */
void COMPILER::parseError( const char* s )
{
    m_errorStatus.message = s;
    m_errorStatus.pendingError = true;

    libeval_dbg(0, "PARSE ERROR: %s\n", s );
}
/**
 * Mark the current parse as finished; Compile() stops feeding tokens once this is set.
 */
void COMPILER::parseOk()
{
    m_parseFinished = true;
}
bool COMPILER::Compile( const std::string& aString, UCODE* aCode )
{
// Feed parser token after token until end of input.
2020-06-04 11:04:03 +00:00
newString( aString );
m_errorStatus.pendingError = false;
2020-06-04 11:04:03 +00:00
m_tree = nullptr;
m_parseFinished = false;
T_TOKEN tok;
libeval_dbg(0, "str: '%s' empty: %d\n", aString.c_str(), !!aString.empty() );
2020-06-04 11:04:03 +00:00
if( aString.empty() )
{
m_parseFinished = true;
return generateUCode( aCode );
}
do
{
tok = getToken();
libeval_dbg(10, "parse: tok %d\n", tok.token );
2020-06-04 11:04:03 +00:00
Parse( m_parser, tok.token, tok.value, this );
if( m_errorStatus.pendingError )
2020-06-04 11:04:03 +00:00
{
m_errorStatus.stage = ERROR_STATUS::CST_PARSE;
m_errorStatus.failingPosition = m_tokenizer.GetPos();
m_errorStatus.failingObject = tok.value.value.str;
m_errorStatus.message = "Parse error";
2020-06-04 11:04:03 +00:00
return false;
}
if( m_parseFinished || tok.token == G_ENDS )
{
// Reset parser by passing zero as token ID, value is ignored.
Parse( m_parser, 0, tok.value, this );
break;
}
} while( tok.token );
return generateUCode( aCode );
}
/**
 * Prepare the compiler for a new input string: clears the previous input state, restarts
 * the tokenizer on @a aString and puts the lexer back into its default state.
 *
 * @param aString expression text to tokenize next.
 */
void COMPILER::newString( const std::string& aString )
{
    Clear();
    m_tokenizer.Restart( aString );

    m_parseFinished = false;
    m_lexerState = LS_DEFAULT;
}
/**
 * Fetch the next token from the input.
 *
 * Dispatches to the lexer routine selected by the current lexer state and repeats until
 * one of them reports that a complete token was produced.
 *
 * @return the token produced by the lexer.
 */
COMPILER::T_TOKEN COMPILER::getToken()
{
    T_TOKEN token;

    for( bool complete = false; !complete; )
    {
        switch( m_lexerState )
        {
        case LS_DEFAULT:
            complete = lexDefault( token );
            break;

        case LS_STRING:
            complete = lexString( token );
            break;
        }
    }

    return token;
}
/**
 * Lex the body of a string literal (the opening quote has already been consumed).
 *
 * Everything up to the next '"' becomes a G_STRING token, copied into the token's buffer
 * with a limit of LIBEVAL_MAX_LITERAL_LENGTH.  The tokenizer is advanced past the literal
 * plus one more character, and the lexer returns to its default state.
 *
 * @param aToken receives the string token.
 * @return always true (a token was produced).
 */
bool COMPILER::lexString( COMPILER::T_TOKEN& aToken )
{
    const auto notQuote = []( int c ) -> bool { return c != '"'; };
    const std::string literal = m_tokenizer.GetChars( notQuote );

    aToken.token = G_STRING;
    snprintf( aToken.value.value.str, LIBEVAL_MAX_LITERAL_LENGTH, "%s", literal.c_str() );

    // Skip the literal and one following character.
    m_tokenizer.NextChar( literal.length() + 1 );
    m_lexerState = LS_DEFAULT;

    return true;
}
/**
 * Try to match one of the unit resolver's supported unit names at the current position.
 *
 * A unit name only matches when it is followed by a non-alphanumeric character or by the
 * end of the input.  On a match the tokenizer is advanced past the unit name.
 *
 * @return the index of the matched unit in the supported-units sequence, or -1 when no
 *         unit matches.
 */
int COMPILER::resolveUnits()
{
    const auto endsUnit = []( int c ) -> bool { return !isalnum( c ); };
    int        index = 0;

    for( auto unitName : m_unitResolver->GetSupportedUnits() )
    {
        if( !m_tokenizer.MatchAhead( unitName, endsUnit ) )
        {
            index++;
            continue;
        }

        libeval_dbg(10, "Match unit '%s'\n", unitName.c_str() );
        m_tokenizer.NextChar( unitName.length() );
        return index;
    }

    return -1;
}
/**
 * Lex one token in the default lexer state.
 *
 * Leading spaces are skipped, then the input is classified in this order: end of input,
 * number, unit, string literal start, identifier, two-character operator, single-character
 * operator.  An unrecognised character is reported through m_errorStatus (with the pending
 * flag set) and yields a G_ENDS token.
 *
 * @param aToken receives the token (left untouched when a string literal starts).
 * @return true when a token was stored in @a aToken; false when the lexer merely switched
 *         to the string state and must be called again.
 */
bool COMPILER::lexDefault( COMPILER::T_TOKEN& aToken )
{
    T_TOKEN     retval;
    std::string current;
    int         convertFrom;

    retval.token = G_ENDS;

    // Tokens without a literal must still carry a valid (empty) C string: Compile() reads
    // the token's string when it fills in the failing object of a parse error.
    retval.value.value.str[0] = '\0';

    if( m_tokenizer.Done() )
    {
        aToken = retval;
        return true;
    }

    auto isDecimalSeparator = [&]( char ch ) -> bool {
        return ( ch == m_localeDecimalSeparator || ch == '.' || ch == ',' );
    };

    // Lambda: collect the digits (and at most one decimal separator) of a number into
    // 'current', advancing the tokenizer past them.
    auto extractNumber = [&]() {
        bool haveSeparator = false;
        auto ch = m_tokenizer.GetChar();

        do
        {
            // A second separator ends the number.
            if( isDecimalSeparator( ch ) && haveSeparator )
                break;

            current.append( 1, ch );

            if( isDecimalSeparator( ch ) )
                haveSeparator = true;

            m_tokenizer.NextChar();
            ch = m_tokenizer.GetChar();
        } while( isdigit( static_cast<unsigned char>( ch ) ) || isDecimalSeparator( ch ) );

        // Ensure that the systems decimal separator is used
        for( int i = current.length(); i; i-- )
        {
            if( isDecimalSeparator( current[i - 1] ) )
                current[i - 1] = m_localeDecimalSeparator;
        }
    };

    int ch;

    // Start processing of first/next token: Remove whitespace
    for( ;; )
    {
        ch = m_tokenizer.GetChar();

        if( ch == ' ' )
            m_tokenizer.NextChar();
        else
            break;
    }

    libeval_dbg(10, "LEX ch '%c' pos %d\n", ch, static_cast<int>( m_tokenizer.GetPos() ) );

    // The <cctype> classifiers require a value representable as unsigned char.
    const int cls = static_cast<unsigned char>( ch );

    if( ch == 0 )
    {
        /* End of input */
    }
    else if( isdigit( cls ) )
    {
        // VALUE
        extractNumber();
        retval.token = G_VALUE;
        snprintf( retval.value.value.str, LIBEVAL_MAX_LITERAL_LENGTH, "%s", current.c_str() );
    }
    else if( ( convertFrom = resolveUnits() ) >= 0 )
    {
        // UNIT
        // Units are appended to a VALUE.
        // Determine factor to default unit if unit for value is given.
        // Example: Default is mm, unit is inch: factor is 25.4
        // The factor is assigned to the terminal UNIT. The actual
        // conversion is done within a parser action.
        retval.token = G_UNIT;
        retval.value.value.type = convertFrom;
    }
    else if( ch == '\"' ) // string literal
    {
        // Consume the opening quote; the literal itself is lexed in the string state.
        m_lexerState = LS_STRING;
        m_tokenizer.NextChar();
        return false;
    }
    else if( isalpha( cls ) )
    {
        // IDENTIFIER: a letter followed by any number of letters and digits.
        current = m_tokenizer.GetChars(
                []( int c ) -> bool { return isalnum( static_cast<unsigned char>( c ) ); } );
        retval.token = G_IDENTIFIER;
        snprintf( retval.value.value.str, LIBEVAL_MAX_LITERAL_LENGTH, "%s", current.c_str() );
        m_tokenizer.NextChar( current.length() );
    }
    else if( m_tokenizer.MatchAhead( "==", []( int c ) -> bool { return c != '='; } ) )
    {
        retval.token = G_EQUAL;
        m_tokenizer.NextChar( 2 );
    }
    else if( m_tokenizer.MatchAhead( "!=", []( int c ) -> bool { return c != '='; } ) )
    {
        retval.token = G_NOT_EQUAL;
        m_tokenizer.NextChar( 2 );
    }
    else if( m_tokenizer.MatchAhead( "<=", []( int c ) -> bool { return c != '='; } ) )
    {
        retval.token = G_LESS_EQUAL_THAN;
        m_tokenizer.NextChar( 2 );
    }
    else if( m_tokenizer.MatchAhead( ">=", []( int c ) -> bool { return c != '='; } ) )
    {
        retval.token = G_GREATER_EQUAL_THAN;
        m_tokenizer.NextChar( 2 );
    }
    else if( m_tokenizer.MatchAhead( "&&", []( int c ) -> bool { return c != '&'; } ) )
    {
        retval.token = G_BOOL_AND;
        m_tokenizer.NextChar( 2 );
    }
    else if( m_tokenizer.MatchAhead( "||", []( int c ) -> bool { return c != '|'; } ) )
    {
        retval.token = G_BOOL_OR;
        m_tokenizer.NextChar( 2 );
    }
    else
    {
        // Single char tokens
        switch( ch )
        {
        case '+':
            retval.token = G_PLUS;
            break;

        case '!':
            retval.token = G_BOOL_NOT;
            break;

        case '-':
            retval.token = G_MINUS;
            break;

        case '*':
            retval.token = G_MULT;
            break;

        case '/':
            retval.token = G_DIVIDE;
            break;

        case '<':
            retval.token = G_LESS_THAN;
            break;

        case '>':
            retval.token = G_GREATER_THAN;
            break;

        case '(':
            retval.token = G_PARENL;
            break;

        case ')':
            retval.token = G_PARENR;
            break;

        case ';':
            retval.token = G_SEMCOL;
            break;

        case '.':
            retval.token = G_STRUCT_REF;
            break;

        default:
            /* invalid character */
            // Flag the error as pending; without the flag the details recorded here are
            // never acted upon and the input is silently cut off at this character.
            m_errorStatus.pendingError = true;
            m_errorStatus.stage = ERROR_STATUS::CST_PARSE;
            m_errorStatus.failingPosition = m_tokenizer.GetPos();
            m_errorStatus.failingObject = std::string( 1, static_cast<char>( ch ) );
            m_errorStatus.message = "Syntax error";
            break;
        }

        m_tokenizer.NextChar();
    }

    aToken = retval;
    return true;
}
/**
 * Return the literal text stored in a tree node.
 *
 * The string is built directly from the node's buffer, so there is no intermediate
 * fixed-size buffer that a long literal could overflow.
 *
 * @param tok node whose value string is returned; must not be null.
 * @return a copy of the node's value string.
 */
const std::string formatNode( TREE_NODE* tok )
{
    return std::string( tok->value.str );
}
void dumpNode( std::string& buf, TREE_NODE* tok, int depth = 0 )
{
char str[1024];
sprintf( str, "\n[%p L0:%-20p L1:%-20p] ", tok, tok->leaf[0], tok->leaf[1] ); //[tok %p] ", tok);
2020-06-04 11:04:03 +00:00
buf += str;
for( int i = 0; i < 2 * depth; i++ )
buf += " ";
if( tok->op & TR_OP_BINARY_MASK )
{
buf += formatOpName( tok->op );
2020-06-04 11:04:03 +00:00
dumpNode( buf, tok->leaf[0], depth + 1 );
dumpNode( buf, tok->leaf[1], depth + 1 );
}
switch( tok->op )
{
case TR_NUMBER:
buf += "NUMERIC: ";
buf += formatNode( tok );
2020-06-04 11:04:03 +00:00
if( tok->leaf[0] )
dumpNode( buf, tok->leaf[0], depth + 1 );
2020-06-04 11:04:03 +00:00
break;
case TR_STRING:
buf += "STRING: ";
buf += formatNode( tok );
2020-06-04 11:04:03 +00:00
break;
case TR_IDENTIFIER:
buf += "ID: ";
buf += formatNode( tok );
2020-06-04 11:04:03 +00:00
break;
case TR_STRUCT_REF:
buf += "SREF: ";
2020-06-04 11:04:03 +00:00
dumpNode( buf, tok->leaf[0], depth + 1 );
dumpNode( buf, tok->leaf[1], depth + 1 );
break;
case TR_OP_FUNC_CALL:
buf += "CALL '";
buf += tok->leaf[0]->value.str;
buf += "': ";
dumpNode( buf, tok->leaf[1], depth + 1 );
break;
2020-06-04 11:04:03 +00:00
case TR_UNIT:
sprintf( str, "UNIT: %d ", tok->value.type );
buf += str;
break;
}
}
/**
 * Return the compiler's current error status.
 *
 * @return a copy of the status maintained by parseError(), ReportError() and the
 *         compilation stages.
 */
ERROR_STATUS COMPILER::GetErrorStatus()
{
    return m_errorStatus;
}
/**
 * Flag an error as pending and store its message in the compiler's error status.
 *
 * @param aErrorMsg message describing the error.
 */
void COMPILER::ReportError( const std::string aErrorMsg )
{
    m_errorStatus.message = aErrorMsg;
    m_errorStatus.pendingError = true;
}
2020-06-04 11:04:03 +00:00
/**
 * Install the root of the parse tree: the compiler keeps the node returned by copyNode()
 * for @a root.
 *
 * @param root root node of the parsed expression.
 */
void COMPILER::setRoot( TREE_NODE root )
{
    TREE_NODE* rootCopy = copyNode( root );

    m_tree = rootCopy;
}
/**
 * Translate the parse tree into micro-code.
 *
 * The tree is walked iteratively with an explicit stack.  A node's micro-op is appended to
 * @a aCode once the node is terminal or both of its leaves have been visited, so operands
 * are emitted before the operation that consumes them.
 *
 * @param aCode micro-code container receiving the operations.
 * @return true on success; false when there is no parse tree (e.g. an empty expression)
 *         or when a variable or function reference cannot be resolved.
 */
bool COMPILER::generateUCode( UCODE* aCode )
{
    std::vector<TREE_NODE*> stack;
    std::set<TREE_NODE*>    visitedNodes;

    auto visited = [&]( TREE_NODE* node ) -> bool {
        return visitedNodes.find( node ) != visitedNodes.end();
    };

    // Compile() clears m_tree and calls this function directly for an empty string, so a
    // missing tree is a regular condition that must be reported instead of dereferenced.
    if( !m_tree )
    {
        m_errorStatus.pendingError = true;
        m_errorStatus.stage = ERROR_STATUS::CST_CODEGEN;
        m_errorStatus.message = "Empty expression";
        return false;
    }

    stack.push_back( m_tree );

    std::string dump;

    dumpNode( dump, m_tree, 0 );
    libeval_dbg(3,"Tree dump:\n%s\n\n", dump.c_str() );

    while( !stack.empty() )
    {
        auto node = stack.back();

        libeval_dbg( 4, "process node %p [op %d] [stack %d]\n", static_cast<void*>( node ),
                     node->op, static_cast<int>( stack.size() ) );

        // process terminal nodes first
        // NOTE(review): this switch runs every time the node is on top of the stack, i.e.
        // again after each leaf has been processed, so makeUop()/createVarRef() are called
        // more than once for non-terminal nodes - confirm this is harmless.
        switch( node->op )
        {
        case TR_OP_FUNC_CALL:
            break;

        case TR_STRUCT_REF:
        {
            assert( node->leaf[0]->op == TR_IDENTIFIER );

            switch( node->leaf[1]->op )
            {
            case TR_IDENTIFIER:
            {
                // "object.field": push the referenced variable.
                auto vref = aCode->createVarRef( this, node->leaf[0]->value.str,
                                                 node->leaf[1]->value.str );

                if( m_errorStatus.pendingError )
                {
                    libeval_dbg(4, "varref fail\n");
                    return false;
                }

                node->uop = makeUop( TR_UOP_PUSH_VAR, vref );
                node->isTerminal = true;
                break;
            }

            case TR_OP_FUNC_CALL:
            {
                // "object.method(...)": resolve the object and the method.
                libeval_dbg(4, "got a method call... [%s], this = %s\n", node->leaf[1]->leaf[0]->value.str, node->leaf[0]->value.str);

                auto vref = aCode->createVarRef( this, node->leaf[0]->value.str, "");
                auto func = aCode->createFuncCall( this, node->leaf[1]->leaf[0]->value.str );

                if(!func)
                {
                    m_errorStatus.pendingError = true;
                    m_errorStatus.stage = ERROR_STATUS::CST_CODEGEN;
                    libeval_dbg(0, "unable to resolve func %s\n", node->leaf[1]->leaf[0]->value.str );
                    return false;
                    // fixme: generate a message
                }

                // The object and the method name are consumed here; keep the traversal
                // from emitting code for them.
                visitedNodes.insert( node->leaf[0] );
                visitedNodes.insert( node->leaf[1]->leaf[0] );

                node->uop = makeUop( TR_OP_METHOD_CALL, func, vref );
                node->isTerminal = false;
                break;
            }
            }

            break;
        }

        case TR_NUMBER:
        {
            auto   son = node->leaf[0];
            double value = atof( node->value.str ); // fixme: locale

            if( son && son->op == TR_UNIT )
            {
                // A unit leaf turns the literal into a converted value.
                value = m_unitResolver->Convert( node->value.str, son->value.type );
                visitedNodes.insert( son );
            }

            node->uop = makeUop( TR_UOP_PUSH_VALUE, value );
            node->isTerminal = true;
            break;
        }

        case TR_STRING:
        {
            node->uop = makeUop( TR_UOP_PUSH_VALUE, node->value.str );
            node->isTerminal = true;
            break;
        }

        default:
            node->uop = makeUop( node->op );
            break;
        }

        // Descend into the first not yet visited leaf before emitting this node.
        if( !node->isTerminal && node->leaf[0] && !visited( node->leaf[0] ) )
        {
            stack.push_back( node->leaf[0] );
            visitedNodes.insert( node->leaf[0] );
            continue;
        }
        else if( !node->isTerminal && node->leaf[1] && !visited( node->leaf[1] ) )
        {
            stack.push_back( node->leaf[1] );
            visitedNodes.insert( node->leaf[1] );
            continue;
        }

        visitedNodes.insert( node );

        if(node->uop)
            aCode->AddOp(node->uop);

        stack.pop_back();
    }

    libeval_dbg(2,"DUMp: \n%s\n", aCode->Dump().c_str() );

    return true;
}
/**
 * Execute this micro-op against the evaluation stack of @a ctx.
 *
 * - PUSH VAR / PUSH VALUE push an operand.
 * - METHOD CALL invokes the bound function.
 * - Binary operators pop two operands (the second operand is on top) and push the result;
 *   comparisons and boolean operators yield 1 or 0.
 * - The unary NOT operator pops one operand and pushes 1 when it is zero, 0 otherwise.
 *
 * @param ctx   evaluation context providing the value stack and value storage.
 * @param ucode micro-code being run; passed on to variable lookups and method calls.
 */
void UOP::Exec( UCODE::CONTEXT* ctx, UCODE* ucode )
{
    switch( m_op )
    {
    case TR_UOP_PUSH_VAR:
    {
        auto value = ctx->AllocValue();

        value->Set( reinterpret_cast<VAR_REF*>( m_arg )->GetValue( ucode ) );
        ctx->Push( value );
        break;
    }

    case TR_UOP_PUSH_VALUE:
        ctx->Push( reinterpret_cast<VALUE*>( m_arg ) );
        return;

    case TR_OP_METHOD_CALL:
        m_func( ucode, ctx, m_arg );
        return;

    default:
        break;
    }

    if( m_op & TR_OP_BINARY_MASK )
    {
        // Operands were pushed left to right, so the right-hand one comes off first.
        auto   arg2 = ctx->Pop();
        auto   arg1 = ctx->Pop();
        double result;

        switch( m_op )
        {
        case TR_OP_ADD:
            result = arg1->AsDouble() + arg2->AsDouble();
            break;

        case TR_OP_SUB:
            result = arg1->AsDouble() - arg2->AsDouble();
            break;

        case TR_OP_MUL:
            result = arg1->AsDouble() * arg2->AsDouble();
            break;

        case TR_OP_DIV:
            result = arg1->AsDouble() / arg2->AsDouble();
            break;

        case TR_OP_LESS_EQUAL:
            result = arg1->AsDouble() <= arg2->AsDouble() ? 1 : 0;
            break;

        case TR_OP_GREATER_EQUAL:
            result = arg1->AsDouble() >= arg2->AsDouble() ? 1 : 0;
            break;

        case TR_OP_LESS:
            result = arg1->AsDouble() < arg2->AsDouble() ? 1 : 0;
            break;

        case TR_OP_GREATER:
            result = arg1->AsDouble() > arg2->AsDouble() ? 1 : 0;
            break;

        case TR_OP_EQUAL:
            result = arg1->EqualTo( arg2 ) ? 1 : 0;
            break;

        case TR_OP_NOT_EQUAL:
            result = arg1->EqualTo( arg2 ) ? 0 : 1;
            break;

        case TR_OP_BOOL_AND:
            result = ( arg1->AsDouble() != 0.0 && arg2->AsDouble() != 0.0 ) ? 1 : 0;
            break;

        case TR_OP_BOOL_OR:
            result = ( arg1->AsDouble() != 0.0 || arg2->AsDouble() != 0.0 ) ? 1 : 0;
            break;

        default:
            result = 0.0;
            break;
        }

        auto rp = ctx->AllocValue();

        rp->Set( result );
        ctx->Push( rp );
        return;
    }
    else if( m_op & TR_OP_UNARY_MASK )
    {
        // Logical NOT replaces the operand by its negation.  Other unary opcodes are left
        // alone, i.e. the operand stays on the stack unchanged.
        if( m_op == TR_OP_BOOL_NOT )
        {
            auto   arg = ctx->Pop();
            double result = arg->AsDouble() != 0.0 ? 0 : 1;
            auto   rp = ctx->AllocValue();

            rp->Set( result );
            ctx->Push( rp );
        }
    }
}
/**
 * Execute all micro-ops in order on a fresh context and return the value left on the stack.
 * Exactly one value is expected to remain after the last micro-op (asserted).
 *
 * NOTE(review): the context is local to this function; if the returned value is stored
 * inside the context, the pointer would not outlive this call - confirm against CONTEXT.
 *
 * @return the result value popped from the context's stack.
 */
VALUE* UCODE::Run()
{
    CONTEXT ctx;

    for( const auto uop : m_ucode )
    {
        uop->Exec( &ctx, this );
    }

    assert( ctx.SP() == 1 );

    VALUE* result = ctx.Pop();
    return result;
}
/**
 * Report an error raised while micro-code is running.
 *
 * This is currently an empty stub: the message is neither stored nor reported.
 *
 * @param aErrorMsg description of the runtime error (unused).
 */
void UCODE::RuntimeError( const std::string aErrorMsg )
{
    // Intentionally empty for now: runtime errors are dropped.
}
/**
 * Render the error status as a single human readable line.
 *
 * The text is assembled with std::string, so messages and failing objects of any length
 * are handled without a fixed-size buffer.
 *
 * @return an empty string when no error is pending, otherwise
 *         "<message> (pos: <position>, near: '<failing object>')".
 */
std::string ERROR_STATUS::Format() const
{
    if( !pendingError )
        return "";

    std::string text = message;

    text += " (pos: ";
    text += std::to_string( failingPosition );
    text += ", near: '";
    text += failingObject;
    text += "')";

    return text;
}
2020-06-04 11:04:03 +00:00
} // namespace LIBEVAL