kicad/common/dsnlexer.cpp

742 lines
20 KiB
C++
Raw Normal View History

/*
* This program source code file is part of KiCad, a free EDA CAD application.
*
* Copyright (C) 2007-2013 SoftPLC Corporation, Dick Hollenbeck <dick@softplc.com>
* Copyright (C) 2007 KiCad Developers, see change_log.txt for contributors.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, you may find one here:
* http://www.gnu.org/licenses/old-licenses/gpl-2.0.html
* or you may search the http://www.gnu.org website for the version 2 license,
* or you may write to the Free Software Foundation, Inc.,
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
*/
#include <cstdarg>
#include <cstdio>
#include <cstdlib> // bsearch()
#include <cctype>
#include <macros.h>
#include <fctsys.h>
#include <dsnlexer.h>
//#include "fctsys.h"
//#include "pcbnew.h"
//#define STANDALONE 1 // enable this for stand alone testing.
static int compare( const void* a1, const void* a2 )
{
const KEYWORD* k1 = (const KEYWORD*) a1;
const KEYWORD* k2 = (const KEYWORD*) a2;
int ret = strcmp( k1->name, k2->name );
return ret;
}
//-----<DSNLEXER>-------------------------------------------------------------
void DSNLEXER::init()
{
curTok = DSN_NONE;
prevTok = DSN_NONE;
stringDelimiter = '"';
specctraMode = false;
space_in_quoted_tokens = false;
commentsAreTokens = false;
}
DSNLEXER::DSNLEXER( const KEYWORD* aKeywordTable, unsigned aKeywordCount,
FILE* aFile, const wxString& aFilename ) :
iOwnReaders( true ),
keywords( aKeywordTable ),
keywordCount( aKeywordCount )
{
FILE_LINE_READER* fileReader = new FILE_LINE_READER( aFile, aFilename );
PushReader( fileReader );
init();
}
DSNLEXER::DSNLEXER( const KEYWORD* aKeywordTable, unsigned aKeywordCount,
const std::string& aClipboardTxt, const wxString& aSource ) :
iOwnReaders( true ),
keywords( aKeywordTable ),
keywordCount( aKeywordCount )
{
STRING_LINE_READER* stringReader = new STRING_LINE_READER( aClipboardTxt, aSource.IsEmpty() ?
wxString( _( "clipboard" ) ) : aSource );
PushReader( stringReader );
init();
}
DSNLEXER::DSNLEXER( const KEYWORD* aKeywordTable, unsigned aKeywordCount,
LINE_READER* aLineReader ) :
iOwnReaders( false ),
keywords( aKeywordTable ),
keywordCount( aKeywordCount )
{
if( aLineReader )
PushReader( aLineReader );
init();
}
DSNLEXER::~DSNLEXER()
{
if( iOwnReaders )
{
// delete the LINE_READERs from the stack, since I own them.
for( READER_STACK::iterator it = readerStack.begin(); it!=readerStack.end(); ++it )
delete *it;
}
}
void DSNLEXER::SetSpecctraMode( bool aMode )
{
specctraMode = aMode;
if( aMode )
{
// specctra mode defaults, some of which can still be changed in this mode.
space_in_quoted_tokens = true;
}
else
{
space_in_quoted_tokens = false;
stringDelimiter = '"';
}
}
void DSNLEXER::PushReader( LINE_READER* aLineReader )
{
readerStack.push_back( aLineReader );
reader = aLineReader;
2011-01-12 04:51:29 +00:00
start = (const char*) (*reader);
// force a new readLine() as first thing.
limit = start;
next = start;
}
LINE_READER* DSNLEXER::PopReader()
{
LINE_READER* ret = 0;
if( readerStack.size() )
{
ret = reader;
readerStack.pop_back();
if( readerStack.size() )
{
reader = readerStack.back();
start = reader->Line();
// force a new readLine() as first thing.
limit = start;
next = start;
}
else
{
reader = 0;
start = dummy;
limit = dummy;
limit = dummy;
}
}
return ret;
}
int DSNLEXER::findToken( const std::string& tok )
{
// convert to lower case once, this should be faster than using strcasecmp()
// for each test in compare().
lowercase.clear();
for( std::string::const_iterator iter = tok.begin(); iter!=tok.end(); ++iter )
lowercase += (char) tolower( *iter );
KEYWORD search;
search.name = lowercase.c_str();
// a boost hashtable might be a few percent faster, depending on
// hashtable size and quality of the hash function.
const KEYWORD* findings = (const KEYWORD*) bsearch( &search,
keywords, keywordCount,
sizeof(KEYWORD), compare );
if( findings )
return findings->token;
else
return -1;
}
const char* DSNLEXER::Syntax( int aTok )
{
const char* ret;
switch( aTok )
{
case DSN_NONE:
ret = "NONE";
break;
2009-12-11 15:51:26 +00:00
case DSN_STRING_QUOTE:
ret = "string_quote"; // a special DSN syntax token, see specctra spec.
break;
case DSN_QUOTE_DEF:
ret = "quoted text delimiter";
break;
case DSN_DASH:
ret = "-";
break;
case DSN_SYMBOL:
ret = "symbol";
break;
case DSN_NUMBER:
ret = "number";
break;
case DSN_RIGHT:
ret = ")";
break;
case DSN_LEFT:
ret = "(";
break;
case DSN_STRING:
ret = "quoted string";
break;
case DSN_EOF:
ret = "end of input";
break;
default:
ret = "???";
}
return ret;
}
const char* DSNLEXER::GetTokenText( int aTok )
{
const char* ret;
if( aTok < 0 )
{
return Syntax( aTok );
}
else if( (unsigned) aTok < keywordCount )
{
ret = keywords[aTok].name;
}
else
ret = "token too big";
return ret;
}
wxString DSNLEXER::GetTokenString( int aTok )
{
wxString ret;
ret << wxT("'") << wxString::FromUTF8( GetTokenText(aTok) ) << wxT("'");
return ret;
}
bool DSNLEXER::IsSymbol( int aTok )
{
// This is static and not inline to reduce code space.
// if aTok is >= 0, then it is a coincidental match to a keyword.
return aTok==DSN_SYMBOL
|| aTok==DSN_STRING
|| aTok>=0
;
}
void DSNLEXER::Expecting( int aTok ) throw( IO_ERROR )
{
wxString errText;
errText.Printf( _("Expecting '%s'"), GetChars( GetTokenString( aTok ) ) );
THROW_PARSE_ERROR( errText, CurSource(), CurLine(), CurLineNumber(), CurOffset() );
}
void DSNLEXER::Expecting( const char* text ) throw( IO_ERROR )
{
wxString errText;
errText.Printf( _("Expecting '%s'"),
GetChars( wxString::FromUTF8( text ) ) );
THROW_PARSE_ERROR( errText, CurSource(), CurLine(), CurLineNumber(), CurOffset() );
}
void DSNLEXER::Unexpected( int aTok ) throw( IO_ERROR )
{
wxString errText;
errText.Printf( _("Unexpected '%s'"), GetChars( GetTokenString( aTok ) ) );
THROW_PARSE_ERROR( errText, CurSource(), CurLine(), CurLineNumber(), CurOffset() );
}
void DSNLEXER::Duplicate( int aTok ) throw( IO_ERROR )
{
2012-12-08 23:58:03 +00:00
wxString errText = wxString::Format(
_("%s is a duplicate"), GetTokenString( aTok ).GetData() );
THROW_PARSE_ERROR( errText, CurSource(), CurLine(), CurLineNumber(), CurOffset() );
}
void DSNLEXER::Unexpected( const char* text ) throw( IO_ERROR )
{
wxString errText;
errText.Printf( _("Unexpected '%s'"),
GetChars( wxString::FromUTF8( text ) ) );
THROW_PARSE_ERROR( errText, CurSource(), CurLine(), CurLineNumber(), CurOffset() );
}
void DSNLEXER::NeedLEFT() throw( IO_ERROR )
{
int tok = NextTok();
if( tok != DSN_LEFT )
Expecting( DSN_LEFT );
}
void DSNLEXER::NeedRIGHT() throw( IO_ERROR )
{
int tok = NextTok();
if( tok != DSN_RIGHT )
Expecting( DSN_RIGHT );
}
int DSNLEXER::NeedSYMBOL() throw( IO_ERROR )
{
int tok = NextTok();
if( !IsSymbol( tok ) )
Expecting( DSN_SYMBOL );
return tok;
}
int DSNLEXER::NeedSYMBOLorNUMBER() throw( IO_ERROR )
{
int tok = NextTok();
if( !IsSymbol( tok ) && tok!=DSN_NUMBER )
Expecting( "symbol|number" );
return tok;
}
2011-02-14 06:39:51 +00:00
int DSNLEXER::NeedNUMBER( const char* aExpectation ) throw( IO_ERROR )
{
int tok = NextTok();
if( tok != DSN_NUMBER )
{
2012-12-08 23:58:03 +00:00
wxString errText = wxString::Format(
_("need a NUMBER for '%s'"), wxString::FromUTF8( aExpectation ).GetData() );
2011-02-14 06:39:51 +00:00
THROW_PARSE_ERROR( errText, CurSource(), CurLine(), CurLineNumber(), CurOffset() );
}
return tok;
}
/**
* Function isSpace
* tests for whitespace. Our whitespace, by our definition, is a subset of ASCII,
* i.e. no bytes with MSB on can be considered whitespace, since they are likely part
* of a multibyte UTF8 character.
*/
2013-06-03 13:09:37 +00:00
static bool isSpace( char cc )
{
2013-06-03 13:09:37 +00:00
// cc is signed, so it is often negative.
// Treat negative as large positive to exclude rapidly.
2013-06-03 13:09:37 +00:00
if( (unsigned char) cc <= ' ' )
{
switch( cc )
{
case ' ':
case '\n':
case '\r':
case '\t':
case '\0': // PCAD s-expression files have this.
return true;
}
}
return false;
}
2013-06-03 13:09:37 +00:00
inline bool isDigit( char cc )
{
return '0' <= cc && cc <= '9';
}
/// return true if @a cc is an s-expression separator character
inline bool isSep( char cc )
{
return isSpace( cc ) || cc=='(' || cc==')';
}
/**
* Function isNumber
2013-06-03 13:09:37 +00:00
* returns true if the next sequence of text is a number:
* either an integer, fixed point, or float with exponent. Stops scanning
* at the first non-number character, even if it is not whitespace.
*
* @param cp is the start of the current token.
* @param limit is the end of the current line of text.
2013-06-04 14:44:08 +00:00
* @return const char* - if initial text was a number, then pointer to the text
* after this number, else NULL if initial text was not a number.
*/
2013-06-03 13:09:37 +00:00
static const char* isNumber( const char* cp, const char* limit )
{
2013-06-03 13:09:37 +00:00
// regex for a float: "^[-+]?[0-9]*\\.?[0-9]+([eE][-+]?[0-9]+)?" i.e. any number,
// code traversal manually here:
bool sawNumber = false;
2013-06-03 13:09:37 +00:00
if( cp < limit && ( *cp=='-' || *cp=='+' ) )
++cp;
2013-06-03 13:09:37 +00:00
while( cp < limit && isDigit( *cp ) )
{
++cp;
sawNumber = true;
}
if( cp < limit && *cp == '.' )
{
++cp;
2013-06-03 13:09:37 +00:00
while( cp < limit && isDigit( *cp ) )
{
++cp;
sawNumber = true;
}
}
if( sawNumber )
{
2013-06-03 13:09:37 +00:00
if( cp < limit && ( *cp=='E' || *cp=='e' ) )
{
++cp;
sawNumber = false; // exponent mandates at least one digit thereafter.
2013-06-03 13:09:37 +00:00
if( cp < limit && ( *cp=='-' || *cp=='+' ) )
++cp;
2013-06-03 13:09:37 +00:00
while( cp < limit && isDigit( *cp ) )
{
++cp;
sawNumber = true;
}
}
}
2013-06-03 13:09:37 +00:00
return sawNumber ? cp : NULL;
}
int DSNLEXER::NextTok() throw( IO_ERROR )
{
2011-01-12 04:51:29 +00:00
const char* cur = next;
const char* head = cur;
prevTok = curTok;
if( curTok != DSN_EOF )
{
if( cur >= limit )
{
L_read:
2009-12-11 15:51:26 +00:00
// blank lines are returned as "\n" and will have a len of 1.
// EOF will have a len of 0 and so is detectable.
int len = readLine();
if( len == 0 )
{
2011-01-05 21:21:32 +00:00
cur = start; // after readLine(), since start can change, set cur offset to start
curTok = DSN_EOF;
goto exit;
}
2011-01-05 21:21:32 +00:00
cur = start; // after readLine() since start can change.
// skip leading whitespace
2013-06-03 13:09:37 +00:00
while( cur<limit && isSpace( *cur ) )
++cur;
// If the first non-blank character is #, this line is a comment.
// Comments cannot follow any other token on the same line.
if( cur<limit && *cur=='#' )
{
if( commentsAreTokens )
{
// save the entire line, including new line as the current token.
// the '#' character may not be at offset zero.
curText = start; // entire line is the token
cur = start; // ensure a good curOffset below
curTok = DSN_COMMENT;
head = limit; // do a readLine() on next call in here.
goto exit;
}
else
goto L_read;
}
}
else
{
// skip leading whitespace
2013-06-03 13:09:37 +00:00
while( cur<limit && isSpace( *cur ) )
++cur;
}
if( cur >= limit )
goto L_read;
if( *cur == '(' )
{
curText = *cur;
curTok = DSN_LEFT;
head = cur+1;
goto exit;
}
if( *cur == ')' )
{
curText = *cur;
curTok = DSN_RIGHT;
head = cur+1;
goto exit;
}
// switching the string_quote character
if( prevTok == DSN_STRING_QUOTE )
{
static const wxString errtxt( _("String delimiter must be a single character of ', \", or $"));
char cc = *cur;
switch( cc )
{
case '\'':
case '$':
case '"':
break;
default:
THROW_PARSE_ERROR( errtxt, CurSource(), CurLine(), CurLineNumber(), CurOffset() );
}
2009-12-11 15:51:26 +00:00
curText = cc;
head = cur+1;
2013-06-03 13:09:37 +00:00
if( head<limit && !isSep( *head ) )
{
THROW_PARSE_ERROR( errtxt, CurSource(), CurLine(), CurLineNumber(), CurOffset() );
}
curTok = DSN_QUOTE_DEF;
goto exit;
}
/* get the dash out of a <pin_reference> which is embedded for example
like: U2-14 or "U2"-"14"
This is detectable by a non-space immediately preceeding the dash.
*/
if( *cur == '-' && cur>start && !isSpace( cur[-1] ) )
{
2009-12-11 15:51:26 +00:00
curText = '-';
curTok = DSN_DASH;
2009-12-11 15:51:26 +00:00
head = cur+1;
goto exit;
}
2013-06-03 13:09:37 +00:00
if( ( head = isNumber( cur, limit ) ) != NULL &&
// token can only be a number if trailing text is a separator or line end
( head==limit || isSep( *head ) ) )
{
curText.clear();
curText.append( cur, head );
curTok = DSN_NUMBER;
goto exit;
}
// a quoted string, will return DSN_STRING
if( *cur == stringDelimiter )
{
// Non-specctraMode, understands and deciphers escaped \, \r, \n, and \".
// Strips off leading and trailing double quotes
if( !specctraMode )
{
// copy the token, character by character so we can remove doubled up quotes.
curText.clear();
++cur; // skip over the leading delimiter, which is always " in non-specctraMode
head = cur;
while( head<limit )
{
// ESCAPE SEQUENCES:
if( *head =='\\' )
{
char tbuf[8];
char c;
int i;
if( ++head >= limit )
break; // throw exception at L_unterminated
switch( *head++ )
{
case '"':
case '\\': c = head[-1]; break;
case 'a': c = '\x07'; break;
case 'b': c = '\x08'; break;
case 'f': c = '\x0c'; break;
case 'n': c = '\n'; break;
case 'r': c = '\r'; break;
case 't': c = '\x09'; break;
case 'v': c = '\x0b'; break;
case 'x': // 1 or 2 byte hex escape sequence
for( i=0; i<2; ++i )
{
if( !isxdigit( head[i] ) )
break;
tbuf[i] = head[i];
}
tbuf[i] = '\0';
if( i > 0 )
c = (char) strtoul( tbuf, NULL, 16 );
else
c = 'x'; // a goofed hex escape sequence, interpret as 'x'
head += i;
break;
default: // 1-3 byte octal escape sequence
--head;
for( i=0; i<3; ++i )
{
if( head[i] < '0' || head[i] > '7' )
break;
tbuf[i] = head[i];
}
tbuf[i] = '\0';
if( i > 0 )
c = (char) strtoul( tbuf, NULL, 8 );
else
c = '\\'; // a goofed octal escape sequence, interpret as '\'
head += i;
break;
}
curText += c;
}
else if( *head == '"' ) // end of the non-specctraMode DSN_STRING
{
curTok = DSN_STRING;
++head; // omit this trailing double quote
goto exit;
}
else
curText += *head++;
} // while
// L_unterminated:
2013-06-03 13:09:37 +00:00
wxString errtxt( _( "Un-terminated delimited string" ) );
THROW_PARSE_ERROR( errtxt, CurSource(), CurLine(), CurLineNumber(), CurOffset() );
}
else // specctraMode DSN_STRING
{
++cur; // skip over the leading delimiter: ",', or $
head = cur;
while( head<limit && !isStringTerminator( *head ) )
++head;
if( head >= limit )
{
2013-06-03 13:09:37 +00:00
wxString errtxt( _( "Un-terminated delimited string" ) );
THROW_PARSE_ERROR( errtxt, CurSource(), CurLine(), CurLineNumber(), CurOffset() );
}
curText.clear();
curText.append( cur, head );
++head; // skip over the trailing delimiter
curTok = DSN_STRING;
goto exit;
}
}
// Maybe it is a token we will find in the token table.
// If not, then call it a DSN_SYMBOL.
{
head = cur+1;
2013-06-03 13:09:37 +00:00
while( head<limit && !isSep( *head ) )
++head;
curText.clear();
curText.append( cur, head );
int found = findToken( curText );
if( found != -1 )
curTok = found;
else if( 0 == curText.compare( "string_quote" ) )
curTok = DSN_STRING_QUOTE;
else // unrecogized token, call it a symbol
curTok = DSN_SYMBOL;
}
}
exit: // single point of exit, no returns elsewhere please.
curOffset = cur - start;
next = head;
// printf("tok:\"%s\"\n", curText.c_str() );
return curTok;
}