/* * This program source code file is part of KICAD, a free EDA CAD application. * * Copyright (C) 2007-2010 SoftPLC Corporation, Dick Hollenbeck * Copyright (C) 2007-2024 Kicad Developers, see change_log.txt for contributors. * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version 2 * of the License, or (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, you may find one here: * http://www.gnu.org/licenses/old-licenses/gpl-2.0.html * or you may search the http://www.gnu.org website for the version 2 license, * or you may write to the Free Software Foundation, Inc., * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA */ #ifndef DSNLEXER_H_ #define DSNLEXER_H_ #include #include #include #include #include #include #ifndef SWIG /** * Hold a keyword string and its unique integer token. */ struct KICOMMON_API KEYWORD { const char* name; ///< unique keyword. int token; ///< a zero based index into an array of KEYWORDs }; #endif // SWIG // something like this macro can be used to help initialize a KEYWORD table. // see SPECCTRA_DB::keywords[] as an example. //#define TOKDEF(x) { #x, T_##x } /** * List all the DSN lexer's tokens that are supported in lexing. * * It is up to the parser if it wants also to support them. */ enum DSN_SYNTAX_T { DSN_NONE = -11, DSN_COMMENT = -10, DSN_STRING_QUOTE = -9, DSN_QUOTE_DEF = -8, DSN_DASH = -7, DSN_SYMBOL = -6, DSN_NUMBER = -5, DSN_RIGHT = -4, // right bracket, ')' DSN_LEFT = -3, // left bracket, '(' DSN_STRING = -2, // a quoted string, stripped of the quotes DSN_EOF = -1 // special case for end of file }; /** * Implement a lexical analyzer for the SPECCTRA DSN file format. * * It reads lexical tokens from the current #LINE_READER through the #NextTok() function. */ class KICOMMON_API DSNLEXER { public: /** * Initialize a DSN lexer and prepares to read from aFile which is already open and has * \a aFilename. * * @param aKeywordTable is an array of KEYWORDS holding \a aKeywordCount. This * token table need not contain the lexer separators such as '(' ')', etc. * @param aKeywordCount is the count of tokens in aKeywordTable. * @param aFile is an open file, which will be closed when this is destructed. * @param aFileName is the name of the file */ DSNLEXER( const KEYWORD* aKeywordTable, unsigned aKeywordCount, const KEYWORD_MAP* aKeywordMap, FILE* aFile, const wxString& aFileName ); /** * Initialize a DSN lexer and prepares to read from @a aSExpression. * * @param aKeywordTable is an array of KEYWORDS holding \a aKeywordCount. This * token table need not contain the lexer separators such as '(' ')', etc. * @param aKeywordCount is the count of tokens in aKeywordTable. * @param aSExpression is text to feed through a STRING_LINE_READER * @param aSource is a description of aSExpression, used for error reporting. */ DSNLEXER( const KEYWORD* aKeywordTable, unsigned aKeywordCount, const KEYWORD_MAP* aKeywordMap, const std::string& aSExpression, const wxString& aSource = wxEmptyString ); /** * Initialize a DSN lexer and prepares to read from @a aSExpression. * * Use this one without a keyword table with the DOM parser in ptree.h. * * @param aSExpression is text to feed through a #STRING_LINE_READER * @param aSource is a description of aSExpression, used for error reporting. */ DSNLEXER( const std::string& aSExpression, const wxString& aSource = wxEmptyString ); /** * Initialize a DSN lexer and prepares to read from @a aLineReader which is already * open, and may be in use by other DSNLEXERs also. * * No ownership is taken of @a aLineReader. This enables it to be used by other DSNLEXERs. * * @param aKeywordTable is an array of #KEYWORDS holding \a aKeywordCount. This * token table need not contain the lexer separators such as '(' ')', etc. * @param aKeywordCount is the count of tokens in aKeywordTable. * @param aLineReader is any subclassed instance of LINE_READER, such as * #STRING_LINE_READER or #FILE_LINE_READER. No ownership is taken. */ DSNLEXER( const KEYWORD* aKeywordTable, unsigned aKeywordCount, const KEYWORD_MAP* aKeywordMap, LINE_READER* aLineReader = nullptr ); virtual ~DSNLEXER(); /** * Reinit variables used during parsing, to ensure od states are not used in a new parsing * must be called before parsing a new file after parsing an old file to avoid * starting with some variables in a non initial state */ void InitParserState(); /** * Usable only for DSN lexers which share the same #LINE_READER. * * Synchronizes the pointers handling the data read by the #LINE_READER. Allows 2 * #DNSLEXER objects to share the same current line, when switching from a #DNSLEXER * to another #DNSLEXER * @param aLexer the model. * @return true if the sync can be made ( at least the same line reader ). */ bool SyncLineReaderWith( DSNLEXER& aLexer ); /** * Change the behavior of this lexer into or out of "specctra mode". * * If specctra mode, then: * -#) stringDelimiter can be changed. * -#) KiCad quoting protocol is not in effect. * -#) space_in_quoted_tokens is functional else none of the above are true. * * The default mode is non-specctra mode, meaning: * -#) stringDelimiter cannot be changed. * -#) KiCad quoting protocol is in effect. * -#) space_in_quoted_tokens is not functional. */ void SetSpecctraMode( bool aMode ); /** * Manage a stack of LINE_READERs in order to handle nested file inclusion. * * This function pushes aLineReader onto the top of a stack of LINE_READERs and makes * it the current #LINE_READER with its own #GetSource(), line number and line text. * A grammar must be designed such that the "include" token (whatever its various names), * and any of its parameters are not followed by anything on that same line, * because PopReader always starts reading from a new line upon returning to * the original #LINE_READER. */ void PushReader( LINE_READER* aLineReader ); /** * Delete the top most #LINE_READER from an internal stack of LINE_READERs and * in the case of #FILE_LINE_READER this means the associated FILE is closed. * * The most recently used former #LINE_READER on the stack becomes the * current #LINE_READER and its previous position in its input stream and the * its latest line number should pertain. PopReader always starts reading * from a new line upon returning to the previous #LINE_READER. A pop is only * possible if there are at least 2 #LINE_READERs on the stack, since popping * the last one is not supported. * * @return the LINE_READER that was in use before the pop, or NULL * if there was not at least two readers on the stack and therefore the * pop failed. */ LINE_READER* PopReader(); /** * Return the next token found in the input file or DSN_EOF when reaching the end of * file. * * Users should wrap this function to return an enum to aid in grammar debugging while * running under a debugger, but leave this lower level function returning an int (so * the enum does not collide with another usage). * * @return the type of token found next. * @throw IO_ERROR only if the #LINE_READER throws it. */ int NextTok(); /** * Call #NextTok() and then verifies that the token read in satisfies #IsSymbol(). * * @return the actual token read in. * @throw IO_ERROR if the next token does not satisfy IsSymbol(). */ int NeedSYMBOL(); /** * Call #NextTok() and then verifies that the token read in satisfies bool IsSymbol() or * the next token is #DSN_NUMBER. * * @return the actual token read in. * @throw IO_ERROR if the next token does not satisfy the above test. */ int NeedSYMBOLorNUMBER(); /** * Call #NextTok() and then verifies that the token read is type #DSN_NUMBER. * * @return the actual token read in. * @throw IO_ERROR if the next token does not satisfy the above test. */ int NeedNUMBER( const char* aExpectation ); /** * Return whatever #NextTok() returned the last time it was called. */ int CurTok() const { return curTok; } /** * Return whatever NextTok() returned the 2nd to last time it was called. */ int PrevTok() const { return prevTok; } /** * Used to support "loose" matches (quoted tokens). */ int GetCurStrAsToken() const { return findToken( curText ); } /** * Change the string delimiter from the default " to some other character and return * the old value. * * @param aStringDelimiter The character in lowest 8 bits. * @return The old delimiter in the lowest 8 bits. */ char SetStringDelimiter( char aStringDelimiter ) { char old = stringDelimiter; if( specctraMode ) stringDelimiter = aStringDelimiter; return old; } /** * Change the setting controlling whether a space in a quoted string isa terminator. * * @param val If true, means */ bool SetSpaceInQuotedTokens( bool val ) { bool old = space_in_quoted_tokens; if( specctraMode ) space_in_quoted_tokens = val; return old; } /** * Change the handling of comments. * * If set true, comments are returned as single line strings with a terminating newline. * Otherwise they are consumed by the lexer and not returned. */ bool SetCommentsAreTokens( bool val ) { bool old = commentsAreTokens; commentsAreTokens = val; return old; } /** * Check the next sequence of tokens and reads them into a wxArrayString if they are * comments. * * Reading continues until a non-comment token is encountered, and such last read token * remains as #CurTok() and as #CurText(). No push back or "un get" mechanism is used * for this support. Upon return you simply avoid calling NextTok() for the next token, * but rather #CurTok(). * * @return Heap allocated block of comments or NULL if none. The caller owns the * allocation and must delete if not NULL. */ wxArrayString* ReadCommentLines(); /** * Test a token to see if it is a symbol. * * This means it cannot be a special delimiter character such as #DSN_LEFT, #DSN_RIGHT, * #DSN_QUOTE, etc. It may however, coincidentally match a keyword and still be a symbol. */ static bool IsSymbol( int aTok ); /** * Throw an #IO_ERROR exception with an input file specific error message. * * @param aTok is the token/keyword type which was expected at the current input location. * @throw IO_ERROR with the location within the input file of the problem. */ void Expecting( int aTok ) const; /** * Throw an #IO_ERROR exception with an input file specific error message. * * @param aTokenList is the token/keyword type which was expected at the * current input location, e.g. "pin|graphic|property". * @throw IO_ERROR with the location within the input file of the problem. */ void Expecting( const char* aTokenList ) const; /** * Throw an #IO_ERROR exception with an input file specific error message. * * @param aTok is the token/keyword type which was not expected at the * current input location. * @throw IO_ERROR with the location within the input file of the problem. */ void Unexpected( int aTok ) const; /** * Throw an #IO_ERROR exception with an input file specific error message. * * @param aToken is the token which was not expected at the current input location. * @throw IO_ERROR with the location within the input file of the problem. */ void Unexpected( const char* aToken ) const; /** * Throw an #IO_ERROR exception with a message saying specifically that \a aTok * is a duplicate of one already seen in current context. * * @param aTok is the token/keyword type which was not expected at the current input * location. * @throw IO_ERROR with the location within the input file of the problem. */ void Duplicate( int aTok ); /** * Call #NextTok() and then verifies that the token read in is a #DSN_LEFT. * * @throw IO_ERROR if the next token is not a #DSN_LEFT */ void NeedLEFT(); /** * Call #NextTok() and then verifies that the token read in is a #DSN_RIGHT. * * @throw IO_ERROR if the next token is not a #DSN_RIGHT */ void NeedRIGHT(); /** * Return the C string representation of a #DSN_T value. */ const char* GetTokenText( int aTok ) const; /** * Return a quote wrapped wxString representation of a token value. */ wxString GetTokenString( int aTok ) const; static const char* Syntax( int aTok ); /** * Return a pointer to the current token's text. */ const char* CurText() const { return curText.c_str(); } /** * Return a reference to current token in std::string form. */ const std::string& CurStr() const { return curText; } /** * Return the current token text as a wxString, assuming that the input byte stream * is UTF8 encoded. */ wxString FromUTF8() const { return wxString::FromUTF8( curText.c_str() ); } /** * Return the current line number within my #LINE_READER. */ int CurLineNumber() const { return reader->LineNumber(); } /** * Return the current line of text from which the #CurText() would return its token. */ const char* CurLine() const { return (const char*)(*reader); } /** * Return the current #LINE_READER source. * * @return source of the lines of text, e.g. a filename or "clipboard". */ const wxString& CurSource() const { return reader->GetSource(); } /** * Return the byte offset within the current line, using a 1 based index. * * @return a one based index into the current line. */ int CurOffset() const { return curOffset + 1; } #ifndef SWIG protected: void init(); int readLine() { if( reader ) { reader->ReadLine(); unsigned len = reader->Length(); // start may have changed in ReadLine(), which can resize and // relocate reader's line buffer. start = reader->Line(); next = start; limit = next + len; return len; } return 0; } /** * Take @a aToken string and looks up the string in the keywords table. * * @param aToken is a string to lookup in the keywords table. * @return with a value from the enum #DSN_T matching the keyword text, * or #DSN_SYMBOL if @a aToken is not in the keywords table. */ int findToken( const std::string& aToken ) const; bool isStringTerminator( char cc ) const { if( !space_in_quoted_tokens && cc == ' ' ) return true; if( cc == stringDelimiter ) return true; return false; } /** * Parse the current token as an ASCII numeric string with possible leading * whitespace into a double precision floating point number. * * @throw IO_ERROR if an error occurs attempting to convert the current token. * @return The result of the parsed token. */ double parseDouble(); double parseDouble( const char* aExpected ) { NeedNUMBER( aExpected ); return parseDouble(); } template inline double parseDouble( T aToken ) { return parseDouble( GetTokenText( aToken ) ); } bool iOwnReaders; ///< on readerStack, should I delete them? const char* start; const char* next; const char* limit; char dummy[1]; ///< when there is no reader. typedef std::vector READER_STACK; READER_STACK readerStack; ///< all the LINE_READERs by pointer. ///< no ownership. ownership is via readerStack, maybe, if iOwnReaders LINE_READER* reader; bool specctraMode; ///< if true, then: ///< 1) stringDelimiter can be changed ///< 2) Kicad quoting protocol is not in effect ///< 3) space_in_quoted_tokens is functional ///< else not. char stringDelimiter; bool space_in_quoted_tokens; ///< blank spaces within quoted strings bool commentsAreTokens; ///< true if should return comments as tokens int prevTok; ///< curTok from previous NextTok() call. int curOffset; ///< offset within current line of the current token int curTok; ///< the current token obtained on last NextTok() std::string curText; ///< the text of the current token const KEYWORD* keywords; ///< table sorted by CMake for bsearch() unsigned keywordCount; ///< count of keywords table const KEYWORD_MAP* keywordsLookup; ///< fast, specialized "C string" hashtable #endif // SWIG }; #endif // DSNLEXER_H_