#  This program source code file is part of KICAD, a free EDA CAD application.
#
#  Copyright (C) 2010 Wayne Stambaugh
#  Copyright (C) 2010 Kicad Developers, see AUTHORS.txt for contributors.
#
#  This program is free software; you can redistribute it and/or
#  modify it under the terms of the GNU General Public License
#  as published by the Free Software Foundation; either version 2
#  of the License, or (at your option) any later version.
#
#  This program is distributed in the hope that it will be useful,
#  but WITHOUT ANY WARRANTY; without even the implied warranty of
#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#  GNU General Public License for more details.
#
#  You should have received a copy of the GNU General Public License
#  along with this program; if not, you may find one here:
#  http://www.gnu.org/licenses/old-licenses/gpl-2.0.html
#  or you may search the http://www.gnu.org website for the version 2 license,
#  or you may write to the Free Software Foundation, Inc.,
#  51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA
#
#
# This script converts a plain text file with a line feed separated list
# of token names into the appropriate source and header files required by
# the DSN lexer.  See files "<base-source-directory>/common/dsnlexer.cpp" and
# "<base-source-directory>/include/dsnlexer.h" for more information about how
# the DSN lexer works.  The token list file format requires a single token
# per line.  Tokens can only contain lower case letters, numbers, and
# underscores.  The first letter of each token must be a lower case letter.
# Tokens must be unique.  If any of the above criteria are not met, the
# source and header files will not be generated and a build error will
# occur.  Anything from a '#' to the end of a line in the token list file is
# treated as a comment, and blank lines are ignored.
#
# Valid tokens:      a a1 foo_1 foo_bar2 bar_
# Invalid tokens:    1 A _foo foO
#
# Invocation Parameters are: enum, inputFile, outCppFile, outHeaderFile,
#                            exportMacro, exportMacroInclude
#
# enum      - Required, namespace in which the enum T will be placed.
#             Keep it short because from outside the class you want a short enum name
#             like enum::T.  Enums are contained in their own namespace to avoid
#             collisions on enum value names, a problem with C++ unless the enum
#             itself is in a separate namespace.
#
# inputFile - Required, name of the token list file, or "*.keywords" file.
#             Choose the basefilename carefully, it decides the class name
#             used in the generated *_lexer.h file.
#
# outCppFile - Optional, full path and file name of where to save the generated
#              cpp keywords file.  If not defined, the output path is the same
#              path as the token list file path, with a file name of *_keywords.cpp
#
# outHeaderFile - Optional, full path and file name of where to save the generated
#                 *.h lexer file.  If not defined, the output path is the same
#                 path as the token list file path, with a file name of *_lexer.h
#
# exportMacro - Optional, the name of the macro used for dllexport/dllimport and is used
#               to mark the class for export
#
# exportMacroInclude - Optional, a include that is added for use of the export macro
#
# Use the max_lexer() CMake function from functions.cmake for invocation convenience.


set( tokens "" )
set( lineCount 0 )
set( dsnErrorMsg "TokenList2DsnLexer.cmake failure:" )

# Validate the required invocation parameters before doing any work.
if( NOT EXISTS "${inputFile}" )
    message( FATAL_ERROR "${dsnErrorMsg} file ${inputFile} cannot be found." )
endif()

if( NOT DEFINED enum )
    message( FATAL_ERROR "${dsnErrorMsg} missing \"enum\" processing ${inputFile}." )
endif()

get_filename_component( outputPath "${inputFile}" PATH )

# The keywords file name without extension is important: its upper-cased form
# (RESULT) is the stem of the generated lexer class name.
get_filename_component( result "${inputFile}" NAME_WE )
string( TOUPPER "${result}" RESULT )

set( LEXERCLASS  "${RESULT}_LEXER" )
set( PARSERCLASS "${RESULT}_PARSER" )

# Default output locations sit next to the token list file.
if( NOT DEFINED outCppFile )
    set( outCppFile "${outputPath}/${result}_keywords.cpp" )
endif()

if( NOT DEFINED outHeaderFile )
    set( outHeaderFile "${outputPath}/${result}_lexer.h" )
endif()

# The export macro is emitted directly in front of the class name, so it needs
# a trailing separator when it is present.
if( exportMacro )
    set( exportMacro "${exportMacro} " )
endif()

# Create tag for generating header file.
set( headerTag "${LEXERCLASS}_H_" )

# Opening part of the generated header: include guard and the include that
# declares DSNLEXER, the base class of the generated lexer.
set( includeFileHeader
"
/* Do not modify this file it was automatically generated by the
 * TokenList2DsnLexer CMake script.
 */

#ifndef ${headerTag}
#define ${headerTag}

#include <dsnlexer.h>
"
)

if( exportMacroInclude )
    set( includeFileHeader
"${includeFileHeader}
#include <${exportMacroInclude}>
"
    )
endif()

set( includeFileHeader
"${includeFileHeader}
/**
 * C++ does not put enum _values_ in separate namespaces unless the enum itself
 * is in a separate namespace.  All the token enums must be in separate namespaces
 * otherwise the C++ compiler will eventually complain if it sees more than one
 * DSNLEXER in the same compilation unit, say by multiple header file inclusion.
 * Plus this also enables re-use of the same enum name T.  A typedef can always be used
 * to clarify which enum T is in play should that ever be a problem.  This is
 * unlikely since Parse() functions will usually only be exposed to one header
 * file like this one.  But if there is a problem, then use:
 *   typedef ${enum}::T T;
 * within that problem area.
 */
namespace ${enum}
{
    /// enum T contains all this lexer's tokens.
    enum T
    {
        // these first few are negative special ones for syntax, and are
        // inherited from DSNLEXER.
        T_NONE          = DSN_NONE,
        T_COMMENT       = DSN_COMMENT,
        T_STRING_QUOTE  = DSN_STRING_QUOTE,
        T_QUOTE_DEF     = DSN_QUOTE_DEF,
        T_DASH          = DSN_DASH,
        T_SYMBOL        = DSN_SYMBOL,
        T_NUMBER        = DSN_NUMBER,
        T_RIGHT         = DSN_RIGHT,    // right bracket: ')'
        T_LEFT          = DSN_LEFT,     // left bracket:  '('
        T_STRING        = DSN_STRING,   // a quoted string, stripped of the quotes
        T_EOF           = DSN_EOF,      // special case for end of file

"
)

# Opening part of the generated keywords source file, up to the start of the
# keywords[] initializer list.
set( sourceFileHeader
"
/* Do not modify this file it was automatically generated by the
 * TokenList2DsnLexer CMake script.
 *
 * Include this file in your lexer class to provide the keywords for
 * your DSN lexer.
 */

#include <${outHeaderFile}>

using namespace ${enum};

#define TOKDEF(x)    { #x, T_##x }

const KEYWORD ${LEXERCLASS}::keywords[] = {
"
)

# Read the token list and validate every entry.
file( STRINGS "${inputFile}" lines NO_HEX_CONVERSION )

foreach( line ${lines} )
    math( EXPR lineCount "${lineCount} + 1" )

    # strip any comment from # to end of line
    string( REGEX REPLACE "#.*$" "" tmpToken "${line}" )
    string( STRIP "${tmpToken}" token )

    # Ignore empty lines.  The explicit string comparison is required because
    # a plain if( token ) would treat a token such as "off" as false.
    if( NOT token STREQUAL "" )
        # A token is valid only when the whole string is a lower case letter
        # followed by any number of lower case letters, digits or underscores.
        if( token MATCHES "^[a-z][_0-9a-z]*$" )
            list( APPEND tokens "${token}" )
        else()
            message( FATAL_ERROR
                     "Invalid token string \"${tmpToken}\" at line ${lineCount} in file "
                     "<${inputFile}>." )
        endif()
    endif()
endforeach()

# The enum values generated below are the indices into the sorted token list.
list( SORT tokens )

# Check for duplicates.
list( LENGTH tokens tokensBefore )
list( REMOVE_DUPLICATES tokens )
list( LENGTH tokens tokensAfter )

if( NOT ( tokensBefore EQUAL tokensAfter ) )
    message( FATAL_ERROR "Duplicate tokens found in file <${inputFile}>." )
endif()

file( WRITE "${outHeaderFile}" "${includeFileHeader}" )
file( WRITE "${outCppFile}" "${sourceFileHeader}" )

# Emit one enum value (header) and one TOKDEF entry (source) per token.  The
# first enum value is pinned to 0; every entry except the last is followed by
# a comma.
set( lineCount 1 )

foreach( token ${tokens} )
    if( lineCount EQUAL 1 )
        file( APPEND "${outHeaderFile}" "        T_${token} = 0" )
    else()
        file( APPEND "${outHeaderFile}" "        T_${token}" )
    endif()

    file( APPEND "${outCppFile}" "    TOKDEF( ${token} )" )

    if( lineCount EQUAL tokensAfter )
        file( APPEND "${outHeaderFile}" "\n" )
        file( APPEND "${outCppFile}" "\n" )
    else()
        file( APPEND "${outHeaderFile}" ",\n" )
        file( APPEND "${outCppFile}" ",\n" )
    endif()

    math( EXPR lineCount "${lineCount} + 1" )
endforeach()

# Close the enum and namespace, then emit the lexer class declaration.
file( APPEND "${outHeaderFile}"
"    };
}   // namespace ${enum}


/**
 * Class ${LEXERCLASS}
 * is an automatically generated class using the TokenList2DsnLexer.cmake
 * technology, based on keywords provided by file:
 *    ${inputFile}
 */
class ${exportMacro}${LEXERCLASS} : public DSNLEXER
{
    /// Auto generated lexer keywords table and length:
    static const KEYWORD        keywords[];
    static const KEYWORD_MAP    keywords_hash;
    static const unsigned       keyword_count;

public:
    /**
     * Constructor ( const std::string&, const wxString& )
     * @param aSExpression is (utf8) text possibly from the clipboard that you want to parse.
     * @param aSource is a description of the origin of @a aSExpression, such as a filename.
     *   If left empty, then _(\"clipboard\") is used.
     */
    ${LEXERCLASS}( const std::string& aSExpression, const wxString& aSource = wxEmptyString ) :
        DSNLEXER( keywords, keyword_count, &keywords_hash, aSExpression, aSource )
    {
    }

    /**
     * Constructor ( FILE* )
     * takes @a aFile already opened for reading and @a aFilename as parameters.
     * The opened file is assumed to be positioned at the beginning of the file
     * for purposes of accurate line number reporting in error messages.  The
     * FILE is closed by this instance when its destructor is called.
     * @param aFile is a FILE already opened for reading.
     * @param aFilename is the name of the opened file, needed for error reporting.
     */
    ${LEXERCLASS}( FILE* aFile, const wxString& aFilename ) :
        DSNLEXER( keywords, keyword_count, &keywords_hash, aFile, aFilename )
    {
    }

    /**
     * Constructor ( LINE_READER* )
     * initializes a lexer and prepares to read from @a aLineReader which
     * is assumed ready, and may be in use by other DSNLEXERs also.  No ownership
     * is taken of @a aLineReader. This enables it to be used by other lexers also.
     * The transition between grammars in such a case, must happen on a text
     * line boundary, not within the same line of text.
     *
     * @param aLineReader is any subclassed instance of LINE_READER, such as
     *  STRING_LINE_READER or FILE_LINE_READER.  No ownership is taken of aLineReader.
     */
    ${LEXERCLASS}( LINE_READER* aLineReader ) :
        DSNLEXER( keywords, keyword_count, &keywords_hash, aLineReader )
    {
    }

    /**
     * Function TokenName
     * returns the name of the token in ASCII form.
     */
    static const char* TokenName( ${enum}::T aTok );

    /**
     * Function NextTok
     * returns the next token found in the input file or T_EOF when reaching
     * the end of file.  Users should wrap this function to return an enum
     * to aid in grammar debugging while running under a debugger, but leave
     * this lower level function returning an int (so the enum does not collide
     * with another usage).
     * @return ${enum}::T - the type of token found next.
     * @throw IO_ERROR - only if the LINE_READER throws it.
     */
    ${enum}::T NextTok()
    {
        return (${enum}::T) DSNLEXER::NextTok();
    }

    /**
     * Function NeedSYMBOL
     * calls NextTok() and then verifies that the token read in
     * satisfies bool IsSymbol().
     * If not, an IO_ERROR is thrown.
     * @return int - the actual token read in.
     * @throw IO_ERROR, if the next token does not satisfy IsSymbol()
     */
    ${enum}::T NeedSYMBOL()
    {
        return (${enum}::T) DSNLEXER::NeedSYMBOL();
    }

    /**
     * Function NeedSYMBOLorNUMBER
     * calls NextTok() and then verifies that the token read in
     * satisfies bool IsSymbol() or tok==T_NUMBER.
     * If not, an IO_ERROR is thrown.
     * @return int - the actual token read in.
     * @throw IO_ERROR, if the next token does not satisfy the above test
     */
    ${enum}::T NeedSYMBOLorNUMBER()
    {
        return (${enum}::T) DSNLEXER::NeedSYMBOLorNUMBER();
    }

    /**
     * Function CurTok
     * returns whatever NextTok() returned the last time it was called.
     */
    ${enum}::T CurTok()
    {
        return (${enum}::T) DSNLEXER::CurTok();
    }

    /**
     * Function PrevTok
     * returns whatever NextTok() returned the 2nd to last time it was called.
     */
    ${enum}::T PrevTok()
    {
        return (${enum}::T) DSNLEXER::PrevTok();
    }

    /**
     * Function GetCurStrAsToken
     * Used to support 'loose' matches (quoted tokens)
     */
    ${enum}::T GetCurStrAsToken()
    {
        return (${enum}::T) DSNLEXER::GetCurStrAsToken();
    }
};

// example usage

/**
 * Class ${PARSERCLASS}
 * holds data and functions pertinent to parsing a S-expression file .
 *
class ${PARSERCLASS} : public ${LEXERCLASS}
{

};
*/

#endif   // ${headerTag}
"
)

# Close the keywords[] table and emit keyword_count and TokenName().
file( APPEND "${outCppFile}"
"};

const unsigned ${LEXERCLASS}::keyword_count = unsigned( sizeof( ${LEXERCLASS}::keywords )/sizeof( ${LEXERCLASS}::keywords[0] ) );


const char* ${LEXERCLASS}::TokenName( T aTok )
{
    const char* ret;

    if( aTok < 0 )
        ret = DSNLEXER::Syntax( aTok );
    else if( (unsigned) aTok < keyword_count )
        ret = keywords[aTok].name;
    else
        ret = \"token too big\";

    return ret;
}
"
)

# Emit the keyword-name -> token-value map.  Values are the zero based indices
# into the sorted token list, the same numbering the enum above uses.
file( APPEND "${outCppFile}"
"

const KEYWORD_MAP ${LEXERCLASS}::keywords_hash({
"
)

set( tokenIndex 0 )
math( EXPR lastTokenIndex "${tokensAfter} - 1" )

foreach( token ${tokens} )
    file( APPEND "${outCppFile}" "    { \"${token}\", ${tokenIndex} }" )

    if( tokenIndex EQUAL lastTokenIndex )
        file( APPEND "${outCppFile}" "\n" )
    else()
        file( APPEND "${outCppFile}" ",\n" )
    endif()

    math( EXPR tokenIndex "${tokenIndex} + 1" )
endforeach()

file( APPEND "${outCppFile}" "});\n" )