# kicad/cmake/BuildSteps/TokenList2DsnLexer.cmake


#  This program source code file is part of KICAD, a free EDA CAD application.
#
#  Copyright (C) 2010 Wayne Stambaugh <stambaughw@verizon.net>
#  Copyright (C) 2010 Kicad Developers, see AUTHORS.txt for contributors.
#
#  This program is free software; you can redistribute it and/or
#  modify it under the terms of the GNU General Public License
#  as published by the Free Software Foundation; either version 2
#  of the License, or (at your option) any later version.
#
#  This program is distributed in the hope that it will be useful,
#  but WITHOUT ANY WARRANTY; without even the implied warranty of
#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#  GNU General Public License for more details.
#
#  You should have received a copy of the GNU General Public License
#  along with this program; if not, you may find one here:
#  http://www.gnu.org/licenses/old-licenses/gpl-2.0.html
#  or you may search the http://www.gnu.org website for the version 2 license,
#  or you may write to the Free Software Foundation, Inc.,
#  51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA
#
#
# This script converts a plain text file with a line feed separated list
# of token names into the appropriate source and header files required by
# the DSN lexer.  See files "<base_source_path>/common/dsnlexer.cpp" and
# "<base_source_path>/include/dsnlexer.h" for more information about how
# the DSN lexer works.  The token list file format requires a single token
# per line.  Tokens can only contain lower case letters, numbers, and
# underscores.  The first letter of each token must be a lower case letter.
# Tokens must be unique.  If any of the above criteria are not met, the
# source and header files will not be generated and a build error will
# occur.
#
# Valid tokens:    a a1 foo_1 foo_bar2
# Invalid tokens:  1 A _foo bar_ foO
#
# Invocation Parameters are:  enum, inputFile, outCppFile, outHeaderFile
#
#     enum       - Required, namespace in which the enum T will be placed.
#                  Keep it short because from outside the class you want a short enum name
#                  like enum::T.   Enums are contained in their own namespace to avoid
#                  collisions on enum value names, a problem with C++ unless the enum
#                  itself is in a separate namespace.
#
#     inputFile  - Required, name of the token list file, or "*.keywords" file.
#                  Choose the basefilename carefully, it decides the class name
#                  used in the generated *_lexer.h file.
#
#     outCppFile - Optional, full path and file name of where to save the generated
#                  cpp keywords file.  If not defined, the output path is the same
#                  path as the token list file path, with a file name of *_keywords.cpp
#
#  outHeaderFile - Optional, full path and file name of where to save the generated
#                  *.h lexer file.  If not defined, the output path is the same
#                  path as the token list file path, with a file name of *_lexer.h
#
# Use the make_lexer() CMake function from functions.cmake for invocation convenience.


#message( STATUS "TokenList2DsnLexer.cmake" )    # indicate we are running

# Script state: the accumulated token list, the input line counter, and the
# prefix used by this script's fatal error messages.
set( tokens "" )
set( lineCount 0 )
set( dsnErrorMsg "TokenList2DsnLexer.cmake failure:" )

# Validate the required invocation parameters before doing any work.
if( NOT DEFINED inputFile )
    message( FATAL_ERROR "${dsnErrorMsg} missing \"inputFile\"." )
endif()

# inputFile is quoted so that an empty value still forms a well-formed if()
# expression and ends up in the diagnostic below.
if( NOT EXISTS "${inputFile}" )
    message( FATAL_ERROR "${dsnErrorMsg} file ${inputFile} cannot be found." )
endif()

if( NOT DEFINED enum )
    message( FATAL_ERROR "${dsnErrorMsg} missing \"enum\" processing ${inputFile}." )
endif()

# Directory of the token list file; used as the default output location.
get_filename_component( outputPath "${inputFile}" PATH )

# The keywords file name without directory or extension decides the generated
# class names: "foo.keywords" yields FOO_LEXER and FOO_PARSER.
get_filename_component( result "${inputFile}" NAME_WE )
string( TOUPPER "${result}" RESULT )

set( LEXERCLASS "${RESULT}_LEXER" )
set( PARSERCLASS "${RESULT}_PARSER" )

#message( "enum:'${enum}' result:'${result}' outputPath:'${outputPath}' inputFile:'${inputFile}'" )

# Default the optional output locations to sit next to the token list file.
if( NOT DEFINED outCppFile )
    set( outCppFile "${outputPath}/${result}_keywords.cpp" )
endif()

if( NOT DEFINED outHeaderFile )
    set( outHeaderFile "${outputPath}/${result}_lexer.h" )
endif()

# Include guard macro name for the generated header.
set( headerTag "${LEXERCLASS}_H_" )

# Opening part of the generated lexer header: the "do not modify" banner, the
# ${headerTag} include guard, the dsnlexer.h include, and the start of enum T
# inside namespace ${enum}.  Only the special syntax tokens mapped onto the
# DSN_* values are listed here; the string deliberately ends inside the enum
# body so that the keyword enumerators and the closing brace can be appended
# to the file afterwards.
set( includeFileHeader
"
/* Do not modify this file it was automatically generated by the
 * TokenList2DsnLexer CMake script.
 */

#ifndef ${headerTag}
#define ${headerTag}

#include <dsnlexer.h>

/**
 * C++ does not put enum _values_ in separate namespaces unless the enum itself
 * is in a separate namespace.  All the token enums must be in separate namespaces
 * otherwise the C++ compiler will eventually complain if it sees more than one
 * DSNLEXER in the same compilation unit, say by multiple header file inclusion.
 * Plus this also enables re-use of the same enum name T.  A typedef can always be used
 * to clarify which enum T is in play should that ever be a problem.  This is
 * unlikely since Parse() functions will usually only be exposed to one header
 * file like this one.  But if there is a problem, then use:
 *   typedef ${enum}::T T;
 * within that problem area.
 */
namespace ${enum}
{
    /// enum T contains all this lexer's tokens.
    enum T
    {
        // these first few are negative special ones for syntax, and are
        // inherited from DSNLEXER.
        T_NONE          = DSN_NONE,
        T_COMMENT       = DSN_COMMENT,
        T_STRING_QUOTE  = DSN_STRING_QUOTE,
        T_QUOTE_DEF     = DSN_QUOTE_DEF,
        T_DASH          = DSN_DASH,
        T_SYMBOL        = DSN_SYMBOL,
        T_NUMBER        = DSN_NUMBER,
        T_RIGHT         = DSN_RIGHT,        // right bracket: ')'
        T_LEFT          = DSN_LEFT,         // left bracket:  '('
        T_STRING        = DSN_STRING,       // a quoted string, stripped of the quotes
        T_EOF           = DSN_EOF,          // special case for end of file

"
)


# Opening part of the generated keywords source file: the banner, an include
# of the generated header, a using-directive for namespace ${enum}, the
# TOKDEF(x) helper macro (expands to { "x", T_x }), and the opening of the
# ${LEXERCLASS}::keywords[] table.  The string ends inside the table
# initializer so that one TOKDEF entry per token can be appended afterwards.
# NOTE(review): the include line embeds the full value of outHeaderFile, which
# by default is a path built from the token list file's directory -- confirm
# that this is what the including compiler is expected to see.
set( sourceFileHeader
"
/* Do not modify this file it was automatically generated by the
 * TokenList2DsnLexer CMake script.
 *
 * Include this file in your lexer class to provide the keywords for
 * your DSN lexer.
 */

#include <${outHeaderFile}>

using namespace ${enum};

#define TOKDEF(x)    { #x, T_##x }

const KEYWORD ${LEXERCLASS}::keywords[] = {
"
)

# Read the token list file, one list element per line of text.
file( STRINGS "${inputFile}" lines NO_HEX_CONVERSION )

# Iterate with IN LISTS rather than an unquoted ${lines} expansion: the
# unquoted form silently drops empty elements, which would leave blank lines
# uncounted and make the line number reported below too small.
foreach( line IN LISTS lines )
    math( EXPR lineCount "${lineCount} + 1" )

    # strip any comment from # to end of line
    string( REGEX REPLACE "#.*$" "" tmpToken "${line}" )
    string( STRIP "${tmpToken}" token )

    # Ignore empty lines.
    if( NOT token STREQUAL "" )           # if token is "off" simple if( token) does not work
        # Make sure token is valid: the first match of the pattern must be
        # the whole token, i.e. a lower case letter followed only by lower
        # case letters, digits and underscores.
        # NOTE(review): the header comment of this file lists "bar_" as an
        # invalid token, but this pattern accepts a trailing underscore --
        # confirm which of the two is intended.

        #message( "token=${token}" )

        string( REGEX MATCH "[a-z][_0-9a-z]*" validToken "${token}" )
        #message( "validToken=${validToken}" )

        # The variable names are left unquoted on purpose so that if()
        # dereferences each exactly once, whatever the token text is.
        if( validToken STREQUAL token )
            list( APPEND tokens "${validToken}" )
        else()
            message( FATAL_ERROR
                     "Invalid token string \"${tmpToken}\" at line ${lineCount} in file "
                     "<${inputFile}>." )
        endif()
    endif()
endforeach()

# The sorted order defines the enum values and keyword table order generated
# from this list.
list( SORT tokens )

# Check for duplicates: removing them must not change the list length.
list( LENGTH tokens tokensBefore )
list( REMOVE_DUPLICATES tokens )
list( LENGTH tokens tokensAfter )

if( NOT ( tokensBefore EQUAL tokensAfter ) )
    message( FATAL_ERROR "Duplicate tokens found in file <${inputFile}>." )
endif()

# Start both generated files from their fixed preambles, replacing whatever
# content they had before.
file( WRITE "${outHeaderFile}" "${includeFileHeader}" )
file( WRITE "${outCppFile}" "${sourceFileHeader}" )

# Build the enumerator lines for the header and the TOKDEF lines for the
# source file in memory, then append each to its file in one go.
set( enumLines "" )
set( tableLines "" )
set( lineEnd "" )           # terminator owed to the previously built entry
set( firstValue " = 0" )    # only the first enumerator carries an explicit value

foreach( token ${tokens} )
    set( enumLines "${enumLines}${lineEnd}        T_${token}${firstValue}" )
    set( tableLines "${tableLines}${lineEnd}    TOKDEF( ${token} )" )

    # Every entry except the last is followed by a comma.
    set( lineEnd ",\n" )
    set( firstValue "" )
endforeach()

# The last entry ends with a bare newline; with no tokens nothing is emitted.
if( tokensAfter GREATER 0 )
    set( enumLines "${enumLines}\n" )
    set( tableLines "${tableLines}\n" )
endif()

file( APPEND "${outHeaderFile}" "${enumLines}" )
file( APPEND "${outCppFile}" "${tableLines}" )

# Finish the generated header: close enum T and namespace ${enum}, declare
# class ${LEXERCLASS} derived from DSNLEXER (static keyword tables, three
# constructors, and wrappers that cast the DSNLEXER token results to
# ${enum}::T), add a commented-out example ${PARSERCLASS} declaration, and
# close the include guard.
file( APPEND "${outHeaderFile}"
"    };
}   // namespace ${enum}


/**
 * Class ${LEXERCLASS}
 * is an automatically generated class using the TokenList2DsnLexer.cmake
 * technology, based on keywords provided by file:
 *    ${inputFile}
 */
class ${LEXERCLASS} : public DSNLEXER
{
    /// Auto generated lexer keywords table and length:
    static const KEYWORD  keywords[];
    static const KEYWORD_MAP keywords_hash;
    static const unsigned keyword_count;

public:
    /**
     * Constructor ( const std::string&, const wxString& )
     * @param aSExpression is (utf8) text possibly from the clipboard that you want to parse.
     * @param aSource is a description of the origin of @a aSExpression, such as a filename.
     *   If left empty, then _(\"clipboard\") is used.
     */
    ${LEXERCLASS}( const std::string& aSExpression, const wxString& aSource = wxEmptyString ) :
        DSNLEXER( keywords, keyword_count, &keywords_hash, aSExpression, aSource )
    {
    }

    /**
     * Constructor ( FILE* )
     * takes @a aFile already opened for reading and @a aFilename as parameters.
     * The opened file is assumed to be positioned at the beginning of the file
     * for purposes of accurate line number reporting in error messages.  The
     * FILE is closed by this instance when its destructor is called.
     * @param aFile is a FILE already opened for reading.
     * @param aFilename is the name of the opened file, needed for error reporting.
     */
    ${LEXERCLASS}( FILE* aFile, const wxString& aFilename ) :
        DSNLEXER( keywords, keyword_count, &keywords_hash, aFile, aFilename )
    {
    }

    /**
     * Constructor ( LINE_READER* )
     * initializes a lexer and prepares to read from @a aLineReader which
     * is assumed ready, and may be in use by other DSNLEXERs also.  No ownership
     * is taken of @a aLineReader. This enables it to be used by other lexers also.
     * The transition between grammars in such a case, must happen on a text
     * line boundary, not within the same line of text.
     *
     * @param aLineReader is any subclassed instance of LINE_READER, such as
     *  STRING_LINE_READER or FILE_LINE_READER.  No ownership is taken of aLineReader.
     */
    ${LEXERCLASS}( LINE_READER* aLineReader ) :
        DSNLEXER( keywords, keyword_count, &keywords_hash, aLineReader )
    {
    }

    /**
     * Function TokenName
     * returns the name of the token in ASCII form.
     */
    static const char* TokenName( ${enum}::T aTok );

    /**
     * Function NextTok
     * returns the next token found in the input file or T_EOF when reaching
     * the end of file.  Users should wrap this function to return an enum
     * to aid in grammar debugging while running under a debugger, but leave
     * this lower level function returning an int (so the enum does not collide
     * with another usage).
     * @return ${enum}::T - the type of token found next.
     * @throw IO_ERROR - only if the LINE_READER throws it.
     */
    ${enum}::T NextTok()
    {
        return (${enum}::T) DSNLEXER::NextTok();
    }

    /**
     * Function NeedSYMBOL
     * calls NextTok() and then verifies that the token read in
     * satisfies bool IsSymbol().
     * If not, an IO_ERROR is thrown.
     * @return int - the actual token read in.
     * @throw IO_ERROR, if the next token does not satisfy IsSymbol()
     */
    ${enum}::T NeedSYMBOL()
    {
        return (${enum}::T) DSNLEXER::NeedSYMBOL();
    }

    /**
     * Function NeedSYMBOLorNUMBER
     * calls NextTok() and then verifies that the token read in
     * satisfies bool IsSymbol() or tok==T_NUMBER.
     * If not, an IO_ERROR is thrown.
     * @return int - the actual token read in.
     * @throw IO_ERROR, if the next token does not satisfy the above test
     */
    ${enum}::T NeedSYMBOLorNUMBER()
    {
        return (${enum}::T) DSNLEXER::NeedSYMBOLorNUMBER();
    }

    /**
     * Function CurTok
     * returns whatever NextTok() returned the last time it was called.
     */
    ${enum}::T CurTok()
    {
        return (${enum}::T) DSNLEXER::CurTok();
    }

    /**
     * Function PrevTok
     * returns whatever NextTok() returned the 2nd to last time it was called.
     */
    ${enum}::T PrevTok()
    {
        return (${enum}::T) DSNLEXER::PrevTok();
    }

    /**
     * Function GetCurStrAsToken
     * Used to support 'loose' matches (quoted tokens)
     */
    ${enum}::T GetCurStrAsToken()
    {
        return (${enum}::T) DSNLEXER::GetCurStrAsToken();
    }
};

// example usage

/**
 * Class ${PARSERCLASS}
 * holds data and functions pertinent to parsing a S-expression file .
 *
class ${PARSERCLASS} : public ${LEXERCLASS}
{

};
*/

#endif   // ${headerTag}
"
)

# Close the keywords[] table in the generated source file, define
# keyword_count as the number of entries in that table, and emit
# ${LEXERCLASS}::TokenName(): negative token values are passed to
# DSNLEXER::Syntax(), values below keyword_count return the name stored in
# keywords[], and anything else yields the fixed text "token too big".
file( APPEND "${outCppFile}"
"};

const unsigned ${LEXERCLASS}::keyword_count = unsigned( sizeof( ${LEXERCLASS}::keywords )/sizeof( ${LEXERCLASS}::keywords[0] ) );


const char* ${LEXERCLASS}::TokenName( T aTok )
{
    const char* ret;

    if( aTok < 0 )
        ret = DSNLEXER::Syntax( aTok );
    else if( (unsigned) aTok < keyword_count )
        ret = keywords[aTok].name;
    else
        ret = \"token too big\";

    return ret;
}
"
)


# Emit the definition of ${LEXERCLASS}::keywords_hash: one { "name", index }
# initializer per token, where index is the token's position in the sorted
# token list, counted from 0 in the same order used for the enum values.
file( APPEND "${outCppFile}"
"

const KEYWORD_MAP ${LEXERCLASS}::keywords_hash({
"
)

set( TOKEN_NUM 0 )
set( hashEntries "" )

foreach( token ${tokens} )
    # Entries are separated by ",\n"; emitting the separator before every
    # entry but the first avoids having to know which entry is the last.
    if( TOKEN_NUM GREATER 0 )
        set( hashEntries "${hashEntries},\n" )
    endif()

    set( hashEntries "${hashEntries}    { \"${token}\", ${TOKEN_NUM} }" )

    math( EXPR TOKEN_NUM "${TOKEN_NUM} + 1" )
endforeach()

# Terminate the last entry, if there was one.
if( TOKEN_NUM GREATER 0 )
    set( hashEntries "${hashEntries}\n" )
endif()

# Close the initializer and end the generated file with a newline.
file( APPEND "${outCppFile}" "${hashEntries}});\n" )