Simplify the generated lexers by using a templated class

2022-08-21 15:14:31 -04:00 · 2022-08-21 15:14:31 -04:00 · 6eb6447708
parent f63a099fad
commit 6eb6447708
2 changed files with 147 additions and 137 deletions
--- a/cmake/BuildSteps/TokenList2DsnLexer.cmake
+++ b/cmake/BuildSteps/TokenList2DsnLexer.cmake
@ -234,128 +234,10 @@ file( APPEND "${outHeaderFile}"
 * technology, based on keywords provided by file:
 *    ${inputFile}
 */
-class ${LEXERCLASS} : public DSNLEXER
+class ${LEXERCLASS} : public DSNLEXER_KEYWORDED<${enum}::T>
 {
    /// Auto generated lexer keywords table and length:
    static const KEYWORD  keywords[];
    static const KEYWORD_MAP keywords_hash;
    static const unsigned keyword_count;
 public:
-    /**
+    using DSNLEXER_KEYWORDED<${enum}::T>::DSNLEXER_KEYWORDED;
     * Constructor ( const std::string&, const wxString& )
     * @param aSExpression is (utf8) text possibly from the clipboard that you want to parse.
     * @param aSource is a description of the origin of @a aSExpression, such as a filename.
     *   If left empty, then _(\"clipboard\") is used.
     */
    ${LEXERCLASS}( const std::string& aSExpression, const wxString& aSource = wxEmptyString ) :
        DSNLEXER( keywords, keyword_count, &keywords_hash, aSExpression, aSource )
    {
    }
    /**
     * Constructor ( FILE* )
     * takes @a aFile already opened for reading and @a aFilename as parameters.
     * The opened file is assumed to be positioned at the beginning of the file
     * for purposes of accurate line number reporting in error messages.  The
     * FILE is closed by this instance when its destructor is called.
     * @param aFile is a FILE already opened for reading.
     * @param aFilename is the name of the opened file, needed for error reporting.
     */
    ${LEXERCLASS}( FILE* aFile, const wxString& aFilename ) :
        DSNLEXER( keywords, keyword_count, &keywords_hash, aFile, aFilename )
    {
    }
    /**
     * Constructor ( LINE_READER* )
     * initializes a lexer and prepares to read from @a aLineReader which
     * is assumed ready, and may be in use by other DSNLEXERs also.  No ownership
     * is taken of @a aLineReader. This enables it to be used by other lexers also.
     * The transition between grammars in such a case, must happen on a text
     * line boundary, not within the same line of text.
     *
     * @param aLineReader is any subclassed instance of LINE_READER, such as
     *  STRING_LINE_READER or FILE_LINE_READER.  No ownership is taken of aLineReader.
     */
    ${LEXERCLASS}( LINE_READER* aLineReader ) :
        DSNLEXER( keywords, keyword_count, &keywords_hash, aLineReader )
    {
    }
    /**
     * Function TokenName
     * returns the name of the token in ASCII form.
     */
    static const char* TokenName( ${enum}::T aTok );
    /**
     * Function NextTok
     * returns the next token found in the input file or T_EOF when reaching
     * the end of file.  Users should wrap this function to return an enum
     * to aid in grammar debugging while running under a debugger, but leave
     * this lower level function returning an int (so the enum does not collide
     * with another usage).
     * @return ${enum}::T - the type of token found next.
     * @throw IO_ERROR - only if the LINE_READER throws it.
     */
    ${enum}::T NextTok()
    {
        return (${enum}::T) DSNLEXER::NextTok();
    }
    /**
     * Function NeedSYMBOL
     * calls NextTok() and then verifies that the token read in
     * satisfies bool IsSymbol().
     * If not, an IO_ERROR is thrown.
     * @return int - the actual token read in.
     * @throw IO_ERROR, if the next token does not satisfy IsSymbol()
     */
    ${enum}::T NeedSYMBOL()
    {
        return (${enum}::T) DSNLEXER::NeedSYMBOL();
    }
    /**
     * Function NeedSYMBOLorNUMBER
     * calls NextTok() and then verifies that the token read in
     * satisfies bool IsSymbol() or tok==T_NUMBER.
     * If not, an IO_ERROR is thrown.
     * @return int - the actual token read in.
     * @throw IO_ERROR, if the next token does not satisfy the above test
     */
    ${enum}::T NeedSYMBOLorNUMBER()
    {
        return (${enum}::T) DSNLEXER::NeedSYMBOLorNUMBER();
    }
    /**
     * Function CurTok
     * returns whatever NextTok() returned the last time it was called.
     */
    ${enum}::T CurTok()
    {
        return (${enum}::T) DSNLEXER::CurTok();
    }
    /**
     * Function PrevTok
     * returns whatever NextTok() returned the 2nd to last time it was called.
     */
    ${enum}::T PrevTok()
    {
        return (${enum}::T) DSNLEXER::PrevTok();
    }
    /**
     * Function GetCurStrAsToken
     * Used to support 'loose' matches (quoted tokens)
     */
    ${enum}::T GetCurStrAsToken()
    {
        return (${enum}::T) DSNLEXER::GetCurStrAsToken();
    }
 };
 // example usage
@ -378,26 +260,11 @@ file( APPEND "${outCppFile}"
 "};
 const unsigned ${LEXERCLASS}::keyword_count = unsigned( sizeof( ${LEXERCLASS}::keywords )/sizeof( ${LEXERCLASS}::keywords[0] ) );
 const char* ${LEXERCLASS}::TokenName( T aTok )
 {
    const char* ret;
    if( aTok < 0 )
        ret = DSNLEXER::Syntax( aTok );
    else if( (unsigned) aTok < keyword_count )
        ret = keywords[aTok].name;
    else
        ret = \"token too big\";
    return ret;
 }
 "
 )
-file( APPEND "${outCppFile}" 
+file( APPEND "${outCppFile}"
 "
 const KEYWORD_MAP ${LEXERCLASS}::keywords_hash({
@ -410,7 +277,7 @@ math( EXPR tokensAfter "${tokensAfter} - 1" )
 foreach( token ${tokens} )
    file(APPEND "${outCppFile}" "    { \"${token}\", ${TOKEN_NUM} }" )
-    
+
    if( TOKEN_NUM EQUAL tokensAfter )
        file( APPEND "${outCppFile}" "\n" )
    else( TOKEN_NUM EQUAL tokensAfter )
--- a/include/dsnlexer.h
+++ b/include/dsnlexer.h
@ -555,4 +555,147 @@ protected:
 #endif // SWIG
 };
 template <typename ENUM_TYPE>
 class DSNLEXER_KEYWORDED : public DSNLEXER
 {
 public:
    /**
     * Constructor ( const std::string&, const wxString& )
     * builds a lexer over in-memory text, handing this class's keyword table,
     * keyword count and keyword hash to the DSNLEXER base.
     * @param aSExpression is the (utf8) text to tokenize, possibly taken from the clipboard.
     * @param aSource describes where @a aSExpression came from, such as a filename.
     *   If left empty, then _("clipboard") is used.
     */
    DSNLEXER_KEYWORDED( const std::string& aSExpression, const wxString& aSource = wxEmptyString ) :
            DSNLEXER( keywords, keyword_count, &keywords_hash, aSExpression, aSource )
    {
    }
    /**
     * Constructor ( FILE* )
     * builds a lexer reading from @a aFile, which must already be open for reading.
     * The file is assumed to be positioned at its beginning so that line numbers
     * reported in error messages are accurate.  The FILE is closed by this
     * instance when its destructor is called.
     * @param aFile is a FILE already opened for reading.
     * @param aFilename is the name of the opened file, needed for error reporting.
     */
    DSNLEXER_KEYWORDED( FILE* aFile, const wxString& aFilename ) :
            DSNLEXER( keywords, keyword_count, &keywords_hash, aFile, aFilename )
    {
    }
    /**
     * Constructor ( LINE_READER* )
     * builds a lexer reading from @a aLineReader, which is assumed ready and may
     * be shared with other DSNLEXERs.  No ownership of @a aLineReader is taken.
     * When several lexers share one reader, the switch from one grammar to the
     * next must happen on a text line boundary, not within a line of text.
     *
     * @param aLineReader is any subclassed instance of LINE_READER, such as
     *  STRING_LINE_READER or FILE_LINE_READER.  No ownership is taken of aLineReader.
     */
    DSNLEXER_KEYWORDED( LINE_READER* aLineReader ) :
            DSNLEXER( keywords, keyword_count, &keywords_hash, aLineReader )
    {
    }
    /**
     * Function NextTok
     * is a typed wrapper around DSNLEXER::NextTok(): it returns the next token
     * found in the input, or T_EOF when reaching the end of file, converted to
     * ENUM_TYPE so callers work with the grammar's own enum.
     * @return ENUM_TYPE - the type of token found next.
     * @throw IO_ERROR - only if the LINE_READER throws it.
     */
    ENUM_TYPE NextTok()
    {
        return static_cast<ENUM_TYPE>( DSNLEXER::NextTok() );
    }
    /**
     * Function NeedSYMBOL
     * is a typed wrapper around DSNLEXER::NeedSYMBOL(): it calls NextTok() and
     * then verifies that the token read in satisfies bool IsSymbol().
     * If not, an IO_ERROR is thrown.
     * @return ENUM_TYPE - the actual token read in.
     * @throw IO_ERROR, if the next token does not satisfy IsSymbol()
     */
    ENUM_TYPE NeedSYMBOL()
    {
        return static_cast<ENUM_TYPE>( DSNLEXER::NeedSYMBOL() );
    }
    /**
     * Function NeedSYMBOLorNUMBER
     * is a typed wrapper around DSNLEXER::NeedSYMBOLorNUMBER(): it calls
     * NextTok() and then verifies that the token read in satisfies
     * bool IsSymbol() or tok==T_NUMBER.
     * If not, an IO_ERROR is thrown.
     * @return ENUM_TYPE - the actual token read in.
     * @throw IO_ERROR, if the next token does not satisfy the above test
     */
    ENUM_TYPE NeedSYMBOLorNUMBER()
    {
        return static_cast<ENUM_TYPE>( DSNLEXER::NeedSYMBOLorNUMBER() );
    }
    /**
     * Function CurTok
     * returns, as ENUM_TYPE, whatever NextTok() returned the last time it was called.
     */
    ENUM_TYPE CurTok()
    {
        return static_cast<ENUM_TYPE>( DSNLEXER::CurTok() );
    }
    /**
     * Function PrevTok
     * returns, as ENUM_TYPE, whatever NextTok() returned the 2nd to last time
     * it was called.
     */
    ENUM_TYPE PrevTok()
    {
        return static_cast<ENUM_TYPE>( DSNLEXER::PrevTok() );
    }
    /**
     * Function GetCurStrAsToken
     * is a typed wrapper around DSNLEXER::GetCurStrAsToken().
     * Used to support 'loose' matches (quoted tokens).
     */
    ENUM_TYPE GetCurStrAsToken()
    {
        return static_cast<ENUM_TYPE>( DSNLEXER::GetCurStrAsToken() );
    }
    /**
     * Function TokenName
     * returns the name of the token in ASCII form.
     * @param aTok is the token whose name is wanted.
     */
    static const char* TokenName( ENUM_TYPE aTok );
 protected:
    /// Auto generated lexer keyword table, its hash map and its entry count.
    /// Only declared here; NOTE(review): the definitions are presumably emitted
    /// by the generated lexer sources for each ENUM_TYPE -- confirm.
    static const KEYWORD     keywords[];
    static const KEYWORD_MAP keywords_hash;
    static const unsigned    keyword_count;
 };
 /**
  * Function TokenName
  * returns the name of the token in ASCII form.
  * Negative tokens are resolved through DSNLEXER::Syntax(), tokens below
  * keyword_count are looked up in the keywords table, and anything else
  * yields the fixed text "token too big".
  * @param aTok is the token whose name is wanted.
  * @return const char* - the token's name.
  */
 template <typename ENUM_TYPE>
 const char* DSNLEXER_KEYWORDED<ENUM_TYPE>::TokenName( ENUM_TYPE aTok )
 {
    // Negative values are not keyword indices; the base class names them.
    if( aTok < 0 )
        return DSNLEXER::Syntax( aTok );
    // Non-negative values index straight into the keyword table.
    if( static_cast<unsigned>( aTok ) < keyword_count )
        return keywords[aTok].name;
    // Past the end of the table.
    return "token too big";
 }
 #endif  // DSNLEXER_H_