Simplify the generated lexers to use a templated class

This commit is contained in:
Marek Roszko 2022-08-21 15:14:31 -04:00
parent f63a099fad
commit 6eb6447708
2 changed files with 147 additions and 137 deletions

View File

@ -234,128 +234,10 @@ file( APPEND "${outHeaderFile}"
* technology, based on keywords provided by file:
* ${inputFile}
*/
class ${LEXERCLASS} : public DSNLEXER
class ${LEXERCLASS} : public DSNLEXER_KEYWORDED<${enum}::T>
{
/// Auto generated lexer keywords table and length:
static const KEYWORD keywords[];
static const KEYWORD_MAP keywords_hash;
static const unsigned keyword_count;
public:
/**
* Constructor ( const std::string&, const wxString& )
* @param aSExpression is (utf8) text possibly from the clipboard that you want to parse.
* @param aSource is a description of the origin of @a aSExpression, such as a filename.
* If left empty, then _(\"clipboard\") is used.
*/
${LEXERCLASS}( const std::string& aSExpression, const wxString& aSource = wxEmptyString ) :
DSNLEXER( keywords, keyword_count, &keywords_hash, aSExpression, aSource )
{
}
/**
* Constructor ( FILE* )
* takes @a aFile already opened for reading and @a aFilename as parameters.
* The opened file is assumed to be positioned at the beginning of the file
* for purposes of accurate line number reporting in error messages. The
* FILE is closed by this instance when its destructor is called.
* @param aFile is a FILE already opened for reading.
* @param aFilename is the name of the opened file, needed for error reporting.
*/
${LEXERCLASS}( FILE* aFile, const wxString& aFilename ) :
DSNLEXER( keywords, keyword_count, &keywords_hash, aFile, aFilename )
{
}
/**
* Constructor ( LINE_READER* )
* initializes a lexer and prepares to read from @a aLineReader which
* is assumed ready, and may be in use by other DSNLEXERs also. No ownership
* is taken of @a aLineReader. This enables it to be used by other lexers also.
* The transition between grammars in such a case, must happen on a text
* line boundary, not within the same line of text.
*
* @param aLineReader is any subclassed instance of LINE_READER, such as
* STRING_LINE_READER or FILE_LINE_READER. No ownership is taken of aLineReader.
*/
${LEXERCLASS}( LINE_READER* aLineReader ) :
DSNLEXER( keywords, keyword_count, &keywords_hash, aLineReader )
{
}
/**
* Function TokenName
* returns the name of the token in ASCII form.
*/
static const char* TokenName( ${enum}::T aTok );
/**
* Function NextTok
* returns the next token found in the input file or T_EOF when reaching
* the end of file. Users should wrap this function to return an enum
* to aid in grammar debugging while running under a debugger, but leave
* this lower level function returning an int (so the enum does not collide
* with another usage).
* @return ${enum}::T - the type of token found next.
* @throw IO_ERROR - only if the LINE_READER throws it.
*/
${enum}::T NextTok()
{
return (${enum}::T) DSNLEXER::NextTok();
}
/**
* Function NeedSYMBOL
* calls NextTok() and then verifies that the token read in
* satisfies bool IsSymbol().
* If not, an IO_ERROR is thrown.
* @return int - the actual token read in.
* @throw IO_ERROR, if the next token does not satisfy IsSymbol()
*/
${enum}::T NeedSYMBOL()
{
return (${enum}::T) DSNLEXER::NeedSYMBOL();
}
/**
* Function NeedSYMBOLorNUMBER
* calls NextTok() and then verifies that the token read in
* satisfies bool IsSymbol() or tok==T_NUMBER.
* If not, an IO_ERROR is thrown.
* @return int - the actual token read in.
* @throw IO_ERROR, if the next token does not satisfy the above test
*/
${enum}::T NeedSYMBOLorNUMBER()
{
return (${enum}::T) DSNLEXER::NeedSYMBOLorNUMBER();
}
/**
* Function CurTok
* returns whatever NextTok() returned the last time it was called.
*/
${enum}::T CurTok()
{
return (${enum}::T) DSNLEXER::CurTok();
}
/**
* Function PrevTok
* returns whatever NextTok() returned the 2nd to last time it was called.
*/
${enum}::T PrevTok()
{
return (${enum}::T) DSNLEXER::PrevTok();
}
/**
* Function GetCurStrAsToken
* Used to support 'loose' matches (quoted tokens)
*/
${enum}::T GetCurStrAsToken()
{
return (${enum}::T) DSNLEXER::GetCurStrAsToken();
}
using DSNLEXER_KEYWORDED<${enum}::T>::DSNLEXER_KEYWORDED;
};
// example usage
@ -378,26 +260,11 @@ file( APPEND "${outCppFile}"
"};
const unsigned ${LEXERCLASS}::keyword_count = unsigned( sizeof( ${LEXERCLASS}::keywords )/sizeof( ${LEXERCLASS}::keywords[0] ) );
const char* ${LEXERCLASS}::TokenName( T aTok )
{
const char* ret;
if( aTok < 0 )
ret = DSNLEXER::Syntax( aTok );
else if( (unsigned) aTok < keyword_count )
ret = keywords[aTok].name;
else
ret = \"token too big\";
return ret;
}
"
)
file( APPEND "${outCppFile}"
file( APPEND "${outCppFile}"
"
const KEYWORD_MAP ${LEXERCLASS}::keywords_hash({
@ -410,7 +277,7 @@ math( EXPR tokensAfter "${tokensAfter} - 1" )
foreach( token ${tokens} )
file(APPEND "${outCppFile}" " { \"${token}\", ${TOKEN_NUM} }" )
if( TOKEN_NUM EQUAL tokensAfter )
file( APPEND "${outCppFile}" "\n" )
else( TOKEN_NUM EQUAL tokensAfter )

View File

@ -555,4 +555,147 @@ protected:
#endif // SWIG
};
template<typename ENUM_TYPE>
class DSNLEXER_KEYWORDED : public DSNLEXER
{
public:
    /**
     * Constructor ( const std::string&, const wxString& )
     * @param aSExpression is (utf8) text possibly from the clipboard that you want to parse.
     * @param aSource is a description of the origin of @a aSExpression, such as a filename.
     *   If left empty, then _("clipboard") is used.
     */
    DSNLEXER_KEYWORDED( const std::string& aSExpression, const wxString& aSource = wxEmptyString ) :
        DSNLEXER( keywords, keyword_count, &keywords_hash, aSExpression, aSource )
    {
    }

    /**
     * Constructor ( FILE* )
     * takes @a aFile already opened for reading and @a aFilename as parameters.
     * The opened file is assumed to be positioned at the beginning of the file
     * for purposes of accurate line number reporting in error messages.  The
     * FILE is closed by this instance when its destructor is called.
     * @param aFile is a FILE already opened for reading.
     * @param aFilename is the name of the opened file, needed for error reporting.
     */
    DSNLEXER_KEYWORDED( FILE* aFile, const wxString& aFilename ) :
        DSNLEXER( keywords, keyword_count, &keywords_hash, aFile, aFilename )
    {
    }

    /**
     * Constructor ( LINE_READER* )
     * initializes a lexer and prepares to read from @a aLineReader which
     * is assumed ready, and may be in use by other DSNLEXERs also.  No ownership
     * is taken of @a aLineReader.  This enables it to be used by other lexers also.
     * The transition between grammars in such a case, must happen on a text
     * line boundary, not within the same line of text.
     *
     * @param aLineReader is any subclassed instance of LINE_READER, such as
     *   STRING_LINE_READER or FILE_LINE_READER.  No ownership is taken of aLineReader.
     */
    DSNLEXER_KEYWORDED( LINE_READER* aLineReader ) :
        DSNLEXER( keywords, keyword_count, &keywords_hash, aLineReader )
    {
    }

    /**
     * Function NextTok
     * returns the next token found in the input file or T_EOF when reaching
     * the end of file.  Users should wrap this function to return an enum
     * to aid in grammar debugging while running under a debugger, but leave
     * this lower level function returning an int (so the enum does not collide
     * with another usage).
     * @return ENUM_TYPE - the type of token found next.
     * @throw IO_ERROR - only if the LINE_READER throws it.
     */
    ENUM_TYPE NextTok()
    {
        // Named cast documents the deliberate int -> enum narrowing.
        return static_cast<ENUM_TYPE>( DSNLEXER::NextTok() );
    }

    /**
     * Function NeedSYMBOL
     * calls NextTok() and then verifies that the token read in
     * satisfies bool IsSymbol().
     * If not, an IO_ERROR is thrown.
     * @return ENUM_TYPE - the actual token read in.
     * @throw IO_ERROR, if the next token does not satisfy IsSymbol()
     */
    ENUM_TYPE NeedSYMBOL()
    {
        return static_cast<ENUM_TYPE>( DSNLEXER::NeedSYMBOL() );
    }

    /**
     * Function NeedSYMBOLorNUMBER
     * calls NextTok() and then verifies that the token read in
     * satisfies bool IsSymbol() or tok==T_NUMBER.
     * If not, an IO_ERROR is thrown.
     * @return ENUM_TYPE - the actual token read in.
     * @throw IO_ERROR, if the next token does not satisfy the above test
     */
    ENUM_TYPE NeedSYMBOLorNUMBER()
    {
        return static_cast<ENUM_TYPE>( DSNLEXER::NeedSYMBOLorNUMBER() );
    }

    /**
     * Function CurTok
     * returns whatever NextTok() returned the last time it was called.
     */
    ENUM_TYPE CurTok()
    {
        return static_cast<ENUM_TYPE>( DSNLEXER::CurTok() );
    }

    /**
     * Function PrevTok
     * returns whatever NextTok() returned the 2nd to last time it was called.
     */
    ENUM_TYPE PrevTok()
    {
        return static_cast<ENUM_TYPE>( DSNLEXER::PrevTok() );
    }

    /**
     * Function GetCurStrAsToken
     * Used to support 'loose' matches (quoted tokens)
     */
    ENUM_TYPE GetCurStrAsToken()
    {
        return static_cast<ENUM_TYPE>( DSNLEXER::GetCurStrAsToken() );
    }

    /**
     * Function TokenName
     * returns the name of the token in ASCII form.
     */
    static const char* TokenName( ENUM_TYPE aTok );

protected:
    /// Auto generated lexer keywords table and length; each instantiation's
    /// definitions are emitted by the CMake lexer generator.
    static const KEYWORD keywords[];
    static const KEYWORD_MAP keywords_hash;
    static const unsigned keyword_count;
};
template <typename ENUM_TYPE>
const char* DSNLEXER_KEYWORDED<ENUM_TYPE>::TokenName( ENUM_TYPE aTok )
{
    // Negative token values are the DSN syntax tokens shared by every lexer.
    if( aTok < 0 )
        return DSNLEXER::Syntax( aTok );

    // In-range non-negative values index the generated keyword table.
    if( static_cast<unsigned>( aTok ) < keyword_count )
        return keywords[aTok].name;

    return "token too big";
}
#endif // DSNLEXER_H_