Speed up DSNLEXER::findToken() enough to reduce *.kicad_pcb file loading times by approximately 13%.
2013-06-30 20:05:40 -05:00 · 2013-06-30 20:05:40 -05:00 · 44d31a1897
parent ee8901d9dd
commit 44d31a1897
5 changed files with 122 additions and 47 deletions
--- a/common/dsnlexer.cpp
+++ b/common/dsnlexer.cpp
@@ -33,24 +33,13 @@
 #include <fctsys.h>
 #include <dsnlexer.h>

-//#include "fctsys.h"
-//#include "pcbnew.h"

 //#define STANDALONE  1       // enable this for stand alone testing.

-static int compare( const void* a1, const void* a2 )
-{
-    const KEYWORD* k1 = (const KEYWORD*) a1;
-    const KEYWORD* k2 = (const KEYWORD*) a2;
-
-    int ret = strcmp( k1->name, k2->name );
-    return ret;
-}
-

 //-----<DSNLEXER>-------------------------------------------------------------

-inline void DSNLEXER::init()
+void DSNLEXER::init()
 {
    curTok  = DSN_NONE;
    prevTok = DSN_NONE;
@@ -61,6 +50,23 @@ inline void DSNLEXER::init()
    space_in_quoted_tokens = false;

    commentsAreTokens = false;
+
+#if 1
+    if( keywordCount > 11 )
+    {
+        // resize the hashtable bucket count
+        keyword_hash.reserve( keywordCount );
+    }
+
+    // fill the specialized "C string" hashtable from keywords[]
+    const KEYWORD*  it  = keywords;
+    const KEYWORD*  end = it + keywordCount;
+
+    for( ; it < end; ++it )
+    {
+        keyword_hash[it->name] = it->token;
+    }
+#endif
 }


@@ -168,21 +174,21 @@ LINE_READER* DSNLEXER::PopReader()
 }


+#if 0
+static int compare( const void* a1, const void* a2 )
+{
+    const KEYWORD* k1 = (const KEYWORD*) a1;
+    const KEYWORD* k2 = (const KEYWORD*) a2;
+
+    int ret = strcmp( k1->name, k2->name );
+    return ret;
+}
+
 int DSNLEXER::findToken( const std::string& tok )
 {
-    // convert to lower case once, this should be faster than using strcasecmp()
-    // for each test in compare().
-    lowercase.clear();
-
-    for( std::string::const_iterator iter = tok.begin();  iter!=tok.end();  ++iter )
-        lowercase += (char) tolower( *iter );
-
    KEYWORD search;

-    search.name = lowercase.c_str();
-
-    // a boost hashtable might be a few percent faster, depending on
-    // hashtable size and quality of the hash function.
+    search.name = tok.c_str();

    const KEYWORD* findings = (const KEYWORD*) bsearch( &search,
                                   keywords, keywordCount,
@@ -193,6 +199,19 @@ int DSNLEXER::findToken( const std::string& tok )
        return -1;
 }

+#else
+
+int DSNLEXER::findToken( const std::string& tok )
+{
+
+    KEYWORD_MAP::const_iterator it = keyword_hash.find( tok.c_str() );
+    if( it == keyword_hash.end() )
+        return -1;
+
+    return it->second;
+}
+#endif
+

 const char* DSNLEXER::Syntax( int aTok )
 {
--- a/common/getrunningmicrosecs.cpp
+++ b/common/getrunningmicrosecs.cpp
@@ -25,7 +25,7 @@


 #include <config.h>
-
+#include <common.h>

 #if defined(_WIN32)

--- a/include/dsnlexer.h
+++ b/include/dsnlexer.h
@@ -28,6 +28,7 @@
 #include <stdio.h>
 #include <string>
 #include <vector>
+#include <hashtables.h>

 #include <richio.h>

@@ -106,10 +107,10 @@ protected:

    int                 curTok;                 ///< the current token obtained on last NextTok()
    std::string         curText;                ///< the text of the current token
-    std::string         lowercase;              ///< a scratch buf holding token in lowercase

-    const KEYWORD*      keywords;
-    unsigned            keywordCount;
+    const KEYWORD*      keywords;               ///< table sorted by CMake for bsearch()
+    unsigned            keywordCount;           ///< count of keywords table
+    KEYWORD_MAP         keyword_hash;           ///< fast, specialized "C string" hashtable

    void init();

--- a/include/hashtables.h
+++ b/include/hashtables.h
@@ -59,29 +59,74 @@ typedef std::unordered_map< std::string, EDA_RECT >  RECT_MAP;
 /// Map a std::string to a wxString, used in PLUGINs.
 typedef boost::unordered_map< std::string, wxString >  PROPERTIES;

-/// Map a std::string to an integer.  Used in DSNLEXER.
-typedef boost::unordered_map< std::string, int >       KEYWORD_MAP;
+
+/// Equality test for "const char*" type used in very specialized KEYWORD_MAP below
+struct iequal_to : std::binary_function< const char*, const char*, bool >
+{
+    bool operator()( const char* x, const char* y ) const
+    {
+        return !strcmp( x, y );
+    }
+};
+
+
+/// Very fast and efficient hash function for "const char*" type, used in specialized
+/// KEYWORD_MAP below.
+/// taken from: http://www.boost.org/doc/libs/1_53_0/libs/unordered/examples/fnv1.hpp
+struct fnv_1a
+{
+    /* not used, std::string is too slow:
+    std::size_t operator()( std::string const& text ) const
+    {
+        std::size_t hash = 2166136261u;
+
+        for( std::string::const_iterator it = text.begin(), end = text.end();
+                it != end;  ++it )
+        {
+            hash ^= *it;
+            hash *= 16777619;
+        }
+        return hash;
+    }
+    */
+
+    std::size_t operator()( const char* it ) const
+    {
+        std::size_t hash = 2166136261u;
+
+        for( ; *it;  ++it )
+        {
+            hash ^= *it;
+            hash *= 16777619;
+        }
+        return hash;
+    }
+};
+
+
+/**
+ * Type KEYWORD_MAP
+ * is a hashtable made of a const char* and an int.  Note that use of this
+ * type outside very specific circumstances is foolish since there is no storage
+ * provided for the actual C string itself.  This type assumes use with type KEYWORD
+ * that is created by CMake and that table creates *constant* storage for C strings
+ * (and pointers to those C strings).  Here we are only interested in the C strings
+ * themselves and only the pointers are duplicated within the hashtable.
+ * If the strings were not constant and fixed, this type would not work.
+ * Also note that normally a hashtable (i.e. unordered_map) using a const char* key
+ * would simply compare the 32 bit or 64 bit pointers themselves, rather than
+ * the C strings which they are known to point to in this context.
+ * I force the latter behavior by supplying both "hash" and "equality" function objects
+ * to the hashtable (unordered_map) template.
+ * @author Dick Hollenbeck
+ */
+typedef boost::unordered_map< const char*, int, fnv_1a, iequal_to >     KEYWORD_MAP;
+

 /// Map a std::string to an EDA_RECT.
 /// The key is the classname of the derived wxformbuilder dialog.
 typedef boost::unordered_map< std::string, EDA_RECT >  RECT_MAP;

-
-#elif 0     // wx is inconsistent across platforms, will soon switch to boost
-
-// http://docs.wxwidgets.org/trunk/classwx_hash_map.html
-#include <wx/hashmap.h>
-
-/// Map a C string to a wxString, used in PLUGINs.
-WX_DECLARE_HASH_MAP( char*, wxString, wxStringHash, wxStringEqual, PROPERTIES );
-
-/// Map a C string to an integer.  Used in DSNLEXER.
-WX_DECLARE_HASH_MAP( char*, int, wxStringHash, wxStringEqual, KEYWORD_MAP );
-
-/// Map a C string to an EDA_RECT.
-/// The key is the classname of the derived wxformbuilder dialog.
-WX_DECLARE_HASH_MAP( char*, EDA_RECT, wxStringHash, wxStringEqual, RECT_MAP );
-
 #endif

 #endif // HASHTABLES_H_
--- a/pcbnew/files.cpp
+++ b/pcbnew/files.cpp
@@ -283,9 +283,19 @@ bool PCB_EDIT_FRAME::LoadOnePcbFile( const wxString& aFileName, bool aAppend,
        props["page_width"]  = wxString::Format( wxT( "%d" ), GetPageSizeIU().x );
        props["page_height"] = wxString::Format( wxT( "%d" ), GetPageSizeIU().y );

+#if 0
+        // measure the time to load a BOARD.
+        unsigned startTime = GetRunningMicroSecs();
+#endif
+
        // load or append either:
        loadedBoard = pi->Load( GetBoard()->GetFileName(), aAppend ? GetBoard() : NULL, &props );

+#if 0
+        unsigned stopTime = GetRunningMicroSecs();
+        printf( "PLUGIN::Load(): %u usecs\n", stopTime - startTime );
+#endif
+
        // the Load plugin method makes a 'fresh' board, so we need to
        // set its own name
        GetBoard()->SetFileName( fileName.GetFullPath() );
@@ -296,8 +306,8 @@ bool PCB_EDIT_FRAME::LoadOnePcbFile( const wxString& aFileName, bool aAppend,
                loadedBoard->GetFileFormatVersionAtLoad() < LEGACY_BOARD_FILE_VERSION )
            {
                DisplayInfoMessage( this,
-                                    _( "This file was created by an older version of Pcbnew.\
-\nIt will be stored in the new file format when you save this file again." ) );
+                    _(  "This file was created by an older version of Pcbnew.\n"
+                        "It will be stored in the new file format when you save this file again." ) );
            }

            SetBoard( loadedBoard );