Change DIR_CACHE by removing the need to cache Sweet strings in the

DIR_LIB_SOURCE. It only needs to be able to read Sweet strings quickly. @todo: Change public API regarding part rev string.
2010-12-20 11:14:25 -06:00 · 2010-12-20 11:14:25 -06:00 · 597f67755b
parent 020e875973 325a13a79d
commit 597f67755b
3 changed files with 231 additions and 124 deletions
--- a/new/CMakeLists.txt
+++ b/new/CMakeLists.txt
@ -58,7 +58,7 @@ endif()

 include_directories( ${CMAKE_CURRENT_SOURCE_DIR} )

-add_executable( test_dir_lib_source sch_dir_lib_source.cpp  ${PROJECT_SOURCE_DIR}/common/richio.cpp )
+add_executable( test_dir_lib_source sch_dir_lib_source.cpp )
 #add_executable( test_dir_lib_source EXCLUDE_FROM_ALL sch_dir_lib_source.cpp )

 target_link_libraries( test_dir_lib_source ${wxWidgets_LIBRARIES} )
--- a/new/sch_dir_lib_source.cpp
+++ b/new/sch_dir_lib_source.cpp
@ -125,38 +125,85 @@ static const char* strrstr( const char* haystack, const char* needle )
    return ret;
 }

+
 /**
 * Function endsWithRev
- * returns a pointer to the final string segment: "revN..." or NULL if none.
+ * returns a pointer to the final string segment: "revN[N..]" or NULL if none.
 * @param start is the beginning of string segment to test, the partname or
 *  any middle portion of it.
- * @param tail is a pointer to the terminating nul.
+ * @param tail is a pointer to the terminating nul, or one past inclusive end of
+ *  segment, i.e. the string segment of interest is [start,tail)
 * @param separator is the separating byte, expected: '.' or '/', depending on context.
 */
 static const char* endsWithRev( const char* start, const char* tail, char separator )
 {
    bool    sawDigit = false;

-    while( isdigit(*--tail) && tail>start )
+    while( tail>start && isdigit(*--tail) )
    {
        sawDigit = true;
    }

-    if( sawDigit && tail-3 >= start && tail[-3] == separator )
+    // if sawDigit, tail points to the 'v' here.
+
+    if( sawDigit && tail-3 >= start )
    {
-        tail -= 2;
-        if( tail[0]=='r' && tail[1]=='e' && tail[2]=='v' )
+        tail -= 3;
+
+        if( tail[0]==separator && tail[1]=='r' && tail[2]=='e' && tail[3]=='v' )
        {
-            return tail;
+            return tail+1;  // omit separator, return "revN[N..]"
        }
    }

    return 0;
 }

+// see struct BY_REV
+bool BY_REV::operator() ( const STRING& s1, const STRING& s2 ) const
+{
+    // avoid instantiating new STRINGs, and thank goodness that c_str() is const.

-bool DIR_LIB_SOURCE::makePartFileName( const char* aEntry,
-                        const STRING& aCategory, STRING* aPartName )
+    const char* rev1 = endsWithRev( s1.c_str(), s1.c_str()+s1.size(), '/' );
+    const char* rev2 = endsWithRev( s2.c_str(), s2.c_str()+s2.size(), '/' );
+
+    int rootLen1 =  rev1 ? rev1 - s1.c_str() : s1.size();
+    int rootLen2 =  rev2 ? rev2 - s2.c_str() : s2.size();
+
+    int r = memcmp( s1.c_str(), s2.c_str(), min( rootLen1, rootLen2 ) );
+
+    if( r )
+    {
+        return r < 0;
+    }
+
+    if( rootLen1 != rootLen2 )
+    {
+        return rootLen1 < rootLen2;
+    }
+
+    // root strings match at this point, compare the revision number numerically,
+    // and choose the higher numbered version as "less", according to std::set lingo.
+
+    if( bool(rev1) != bool(rev2) )
+    {
+        return bool(rev1) < bool(rev2);
+    }
+
+    if( rev1 && rev2 )
+    {
+        int rnum1 = atoi( rev1+3 );
+        int rnum2 = atoi( rev2+3 );
+
+        return rnum1 > rnum2;
+    }
+
+    return false;   // strings are equal, and they don't have a rev
+}
+
+
+bool DIR_LIB_SOURCE::makePartName( STRING* aPartName, const char* aEntry,
+                        const STRING& aCategory )
 {
    const char* cp = strrstr( aEntry, ".part" );

@ -165,19 +212,7 @@ bool DIR_LIB_SOURCE::makePartFileName( const char* aEntry,
    {
        const char* limit = cp + strlen( cp );

-        // if file extension is exactly ".part", and no rev
-        if( cp==limit-5 )
-        {
-            if( aCategory.size() )
-                *aPartName = aCategory + "/";
-            else
-                aPartName->clear();
-
-            aPartName->append( aEntry, cp - aEntry );
-            return true;
-        }
-
-        // if versioning, test for a trailing "revN.." type of string
+        // If versioning, then must find a trailing "revN.." type of string.
        if( useVersioning )
        {
            const char* rev = endsWithRev( cp + sizeof(".part") - 1, limit, '.' );
@ -194,14 +229,52 @@ bool DIR_LIB_SOURCE::makePartFileName( const char* aEntry,
                return true;
            }
        }
+
+        // If using versioning, then all valid partnames must have a rev string,
+        // so we don't even bother to try and load any other partfile down here.
+        else
+        {
+            // if file extension is exactly ".part", and no rev
+            if( cp==limit-5 )
+            {
+                if( aCategory.size() )
+                    *aPartName = aCategory + "/";
+                else
+                    aPartName->clear();
+
+                aPartName->append( aEntry, cp - aEntry );
+                return true;
+            }
+        }
    }

    return false;
 }

-static bool isCategoryName( const char* aName )
+
+STRING DIR_LIB_SOURCE::makeFileName( const STRING& aPartName )
 {
-    return true;
+    // create a fileName for the sweet string, using a reversible
+    // partname <-> fileName conversion protocol:
+
+    STRING  fileName = sourceURI + "/";
+
+    const char* rev = endsWithRev( aPartName.c_str(), aPartName.c_str()+aPartName.size(), '/' );
+
+    if( rev )
+    {
+        int basePartLen = rev - aPartName.c_str() - 1;  // omit '/' separator
+        fileName.append( aPartName, 0,  basePartLen );
+        fileName += ".part.";    // add '.' separator before rev
+        fileName += rev;
+    }
+    else
+    {
+        fileName += aPartName;
+        fileName += ".part";
+    }
+
+    return fileName;
 }


@ -211,9 +284,9 @@ void DIR_LIB_SOURCE::readSExpression( STRING* aResult, const STRING& aFilename )

    if( fw == -1 )
    {
-        STRING  msg = aFilename;
-        msg += " cannot be open()ed for reading";
-        throw IO_ERROR( msg.c_str() );
+        STRING  msg = strerror( errno );
+        msg += "; cannot open(O_RDONLY) file " + aFilename;
+        throw( IO_ERROR( msg.c_str() ) );
    }

    struct stat     fs;
@ -224,34 +297,43 @@ void DIR_LIB_SOURCE::readSExpression( STRING* aResult, const STRING& aFilename )
    if( fs.st_size > (1*1024*1024) )
    {
        STRING msg = aFilename;
-        msg += " seems too big.  ( > 1mbyte )";
+        msg += " seems too big.  ( > 1 mbyte )";
        throw IO_ERROR( msg.c_str() );
    }

-    // we reuse the same readBuffer, which is not thread safe, but the API
-    // is not expected to be thread safe.
-    readBuffer.resize( fs.st_size );
+    // Reuse the same readBuffer across calls.  This is not thread safe, but
+    // the API does not advertise thread safety (yet, if ever).
+    if( (int) fs.st_size > (int) readBuffer.size() )
+        readBuffer.resize( fs.st_size + 1000 );

-    size_t count = read( fw, &readBuffer[0], fs.st_size );
-    if( count != (size_t) fs.st_size )
+    int count = read( fw, &readBuffer[0], fs.st_size );
+    if( count != (int) fs.st_size )
    {
-        STRING msg = aFilename;
-        msg += " cannot be read";
-        throw IO_ERROR( msg.c_str() );
+        STRING  msg = strerror( errno );
+        msg += "; cannot read file " + aFilename;
+        throw( IO_ERROR( msg.c_str() ) );
    }

-    // std::string chars are not gauranteed to be contiguous in
+    // std::string chars are not guaranteed to be contiguous in
    // future implementations of C++, so this is why we did not read into
    // aResult directly.
    aResult->assign( &readBuffer[0], count );
 }


-DIR_LIB_SOURCE::DIR_LIB_SOURCE( const STRING& aDirectoryPath,
-                                bool doUseVersioning ) throw( IO_ERROR ) :
-    readBuffer( 512 )
+void DIR_LIB_SOURCE::cache() throw( IO_ERROR )
+{
+    partnames.clear();
+    categories.clear();
+
+    cacheOneDir( "" );
+}
+
+
+DIR_LIB_SOURCE::DIR_LIB_SOURCE( const STRING& aDirectoryPath,
+                                const STRING& aOptions ) throw( IO_ERROR ) :
+    useVersioning( strstr( aOptions.c_str(), "useVersioning" ) )
 {
-    useVersioning = doUseVersioning;
    sourceURI     = aDirectoryPath;
    sourceType    = "dir";

@ -264,17 +346,12 @@ DIR_LIB_SOURCE::DIR_LIB_SOURCE( const STRING& aDirectoryPath,
    if( strchr( "/\\", sourceURI[sourceURI.size()-1] ) )
        sourceURI.erase( sourceURI.size()-1 );

-    doOneDir( "" );
+    cache();
 }


 DIR_LIB_SOURCE::~DIR_LIB_SOURCE()
 {
-    // delete the sweet STRINGS, which "sweets" owns by pointer.
-    for( DIR_CACHE::iterator it = sweets.begin();  it != sweets.end();  ++it )
-    {
-        delete it->second;
-    }
 }


@ -288,26 +365,34 @@ void DIR_LIB_SOURCE::GetCategoricalPartNames( STRINGS* aResults, const STRING& a
        STRING  lower = aCategory + "/";
        STRING  upper = aCategory + char( '/' + 1 );

-        DIR_CACHE::const_iterator limit = sweets.upper_bound( upper );
+        PART_CACHE::const_iterator limit = partnames.upper_bound( upper );

-        for( DIR_CACHE::const_iterator it = sweets.lower_bound( lower );  it!=limit;  ++it )
+        for( PART_CACHE::const_iterator it = partnames.lower_bound( lower );  it!=limit;  ++it )
        {
-            const char* start = it->first.c_str();
-            size_t      len   = it->first.size();
+            /*
+            const char* start = it->c_str();
+            size_t      len   = it->size();

-            if( !endsWithRev( start, start+len, '/' ) )
-                aResults->push_back( it->first );
+            if( endsWithRev( start, start+len, '/' ) )
+                continue;
+            */
+
+            aResults->push_back( *it );
        }
    }
    else
    {
-        for( DIR_CACHE::const_iterator it = sweets.begin();  it!=sweets.end();  ++it )
+        for( PART_CACHE::const_iterator it = partnames.begin();  it!=partnames.end();  ++it )
        {
-            const char* start = it->first.c_str();
-            size_t      len   = it->first.size();
+            /*
+            const char* start = it->c_str();
+            size_t      len   = it->size();

            if( !endsWithRev( start, start+len, '/' ) )
-                aResults->push_back( it->first );
+                continue;
+            */
+
+            aResults->push_back( *it );
        }
    }
 }
@ -321,29 +406,20 @@ void DIR_LIB_SOURCE::ReadPart( STRING* aResult, const STRING& aPartName, const S
    if( aRev.size() )
        partname += "/" + aRev;

-    DIR_CACHE::iterator it = sweets.find( partname );
+    PART_CACHE::const_iterator it = partnames.find( partname );

-    if( it == sweets.end() )    // part not found
+    if( it == partnames.end() )    // part not found
    {
        partname += " not found.";
        throw IO_ERROR( partname.c_str() );
    }

-    if( !it->second )   // if the sweet string is not loaded yet
-    {
-        STRING  filename = sourceURI + "/" + aPartName + ".part";
+    // create a fileName for the sweet string
+    STRING  fileName = makeFileName( aPartName );

-        if( aRev.size() )
-        {
-            filename += "." + aRev;
-        }
+    // @todo what about aRev?  Define the public API with respect to aRev better.

-        it->second = new STRING();
-
-        readSExpression( it->second, filename );
-    }
-
-    *aResult = *it->second;
+    readSExpression( aResult, fileName );
 }


@ -362,40 +438,36 @@ void DIR_LIB_SOURCE::ReadParts( STRINGS* aResults, const STRINGS& aPartNames )

 void DIR_LIB_SOURCE::GetCategories( STRINGS* aResults ) throw( IO_ERROR )
 {
-    *aResults = categories;
+    aResults->clear();
+
+    // caller fetches them sorted.
+    for( NAME_CACHE::const_iterator it = categories.begin();  it!=categories.end();  ++it )
+    {
+        aResults->push_back( *it );
+    }
 }


 #if defined(DEBUG)
-#include <richio.h>

 void DIR_LIB_SOURCE::Show()
 {
    printf( "Show categories:\n" );
-    for( STRINGS::const_iterator it = categories.begin();  it!=categories.end();  ++it )
+    for( NAME_CACHE::const_iterator it = categories.begin();  it!=categories.end();  ++it )
        printf( " '%s'\n", it->c_str() );

    printf( "\n" );
    printf( "Show parts:\n" );
-    for( DIR_CACHE::const_iterator it = sweets.begin();  it != sweets.end();  ++it )
+    for( PART_CACHE::const_iterator it = partnames.begin();  it != partnames.end();  ++it )
    {
-        printf( " '%s'\n", it->first.c_str() );
-
-        if( it->second )
-        {
-            STRING_LINE_READER  slr( *it->second, wxString( wxConvertMB2WX( it->first.c_str() ) ) );
-            while( slr.ReadLine() )
-            {
-                printf( "    %s", (char*) slr );
-            }
-            printf( "\n" );
-        }
+        printf( " '%s'\n", it->c_str() );
    }
 }
+
 #endif


-void DIR_LIB_SOURCE::doOneDir( const STRING& aCategory ) throw( IO_ERROR )
+void DIR_LIB_SOURCE::cacheOneDir( const STRING& aCategory ) throw( IO_ERROR )
 {
    STRING      curDir = sourceURI;

@ -425,29 +497,29 @@ void DIR_LIB_SOURCE::doOneDir( const STRING& aCategory ) throw( IO_ERROR )

        if( !stat( fileName.c_str(), &fs ) )
        {
-            if( S_ISREG( fs.st_mode ) && makePartFileName( entry->d_name, aCategory, &partName ) )
+            // is this a valid part name?
+            if( S_ISREG( fs.st_mode ) && makePartName( &partName, entry->d_name, aCategory ) )
            {
-                /*
-                if( sweets.find( partName ) != sweets.end() )
+                std::pair<NAME_CACHE::iterator, bool> pair = partnames.insert( partName );
+
+                if( !pair.second )
                {
                    STRING  msg = partName;
                    msg += " has already been encountered";
                    throw IO_ERROR( msg.c_str() );
                }
-                */
-
-                sweets[partName] = NULL;  // NULL for now, load the sweet later.
            }

+            // is this an acceptable category name?
            else if( S_ISDIR( fs.st_mode ) && !aCategory.size() && isCategoryName( entry->d_name ) )
            {
                // only one level of recursion is used, controlled by the
                // emptiness of aCategory.
-                categories.push_back( entry->d_name );
+                categories.insert( entry->d_name );

                // somebody needs to test Windows (mingw), make sure it can
                // handle opendir() recursively
-                doOneDir( entry->d_name );
+                cacheOneDir( entry->d_name );
            }
            else
            {
@ -467,14 +539,15 @@ int main( int argc, char** argv )

    try
    {
-        DIR_LIB_SOURCE  uut( argv[1] ? argv[1] : "", true );
+//        DIR_LIB_SOURCE  uut( argv[1] ? argv[1] : "", "" );
+        DIR_LIB_SOURCE  uut( argv[1] ? argv[1] : "", "useVersioning" );

-        // initially, only the DIR_CACHE sweets and STRING categories are loaded:
+        // initially, only the PART_CACHE partnames and NAME_CACHE categories are loaded:
        uut.Show();

        uut.GetCategoricalPartNames( &partnames, "Category" );

-        printf( "GetCategoricalPartNames(Category):\n" );
+        printf( "\nGetCategoricalPartNames( aCatagory = 'Category' ):\n" );
        for( STRINGS::const_iterator it = partnames.begin();  it!=partnames.end();  ++it )
        {
            printf( " '%s'\n", it->c_str() );
@ -482,11 +555,10 @@ int main( int argc, char** argv )

        uut.ReadParts( &sweets, partnames );

-
        // fetch the part names for ALL categories.
        uut.GetCategoricalPartNames( &partnames );

-        printf( "GetCategoricalPartNames(ALL):\n" );
+        printf( "\nGetCategoricalPartNames( aCategory = '' i.e. ALL):\n" );
        for( STRINGS::const_iterator it = partnames.begin();  it!=partnames.end();  ++it )
        {
            printf( " '%s'\n", it->c_str() );
@ -494,7 +566,7 @@ int main( int argc, char** argv )

        uut.ReadParts( &sweets, partnames );

-        printf( "Sweets for ALL parts:\n" );
+        printf( "\nSweets for ALL parts:\n" );
        STRINGS::const_iterator pn = partnames.begin();
        for( STRINGS::const_iterator it = sweets.begin();  it!=sweets.end();  ++it, ++pn )
        {
--- a/new/sch_dir_lib_source.h
+++ b/new/sch_dir_lib_source.h
@ -28,17 +28,35 @@

 #include <sch_lib.h>

-#include <map>
+#include <set>
 #include <vector>


 /**
- * Type DIR_CACHE
- * is a tuple, where the key is partname (prefixed with the category if any),
- * and value is pointer to Sweet string which is loaded lazily, so can be NULL
- * until loaded.
+ * struct BY_REV
+ * provides a custom ordering for STRINGs.  Names with the same root are
+ * collated with any name lacking a revN[N..] string first, followed by the
+ * revisioned names in 'higher revision first' order.
 */
-typedef std::map< STRING, STRING* >     DIR_CACHE;
+struct BY_REV
+{
+    bool operator() ( const STRING& s1, const STRING& s2 ) const;
+};
+
+
+/**
+ * Type PART_CACHE
+ * holds a set of part names in sorted order, according to the sort
+ * order given by struct BY_REV.
+ */
+typedef std::set< STRING, BY_REV >  PART_CACHE;
+
+
+/**
+ * Type NAME_CACHE
+ * holds a set of categories in sorted order.
+ */
+typedef std::set< STRING >          NAME_CACHE;


 namespace SCH {
@ -55,29 +73,40 @@ class DIR_LIB_SOURCE : public LIB_SOURCE

    bool                useVersioning;  ///< use files with extension ".revNNN..", else not

-    DIR_CACHE           sweets;         ///< @todo, don't really need to cache the sweets, only the partnames.
+    /// normal partnames, some of which may be prefixed with a category,
+    /// and some of which may have legal "revN[N..]" type strings.
+    PART_CACHE          partnames;
+
+    /// categories which we expect to find in the set of @a partnames
+    NAME_CACHE          categories;

-    STRINGS             categories;
    std::vector<char>   readBuffer;     ///< used by readSExpression()

-
    /**
-     * Function isPartFileName
-     * returns true iff aName is a valid part file name.
+     * Function cache
+     * [re-]loads the directory cache(s).
     */
-    bool  isPartFileName( const char* aName );
+    void cache() throw( IO_ERROR );

    /**
-     * Function makePartFileName
+     * Function isCategoryName
+     * returns true iff aName is a valid category name.
+     */
+    bool isCategoryName( const char* aName )
+    {
+        return true;
+    }
+
+    /**
+     * Function makePartName
     * returns true iff aEntry holds a valid part filename, in the form of
     * "someroot.part[.revNNNN]"  where NNN are number characters [0-9]
     * @param aEntry is the raw directory entry without path information.
     * @param aCategory is the last portion of the directory path.
-     * @param aPartName is where to put a part name, assuming aEntry is legal.
+     * @param aPartName is where to put a part name, assuming @a aEntry is legal.
     * @return bool - true only if aEntry is a legal part file name.
     */
-    bool makePartFileName( const char* aEntry,
-                           const STRING& aCategory, STRING* aPartName );
+    bool makePartName( STRING* aPartName, const char* aEntry, const STRING& aCategory );

    /**
     * Function readSExpression
@ -87,14 +116,20 @@ class DIR_LIB_SOURCE : public LIB_SOURCE


    /**
-     * Function doOneDir
+     * Function cacheOneDir
     * loads part names [and categories] from a directory given by
     * "sourceURI + '/' + category"
     * Categories are only loaded if processing the top most directory because
     * only one level of categories are supported.  We know we are in the
     * top most directory if aCategory is empty.
     */
-    void doOneDir( const STRING& aCategory ) throw( IO_ERROR );
+    void cacheOneDir( const STRING& aCategory ) throw( IO_ERROR );
+
+    /**
+     * Function makeFileName
+     * converts a part name into a filename and returns it.
+     */
+    STRING makeFileName( const STRING& aPartName );

 //protected:
 public:
@ -112,14 +147,14 @@ public:
     * @param doUseVersioning if true means support versioning in the directory tree, otherwise
     *  only a single version of each part is recognized.
     */
-    DIR_LIB_SOURCE( const STRING& aDirectoryPath, bool doUseVersioning = false )
+    DIR_LIB_SOURCE( const STRING& aDirectoryPath, const STRING& aOptions = StrEmpty )
        throw( IO_ERROR );

    ~DIR_LIB_SOURCE();

    //-----<LIB_SOURCE implementation functions >------------------------------

-    void ReadPart( STRING* aResult, const STRING& aPartName, const STRING& aRev=StrEmpty )
+    void ReadPart( STRING* aResult, const STRING& aPartName, const STRING& aRev = StrEmpty )
        throw( IO_ERROR );

    void ReadParts( STRINGS* aResults, const STRINGS& aPartNames )
@ -127,7 +162,7 @@ public:

    void GetCategories( STRINGS* aResults ) throw( IO_ERROR );

-    void GetCategoricalPartNames( STRINGS* aResults, const STRING& aCategory=StrEmpty )
+    void GetCategoricalPartNames( STRINGS* aResults, const STRING& aCategory = StrEmpty )
        throw( IO_ERROR );

    void GetRevisions( STRINGS* aResults, const STRING& aPartName ) throw( IO_ERROR )