From 02ba568c255c9a1b2e5ceed0c8bd449386af85f5 Mon Sep 17 00:00:00 2001 From: Dick Hollenbeck Date: Mon, 20 Dec 2010 10:19:45 -0600 Subject: [PATCH] move to std::set, no longer cache the sweet strings --- new/CMakeLists.txt | 2 +- new/sch_dir_lib_source.cpp | 204 ++++++++++++++++++++++--------------- new/sch_dir_lib_source.h | 53 +++++++--- 3 files changed, 162 insertions(+), 97 deletions(-) diff --git a/new/CMakeLists.txt b/new/CMakeLists.txt index 7872b99f41..25cdfc5349 100644 --- a/new/CMakeLists.txt +++ b/new/CMakeLists.txt @@ -58,7 +58,7 @@ endif() include_directories( ${CMAKE_CURRENT_SOURCE_DIR} ) -add_executable( test_dir_lib_source sch_dir_lib_source.cpp ${PROJECT_SOURCE_DIR}/common/richio.cpp ) +add_executable( test_dir_lib_source sch_dir_lib_source.cpp ) #add_executable( test_dir_lib_source EXCLUDE_FROM_ALL sch_dir_lib_source.cpp ) target_link_libraries( test_dir_lib_source ${wxWidgets_LIBRARIES} ) diff --git a/new/sch_dir_lib_source.cpp b/new/sch_dir_lib_source.cpp index 4002f51fbb..881a5a644c 100644 --- a/new/sch_dir_lib_source.cpp +++ b/new/sch_dir_lib_source.cpp @@ -125,29 +125,34 @@ static const char* strrstr( const char* haystack, const char* needle ) return ret; } + /** * Function endsWithRev - * returns a pointer to the final string segment: "revN..." or NULL if none. + * returns a pointer to the final string segment: "revN[N..]" or NULL if none. * @param start is the beginning of string segment to test, the partname or * any middle portion of it. - * @param tail is a pointer to the terminating nul. + * @param tail is a pointer to the terminating nul, or one past inclusive end of + * segment, i.e. the string segment of interest is [start,tail) * @param separator is the separating byte, expected: '.' or '/', depending on context. 
*/ static const char* endsWithRev( const char* start, const char* tail, char separator ) { bool sawDigit = false; - while( isdigit(*--tail) && tail>start ) + while( tail>start && isdigit(*--tail) ) { sawDigit = true; } - if( sawDigit && tail-3 >= start && tail[-3] == separator ) + // if sawDigit, tail points to the 'v' here. + + if( sawDigit && tail-3 >= start ) { - tail -= 2; - if( tail[0]=='r' && tail[1]=='e' && tail[2]=='v' ) + tail -= 3; + + if( tail[0]==separator && tail[1]=='r' && tail[2]=='e' && tail[3]=='v' ) { - return tail; + return tail+1; // omit separator, return "revN[N..]" } } @@ -155,6 +160,47 @@ static const char* endsWithRev( const char* start, const char* tail, char separa } +bool BY_REV::operator() ( const STRING& s1, const STRING& s2 ) const +{ + const char* rev1 = endsWithRev( s1.c_str(), s1.c_str()+s1.size(), '/' ); + const char* rev2 = endsWithRev( s2.c_str(), s2.c_str()+s2.size(), '/' ); + + // avoid instantiating new STRINGs + int rootLen1 = rev1 ? rev1 - s1.c_str() : s1.size(); + int rootLen2 = rev2 ? rev2 - s2.c_str() : s2.size(); + + int r = memcmp( s1.c_str(), s2.c_str(), min( rootLen1, rootLen2 ) ); + + if( r ) + { + return r < 0; + } + + if( rootLen1 != rootLen2 ) + { + return rootLen1 < rootLen2; + } + + // root strings match at this point, compare the revision number, numerically + // and choose the higher numbered version as "less", according to std::set lingo. 
+ + if( bool(rev1) != bool(rev2) ) + { + return bool(rev1) < bool(rev2); + } + + if( rev1 && rev2 ) + { + int rnum1 = atoi( rev1+3 ); + int rnum2 = atoi( rev2+3 ); + + return rnum1 > rnum2; + } + + return false; // strings are equal +} + + bool DIR_LIB_SOURCE::makePartFileName( const char* aEntry, const STRING& aCategory, STRING* aPartName ) { @@ -211,9 +257,9 @@ void DIR_LIB_SOURCE::readSExpression( STRING* aResult, const STRING& aFilename ) if( fw == -1 ) { - STRING msg = aFilename; - msg += " cannot be open()ed for reading"; - throw IO_ERROR( msg.c_str() ); + STRING msg = strerror( errno ); + msg += "; cannot open(O_RDONLY) file " + aFilename; + throw( IO_ERROR( msg.c_str() ) ); } struct stat fs; @@ -224,34 +270,43 @@ void DIR_LIB_SOURCE::readSExpression( STRING* aResult, const STRING& aFilename ) if( fs.st_size > (1*1024*1024) ) { STRING msg = aFilename; - msg += " seems too big. ( > 1mbyte )"; + msg += " seems too big. ( > 1 mbyte )"; throw IO_ERROR( msg.c_str() ); } - // we reuse the same readBuffer, which is not thread safe, but the API - // is not expected to be thread safe. - readBuffer.resize( fs.st_size ); + // reuse same readBuffer, which is not thread safe, but the API + // is not advertising thread safe (yet, if ever). + if( (int) fs.st_size >= (int) readBuffer.size() ) + readBuffer.resize( fs.st_size + 1000 ); - size_t count = read( fw, &readBuffer[0], fs.st_size ); - if( count != (size_t) fs.st_size ) + int count = read( fw, &readBuffer[0], fs.st_size ); + if( count != (int) fs.st_size ) { - STRING msg = aFilename; - msg += " cannot be read"; - throw IO_ERROR( msg.c_str() ); + STRING msg = strerror( errno ); + msg += "; cannot read file " + aFilename; + throw( IO_ERROR( msg.c_str() ) ); } - // std::string chars are not gauranteed to be contiguous in + // std::string chars are not guaranteed to be contiguous in // future implementations of C++, so this is why we did not read into // aResult directly. 
aResult->assign( &readBuffer[0], count ); } -DIR_LIB_SOURCE::DIR_LIB_SOURCE( const STRING& aDirectoryPath, - bool doUseVersioning ) throw( IO_ERROR ) : - readBuffer( 512 ) +void DIR_LIB_SOURCE::cache() throw( IO_ERROR ) +{ + partnames.clear(); + categories.clear(); + + cacheOneDir( "" ); +} + + +DIR_LIB_SOURCE::DIR_LIB_SOURCE( const STRING& aDirectoryPath, + const STRING& aOptions ) throw( IO_ERROR ) : + useVersioning( strstr( aOptions.c_str(), "useVersioning" ) ) { - useVersioning = doUseVersioning; sourceURI = aDirectoryPath; sourceType = "dir"; @@ -264,17 +319,12 @@ DIR_LIB_SOURCE::DIR_LIB_SOURCE( const STRING& aDirectoryPath, if( strchr( "/\\", sourceURI[sourceURI.size()-1] ) ) sourceURI.erase( sourceURI.size()-1 ); - doOneDir( "" ); + cache(); } DIR_LIB_SOURCE::~DIR_LIB_SOURCE() { - // delete the sweet STRINGS, which "sweets" owns by pointer. - for( DIR_CACHE::iterator it = sweets.begin(); it != sweets.end(); ++it ) - { - delete it->second; - } } @@ -288,26 +338,26 @@ void DIR_LIB_SOURCE::GetCategoricalPartNames( STRINGS* aResults, const STRING& a STRING lower = aCategory + "/"; STRING upper = aCategory + char( '/' + 1 ); - DIR_CACHE::const_iterator limit = sweets.upper_bound( upper ); + PART_CACHE::const_iterator limit = partnames.upper_bound( upper ); - for( DIR_CACHE::const_iterator it = sweets.lower_bound( lower ); it!=limit; ++it ) + for( PART_CACHE::const_iterator it = partnames.lower_bound( lower ); it!=limit; ++it ) { - const char* start = it->first.c_str(); - size_t len = it->first.size(); + const char* start = it->c_str(); + size_t len = it->size(); if( !endsWithRev( start, start+len, '/' ) ) - aResults->push_back( it->first ); + aResults->push_back( *it ); } } else { - for( DIR_CACHE::const_iterator it = sweets.begin(); it!=sweets.end(); ++it ) + for( PART_CACHE::const_iterator it = partnames.begin(); it!=partnames.end(); ++it ) { - const char* start = it->first.c_str(); - size_t len = it->first.size(); + const char* start = it->c_str(); + size_t 
len = it->size(); if( !endsWithRev( start, start+len, '/' ) ) - aResults->push_back( it->first ); + aResults->push_back( *it ); } } } @@ -321,29 +371,21 @@ void DIR_LIB_SOURCE::ReadPart( STRING* aResult, const STRING& aPartName, const S if( aRev.size() ) partname += "/" + aRev; - DIR_CACHE::iterator it = sweets.find( partname ); + PART_CACHE::const_iterator it = partnames.find( partname ); - if( it == sweets.end() ) // part not found + if( it == partnames.end() ) // part not found { partname += " not found."; throw IO_ERROR( partname.c_str() ); } - if( !it->second ) // if the sweet string is not loaded yet - { - STRING filename = sourceURI + "/" + aPartName + ".part"; + // create a filename for the sweet string + STRING filename = sourceURI + "/" + aPartName + ".part"; - if( aRev.size() ) - { - filename += "." + aRev; - } + if( aRev.size() ) + filename += "." + aRev; - it->second = new STRING(); - - readSExpression( it->second, filename ); - } - - *aResult = *it->second; + readSExpression( aResult, filename ); } @@ -362,40 +404,36 @@ void DIR_LIB_SOURCE::ReadParts( STRINGS* aResults, const STRINGS& aPartNames ) void DIR_LIB_SOURCE::GetCategories( STRINGS* aResults ) throw( IO_ERROR ) { - *aResults = categories; + aResults->clear(); + + // caller fetches them sorted. 
+ for( NAME_CACHE::const_iterator it = categories.begin(); it!=categories.end(); ++it ) + { + aResults->push_back( *it ); + } } #if defined(DEBUG) -#include void DIR_LIB_SOURCE::Show() { printf( "Show categories:\n" ); - for( STRINGS::const_iterator it = categories.begin(); it!=categories.end(); ++it ) + for( NAME_CACHE::const_iterator it = categories.begin(); it!=categories.end(); ++it ) printf( " '%s'\n", it->c_str() ); printf( "\n" ); printf( "Show parts:\n" ); - for( DIR_CACHE::const_iterator it = sweets.begin(); it != sweets.end(); ++it ) + for( PART_CACHE::const_iterator it = partnames.begin(); it != partnames.end(); ++it ) { - printf( " '%s'\n", it->first.c_str() ); - - if( it->second ) - { - STRING_LINE_READER slr( *it->second, wxString( wxConvertMB2WX( it->first.c_str() ) ) ); - while( slr.ReadLine() ) - { - printf( " %s", (char*) slr ); - } - printf( "\n" ); - } + printf( " '%s'\n", it->c_str() ); } } + #endif -void DIR_LIB_SOURCE::doOneDir( const STRING& aCategory ) throw( IO_ERROR ) +void DIR_LIB_SOURCE::cacheOneDir( const STRING& aCategory ) throw( IO_ERROR ) { STRING curDir = sourceURI; @@ -425,29 +463,29 @@ void DIR_LIB_SOURCE::doOneDir( const STRING& aCategory ) throw( IO_ERROR ) if( !stat( fileName.c_str(), &fs ) ) { + // is this a valid part name? if( S_ISREG( fs.st_mode ) && makePartFileName( entry->d_name, aCategory, &partName ) ) { - /* - if( sweets.find( partName ) != sweets.end() ) + std::pair pair = partnames.insert( partName ); + + if( !pair.second ) { STRING msg = partName; msg += " has already been encountered"; throw IO_ERROR( msg.c_str() ); } - */ - - sweets[partName] = NULL; // NULL for now, load the sweet later. } + // is this an acceptable category name? else if( S_ISDIR( fs.st_mode ) && !aCategory.size() && isCategoryName( entry->d_name ) ) { // only one level of recursion is used, controlled by the // emptiness of aCategory. 
- categories.push_back( entry->d_name ); + categories.insert( entry->d_name ); // somebody needs to test Windows (mingw), make sure it can // handle opendir() recursively - doOneDir( entry->d_name ); + cacheOneDir( entry->d_name ); } else { @@ -467,14 +505,15 @@ int main( int argc, char** argv ) try { - DIR_LIB_SOURCE uut( argv[1] ? argv[1] : "", true ); +// DIR_LIB_SOURCE uut( argv[1] ? argv[1] : "", "" ); + DIR_LIB_SOURCE uut( argv[1] ? argv[1] : "", "useVersioning" ); - // initially, only the DIR_CACHE sweets and STRING categories are loaded: + // initially, only the PART_CACHE partnames and NAME_CACHE categories are loaded: uut.Show(); uut.GetCategoricalPartNames( &partnames, "Category" ); - printf( "GetCategoricalPartNames(Category):\n" ); + printf( "\nGetCategoricalPartNames( aCategory = 'Category' ):\n" ); for( STRINGS::const_iterator it = partnames.begin(); it!=partnames.end(); ++it ) { printf( " '%s'\n", it->c_str() ); @@ -482,11 +521,10 @@ int main( int argc, char** argv ) uut.ReadParts( &sweets, partnames ); - // fetch the part names for ALL categories. uut.GetCategoricalPartNames( &partnames ); - printf( "GetCategoricalPartNames(ALL):\n" ); + printf( "\nGetCategoricalPartNames( aCategory = '' i.e. 
ALL):\n" ); for( STRINGS::const_iterator it = partnames.begin(); it!=partnames.end(); ++it ) { printf( " '%s'\n", it->c_str() ); @@ -494,7 +532,7 @@ int main( int argc, char** argv ) uut.ReadParts( &sweets, partnames ); - printf( "Sweets for ALL parts:\n" ); + printf( "\nSweets for ALL parts:\n" ); STRINGS::const_iterator pn = partnames.begin(); for( STRINGS::const_iterator it = sweets.begin(); it!=sweets.end(); ++it, ++pn ) { diff --git a/new/sch_dir_lib_source.h b/new/sch_dir_lib_source.h index 183e1b9a7c..4a847c1d79 100644 --- a/new/sch_dir_lib_source.h +++ b/new/sch_dir_lib_source.h @@ -28,17 +28,35 @@ #include -#include +#include #include /** - * Type DIR_CACHE - * is a tuple, where the key is partname (prefixed with the category if any), - * and value is pointer to Sweet string which is loaded lazily, so can be NULL - * until loaded. + * struct BY_REV + * is here to provide a custom way to compare STRINGs. Namely, the revN[N..] + * string if present, is collated according to a 'higher revision first', but + * any part string without a revision, is even 'before' that. */ -typedef std::map< STRING, STRING* > DIR_CACHE; +struct BY_REV +{ + bool operator() ( const STRING& s1, const STRING& s2 ) const; +}; + + +/** + * Type PART_CACHE + * holds a set of part names in sorted order, according to the sort + * order given by struct BY_REV. + */ +typedef std::set< STRING, BY_REV > PART_CACHE; + + +/** + * Type NAME_CACHE + * holds a set of categories in sorted order. + */ +typedef std::set< STRING > NAME_CACHE; namespace SCH { @@ -55,11 +73,20 @@ class DIR_LIB_SOURCE : public LIB_SOURCE bool useVersioning; ///< use files with extension ".revNNN..", else not - DIR_CACHE sweets; ///< @todo, don't really need to cache the sweets, only the partnames. + /// normal partnames, some of which may be prefixed with a category, + /// and some of which may have legal "revN[N..]" type strings. 
+ PART_CACHE partnames; + + /// categories which we expect to find in the set of @a partnames + NAME_CACHE categories; - STRINGS categories; std::vector readBuffer; ///< used by readSExpression() + /** + * Function cache + * [re-]loads the directory cache(s). + */ + void cache() throw( IO_ERROR ); /** * Function isPartFileName @@ -87,14 +114,14 @@ class DIR_LIB_SOURCE : public LIB_SOURCE /** - * Function doOneDir + * Function cacheOneDir * loads part names [and categories] from a directory given by * "sourceURI + '/' + category" * Categories are only loaded if processing the top most directory because * only one level of categories are supported. We know we are in the * top most directory if aCategory is empty. */ - void doOneDir( const STRING& aCategory ) throw( IO_ERROR ); + void cacheOneDir( const STRING& aCategory ) throw( IO_ERROR ); //protected: public: @@ -112,14 +139,14 @@ public: * @param doUseVersioning if true means support versioning in the directory tree, otherwise * only a single version of each part is recognized. */ - DIR_LIB_SOURCE( const STRING& aDirectoryPath, bool doUseVersioning = false ) + DIR_LIB_SOURCE( const STRING& aDirectoryPath, const STRING& aOptions = StrEmpty ) throw( IO_ERROR ); ~DIR_LIB_SOURCE(); //----------------------------------- - void ReadPart( STRING* aResult, const STRING& aPartName, const STRING& aRev=StrEmpty ) + void ReadPart( STRING* aResult, const STRING& aPartName, const STRING& aRev = StrEmpty ) throw( IO_ERROR ); void ReadParts( STRINGS* aResults, const STRINGS& aPartNames ) @@ -127,7 +154,7 @@ public: void GetCategories( STRINGS* aResults ) throw( IO_ERROR ); - void GetCategoricalPartNames( STRINGS* aResults, const STRING& aCategory=StrEmpty ) + void GetCategoricalPartNames( STRINGS* aResults, const STRING& aCategory = StrEmpty ) throw( IO_ERROR ); void GetRevisions( STRINGS* aResults, const STRING& aPartName ) throw( IO_ERROR )