Change DIR_CACHE by removing the need to cache Sweet strings in the DIR_LIB_SOURCE.

It only needs to be able to read Sweet strings quickly.
@todo: Change public API regarding part rev string.
This commit is contained in:
Dick Hollenbeck 2010-12-20 11:14:25 -06:00
commit 597f67755b
3 changed files with 231 additions and 124 deletions

View File

@ -58,7 +58,7 @@ endif()
include_directories( ${CMAKE_CURRENT_SOURCE_DIR} )
add_executable( test_dir_lib_source sch_dir_lib_source.cpp ${PROJECT_SOURCE_DIR}/common/richio.cpp )
add_executable( test_dir_lib_source sch_dir_lib_source.cpp )
#add_executable( test_dir_lib_source EXCLUDE_FROM_ALL sch_dir_lib_source.cpp )
target_link_libraries( test_dir_lib_source ${wxWidgets_LIBRARIES} )

View File

@ -125,38 +125,85 @@ static const char* strrstr( const char* haystack, const char* needle )
return ret;
}
/**
* Function endsWithRev
* returns a pointer to the final string segment: "revN..." or NULL if none.
* returns a pointer to the final string segment: "revN[N..]" or NULL if none.
* @param start is the beginning of string segment to test, the partname or
* any middle portion of it.
* @param tail is a pointer to the terminating nul.
* @param tail is a pointer to the terminating nul, or one past inclusive end of
* segment, i.e. the string segment of interest is [start,tail)
* @param separator is the separating byte, expected: '.' or '/', depending on context.
*/
static const char* endsWithRev( const char* start, const char* tail, char separator )
{
// NOTE(review): several statements below appear twice in slightly different
// forms (two loop headers, two guards, two tests, two returns). This looks like
// the before and after lines of a change shown together -- confirm which
// form is current before relying on the exact control flow.
bool sawDigit = false;
// Step backwards from tail over the trailing run of decimal digits. The second
// loop header checks tail>start before it decrements and dereferences, so it
// does not read in front of start.
while( isdigit(*--tail) && tail>start )
while( tail>start && isdigit(*--tail) )
{
sawDigit = true;
}
if( sawDigit && tail-3 >= start && tail[-3] == separator )
// if sawDigit, tail points to the 'v' here.
if( sawDigit && tail-3 >= start )
{
tail -= 2;
if( tail[0]=='r' && tail[1]=='e' && tail[2]=='v' )
// back up from the 'v' to the byte where the separator is expected
tail -= 3;
if( tail[0]==separator && tail[1]=='r' && tail[2]=='e' && tail[3]=='v' )
{
return tail;
return tail+1; // omit separator, return "revN[N..]"
}
}
// no trailing digits, or the digits were not preceded by separator + "rev"
return 0;
}
// see struct BY_REV
// Ordering predicate: returns true when s1 sorts ahead of s2. The part of each
// string ahead of a trailing "/revN[N..]" segment is compared bytewise first;
// only when those roots are identical does the revision decide the order.
bool BY_REV::operator() ( const STRING& s1, const STRING& s2 ) const
{
// avoid instantiating new STRINGs, and thank goodness that c_str() is const.
// NOTE(review): the next two lines are the head of makePartFileName and are not
// part of this comparison -- they look like leftover lines from the
// surrounding change; confirm.
bool DIR_LIB_SOURCE::makePartFileName( const char* aEntry,
const STRING& aCategory, STRING* aPartName )
// rev1/rev2 are NULL when the string carries no "/revN[N..]" tail.
const char* rev1 = endsWithRev( s1.c_str(), s1.c_str()+s1.size(), '/' );
const char* rev2 = endsWithRev( s2.c_str(), s2.c_str()+s2.size(), '/' );
// Length of the root: everything ahead of the rev segment, or the whole
// string when there is no rev segment.
int rootLen1 = rev1 ? rev1 - s1.c_str() : s1.size();
int rootLen2 = rev2 ? rev2 - s2.c_str() : s2.size();
// Compare only the bytes both roots have in common.
int r = memcmp( s1.c_str(), s2.c_str(), min( rootLen1, rootLen2 ) );
if( r )
{
return r < 0;
}
// Common prefix is equal: the shorter root sorts first.
if( rootLen1 != rootLen2 )
{
return rootLen1 < rootLen2;
}
// root strings match at this point, compare the revision number numerically,
// and choose the higher numbered version as "less", according to std::set lingo.
// Exactly one of the two has a rev: the one without it sorts first.
if( bool(rev1) != bool(rev2) )
{
return bool(rev1) < bool(rev2);
}
if( rev1 && rev2 )
{
// +3 skips the literal "rev" so atoi() sees only the digits.
int rnum1 = atoi( rev1+3 );
int rnum2 = atoi( rev2+3 );
return rnum1 > rnum2;
}
return false; // strings are equal, and they don't have a rev
}
bool DIR_LIB_SOURCE::makePartName( STRING* aPartName, const char* aEntry,
const STRING& aCategory )
{
const char* cp = strrstr( aEntry, ".part" );
@ -165,19 +212,7 @@ bool DIR_LIB_SOURCE::makePartFileName( const char* aEntry,
{
const char* limit = cp + strlen( cp );
// if file extension is exactly ".part", and no rev
if( cp==limit-5 )
{
if( aCategory.size() )
*aPartName = aCategory + "/";
else
aPartName->clear();
aPartName->append( aEntry, cp - aEntry );
return true;
}
// if versioning, test for a trailing "revN.." type of string
// If versioning, then must find a trailing "revN.." type of string.
if( useVersioning )
{
const char* rev = endsWithRev( cp + sizeof(".part") - 1, limit, '.' );
@ -194,14 +229,52 @@ bool DIR_LIB_SOURCE::makePartFileName( const char* aEntry,
return true;
}
}
// If using versioning, then all valid partnames must have a rev string,
// so we don't even bother to try and load any other partfile down here.
else
{
// if file extension is exactly ".part", and no rev
if( cp==limit-5 )
{
if( aCategory.size() )
*aPartName = aCategory + "/";
else
aPartName->clear();
aPartName->append( aEntry, cp - aEntry );
return true;
}
}
}
return false;
}
static bool isCategoryName( const char* aName )
// Converts a part name into the path of its file below sourceURI.
// A name ending in "/revN[N..]" becomes "<sourceURI>/<base>.part.revN[N..]";
// any other name becomes "<sourceURI>/<name>.part".
// @param aPartName is the part name, optionally ending in a "/revN[N..]" segment.
// @return STRING - the file name built from sourceURI and aPartName.
STRING DIR_LIB_SOURCE::makeFileName( const STRING& aPartName )
{
// NOTE(review): this bare "return true;" does not fit a function returning
// STRING -- it looks like a leftover line from the surrounding change; confirm.
return true;
// create a fileName for the sweet string, using a reversible
// partname <-> fileName conversion protocol:
STRING fileName = sourceURI + "/";
// rev is NULL unless aPartName ends in a "/revN[N..]" segment.
const char* rev = endsWithRev( aPartName.c_str(), aPartName.c_str()+aPartName.size(), '/' );
if( rev )
{
int basePartLen = rev - aPartName.c_str() - 1; // omit '/' separator
// copy only the base of the part name, i.e. what precedes the '/' separator
fileName.append( aPartName, 0, basePartLen );
fileName += ".part."; // add '.' separator before rev
fileName += rev;
}
else
{
// no revision: the file is simply "<partname>.part"
fileName += aPartName;
fileName += ".part";
}
return fileName;
}
@ -211,9 +284,9 @@ void DIR_LIB_SOURCE::readSExpression( STRING* aResult, const STRING& aFilename )
if( fw == -1 )
{
STRING msg = aFilename;
msg += " cannot be open()ed for reading";
throw IO_ERROR( msg.c_str() );
STRING msg = strerror( errno );
msg += "; cannot open(O_RDONLY) file " + aFilename;
throw( IO_ERROR( msg.c_str() ) );
}
struct stat fs;
@ -224,34 +297,43 @@ void DIR_LIB_SOURCE::readSExpression( STRING* aResult, const STRING& aFilename )
if( fs.st_size > (1*1024*1024) )
{
STRING msg = aFilename;
msg += " seems too big. ( > 1mbyte )";
msg += " seems too big. ( > 1 mbyte )";
throw IO_ERROR( msg.c_str() );
}
// we reuse the same readBuffer, which is not thread safe, but the API
// is not expected to be thread safe.
readBuffer.resize( fs.st_size );
// reuse same readBuffer, which is not thread safe, but the API
// is not advertising thread safe (yet, if ever).
if( (int) fs.st_size > (int) readBuffer.size() )
readBuffer.resize( fs.st_size + 1000 );
size_t count = read( fw, &readBuffer[0], fs.st_size );
if( count != (size_t) fs.st_size )
int count = read( fw, &readBuffer[0], fs.st_size );
if( count != (int) fs.st_size )
{
STRING msg = aFilename;
msg += " cannot be read";
throw IO_ERROR( msg.c_str() );
STRING msg = strerror( errno );
msg += "; cannot read file " + aFilename;
throw( IO_ERROR( msg.c_str() ) );
}
// std::string chars are not gauranteed to be contiguous in
// std::string chars are not guaranteed to be contiguous in
// future implementations of C++, so this is why we did not read into
// aResult directly.
aResult->assign( &readBuffer[0], count );
}
DIR_LIB_SOURCE::DIR_LIB_SOURCE( const STRING& aDirectoryPath,
bool doUseVersioning ) throw( IO_ERROR ) :
readBuffer( 512 )
// Rebuilds the directory caches: both name sets are emptied and then
// repopulated by scanning with cacheOneDir().
void DIR_LIB_SOURCE::cache() throw( IO_ERROR )
{
    // Discard whatever an earlier scan collected.
    categories.clear();
    partnames.clear();

    // Scan with an empty category name.
    cacheOneDir( "" );
}
DIR_LIB_SOURCE::DIR_LIB_SOURCE( const STRING& aDirectoryPath,
const STRING& aOptions ) throw( IO_ERROR ) :
useVersioning( strstr( aOptions.c_str(), "useVersioning" ) )
{
useVersioning = doUseVersioning;
sourceURI = aDirectoryPath;
sourceType = "dir";
@ -264,17 +346,12 @@ DIR_LIB_SOURCE::DIR_LIB_SOURCE( const STRING& aDirectoryPath,
if( strchr( "/\\", sourceURI[sourceURI.size()-1] ) )
sourceURI.erase( sourceURI.size()-1 );
doOneDir( "" );
cache();
}
// Destructor: releases the heap allocated STRINGs held by the "sweets" map.
DIR_LIB_SOURCE::~DIR_LIB_SOURCE()
{
// delete the sweet STRINGS, which "sweets" owns by pointer.
// NOTE(review): elsewhere in this change the lookups go through a "partnames"
// set that holds no pointers; if "sweets" is gone this loop is obsolete -- confirm.
for( DIR_CACHE::iterator it = sweets.begin(); it != sweets.end(); ++it )
{
// it->second is the mapped STRING*; deleting a null pointer is a no-op
delete it->second;
}
}
@ -288,26 +365,34 @@ void DIR_LIB_SOURCE::GetCategoricalPartNames( STRINGS* aResults, const STRING& a
STRING lower = aCategory + "/";
STRING upper = aCategory + char( '/' + 1 );
DIR_CACHE::const_iterator limit = sweets.upper_bound( upper );
PART_CACHE::const_iterator limit = partnames.upper_bound( upper );
for( DIR_CACHE::const_iterator it = sweets.lower_bound( lower ); it!=limit; ++it )
for( PART_CACHE::const_iterator it = partnames.lower_bound( lower ); it!=limit; ++it )
{
const char* start = it->first.c_str();
size_t len = it->first.size();
/*
const char* start = it->c_str();
size_t len = it->size();
if( !endsWithRev( start, start+len, '/' ) )
aResults->push_back( it->first );
if( endsWithRev( start, start+len, '/' ) )
continue;
*/
aResults->push_back( *it );
}
}
else
{
for( DIR_CACHE::const_iterator it = sweets.begin(); it!=sweets.end(); ++it )
for( PART_CACHE::const_iterator it = partnames.begin(); it!=partnames.end(); ++it )
{
const char* start = it->first.c_str();
size_t len = it->first.size();
/*
const char* start = it->c_str();
size_t len = it->size();
if( !endsWithRev( start, start+len, '/' ) )
aResults->push_back( it->first );
continue;
*/
aResults->push_back( *it );
}
}
}
@ -321,29 +406,20 @@ void DIR_LIB_SOURCE::ReadPart( STRING* aResult, const STRING& aPartName, const S
if( aRev.size() )
partname += "/" + aRev;
DIR_CACHE::iterator it = sweets.find( partname );
PART_CACHE::const_iterator it = partnames.find( partname );
if( it == sweets.end() ) // part not found
if( it == partnames.end() ) // part not found
{
partname += " not found.";
throw IO_ERROR( partname.c_str() );
}
if( !it->second ) // if the sweet string is not loaded yet
{
STRING filename = sourceURI + "/" + aPartName + ".part";
// create a fileName for the sweet string
STRING fileName = makeFileName( aPartName );
if( aRev.size() )
{
filename += "." + aRev;
}
// @todo what about aRev?, and define the public API wrt to aRev better.
it->second = new STRING();
readSExpression( it->second, filename );
}
*aResult = *it->second;
readSExpression( aResult, fileName );
}
@ -362,40 +438,36 @@ void DIR_LIB_SOURCE::ReadParts( STRINGS* aResults, const STRINGS& aPartNames )
// Copies every cached category name into *aResults.
// @param aResults receives the names; its previous contents are discarded.
void DIR_LIB_SOURCE::GetCategories( STRINGS* aResults ) throw( IO_ERROR )
{
// NOTE(review): the whole-container assignment on the next line is immediately
// undone by clear() and duplicates the loop below -- it looks like a leftover
// line from the surrounding change; confirm.
*aResults = categories;
aResults->clear();
// caller fetches them sorted.
// Append the names one by one, in the iteration order of "categories".
for( NAME_CACHE::const_iterator it = categories.begin(); it!=categories.end(); ++it )
{
aResults->push_back( *it );
}
}
#if defined(DEBUG)
#include <richio.h>
// Prints the cached category names and part names with printf(), one quoted
// name per line under a heading for each group.
// NOTE(review): each loop header below appears twice with different iterator
// types, and the parts loop mixes map style (it->first / it->second) with set
// style (it->c_str()) access. This looks like the before and after lines of a
// change shown together -- confirm which form is current.
void DIR_LIB_SOURCE::Show()
{
printf( "Show categories:\n" );
for( STRINGS::const_iterator it = categories.begin(); it!=categories.end(); ++it )
for( NAME_CACHE::const_iterator it = categories.begin(); it!=categories.end(); ++it )
printf( " '%s'\n", it->c_str() );
printf( "\n" );
printf( "Show parts:\n" );
for( DIR_CACHE::const_iterator it = sweets.begin(); it != sweets.end(); ++it )
for( PART_CACHE::const_iterator it = partnames.begin(); it != partnames.end(); ++it )
{
printf( " '%s'\n", it->first.c_str() );
// map style entry: when a mapped string is present, print it line by line
if( it->second )
{
STRING_LINE_READER slr( *it->second, wxString( wxConvertMB2WX( it->first.c_str() ) ) );
while( slr.ReadLine() )
{
printf( " %s", (char*) slr );
}
printf( "\n" );
}
// set style entry: the element itself is the part name
printf( " '%s'\n", it->c_str() );
}
}
#endif
void DIR_LIB_SOURCE::doOneDir( const STRING& aCategory ) throw( IO_ERROR )
void DIR_LIB_SOURCE::cacheOneDir( const STRING& aCategory ) throw( IO_ERROR )
{
STRING curDir = sourceURI;
@ -425,29 +497,29 @@ void DIR_LIB_SOURCE::doOneDir( const STRING& aCategory ) throw( IO_ERROR )
if( !stat( fileName.c_str(), &fs ) )
{
if( S_ISREG( fs.st_mode ) && makePartFileName( entry->d_name, aCategory, &partName ) )
// is this a valid part name?
if( S_ISREG( fs.st_mode ) && makePartName( &partName, entry->d_name, aCategory ) )
{
/*
if( sweets.find( partName ) != sweets.end() )
std::pair<NAME_CACHE::iterator, bool> pair = partnames.insert( partName );
if( !pair.second )
{
STRING msg = partName;
msg += " has already been encountered";
throw IO_ERROR( msg.c_str() );
}
*/
sweets[partName] = NULL; // NULL for now, load the sweet later.
}
// is this an acceptable category name?
else if( S_ISDIR( fs.st_mode ) && !aCategory.size() && isCategoryName( entry->d_name ) )
{
// only one level of recursion is used, controlled by the
// emptiness of aCategory.
categories.push_back( entry->d_name );
categories.insert( entry->d_name );
// somebody needs to test Windows (mingw), make sure it can
// handle opendir() recursively
doOneDir( entry->d_name );
cacheOneDir( entry->d_name );
}
else
{
@ -467,14 +539,15 @@ int main( int argc, char** argv )
try
{
DIR_LIB_SOURCE uut( argv[1] ? argv[1] : "", true );
// DIR_LIB_SOURCE uut( argv[1] ? argv[1] : "", "" );
DIR_LIB_SOURCE uut( argv[1] ? argv[1] : "", "useVersioning" );
// initially, only the DIR_CACHE sweets and STRING categories are loaded:
// initially, only the PART_CACHE partnames and NAME_CACHE categories are loaded:
uut.Show();
uut.GetCategoricalPartNames( &partnames, "Category" );
printf( "GetCategoricalPartNames(Category):\n" );
printf( "\nGetCategoricalPartNames( aCatagory = 'Category' ):\n" );
for( STRINGS::const_iterator it = partnames.begin(); it!=partnames.end(); ++it )
{
printf( " '%s'\n", it->c_str() );
@ -482,11 +555,10 @@ int main( int argc, char** argv )
uut.ReadParts( &sweets, partnames );
// fetch the part names for ALL categories.
uut.GetCategoricalPartNames( &partnames );
printf( "GetCategoricalPartNames(ALL):\n" );
printf( "\nGetCategoricalPartNames( aCategory = '' i.e. ALL):\n" );
for( STRINGS::const_iterator it = partnames.begin(); it!=partnames.end(); ++it )
{
printf( " '%s'\n", it->c_str() );
@ -494,7 +566,7 @@ int main( int argc, char** argv )
uut.ReadParts( &sweets, partnames );
printf( "Sweets for ALL parts:\n" );
printf( "\nSweets for ALL parts:\n" );
STRINGS::const_iterator pn = partnames.begin();
for( STRINGS::const_iterator it = sweets.begin(); it!=sweets.end(); ++it, ++pn )
{

View File

@ -28,17 +28,35 @@
#include <sch_lib.h>
#include <map>
#include <set>
#include <vector>
/**
* Type DIR_CACHE
* is a tuple, where the key is partname (prefixed with the category if any),
* and value is pointer to Sweet string which is loaded lazily, so can be NULL
* until loaded.
* struct BY_REV
* provides a custom ordering for STRINGs. Among part names with the same root,
* the one without a revision sorts first, followed by those carrying a
* "revN[N..]" string in order of descending revision number.
*/
typedef std::map< STRING, STRING* > DIR_CACHE;
struct BY_REV
{
// Comparison functor call: returns true when s1 is ordered ahead of s2.
// Declared here only; the body is defined out of line.
bool operator() ( const STRING& s1, const STRING& s2 ) const;
};
/**
* Type PART_CACHE
* holds a set of part names in sorted order, according to the sort
* order given by struct BY_REV.
*/
typedef std::set< STRING, BY_REV > PART_CACHE;
/**
* Type NAME_CACHE
* holds a set of categories in sorted order.
*/
typedef std::set< STRING > NAME_CACHE;
namespace SCH {
@ -55,29 +73,40 @@ class DIR_LIB_SOURCE : public LIB_SOURCE
bool useVersioning; ///< use files with extension ".revNNN..", else not
DIR_CACHE sweets; ///< @todo, don't really need to cache the sweets, only the partnames.
/// normal partnames, some of which may be prefixed with a category,
/// and some of which may have legal "revN[N..]" type strings.
PART_CACHE partnames;
/// categories which we expect to find in the set of @a partnames
NAME_CACHE categories;
STRINGS categories;
std::vector<char> readBuffer; ///< used by readSExpression()
/**
* Function isPartFileName
* returns true iff aName is a valid part file name.
* Function cache
* [re-]loads the directory cache(s).
*/
bool isPartFileName( const char* aName );
void cache() throw( IO_ERROR );
/**
* Function makePartFileName
* Function isCategoryName
* returns true iff aName is a valid category name.
*/
bool isCategoryName( const char* aName )
{
// Placeholder check: aName is not inspected and every name is accepted.
return true;
}
/**
* Function makePartName
* returns true iff aEntry holds a valid part filename, in the form of
* "someroot.part[.revNNNN]" where NNN are number characters [0-9]
* @param aEntry is the raw directory entry without path information.
* @param aCategory is the last portion of the directory path.
* @param aPartName is where to put a part name, assuming aEntry is legal.
* @param aPartName is where to put a part name, assuming @a aEntry is legal.
* @return bool - true only if aEntry is a legal part file name.
*/
bool makePartFileName( const char* aEntry,
const STRING& aCategory, STRING* aPartName );
bool makePartName( STRING* aPartName, const char* aEntry, const STRING& aCategory );
/**
* Function readSExpression
@ -87,14 +116,20 @@ class DIR_LIB_SOURCE : public LIB_SOURCE
/**
* Function doOneDir
* Function cacheOneDir
* loads part names [and categories] from a directory given by
* "sourceURI + '/' + category"
* Categories are only loaded when processing the topmost directory because
* only one level of categories is supported. We know we are in the
* topmost directory if aCategory is empty.
*/
void doOneDir( const STRING& aCategory ) throw( IO_ERROR );
void cacheOneDir( const STRING& aCategory ) throw( IO_ERROR );
/**
* Function makeFileName
* converts a part name into a filename and returns it.
*/
STRING makeFileName( const STRING& aPartName );
//protected:
public:
@ -112,14 +147,14 @@ public:
* @param aOptions is an options string; versioning in the directory tree is
* supported when it contains "useVersioning", otherwise only a single version
* of each part is recognized.
*/
DIR_LIB_SOURCE( const STRING& aDirectoryPath, bool doUseVersioning = false )
DIR_LIB_SOURCE( const STRING& aDirectoryPath, const STRING& aOptions = StrEmpty )
throw( IO_ERROR );
~DIR_LIB_SOURCE();
//-----<LIB_SOURCE implementation functions >------------------------------
void ReadPart( STRING* aResult, const STRING& aPartName, const STRING& aRev=StrEmpty )
void ReadPart( STRING* aResult, const STRING& aPartName, const STRING& aRev = StrEmpty )
throw( IO_ERROR );
void ReadParts( STRINGS* aResults, const STRINGS& aPartNames )
@ -127,7 +162,7 @@ public:
void GetCategories( STRINGS* aResults ) throw( IO_ERROR );
void GetCategoricalPartNames( STRINGS* aResults, const STRING& aCategory=StrEmpty )
void GetCategoricalPartNames( STRINGS* aResults, const STRING& aCategory = StrEmpty )
throw( IO_ERROR );
void GetRevisions( STRINGS* aResults, const STRING& aPartName ) throw( IO_ERROR )