diff --git a/common/dsnlexer.cpp b/common/dsnlexer.cpp index 6f8f6d064c..883a85db38 100644 --- a/common/dsnlexer.cpp +++ b/common/dsnlexer.cpp @@ -369,18 +369,25 @@ int DSNLEXER::NeedNUMBER( const char* aExpectation ) throw( IO_ERROR ) /** * Function isSpace - * strips the upper bits of the int to ensure the value passed to C++ %isspace() is - * in the range of 0-255 + * tests for whitespace. Our whitespace, by our definition, is a subset of ASCII, + * i.e. no bytes with MSB on can be considered whitespace, since they are likely part + * of a multibyte UTF8 character. */ -static inline bool isSpace( int cc ) +static bool isSpace( int cc ) { - // Warning: we are using UTF8 char, so values are coded from 0x01 to 0xFF - // isspace( int value ) works fine under Linux, - // but seems use only a 7 bits value under mingw, in comparisons. - // (for instance 0xA0 is seen as 0x20) - // So we need to test if the value is ASCII ( <= 127) and a space ( ' ', \t, \n ... ) - // and not just a space: - return ( (unsigned) cc <= 127 ) && ::isspace( cc ); + if( cc <= ' ' ) + { + switch( cc ) + { + case ' ': + case '\n': + case '\r': + case '\t': + case '\0': // PCAD s-expression files have this. + return true; + } + } + return false; } diff --git a/common/richio.cpp b/common/richio.cpp index 0b3a1f8ef8..4ab811d96c 100644 --- a/common/richio.cpp +++ b/common/richio.cpp @@ -90,6 +90,11 @@ FILE_LINE_READER::FILE_LINE_READER( FILE* aFile, const wxString& aFileName, iOwn( doOwn ), fp( aFile ) { + if( doOwn ) + { + setvbuf( fp, NULL, _IOFBF, BUFSIZ * 8 ); + } + source = aFileName; lineNum = aStartingLineNumber; } @@ -101,6 +106,10 @@ FILE_LINE_READER::~FILE_LINE_READER() fclose( fp ); } +#if 0 + +// The strlen() will trip on embedded nuls which can come in via bad data files. +// Try an alternate technique below. unsigned FILE_LINE_READER::ReadLine() throw( IO_ERROR ) { @@ -129,6 +138,40 @@ unsigned FILE_LINE_READER::ReadLine() throw( IO_ERROR ) return length; } +#else +unsigned FILE_LINE_READER::ReadLine() throw( IO_ERROR ) +{ + length = 0; + + for(;;) + { + if( length >= maxLineLength ) + THROW_IO_ERROR( _( "Maximum line length exceeded" ) ); + + if( length >= capacity ) + expandCapacity( capacity * 2 ); + + // faster, POSIX compatible fgetc(), no locking. + int cc = getc_unlocked( fp ); + if( cc == EOF ) + break; + + line[ length++ ] = (char) cc; + + if( cc == '\n' ) + break; + } + + line[ length ] = 0; + + // lineNum is incremented even if there was no line read, because this + // leads to better error reporting when we hit an end of file. + ++lineNum; + + return length; +} +#endif + STRING_LINE_READER::STRING_LINE_READER( const std::string& aString, const wxString& aSource ) : LINE_READER( LINE_READER_LINE_DEFAULT_MAX ),