From 5778d74fbb41855da4e5ee8fc7df8044b5d2d184 Mon Sep 17 00:00:00 2001 From: Dick Hollenbeck Date: Fri, 6 Dec 2013 06:51:39 -0600 Subject: [PATCH] sketch out class UTF8::uni_iter, add tools/make-UTF8.sh --- tools/UTF8.cpp | 141 +++++++++++++++++++++++++++++++++++++++++---- tools/make-UTF8.sh | 5 ++ 2 files changed, 136 insertions(+), 10 deletions(-) create mode 100755 tools/make-UTF8.sh diff --git a/tools/UTF8.cpp b/tools/UTF8.cpp index aaee8af8f4..f15694d337 100644 --- a/tools/UTF8.cpp +++ b/tools/UTF8.cpp @@ -2,12 +2,14 @@ #include #include #include +#include /** * Class UTF8 * is an 8 bit std::string assuredly encoded in UTF8 that supplies special - * conversion support to and from wxString. + * conversion support to and from wxString, and has iteration over + * UTF8 code points. */ class UTF8 : public std::string { @@ -17,57 +19,176 @@ public: UTF8( const wxString& o ) : std::string( (const char*) o.utf8_str() ) { + // @todo: should not be inline. } UTF8( const char* txt ) : std::string( txt ) { + // ok inline } - UTF8( const std::string& o ) : + explicit UTF8( const std::string& o ) : std::string( o ) { + // ok inline } UTF8() : std::string() { + // ok inline } UTF8& operator = ( const wxString& o ) { + // @todo: should not be inline. std::string::operator=( (const char*) o.utf8_str() ); + return *this; + } + + UTF8& operator = ( const std::string& o ) + { + std::string::operator = ( o ); + return *this; } operator wxString () const { + // @todo: should not be inline. return wxString( c_str(), wxConvUTF8 ); } + + static int uni_forward( const_iterator it, uint32_t* result ) + { + // @todo: have this decode whole (possibly multi-byte) UTF8 characters into result, not single bytes. + // What's here now is scaffolding: it stores one byte in *result and returns 1, which callers add to their iterator. + *result = (unsigned char) *it; + return 1; + } + + /** + * class uni_iter + * is a non-mutable iterator that walks through code points in the UTF8 encoded + * string. 
The normal ++(), ++(int), ->(), and *() operators are all supported and + * they return a uint32_t holding the unicode character appropriate for respective + * operation. + */ + class uni_iter : public std::string::const_iterator + { + const_iterator it; + + public: + uni_iter( const_iterator start ) : + it( start ) + { + } + + /// pre-increment and return unicode at new position. NOTE(review): this reads *it at the new position, which looks unsafe when that position is uend() -- confirm. + uint32_t operator++() + { + uint32_t result; + + // advance, and toss the result + it += uni_forward( it, &result ); + + // get the next result, but do not advance: + uni_forward( it, &result ); + + return result; + } + + /// post-increment and return unicode at initial position + uint32_t operator++( int ) + { + uint32_t result; + + // grab the result and advance. + it += uni_forward( it, &result ); + return result; + } + + /// return unicode at current position. NOTE(review): returns a value rather than a pointer, so `it->x` would not compile -- confirm intent. + uint32_t operator->() const + { + uint32_t result; + + // grab the result, do not advance + uni_forward( it, &result ); + return result; + } + + /// return unicode at current position + uint32_t operator*() const + { + uint32_t result; + + // grab the result, do not advance + uni_forward( it, &result ); + return result; + } + + bool operator==( const uni_iter& other ) const { return it == other.it; } + bool operator!=( const uni_iter& other ) const { return it != other.it; } + bool operator< ( const uni_iter& other ) const { return it < other.it; } + bool operator<=( const uni_iter& other ) const { return it <= other.it; } + bool operator> ( const uni_iter& other ) const { return it > other.it; } + bool operator>=( const uni_iter& other ) const { return it >= other.it; } + }; + + uni_iter ubegin() const + { + return uni_iter( begin() ); + } + + uni_iter uend() const + { + return uni_iter( end() ); + } }; -void aFunctionTaking_wxString( const wxString& wx ) +wxString aFunctionTaking_wxString( const wxString& wx ) { printf( "%s: '%s'\n", __func__, UTF8( wx ).c_str() ); + + return wx; } int main() { - UTF8 utf; + UTF8 u1 = 
"output"; std::string str = "input"; wxString wx = wxT( "input" ); - utf = str; + u1 = str; - wxString wx2 = utf; + wxString wx2 = u1; - UTF8 utf2 = wx2; + UTF8 u2 = wx2; - printf( "here is some text:%s\n", utf2.c_str() ); + u2 += 'X'; - // this is the key accomplishment here, passing a UTF8 to a function taking wxString: - aFunctionTaking_wxString( utf2 ); + printf( "utf2:'%s'\n", u2.c_str() ); + + // key accomplishments here: + // 1) passing a UTF8 to a function which normally takes a wxString. + // 2) return a wxString back into a UTF8. + UTF8 result = aFunctionTaking_wxString( u2 ); + + printf( "result:'%s'\n", result.c_str() ); + + // test the unicode iterator: + for( UTF8::uni_iter it = u2.ubegin(); it != u2.uend(); ) + { + printf( " _%c_", it++ ); + + // after UTF8::uni_forward() is implemented, it++ %c is no longer usable. + // printf( " _%02x_", it++ ); + } + printf( "\n" ); return 0; } + diff --git a/tools/make-UTF8.sh b/tools/make-UTF8.sh new file mode 100755 index 0000000000..2e7e7510fc --- /dev/null +++ b/tools/make-UTF8.sh @@ -0,0 +1,5 @@ +WXCONFIG=wx-config +INCLUDE=/usr/include/wx-2.8 + +g++ -I $INCLUDE $($WXCONFIG --cppflags) UTF8.cpp -o test $($WXCONFIG --libs) +