sketch out class UTF8::uni_iter, add tools/make-UTF8.sh
This commit is contained in:
parent
6274740de9
commit
391ff6699a
141
tools/UTF8.cpp
141
tools/UTF8.cpp
|
@ -2,12 +2,14 @@
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
#include <string>
|
#include <string>
|
||||||
#include <wx/string.h>
|
#include <wx/string.h>
|
||||||
|
#include <stdint.h>
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Class UTF8
|
* Class UTF8
|
||||||
* is an 8 bit std::string assuredly encoded in UTF8 that supplies special
|
* is an 8 bit std::string assuredly encoded in UTF8 that supplies special
|
||||||
* conversion support to and from wxString.
|
* conversion support to and from wxString, and has iteration over
|
||||||
|
* UTF8 code points.
|
||||||
*/
|
*/
|
||||||
class UTF8 : public std::string
|
class UTF8 : public std::string
|
||||||
{
|
{
|
||||||
|
@ -17,57 +19,176 @@ public:
|
||||||
UTF8( const wxString& o ) :
|
UTF8( const wxString& o ) :
|
||||||
std::string( (const char*) o.utf8_str() )
|
std::string( (const char*) o.utf8_str() )
|
||||||
{
|
{
|
||||||
|
// @todo: should not be inline.
|
||||||
}
|
}
|
||||||
|
|
||||||
UTF8( const char* txt ) :
|
UTF8( const char* txt ) :
|
||||||
std::string( txt )
|
std::string( txt )
|
||||||
{
|
{
|
||||||
|
// ok inline
|
||||||
}
|
}
|
||||||
|
|
||||||
UTF8( const std::string& o ) :
|
explicit UTF8( const std::string& o ) :
|
||||||
std::string( o )
|
std::string( o )
|
||||||
{
|
{
|
||||||
|
// ok inline
|
||||||
}
|
}
|
||||||
|
|
||||||
UTF8() :
|
UTF8() :
|
||||||
std::string()
|
std::string()
|
||||||
{
|
{
|
||||||
|
// ok inline
|
||||||
}
|
}
|
||||||
|
|
||||||
UTF8& operator = ( const wxString& o )
|
UTF8& operator = ( const wxString& o )
|
||||||
{
|
{
|
||||||
|
// @todo: should not be inline.
|
||||||
std::string::operator=( (const char*) o.utf8_str() );
|
std::string::operator=( (const char*) o.utf8_str() );
|
||||||
|
return *this;
|
||||||
|
}
|
||||||
|
|
||||||
|
UTF8& operator = ( const std::string& o )
|
||||||
|
{
|
||||||
|
std::string::operator = ( o );
|
||||||
|
return *this;
|
||||||
}
|
}
|
||||||
|
|
||||||
operator wxString () const
|
operator wxString () const
|
||||||
{
|
{
|
||||||
|
// @todo: should not be inline.
|
||||||
return wxString( c_str(), wxConvUTF8 );
|
return wxString( c_str(), wxConvUTF8 );
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Function uni_forward
 * decodes the single UTF8 encoded code point which begins at @a it.
 *
 * @param it is positioned on the first byte of the sequence to decode.  It
 *  must be dereferenceable.  Continuation bytes are read one at a time and
 *  reading stops at the first byte that is not a continuation byte, so a
 *  truncated sequence never causes a read beyond the byte that ends it.
 *
 * @param result is where the decoded code point is stored.  U+FFFD
 *  (REPLACEMENT CHARACTER) is stored when the bytes at @a it are not well
 *  formed UTF8: a stray continuation byte, an invalid lead byte, a truncated
 *  sequence, an overlong encoding, a UTF-16 surrogate, or a value above
 *  U+10FFFF.
 *
 * @return int - the number of bytes to advance @a it by in order to reach the
 *  start of the next sequence.  This is the sequence length (1 to 4) for well
 *  formed input and 1 for malformed input, so that a caller always makes
 *  progress and can resynchronize on the next byte.
 */
static int uni_forward( std::string::const_iterator it, uint32_t* result )
{
    // Smallest code point that legitimately needs a sequence of N bytes,
    // indexed by N.  Anything smaller was encoded in overlong form.
    static const uint32_t smallest[] = { 0, 0, 0x80, 0x800, 0x10000 };

    const uint32_t      replacement = 0xFFFD;
    const unsigned char lead = static_cast<unsigned char>( *it );

    int         len;
    uint32_t    cp;

    if( lead < 0x80 )                       // 0xxxxxxx: plain ASCII
    {
        *result = lead;
        return 1;
    }
    else if( ( lead & 0xE0 ) == 0xC0 )      // 110xxxxx
    {
        len = 2;
        cp  = lead & 0x1F;
    }
    else if( ( lead & 0xF0 ) == 0xE0 )      // 1110xxxx
    {
        len = 3;
        cp  = lead & 0x0F;
    }
    else if( ( lead & 0xF8 ) == 0xF0 )      // 11110xxx
    {
        len = 4;
        cp  = lead & 0x07;
    }
    else                                    // stray continuation or invalid lead
    {
        *result = replacement;
        return 1;
    }

    for( int i = 1; i < len; ++i )
    {
        const unsigned char next = static_cast<unsigned char>( it[i] );

        // Every trailing byte must look like 10xxxxxx.  Bail out before
        // touching any later byte, which keeps reads inside the sequence.
        if( ( next & 0xC0 ) != 0x80 )
        {
            *result = replacement;
            return 1;
        }

        cp = ( cp << 6 ) | ( next & 0x3F );
    }

    if( cp < smallest[len]                      // overlong encoding
        || cp > 0x10FFFF                        // beyond the Unicode range
        || ( cp >= 0xD800 && cp <= 0xDFFF ) )   // UTF-16 surrogate
    {
        *result = replacement;
        return 1;
    }

    *result = cp;
    return len;
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* class uni_iter
|
||||||
|
* is a non-mutable iterator that walks through code points in the UTF8 encoded
|
||||||
|
* string. The normal ++(), ++(int), ->(), and *() operators are all supported and
|
||||||
|
* they return a uint32_t holding the unicode character appropriate for respective
|
||||||
|
* operation.
|
||||||
|
*/
|
||||||
|
class uni_iter : public std::string::const_iterator
|
||||||
|
{
|
||||||
|
const_iterator it;
|
||||||
|
|
||||||
|
public:
|
||||||
|
uni_iter( const_iterator start ) :
|
||||||
|
it( start )
|
||||||
|
{
|
||||||
|
}
|
||||||
|
|
||||||
|
/// pre-increment and return unicode at new position
|
||||||
|
uint32_t operator++()
|
||||||
|
{
|
||||||
|
uint32_t result;
|
||||||
|
|
||||||
|
// advance, and toss the result
|
||||||
|
it += uni_forward( it, &result );
|
||||||
|
|
||||||
|
// get the next result, but do not advance:
|
||||||
|
uni_forward( it, &result );
|
||||||
|
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
/// post-increment and return unicode at initial position
|
||||||
|
uint32_t operator++( int )
|
||||||
|
{
|
||||||
|
uint32_t result;
|
||||||
|
|
||||||
|
// grab the result and advance.
|
||||||
|
it += uni_forward( it, &result );
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
/// return unicode at current position
|
||||||
|
uint32_t operator->() const
|
||||||
|
{
|
||||||
|
uint32_t result;
|
||||||
|
|
||||||
|
// grab the result, do not advance
|
||||||
|
uni_forward( it, &result );
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
/// return unicode at current position
|
||||||
|
uint32_t operator*() const
|
||||||
|
{
|
||||||
|
uint32_t result;
|
||||||
|
|
||||||
|
// grab the result, do not advance
|
||||||
|
uni_forward( it, &result );
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool operator==( const uni_iter& other ) const { return it == other.it; }
|
||||||
|
bool operator!=( const uni_iter& other ) const { return it != other.it; }
|
||||||
|
bool operator< ( const uni_iter& other ) const { return it < other.it; }
|
||||||
|
bool operator<=( const uni_iter& other ) const { return it <= other.it; }
|
||||||
|
bool operator> ( const uni_iter& other ) const { return it > other.it; }
|
||||||
|
bool operator>=( const uni_iter& other ) const { return it >= other.it; }
|
||||||
|
};
|
||||||
|
|
||||||
|
/**
 * Function ubegin
 * returns a uni_iter standing on the same position as begin().
 */
uni_iter ubegin() const
{
    const_iterator first = begin();

    return uni_iter( first );
}
|
||||||
|
|
||||||
|
/**
 * Function uend
 * returns a uni_iter standing on the same position as end().  It is meant
 * for comparison against an advancing uni_iter.
 */
uni_iter uend() const
{
    const_iterator last = end();

    return uni_iter( last );
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
void aFunctionTaking_wxString( const wxString& wx )
|
wxString aFunctionTaking_wxString( const wxString& wx )
|
||||||
{
|
{
|
||||||
printf( "%s: '%s'\n", __func__, UTF8( wx ).c_str() );
|
printf( "%s: '%s'\n", __func__, UTF8( wx ).c_str() );
|
||||||
|
|
||||||
|
return wx;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
int main()
|
int main()
|
||||||
{
|
{
|
||||||
UTF8 utf;
|
UTF8 u1 = "output";
|
||||||
std::string str = "input";
|
std::string str = "input";
|
||||||
wxString wx = wxT( "input" );
|
wxString wx = wxT( "input" );
|
||||||
|
|
||||||
utf = str;
|
u1 = str;
|
||||||
|
|
||||||
wxString wx2 = utf;
|
wxString wx2 = u1;
|
||||||
|
|
||||||
UTF8 utf2 = wx2;
|
UTF8 u2 = wx2;
|
||||||
|
|
||||||
printf( "here is some text:%s\n", utf2.c_str() );
|
u2 += 'X';
|
||||||
|
|
||||||
// this is the key accomplishment here, passing a UTF8 to a function taking wxString:
|
printf( "utf2:'%s'\n", u2.c_str() );
|
||||||
aFunctionTaking_wxString( utf2 );
|
|
||||||
|
// key accomplishments here:
|
||||||
|
// 1) passing a UTF8 to a function which normally takes a wxString.
|
||||||
|
// 2) return a wxString back into a UTF8.
|
||||||
|
UTF8 result = aFunctionTaking_wxString( u2 );
|
||||||
|
|
||||||
|
printf( "result:'%s'\n", result.c_str() );
|
||||||
|
|
||||||
|
// test the unicode iterator:
|
||||||
|
for( UTF8::uni_iter it = u2.ubegin(); it != u2.uend(); )
|
||||||
|
{
|
||||||
|
printf( " _%c_", it++ );
|
||||||
|
|
||||||
|
// after UTF8::uni_forward() is implemented, it++ %c is no longer usable.
|
||||||
|
// printf( " _%02x_", it++ );
|
||||||
|
}
|
||||||
|
printf( "\n" );
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,5 @@
|
||||||
|
# Builds the stand-alone UTF8.cpp test program against wxWidgets.
#
# Both settings keep their historical defaults but may be overridden from the
# environment when wxWidgets lives somewhere else, e.g.
#   WXCONFIG=wx-config-3.0 INCLUDE=/usr/include/wx-3.0 sh make-UTF8.sh
WXCONFIG=${WXCONFIG:-wx-config}
INCLUDE=${INCLUDE:-/usr/include/wx-2.8}

# The two wx-config substitutions are left unquoted on purpose: each one
# expands to several flags which the shell must split into separate arguments.
g++ -I "$INCLUDE" $($WXCONFIG --cppflags) UTF8.cpp -o test $($WXCONFIG --libs)
|
||||||
|
|
Loading…
Reference in New Issue