kicad/thirdparty/compoundfilereader/utf.h

138 lines
2.9 KiB
C++

#pragma once
#include <stdint.h>
#include <string>
/// Decodes one code point from a zero-terminated UTF-16 sequence.
///
/// @param u16 Code units; only the low 16 bits of each element are used.
/// @param pos In/out index of the next unit to read; advanced past every unit consumed.
/// @param cp  Receives the decoded code point (0 when the terminator is hit).
/// @return false if the unit at *pos is the terminating 0 (*pos is left unchanged),
///         true otherwise.
///
/// A high surrogate that is not followed by a low surrogate, and a stray low
/// surrogate, are not rejected: they are returned as their own 16-bit value.
template <typename T>
static bool GetNextCodePointFromUTF16z(const T* u16, size_t* pos, uint32_t* cp)
{
    // Narrow to 16 bits before widening. Converting a signed 16-bit T directly
    // to uint32_t would sign-extend units >= 0x8000 into 0xFFFFxxxx.
    const uint16_t lead = static_cast<uint16_t>(u16[*pos]);
    *cp = lead;
    if (lead == 0)
        return false;
    (*pos)++;

    if ((lead & 0xFC00) == 0xD800)
    {
        // The lead unit was non-zero, so the terminator is at *pos or later
        // and this peek stays inside the sequence.
        const uint16_t trail = static_cast<uint16_t>(u16[*pos]);
        if ((trail & 0xFC00) == 0xDC00)
        {
            (*pos)++;
            // 0x35FDC00 == (0xD800 << 10) + 0xDC00 - 0x10000
            *cp = (static_cast<uint32_t>(lead) << 10) + trail - 0x35FDC00;
        }
    }
    return true;
}
/// Decodes one code point from a UTF-16 sequence of known or unknown length.
///
/// @param u16 Code units; only the low 16 bits of each element are used.
/// @param len Number of elements in u16, or 0 to treat u16 as zero-terminated
///            (the call is then forwarded to GetNextCodePointFromUTF16z).
/// @param pos In/out index of the next unit to read; advanced past every unit consumed.
/// @param cp  Receives the decoded code point.
/// @return false once *pos has reached len (or, for len == 0, the terminator);
///         true otherwise.
///
/// With a non-zero len, zero-valued units are decoded like any other unit and
/// do not end the sequence. Unpaired surrogates are returned as their own value.
template <typename T>
static bool GetNextCodePointFromUTF16(const T* u16, size_t len, size_t* pos, uint32_t* cp)
{
    if (len == 0)
        return GetNextCodePointFromUTF16z(u16, pos, cp);

    if (*pos >= len)
        return false;

    // Narrow to 16 bits before widening. Converting a signed 16-bit T directly
    // to uint32_t would sign-extend units >= 0x8000 into 0xFFFFxxxx.
    const uint16_t lead = static_cast<uint16_t>(u16[*pos]);
    *cp = lead;
    (*pos)++;

    // Only look for a trail unit if one is still inside the buffer.
    if ((lead & 0xFC00) == 0xD800 && *pos < len)
    {
        const uint16_t trail = static_cast<uint16_t>(u16[*pos]);
        if ((trail & 0xFC00) == 0xDC00)
        {
            (*pos)++;
            // 0x35FDC00 == (0xD800 << 10) + 0xDC00 - 0x10000
            *cp = (static_cast<uint32_t>(lead) << 10) + trail - 0x35FDC00;
        }
    }
    return true;
}
/// Splits a code point into the bytes of its UTF-8 encoding.
///
/// @param cp Code point to encode.
/// @param c1 Receives the first byte (written whenever the result is >= 1).
/// @param c2 Receives the second byte (written only when the result is >= 2).
/// @param c3 Receives the third byte (written only when the result is >= 3).
/// @param c4 Receives the fourth byte (written only when the result is 4).
/// @return Number of bytes produced (1 to 4), or 0 if cp is above 0x10FFFF,
///         in which case no output is written.
///
/// No surrogate check is made: values 0xD800-0xDFFF are emitted as an
/// ordinary three-byte sequence.
static int CodePointToUTF8(uint32_t cp, uint32_t* c1, uint32_t* c2, uint32_t* c3, uint32_t* c4)
{
    // 0xxxxxxx
    if (cp < 0x80)
    {
        *c1 = cp;
        return 1;
    }

    // 110xxxxx 10xxxxxx
    if (cp <= 0x7FF)
    {
        *c1 = 0xC0 | (cp >> 6);
        *c2 = 0x80 | (cp & 0x3F);
        return 2;
    }

    // 1110xxxx 10xxxxxx 10xxxxxx
    if (cp <= 0xFFFF)
    {
        *c1 = 0xE0 | (cp >> 12);
        *c2 = 0x80 | ((cp >> 6) & 0x3F);
        *c3 = 0x80 | (cp & 0x3F);
        return 3;
    }

    // Beyond the last encodable value: nothing is written.
    if (cp > 0x10FFFF)
        return 0;

    // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
    *c1 = 0xF0 | (cp >> 18);
    *c2 = 0x80 | ((cp >> 12) & 0x3F);
    *c3 = 0x80 | ((cp >> 6) & 0x3F);
    *c4 = 0x80 | (cp & 0x3F);
    return 4;
}
/// Converts a UTF-16 sequence to a UTF-8 encoded std::string.
///
/// @param u16 UTF-16 code units.
/// @param len Number of elements in u16; 0 (the default) is passed through to
///            GetNextCodePointFromUTF16, which then treats u16 as zero-terminated.
/// @return The concatenated UTF-8 bytes of every decoded code point. Code points
///         for which CodePointToUTF8 reports 0 bytes contribute nothing.
template <typename T>
std::string UTF16ToUTF8(const T* u16, size_t len = 0)
{
    std::string encoded;
    size_t cursor = 0;
    uint32_t codePoint;

    for (;;)
    {
        if (!GetNextCodePointFromUTF16(u16, len, &cursor, &codePoint))
            break;

        // Only the first byteCount slots are filled in and read back.
        uint32_t bytes[4];
        const int byteCount = CodePointToUTF8(codePoint, &bytes[0], &bytes[1], &bytes[2], &bytes[3]);

        for (int k = 0; k < byteCount; ++k)
            encoded += static_cast<char>(bytes[k]);
    }

    return encoded;
}
/// Converts a UTF-16 sequence to a std::wstring.
///
/// @param u16 UTF-16 code units.
/// @param len Number of elements in u16, or 0 (the default) to read up to a
///            terminating zero unit.
/// @return The converted wide string.
///
/// Where wchar_t is 16 bits wide it already holds UTF-16 code units, so the
/// units are copied verbatim. Elsewhere each decoded code point becomes one
/// wchar_t.
template <typename T>
std::wstring UTF16ToWstring(const T* u16, size_t len = 0)
{
    std::wstring ret;
    // _MSC_VER alone misses other toolchains with a 16-bit wchar_t (e.g. MinGW),
    // where storing a whole code point in one wchar_t would truncate everything
    // above U+FFFF. WCHAR_MAX comes from <stdint.h>.
#if defined(_MSC_VER) || (defined(WCHAR_MAX) && (WCHAR_MAX <= 0xFFFF))
    if (len == 0)
    {
        while (*u16)
            ret += static_cast<wchar_t>(*u16++);
    }
    else
    {
        // Honour the explicit length: the buffer need not be zero-terminated,
        // and the other branch does not stop at zero units either.
        for (size_t i = 0; i < len; i++)
            ret += static_cast<wchar_t>(u16[i]);
    }
#else
    uint32_t cp;
    size_t pos = 0;
    while (GetNextCodePointFromUTF16(u16, len, &pos, &cp))
    {
        ret += static_cast<wchar_t>(cp);
    }
#endif
    return ret;
}
/// Converts a zero-terminated wide string to a UTF-8 encoded std::string.
///
/// @param wstr Zero-terminated wide characters.
/// @return The UTF-8 bytes. Elements for which CodePointToUTF8 reports 0 bytes
///         contribute nothing.
///
/// Where wchar_t is 16 bits wide the input is UTF-16 and is routed through
/// UTF16ToUTF8 so surrogate pairs are combined. Elsewhere each element is
/// encoded as one code point.
template <typename T>
std::string WstringToUTF8(const T* wstr)
{
    // _MSC_VER alone misses other toolchains with a 16-bit wchar_t (e.g. MinGW),
    // where encoding each unit on its own would turn a surrogate pair into two
    // three-byte sequences instead of one four-byte sequence.
    // WCHAR_MAX comes from <stdint.h>.
#if defined(_MSC_VER) || (defined(WCHAR_MAX) && (WCHAR_MAX <= 0xFFFF))
    return UTF16ToUTF8(wstr);
#else
    std::string u8;
    uint32_t cp;
    while ((cp = static_cast<uint32_t>(*wstr++)) != 0)
    {
        // Only the first `count` slots are filled in and read back.
        uint32_t c[4];
        int count = CodePointToUTF8(cp, c, c + 1, c + 2, c + 3);
        for (int i = 0; i < count; i++)
        {
            u8 += static_cast<char>(c[i]);
        }
    }
    return u8;
#endif
}