Overhaul UTF modules and add full set of UTFs conversion functions.

This commit is contained in:
Xpol Wan 2015-11-21 11:00:14 +08:00
parent a789e31605
commit e145255479
2 changed files with 118 additions and 49 deletions

View File

@ -116,43 +116,94 @@ void trimUTF16Vector(std::vector<char16_t>& str)
}
}
bool UTF8ToUTF16(const std::string& utf8, std::u16string& outUtf16)
// Maps a string element type to the element type handed to the conversion
// routine. The primary template is the identity mapping; the specializations
// substitute the converter's own code unit types.
template <typename CharT>
struct ConvertTrait
{
    using ArgType = CharT;
};
// std::string elements (char) are passed to the conversion routines as UTF8.
template <>
struct ConvertTrait<char>
{
    using ArgType = UTF8;
};
// std::u16string elements (char16_t) are passed to the conversion routines as UTF16.
template <>
struct ConvertTrait<char16_t>
{
    using ArgType = UTF16;
};
// std::u32string elements (char32_t) are passed to the conversion routines as UTF32.
template <>
struct ConvertTrait<char32_t>
{
    using ArgType = UTF32;
};
/**
 * Generic driver shared by the UTF8/UTF16/UTF32 conversion helpers.
 *
 * Converts \p from into a scratch string using \p cvtfunc and, only when the
 * conversion succeeds, moves the result into \p to. When the conversion fails
 * \p to is not modified.
 *
 * @tparam From      Element type of the input string.
 * @tparam To        Element type of the output string.
 * @tparam FromTrait Trait whose ArgType is the input element type \p cvtfunc expects.
 * @tparam ToTrait   Trait whose ArgType is the output element type \p cvtfunc expects.
 * @param from    The source string.
 * @param to      Receives the converted string; cleared when \p from is empty.
 * @param cvtfunc Conversion routine; it is invoked once with strictConversion.
 * @return True if \p from is empty or \p cvtfunc reports conversionOK, otherwise false.
 */
template <typename From, typename To, typename FromTrait = ConvertTrait<From>, typename ToTrait = ConvertTrait<To>>
bool utfConvert(
    const std::basic_string<From>& from, std::basic_string<To>& to,
    ConversionResult(*cvtfunc)(const typename FromTrait::ArgType**, const typename FromTrait::ArgType*,
                               typename ToTrait::ArgType**, typename ToTrait::ArgType*,
                               ConversionFlags)
)
{
    // The buffers are reinterpreted below, so the element sizes must agree.
    static_assert(sizeof(From) == sizeof(typename FromTrait::ArgType), "Error size mismatched");
    static_assert(sizeof(To) == sizeof(typename ToTrait::ArgType), "Error size mismatched");

    if (from.empty())
    {
        to.clear();
        return true;
    }

    // See: http://unicode.org/faq/utf_bom.html#gen6
    static const int most_bytes_per_character = 4;

    // One input element represents at most one character, so the input length
    // is an upper bound on the number of characters.
    const size_t maxNumberOfChars = from.length();
    const size_t numberOfOut = maxNumberOfChars * most_bytes_per_character / sizeof(To);

    // Convert into a scratch string so that `to` stays untouched on failure.
    std::basic_string<To> working(numberOfOut, 0);

    auto inbeg = reinterpret_cast<const typename FromTrait::ArgType*>(&from[0]);
    auto inend = inbeg + from.length();

    auto outbeg = reinterpret_cast<typename ToTrait::ArgType*>(&working[0]);
    auto outend = outbeg + working.length();

    auto r = cvtfunc(&inbeg, inend, &outbeg, outend, strictConversion);
    if (r != conversionOK)
        return false;

    // cvtfunc advanced outbeg past the last element written; drop the unused tail.
    working.resize(reinterpret_cast<To*>(outbeg) - &working[0]);
    to = std::move(working);

    return true;
}
// UTF8 -> UTF16: forwards to utfConvert with the ConvertUTF8toUTF16 routine.
bool UTF8ToUTF16(const std::string& utf8, std::u16string& outUtf16)
{
    const bool converted = utfConvert(utf8, outUtf16, ConvertUTF8toUTF16);
    return converted;
}
// UTF8 -> UTF32: forwards to utfConvert with the ConvertUTF8toUTF32 routine.
bool UTF8ToUTF32(const std::string& utf8, std::u32string& outUtf32)
{
    const bool converted = utfConvert(utf8, outUtf32, ConvertUTF8toUTF32);
    return converted;
}
// UTF16 -> UTF8: forwards to utfConvert with the ConvertUTF16toUTF8 routine.
// Empty input needs no special case here: utfConvert clears the output and
// returns true for an empty source string.
bool UTF16ToUTF8(const std::u16string& utf16, std::string& outUtf8)
{
    return utfConvert(utf16, outUtf8, ConvertUTF16toUTF8);
}
// UTF16 -> UTF32: forwards to utfConvert with the ConvertUTF16toUTF32 routine.
bool UTF16ToUTF32(const std::u16string& utf16, std::u32string& outUtf32)
{
    const bool converted = utfConvert(utf16, outUtf32, ConvertUTF16toUTF32);
    return converted;
}
return llvm::convertUTF16ToUTF8String(utf16, outUtf8);
// UTF32 -> UTF8: forwards to utfConvert with the ConvertUTF32toUTF8 routine.
bool UTF32ToUTF8(const std::u32string& utf32, std::string& outUtf8)
{
    const bool converted = utfConvert(utf32, outUtf8, ConvertUTF32toUTF8);
    return converted;
}
// UTF32 -> UTF16: forwards to utfConvert with the ConvertUTF32toUTF16 routine.
bool UTF32ToUTF16(const std::u32string& utf32, std::u16string& outUtf16)
{
    const bool converted = utfConvert(utf32, outUtf16, ConvertUTF32toUTF16);
    return converted;
}
#if (CC_TARGET_PLATFORM == CC_PLATFORM_ANDROID)
@ -198,14 +249,7 @@ jstring newStringUTFJNI(JNIEnv* env, std::string utf8Str, bool* ret)
/**
 * Copies the code units of a UTF16 string into a vector.
 *
 * @param utf16 The source UTF16 string.
 * @return A vector holding the elements of \p utf16 in the same order;
 *         empty when \p utf16 is empty.
 */
std::vector<char16_t> getChar16VectorFromUTF16String(const std::u16string& utf16)
{
    // The iterator-range constructor copies every element in one step.
    return std::vector<char16_t>(utf16.begin(), utf16.end());
}
long getCharacterCountInUTF8String(const std::string& utf8)

View File

@ -39,9 +39,14 @@ NS_CC_BEGIN
namespace StringUtils {
/**
* @brief Converts utf8 string to utf16 string.
* @param utf8 The utf8 string to be converted.
* @param outUtf16 The output utf16 string.
* @brief Converts from UTF8 string to UTF16 string.
*
* This function resizes \p outUtf16 to the required size and
* fills it with the resulting UTF16 string if the conversion succeeds.
* If the conversion fails, \p outUtf16 is guaranteed to be left unchanged.
*
* @param inUtf8 The source UTF8 string to be converted from.
* @param outUtf16 The output string to hold the result UTF16s.
* @return True if succeed, otherwise false.
* @note Please check the return value before using \p outUtf16
* e.g.
@ -53,24 +58,44 @@ namespace StringUtils {
* }
* @endcode
*/
CC_DLL bool UTF8ToUTF16(const std::string& utf8, std::u16string& outUtf16);
CC_DLL bool UTF8ToUTF16(const std::string& inUtf8, std::u16string& outUtf16);
/**
* @brief Converts utf16 string to utf8 string.
* @param utf16 The utf16 string to be converted.
* @param outUtf8 The output utf8 string.
* @return True if succeed, otherwise false.
* @note Please check the return value before using \p outUtf8
* e.g.
* @code
* std::string utf8;
* bool ret = StringUtils::UTF16ToUTF8(u"\u4f60\u597d", utf8);
* if (ret) {
* do_some_thing_with_utf8(utf8);
* }
* @endcode
* @brief Same as \a UTF8ToUTF16 but converts from UTF8 to UTF32.
*
* @see UTF8ToUTF16
*/
CC_DLL bool UTF16ToUTF8(const std::u16string& utf16, std::string& outUtf8);
CC_DLL bool UTF8ToUTF32(const std::string& inUtf8, std::u32string& outUtf32);
/**
 * @brief Same as \a UTF8ToUTF16 but converts from UTF16 to UTF8.
 *
 * @param inUtf16 The source UTF16 string to be converted from.
 * @param outUtf8 The output string to hold the resulting UTF8 text.
 * @return True if succeed, otherwise false.
 *
 * @see UTF8ToUTF16
 */
CC_DLL bool UTF16ToUTF8(const std::u16string& inUtf16, std::string& outUtf8);
/**
 * @brief Same as \a UTF8ToUTF16 but converts from UTF16 to UTF32.
 *
 * @param inUtf16  The source UTF16 string to be converted from.
 * @param outUtf32 The output string to hold the resulting UTF32 text.
 * @return True if succeed, otherwise false.
 *
 * @see UTF8ToUTF16
 */
CC_DLL bool UTF16ToUTF32(const std::u16string& inUtf16, std::u32string& outUtf32);
/**
 * @brief Same as \a UTF8ToUTF16 but converts from UTF32 to UTF8.
 *
 * @param inUtf32 The source UTF32 string to be converted from.
 * @param outUtf8 The output string to hold the resulting UTF8 text.
 * @return True if succeed, otherwise false.
 *
 * @see UTF8ToUTF16
 */
CC_DLL bool UTF32ToUTF8(const std::u32string& inUtf32, std::string& outUtf8);
/**
 * @brief Same as \a UTF8ToUTF16 but converts from UTF32 to UTF16.
 *
 * @param inUtf32  The source UTF32 string to be converted from.
 * @param outUtf16 The output string to hold the resulting UTF16 text.
 * @return True if succeed, otherwise false.
 *
 * @see UTF8ToUTF16
 */
CC_DLL bool UTF32ToUTF16(const std::u32string& inUtf32, std::u16string& outUtf16);
#if (CC_TARGET_PLATFORM == CC_PLATFORM_ANDROID)