diff --git a/cocos/base/ccUTF8.cpp b/cocos/base/ccUTF8.cpp index 39b8223e43..1c7e6f9eed 100644 --- a/cocos/base/ccUTF8.cpp +++ b/cocos/base/ccUTF8.cpp @@ -36,8 +36,7 @@ NS_CC_BEGIN namespace StringUtils { -std::string CC_DLL format(const char* format, ...) -{ +std::string CC_DLL format(const char* format, ...) { va_list args; va_start(args, format); auto ret = vformat(format, args); @@ -46,12 +45,13 @@ std::string CC_DLL format(const char* format, ...) } /*--- This a C++ universal sprintf in the future. ** @pitfall: The behavior of vsnprintf between VS2013 and VS2015/2017 is different -** VS2013 or Unix-Like System will return -1 when buffer not enough, but VS2015/2017 will return the actural needed length for buffer at this station +** VS2013 or Unix-Like System will return -1 when buffer not enough, but VS2015/2017 will return the actual needed +** length for buffer in this situation ** The _vsnprintf behavior is compatible API which always return -1 when buffer isn't enough at VS2013/2015/2017 -** Yes, The vsnprintf is more efficient implemented by MSVC 19.0 or later, AND it's also standard-compliant, see reference: http://www.cplusplus.com/reference/cstdio/vsnprintf/ +** Yes, The vsnprintf is more efficient implemented by MSVC 19.0 or later, AND it's also standard-compliant, see +** reference: http://www.cplusplus.com/reference/cstdio/vsnprintf/ */ -std::string vformat(const char* format, va_list ap) -{ +std::string vformat(const char* format, va_list ap) { #define CC_VSNPRINTF_BUFFER_LENGTH 512 std::string buf(CC_VSNPRINTF_BUFFER_LENGTH, '\0'); @@ -60,14 +60,10 @@ std::string vformat(const char* format, va_list ap) int nret = vsnprintf(&buf.front(), buf.length() + 1, format, args); va_end(args); - if (nret >= 0) - { - if ((unsigned int)nret < buf.length()) - { + if (nret >= 0) { + if ((unsigned int) nret < buf.length()) { buf.resize(nret); - } - else if ((unsigned int)nret > buf.length()) - { // handle return required length when buffer insufficient + } else if 
((unsigned int) nret > buf.length()) { // handle return required length when buffer insufficient buf.resize(nret); va_copy(args, ap); @@ -75,21 +71,17 @@ std::string vformat(const char* format, va_list ap) va_end(args); } // else equals, do nothing. - } - else - { // handle return -1 when buffer insufficient - /* - vs2013/older & glibc <= 2.0.6, they would return -1 when the output was truncated. - see: http://man7.org/linux/man-pages/man3/vsnprintf.3.html - */ -#if (defined(__linux__) && ((__GLIBC__ < 2) || ((__GLIBC__ == 2) && (__GLIBC_MINOR__ < 1)))) || \ -(defined(_MSC_VER) && _MSC_VER < 1900) - enum : size_t - { + } else { // handle return -1 when buffer insufficient + /* + vs2013/older & glibc <= 2.0.6, they would return -1 when the output was truncated. + see: http://man7.org/linux/man-pages/man3/vsnprintf.3.html + */ +#if (defined(__linux__) && ((__GLIBC__ < 2) || ((__GLIBC__ == 2) && (__GLIBC_MINOR__ < 1)))) \ + || (defined(_MSC_VER) && _MSC_VER < 1900) + enum : size_t { enlarge_limits = (1 << 20), // limits the buffer cost memory less than 2MB }; - do - { + do { buf.resize(buf.length() << 1); va_copy(args, ap); @@ -102,9 +94,9 @@ std::string vformat(const char* format, va_list ap) else buf = "strfmt: an error is encountered!"; #else - /* other standard implementation - see: http://www.cplusplus.com/reference/cstdio/vsnprintf/ - */ + /* other standard implementation + see: http://www.cplusplus.com/reference/cstdio/vsnprintf/ + */ buf = "strfmt: an error is encountered!"; #endif } @@ -118,13 +110,13 @@ std::string vformat(const char* format, va_list ap) * * Return value: the index of the last character that is not c. 
* */ -unsigned int getIndexOfLastNotChar16(const std::vector& str, char16_t c) -{ +unsigned int getIndexOfLastNotChar16(const std::vector& str, char16_t c) { int len = static_cast(str.size()); int i = len - 1; for (; i >= 0; --i) - if (str[i] != c) return i; + if (str[i] != c) + return i; return i; } @@ -137,15 +129,14 @@ unsigned int getIndexOfLastNotChar16(const std::vector& str, char16_t * * Return value: the trimmed string. * */ -static void trimUTF16VectorFromIndex(std::vector& str, int index) -{ +static void trimUTF16VectorFromIndex(std::vector& str, int index) { int size = static_cast(str.size()); if (index >= size || index < 0) return; str.erase(str.begin() + index, str.begin() + size); } - + /* * @str: the string to trim * @index: the index to start trimming from. @@ -154,12 +145,11 @@ static void trimUTF16VectorFromIndex(std::vector& str, int index) * * Return value: the trimmed string. * */ -static void trimUTF32VectorFromIndex(std::vector& str, int index) -{ +static void trimUTF32VectorFromIndex(std::vector& str, int index) { int size = static_cast(str.size()); if (index >= size || index < 0) return; - + str.erase(str.begin() + index, str.begin() + size); } @@ -170,48 +160,42 @@ static void trimUTF32VectorFromIndex(std::vector& str, int index) * * Return value: weather the character is a whitespace character. 
* */ -bool isUnicodeSpace(char32_t ch) -{ - return (ch >= 0x0009 && ch <= 0x000D) || ch == 0x0020 || ch == 0x0085 || ch == 0x00A0 || ch == 0x1680 - || (ch >= 0x2000 && ch <= 0x200A) || ch == 0x2028 || ch == 0x2029 || ch == 0x202F - || ch == 0x205F || ch == 0x3000; +bool isUnicodeSpace(char32_t ch) { + return (ch >= 0x0009 && ch <= 0x000D) || ch == 0x0020 || ch == 0x0085 || ch == 0x00A0 || ch == 0x1680 + || (ch >= 0x2000 && ch <= 0x200A) || ch == 0x2028 || ch == 0x2029 || ch == 0x202F || ch == 0x205F + || ch == 0x3000; } -bool isCJKUnicode(char32_t ch) -{ - return (ch >= 0x4E00 && ch <= 0x9FBF) // CJK Unified Ideographs - || (ch >= 0x2E80 && ch <= 0x2FDF) // CJK Radicals Supplement & Kangxi Radicals - || (ch >= 0x2FF0 && ch <= 0x30FF) // Ideographic Description Characters, CJK Symbols and Punctuation & Japanese - || (ch >= 0x3100 && ch <= 0x31BF) // Korean - || (ch >= 0xAC00 && ch <= 0xD7AF) // Hangul Syllables - || (ch >= 0xF900 && ch <= 0xFAFF) // CJK Compatibility Ideographs - || (ch >= 0xFE30 && ch <= 0xFE4F) // CJK Compatibility Forms - || (ch >= 0x31C0 && ch <= 0x4DFF) // Other extensions - || (ch >= 0x1f004 && ch <= 0x1f682);// Emoji +bool isCJKUnicode(char32_t ch) { + return (ch >= 0x4E00 && ch <= 0x9FBF) // CJK Unified Ideographs + || (ch >= 0x2E80 && ch <= 0x2FDF) // CJK Radicals Supplement & Kangxi Radicals + || (ch >= 0x2FF0 && ch <= 0x30FF) // Ideographic Description Characters, CJK Symbols and Punctuation & Japanese + || (ch >= 0x3100 && ch <= 0x31BF) // Korean + || (ch >= 0xAC00 && ch <= 0xD7AF) // Hangul Syllables + || (ch >= 0xF900 && ch <= 0xFAFF) // CJK Compatibility Ideographs + || (ch >= 0xFE30 && ch <= 0xFE4F) // CJK Compatibility Forms + || (ch >= 0x31C0 && ch <= 0x4DFF) // Other extensions + || (ch >= 0x1f004 && ch <= 0x1f682); // Emoji } - -bool isUnicodeNonBreaking(char32_t ch) -{ - return ch == 0x00A0 // Non-Breaking Space - || ch == 0x202F // Narrow Non-Breaking Space - || ch == 0x2007 // Figure Space - || ch == 0x2060; // Word Joiner + 
+bool isUnicodeNonBreaking(char32_t ch) { + return ch == 0x00A0 // Non-Breaking Space + || ch == 0x202F // Narrow Non-Breaking Space + || ch == 0x2007 // Figure Space + || ch == 0x2060; // Word Joiner } - -void trimUTF16Vector(std::vector& str) -{ + +void trimUTF16Vector(std::vector& str) { int len = static_cast(str.size()); - if ( len <= 0 ) + if (len <= 0) return; int last_index = len - 1; // Only start trimming if the last character is whitespace.. - if (isUnicodeSpace(str[last_index])) - { - for (int i = last_index - 1; i >= 0; --i) - { + if (isUnicodeSpace(str[last_index])) { + for (int i = last_index - 1; i >= 0; --i) { if (isUnicodeSpace(str[i])) last_index = i; else @@ -221,27 +205,24 @@ void trimUTF16Vector(std::vector& str) trimUTF16VectorFromIndex(str, last_index); } } - -void trimUTF32Vector(std::vector& str) -{ + +void trimUTF32Vector(std::vector& str) { int len = static_cast(str.size()); - - if ( len <= 0 ) + + if (len <= 0) return; - + int last_index = len - 1; - + // Only start trimming if the last character is whitespace.. 
- if (isUnicodeSpace(str[last_index])) - { - for (int i = last_index - 1; i >= 0; --i) - { + if (isUnicodeSpace(str[last_index])) { + for (int i = last_index - 1; i >= 0; --i) { if (isUnicodeSpace(str[i])) last_index = i; else break; } - + trimUTF32VectorFromIndex(str, last_index); } } @@ -265,18 +246,13 @@ struct ConvertTrait { }; template , typename ToTrait = ConvertTrait> -bool utfConvert( - const std::basic_string& from, std::basic_string& to, - ConversionResult(*cvtfunc)(const typename FromTrait::ArgType**, const typename FromTrait::ArgType*, - typename ToTrait::ArgType**, typename ToTrait::ArgType*, - ConversionFlags) - ) -{ +bool utfConvert(const std::basic_string& from, std::basic_string& to, + ConversionResult (*cvtfunc)(const typename FromTrait::ArgType**, const typename FromTrait::ArgType*, + typename ToTrait::ArgType**, typename ToTrait::ArgType*, ConversionFlags)) { static_assert(sizeof(From) == sizeof(typename FromTrait::ArgType), "Error size mismatched"); static_assert(sizeof(To) == sizeof(typename ToTrait::ArgType), "Error size mismatched"); - if (from.empty()) - { + if (from.empty()) { to.clear(); return true; } @@ -285,7 +261,7 @@ bool utfConvert( static const int most_bytes_per_character = 4; const size_t maxNumberOfChars = from.length(); // all UTFs at most one element represents one character. 
- const size_t numberOfOut = maxNumberOfChars * most_bytes_per_character / sizeof(To); + const size_t numberOfOut = maxNumberOfChars * most_bytes_per_character / sizeof(To); std::basic_string working(numberOfOut, 0); @@ -295,7 +271,7 @@ bool utfConvert( auto outbeg = reinterpret_cast(&working[0]); auto outend = outbeg + working.length(); - auto r = cvtfunc(&inbeg, inend, &outbeg, outend, strictConversion); + auto r = cvtfunc(&inbeg, inend, &outbeg, outend, strictConversion); if (r != conversionOK) return false; @@ -306,60 +282,47 @@ bool utfConvert( }; -bool UTF8ToUTF16(const std::string& utf8, std::u16string& outUtf16) -{ +bool UTF8ToUTF16(const std::string& utf8, std::u16string& outUtf16) { return utfConvert(utf8, outUtf16, ConvertUTF8toUTF16); } -bool UTF8ToUTF32(const std::string& utf8, std::u32string& outUtf32) -{ +bool UTF8ToUTF32(const std::string& utf8, std::u32string& outUtf32) { return utfConvert(utf8, outUtf32, ConvertUTF8toUTF32); } -bool UTF16ToUTF8(const std::u16string& utf16, std::string& outUtf8) -{ +bool UTF16ToUTF8(const std::u16string& utf16, std::string& outUtf8) { return utfConvert(utf16, outUtf8, ConvertUTF16toUTF8); } - -bool UTF16ToUTF32(const std::u16string& utf16, std::u32string& outUtf32) -{ + +bool UTF16ToUTF32(const std::u16string& utf16, std::u32string& outUtf32) { return utfConvert(utf16, outUtf32, ConvertUTF16toUTF32); } -bool UTF32ToUTF8(const std::u32string& utf32, std::string& outUtf8) -{ +bool UTF32ToUTF8(const std::u32string& utf32, std::string& outUtf8) { return utfConvert(utf32, outUtf8, ConvertUTF32toUTF8); } -bool UTF32ToUTF16(const std::u32string& utf32, std::u16string& outUtf16) -{ +bool UTF32ToUTF16(const std::u32string& utf32, std::u16string& outUtf16) { return utfConvert(utf32, outUtf16, ConvertUTF32toUTF16); } -#if (CC_TARGET_PLATFORM == CC_PLATFORM_ANDROID) -std::string getStringUTFCharsJNI(JNIEnv* env, jstring srcjStr, bool* ret) -{ +#if (CC_TARGET_PLATFORM == CC_PLATFORM_ANDROID) +std::string 
getStringUTFCharsJNI(JNIEnv* env, jstring srcjStr, bool* ret) { std::string utf8Str; - if(srcjStr != nullptr && env != nullptr) - { - const unsigned short * unicodeChar = ( const unsigned short *)env->GetStringChars(srcjStr, nullptr); - size_t unicodeCharLength = env->GetStringLength(srcjStr); - const std::u16string unicodeStr((const char16_t *)unicodeChar, unicodeCharLength); + if (srcjStr != nullptr && env != nullptr) { + const unsigned short* unicodeChar = (const unsigned short*) env->GetStringChars(srcjStr, nullptr); + size_t unicodeCharLength = env->GetStringLength(srcjStr); + const std::u16string unicodeStr((const char16_t*) unicodeChar, unicodeCharLength); bool flag = UTF16ToUTF8(unicodeStr, utf8Str); - if (ret) - { + if (ret) { *ret = flag; } - if (!flag) - { + if (!flag) { utf8Str = ""; } env->ReleaseStringChars(srcjStr, unicodeChar); - } - else - { - if (ret) - { + } else { + if (ret) { *ret = false; } utf8Str = ""; @@ -367,95 +330,90 @@ std::string getStringUTFCharsJNI(JNIEnv* env, jstring srcjStr, bool* ret) return utf8Str; } -jstring newStringUTFJNI(JNIEnv* env, const std::string& utf8Str, bool* ret) -{ +jstring newStringUTFJNI(JNIEnv* env, const std::string& utf8Str, bool* ret) { std::u16string utf16Str; bool flag = cocos2d::StringUtils::UTF8ToUTF16(utf8Str, utf16Str); - if (ret) - { + if (ret) { *ret = flag; } - if(!flag) - { + if (!flag) { utf16Str.clear(); } - jstring stringText = env->NewString((const jchar*)utf16Str.data(), utf16Str.length()); + jstring stringText = env->NewString((const jchar*) utf16Str.data(), utf16Str.length()); return stringText; } #endif -std::vector getChar16VectorFromUTF16String(const std::u16string& utf16) -{ +std::vector getChar16VectorFromUTF16String(const std::u16string& utf16) { return std::vector(utf16.begin(), utf16.end()); } -long getCharacterCountInUTF8String(const std::string& utf8) -{ - return getUTF8StringLength((const UTF8*)utf8.c_str()); +long getCharacterCountInUTF8String(const std::string& utf8) { + return 
getUTF8StringLength((const UTF8*) utf8.c_str()); } bool hasNonAsciiUTF8(const char* str, size_t len) { - for (size_t i = 0; i < len;) - { + return detectNonAsciiUTF8(str, len, nullptr); +} + +bool detectNonAsciiUTF8(const char* str, size_t len, bool* pAllCharsAreAscii) { + bool allCharsAreAscii = true; + bool nonAsciiUTF8Found = false; + for (size_t i = 0; i < len;) { int numByte = getNumBytesForUTF8(str[i]); - if (numByte > 1) - { // byte=1, is ascii character - if (isLegalUTF8Sequence((const UTF8*) &str[i], (const UTF8*) &str[i] + numByte)) - return true; + if (isLegalUTF8Sequence((const UTF8*) &str[i], (const UTF8*) &str[i] + numByte)) { + if (numByte > 1) { + nonAsciiUTF8Found = true; + allCharsAreAscii = false; + break; + } + } else { // not a valid utf-8 chars + allCharsAreAscii = false; } i += numByte; } - return false; -} + if (pAllCharsAreAscii) + *pAllCharsAreAscii = allCharsAreAscii; + + return nonAsciiUTF8Found; +} bool isLegalUTF8String(const char* str, size_t len) { - return ::isLegalUTF8String((const UTF8**)&str, (const UTF8*)str + len); + return ::isLegalUTF8String((const UTF8**) &str, (const UTF8*) str + len); } -StringUTF8::StringUTF8() -{ +StringUTF8::StringUTF8() {} -} - -StringUTF8::StringUTF8(const std::string& newStr) -{ +StringUTF8::StringUTF8(const std::string& newStr) { replace(newStr); } -StringUTF8::~StringUTF8() -{ +StringUTF8::~StringUTF8() {} -} - -std::size_t StringUTF8::length() const -{ +std::size_t StringUTF8::length() const { return _str.size(); } -void StringUTF8::replace(const std::string& newStr) -{ +void StringUTF8::replace(const std::string& newStr) { _str.clear(); - if (!newStr.empty()) - { - UTF8* sequenceUtf8 = (UTF8*)newStr.c_str(); + if (!newStr.empty()) { + UTF8* sequenceUtf8 = (UTF8*) newStr.c_str(); int lengthString = getUTF8StringLength(sequenceUtf8); - if (lengthString == 0) - { + if (lengthString == 0) { CCLOG("Bad utf-8 set string: %s", newStr.c_str()); return; } - while (*sequenceUtf8) - { + while 
(*sequenceUtf8) { std::size_t lengthChar = getNumBytesForUTF8(*sequenceUtf8); CharUTF8 charUTF8; - charUTF8._char.append((char*)sequenceUtf8, lengthChar); + charUTF8._char.append((char*) sequenceUtf8, lengthChar); sequenceUtf8 += lengthChar; _str.push_back(charUTF8); @@ -463,68 +421,54 @@ void StringUTF8::replace(const std::string& newStr) } } -std::string StringUTF8::getAsCharSequence() const -{ +std::string StringUTF8::getAsCharSequence() const { return getAsCharSequence(0, std::numeric_limits::max()); } -std::string StringUTF8::getAsCharSequence(std::size_t pos) const -{ +std::string StringUTF8::getAsCharSequence(std::size_t pos) const { return getAsCharSequence(pos, std::numeric_limits::max()); } -std::string StringUTF8::getAsCharSequence(std::size_t pos, std::size_t len) const -{ +std::string StringUTF8::getAsCharSequence(std::size_t pos, std::size_t len) const { std::string charSequence; std::size_t maxLen = _str.size() - pos; - if (len > maxLen) - { + if (len > maxLen) { len = maxLen; } std::size_t endPos = len + pos; - while (pos < endPos) - { + while (pos < endPos) { charSequence.append(_str[pos++]._char); } return charSequence; } -bool StringUTF8::deleteChar(std::size_t pos) -{ - if (pos < _str.size()) - { +bool StringUTF8::deleteChar(std::size_t pos) { + if (pos < _str.size()) { _str.erase(_str.begin() + pos); return true; - } - else - { + } else { return false; } } -bool StringUTF8::insert(std::size_t pos, const std::string& insertStr) -{ +bool StringUTF8::insert(std::size_t pos, const std::string& insertStr) { StringUTF8 utf8(insertStr); return insert(pos, utf8); } -bool StringUTF8::insert(std::size_t pos, const StringUTF8& insertStr) -{ - if (pos <= _str.size()) - { +bool StringUTF8::insert(std::size_t pos, const StringUTF8& insertStr) { + if (pos <= _str.size()) { _str.insert(_str.begin() + pos, insertStr._str.begin(), insertStr._str.end()); return true; - } - else - { + } else { return false; } } -} //namespace StringUtils { +} // namespace 
StringUtils NS_CC_END diff --git a/cocos/base/ccUTF8.h b/cocos/base/ccUTF8.h index bc4bbf666a..3da4f327de 100644 --- a/cocos/base/ccUTF8.h +++ b/cocos/base/ccUTF8.h @@ -218,6 +218,11 @@ CC_DLL std::vector getChar16VectorFromUTF16String(const std::u16string */ CC_DLL bool hasNonAsciiUTF8(const char* str, size_t len); +/** + * @brief Detects whether str contains a legal multi-byte (non-ASCII) UTF-8 sequence; if pAllCharsAreAscii is non-null, it receives whether all characters are ASCII + */ +CC_DLL bool detectNonAsciiUTF8(const char* str, size_t len, bool* pAllCharsAreAscii); + /** * @brief isLegalUTF8String, contains ascii characters */