Label: keep non-breaking characters inside a word when measuring word length.

Summary of the changes below:
  * Label::getFirstWordLen still ends a word at NewLine, but no longer ends it
    at a space or CJK character for which StringUtils::isUnicodeNonBreaking()
    returns true.
  * StringUtils::isUnicodeSpace additionally accepts U+00A0.
  * StringUtils::isUnicodeNonBreaking is added (ccUTF8.cpp / ccUTF8.h) and
    returns true for U+00A0, U+202F, U+2007 and U+2060.

NOTE(review): this patch was recovered from a copy whose line breaks and
indentation had been collapsed. Hunk line counts were re-checked against the
@@ headers, but leading/trailing whitespace on context and removed lines is a
reconstruction -- confirm against the tree, or apply with
`git apply --ignore-whitespace`.

diff --git a/cocos/2d/CCLabelTextFormatter.cpp b/cocos/2d/CCLabelTextFormatter.cpp
index 9cbf5c4ddf..c826bed01b 100644
--- a/cocos/2d/CCLabelTextFormatter.cpp
+++ b/cocos/2d/CCLabelTextFormatter.cpp
@@ -90,8 +90,9 @@ int Label::getFirstWordLen(const std::u32string& utf32Text, int startIndex, int
         char32_t character = utf32Text[index];
 
         if (character == StringUtils::UnicodeCharacters::NewLine
-            || StringUtils::isUnicodeSpace(character)
-            || StringUtils::isCJKUnicode(character))
+            || (!StringUtils::isUnicodeNonBreaking(character)
+                && (StringUtils::isUnicodeSpace(character)
+                    || StringUtils::isCJKUnicode(character))))
         {
             break;
         }
diff --git a/cocos/base/ccUTF8.cpp b/cocos/base/ccUTF8.cpp
index 7459fcfb32..c3d8c14945 100644
--- a/cocos/base/ccUTF8.cpp
+++ b/cocos/base/ccUTF8.cpp
@@ -141,7 +141,7 @@ static void trimUTF32VectorFromIndex(std::vector<char32_t>& str, int index)
  * */
 bool isUnicodeSpace(char32_t ch)
 {
-    return  (ch >= 0x0009 && ch <= 0x000D) || ch == 0x0020 || ch == 0x0085 || ch == 0x1680
+    return  (ch >= 0x0009 && ch <= 0x000D) || ch == 0x0020 || ch == 0x0085 || ch == 0x00A0 || ch == 0x1680
     || (ch >= 0x2000 && ch <= 0x200A) || ch == 0x2028 || ch == 0x2029 || ch == 0x202F
     ||  ch == 0x205F || ch == 0x3000;
 }
@@ -158,7 +158,15 @@ bool isCJKUnicode(char32_t ch)
         || (ch >= 0x31C0 && ch <= 0x4DFF)   // Other extensions
         || (ch >= 0x1f004 && ch <= 0x1f682);// Emoji
 }
-    
+
+bool isUnicodeNonBreaking(char32_t ch)
+{
+    return ch == 0x00A0   // Non-Breaking Space
+        || ch == 0x202F   // Narrow Non-Breaking Space
+        || ch == 0x2007   // Figure Space
+        || ch == 0x2060;  // Word Joiner
+}
+
 void trimUTF16Vector(std::vector<char16_t>& str)
 {
     int len = static_cast<int>(str.size());
diff --git a/cocos/base/ccUTF8.h b/cocos/base/ccUTF8.h
index 5638fc0329..ac27f62738 100644
--- a/cocos/base/ccUTF8.h
+++ b/cocos/base/ccUTF8.h
@@ -178,7 +178,20 @@ CC_DLL bool isUnicodeSpace(char32_t ch);
  *
  */
 CC_DLL bool isCJKUnicode(char32_t ch);
-    
+
+/**
+ * @brief Whether the character is a non-breaking character.
+ * @param ch The unicode character.
+ * @returns Whether the character is a non-breaking character.
+ *
+ * @see https://en.wikipedia.org/wiki/Space_(punctuation)#Spaces_in_Unicode
+ * @see https://en.wikipedia.org/wiki/Non-breaking_space
+ * @see https://en.wikipedia.org/wiki/Figure_space
+ * @see https://en.wikipedia.org/wiki/Word_joiner
+ *
+ */
+CC_DLL bool isUnicodeNonBreaking(char32_t ch);
+
 /**
  * @brief Returns the length of the string in characters.
  * @param utf8 An UTF-8 encoded string.