axmol/core/base/UTF8.h

274 lines
8.5 KiB
C
Raw Normal View History

2019-11-23 20:27:39 +08:00
/****************************************************************************
Copyright (c) 2014 cocos2d-x.org
Copyright (c) 2014-2016 Chukong Technologies Inc.
Copyright (c) 2017-2018 Xiamen Yaji Software Co., Ltd.
Copyright (c) 2021 Bytedance Inc.

https://axmolengine.github.io/

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
****************************************************************************/
#ifndef __cocos2dx__ccUTF8__
#define __cocos2dx__ccUTF8__
#include "platform/PlatformMacros.h"
2019-11-23 20:27:39 +08:00
#include <vector>
#include <string>
#include <sstream>
2022-07-16 10:43:05 +08:00
#if (AX_TARGET_PLATFORM == AX_PLATFORM_ANDROID)
2021-12-25 10:04:45 +08:00
# include "platform/android/jni/JniHelper.h"
2019-11-23 20:27:39 +08:00
#endif
NS_AX_BEGIN
2019-11-23 20:27:39 +08:00
2021-12-25 10:04:45 +08:00
namespace StringUtils
{
2019-11-23 20:27:39 +08:00
2021-12-25 10:04:45 +08:00
/**
 * Named UTF-32 code points for a handful of control and spacing characters.
 */
namespace UnicodeCharacters
{
constexpr char32_t NewLine           = U'\n';     // U+000A (10)
constexpr char32_t CarriageReturn    = U'\r';     // U+000D (13)
constexpr char32_t NextCharNoChangeX = U'\b';     // U+0008 (8)
constexpr char32_t Space             = U' ';      // U+0020 (32)
constexpr char32_t NoBreakSpace      = U'\x00A0'; // U+00A0 (160)
}  // namespace UnicodeCharacters
/**
 * Single-byte (char) values for the control and spacing characters that
 * have an ASCII representation.
 */
namespace AsciiCharacters
{
constexpr char NewLine           = '\n';  // line feed
constexpr char CarriageReturn    = '\r';  // carriage return
constexpr char NextCharNoChangeX = '\b';  // backspace
constexpr char Space             = ' ';   // space
}  // namespace AsciiCharacters
2019-11-23 20:27:39 +08:00
2021-12-25 10:04:45 +08:00
/**
 * @brief Converts a value to its textual representation.
 *
 * The value is inserted into a string stream with operator<< and the
 * accumulated characters are returned, so the output follows the stream's
 * default formatting (e.g. bool prints as 1/0).
 *
 * @tparam T Any type that can be inserted into a std::ostream with operator<<.
 * @param arg The value to convert.
 * @return The characters produced by streaming \p arg.
 */
template <typename T>
std::string toString(T arg)
{
    // Only output is needed, so an output-only stream is sufficient.
    std::ostringstream out;
    out << arg;
    return out.str();
}
2022-07-16 10:43:05 +08:00
/**
 * @brief Builds a std::string from a format string and a variable argument list.
 *
 * @param format The format string. AX_FORMAT_PRINTF(1, 2) tags it as argument 1 with
 *               the variadic arguments starting at argument 2 (presumably a printf-style
 *               format-checking attribute — confirm in PlatformMacros.h).
 * @return The formatted text.
 */
std::string AX_DLL format(const char* format, ...) AX_FORMAT_PRINTF(1, 2);
/**
 * @brief Counterpart of \a format that receives its arguments as a va_list.
 *
 * NOTE(review): va_list requires <cstdarg>, which this header does not include
 * directly — presumably it arrives transitively; confirm.
 *
 * @param format The format string (presumably interpreted the same way as in \a format).
 * @param ap The argument list matching \p format.
 * @return The formatted text.
 */
std::string AX_DLL vformat(const char* format, va_list ap);
2019-11-23 20:27:39 +08:00
/**
* @brief Converts from UTF8 string to UTF16 string.
*
* This function resizes \p outUtf16 to required size and
* fill its contents with result UTF16 string if conversion success.
* If conversion fails it guarantees not to change \p outUtf16.
*
* @param inUtf8 The source UTF8 string to be converted from.
* @param outUtf16 The output string to hold the result UTF16s.
* @return True if succeed, otherwise false.
* @note Please check the return value before using \p outUtf16
* e.g.
* @code
* std::u16string utf16;
* bool ret = StringUtils::UTF8ToUTF16("你好hello", utf16);
* if (ret) {
* do_some_thing_with_utf16(utf16);
* }
* @endcode
*/
2022-07-16 10:43:05 +08:00
AX_DLL bool UTF8ToUTF16(std::string_view inUtf8, std::u16string& outUtf16);
2019-11-23 20:27:39 +08:00
/**
* @brief Same as \a UTF8ToUTF16 but converts form UTF8 to UTF32.
*
* @see UTF8ToUTF16
*/
2022-07-16 10:43:05 +08:00
AX_DLL bool UTF8ToUTF32(std::string_view inUtf8, std::u32string& outUtf32);
2019-11-23 20:27:39 +08:00
/**
* @brief Same as \a UTF8ToUTF16 but converts form UTF16 to UTF8.
*
* @see UTF8ToUTF16
*/
2022-07-16 10:43:05 +08:00
AX_DLL bool UTF16ToUTF8(std::u16string_view inUtf16, std::string& outUtf8);
2021-12-25 10:04:45 +08:00
2019-11-23 20:27:39 +08:00
/**
* @brief Same as \a UTF8ToUTF16 but converts form UTF16 to UTF32.
*
* @see UTF8ToUTF16
*/
2022-07-16 10:43:05 +08:00
AX_DLL bool UTF16ToUTF32(std::u16string_view inUtf16, std::u32string& outUtf32);
2019-11-23 20:27:39 +08:00
/**
* @brief Same as \a UTF8ToUTF16 but converts form UTF32 to UTF8.
*
* @see UTF8ToUTF16
*/
2022-07-16 10:43:05 +08:00
AX_DLL bool UTF32ToUTF8(std::u32string_view inUtf32, std::string& outUtf8);
2021-12-25 10:04:45 +08:00
2019-11-23 20:27:39 +08:00
/**
* @brief Same as \a UTF8ToUTF16 but converts form UTF32 to UTF16.
*
* @see UTF8ToUTF16
*/
2022-07-16 10:43:05 +08:00
AX_DLL bool UTF32ToUTF16(std::u32string_view inUtf32, std::u16string& outUtf16);
2019-11-23 20:27:39 +08:00
2022-07-16 10:43:05 +08:00
#if (AX_TARGET_PLATFORM == AX_PLATFORM_ANDROID)
2019-11-23 20:27:39 +08:00
/**
2021-12-25 10:04:45 +08:00
* @brief convert jstring to utf8 std::string, same function with env->getStringUTFChars.
* because getStringUTFChars can not pass special emoticon
* @param env The JNI Env
* @param srcjStr The jstring which want to convert
* @param ret True if the conversion succeeds and the ret pointer isn't null
* @returns the result of utf8 string
*/
2022-07-16 10:43:05 +08:00
AX_DLL std::string getStringUTFCharsJNI(JNIEnv* env, jstring srcjStr, bool* ret = nullptr);
2019-11-23 20:27:39 +08:00
/**
2021-12-25 10:04:45 +08:00
* @brief create a jstring with utf8 std::string, same function with env->newStringUTF
* because newStringUTF can not convert special emoticon
* @param env The JNI Env
* @param srcjStr The std::string which want to convert
* @param ret True if the conversion succeeds and the ret pointer isn't null
* @returns the result of jstring,the jstring need to DeleteLocalRef(jstring);
*/
2022-07-16 10:43:05 +08:00
AX_DLL jstring newStringUTFJNI(JNIEnv* env, std::string_view utf8Str, bool* ret = nullptr);
2019-11-23 20:27:39 +08:00
#endif
/**
* @brief Trims the unicode spaces at the end of char16_t vector.
*/
2022-07-16 10:43:05 +08:00
AX_DLL void trimUTF16Vector(std::vector<char16_t>& str);
2021-12-25 10:04:45 +08:00
2019-11-23 20:27:39 +08:00
/**
* @brief Trims the unicode spaces at the end of char32_t vector.
*/
2022-07-16 10:43:05 +08:00
AX_DLL void trimUTF32Vector(std::vector<char32_t>& str);
2019-11-23 20:27:39 +08:00
/**
* @brief Whether the character is a whitespace character.
* @param ch The unicode character.
* @returns Whether the character is a white space character.
*
* @see http://en.wikipedia.org/wiki/Whitespace_character#Unicode
*
*/
2022-07-16 10:43:05 +08:00
AX_DLL bool isUnicodeSpace(char32_t ch);
2019-11-23 20:27:39 +08:00
/**
* @brief Whether the character is a Chinese/Japanese/Korean character.
* @param ch The unicode character.
* @returns Whether the character is a Chinese character.
*
* @see http://www.searchtb.com/2012/04/chinese_encode.html
* @see http://tieba.baidu.com/p/748765987
*
*/
2022-07-16 10:43:05 +08:00
AX_DLL bool isCJKUnicode(char32_t ch);
2021-12-25 10:04:45 +08:00
2019-11-23 20:27:39 +08:00
/**
* @brief Whether the character is a non-breaking character.
* @param ch The unicode character.
* @returns Whether the character is a non-breaking character.
*
* @see https://en.wikipedia.org/wiki/Space_(punctuation)#Spaces_in_Unicode
* @see https://en.wikipedia.org/wiki/Non-breaking_space
* @see https://en.wikipedia.org/wiki/Figure_space
* @see https://en.wikipedia.org/wiki/Word_joiner
*
*/
2022-07-16 10:43:05 +08:00
AX_DLL bool isUnicodeNonBreaking(char32_t ch);
2021-12-25 10:04:45 +08:00
2019-11-23 20:27:39 +08:00
/**
* @brief Returns the length of the string in characters.
* @param utf8 An UTF-8 encoded string.
* @returns The length of the string in characters.
*/
2022-07-16 10:43:05 +08:00
AX_DLL int32_t getCharacterCountInUTF8String(std::string_view utf8);
2019-11-23 20:27:39 +08:00
/**
* @brief Gets the index of the last character that is not equal to the character given.
* @param str The string to be searched.
* @param c The character to be searched for.
* @returns The index of the last character that is not \p c.
*/
2022-07-16 10:43:05 +08:00
AX_DLL unsigned int getIndexOfLastNotChar16(const std::vector<char16_t>& str, char16_t c);
2019-11-23 20:27:39 +08:00
/**
* @brief Gets char16_t vector from a given utf16 string.
*/
2022-07-16 10:43:05 +08:00
AX_DLL std::vector<char16_t> getChar16VectorFromUTF16String(const std::u16string& utf16);
2019-11-23 20:27:39 +08:00
2021-06-01 11:47:19 +08:00
/**
* @brief Whether has non-ascii utf-8 characters
*/
2022-07-16 10:43:05 +08:00
AX_DLL bool hasNonAsciiUTF8(const char* str, size_t len);
2019-11-23 20:27:39 +08:00
/**
* @brief Whether contains utf-8 or all characters are ascii
*/
2022-07-16 10:43:05 +08:00
AX_DLL bool detectNonAsciiUTF8(const char* str, size_t len, bool restrictUTF8, bool* pAllCharsAreAscii);
2021-06-01 11:47:19 +08:00
/**
* @brief isLegalUTF8String, contains ascii characters
*/
2022-07-16 10:43:05 +08:00
AX_DLL bool isLegalUTF8String(const char* str, size_t len);
2019-11-23 20:27:39 +08:00
/**
2021-12-25 10:04:45 +08:00
* Utf8 sequence
* Store all utf8 chars as std::string
* Build from std::string
*/
2022-07-16 10:43:05 +08:00
class AX_DLL StringUTF8
2019-11-23 20:27:39 +08:00
{
public:
struct CharUTF8
{
std::string _char;
bool isASCII() const { return _char.size() == 1; }
};
typedef std::vector<CharUTF8> CharUTF8Store;
StringUTF8();
StringUTF8(std::string_view newStr);
2019-11-23 20:27:39 +08:00
~StringUTF8();
std::size_t length() const;
void replace(std::string_view newStr);
2019-11-23 20:27:39 +08:00
std::string getAsCharSequence() const;
std::string getAsCharSequence(std::size_t pos) const;
std::string getAsCharSequence(std::size_t pos, std::size_t len) const;
bool deleteChar(std::size_t pos);
bool insert(std::size_t pos, std::string_view insertStr);
2019-11-23 20:27:39 +08:00
bool insert(std::size_t pos, const StringUTF8& insertStr);
CharUTF8Store& getString() { return _str; }
const CharUTF8Store& getString() const { return _str; }
private:
CharUTF8Store _str;
};
2021-12-25 10:04:45 +08:00
} // namespace StringUtils
2019-11-23 20:27:39 +08:00
NS_AX_END
2019-11-23 20:27:39 +08:00
#endif /** defined(__cocos2dx__ccUTF8__) */