issue #4660: Refactors the UTF-8 API to make it safer and easier to use.

2014-05-08 11:10:54 +08:00 · 2014-05-08 11:10:54 +08:00 · 20a8808b78
parent 42ffd25fca
commit 20a8808b78
2 changed files with 321 additions and 434 deletions
--- a/cocos/2d/ccUTF8.cpp
+++ b/cocos/2d/ccUTF8.cpp
@ -1,127 +1,36 @@
-/* 
+/****************************************************************************
- * This file uses some implementations of gutf8.c in glib.
+ Copyright (c) 2014 cocos2d-x.org
- *
+ Copyright (c) 2014 Chukong Technologies Inc.
- * gutf8.c - Operations on UTF-8 strings.
+
- *
+ http://www.cocos2d-x.org
- * Copyright (C) 1999      Tom Tromey
+
- * Copyright (C) 2000      Red Hat, Inc.
+ Permission is hereby granted, free of charge, to any person obtaining a copy
- * Copyright (c) 2013-2014 Chukong Technologies Inc.
+ of this software and associated documentation files (the "Software"), to deal
- *
+ in the Software without restriction, including without limitation the rights
- * This library is free software; you can redistribute it and/or
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * modify it under the terms of the GNU Lesser General Public
+ copies of the Software, and to permit persons to whom the Software is
- * License as published by the Free Software Foundation; either
+ furnished to do so, subject to the following conditions:
- * version 2 of the License, or (at your option) any later version.
+
- *
+ The above copyright notice and this permission notice shall be included in
- * This library is distributed in the hope that it will be useful,
+ all copies or substantial portions of the Software.
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
+
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.	 See the GNU
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * Lesser General Public License for more details.
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- *
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * You should have received a copy of the GNU Lesser General Public
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * License along with this library; if not, write to the
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- * Boston, MA 02111-1307, USA.
+ THE SOFTWARE.
- */
+ ****************************************************************************/
 #include "ccUTF8.h"
 #include "2d/platform/CCCommon.h"
 #include "base/CCConsole.h"
 #include "ConvertUTF.h"
 NS_CC_BEGIN
-int cc_wcslen(const unsigned short* str)
+namespace StringUtils {
 {
    int i=0;
    while(*str++) i++;
    return i;
 }
 /* Code from GLIB gutf8.c starts here. */
 #define UTF8_COMPUTE(Char, Mask, Len)        \
 if (Char < 128)                \
 {                        \
 Len = 1;                    \
 Mask = 0x7f;                \
 }                        \
 else if ((Char & 0xe0) == 0xc0)        \
 {                        \
 Len = 2;                    \
 Mask = 0x1f;                \
 }                        \
 else if ((Char & 0xf0) == 0xe0)        \
 {                        \
 Len = 3;                    \
 Mask = 0x0f;                \
 }                        \
 else if ((Char & 0xf8) == 0xf0)        \
 {                        \
 Len = 4;                    \
 Mask = 0x07;                \
 }                        \
 else if ((Char & 0xfc) == 0xf8)        \
 {                        \
 Len = 5;                    \
 Mask = 0x03;                \
 }                        \
 else if ((Char & 0xfe) == 0xfc)        \
 {                        \
 Len = 6;                    \
 Mask = 0x01;                \
 }                        \
 else                        \
 Len = -1;
 #define UTF8_LENGTH(Char)            \
 ((Char) < 0x80 ? 1 :                \
 ((Char) < 0x800 ? 2 :            \
 ((Char) < 0x10000 ? 3 :            \
 ((Char) < 0x200000 ? 4 :            \
 ((Char) < 0x4000000 ? 5 : 6)))))
 #define UTF8_GET(Result, Chars, Count, Mask, Len)    \
 (Result) = (Chars)[0] & (Mask);            \
 for ((Count) = 1; (Count) < (Len); ++(Count))        \
 {                            \
 if (((Chars)[(Count)] & 0xc0) != 0x80)        \
 {                        \
 (Result) = -1;                \
 break;                    \
 }                        \
 (Result) <<= 6;                    \
 (Result) |= ((Chars)[(Count)] & 0x3f);        \
 }
 #define UNICODE_VALID(Char)            \
 ((Char) < 0x110000 &&                \
 (((Char) & 0xFFFFF800) != 0xD800) &&        \
 ((Char) < 0xFDD0 || (Char) > 0xFDEF) &&    \
 ((Char) & 0xFFFE) != 0xFFFE)
 static const char utf8_skip_data[256] = {
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1,
    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    2, 2, 2, 2, 2, 2, 2,
    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 5,
    5, 5, 5, 6, 6, 1, 1
 };
 static const char *const g_utf8_skip = utf8_skip_data;
 #define cc_utf8_next_char(p) (char *)((p) + g_utf8_skip[*(unsigned char *)(p)])
 /*
 * @str:    the string to search through.
@ -129,14 +38,14 @@ static const char *const g_utf8_skip = utf8_skip_data;
 *
 * Return value: the index of the last character that is not c.
 * */
-unsigned int cc_utf8_find_last_not_char(std::vector<unsigned short> str, unsigned short c)
+unsigned int getIndexOfLastNotChar16(const std::vector<char16_t>& str, char16_t c)
 {
    int len = static_cast<int>(str.size());
-    
+
    int i = len - 1;
    for (; i >= 0; --i)
        if (str[i] != c) return i;
-    
+
    return i;
 }
@ -148,13 +57,13 @@ unsigned int cc_utf8_find_last_not_char(std::vector<unsigned short> str, unsigne
 *
 * Return value: the trimmed string.
 * */
-static void cc_utf8_trim_from(std::vector<unsigned short>* str, int index)
+static void trimUTF16VectorFromIndex(std::vector<char16_t>& str, int index)
 {
-    int size = static_cast<int>(str->size());
+    int size = static_cast<int>(str.size());
    if (index >= size || index < 0)
        return;
-    
+
-    str->erase(str->begin() + index, str->begin() + size);
+    str.erase(str.begin() + index, str.begin() + size);
 }
 /*
@ -164,14 +73,14 @@ static void cc_utf8_trim_from(std::vector<unsigned short>* str, int index)
 *
 * Return value: whether the character is a whitespace character.
 * */
-bool isspace_unicode(unsigned short ch)
+bool isUnicodeSpace(char16_t ch)
 {
    return  (ch >= 0x0009 && ch <= 0x000D) || ch == 0x0020 || ch == 0x0085 || ch == 0x00A0 || ch == 0x1680
    || (ch >= 0x2000 && ch <= 0x200A) || ch == 0x2028 || ch == 0x2029 || ch == 0x202F
    ||  ch == 0x205F || ch == 0x3000;
 }
-bool iscjk_unicode(unsigned short ch)
+bool isCJKUnicode(char16_t ch)
 {
    return (ch >= 0x4E00 && ch <= 0x9FBF)   // CJK Unified Ideographs
        || (ch >= 0x2E80 && ch <= 0x2FDF)   // CJK Radicals Supplement & Kangxi Radicals
@ -183,131 +92,165 @@ bool iscjk_unicode(unsigned short ch)
        || (ch >= 0x31C0 && ch <= 0x4DFF);  // Other exiensions
 }
-void cc_utf8_trim_ws(std::vector<unsigned short>* str)
+void trimUTF16Vector(std::vector<char16_t>& str)
 {
-    int len = static_cast<int>(str->size());
+    int len = static_cast<int>(str.size());
-    
+
    if ( len <= 0 )
        return;
-    
+
    int last_index = len - 1;
-    
+
    // Only start trimming if the last character is whitespace..
-    if (isspace_unicode((*str)[last_index]))
+    if (isUnicodeSpace(str[last_index]))
    {
        for (int i = last_index - 1; i >= 0; --i)
        {
-            if (isspace_unicode((*str)[i]))
+            if (isUnicodeSpace(str[i]))
                last_index = i;
            else
                break;
        }
-        
+
-        cc_utf8_trim_from(str, last_index);
+        trimUTF16VectorFromIndex(str, last_index);
    }
 }
-/*
+bool UTF8ToUTF16(const std::string& utf8, std::u16string& outUtf16)
 * cc_utf8_strlen:
 * @p: pointer to the start of a UTF-8 encoded string.
 * @max: the maximum number of bytes to examine. If @max
 *       is less than 0, then the string is assumed to be
 *       null-terminated. If @max is 0, @p will not be examined and
 *       may be %nullptr.
 *
 * Returns the length of the string in characters.
 *
 * Return value: the length of the string in characters
 **/
 long
 cc_utf8_strlen (const char * p, int max)
 {
-    long len = 0;
+    if (utf8.empty())
    const char *start = p;
    if (!(p != nullptr || max == 0))
    {
-        return 0;
+        outUtf16.clear();
        return true;
    }
-    
+
-    if (max < 0)
+    bool ret = false;
    const size_t utf16Bytes = (utf8.length()+1) << 1;
    char16_t* utf16 = (char16_t*)malloc(utf16Bytes);
    memset(utf16, 0, utf16Bytes);
    UTF16* utf16Start = (UTF16*)utf16;
    UTF16* utf16End = ((UTF16*)utf16) + (utf8.length());
    const UTF8* utf8Start = (const UTF8*)utf8.data();
    const UTF8* utf8End = ((const UTF8*)utf8.data()) + utf8.length();
    if (conversionOK == ConvertUTF8toUTF16((const UTF8 **) &utf8Start, utf8End, &utf16Start, utf16End, strictConversion))
    {
-        while (*p)
+        outUtf16 = utf16;
-        {
+        ret = true;
            p = cc_utf8_next_char (p);
            ++len;
        }
    }
-    else
+
-    {
+    free(utf16);
-        if (max == 0 || !*p)
+
-            return 0;
+    return ret;
        p = cc_utf8_next_char (p);
        while (p - start < max && *p)
        {
            ++len;
            p = cc_utf8_next_char (p);
        }
        /* only do the last len increment if we got a complete
         * char (don't count partial chars)
         */
        if (p - start == max)
            ++len;
    }
    return len;
 }
-/*
+bool UTF16ToUTF8(const std::u16string& utf16, std::string& outUtf8)
 * g_utf8_get_char:
 * @p: a pointer to Unicode character encoded as UTF-8
 *
 * Converts a sequence of bytes encoded as UTF-8 to a Unicode character.
 * If @p does not point to a valid UTF-8 encoded character, results are
 * undefined. If you are not sure that the bytes are complete
 * valid Unicode characters, you should use g_utf8_get_char_validated()
 * instead.
 *
 * Return value: the resulting character
 **/
 static unsigned int
 cc_utf8_get_char (const char * p)
 {
-    int i, mask = 0, len;
+    if (utf16.empty())
-    unsigned int result;
+    {
-    unsigned char c = (unsigned char) *p;
+        outUtf8.clear();
-    
+        return true;
-    UTF8_COMPUTE (c, mask, len);
+    }
-    if (len == -1)
+
-        return (unsigned int) - 1;
+    bool ret = false;
-    UTF8_GET (result, p, i, mask, len);
+    const size_t utf8Bytes = (utf16.length() << 2) + 1;
-    
+    char* utf8 = (char*)malloc(utf8Bytes);
-    return result;
+    memset(utf8, 0, utf8Bytes);
    UTF8 *utf8Start = (UTF8*)utf8;
    UTF8 *utf8End = ((UTF8*)utf8) + (utf8Bytes -1);
    const UTF16* utf16Start = (const UTF16*)utf16.data();
    const UTF16* utf16End = ((const UTF16*)utf16.data()) + utf16.length();
    if (conversionOK == ConvertUTF16toUTF8(&utf16Start, utf16End, &utf8Start, utf8End, strictConversion))
    {
        outUtf8 = utf8;
        ret = true;
    }
    free(utf8);
    return ret;
 }
-
+std::vector<char16_t> getUTF16VectorFromUTF16String(const std::u16string& str)
 unsigned short* cc_utf8_to_utf16(const char* str_old, int length/* = -1 */, int* rUtf16Size/* = nullptr */)
 {
-    long len = cc_utf8_strlen(str_old, length);
+    std::vector<char16_t> str_new;
-    if (rUtf16Size != nullptr) {
+
-        *rUtf16Size = static_cast<int>(len);
+    size_t len = str.length();
-    }
+    for (size_t i = 0; i < len; ++i)
    unsigned short* str_new = new unsigned short[len + 1];
    str_new[len] = 0;
    for (int i = 0; i < len; ++i)
    {
-        str_new[i] = cc_utf8_get_char(str_old);
+        str_new.push_back(str[i]);
        str_old = cc_utf8_next_char(str_old);
    }
    return str_new;
 }
 /*
  * Copies every UTF-16 code unit of @utf16, in order, into a new vector.
  *
  * Return value: a vector holding utf16.length() elements; empty when
  *               @utf16 is empty.
  * */
 std::vector<char16_t> getChar16VectorFromUTF16String(const std::u16string& utf16)
 {
    // The iterator-range constructor sizes the vector once and copies the
    // code units one by one, exactly like a reserve + push_back loop.
    return std::vector<char16_t>(utf16.begin(), utf16.end());
 }
 long getCharacterCountInUTF8String(const std::string& utf8)
 {
    return getUTF8StringLength((const UTF8*)utf8.c_str());
 }
 } //namespace StringUtils {
 /*
  * Counts the 16-bit units in @str that precede the terminating 0.
  * @str must point to a 0-terminated buffer; it is read without a null check.
  * */
 int cc_wcslen(const unsigned short* str)
 {
    int count = 0;
    for (const unsigned short* cursor = str; *cursor != 0; ++cursor)
    {
        ++count;
    }
    return count;
 }
 /*
  * Deprecated wrapper around StringUtils::trimUTF16Vector().
  *
  * @str: the vector to trim in place; a null pointer is ignored.
  * */
 void cc_utf8_trim_ws(std::vector<unsigned short>* str)
 {
    if (str == nullptr)
        return;

    // std::vector<unsigned short> and std::vector<char16_t> are unrelated
    // types even when their elements have the same size, so casting one
    // pointer to the other and using it is undefined behaviour. Trim a
    // converted copy instead and write the result back.
    std::vector<char16_t> char16Vector(str->begin(), str->end());
    StringUtils::trimUTF16Vector(char16Vector);
    str->assign(char16Vector.begin(), char16Vector.end());
 }
 /*
  * Deprecated wrapper: forwards @ch to StringUtils::isUnicodeSpace() and
  * returns its answer unchanged.
  * */
 bool isspace_unicode(unsigned short ch)
 {
    const char16_t codeUnit = ch;
    return StringUtils::isUnicodeSpace(codeUnit);
 }
 /*
  * Deprecated wrapper: forwards @ch to StringUtils::isCJKUnicode() and
  * returns its answer unchanged.
  * */
 bool iscjk_unicode(unsigned short ch)
 {
    const char16_t codeUnit = ch;
    return StringUtils::isCJKUnicode(codeUnit);
 }
 /*
  * Deprecated wrapper around StringUtils::getCharacterCountInUTF8String().
  *
  * @p:   pointer to a null-terminated UTF-8 string; may be nullptr.
  * @max: ignored by this implementation; the whole string is always counted.
  *
  * Return value: the length of the string in characters, or 0 when @p is
  *               nullptr.
  * */
 long cc_utf8_strlen (const char * p, int max)
 {
    CC_UNUSED_PARAM(max);

    // The callee takes a std::string, and constructing one from a null
    // pointer is undefined behaviour, so answer for that case here.
    if (p == nullptr)
        return 0;

    return StringUtils::getCharacterCountInUTF8String(p);
 }
 /*
  * Deprecated wrapper around StringUtils::getIndexOfLastNotChar16().
  *
  * @str: the string to search through.
  * @c:   the character to skip.
  *
  * Return value: whatever getIndexOfLastNotChar16() reports for a char16_t
  *               copy of @str.
  * */
 unsigned int cc_utf8_find_last_not_char(const std::vector<unsigned short>& str, unsigned short c)
 {
    // The new API works on char16_t, so convert the elements one by one.
    const std::vector<char16_t> converted(str.begin(), str.end());
    const char16_t target = c;
    return StringUtils::getIndexOfLastNotChar16(converted, target);
 }
 std::vector<unsigned short> cc_utf16_vec_from_utf16_str(const unsigned short* str)
 {
    int len = cc_wcslen(str);
@ -320,209 +263,56 @@ std::vector<unsigned short> cc_utf16_vec_from_utf16_str(const unsigned short* st
    return str_new;
 }
-/**
+unsigned short* cc_utf8_to_utf16(const char* str_old, int length/* = -1*/, int* rUtf16Size/* = nullptr*/)
 * cc_unichar_to_utf8:
 * @c: a ISO10646 character code
 * @outbuf: output buffer, must have at least 6 bytes of space.
 *       If %nullptr, the length will be computed and returned
 *       and nothing will be written to @outbuf.
 *
 * Converts a single character to UTF-8.
 *
 * Return value: number of bytes written
 **/
 int
 cc_unichar_to_utf8 (unsigned int c,
                   char   *outbuf)
 {
-    int len = 0;
+    if (str_old == nullptr)
-    int first;
+        return nullptr;
    int i;
-    if (c < 0x80)
+    unsigned short* ret = nullptr;
    std::u16string outUtf16;
    bool succeed = StringUtils::UTF8ToUTF16(str_old, outUtf16);
    if (succeed)
    {
-        first = 0;
+        ret = new unsigned short[outUtf16.length() + 1];
-        len = 1;
+        ret[outUtf16.length()] = 0;
-    }
+        // memcpy takes a byte count, so scale the number of UTF-16 code units by the element size.
+        memcpy(ret, outUtf16.data(), outUtf16.length() * sizeof(unsigned short));
    else if (c < 0x800)
    {
        first = 0xc0;
        len = 2;
    }
    else if (c < 0x10000)
    {
        first = 0xe0;
        len = 3;
    }
    else if (c < 0x200000)
    {
        first = 0xf0;
        len = 4;
    }
    else if (c < 0x4000000)
    {
        first = 0xf8;
        len = 5;
    }
    else
    {
        first = 0xfc;
        len = 6;
    }
-    if (outbuf)
+    return ret;
    {
        for (i = len - 1; i > 0; --i)
        {
            outbuf[i] = (c & 0x3f) | 0x80;
            c >>= 6;
        }
        outbuf[0] = c | first;
    }
    return len;
 }
-#define SURROGATE_VALUE(h,l) (((h) - 0xd800) * 0x400 + (l) - 0xdc00 + 0x10000)
+char * cc_utf16_to_utf8 (const unsigned short  *str,
-
+                  int             len,
-/**
+                  long            *items_read,
- * cc_utf16_to_utf8:
+                  long            *items_written)
 * @str: a UTF-16 encoded string
 * @len: the maximum length of @str to use. If @len < 0, then
 *       the string is terminated with a 0 character.
 * @items_read: location to store number of words read, or %nullptr.
 *              If %nullptr, then %G_CONVERT_ERROR_PARTIAL_INPUT will be
 *              returned in case @str contains a trailing partial
 *              character. If an error occurs then the index of the
 *              invalid input is stored here.
 * @items_written: location to store number of bytes written, or %nullptr.
 *                 The value stored here does not include the trailing
 *                 0 byte.
 * @error: location to store the error occuring, or %nullptr to ignore
 *         errors. Any of the errors in #GConvertError other than
 *         %G_CONVERT_ERROR_NO_CONVERSION may occur.
 *
 * Convert a string from UTF-16 to UTF-8. The result will be
 * terminated with a 0 byte.
 *
 * Return value: a pointer to a newly allocated UTF-8 string.
 *               This value must be freed with free(). If an
 *               error occurs, %nullptr will be returned and
 *               @error set.
 **/
 char *
 cc_utf16_to_utf8 (const unsigned short  *str,
                 int             len,
                 long            *items_read,
                 long            *items_written)
 {
-    /* This function and g_utf16_to_ucs4 are almost exactly identical - The lines that differ
+    if (str == nullptr)
-     * are marked.
+        return nullptr;
     */
    const unsigned short *in;
    char *out;
    char *result = nullptr;
    int n_bytes;
    unsigned int high_surrogate;
    if (str == 0) return nullptr;
-    n_bytes = 0;
+    std::u16string utf16;
-    in = str;
+    int utf16Len = len < 0 ? cc_wcslen(str) : len;
-    high_surrogate = 0;
+    
-    while ((len < 0 || in - str < len) && *in)
+    for (int i = 0; i < utf16Len; ++i)
    {
-        unsigned short c = *in;
+        utf16.push_back(str[i]);
        unsigned int wc;
        if (c >= 0xdc00 && c < 0xe000) /* low surrogate */
        {
            if (high_surrogate)
            {
                wc = SURROGATE_VALUE (high_surrogate, c);
                high_surrogate = 0;
            }
            else
            {
                CCLOGERROR("Invalid sequence in conversion input");
                goto err_out;
            }
        }
        else
        {
            if (high_surrogate)
            {
                CCLOGERROR("Invalid sequence in conversion input");
                goto err_out;
            }
            if (c >= 0xd800 && c < 0xdc00) /* high surrogate */
            {
                high_surrogate = c;
                goto next1;
            }
            else
                wc = c;
        }
        /********** DIFFERENT for UTF8/UCS4 **********/
        n_bytes += UTF8_LENGTH (wc);
    next1:
        in++;
    }
-    if (high_surrogate && !items_read)
+    char* ret = nullptr;
-    {        
+    std::string outUtf8;
-        CCLOGERROR("Partial character sequence at end of input");
+    bool succeed = StringUtils::UTF16ToUTF8(utf16, outUtf8);
        goto err_out;
    }
-    /* At this point, everything is valid, and we just need to convert
+    if (succeed)
     */
    /********** DIFFERENT for UTF8/UCS4 **********/
    result = new char[n_bytes + 1];
    high_surrogate = 0;
    out = result;
    in = str;
    while (out < result + n_bytes)
    {
-        unsigned short c = *in;
+        ret = new char[outUtf8.length() + 1];
-        unsigned int wc;
+        ret[outUtf8.length()] = '\0';
-        
+        memcpy(ret, outUtf8.data(), outUtf8.length());
        if (c >= 0xdc00 && c < 0xe000) /* low surrogate */
        {
            wc = SURROGATE_VALUE (high_surrogate, c);
            high_surrogate = 0;
        }
        else if (c >= 0xd800 && c < 0xdc00) /* high surrogate */
        {
            high_surrogate = c;
            goto next2;
        }
        else
            wc = c;
        /********** DIFFERENT for UTF8/UCS4 **********/
        out += cc_unichar_to_utf8 (wc, out);
    next2:
        in++;
    }
-    /********** DIFFERENT for UTF8/UCS4 **********/
+    return ret;
    *out = '\0';
    if (items_written)
    /********** DIFFERENT for UTF8/UCS4 **********/
        *items_written = out - result;
 err_out:
    if (items_read)
        *items_read = in - str;
    return result;
 }
 NS_CC_END
--- a/cocos/2d/ccUTF8.h
+++ b/cocos/2d/ccUTF8.h
@ -1,35 +1,131 @@
-/*
+/****************************************************************************
- * Copyright (C) 1999      Tom Tromey
+ Copyright (c) 2014 cocos2d-x.org
- * Copyright (C) 2000      Red Hat, Inc.
+ Copyright (c) 2014 Chukong Technologies Inc.
- * Copyright (c) 2013-2014 Chukong Technologies Inc.
+
- *
+ http://www.cocos2d-x.org
- * This library is free software; you can redistribute it and/or
+
- * modify it under the terms of the GNU Lesser General Public
+ Permission is hereby granted, free of charge, to any person obtaining a copy
- * License as published by the Free Software Foundation; either
+ of this software and associated documentation files (the "Software"), to deal
- * version 2 of the License, or (at your option) any later version.
+ in the Software without restriction, including without limitation the rights
- *
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * This library is distributed in the hope that it will be useful,
+ copies of the Software, and to permit persons to whom the Software is
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ furnished to do so, subject to the following conditions:
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.	 See the GNU
+
- * Lesser General Public License for more details.
+ The above copyright notice and this permission notice shall be included in
- *
+ all copies or substantial portions of the Software.
- * You should have received a copy of the GNU Lesser General Public
+
- * License along with this library; if not, write to the
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * Boston, MA 02111-1307, USA.
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- */
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 THE SOFTWARE.
 ****************************************************************************/
 #ifndef __cocos2dx__ccUTF8__
 #define __cocos2dx__ccUTF8__
 #include "base/CCPlatformMacros.h"
 #include <vector>
 #include <string>
 NS_CC_BEGIN
 namespace StringUtils {
 /**
 *  @brief Converts a UTF-8 string to a UTF-16 string.
 *  @param utf8 The UTF-8 string to be converted.
 *  @param outUtf16 The output UTF-16 string. It is cleared when \p utf8 is empty
 *                  and is only assigned when the conversion succeeds.
 *  @return true if the conversion succeeded (an empty input counts as success), otherwise false.
 *  @note Please check the return value before using \p outUtf16
 *  e.g.
 *  @code
 *    std::u16string utf16;
 *    bool ret = StringUtils::UTF8ToUTF16("你好hello", utf16);
 *    if (ret) {
 *        do_some_thing_with_utf16(utf16);
 *    }
 *  @endcode
 */
 CC_DLL bool UTF8ToUTF16(const std::string& utf8, std::u16string& outUtf16);
 /**
 *  @brief Converts a UTF-16 string to a UTF-8 string.
 *  @param utf16 The UTF-16 string to be converted.
 *  @param outUtf8 The output UTF-8 string. It is cleared when \p utf16 is empty
 *                 and is only assigned when the conversion succeeds.
 *  @return true if the conversion succeeded (an empty input counts as success), otherwise false.
 *  @note Please check the return value before using \p outUtf8
 *  e.g.
 *  @code
 *    std::string utf8;
 *    bool ret = StringUtils::UTF16ToUTF8(u"\u4f60\u597d", utf8);
 *    if (ret) {
 *        do_some_thing_with_utf8(utf8);
 *    }
 *  @endcode
 */
 CC_DLL bool UTF16ToUTF8(const std::u16string& utf16, std::string& outUtf8);
 /**
 *  @brief Trims the unicode spaces at the end of a char16_t vector.
 *
 *  @param str   the vector to trim in place. Only a trailing run of characters for which
 *               isUnicodeSpace() returns true is removed; nothing changes when the last
 *               character is not a space or the vector is empty.
 */
 CC_DLL void trimUTF16Vector(std::vector<char16_t>& str);
 /**
 *  @brief Whether the character is a whitespace character.
 *
 *  @param ch    the unicode character (a single UTF-16 code unit)
 *  @returns     whether the character is a white space character.
 *
 *  @see http://en.wikipedia.org/wiki/Whitespace_character#Unicode
 *
 */
 CC_DLL bool isUnicodeSpace(char16_t ch);
 /**
 *  @brief Whether the character is a Chinese/Japanese/Korean character.
 *
 *  @param ch    the unicode character (a single UTF-16 code unit)
 *  @returns     whether the character falls in one of the CJK ranges checked by the implementation.
 *
 *  @see http://www.searchtb.com/2012/04/chinese_encode.html
 *  @see http://tieba.baidu.com/p/748765987
 *
 */
 CC_DLL bool isCJKUnicode(char16_t ch);
 /**
 *  @brief Returns the length of the string in characters.
 *
 *  @param utf8 an UTF-8 encoded string.
 *  @returns the length of the string in characters, as counted by getUTF8StringLength()
 *           on the string's null-terminated buffer.
 */
 CC_DLL long getCharacterCountInUTF8String(const std::string& utf8);
 /**
 *  @brief Gets the index of the last character that is not equal to the character given.
 *
 *  @param str   the string to be searched.
 *  @param c     the character to be searched for.
 *
 *  @returns the index of the last character that is not \p c. When every character equals
 *           \p c, or \p str is empty, the internal index is -1 and is returned converted
 *           to unsigned int.
 *
 */
 CC_DLL unsigned int getIndexOfLastNotChar16(const std::vector<char16_t>& str, char16_t c);
 /**
 *  @brief Gets a char16_t vector from a given UTF-16 string.
 *
 *  @param utf16 the UTF-16 string whose code units are copied.
 *  @returns a vector containing the code units of \p utf16 in the same order.
 */
 CC_DLL std::vector<char16_t> getChar16VectorFromUTF16String(const std::u16string& utf16);
 } // namespace StringUtils {
 CC_DLL int cc_wcslen(const unsigned short* str);
-CC_DLL void cc_utf8_trim_ws(std::vector<unsigned short>* str);
+CC_DEPRECATED_ATTRIBUTE void cc_utf8_trim_ws(std::vector<unsigned short>* str);
 /**
 * Whether the character is a whitespace character.
@ -39,7 +135,7 @@ CC_DLL void cc_utf8_trim_ws(std::vector<unsigned short>* str);
 *
 * @see http://en.wikipedia.org/wiki/Whitespace_character#Unicode
 * */
-CC_DLL bool isspace_unicode(unsigned short ch);
+CC_DEPRECATED_ATTRIBUTE bool isspace_unicode(unsigned short ch);
 /**
 * Whether the character is a Chinese/Japanese/Korean character.
@ -50,7 +146,7 @@ CC_DLL bool isspace_unicode(unsigned short ch);
 * @see http://www.searchtb.com/2012/04/chinese_encode.html
 * @see http://tieba.baidu.com/p/748765987
 * */
-CC_DLL bool iscjk_unicode(unsigned short ch);
+CC_DEPRECATED_ATTRIBUTE bool iscjk_unicode(unsigned short ch);
 /**
 * Returns the length of the string in characters.
@ -62,7 +158,7 @@ CC_DLL bool iscjk_unicode(unsigned short ch);
 *
 * @returns the length of the string in characters
 **/
-CC_DLL long
+CC_DEPRECATED_ATTRIBUTE long
 cc_utf8_strlen (const char * p, int max);
 /**
@ -73,9 +169,9 @@ cc_utf8_strlen (const char * p, int max);
 *
 * @returns the index of the last character that is not \p c.
 * */
-CC_DLL unsigned int cc_utf8_find_last_not_char(std::vector<unsigned short> str, unsigned short c);
+CC_DEPRECATED_ATTRIBUTE unsigned int cc_utf8_find_last_not_char(const std::vector<unsigned short>& str, unsigned short c);
-CC_DLL std::vector<unsigned short> cc_utf16_vec_from_utf16_str(const unsigned short* str);
+CC_DEPRECATED_ATTRIBUTE std::vector<unsigned short> cc_utf16_vec_from_utf16_str(const unsigned short* str);
 /**
 * Creates a UTF-16 string from a UTF-8 encoded C string.
@ -84,7 +180,7 @@ CC_DLL std::vector<unsigned short> cc_utf16_vec_from_utf16_str(const unsigned sh
 *
 * @returns the newly created UTF-16 string.
 * */
-CC_DLL unsigned short* cc_utf8_to_utf16(const char* str_old, int length = -1, int* rUtf16Size = nullptr);
+CC_DEPRECATED_ATTRIBUTE unsigned short* cc_utf8_to_utf16(const char* str_old, int length = -1, int* rUtf16Size = nullptr);
 /**
 * Convert a string from UTF-16 to UTF-8. The result will be null terminated.
@ -103,12 +199,13 @@ CC_DLL unsigned short* cc_utf8_to_utf16(const char* str_old, int length = -1, in
 * @returns a pointer to a newly allocated UTF-8 string. This value must be
 *          released with delete[]. If an error occurs, %nullptr will be returned.
 **/
-CC_DLL char *
+CC_DEPRECATED_ATTRIBUTE char *
 cc_utf16_to_utf8 (const unsigned short  *str,
                  int             len,
                  long            *items_read,
                  long            *items_written);
 NS_CC_END
 #endif /* defined(__cocos2dx__ccUTF8__) */