Merge pull request #6635 from dumganhar/iss4660-unicode

issue #4660: ccUTF8 uses utf8-utf16 conversion in llvm3.4
2014-05-12 17:57:25 +08:00 · 2014-05-12 17:57:25 +08:00 · 9ab2dae212
parent cbf2629bf0 d8b913dcd8
commit 9ab2dae212
30 changed files with 1795 additions and 672 deletions
--- a/build/cocos2d_libs.xcodeproj/project.pbxproj
+++ b/build/cocos2d_libs.xcodeproj/project.pbxproj
@ -98,6 +98,10 @@
 		1A12775A18DFCC4F0005F345 /* CCTweenFunction.h in Headers */ = {isa = PBXBuildFile; fileRef = 2986667918B1B079000E39CA /* CCTweenFunction.h */; };
 		1A12775B18DFCC540005F345 /* CCTweenFunction.h in Headers */ = {isa = PBXBuildFile; fileRef = 2986667918B1B079000E39CA /* CCTweenFunction.h */; };
 		1A12775C18DFCC590005F345 /* CCTweenFunction.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 2986667818B1B079000E39CA /* CCTweenFunction.cpp */; };
+		1A1645B0191B726C008C7C7F /* ConvertUTF.c in Sources */ = {isa = PBXBuildFile; fileRef = 1A1645AE191B726C008C7C7F /* ConvertUTF.c */; };
+		1A1645B1191B726C008C7C7F /* ConvertUTF.c in Sources */ = {isa = PBXBuildFile; fileRef = 1A1645AE191B726C008C7C7F /* ConvertUTF.c */; };
+		1A1645B2191B726C008C7C7F /* ConvertUTFWrapper.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1A1645AF191B726C008C7C7F /* ConvertUTFWrapper.cpp */; };
+		1A1645B3191B726C008C7C7F /* ConvertUTFWrapper.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1A1645AF191B726C008C7C7F /* ConvertUTFWrapper.cpp */; };
 		1A570061180BC5A10088DEC7 /* CCAction.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1A570047180BC5A10088DEC7 /* CCAction.cpp */; };
 		1A570062180BC5A10088DEC7 /* CCAction.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1A570047180BC5A10088DEC7 /* CCAction.cpp */; };
 		1A570063180BC5A10088DEC7 /* CCAction.h in Headers */ = {isa = PBXBuildFile; fileRef = 1A570048180BC5A10088DEC7 /* CCAction.h */; };
@ -765,6 +769,8 @@
 		1ABA68AF1888D700007D1BB4 /* CCFontCharMap.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1ABA68AC1888D700007D1BB4 /* CCFontCharMap.cpp */; };
 		1ABA68B01888D700007D1BB4 /* CCFontCharMap.h in Headers */ = {isa = PBXBuildFile; fileRef = 1ABA68AD1888D700007D1BB4 /* CCFontCharMap.h */; };
 		1ABA68B11888D700007D1BB4 /* CCFontCharMap.h in Headers */ = {isa = PBXBuildFile; fileRef = 1ABA68AD1888D700007D1BB4 /* CCFontCharMap.h */; };
+		1AC0269C1914068200FA920D /* ConvertUTF.h in Headers */ = {isa = PBXBuildFile; fileRef = 1AC026991914068200FA920D /* ConvertUTF.h */; };
+		1AC0269D1914068200FA920D /* ConvertUTF.h in Headers */ = {isa = PBXBuildFile; fileRef = 1AC026991914068200FA920D /* ConvertUTF.h */; };
 		1AD71DA9180E26E600808F54 /* CCBAnimationManager.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1AD71CFA180E26E600808F54 /* CCBAnimationManager.cpp */; };
 		1AD71DAA180E26E600808F54 /* CCBAnimationManager.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1AD71CFA180E26E600808F54 /* CCBAnimationManager.cpp */; };
 		1AD71DAB180E26E600808F54 /* CCBAnimationManager.h in Headers */ = {isa = PBXBuildFile; fileRef = 1AD71CFB180E26E600808F54 /* CCBAnimationManager.h */; };
@ -1870,6 +1876,8 @@
 		1A0DB7301823827C0025743D /* CCGL.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = CCGL.h; sourceTree = "<group>"; };
 		1A0DB7311823827C0025743D /* CCEAGLView.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = CCEAGLView.h; sourceTree = "<group>"; };
 		1A0DB7351823828F0025743D /* CCGL.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = CCGL.h; sourceTree = "<group>"; };
+		1A1645AE191B726C008C7C7F /* ConvertUTF.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = ConvertUTF.c; sourceTree = "<group>"; };
+		1A1645AF191B726C008C7C7F /* ConvertUTFWrapper.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = ConvertUTFWrapper.cpp; sourceTree = "<group>"; };
 		1A570047180BC5A10088DEC7 /* CCAction.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = CCAction.cpp; sourceTree = "<group>"; };
 		1A570048180BC5A10088DEC7 /* CCAction.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = CCAction.h; sourceTree = "<group>"; };
 		1A570049180BC5A10088DEC7 /* CCActionCamera.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = CCActionCamera.cpp; sourceTree = "<group>"; };
@ -2121,6 +2129,7 @@
 		1AAF584E180E40B9000584C8 /* LocalStorageAndroid.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = LocalStorageAndroid.cpp; sourceTree = "<group>"; };
 		1ABA68AC1888D700007D1BB4 /* CCFontCharMap.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = CCFontCharMap.cpp; sourceTree = "<group>"; };
 		1ABA68AD1888D700007D1BB4 /* CCFontCharMap.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = CCFontCharMap.h; sourceTree = "<group>"; };
+		1AC026991914068200FA920D /* ConvertUTF.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = ConvertUTF.h; sourceTree = "<group>"; };
 		1AD71CFA180E26E600808F54 /* CCBAnimationManager.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = CCBAnimationManager.cpp; sourceTree = "<group>"; };
 		1AD71CFB180E26E600808F54 /* CCBAnimationManager.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = CCBAnimationManager.h; sourceTree = "<group>"; };
 		1AD71CFC180E26E600808F54 /* CCBFileLoader.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = CCBFileLoader.cpp; sourceTree = "<group>"; };
@ -3367,6 +3376,7 @@
 		1A57033E180BD0490088DEC7 /* external */ = {
 			isa = PBXGroup;
 			children = (
+				1AC026971914068200FA920D /* ConvertUTF */,
 				46C02E0418E91123004B7456 /* xxhash */,
 				46A168B21807AF9C005B8026 /* Box2D */,
 				46A1693A1807AFD6005B8026 /* chipmunk */,
@ -3784,6 +3794,17 @@
 			path = "local-storage";
 			sourceTree = "<group>";
 		};
+		1AC026971914068200FA920D /* ConvertUTF */ = {
+			isa = PBXGroup;
+			children = (
+				1A1645AE191B726C008C7C7F /* ConvertUTF.c */,
+				1AC026991914068200FA920D /* ConvertUTF.h */,
+				1A1645AF191B726C008C7C7F /* ConvertUTFWrapper.cpp */,
+			);
+			name = ConvertUTF;
+			path = ../external/ConvertUTF;
+			sourceTree = "<group>";
+		};
 		1AD71CF7180E26E600808F54 /* editor-support */ = {
 			isa = PBXGroup;
 			children = (
@ -4992,6 +5013,7 @@
 				50FCEB9918C72017004AD434 /* CheckBoxReader.h in Headers */,
 				50E6D33E18E174130051CA34 /* UIVBox.h in Headers */,
 				500DC9AC19106300007B91BF /* ZipUtils.h in Headers */,
+				1AC0269C1914068200FA920D /* ConvertUTF.h in Headers */,
 				1A57019F180BCB590088DEC7 /* CCFont.h in Headers */,
 				1A5701A3180BCB590088DEC7 /* CCFontAtlas.h in Headers */,
 				1A01C68618F57BE800EFE3A6 /* CCArray.h in Headers */,
@ -5556,6 +5578,7 @@
 				5034CA44191D591100CE6051 /* ccShader_Label.vert in Headers */,
 				1A5702F5180BCE750088DEC7 /* CCTMXObjectGroup.h in Headers */,
 				1A5702F9180BCE750088DEC7 /* CCTMXTiledMap.h in Headers */,
+				1AC0269D1914068200FA920D /* ConvertUTF.h in Headers */,
 				500DC9A119106300007B91BF /* CCVector.h in Headers */,
 				1A5702FD180BCE750088DEC7 /* CCTMXXMLParser.h in Headers */,
 				1A570303180BCE890088DEC7 /* CCParallaxNode.h in Headers */,
@ -6085,6 +6108,7 @@
 				2AC795DB1862870F005EC8E1 /* SkeletonBounds.cpp in Sources */,
 				2AC795DC1862870F005EC8E1 /* Event.cpp in Sources */,
 				1A01C68A18F57BE800EFE3A6 /* CCDeprecated.cpp in Sources */,
+				1A1645B0191B726C008C7C7F /* ConvertUTF.c in Sources */,
 				500DC93219106300007B91BF /* CCAutoreleasePool.cpp in Sources */,
 				2905FA5618CF08D100240AA3 /* UILayout.cpp in Sources */,
 				2AC795DD1862870F005EC8E1 /* EventData.cpp in Sources */,
@ -6185,6 +6209,7 @@
 				500DC9B619106E6D007B91BF /* TransformUtils.cpp in Sources */,
 				1A5701EE180BCB8C0088DEC7 /* CCTransitionProgress.cpp in Sources */,
 				1A5701F7180BCBAD0088DEC7 /* CCMenu.cpp in Sources */,
+				1A1645B2191B726C008C7C7F /* ConvertUTFWrapper.cpp in Sources */,
 				1A5701FB180BCBAD0088DEC7 /* CCMenuItem.cpp in Sources */,
 				1A570202180BCBD40088DEC7 /* CCClippingNode.cpp in Sources */,
 				06CAAACF186AD7FC0012A414 /* TriggerBase.cpp in Sources */,
@ -6793,12 +6818,14 @@
 				1A8C59EC180E930E00EF57C3 /* CCSkin.cpp in Sources */,
 				2905FA4718CF08D100240AA3 /* UIButton.cpp in Sources */,
 				1A8C59F0180E930E00EF57C3 /* CCSpriteFrameCacheHelper.cpp in Sources */,
+				1A1645B1191B726C008C7C7F /* ConvertUTF.c in Sources */,
 				B2AF2FA218EBAEAE00C5807C /* Vector2.cpp in Sources */,
 				500DC8D219105F7D007B91BF /* CCAffineTransform.cpp in Sources */,
 				1A8C59F4180E930E00EF57C3 /* CCSSceneReader.cpp in Sources */,
 				1A8C59F8180E930E00EF57C3 /* CCTransformHelp.cpp in Sources */,
 				1A8C59FC180E930E00EF57C3 /* CCTween.cpp in Sources */,
 				2905FA5318CF08D100240AA3 /* UIImageView.cpp in Sources */,
+				1A1645B3191B726C008C7C7F /* ConvertUTFWrapper.cpp in Sources */,
 				1A8C5A04180E930E00EF57C3 /* CCUtilMath.cpp in Sources */,
 				2905FA7518CF08D100240AA3 /* UIScrollView.cpp in Sources */,
 				1A8C5A0E180E930E00EF57C3 /* DictionaryHelper.cpp in Sources */,
--- a/cocos/2d/CCFont.cpp
+++ b/cocos/2d/CCFont.cpp
@ -100,47 +100,6 @@ const char * Font::getCurrentGlyphCollection() const
    }
 }

-unsigned short* Font::getUTF16Text(const char *text, int &outNumLetters) const
-{
-    unsigned short* utf16String = cc_utf8_to_utf16(text);
-    
-    if(!utf16String)
-        return 0;
-    
-    outNumLetters = cc_wcslen(utf16String);
-    return utf16String;
-}
-
-int Font::getUTF16TextLenght(unsigned short int *text) const
-{
-     return cc_wcslen(text);
-}
-
-unsigned short * Font::trimUTF16Text(unsigned short int *text, int newBegin, int newEnd) const
-{
-    if ( newBegin < 0 || newEnd <= 0 )
-        return 0;
-    
-    if ( newBegin >= newEnd )
-        return 0;
-    
-    if (newEnd >= cc_wcslen(text))
-        return 0;
-    
-    int newLenght = newEnd - newBegin + 2;
-    unsigned short* trimmedString = new unsigned short[newLenght];
-    
-    for(int c = 0; c < (newLenght - 1); ++c)
-    {
-        trimmedString[c] = text[newBegin + c];
-    }
-    
-    // last char
-    trimmedString[newLenght-1] = 0x0000;
-    
-    // done
-    return trimmedString;
-}

 NS_CC_END

--- a/cocos/2d/CCFont.h
+++ b/cocos/2d/CCFont.h
@ -41,16 +41,12 @@ class CC_DLL Font : public Ref
 public:
    virtual  FontAtlas *createFontAtlas() = 0;

-    virtual int* getHorizontalKerningForTextUTF16(unsigned short *text, int &outNumLetters) const = 0;
+    virtual int* getHorizontalKerningForTextUTF16(const std::u16string& text, int &outNumLetters) const = 0;
    virtual const char* getCurrentGlyphCollection() const;
    
    
    virtual int getFontMaxHeight() const { return 0; }
    
-    virtual int getUTF16TextLenght(unsigned short int *text) const;
-    virtual unsigned short * getUTF16Text(const char *text, int &outNumLetters) const;
-    virtual unsigned short * trimUTF16Text(unsigned short int *text, int newBegin, int newEnd) const;
-    
 protected:
    
    Font();
--- a/cocos/2d/CCFontAtlas.cpp
+++ b/cocos/2d/CCFontAtlas.cpp
@ -202,7 +202,7 @@ void FontAtlas::addLetterDefinition(const FontLetterDefinition &letterDefinition
    _fontLetterDefinitions[letterDefinition.letteCharUTF16] = letterDefinition;
 }

-bool FontAtlas::getLetterDefinitionForChar(unsigned short  letteCharUTF16, FontLetterDefinition &outDefinition)
+bool FontAtlas::getLetterDefinitionForChar(char16_t letteCharUTF16, FontLetterDefinition &outDefinition)
 {
    auto outIterator = _fontLetterDefinitions.find(letteCharUTF16);

@ -218,13 +218,13 @@ bool FontAtlas::getLetterDefinitionForChar(unsigned short  letteCharUTF16, FontL
    }
 }

-bool FontAtlas::prepareLetterDefinitions(unsigned short *utf16String)
+bool FontAtlas::prepareLetterDefinitions(const std::u16string& utf16String)
 {
    FontFreeType* fontTTf = dynamic_cast<FontFreeType*>(_font);
-    if(fontTTf == nullptr || utf16String == nullptr)
+    if(fontTTf == nullptr)
        return false;

-    int length = cc_wcslen(utf16String);
+    size_t length = utf16String.length();

    float offsetAdjust = _letterPadding / 2;  
    long bitmapWidth;
@ -240,7 +240,7 @@ bool FontAtlas::prepareLetterDefinitions(unsigned short *utf16String)

    float startY = _currentPageOrigY;

-    for (int i = 0; i < length; ++i)
+    for (size_t i = 0; i < length; ++i)
    {
        auto outIterator = _fontLetterDefinitions.find(utf16String[i]);

--- a/cocos/2d/CCFontAtlas.h
+++ b/cocos/2d/CCFontAtlas.h
@ -25,10 +25,11 @@
 #ifndef _CCFontAtlas_h_
 #define _CCFontAtlas_h_

-#include <unordered_map>
 #include "base/CCPlatformMacros.h"
 #include "base/CCRef.h"
 #include "CCStdC.h"
+#include <string>
+#include <unordered_map>

 NS_CC_BEGIN

@ -71,9 +72,9 @@ public:
    virtual ~FontAtlas();
    
    void addLetterDefinition(const FontLetterDefinition &letterDefinition);
-    bool getLetterDefinitionForChar(unsigned short  letteCharUTF16, FontLetterDefinition &outDefinition);
+    bool getLetterDefinitionForChar(char16_t letteCharUTF16, FontLetterDefinition &outDefinition);
    
-    bool prepareLetterDefinitions(unsigned short  *utf16String);
+    bool prepareLetterDefinitions(const std::u16string& utf16String);

    inline const std::unordered_map<ssize_t, Texture2D*>& getTextures() const{ return _atlasTextures;}
    void  addTexture(Texture2D *texture, int slot);
--- a/cocos/2d/CCFontCharMap.cpp
+++ b/cocos/2d/CCFontCharMap.cpp
@ -99,12 +99,9 @@ FontCharMap::~FontCharMap()

 }

-int * FontCharMap::getHorizontalKerningForTextUTF16(unsigned short *text, int &outNumLetters) const
+int * FontCharMap::getHorizontalKerningForTextUTF16(const std::u16string& text, int &outNumLetters) const
 {
-    if (!text)
-        return 0;
-    
-    outNumLetters = cc_wcslen(text);
+    outNumLetters = static_cast<int>(text.length());
    
    if (!outNumLetters)
        return 0;
--- a/cocos/2d/CCFontCharMap.h
+++ b/cocos/2d/CCFontCharMap.h
@ -37,7 +37,7 @@ public:
    static FontCharMap * create(Texture2D* texture, int itemWidth, int itemHeight, int startCharMap);
    static FontCharMap * create(const std::string& plistFile);
    
-    virtual int* getHorizontalKerningForTextUTF16(unsigned short *text, int &outNumLetters) const override;
+    virtual int* getHorizontalKerningForTextUTF16(const std::u16string& text, int &outNumLetters) const override;
    virtual FontAtlas *createFontAtlas() override;
    
 protected:    
--- a/cocos/2d/CCFontFNT.cpp
+++ b/cocos/2d/CCFontFNT.cpp
@ -711,12 +711,9 @@ void FontFNT::purgeCachedData()
    }
 }

-int * FontFNT::getHorizontalKerningForTextUTF16(unsigned short *text, int &outNumLetters) const
+int * FontFNT::getHorizontalKerningForTextUTF16(const std::u16string& text, int &outNumLetters) const
 {
-    if (!text)
-        return 0;
-    
-    outNumLetters = cc_wcslen(text);
+    outNumLetters = static_cast<int>(text.length());
    
    if (!outNumLetters)
        return 0;
--- a/cocos/2d/CCFontFNT.h
+++ b/cocos/2d/CCFontFNT.h
@ -42,7 +42,7 @@ public:
    Removes from memory the cached configurations and the atlas name dictionary.
    */
    static void purgeCachedData();
-    virtual int* getHorizontalKerningForTextUTF16(unsigned short *text, int &outNumLetters) const override;
+    virtual int* getHorizontalKerningForTextUTF16(const std::u16string& text, int &outNumLetters) const override;
    virtual FontAtlas *createFontAtlas() override;
    
 protected:
--- a/cocos/2d/CCFontFreeType.cpp
+++ b/cocos/2d/CCFontFreeType.cpp
@ -175,20 +175,22 @@ FontAtlas * FontFreeType::createFontAtlas()
    FontAtlas *atlas = new FontAtlas(*this);
    if (_usedGlyphs != GlyphCollection::DYNAMIC)
    {
-        unsigned short* utf16 = cc_utf8_to_utf16(getCurrentGlyphCollection());
-        atlas->prepareLetterDefinitions(utf16);
-        CC_SAFE_DELETE_ARRAY(utf16);
+        std::u16string utf16;
+        if (StringUtils::UTF8ToUTF16(getCurrentGlyphCollection(), utf16))
+        {
+            atlas->prepareLetterDefinitions(utf16);
+        }
    }
    this->release();
    return atlas;
 }

-int * FontFreeType::getHorizontalKerningForTextUTF16(unsigned short *text, int &outNumLetters) const
+int * FontFreeType::getHorizontalKerningForTextUTF16(const std::u16string& text, int &outNumLetters) const
 {
-    if (!text || !_fontRef)
+    if (!_fontRef)
        return nullptr;
    
-    outNumLetters = cc_wcslen(text);
+    outNumLetters = static_cast<int>(text.length());

    if (!outNumLetters)
        return nullptr;
--- a/cocos/2d/CCFontFreeType.h
+++ b/cocos/2d/CCFontFreeType.h
@ -62,7 +62,7 @@ public:
    void     renderCharAt(unsigned char *dest,int posX, int posY, unsigned char* bitmap,long bitmapWidth,long bitmapHeight); 

    virtual FontAtlas   * createFontAtlas() override;
-    virtual int         * getHorizontalKerningForTextUTF16(unsigned short *text, int &outNumLetters) const override;
+    virtual int         * getHorizontalKerningForTextUTF16(const std::u16string& text, int &outNumLetters) const override;
    
    unsigned char       * getGlyphBitmap(unsigned short theChar, long &outWidth, long &outHeight, Rect &outRect,int &xAdvance);
    
--- a/cocos/2d/CCLabel.cpp
+++ b/cocos/2d/CCLabel.cpp
@ -247,8 +247,6 @@ Label::Label(FontAtlas *atlas /* = nullptr */, TextHAlignment hAlignment /* = Te
 , _labelDimensions(Size::ZERO)
 , _hAlignment(hAlignment)
 , _vAlignment(vAlignment)
-, _currentUTF16String(nullptr)
-, _originalUTF16String(nullptr)
 , _horizontalKernings(nullptr)
 , _fontAtlas(atlas)
 , _isOpacityModifyRGB(false)
@ -287,9 +285,7 @@ Label::Label(FontAtlas *atlas /* = nullptr */, TextHAlignment hAlignment /* = Te
 }

 Label::~Label()
-{   
-    delete [] _currentUTF16String;
-    delete [] _originalUTF16String;
+{
    delete [] _horizontalKernings;

    if (_fontAtlas)
@ -305,7 +301,7 @@ void Label::reset()
    TTFConfig temp;
    _fontConfig = temp;

-    _fontDirty = false;
+    _systemFontDirty = false;
    _systemFont = "Helvetica";
    _systemFontSize = 12;

@ -476,6 +472,12 @@ void Label::setString(const std::string& text)
 {
    _originalUTF8String = text;
    _contentDirty = true;
+
+    std::u16string utf16String;
+    if (StringUtils::UTF8ToUTF16(_originalUTF8String, utf16String))
+    {
+        _currentUTF16String  = utf16String;
+    }
 }

 void Label::setAlignment(TextHAlignment hAlignment,TextVAlignment vAlignment)
@ -574,7 +576,7 @@ float Label::getScaleX() const

 void Label::alignText()
 {
-    if (_fontAtlas == nullptr || _currentUTF16String == nullptr)
+    if (_fontAtlas == nullptr || _currentUTF16String.empty())
    {
        return;
    }
@ -603,7 +605,7 @@ void Label::alignText()
    if(_labelWidth > 0 || (_currNumLines > 1 && _hAlignment != TextHAlignment::LEFT))
        LabelTextFormatter::alignText(this);

-    int strLen = cc_wcslen(_currentUTF16String);
+    int strLen = static_cast<int>(_currentUTF16String.length());
    Rect uvRect;
    Sprite* letterSprite;
    for(const auto &child : _children) {
@ -633,7 +635,7 @@ void Label::alignText()
    updateColor();
 }

-bool Label::computeHorizontalKernings(unsigned short int *stringToRender)
+bool Label::computeHorizontalKernings(const std::u16string& stringToRender)
 {
    if (_horizontalKernings)
    {
@ -650,41 +652,6 @@ bool Label::computeHorizontalKernings(unsigned short int *stringToRender)
        return true;
 }

-bool Label::setOriginalString(unsigned short *stringToSet)
-{
-    if (_originalUTF16String)
-    {
-        delete [] _originalUTF16String;
-    }
-
-    int newStringLenght = cc_wcslen(stringToSet);
-    _originalUTF16String = new unsigned short int [newStringLenght + 1];
-    memset(_originalUTF16String, 0, (newStringLenght + 1) * 2);
-    memcpy(_originalUTF16String, stringToSet, (newStringLenght * 2));
-    _originalUTF16String[newStringLenght] = 0;
-
-    return true;
-}
-
-bool Label::setCurrentString(unsigned short *stringToSet)
-{
-    // set the new string
-    if (_currentUTF16String)
-    {
-        delete [] _currentUTF16String;
-    }
-
-    _currentUTF16String  = stringToSet;
-    computeStringNumLines();
-
-    // compute the advances
-    if (_fontAtlas)
-    {
-        computeHorizontalKernings(stringToSet);
-    }
-    return true;
-}
-
 void Label::updateQuads()
 {
    int index;
@ -959,9 +926,17 @@ void Label::setFontDefinition(const FontDefinition& textDefinition)

 void Label::updateContent()
 {
-    auto utf16String = cc_utf8_to_utf16(_originalUTF8String.c_str());
-    setCurrentString(utf16String);
-    setOriginalString(utf16String);
+    std::u16string utf16String;
+    if (StringUtils::UTF8ToUTF16(_originalUTF8String, utf16String))
+    {
+        _currentUTF16String  = utf16String;
+    }
+
+    computeStringNumLines();
+    if (_fontAtlas)
+    {
+        computeHorizontalKernings(_currentUTF16String);
+    }

    if (_textSprite)
    {
@ -1036,7 +1011,7 @@ void Label::updateFont()
    }

    _contentDirty = true;
-    _fontDirty = false;
+    _systemFontDirty = false;
 }

 void Label::drawTextSprite(Renderer *renderer, bool parentTransformUpdated)
@ -1075,7 +1050,7 @@ void Label::visit(Renderer *renderer, const Matrix &parentTransform, bool parent
    {
        return;
    }
-    if (_fontDirty)
+    if (_systemFontDirty)
    {
        updateFont();
    }
@ -1136,7 +1111,7 @@ void Label::setSystemFontName(const std::string& systemFont)
    if (systemFont != _systemFont)
    {
        _systemFont = systemFont;
-        _fontDirty = true;
+        _systemFontDirty = true;
    }
 }

@ -1145,16 +1120,15 @@ void Label::setSystemFontSize(float fontSize)
    if (_systemFontSize != fontSize)
    {
        _systemFontSize = fontSize;
-        _fontDirty = true;
+        _systemFontDirty = true;
    }
 }

 ///// PROTOCOL STUFF
 Sprite * Label::getLetter(int letterIndex)
 {
-    if (_fontDirty)
+    if (_systemFontDirty || _currentLabelType == LabelType::STRING_TEXTURE)
    {
-        updateFont();
        return nullptr;
    }

@ -1203,15 +1177,15 @@ void Label::computeStringNumLines()
 {
    int quantityOfLines = 1;

-    int stringLen = _currentUTF16String ? cc_wcslen(_currentUTF16String) : -1;
-    if (stringLen < 1)
+    if (_currentUTF16String.empty())
    {
-        _currNumLines = stringLen;
+        _currNumLines = 0;
        return;
    }

    // count number of lines
-    for (int i = 0; i < stringLen - 1; ++i)
+    size_t stringLen = _currentUTF16String.length();
+    for (size_t i = 0; i < stringLen-1; ++i)
    {
        if (_currentUTF16String[i] == '\n')
        {
@ -1224,7 +1198,7 @@ void Label::computeStringNumLines()

 int Label::getStringLength() const
 {
-    return _currentUTF16String ? cc_wcslen(_currentUTF16String) : (int)_originalUTF8String.length();
+    return static_cast<int>(_currentUTF16String.length());
 }

 // RGBA protocol
@ -1323,12 +1297,14 @@ void Label::updateColor()

 std::string Label::getDescription() const
 {
-    return StringUtils::format("<Label | Tag = %d, Label = '%s'>", _tag, cc_utf16_to_utf8(_currentUTF16String,-1,nullptr,nullptr));
+    std::string utf8str;
+    StringUtils::UTF16ToUTF8(_currentUTF16String, utf8str);
+    return StringUtils::format("<Label | Tag = %d, Label = '%s'>", _tag, utf8str.c_str());
 }

 const Size& Label::getContentSize() const
 {
-    if (_fontDirty)
+    if (_systemFontDirty)
    {
        const_cast<Label*>(this)->updateFont();
    }
--- a/cocos/2d/CCLabel.h
+++ b/cocos/2d/CCLabel.h
@ -286,9 +286,8 @@ protected:
    
    virtual void alignText();
    
-    bool computeHorizontalKernings(unsigned short int *stringToRender);
-    bool setCurrentString(unsigned short *stringToSet);
-    bool setOriginalString(unsigned short *stringToSet);
+    bool computeHorizontalKernings(const std::u16string& stringToRender);
+
    void computeStringNumLines();

    void updateQuads();
@ -311,7 +310,7 @@ protected:
    bool _isOpacityModifyRGB;
    bool _contentDirty;

-    bool _fontDirty;
+    bool _systemFontDirty;
    std::string _systemFont;
    float         _systemFontSize;
    LabelType _currentLabelType;
@ -344,8 +343,7 @@ protected:
    TextVAlignment _vAlignment;

    int           _currNumLines;
-    unsigned short int * _currentUTF16String;
-    unsigned short int * _originalUTF16String;
+    std::u16string _currentUTF16String;
    std::string          _originalUTF8String;

    float _fontScale;
--- a/cocos/2d/CCLabelTextFormatter.cpp
+++ b/cocos/2d/CCLabelTextFormatter.cpp
@ -30,21 +30,17 @@
 #include "base/CCDirector.h"
 #include "2d/CCLabel.h"

-using namespace std;
-
 NS_CC_BEGIN

 bool LabelTextFormatter::multilineText(Label *theLabel)
 {
-    //int strLen = theLabel->getStringLength();
    auto limit = theLabel->_limitShowCount;
-
    auto strWhole = theLabel->_currentUTF16String;

-    vector<unsigned short> multiline_string;
+    std::vector<char16_t> multiline_string;
    multiline_string.reserve( limit );

-    vector<unsigned short> last_word;
+    std::vector<char16_t> last_word;
    last_word.reserve( 25 );

    bool   isStartOfLine  = false, isStartOfWord = false;
@ -70,7 +66,7 @@ bool LabelTextFormatter::multilineText(Label *theLabel)
            tIndex = j+skip+justSkipped;
            if (strWhole[tIndex-1] == '\n')
            {
-                cc_utf8_trim_ws(&last_word);
+                StringUtils::trimUTF16Vector(last_word);

                last_word.push_back('\n');
                multiline_string.insert(multiline_string.end(), last_word.begin(), last_word.end());
@ -93,7 +89,7 @@ bool LabelTextFormatter::multilineText(Label *theLabel)
        if (tIndex >= limit)
            break;

-        unsigned short character = strWhole[tIndex];
+        char16_t character = strWhole[tIndex];

        if (!isStartOfWord)
        {
@ -109,15 +105,15 @@ bool LabelTextFormatter::multilineText(Label *theLabel)
        
        // 1) Whitespace.
        // 2) This character is non-CJK, but the last character is CJK
-        bool isspace = isspace_unicode(character);
+        bool isspace = StringUtils::isUnicodeSpace(character);
        bool isCJK = false;
        if(!isspace)
        {
-            isCJK = iscjk_unicode(character);
+            isCJK = StringUtils::isCJKUnicode(character);
        }

        if (isspace ||
-            (!last_word.empty() && iscjk_unicode(last_word.back()) && !isCJK))
+            (!last_word.empty() && StringUtils::isCJKUnicode(last_word.back()) && !isCJK))
        {
            // if current character is white space, put it into the current word
            if (isspace) last_word.push_back(character);
@ -139,9 +135,9 @@ bool LabelTextFormatter::multilineText(Label *theLabel)
            {
                last_word.push_back(character);
                
-                int found = cc_utf8_find_last_not_char(multiline_string, ' ');
+                int found = StringUtils::getIndexOfLastNotChar16(multiline_string, ' ');
                if (found != -1)
-                    cc_utf8_trim_ws(&multiline_string);
+                    StringUtils::trimUTF16Vector(multiline_string);
                else
                    multiline_string.clear();

@ -153,7 +149,7 @@ bool LabelTextFormatter::multilineText(Label *theLabel)
            }
            else
            {
-                cc_utf8_trim_ws(&last_word);
+                StringUtils::trimUTF16Vector(last_word);

                last_word.push_back('\n');
                
@ -175,16 +171,11 @@ bool LabelTextFormatter::multilineText(Label *theLabel)

    multiline_string.insert(multiline_string.end(), last_word.begin(), last_word.end());

-    size_t size = multiline_string.size();
-    unsigned short* strNew = new unsigned short[size + 1];
-
-    for (size_t j = 0; j < size; ++j)
-    {
-        strNew[j] = multiline_string[j];
-    }
-
-    strNew[size] = 0;
-    theLabel->setCurrentString(strNew);
+    std::u16string strNew(multiline_string.begin(), multiline_string.end());
+    
+    theLabel->_currentUTF16String = strNew;
+    theLabel->computeStringNumLines();
+    theLabel->computeHorizontalKernings(theLabel->_currentUTF16String);

    return true;
 }
@ -194,8 +185,8 @@ bool LabelTextFormatter::alignText(Label *theLabel)
    int i = 0;
    
    int lineNumber = 0;
-    int strLen = cc_wcslen(theLabel->_currentUTF16String);
-    vector<unsigned short> lastLine;
+    int strLen = static_cast<int>(theLabel->_currentUTF16String.length());
+    std::vector<char16_t> lastLine;
    auto strWhole = theLabel->_currentUTF16String;

    if (theLabel->_labelWidth > theLabel->_contentSize.width)
@ -205,7 +196,7 @@ bool LabelTextFormatter::alignText(Label *theLabel)

    for (int ctr = 0; ctr <= strLen; ++ctr)
    { 
-        unsigned short currentChar = strWhole[ctr];
+        char16_t currentChar = strWhole[ctr];

        if (currentChar == '\n' || currentChar == 0)
        {
@ -334,7 +325,7 @@ bool LabelTextFormatter::createStringSprites(Label *theLabel)
    
    for (unsigned int i = 0; i < stringLen; i++)
    {
-        unsigned short c    = strWhole[i];
+        char16_t c    = strWhole[i];
        if (fontAtlas->getLetterDefinitionForChar(c, tempDefinition))
        {
            charXOffset         = tempDefinition.offsetX;
--- a/cocos/2d/CMakeLists.txt
+++ b/cocos/2d/CMakeLists.txt
@ -26,6 +26,10 @@ set(COCOS_2D_PLATFORM_SRC

 endif()

+include_directories(
+  ../external/ConvertUTF
+)
+

 set(COCOS_2D_SRC
  2d/ccFPSImages.c
@ -110,5 +114,7 @@ set(COCOS_2D_SRC
  2d/platform/CCFileUtils.cpp
  2d/platform/CCImage.cpp
  ../external/edtaa3func/edtaa3func.cpp
+  ../external/ConvertUTF/ConvertUTFWrapper.cpp
+  ../external/ConvertUTF/ConvertUTF.c
 )

--- a/cocos/2d/ccUTF8.cpp
+++ b/cocos/2d/ccUTF8.cpp
@ -1,127 +1,36 @@
-/* 
- * This file uses some implementations of gutf8.c in glib.
- *
- * gutf8.c - Operations on UTF-8 strings.
- *
- * Copyright (C) 1999      Tom Tromey
- * Copyright (C) 2000      Red Hat, Inc.
- * Copyright (c) 2013-2014 Chukong Technologies Inc.
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.	 See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library; if not, write to the
- * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
- * Boston, MA 02111-1307, USA.
- */
+/****************************************************************************
+ Copyright (c) 2014 cocos2d-x.org
+ Copyright (c) 2014 Chukong Technologies Inc.
+
+ http://www.cocos2d-x.org
+
+ Permission is hereby granted, free of charge, to any person obtaining a copy
+ of this software and associated documentation files (the "Software"), to deal
+ in the Software without restriction, including without limitation the rights
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ copies of the Software, and to permit persons to whom the Software is
+ furnished to do so, subject to the following conditions:
+
+ The above copyright notice and this permission notice shall be included in
+ all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ THE SOFTWARE.
+ ****************************************************************************/

 #include "ccUTF8.h"
 #include "2d/platform/CCCommon.h"
 #include "base/CCConsole.h"
+#include "ConvertUTF.h"

 NS_CC_BEGIN

-int cc_wcslen(const unsigned short* str)
-{
-    int i=0;
-    while(*str++) i++;
-    return i;
-}
-
-/* Code from GLIB gutf8.c starts here. */
-
-#define UTF8_COMPUTE(Char, Mask, Len)        \
-if (Char < 128)                \
-{                        \
-Len = 1;                    \
-Mask = 0x7f;                \
-}                        \
-else if ((Char & 0xe0) == 0xc0)        \
-{                        \
-Len = 2;                    \
-Mask = 0x1f;                \
-}                        \
-else if ((Char & 0xf0) == 0xe0)        \
-{                        \
-Len = 3;                    \
-Mask = 0x0f;                \
-}                        \
-else if ((Char & 0xf8) == 0xf0)        \
-{                        \
-Len = 4;                    \
-Mask = 0x07;                \
-}                        \
-else if ((Char & 0xfc) == 0xf8)        \
-{                        \
-Len = 5;                    \
-Mask = 0x03;                \
-}                        \
-else if ((Char & 0xfe) == 0xfc)        \
-{                        \
-Len = 6;                    \
-Mask = 0x01;                \
-}                        \
-else                        \
-Len = -1;
-
-#define UTF8_LENGTH(Char)            \
-((Char) < 0x80 ? 1 :                \
-((Char) < 0x800 ? 2 :            \
-((Char) < 0x10000 ? 3 :            \
-((Char) < 0x200000 ? 4 :            \
-((Char) < 0x4000000 ? 5 : 6)))))
-
-
-#define UTF8_GET(Result, Chars, Count, Mask, Len)    \
-(Result) = (Chars)[0] & (Mask);            \
-for ((Count) = 1; (Count) < (Len); ++(Count))        \
-{                            \
-if (((Chars)[(Count)] & 0xc0) != 0x80)        \
-{                        \
-(Result) = -1;                \
-break;                    \
-}                        \
-(Result) <<= 6;                    \
-(Result) |= ((Chars)[(Count)] & 0x3f);        \
-}
-
-#define UNICODE_VALID(Char)            \
-((Char) < 0x110000 &&                \
-(((Char) & 0xFFFFF800) != 0xD800) &&        \
-((Char) < 0xFDD0 || (Char) > 0xFDEF) &&    \
-((Char) & 0xFFFE) != 0xFFFE)
-
-
-static const char utf8_skip_data[256] = {
-    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-    1, 1, 1, 1, 1, 1, 1,
-    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-    1, 1, 1, 1, 1, 1, 1,
-    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-    1, 1, 1, 1, 1, 1, 1,
-    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-    1, 1, 1, 1, 1, 1, 1,
-    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-    1, 1, 1, 1, 1, 1, 1,
-    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-    1, 1, 1, 1, 1, 1, 1,
-    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
-    2, 2, 2, 2, 2, 2, 2,
-    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 5,
-    5, 5, 5, 6, 6, 1, 1
-};
-
-static const char *const g_utf8_skip = utf8_skip_data;
-
-#define cc_utf8_next_char(p) (char *)((p) + g_utf8_skip[*(unsigned char *)(p)])
+namespace StringUtils {

 /*
 * @str:    the string to search through.
@ -129,14 +38,14 @@ static const char *const g_utf8_skip = utf8_skip_data;
 *
 * Return value: the index of the last character that is not c.
 * */
-unsigned int cc_utf8_find_last_not_char(std::vector<unsigned short> str, unsigned short c)
+unsigned int getIndexOfLastNotChar16(const std::vector<char16_t>& str, char16_t c)
 {
    int len = static_cast<int>(str.size());
-    
+
    int i = len - 1;
    for (; i >= 0; --i)
        if (str[i] != c) return i;
-    
+
    return i;
 }

@ -148,13 +57,13 @@ unsigned int cc_utf8_find_last_not_char(std::vector<unsigned short> str, unsigne
 *
 * Return value: the trimmed string.
 * */
-static void cc_utf8_trim_from(std::vector<unsigned short>* str, int index)
+static void trimUTF16VectorFromIndex(std::vector<char16_t>& str, int index)
 {
-    int size = static_cast<int>(str->size());
+    int size = static_cast<int>(str.size());
    if (index >= size || index < 0)
        return;
-    
-    str->erase(str->begin() + index, str->begin() + size);
+
+    str.erase(str.begin() + index, str.begin() + size);
 }

 /*
@ -164,14 +73,14 @@ static void cc_utf8_trim_from(std::vector<unsigned short>* str, int index)
 *
 * Return value: weather the character is a whitespace character.
 * */
-bool isspace_unicode(unsigned short ch)
+bool isUnicodeSpace(char16_t ch)
 {
    return  (ch >= 0x0009 && ch <= 0x000D) || ch == 0x0020 || ch == 0x0085 || ch == 0x00A0 || ch == 0x1680
    || (ch >= 0x2000 && ch <= 0x200A) || ch == 0x2028 || ch == 0x2029 || ch == 0x202F
    ||  ch == 0x205F || ch == 0x3000;
 }

-bool iscjk_unicode(unsigned short ch)
+bool isCJKUnicode(char16_t ch)
 {
    return (ch >= 0x4E00 && ch <= 0x9FBF)   // CJK Unified Ideographs
        || (ch >= 0x2E80 && ch <= 0x2FDF)   // CJK Radicals Supplement & Kangxi Radicals
@ -183,136 +92,147 @@ bool iscjk_unicode(unsigned short ch)
        || (ch >= 0x31C0 && ch <= 0x4DFF);  // Other exiensions
 }

-void cc_utf8_trim_ws(std::vector<unsigned short>* str)
+void trimUTF16Vector(std::vector<char16_t>& str)
 {
-    int len = static_cast<int>(str->size());
-    
+    int len = static_cast<int>(str.size());
+
    if ( len <= 0 )
        return;
-    
+
    int last_index = len - 1;
-    
+
    // Only start trimming if the last character is whitespace..
-    if (isspace_unicode((*str)[last_index]))
+    if (isUnicodeSpace(str[last_index]))
    {
        for (int i = last_index - 1; i >= 0; --i)
        {
-            if (isspace_unicode((*str)[i]))
+            if (isUnicodeSpace(str[i]))
                last_index = i;
            else
                break;
        }
-        
-        cc_utf8_trim_from(str, last_index);
+
+        trimUTF16VectorFromIndex(str, last_index);
    }
 }

-/*
- * cc_utf8_strlen:
- * @p: pointer to the start of a UTF-8 encoded string.
- * @max: the maximum number of bytes to examine. If @max
- *       is less than 0, then the string is assumed to be
- *       null-terminated. If @max is 0, @p will not be examined and
- *       may be %nullptr.
- *
- * Returns the length of the string in characters.
- *
- * Return value: the length of the string in characters
- **/
-long
-cc_utf8_strlen (const char * p, int max)
+bool UTF8ToUTF16(const std::string& utf8, std::u16string& outUtf16)
 {
-    long len = 0;
-    const char *start = p;
-    
-    if (!(p != nullptr || max == 0))
+    if (utf8.empty())
    {
-        return 0;
+        outUtf16.clear();
+        return true;
    }
+
+    bool ret = false;
    
-    if (max < 0)
+    const size_t utf16Bytes = (utf8.length()+1) * sizeof(char16_t);
+    char16_t* utf16 = (char16_t*)malloc(utf16Bytes);
+    memset(utf16, 0, utf16Bytes);
+
+    char* utf16ptr = reinterpret_cast<char*>(utf16);
+    const UTF8* error = nullptr;
+
+    if (llvm::ConvertUTF8toWide(2, utf8, utf16ptr, error))
    {
-        while (*p)
-        {
-            p = cc_utf8_next_char (p);
-            ++len;
-        }
+        outUtf16 = utf16;
+        ret = true;
    }
-    else
-    {
-        if (max == 0 || !*p)
-            return 0;
-        
-        p = cc_utf8_next_char (p);
-        
-        while (p - start < max && *p)
-        {
-            ++len;
-            p = cc_utf8_next_char (p);
-        }
-        
-        /* only do the last len increment if we got a complete
-         * char (don't count partial chars)
-         */
-        if (p - start == max)
-            ++len;
-    }
-    
-    return len;
+
+    free(utf16);
+
+    return ret;
 }

-/*
- * g_utf8_get_char:
- * @p: a pointer to Unicode character encoded as UTF-8
- *
- * Converts a sequence of bytes encoded as UTF-8 to a Unicode character.
- * If @p does not point to a valid UTF-8 encoded character, results are
- * undefined. If you are not sure that the bytes are complete
- * valid Unicode characters, you should use g_utf8_get_char_validated()
- * instead.
- *
- * Return value: the resulting character
- **/
-static unsigned int
-cc_utf8_get_char (const char * p)
+bool UTF16ToUTF8(const std::u16string& utf16, std::string& outUtf8)
 {
-    int i, mask = 0, len;
-    unsigned int result;
-    unsigned char c = (unsigned char) *p;
-    
-    UTF8_COMPUTE (c, mask, len);
-    if (len == -1)
-        return (unsigned int) - 1;
-    UTF8_GET (result, p, i, mask, len);
-    
-    return result;
+    if (utf16.empty())
+    {
+        outUtf8.clear();
+        return true;
+    }
+
+    return llvm::convertUTF16ToUTF8String(utf16, outUtf8);
+}
+
+std::vector<char16_t> getChar16VectorFromUTF16String(const std::u16string& utf16)
+{
+    std::vector<char16_t> ret;
+    size_t len = utf16.length();
+    ret.reserve(len);
+    for (size_t i = 0; i < len; ++i)
+    {
+        ret.push_back(utf16[i]);
+    }
+    return ret;
+}
+
+long getCharacterCountInUTF8String(const std::string& utf8)
+{
+    return getUTF8StringLength((const UTF8*)utf8.c_str());
+}
+
+} //namespace StringUtils {
+
+
+int cc_wcslen(const unsigned short* str)
+{
+    if (str == nullptr)
+        return -1;
+    int i=0;
+    while(*str++) i++;
+    return i;
+}
+
+void cc_utf8_trim_ws(std::vector<unsigned short>* str)
+{
+    if (str == nullptr)
+        return;
+    // unsigned short and char16_t are both 2 bytes
+    std::vector<char16_t>* ret = reinterpret_cast<std::vector<char16_t>*>(str);
+    StringUtils::trimUTF16Vector(*ret);
+}
+
+bool isspace_unicode(unsigned short ch)
+{
+    return StringUtils::isUnicodeSpace(ch);
 }


-unsigned short* cc_utf8_to_utf16(const char* str_old, int length/* = -1 */, int* rUtf16Size/* = nullptr */)
+bool iscjk_unicode(unsigned short ch)
 {
-    long len = cc_utf8_strlen(str_old, length);
-    if (rUtf16Size != nullptr) {
-        *rUtf16Size = static_cast<int>(len);
-    }
-    
-    unsigned short* str_new = new unsigned short[len + 1];
-    str_new[len] = 0;
-    
-    for (int i = 0; i < len; ++i)
+    return StringUtils::isCJKUnicode(ch);
+}
+
+
+long cc_utf8_strlen (const char * p, int max)
+{
+    CC_UNUSED_PARAM(max);
+    if (p == nullptr)
+        return -1;
+    return StringUtils::getCharacterCountInUTF8String(p);
+}
+
+unsigned int cc_utf8_find_last_not_char(const std::vector<unsigned short>& str, unsigned short c)
+{
+    std::vector<char16_t> char16Vector;
+    for (const auto& e : str)
    {
-        str_new[i] = cc_utf8_get_char(str_old);
-        str_old = cc_utf8_next_char(str_old);
+        char16Vector.push_back(e);
    }
    
-    return str_new;
+    return StringUtils::getIndexOfLastNotChar16(char16Vector, c);
 }

 std::vector<unsigned short> cc_utf16_vec_from_utf16_str(const unsigned short* str)
 {
-    int len = cc_wcslen(str);
    std::vector<unsigned short> str_new;
    
+    if (str == nullptr)
+        return str_new;
+    
+    int len = cc_wcslen(str);
+    
    for (int i = 0; i < len; ++i)
    {
        str_new.push_back(str[i]);
@ -320,209 +240,60 @@ std::vector<unsigned short> cc_utf16_vec_from_utf16_str(const unsigned short* st
    return str_new;
 }

-/**
- * cc_unichar_to_utf8:
- * @c: a ISO10646 character code
- * @outbuf: output buffer, must have at least 6 bytes of space.
- *       If %nullptr, the length will be computed and returned
- *       and nothing will be written to @outbuf.
- *
- * Converts a single character to UTF-8.
- *
- * Return value: number of bytes written
- **/
-int
-cc_unichar_to_utf8 (unsigned int c,
-                   char   *outbuf)
+unsigned short* cc_utf8_to_utf16(const char* str_old, int length/* = -1*/, int* rUtf16Size/* = nullptr*/)
 {
-    int len = 0;
-    int first;
-    int i;
+    if (str_old == nullptr)
+        return nullptr;
    
-    if (c < 0x80)
-    {
-        first = 0;
-        len = 1;
-    }
-    else if (c < 0x800)
-    {
-        first = 0xc0;
-        len = 2;
-    }
-    else if (c < 0x10000)
-    {
-        first = 0xe0;
-        len = 3;
-    }
-    else if (c < 0x200000)
-    {
-        first = 0xf0;
-        len = 4;
-    }
-    else if (c < 0x4000000)
-    {
-        first = 0xf8;
-        len = 5;
-    }
-    else
-    {
-        first = 0xfc;
-        len = 6;
-    }
+    unsigned short* ret = nullptr;
    
-    if (outbuf)
+    std::u16string outUtf16;
+    bool succeed = StringUtils::UTF8ToUTF16(str_old, outUtf16);
+    
+    if (succeed)
    {
-        for (i = len - 1; i > 0; --i)
+        ret = new unsigned short[outUtf16.length() + 1];
+        ret[outUtf16.length()] = 0;
+        memcpy(ret, outUtf16.data(), outUtf16.length() * sizeof(unsigned short));
+        if (rUtf16Size)
        {
-            outbuf[i] = (c & 0x3f) | 0x80;
-            c >>= 6;
+            *rUtf16Size = static_cast<int>(outUtf16.length());
        }
-        outbuf[0] = c | first;
    }
    
-    return len;
+    return ret;
 }

-#define SURROGATE_VALUE(h,l) (((h) - 0xd800) * 0x400 + (l) - 0xdc00 + 0x10000)
-
-/**
- * cc_utf16_to_utf8:
- * @str: a UTF-16 encoded string
- * @len: the maximum length of @str to use. If @len < 0, then
- *       the string is terminated with a 0 character.
- * @items_read: location to store number of words read, or %nullptr.
- *              If %nullptr, then %G_CONVERT_ERROR_PARTIAL_INPUT will be
- *              returned in case @str contains a trailing partial
- *              character. If an error occurs then the index of the
- *              invalid input is stored here.
- * @items_written: location to store number of bytes written, or %nullptr.
- *                 The value stored here does not include the trailing
- *                 0 byte.
- * @error: location to store the error occuring, or %nullptr to ignore
- *         errors. Any of the errors in #GConvertError other than
- *         %G_CONVERT_ERROR_NO_CONVERSION may occur.
- *
- * Convert a string from UTF-16 to UTF-8. The result will be
- * terminated with a 0 byte.
- *
- * Return value: a pointer to a newly allocated UTF-8 string.
- *               This value must be freed with free(). If an
- *               error occurs, %nullptr will be returned and
- *               @error set.
- **/
-char *
-cc_utf16_to_utf8 (const unsigned short  *str,
-                 int             len,
-                 long            *items_read,
-                 long            *items_written)
+char * cc_utf16_to_utf8 (const unsigned short  *str,
+                  int             len,
+                  long            *items_read,
+                  long            *items_written)
 {
-    /* This function and g_utf16_to_ucs4 are almost exactly identical - The lines that differ
-     * are marked.
-     */
-    const unsigned short *in;
-    char *out;
-    char *result = nullptr;
-    int n_bytes;
-    unsigned int high_surrogate;
+    if (str == nullptr)
+        return nullptr;
    
-    if (str == 0) return nullptr;
    
-    n_bytes = 0;
-    in = str;
-    high_surrogate = 0;
-    while ((len < 0 || in - str < len) && *in)
+    std::u16string utf16;
+    int utf16Len = len < 0 ? cc_wcslen(str) : len;
+    
+    for (int i = 0; i < utf16Len; ++i)
    {
-        unsigned short c = *in;
-        unsigned int wc;
-        
-        if (c >= 0xdc00 && c < 0xe000) /* low surrogate */
-        {
-            if (high_surrogate)
-            {
-                wc = SURROGATE_VALUE (high_surrogate, c);
-                high_surrogate = 0;
-            }
-            else
-            {
-                CCLOGERROR("Invalid sequence in conversion input");
-                goto err_out;
-            }
-        }
-        else
-        {
-            if (high_surrogate)
-            {
-                CCLOGERROR("Invalid sequence in conversion input");
-                goto err_out;
-            }
-            
-            if (c >= 0xd800 && c < 0xdc00) /* high surrogate */
-            {
-                high_surrogate = c;
-                goto next1;
-            }
-            else
-                wc = c;
-        }
-        
-        /********** DIFFERENT for UTF8/UCS4 **********/
-        n_bytes += UTF8_LENGTH (wc);
-        
-    next1:
-        in++;
+        utf16.push_back(str[i]);
    }
    
-    if (high_surrogate && !items_read)
-    {        
-        CCLOGERROR("Partial character sequence at end of input");
-        goto err_out;
-    }
+    char* ret = nullptr;
+    std::string outUtf8;
+    bool succeed = StringUtils::UTF16ToUTF8(utf16, outUtf8);
    
-    /* At this point, everything is valid, and we just need to convert
-     */
-    /********** DIFFERENT for UTF8/UCS4 **********/
-    result = new char[n_bytes + 1];
-    
-    high_surrogate = 0;
-    out = result;
-    in = str;
-    while (out < result + n_bytes)
+    if (succeed)
    {
-        unsigned short c = *in;
-        unsigned int wc;
-        
-        if (c >= 0xdc00 && c < 0xe000) /* low surrogate */
-        {
-            wc = SURROGATE_VALUE (high_surrogate, c);
-            high_surrogate = 0;
-        }
-        else if (c >= 0xd800 && c < 0xdc00) /* high surrogate */
-        {
-            high_surrogate = c;
-            goto next2;
-        }
-        else
-            wc = c;
-        
-        /********** DIFFERENT for UTF8/UCS4 **********/
-        out += cc_unichar_to_utf8 (wc, out);
-        
-    next2:
-        in++;
+        ret = new char[outUtf8.length() + 1];
+        ret[outUtf8.length()] = '\0';
+        memcpy(ret, outUtf8.data(), outUtf8.length());
    }
    
-    /********** DIFFERENT for UTF8/UCS4 **********/
-    *out = '\0';
-    
-    if (items_written)
-    /********** DIFFERENT for UTF8/UCS4 **********/
-        *items_written = out - result;
-    
-err_out:
-    if (items_read)
-        *items_read = in - str;
-    
-    return result;
+    return ret;
 }

+
 NS_CC_END
--- a/cocos/2d/ccUTF8.h
+++ b/cocos/2d/ccUTF8.h
@ -1,113 +1,217 @@
-/*
- * Copyright (C) 1999      Tom Tromey
- * Copyright (C) 2000      Red Hat, Inc.
- * Copyright (c) 2013-2014 Chukong Technologies Inc.
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.	 See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library; if not, write to the
- * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
- * Boston, MA 02111-1307, USA.
- */
+/****************************************************************************
+ Copyright (c) 2014 cocos2d-x.org
+ Copyright (c) 2014 Chukong Technologies Inc.
+
+ http://www.cocos2d-x.org
+
+ Permission is hereby granted, free of charge, to any person obtaining a copy
+ of this software and associated documentation files (the "Software"), to deal
+ in the Software without restriction, including without limitation the rights
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ copies of the Software, and to permit persons to whom the Software is
+ furnished to do so, subject to the following conditions:
+
+ The above copyright notice and this permission notice shall be included in
+ all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ THE SOFTWARE.
+ ****************************************************************************/

 #ifndef __cocos2dx__ccUTF8__
 #define __cocos2dx__ccUTF8__

 #include "base/CCPlatformMacros.h"
 #include <vector>
+#include <string>

 NS_CC_BEGIN

-CC_DLL int cc_wcslen(const unsigned short* str);
+namespace StringUtils {

-CC_DLL void cc_utf8_trim_ws(std::vector<unsigned short>* str);
+/**
+ *  @brief Converts utf8 string to utf16 string
+ *  @param utf8 The utf8 string to be converted
+ *  @param outUtf16 The output utf16 string
+ *  @return true if succeed, otherwise false
+ *  @note Please check the return value before using \p outUtf16
+ *  e.g.
+ *  @code
+ *    std::u16string utf16;
+ *    bool ret = StringUtils::UTF8ToUTF16("你好hello", utf16);
+ *    if (ret) {
+ *        do_some_thing_with_utf16(utf16);
+ *    }
+ *  @endcode
+ */
+CC_DLL bool UTF8ToUTF16(const std::string& utf8, std::u16string& outUtf16);
+
+/**
+ *  @brief Converts utf16 string to utf8 string
+ *  @param utf16 The utf16 string to be converted
+ *  @param outUtf8 The output utf8 string
+ *  @return true if succeed, otherwise false
+ *  @note Please check the return value before using \p outUtf8
+ *  e.g.
+ *  @code
+ *    std::string utf8;
+ *    bool ret = StringUtils::UTF16ToUTF8(u"\u4f60\u597d", utf8);
+ *    if (ret) {
+ *        do_some_thing_with_utf8(utf8);
+ *    }
+ *  @endcode
+ */
+CC_DLL bool UTF16ToUTF8(const std::u16string& utf16, std::string& outUtf8);
+
+/**
+ *  @brief Trims the unicode spaces at the end of char16_t vector
+ */
+CC_DLL void trimUTF16Vector(std::vector<char16_t>& str);
+
+/**
+ *  @brief Whether the character is a whitespace character.
+ *
+ *  @param ch    the unicode character
+ *  @returns     whether the character is a white space character.
+ *
+ *  @see http://en.wikipedia.org/wiki/Whitespace_character#Unicode
+ *
+ */
+CC_DLL bool isUnicodeSpace(char16_t ch);
+
+/**
+ *  @brief Whether the character is a Chinese/Japanese/Korean character.
+ *
+ *  @param ch    the unicode character
+ *  @returns     whether the character is a Chinese character.
+ *
+ *  @see http://www.searchtb.com/2012/04/chinese_encode.html
+ *  @see http://tieba.baidu.com/p/748765987
+ *
+ */
+CC_DLL bool isCJKUnicode(char16_t ch);
+
+/**
+ *  @brief Returns the length of the string in characters.
+ *
+ *  @param utf8 an UTF-8 encoded string.
+ *  @returns the length of the string in characters
+ */
+CC_DLL long getCharacterCountInUTF8String(const std::string& utf8);
+
+/**
+ *  @brief Gets the index of the last character that is not equal to the character given.
+ *
+ *  @param str   the string to be searched.
+ *  @param c     the character to be searched for.
+ *
+ *  @returns the index of the last character that is not \p c.
+ *
+ */
+CC_DLL unsigned int getIndexOfLastNotChar16(const std::vector<char16_t>& str, char16_t c);
+
+/**
+ *  @brief Gets char16_t vector from a given utf16 string
+ */
+CC_DLL std::vector<char16_t> getChar16VectorFromUTF16String(const std::u16string& utf16);
+
+} // namespace StringUtils {
+
+/**
+ * Returns the character count in UTF16 string
+ * @param str pointer to the start of a UTF-16 encoded string. It must be a NULL-terminated UTF-16 string.
+ * @deprecated Please use c++11 `std::u16string::length` instead, don't use `unsigned short*` directly
+ */
+CC_DEPRECATED_ATTRIBUTE CC_DLL int cc_wcslen(const unsigned short* str);
+
+/** Trims the whitespace characters at the end of a vector of UTF-16 code units
+ *  @deprecated Please use `StringUtils::trimUTF16Vector` instead
+ */
+
+CC_DEPRECATED_ATTRIBUTE void cc_utf8_trim_ws(std::vector<unsigned short>* str);

 /**
 * Whether the character is a whitespace character.
 *
 * @param ch    the unicode character
 * @returns     whether the character is a white space character.
+ * @deprecated Please use `StringUtils::isUnicodeSpace` instead
 *
 * @see http://en.wikipedia.org/wiki/Whitespace_character#Unicode
 * */
-CC_DLL bool isspace_unicode(unsigned short ch);
+CC_DEPRECATED_ATTRIBUTE bool isspace_unicode(unsigned short ch);

 /**
 * Whether the character is a Chinese/Japanese/Korean character.
 *
 * @param ch    the unicode character
 * @returns     whether the character is a Chinese character.
+ * @deprecated Please use `StringUtils::isCJKUnicode` instead
 *
 * @see http://www.searchtb.com/2012/04/chinese_encode.html
 * @see http://tieba.baidu.com/p/748765987
 * */
-CC_DLL bool iscjk_unicode(unsigned short ch);
+CC_DEPRECATED_ATTRIBUTE bool iscjk_unicode(unsigned short ch);

 /**
 * Returns the length of the string in characters.
 *
- * @param p     pointer to the start of a UTF-8 encoded string.
- * @param max   the maximum number of bytes to examine. If \p max is less than
- *              0, then the string is assumed to be null-terminated. If \p max
- *              is 0, \p p will not be examined and my be %nullptr.
- *
+ * @param p pointer to the start of a UTF-8 encoded string. It must be a NULL-terminated UTF-8 string.
+ * @param max Not used since 3.1; kept only for backward compatibility
+ * @deprecated Please use `StringUtils::getCharacterCountInUTF8String` instead
 * @returns the length of the string in characters
 **/
-CC_DLL long
-cc_utf8_strlen (const char * p, int max);
+CC_DEPRECATED_ATTRIBUTE long cc_utf8_strlen (const char * p, int max = -1);

 /**
 * Find the last character that is not equal to the character given.
 *
 * @param str   the string to be searched.
 * @param c     the character to be searched for.
- *
+ * @deprecated Please use `StringUtils::getIndexOfLastNotChar16` instead
 * @returns the index of the last character that is not \p c.
 * */
-CC_DLL unsigned int cc_utf8_find_last_not_char(std::vector<unsigned short> str, unsigned short c);
-
-CC_DLL std::vector<unsigned short> cc_utf16_vec_from_utf16_str(const unsigned short* str);
+CC_DEPRECATED_ATTRIBUTE unsigned int cc_utf8_find_last_not_char(const std::vector<unsigned short>& str, unsigned short c);

 /**
- * Creates a utf8 string from a cstring.
+ *  @brief Gets `unsigned short` vector from a given utf16 string
+ *  @deprecated Please use `StringUtils::getChar16VectorFromUTF16String` instead
+ */
+CC_DEPRECATED_ATTRIBUTE std::vector<unsigned short> cc_utf16_vec_from_utf16_str(const unsigned short* str);
+
+/**
+ * Creates a UTF-16 string from a UTF-8 encoded C string. The result will be null terminated.
 *
- * @param str_old   pointer to the start of a C string.
- *
- * @returns the newly created utf8 string.
+ * @param str_old pointer to the start of a C string. It must be a NULL-terminated UTF-8 string.
+ * @param length  not used since 3.1; kept only for backward compatibility
+ * @param rUtf16Size The character count in the return UTF16 string.
+ * @deprecated Please use `StringUtils::UTF8ToUTF16` instead
+ * @returns the newly created utf16 string, it must be released with `delete[]`,
+ *          If an error occurs, %NULL will be returned.
 * */
-CC_DLL unsigned short* cc_utf8_to_utf16(const char* str_old, int length = -1, int* rUtf16Size = nullptr);
+CC_DEPRECATED_ATTRIBUTE unsigned short* cc_utf8_to_utf16(const char* str_old, int length = -1, int* rUtf16Size = nullptr);

 /**
- * Convert a string from UTF-16 to UTF-8. The result will be null terminated.
- *
- * @param str   a UTF-16 encoded string
- * @param len   the maximum length of \p str to use. If \p len < 0, then the
- *              string is null terminated.
- * @param items_read    location to store number of words read, or %nullptr.
- *                      If %nullptr, then %G_CONVERT_ERROR_PARTIAL_INPUT will be
- *                      returned in case \p str contains a trailing partial
- *                      character. If an error occurs then the index of the
- *                      invalid input is stored here.
- * @param items_written location to store number of bytes written, or %nullptr.
- *                      The value stored here does not include the trailing null.
+ * Converts a string from UTF-16 to UTF-8. The result will be null terminated.
 *
+ * @param str a UTF-16 encoded string. It must be NULL-terminated when \p len is negative.
+ * @param len the number of UTF-16 code units of \p str to convert. If \p len < 0, the length is found by scanning for the terminating null.
+ * @param items_read     not used since 3.1; kept only for backward compatibility
+ * @param items_written  not used since 3.1; kept only for backward compatibility
+ * @deprecated Please use `StringUtils::UTF16ToUTF8` instead
 * @returns a pointer to a newly allocated UTF-8 string. This value must be
- *          freed with free(). If an error occurs, %nullptr will be returned.
+ *          released with `delete[]`. If an error occurs, %NULL will be returned.
 **/
-CC_DLL char *
-cc_utf16_to_utf8 (const unsigned short  *str,
-                  int             len,
-                  long            *items_read,
-                  long            *items_written);
+CC_DEPRECATED_ATTRIBUTE char * cc_utf16_to_utf8 (const unsigned short  *str,
+                  int             len = -1,
+                  long            *items_read = nullptr,
+                  long            *items_written = nullptr);
+

 NS_CC_END

--- a/cocos/2d/cocos2d.vcxproj
+++ b/cocos/2d/cocos2d.vcxproj
@ -23,8 +23,8 @@
    <PlatformToolset Condition="'$(VisualStudioVersion)' == '10.0'">v100</PlatformToolset>
    <PlatformToolset Condition="'$(VisualStudioVersion)' == '11.0'">v110</PlatformToolset>
    <PlatformToolset Condition="'$(VisualStudioVersion)' == '11.0' and exists('$(MSBuildProgramFiles32)\Microsoft SDKs\Windows\v7.1A')">v110_xp</PlatformToolset>
-	<PlatformToolset Condition="'$(VisualStudioVersion)' == '12.0'">v120</PlatformToolset>
-	<PlatformToolset Condition="'$(VisualStudioVersion)' == '12.0' and exists('$(MSBuildProgramFiles32)\Microsoft SDKs\Windows\v7.1A')">v120_xp</PlatformToolset>	
+    <PlatformToolset Condition="'$(VisualStudioVersion)' == '12.0'">v120</PlatformToolset>
+    <PlatformToolset Condition="'$(VisualStudioVersion)' == '12.0' and exists('$(MSBuildProgramFiles32)\Microsoft SDKs\Windows\v7.1A')">v120_xp</PlatformToolset>
  </PropertyGroup>
  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
    <ConfigurationType>StaticLibrary</ConfigurationType>
@ -32,8 +32,8 @@
    <PlatformToolset Condition="'$(VisualStudioVersion)' == '10.0'">v100</PlatformToolset>
    <PlatformToolset Condition="'$(VisualStudioVersion)' == '11.0'">v110</PlatformToolset>
    <PlatformToolset Condition="'$(VisualStudioVersion)' == '11.0' and exists('$(MSBuildProgramFiles32)\Microsoft SDKs\Windows\v7.1A')">v110_xp</PlatformToolset>
-	<PlatformToolset Condition="'$(VisualStudioVersion)' == '12.0'">v120</PlatformToolset>
-	<PlatformToolset Condition="'$(VisualStudioVersion)' == '12.0' and exists('$(MSBuildProgramFiles32)\Microsoft SDKs\Windows\v7.1A')">v120_xp</PlatformToolset>	
+    <PlatformToolset Condition="'$(VisualStudioVersion)' == '12.0'">v120</PlatformToolset>
+    <PlatformToolset Condition="'$(VisualStudioVersion)' == '12.0' and exists('$(MSBuildProgramFiles32)\Microsoft SDKs\Windows\v7.1A')">v120_xp</PlatformToolset>
  </PropertyGroup>
  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
  <ImportGroup Label="ExtensionSettings">
@ -77,7 +77,7 @@
    </PreBuildEvent>
    <ClCompile>
      <Optimization>Disabled</Optimization>
-      <AdditionalIncludeDirectories>$(MSBuildProgramFiles32)\Microsoft SDKs\Windows\v7.1A\include;$(EngineRoot)external\sqlite3\include;$(EngineRoot)external\unzip;$(EngineRoot)external\edtaa3func;$(EngineRoot)external\tinyxml2;$(EngineRoot)external\png\include\win32;$(EngineRoot)external\jpeg\include\win32;$(EngineRoot)external\tiff\include\win32;$(EngineRoot)external\webp\include\win32;$(EngineRoot)external\freetype2\include\win32;$(EngineRoot)external\win32-specific\icon\include;$(EngineRoot)external\win32-specific\zlib\include;$(EngineRoot)external\chipmunk\include\chipmunk;$(EngineRoot)external\xxhash;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <AdditionalIncludeDirectories>$(MSBuildProgramFiles32)\Microsoft SDKs\Windows\v7.1A\include;$(EngineRoot)external\sqlite3\include;$(EngineRoot)external\unzip;$(EngineRoot)external\edtaa3func;$(EngineRoot)external\tinyxml2;$(EngineRoot)external\png\include\win32;$(EngineRoot)external\jpeg\include\win32;$(EngineRoot)external\tiff\include\win32;$(EngineRoot)external\webp\include\win32;$(EngineRoot)external\freetype2\include\win32;$(EngineRoot)external\win32-specific\icon\include;$(EngineRoot)external\win32-specific\zlib\include;$(EngineRoot)external\chipmunk\include\chipmunk;$(EngineRoot)external\xxhash;$(EngineRoot)external\ConvertUTF;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
      <PreprocessorDefinitions>WIN32;_DEBUG;_WINDOWS;_LIB;COCOS2DXWIN32_EXPORTS;GL_GLEXT_PROTOTYPES;COCOS2D_DEBUG=1;_CRT_SECURE_NO_WARNINGS;_SCL_SECURE_NO_WARNINGS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
      <MinimalRebuild>false</MinimalRebuild>
      <BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
@ -125,7 +125,7 @@ xcopy /Y /Q "$(ProjectDir)..\..\external\win32-specific\gles\prebuilt\*.*" "$(Ou
      </Command>
    </PreBuildEvent>
    <ClCompile>
-      <AdditionalIncludeDirectories>$(MSBuildProgramFiles32)\Microsoft SDKs\Windows\v7.1A\include;$(EngineRoot)external\sqlite3\include;$(EngineRoot)external\unzip;$(EngineRoot)external\edtaa3func;$(EngineRoot)external\tinyxml2;$(EngineRoot)external\png\include\win32;$(EngineRoot)external\jpeg\include\win32;$(EngineRoot)external\tiff\include\win32;$(EngineRoot)external\webp\include\win32;$(EngineRoot)external\freetype2\include\win32;$(EngineRoot)external\win32-specific\icon\include;$(EngineRoot)external\win32-specific\zlib\include;$(EngineRoot)external\chipmunk\include\chipmunk;$(EngineRoot)external\xxhash;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <AdditionalIncludeDirectories>$(MSBuildProgramFiles32)\Microsoft SDKs\Windows\v7.1A\include;$(EngineRoot)external\sqlite3\include;$(EngineRoot)external\unzip;$(EngineRoot)external\edtaa3func;$(EngineRoot)external\tinyxml2;$(EngineRoot)external\png\include\win32;$(EngineRoot)external\jpeg\include\win32;$(EngineRoot)external\tiff\include\win32;$(EngineRoot)external\webp\include\win32;$(EngineRoot)external\freetype2\include\win32;$(EngineRoot)external\win32-specific\icon\include;$(EngineRoot)external\win32-specific\zlib\include;$(EngineRoot)external\chipmunk\include\chipmunk;$(EngineRoot)external\xxhash;$(EngineRoot)external\ConvertUTF;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
      <PreprocessorDefinitions>WIN32;NDEBUG;_WINDOWS;_LIB;COCOS2DXWIN32_EXPORTS;GL_GLEXT_PROTOTYPES;_CRT_SECURE_NO_WARNINGS;_SCL_SECURE_NO_WARNINGS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
      <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
      <PrecompiledHeader>
@ -168,6 +168,8 @@ xcopy /Y /Q "$(ProjectDir)..\..\external\win32-specific\gles\prebuilt\*.*" "$(Ou
    </PostBuildEvent>
  </ItemDefinitionGroup>
  <ItemGroup>
+    <ClCompile Include="..\..\external\ConvertUTF\ConvertUTF.c" />
+    <ClCompile Include="..\..\external\ConvertUTF\ConvertUTFWrapper.cpp" />
    <ClCompile Include="..\..\external\edtaa3func\edtaa3func.cpp" />
    <ClCompile Include="..\..\external\tinyxml2\tinyxml2.cpp" />
    <ClCompile Include="..\..\external\unzip\ioapi.cpp" />
@ -331,6 +333,7 @@ xcopy /Y /Q "$(ProjectDir)..\..\external\win32-specific\gles\prebuilt\*.*" "$(Ou
    <ClCompile Include="TGAlib.cpp" />
  </ItemGroup>
  <ItemGroup>
+    <ClInclude Include="..\..\external\ConvertUTF\ConvertUTF.h" />
    <ClInclude Include="..\..\external\edtaa3func\edtaa3func.h" />
    <ClInclude Include="..\..\external\tinyxml2\tinyxml2.h" />
    <ClInclude Include="..\..\external\unzip\ioapi.h" />
--- a/cocos/2d/cocos2d.vcxproj.filters
+++ b/cocos/2d/cocos2d.vcxproj.filters
@ -100,8 +100,8 @@
    <Filter Include="math">
      <UniqueIdentifier>{02a21a86-8f65-441b-ae13-11dec1c45ee5}</UniqueIdentifier>
    </Filter>
-    <Filter Include="renderer\shaders">
-      <UniqueIdentifier>{438e71df-3684-4619-9659-10e07ed6cd62}</UniqueIdentifier>
+    <Filter Include="ConvertUTF">
+      <UniqueIdentifier>{6c1e4a6b-c168-436b-aa63-0af7f4caebf9}</UniqueIdentifier>
    </Filter>
  </ItemGroup>
  <ItemGroup>
@ -406,7 +406,6 @@
    <ClCompile Include="platform\CCImage.cpp">
      <Filter>platform</Filter>
    </ClCompile>
-    <ClCompile Include="CCTweenFunction.cpp" />
    <ClCompile Include="..\..\external\xxhash\xxhash.c">
      <Filter>xxhash</Filter>
    </ClCompile>
@ -582,6 +581,15 @@
    <ClCompile Include="..\renderer\CCGLProgramCache.cpp">
      <Filter>renderer</Filter>
    </ClCompile>
+    <ClCompile Include="..\..\external\ConvertUTF\ConvertUTF.c">
+      <Filter>ConvertUTF</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\external\ConvertUTF\ConvertUTFWrapper.cpp">
+      <Filter>ConvertUTF</Filter>
+    </ClCompile>
+    <ClCompile Include="CCTweenFunction.cpp">
+      <Filter>actions</Filter>
+    </ClCompile>
  </ItemGroup>
  <ItemGroup>
    <ClInclude Include="..\physics\CCPhysicsBody.h">
@ -915,7 +923,6 @@
    <ClInclude Include="platform\desktop\CCGLView.h">
      <Filter>platform\desktop</Filter>
    </ClInclude>
-    <ClInclude Include="CCTweenFunction.h" />
    <ClInclude Include="..\..\external\xxhash\xxhash.h">
      <Filter>xxhash</Filter>
    </ClInclude>
@ -1130,6 +1137,12 @@
    <ClInclude Include="..\renderer\CCGLProgramCache.h">
      <Filter>renderer</Filter>
    </ClInclude>
+    <ClInclude Include="..\..\external\ConvertUTF\ConvertUTF.h">
+      <Filter>ConvertUTF</Filter>
+    </ClInclude>
+    <ClInclude Include="CCTweenFunction.h">
+      <Filter>actions</Filter>
+    </ClInclude>
  </ItemGroup>
  <ItemGroup>
    <Text Include="..\math\CMakeLists.txt">
--- a/cocos/2d/platform/linux/CCFileUtilsLinux.cpp
+++ b/cocos/2d/platform/linux/CCFileUtilsLinux.cpp
@ -119,4 +119,4 @@ bool FileUtilsLinux::isFileExistInternal(const std::string& strFilePath) const

 NS_CC_END

-#endif CC_TARGET_PLATFORM == CC_PLATFORM_LINUX
+#endif // CC_TARGET_PLATFORM == CC_PLATFORM_LINUX
--- a/cocos/Android.mk
+++ b/cocos/Android.mk
@ -159,6 +159,8 @@ physics/chipmunk/CCPhysicsContactInfo_chipmunk.cpp \
 physics/chipmunk/CCPhysicsJointInfo_chipmunk.cpp \
 physics/chipmunk/CCPhysicsShapeInfo_chipmunk.cpp \
 physics/chipmunk/CCPhysicsWorldInfo_chipmunk.cpp \
+../external/ConvertUTF/ConvertUTFWrapper.cpp \
+../external/ConvertUTF/ConvertUTF.c \
 ../external/tinyxml2/tinyxml2.cpp \
 ../external/unzip/ioapi.cpp \
 ../external/unzip/unzip.cpp \
@ -182,7 +184,8 @@ LOCAL_C_INCLUDES := $(LOCAL_PATH) \
                    $(LOCAL_PATH)/../external/unzip \
                    $(LOCAL_PATH)/../external/chipmunk/include/chipmunk \
                    $(LOCAL_PATH)/../external/edtaa3func \
-                    $(LOCAL_PATH)/../external/xxhash
+                    $(LOCAL_PATH)/../external/xxhash \
+                    $(LOCAL_PATH)/../external/ConvertUTF


 LOCAL_LDLIBS := -lGLESv2 \
--- a/cocos/deprecated/CCString.cpp
+++ b/cocos/deprecated/CCString.cpp
@ -272,4 +272,29 @@ __String* __String::clone() const
    return __String::create(_string);
 }

+namespace StringUtils {
+
+std::string format(const char* format, ...)
+{ /* Formats the printf-style arguments into a std::string.
+   * The text is rendered by vsnprintf() into a temporary heap buffer of
+   * CC_MAX_STRING_LENGTH (1024*100) bytes, so a longer result is cut off
+   * at the buffer size. If the buffer cannot be allocated, an empty
+   * string is returned. */
+#define CC_MAX_STRING_LENGTH (1024*100)
+    
+    std::string ret;
+    
+    va_list ap;
+    va_start(ap, format);
+    
+    char* buf = (char*)malloc(CC_MAX_STRING_LENGTH);
+    if (buf != nullptr)
+    {
+        vsnprintf(buf, CC_MAX_STRING_LENGTH, format, ap);
+        ret = buf; // copies up to the terminating NUL written by vsnprintf
+        free(buf);
+    }
+    va_end(ap);
+    
+    return ret;
+}
+
+} // namespace StringUtils
+    
 NS_CC_END
--- a/cocos/deprecated/CCString.h
+++ b/cocos/deprecated/CCString.h
@ -205,40 +205,19 @@ struct StringCompare : public std::binary_function<__String *, __String *, bool>
 #define StringMake(str) String::create(str)
 #define ccs             StringMake

-class StringUtils
+namespace StringUtils {
+
+template<typename T>
+std::string toString(T arg) // Renders arg as text by streaming it into a std::stringstream.
 {
-public:
+    std::stringstream ss;
+    ss << arg;
+    return ss.str();
+}
+
+std::string format(const char* format, ...) CC_FORMAT_PRINTF(1, 2); // printf-style formatting that returns the result as a std::string
    
-    template<typename T>
-    static std::string toString(T arg)
-    {
-        std::stringstream ss;
-        ss << arg;
-        return ss.str();
-    }
-    
-    static std::string format(const char* format, ...) CC_FORMAT_PRINTF(1, 2)
-    {
-        #define CC_MAX_STRING_LENGTH (1024*100)
-        
-        std::string ret;
-        
-        va_list ap;
-        va_start(ap, format);
-        
-        char* buf = (char*)malloc(CC_MAX_STRING_LENGTH);
-        if (buf != nullptr)
-        {
-            vsnprintf(buf, CC_MAX_STRING_LENGTH, format, ap);
-            ret = buf;
-            free(buf);
-        }
-        va_end(ap);
-        
-        return ret;
-    }
-    
-};
+} // namespace StringUtils

 // end of data_structure group
 /// @}
--- a/external/ConvertUTF/ConvertUTF.c
+++ b/external/ConvertUTF/ConvertUTF.c
@ -0,0 +1,589 @@
+/*===--- ConvertUTF.c - Universal Character Names conversions ---------------===
+ *
+ *                     The LLVM Compiler Infrastructure
+ *
+ * This file is distributed under the University of Illinois Open Source
+ * License. See LICENSE.TXT for details.
+ *
+ *===------------------------------------------------------------------------=*/
+/*
+ * Copyright 2001-2004 Unicode, Inc.
+ * 
+ * Disclaimer
+ * 
+ * This source code is provided as is by Unicode, Inc. No claims are
+ * made as to fitness for any particular purpose. No warranties of any
+ * kind are expressed or implied. The recipient agrees to determine
+ * applicability of information provided. If this file has been
+ * purchased on magnetic or optical media from Unicode, Inc., the
+ * sole remedy for any claim will be exchange of defective media
+ * within 90 days of receipt.
+ * 
+ * Limitations on Rights to Redistribute This Code
+ * 
+ * Unicode, Inc. hereby grants the right to freely use the information
+ * supplied in this file in the creation of products supporting the
+ * Unicode Standard, and to make copies of this file in any form
+ * for internal or external distribution as long as this notice
+ * remains attached.
+ */
+
+/* ---------------------------------------------------------------------
+
+    Conversions between UTF32, UTF-16, and UTF-8. Source code file.
+    Author: Mark E. Davis, 1994.
+    Rev History: Rick McGowan, fixes & updates May 2001.
+    Sept 2001: fixed const & error conditions per
+        mods suggested by S. Parent & A. Lillich.
+    June 2002: Tim Dodd added detection and handling of incomplete
+        source sequences, enhanced error detection, added casts
+        to eliminate compiler warnings.
+    July 2003: slight mods to back out aggressive FFFE detection.
+    Jan 2004: updated switches in from-UTF8 conversions.
+    Oct 2004: updated to use UNI_MAX_LEGAL_UTF32 in UTF-32 conversions.
+
+    See the header file "ConvertUTF.h" for complete documentation.
+
+------------------------------------------------------------------------ */
+
+
+#include "ConvertUTF.h"
+#ifdef CVTUTF_DEBUG
+#include <stdio.h>
+#endif
+
+#include <string.h>
+
+static const int halfShift  = 10; /* used for shifting by 10 bits */
+
+static const UTF32 halfBase = 0x0010000UL;
+static const UTF32 halfMask = 0x3FFUL;
+
+#define UNI_SUR_HIGH_START  (UTF32)0xD800
+#define UNI_SUR_HIGH_END    (UTF32)0xDBFF
+#define UNI_SUR_LOW_START   (UTF32)0xDC00
+#define UNI_SUR_LOW_END     (UTF32)0xDFFF
+#define false      0
+#define true        1
+
+/* --------------------------------------------------------------------- */
+
+/*
+ * Index into the table below with the first byte of a UTF-8 sequence to
+ * get the number of trailing bytes that are supposed to follow it.
+ * Note that *legal* UTF-8 sequences can't have 4 or 5 trailing bytes. The table is
+ * left as-is for anyone who may want to do such conversion, which was
+ * allowed in earlier algorithms.
+ */
+static const char trailingBytesForUTF8[256] = {
+    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+    2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5
+};
+
+/*
+ * Magic values subtracted from a buffer value during UTF8 conversion.
+ * This table contains as many values as there might be trailing bytes
+ * in a UTF-8 sequence.
+ */
+static const UTF32 offsetsFromUTF8[6] = { 0x00000000UL, 0x00003080UL, 0x000E2080UL, 
+                     0x03C82080UL, 0xFA082080UL, 0x82082080UL };
+
+/*
+ * Once the bits are split out into bytes of UTF-8, this is a mask OR-ed
+ * into the first byte, depending on how many bytes follow.  There are
+ * as many entries in this table as there are UTF-8 sequence types.
+ * (I.e., one byte sequence, two byte... etc.). Remember that sequences
+ * for *legal* UTF-8 will be 4 or fewer bytes total.
+ */
+static const UTF8 firstByteMark[7] = { 0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC };
+
+/* --------------------------------------------------------------------- */
+
+/* The interface converts a whole buffer to avoid function-call overhead.
+ * Constants have been gathered. Loops & conditionals have been removed as
+ * much as possible for efficiency, in favor of drop-through switches.
+ * (See "Note A" at the bottom of the file for equivalent code.)
+ * If your compiler supports it, the "isLegalUTF8" call can be turned
+ * into an inline function.
+ */
+
+
+/* ---------------------------------------------------------------------
+ * ConvertUTF32toUTF16
+ *
+ * Converts the UTF-32 values in [*sourceStart, sourceEnd) to UTF-16 and
+ * stores them in [*targetStart, targetEnd). On return *sourceStart and
+ * *targetStart point just past the last item consumed / written.
+ *
+ * Returns conversionOK, or:
+ *   targetExhausted - the next character does not fit in the target
+ *                     (*sourceStart is left on that character);
+ *   sourceIllegal   - strictConversion only: a surrogate value
+ *                     (UNI_SUR_HIGH_START..UNI_SUR_LOW_END) or a value
+ *                     above UNI_MAX_LEGAL_UTF32 was found.
+ * Without strictConversion such values are written as
+ * UNI_REPLACEMENT_CHAR instead.
+ * --------------------------------------------------------------------- */
+
+ConversionResult ConvertUTF32toUTF16 (
+        const UTF32** sourceStart, const UTF32* sourceEnd, 
+        UTF16** targetStart, UTF16* targetEnd, ConversionFlags flags) {
+    ConversionResult result = conversionOK;
+    const UTF32* source = *sourceStart;
+    UTF16* target = *targetStart;
+    while (source < sourceEnd) {
+        UTF32 ch;
+        if (target >= targetEnd) {
+            result = targetExhausted; break;
+        }
+        ch = *source++;
+        if (ch <= UNI_MAX_BMP) { /* Target is a character <= 0xFFFF */
+            /* UTF-16 surrogate values are illegal in UTF-32; 0xffff or 0xfffe are both reserved values */
+            if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_LOW_END) {
+                if (flags == strictConversion) {
+                    --source; /* return to the illegal value itself */
+                    result = sourceIllegal;
+                    break;
+                } else {
+                    *target++ = UNI_REPLACEMENT_CHAR;
+                }
+            } else {
+                *target++ = (UTF16)ch; /* normal case */
+            }
+        } else if (ch > UNI_MAX_LEGAL_UTF32) {
+            if (flags == strictConversion) {
+                result = sourceIllegal; /* NOTE(review): no break and no source
+                                         * back-up here, unlike the surrogate
+                                         * case above; the loop keeps going and
+                                         * a later targetExhausted can replace
+                                         * this result. */
+            } else {
+                *target++ = UNI_REPLACEMENT_CHAR;
+            }
+        } else {
+            /* target is a character in range 0xFFFF - 0x10FFFF. */
+            if (target + 1 >= targetEnd) {
+                --source; /* Back up source pointer! */
+                result = targetExhausted; break;
+            }
+            ch -= halfBase; /* offset from 0x10000, split below into two 10-bit halves */
+            *target++ = (UTF16)((ch >> halfShift) + UNI_SUR_HIGH_START);
+            *target++ = (UTF16)((ch & halfMask) + UNI_SUR_LOW_START);
+        }
+    }
+    *sourceStart = source;
+    *targetStart = target;
+    return result;
+}
+
+/* ---------------------------------------------------------------------
+ * ConvertUTF16toUTF32
+ *
+ * Converts the UTF-16 units in [*sourceStart, sourceEnd) to UTF-32 values
+ * in [*targetStart, targetEnd), advancing both start pointers. A high
+ * surrogate followed by a low surrogate is combined into one value.
+ *
+ * Returns conversionOK, or:
+ *   sourceExhausted - the input ends right after a high surrogate
+ *                     (*sourceStart is left on that surrogate);
+ *   sourceIllegal   - strictConversion only: an unpaired high or low
+ *                     surrogate (*sourceStart is left on it);
+ *   targetExhausted - no room for the next value (*sourceStart is reset
+ *                     to the start of that character).
+ * Without strictConversion an unpaired surrogate is copied to the target
+ * unchanged.
+ * --------------------------------------------------------------------- */
+
+ConversionResult ConvertUTF16toUTF32 (
+        const UTF16** sourceStart, const UTF16* sourceEnd, 
+        UTF32** targetStart, UTF32* targetEnd, ConversionFlags flags) {
+    ConversionResult result = conversionOK;
+    const UTF16* source = *sourceStart;
+    UTF32* target = *targetStart;
+    UTF32 ch, ch2;
+    while (source < sourceEnd) {
+        const UTF16* oldSource = source; /*  In case we have to back up because of target overflow. */
+        ch = *source++;
+        /* If we have a surrogate pair, convert to UTF32 first. */
+        if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_HIGH_END) {
+            /* If the 16 bits following the high surrogate are in the source buffer... */
+            if (source < sourceEnd) {
+                ch2 = *source;
+                /* If it's a low surrogate, convert to UTF32. */
+                if (ch2 >= UNI_SUR_LOW_START && ch2 <= UNI_SUR_LOW_END) {
+                    ch = ((ch - UNI_SUR_HIGH_START) << halfShift)
+                        + (ch2 - UNI_SUR_LOW_START) + halfBase;
+                    ++source;
+                } else if (flags == strictConversion) { /* it's an unpaired high surrogate */
+                    --source; /* return to the illegal value itself */
+                    result = sourceIllegal;
+                    break;
+                }
+            } else { /* We don't have the 16 bits following the high surrogate. */
+                --source; /* return to the high surrogate */
+                result = sourceExhausted;
+                break;
+            }
+        } else if (flags == strictConversion) {
+            /* UTF-16 surrogate values are illegal in UTF-32 */
+            if (ch >= UNI_SUR_LOW_START && ch <= UNI_SUR_LOW_END) {
+                --source; /* return to the illegal value itself */
+                result = sourceIllegal;
+                break;
+            }
+        }
+        if (target >= targetEnd) {
+            source = oldSource; /* Back up source pointer! */
+            result = targetExhausted; break;
+        }
+        *target++ = ch;
+    }
+    *sourceStart = source;
+    *targetStart = target;
+#ifdef CVTUTF_DEBUG
+if (result == sourceIllegal) { /* NOTE(review): ch2 is only assigned after a high
+                                * surrogate was read, so it can be uninitialized
+                                * or stale here when a lone low surrogate caused
+                                * the error. */
+    fprintf(stderr, "ConvertUTF16toUTF32 illegal seq 0x%04x,%04x\n", ch, ch2);
+    fflush(stderr);
+}
+#endif
+    return result;
+}
+ConversionResult ConvertUTF16toUTF8 (
+        const UTF16** sourceStart, const UTF16* sourceEnd, 
+        UTF8** targetStart, UTF8* targetEnd, ConversionFlags flags) { /*
+     * Converts the UTF-16 units in [*sourceStart, sourceEnd) to UTF-8 bytes
+     * in [*targetStart, targetEnd), advancing both start pointers.
+     *
+     * Returns conversionOK, or:
+     *   sourceExhausted - the input ends right after a high surrogate;
+     *   sourceIllegal   - strictConversion only: an unpaired high or low
+     *                     surrogate (*sourceStart is left on it);
+     *   targetExhausted - the encoded character does not fit
+     *                     (*sourceStart is reset to the start of it).
+     * Without strictConversion an unpaired surrogate is encoded as it is.
+     */
+    ConversionResult result = conversionOK;
+    const UTF16* source = *sourceStart;
+    UTF8* target = *targetStart;
+    while (source < sourceEnd) {
+        UTF32 ch;
+        unsigned short bytesToWrite = 0;
+        const UTF32 byteMask = 0xBF; /* clears bit 6 ... */
+        const UTF32 byteMark = 0x80; /* ... and this sets bit 7: together they form a 10xxxxxx byte */
+        const UTF16* oldSource = source; /* In case we have to back up because of target overflow. */
+        ch = *source++;
+        /* If we have a surrogate pair, convert to UTF32 first. */
+        if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_HIGH_END) {
+            /* If the 16 bits following the high surrogate are in the source buffer... */
+            if (source < sourceEnd) {
+                UTF32 ch2 = *source;
+                /* If it's a low surrogate, convert to UTF32. */
+                if (ch2 >= UNI_SUR_LOW_START && ch2 <= UNI_SUR_LOW_END) {
+                    ch = ((ch - UNI_SUR_HIGH_START) << halfShift)
+                        + (ch2 - UNI_SUR_LOW_START) + halfBase;
+                    ++source;
+                } else if (flags == strictConversion) { /* it's an unpaired high surrogate */
+                    --source; /* return to the illegal value itself */
+                    result = sourceIllegal;
+                    break;
+                }
+            } else { /* We don't have the 16 bits following the high surrogate. */
+                --source; /* return to the high surrogate */
+                result = sourceExhausted;
+                break;
+            }
+        } else if (flags == strictConversion) {
+            /* A low surrogate that does not follow a high surrogate is illegal */
+            if (ch >= UNI_SUR_LOW_START && ch <= UNI_SUR_LOW_END) {
+                --source; /* return to the illegal value itself */
+                result = sourceIllegal;
+                break;
+            }
+        }
+        /* Figure out how many bytes the result will require */
+        if (ch < (UTF32)0x80) {      bytesToWrite = 1;
+        } else if (ch < (UTF32)0x800) {     bytesToWrite = 2;
+        } else if (ch < (UTF32)0x10000) {   bytesToWrite = 3;
+        } else if (ch < (UTF32)0x110000) {  bytesToWrite = 4;
+        } else {                            bytesToWrite = 3;
+                                            ch = UNI_REPLACEMENT_CHAR;
+        }
+
+        target += bytesToWrite;
+        if (target > targetEnd) {
+            source = oldSource; /* Back up source pointer! */
+            target -= bytesToWrite; result = targetExhausted; break;
+        }
+        switch (bytesToWrite) { /* note: everything falls through.
+                                 * target already points past the sequence, so
+                                 * the bytes are stored back to front: first the
+                                 * continuation bytes, last the lead byte. */
+            case 4: *--target = (UTF8)((ch | byteMark) & byteMask); ch >>= 6;
+            case 3: *--target = (UTF8)((ch | byteMark) & byteMask); ch >>= 6;
+            case 2: *--target = (UTF8)((ch | byteMark) & byteMask); ch >>= 6;
+            case 1: *--target =  (UTF8)(ch | firstByteMark[bytesToWrite]);
+        }
+        target += bytesToWrite;
+    }
+    *sourceStart = source;
+    *targetStart = target;
+    return result;
+}
+
+/* ---------------------------------------------------------------------
+ * ConvertUTF32toUTF8
+ *
+ * Converts the UTF-32 values in [*sourceStart, sourceEnd) to UTF-8 bytes
+ * in [*targetStart, targetEnd), advancing both start pointers.
+ *
+ * Returns conversionOK, or:
+ *   sourceIllegal   - with strictConversion, a surrogate value was found
+ *                     (conversion stops, *sourceStart is left on it);
+ *                     in either mode, a value above UNI_MAX_LEGAL_UTF32
+ *                     was found (it is written as UNI_REPLACEMENT_CHAR
+ *                     and conversion continues);
+ *   targetExhausted - the encoded character does not fit
+ *                     (*sourceStart is left on that character).
+ * --------------------------------------------------------------------- */
+
+ConversionResult ConvertUTF32toUTF8 (
+        const UTF32** sourceStart, const UTF32* sourceEnd, 
+        UTF8** targetStart, UTF8* targetEnd, ConversionFlags flags) {
+    ConversionResult result = conversionOK;
+    const UTF32* source = *sourceStart;
+    UTF8* target = *targetStart;
+    while (source < sourceEnd) {
+        UTF32 ch;
+        unsigned short bytesToWrite = 0;
+        const UTF32 byteMask = 0xBF; /* clears bit 6 ... */
+        const UTF32 byteMark = 0x80; /* ... and this sets bit 7: together they form a 10xxxxxx byte */
+        ch = *source++;
+        if (flags == strictConversion ) {
+            /* UTF-16 surrogate values are illegal in UTF-32 */
+            if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_LOW_END) {
+                --source; /* return to the illegal value itself */
+                result = sourceIllegal;
+                break;
+            }
+        }
+        /*
+         * Figure out how many bytes the result will require. Turn any
+         * illegally large UTF32 things (> Plane 17) into replacement chars.
+         * That case also sets result to sourceIllegal, in strict and in
+         * lenient mode alike, but does not stop the loop.
+         */
+        if (ch < (UTF32)0x80) {      bytesToWrite = 1;
+        } else if (ch < (UTF32)0x800) {     bytesToWrite = 2;
+        } else if (ch < (UTF32)0x10000) {   bytesToWrite = 3;
+        } else if (ch <= UNI_MAX_LEGAL_UTF32) {  bytesToWrite = 4;
+        } else {                            bytesToWrite = 3;
+                                            ch = UNI_REPLACEMENT_CHAR;
+                                            result = sourceIllegal;
+        }
+        
+        target += bytesToWrite;
+        if (target > targetEnd) {
+            --source; /* Back up source pointer! */
+            target -= bytesToWrite; result = targetExhausted; break;
+        }
+        switch (bytesToWrite) { /* note: everything falls through.
+                                 * target already points past the sequence, so
+                                 * the bytes are stored back to front: first the
+                                 * continuation bytes, last the lead byte. */
+            case 4: *--target = (UTF8)((ch | byteMark) & byteMask); ch >>= 6;
+            case 3: *--target = (UTF8)((ch | byteMark) & byteMask); ch >>= 6;
+            case 2: *--target = (UTF8)((ch | byteMark) & byteMask); ch >>= 6;
+            case 1: *--target = (UTF8) (ch | firstByteMark[bytesToWrite]);
+        }
+        target += bytesToWrite;
+    }
+    *sourceStart = source;
+    *targetStart = target;
+    return result;
+}
+
+/* --------------------------------------------------------------------- */
+
+/*
+ * Utility routine to tell whether a sequence of bytes is legal UTF-8.
+ * This must be called with the length pre-determined by the first byte.
+ * If not calling this from ConvertUTF8to*, then the length can be set by:
+ *  length = trailingBytesForUTF8[*source]+1;
+ * and the sequence is illegal right away if there aren't that many bytes
+ * available.
+ * If presented with a length > 4, this returns false.  The Unicode
+ * definition of UTF-8 goes up to 4-byte sequences.
+ *
+ * How the fall-through switch works: the bytes are checked from the last
+ * one back to the second, and each must be a continuation byte
+ * (0x80..0xBF). After "case 2" the variable a holds the second byte,
+ * which the inner switch restricts further depending on the lead byte:
+ *   0xE0 -> second byte >= 0xA0  (smaller values would be an overlong form)
+ *   0xED -> second byte <= 0x9F  (larger values would encode U+D800..U+DFFF)
+ *   0xF0 -> second byte >= 0x90  (smaller values would be an overlong form)
+ *   0xF4 -> second byte <= 0x8F  (larger values would exceed U+10FFFF)
+ * Finally the lead byte itself is checked: 0x80..0xC1 and anything above
+ * 0xF4 are never accepted.
+ */
+
+static Boolean isLegalUTF8(const UTF8 *source, int length) {
+    UTF8 a;
+    const UTF8 *srcptr = source+length; /* one past the last byte; walked backwards below */
+    switch (length) {
+    default: return false;
+        /* Everything else falls through when "true"... */
+    case 4: if ((a = (*--srcptr)) < 0x80 || a > 0xBF) return false;
+    case 3: if ((a = (*--srcptr)) < 0x80 || a > 0xBF) return false;
+    case 2: if ((a = (*--srcptr)) < 0x80 || a > 0xBF) return false;
+
+        switch (*source) {
+            /* no fall-through in this inner switch */
+            case 0xE0: if (a < 0xA0) return false; break;
+            case 0xED: if (a > 0x9F) return false; break;
+            case 0xF0: if (a < 0x90) return false; break;
+            case 0xF4: if (a > 0x8F) return false; break;
+            default:   if (a < 0x80) return false;
+        }
+
+    case 1: if (*source >= 0x80 && *source < 0xC2) return false;
+    }
+    if (*source > 0xF4) return false;
+    return true;
+}
+
+/* --------------------------------------------------------------------- */
+
+/*
+ * Exported function to return whether a UTF-8 sequence is legal or not.
+ * This is not used here; it's just exported.
+ *
+ * The expected length is derived from the first byte; the function
+ * returns false when fewer than that many bytes remain before sourceEnd.
+ * Note that *source is read before any bounds check, so the caller must
+ * pass source < sourceEnd.
+ */
+Boolean isLegalUTF8Sequence(const UTF8 *source, const UTF8 *sourceEnd) {
+    int length = trailingBytesForUTF8[*source]+1;
+    if (length > sourceEnd - source) {
+        return false;
+    }
+    return isLegalUTF8(source, length);
+}
+
+/* --------------------------------------------------------------------- */
+
+/*
+ * Exported function to return the total number of bytes in a codepoint
+ * represented in UTF-8, given the value of the first byte.
+ *
+ * The value is trailingBytesForUTF8[first] + 1, i.e. 1..6. No validation
+ * is done: bytes 0x80..0xBF yield 1, and lead bytes 0xF8..0xFF yield
+ * 5 or 6 even though such sequences are not legal UTF-8.
+ */
+unsigned getNumBytesForUTF8(UTF8 first) {
+  return trailingBytesForUTF8[first] + 1;
+}
+
+int getUTF8StringLength(const UTF8* utf8)
+{ /* Counts the UTF-8 encoded code points in the NUL-terminated string utf8.
+   * Returns 0 as soon as a truncated or illegal sequence is found, so an
+   * invalid string cannot be told apart from an empty one by the result.
+   * utf8 is handed to strlen() and therefore must not be NULL. */
+    const UTF8** source = &utf8; /* cursor: the loop advances the local utf8 pointer through it */
+    const UTF8* sourceEnd = utf8 + strlen((const char*)utf8);
+    int ret = 0;
+    while (*source != sourceEnd) {
+        int length = trailingBytesForUTF8[**source] + 1;
+        if (length > sourceEnd - *source || !isLegalUTF8(*source, length))
+            return 0;
+        *source += length;
+        ++ret;
+    }
+    return ret;
+}
+
+
+/* --------------------------------------------------------------------- */
+
+/*
+ * Exported function to return whether a UTF-8 string is legal or not.
+ * This is not used here; it's just exported.
+ *
+ * Walks [*source, sourceEnd) one sequence at a time and advances *source
+ * as it goes. On success *source equals sourceEnd; on failure *source is
+ * left on the first sequence that is truncated or illegal.
+ */
+Boolean isLegalUTF8String(const UTF8 **source, const UTF8 *sourceEnd) {
+    while (*source != sourceEnd) {
+        int length = trailingBytesForUTF8[**source] + 1;
+        if (length > sourceEnd - *source || !isLegalUTF8(*source, length))
+            return false;
+        *source += length;
+    }
+    return true;
+}
+
+/* ---------------------------------------------------------------------
+ * ConvertUTF8toUTF16
+ *
+ * Converts the UTF-8 bytes in [*sourceStart, sourceEnd) to UTF-16 units
+ * in [*targetStart, targetEnd), advancing both start pointers.
+ *
+ * Returns conversionOK, or:
+ *   sourceExhausted - the last sequence is cut off by sourceEnd;
+ *   sourceIllegal   - isLegalUTF8() rejected a sequence (checked in strict
+ *                     and lenient mode alike) or, with strictConversion,
+ *                     the decoded value is a surrogate or is above
+ *                     UNI_MAX_UTF16;
+ *   targetExhausted - the decoded character does not fit in the target.
+ * In each of these cases *sourceStart is left on the first byte of the
+ * sequence that could not be converted.
+ * --------------------------------------------------------------------- */
+
+ConversionResult ConvertUTF8toUTF16 (
+        const UTF8** sourceStart, const UTF8* sourceEnd, 
+        UTF16** targetStart, UTF16* targetEnd, ConversionFlags flags) {
+    ConversionResult result = conversionOK;
+    const UTF8* source = *sourceStart;
+    UTF16* target = *targetStart;
+    while (source < sourceEnd) {
+        UTF32 ch = 0;
+        unsigned short extraBytesToRead = trailingBytesForUTF8[*source];
+        if (extraBytesToRead >= sourceEnd - source) {
+            result = sourceExhausted; break;
+        }
+        /* Do this check whether lenient or strict */
+        if (!isLegalUTF8(source, extraBytesToRead+1)) {
+            result = sourceIllegal;
+            break;
+        }
+        /*
+         * The cases all fall through. See "Note A" below.
+         * The raw bytes are summed with 6-bit shifts; the lead and
+         * continuation marker bits are removed afterwards in one step by
+         * subtracting offsetsFromUTF8[extraBytesToRead].
+         */
+        switch (extraBytesToRead) {
+            case 5: ch += *source++; ch <<= 6; /* remember, illegal UTF-8 */
+            case 4: ch += *source++; ch <<= 6; /* remember, illegal UTF-8 */
+            case 3: ch += *source++; ch <<= 6;
+            case 2: ch += *source++; ch <<= 6;
+            case 1: ch += *source++; ch <<= 6;
+            case 0: ch += *source++;
+        }
+        ch -= offsetsFromUTF8[extraBytesToRead];
+
+        if (target >= targetEnd) {
+            source -= (extraBytesToRead+1); /* Back up source pointer! */
+            result = targetExhausted; break;
+        }
+        if (ch <= UNI_MAX_BMP) { /* Target is a character <= 0xFFFF */
+            /* UTF-16 surrogate values are illegal in UTF-32 */
+            if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_LOW_END) {
+                if (flags == strictConversion) {
+                    source -= (extraBytesToRead+1); /* return to the illegal value itself */
+                    result = sourceIllegal;
+                    break;
+                } else {
+                    *target++ = UNI_REPLACEMENT_CHAR;
+                }
+            } else {
+                *target++ = (UTF16)ch; /* normal case */
+            }
+        } else if (ch > UNI_MAX_UTF16) {
+            if (flags == strictConversion) {
+                result = sourceIllegal;
+                source -= (extraBytesToRead+1); /* return to the start */
+                break; /* Bail out; shouldn't continue */
+            } else {
+                *target++ = UNI_REPLACEMENT_CHAR;
+            }
+        } else {
+            /* target is a character in range 0xFFFF - 0x10FFFF. */
+            if (target + 1 >= targetEnd) {
+                source -= (extraBytesToRead+1); /* Back up source pointer! */
+                result = targetExhausted; break;
+            }
+            ch -= halfBase; /* offset from 0x10000, split below into two 10-bit halves */
+            *target++ = (UTF16)((ch >> halfShift) + UNI_SUR_HIGH_START);
+            *target++ = (UTF16)((ch & halfMask) + UNI_SUR_LOW_START);
+        }
+    }
+    *sourceStart = source;
+    *targetStart = target;
+    return result;
+}
+
+/* ---------------------------------------------------------------------
+ * ConvertUTF8toUTF32
+ *
+ * Converts the UTF-8 bytes in [*sourceStart, sourceEnd) to UTF-32 values
+ * in [*targetStart, targetEnd), advancing both start pointers.
+ *
+ * Returns conversionOK, or:
+ *   sourceExhausted - the last sequence is cut off by sourceEnd;
+ *   sourceIllegal   - isLegalUTF8() rejected a sequence (checked in strict
+ *                     and lenient mode alike), or, with strictConversion,
+ *                     the decoded value is a surrogate; in these cases
+ *                     *sourceStart is left on the first byte of the
+ *                     offending sequence;
+ *   targetExhausted - no room for the next value (*sourceStart is left on
+ *                     the first byte of that sequence).
+ * A decoded value above UNI_MAX_LEGAL_UTF32 is written as
+ * UNI_REPLACEMENT_CHAR and sets the result to sourceIllegal in either
+ * mode, but conversion continues with the following bytes.
+ * --------------------------------------------------------------------- */
+
+ConversionResult ConvertUTF8toUTF32 (
+        const UTF8** sourceStart, const UTF8* sourceEnd, 
+        UTF32** targetStart, UTF32* targetEnd, ConversionFlags flags) {
+    ConversionResult result = conversionOK;
+    const UTF8* source = *sourceStart;
+    UTF32* target = *targetStart;
+    while (source < sourceEnd) {
+        UTF32 ch = 0;
+        unsigned short extraBytesToRead = trailingBytesForUTF8[*source];
+        if (extraBytesToRead >= sourceEnd - source) {
+            result = sourceExhausted; break;
+        }
+        /* Do this check whether lenient or strict */
+        if (!isLegalUTF8(source, extraBytesToRead+1)) {
+            result = sourceIllegal;
+            break;
+        }
+        /*
+         * The cases all fall through. See "Note A" below.
+         * The raw bytes are summed with 6-bit shifts; the lead and
+         * continuation marker bits are removed afterwards in one step by
+         * subtracting offsetsFromUTF8[extraBytesToRead].
+         */
+        switch (extraBytesToRead) {
+            case 5: ch += *source++; ch <<= 6;
+            case 4: ch += *source++; ch <<= 6;
+            case 3: ch += *source++; ch <<= 6;
+            case 2: ch += *source++; ch <<= 6;
+            case 1: ch += *source++; ch <<= 6;
+            case 0: ch += *source++;
+        }
+        ch -= offsetsFromUTF8[extraBytesToRead];
+
+        if (target >= targetEnd) {
+            source -= (extraBytesToRead+1); /* Back up the source pointer! */
+            result = targetExhausted; break;
+        }
+        if (ch <= UNI_MAX_LEGAL_UTF32) {
+            /*
+             * UTF-16 surrogate values are illegal in UTF-32, and anything
+             * over Plane 17 (> 0x10FFFF) is illegal.
+             */
+            if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_LOW_END) {
+                if (flags == strictConversion) {
+                    source -= (extraBytesToRead+1); /* return to the illegal value itself */
+                    result = sourceIllegal;
+                    break;
+                } else {
+                    *target++ = UNI_REPLACEMENT_CHAR;
+                }
+            } else {
+                *target++ = ch;
+            }
+        } else { /* i.e., ch > UNI_MAX_LEGAL_UTF32 */
+            result = sourceIllegal; /* set in both modes; the loop is not left here */
+            *target++ = UNI_REPLACEMENT_CHAR;
+        }
+    }
+    *sourceStart = source;
+    *targetStart = target;
+    return result;
+}
+
+/* ---------------------------------------------------------------------
+
+    Note A.
+    The fall-through switches in UTF-8 reading code save a
+    temp variable, some decrements & conditionals.  The switches
+    are equivalent to the following loop:
+        {
+            int tmpBytesToRead = extraBytesToRead+1;
+            do {
+                ch += *source++;
+                --tmpBytesToRead;
+                if (tmpBytesToRead) ch <<= 6;
+            } while (tmpBytesToRead > 0);
+        }
+    In UTF-8 writing code, the switches on "bytesToWrite" are
+    similarly unrolled loops.
+
+   --------------------------------------------------------------------- */
--- a/external/ConvertUTF/ConvertUTF.h
+++ b/external/ConvertUTF/ConvertUTF.h
@ -0,0 +1,254 @@
+/*===--- ConvertUTF.h - Universal Character Names conversions ---------------===
+ *
+ *                     The LLVM Compiler Infrastructure
+ *
+ * This file is distributed under the University of Illinois Open Source
+ * License. See LICENSE.TXT for details.
+ *
+ *==------------------------------------------------------------------------==*/
+/*
+ * Copyright 2001-2004 Unicode, Inc.
+ *
+ * Disclaimer
+ *
+ * This source code is provided as is by Unicode, Inc. No claims are
+ * made as to fitness for any particular purpose. No warranties of any
+ * kind are expressed or implied. The recipient agrees to determine
+ * applicability of information provided. If this file has been
+ * purchased on magnetic or optical media from Unicode, Inc., the
+ * sole remedy for any claim will be exchange of defective media
+ * within 90 days of receipt.
+ *
+ * Limitations on Rights to Redistribute This Code
+ *
+ * Unicode, Inc. hereby grants the right to freely use the information
+ * supplied in this file in the creation of products supporting the
+ * Unicode Standard, and to make copies of this file in any form
+ * for internal or external distribution as long as this notice
+ * remains attached.
+ */
+
+/* ---------------------------------------------------------------------
+
+    Conversions between UTF32, UTF-16, and UTF-8.  Header file.
+
+    Several functions are included here, forming a complete set of
+    conversions between the three formats.  UTF-7 is not included
+    here, but is handled in a separate source file.
+
+    Each of these routines takes pointers to input buffers and output
+    buffers.  The input buffers are const.
+
+    Each routine converts the text between *sourceStart and sourceEnd,
+    putting the result into the buffer between *targetStart and
+    targetEnd. Note: the end pointers are *after* the last item: e.g.
+    *(sourceEnd - 1) is the last item.
+
+    The return result indicates whether the conversion was successful,
+    and if not, whether the problem was in the source or target buffers.
+    (Only the first encountered problem is indicated.)
+
+    After the conversion, *sourceStart and *targetStart are both
+    updated to point to the end of last text successfully converted in
+    the respective buffers.
+
+    Input parameters:
+        sourceStart - pointer to a pointer to the source buffer.
+                The contents of this are modified on return so that
+                it points at the next thing to be converted.
+        targetStart - similarly, pointer to pointer to the target buffer.
+        sourceEnd, targetEnd - respectively pointers to the ends of the
+                two buffers, for overflow checking only.
+
+    These conversion functions take a ConversionFlags argument. When this
+    flag is set to strict, both irregular sequences and isolated surrogates
+    will cause an error.  When the flag is set to lenient, both irregular
+    sequences and isolated surrogates are converted.
+
+    Whether the flag is strict or lenient, all illegal sequences will cause
+    an error return. This includes sequences such as: <F4 90 80 80>, <C0 80>,
+    or <A0> in UTF-8, and values above 0x10FFFF in UTF-32. Conformant code
+    must check for illegal sequences.
+
+    When the flag is set to lenient, characters over 0x10FFFF are converted
+    to the replacement character; otherwise (when the flag is set to strict)
+    they constitute an error.
+
+    Output parameters:
+        The value "sourceIllegal" is returned from some routines if the input
+        sequence is malformed.  When "sourceIllegal" is returned, the source
+        value will point to the illegal value that caused the problem. E.g.,
+        in UTF-8 when a sequence is malformed, it points to the start of the
+        malformed sequence.
+
+    Author: Mark E. Davis, 1994.
+    Rev History: Rick McGowan, fixes & updates May 2001.
+         Fixes & updates, Sept 2001.
+
+------------------------------------------------------------------------ */
+
+#ifndef LLVM_SUPPORT_CONVERTUTF_H
+#define LLVM_SUPPORT_CONVERTUTF_H
+
+#include <stddef.h>   /* ptrdiff_t */
+/* ---------------------------------------------------------------------
+    The following 4 definitions are compiler-specific.
+    The C standard does not guarantee that wchar_t has at least
+    16 bits, so wchar_t is no less portable than unsigned short!
+    All should be unsigned values to avoid sign extension during
+    bit mask & shift operations.
+------------------------------------------------------------------------ */
+
+typedef unsigned int    UTF32;  /* at least 32 bits */
+typedef unsigned short  UTF16;  /* at least 16 bits */
+typedef unsigned char   UTF8;   /* typically 8 bits */
+typedef unsigned char   Boolean; /* 0 or 1 */
+
+/* Some fundamental constants */
+#define UNI_REPLACEMENT_CHAR (UTF32)0x0000FFFD
+#define UNI_MAX_BMP (UTF32)0x0000FFFF
+#define UNI_MAX_UTF16 (UTF32)0x0010FFFF
+#define UNI_MAX_UTF32 (UTF32)0x7FFFFFFF
+#define UNI_MAX_LEGAL_UTF32 (UTF32)0x0010FFFF
+
+#define UNI_MAX_UTF8_BYTES_PER_CODE_POINT 4
+
+#define UNI_UTF16_BYTE_ORDER_MARK_NATIVE  0xFEFF
+#define UNI_UTF16_BYTE_ORDER_MARK_SWAPPED 0xFFFE
+
+typedef enum {
+  conversionOK,           /* conversion successful */
+  sourceExhausted,        /* partial character in source, but hit end */
+  targetExhausted,        /* insuff. room in target for conversion */
+  sourceIllegal           /* source sequence is illegal/malformed */
+} ConversionResult;
+
+typedef enum {
+  strictConversion = 0,
+  lenientConversion
+} ConversionFlags;
+
+/* This is for C++ and does no harm in C */
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+ConversionResult ConvertUTF8toUTF16 (
+  const UTF8** sourceStart, const UTF8* sourceEnd,
+  UTF16** targetStart, UTF16* targetEnd, ConversionFlags flags);
+
+ConversionResult ConvertUTF8toUTF32 (
+  const UTF8** sourceStart, const UTF8* sourceEnd,
+  UTF32** targetStart, UTF32* targetEnd, ConversionFlags flags);
+
+ConversionResult ConvertUTF16toUTF8 (
+  const UTF16** sourceStart, const UTF16* sourceEnd,
+  UTF8** targetStart, UTF8* targetEnd, ConversionFlags flags);
+
+ConversionResult ConvertUTF32toUTF8 (
+  const UTF32** sourceStart, const UTF32* sourceEnd,
+  UTF8** targetStart, UTF8* targetEnd, ConversionFlags flags);
+
+ConversionResult ConvertUTF16toUTF32 (
+  const UTF16** sourceStart, const UTF16* sourceEnd,
+  UTF32** targetStart, UTF32* targetEnd, ConversionFlags flags);
+
+ConversionResult ConvertUTF32toUTF16 (
+  const UTF32** sourceStart, const UTF32* sourceEnd,
+  UTF16** targetStart, UTF16* targetEnd, ConversionFlags flags);
+
+Boolean isLegalUTF8Sequence(const UTF8 *source, const UTF8 *sourceEnd);
+
+Boolean isLegalUTF8String(const UTF8 **source, const UTF8 *sourceEnd);
+
+unsigned getNumBytesForUTF8(UTF8 firstByte);
+    
+int getUTF8StringLength(const UTF8* utf8);
+
+#ifdef __cplusplus
+}
+
+/*************************************************************************/
+/* Below are LLVM-specific wrappers of the functions above. */
+
+//#include "llvm/ADT/ArrayRef.h"
+//#include "llvm/ADT/StringRef.h"
+
+#include <vector>
+#include <string>
+
+namespace llvm {
+
+/**
+ * Convert a UTF8 std::string to UTF8, UTF16, or UTF32 depending on
+ * WideCharWidth. The converted data is written to ResultPtr, which needs to
+ * point to at least WideCharWidth * (Source.size() + 1) bytes. On success,
+ * ResultPtr will point one after the end of the copied string. On failure,
+ * ResultPtr will not be changed, and ErrorPtr will be set to the location of
+ * the first character which could not be converted.
+ * \return true on success.
+ */
+bool ConvertUTF8toWide(unsigned WideCharWidth, const std::string& Source,
+                       char *&ResultPtr, const UTF8 *&ErrorPtr);
+
+/**
+ * Convert an Unicode code point to UTF8 sequence.
+ *
+ * \param Source a Unicode code point.
+ * \param [in,out] ResultPtr pointer to the output buffer, needs to be at least
+ * \c UNI_MAX_UTF8_BYTES_PER_CODE_POINT bytes.  On success \c ResultPtr is
+ * updated one past end of the converted sequence.
+ *
+ * \returns true on success.
+ */
+bool ConvertCodePointToUTF8(unsigned Source, char *&ResultPtr);
+
+/**
+ * Convert the first UTF8 sequence in the given source buffer to a UTF32
+ * code point.
+ *
+ * \param [in,out] source A pointer to the source buffer. If the conversion
+ * succeeds, this pointer will be updated to point to the byte just past the
+ * end of the converted sequence.
+ * \param sourceEnd A pointer just past the end of the source buffer.
+ * \param [out] target The converted code
+ * \param flags Whether the conversion is strict or lenient.
+ *
+ * \returns conversionOK on success
+ *
+ * \sa ConvertUTF8toUTF32
+ */
+static inline ConversionResult convertUTF8Sequence(const UTF8 **source,
+                                                   const UTF8 *sourceEnd,
+                                                   UTF32 *target,
+                                                   ConversionFlags flags) {
+  if (*source == sourceEnd)  // empty input: nothing to convert
+    return sourceExhausted;
+  unsigned size = getNumBytesForUTF8(**source);  // byte count reported for the first byte
+  if ((ptrdiff_t)size > sourceEnd - *source)  // sequence would run past sourceEnd
+    return sourceExhausted;
+  return ConvertUTF8toUTF32(source, *source + size, &target, target + 1, flags);
+}
+
+/**
+ * Returns true if a blob of text starts with a UTF-16 big or little endian byte
+ * order mark.
+ */
+bool hasUTF16ByteOrderMark(const char* SrcBytes, size_t len);
+
+/**
+ * Converts a std::u16string assumed to hold UTF16 into a UTF8 std::string.
+ *
+ * \param [in] utf16 A string of what is assumed to be UTF-16 encoded text.
+ * \param [out] Out Converted UTF-8 is stored here on success.
+ * \returns true on success
+ */
+bool convertUTF16ToUTF8String(const std::u16string& utf16, std::string &Out);
+
+} /* end namespace llvm */
+
+#endif
+
+/* --------------------------------------------------------------------- */
+
+#endif
--- a/external/ConvertUTF/ConvertUTFWrapper.cpp
+++ b/external/ConvertUTF/ConvertUTFWrapper.cpp
@ -0,0 +1,144 @@
+//===-- ConvertUTFWrapper.cpp - Wrap ConvertUTF.h with clang data types -----===
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ConvertUTF.h"
+//#include "llvm/Support/SwapByteOrder.h"
+#include <string>
+#include <vector>
+#include <stdint.h>  // uint16_t
+#include <assert.h>
+#include <memory.h>
+
+namespace llvm {
+
+bool ConvertUTF8toWide(unsigned WideCharWidth, const std::string& Source,
+                       char *&ResultPtr, const UTF8 *&ErrorPtr) {
+  // Converts Source (UTF-8) to WideCharWidth-byte code units at ResultPtr.
+  assert(WideCharWidth == 1 || WideCharWidth == 2 || WideCharWidth == 4);
+  ConversionResult result = conversionOK;
+  if (WideCharWidth == 1) {
+    // UTF-8 to UTF-8: check the input, then copy the bytes over unchanged.
+    const UTF8 *Pos = reinterpret_cast<const UTF8*>(Source.data());
+    if (!isLegalUTF8String(&Pos, reinterpret_cast<const UTF8*>(Source.data() + Source.length()))) {
+      result = sourceIllegal;
+      ErrorPtr = Pos;
+    } else {
+      memcpy(ResultPtr, Source.data(), Source.size());
+      ResultPtr += Source.size();
+    }
+  } else if (WideCharWidth == 2) {
+    const UTF8 *sourceStart = (const UTF8*)Source.data();
+    // FIXME: Make the type of the result buffer correct instead of
+    // using reinterpret_cast.
+    UTF16 *targetStart = reinterpret_cast<UTF16*>(ResultPtr);
+    // targetEnd counts UTF16 units, not bytes: N source bytes yield <= N units.
+    result = ConvertUTF8toUTF16(
+        &sourceStart, sourceStart + Source.size(),
+        &targetStart, targetStart + Source.size(), strictConversion);
+    if (result == conversionOK)
+      ResultPtr = reinterpret_cast<char*>(targetStart);
+    else
+      ErrorPtr = sourceStart;
+  } else if (WideCharWidth == 4) {
+    const UTF8 *sourceStart = (const UTF8*)Source.data();
+    // FIXME: Make the type of the result buffer correct instead of
+    // using reinterpret_cast.
+    UTF32 *targetStart = reinterpret_cast<UTF32*>(ResultPtr);
+    // targetEnd counts UTF32 units, not bytes: N source bytes yield <= N units.
+    result = ConvertUTF8toUTF32(
+        &sourceStart, sourceStart + Source.size(),
+        &targetStart, targetStart + Source.size(), strictConversion);
+    if (result == conversionOK)
+      ResultPtr = reinterpret_cast<char*>(targetStart);
+    else
+      ErrorPtr = sourceStart;
+  }
+  assert((result != targetExhausted) && "ConvertUTF8toUTFXX exhausted target buffer");
+  return result == conversionOK;
+}
+
+bool ConvertCodePointToUTF8(unsigned Source, char *&ResultPtr) {
+  // Encode the single code point in Source; ResultPtr advances only on success.
+  const UTF32 *SourceStart = &Source;
+  const UTF32 *SourceEnd = SourceStart + 1;
+  UTF8 *TargetStart = reinterpret_cast<UTF8 *>(ResultPtr);
+  // The header requires the buffer to hold one maximal UTF-8 sequence.
+  UTF8 *TargetEnd = TargetStart + UNI_MAX_UTF8_BYTES_PER_CODE_POINT;
+  if (ConvertUTF32toUTF8(&SourceStart, SourceEnd, &TargetStart, TargetEnd,
+                         strictConversion) != conversionOK)
+    return false;
+
+  ResultPtr = reinterpret_cast<char*>(TargetStart);
+  return true;
+}
+
+bool hasUTF16ByteOrderMark(const char* S, size_t len) {
+  return (len >= 2 &&  // both bytes of the mark must be present
+          ((S[0] == '\xff' && S[1] == '\xfe') ||   // bytes FF FE
+           (S[0] == '\xfe' && S[1] == '\xff')));   // bytes FE FF
+}
+    
+/// SwapByteOrder_16 - This function returns a byte-swapped representation of
+/// the 16-bit argument.
+inline uint16_t SwapByteOrder_16(uint16_t value) {
+#if defined(_MSC_VER) && !defined(_DEBUG)
+    // The DLL version of the runtime lacks these functions (bug!?), but in a
+    // release build they're replaced with BSWAP instructions anyway.
+    return _byteswap_ushort(value);
+#else
+    uint16_t Hi = value << 8;  // low byte shifted into the high half
+    uint16_t Lo = value >> 8;  // high byte shifted into the low half
+    return Hi | Lo;
+#endif
+}
+
+bool convertUTF16ToUTF8String(const std::u16string& utf16, std::string &Out) {
+  assert(Out.empty());  // precondition: the output string starts out empty
+
+  // Avoid OOB by returning early on empty input.
+  if (utf16.empty())
+    return true;
+
+  const UTF16 *Src = reinterpret_cast<const UTF16 *>(utf16.data());
+  const UTF16 *SrcEnd = reinterpret_cast<const UTF16 *>(utf16.data() + utf16.length());
+
+  // Byteswap if necessary: a leading swapped BOM means every unit is swapped.
+  std::vector<UTF16> ByteSwapped;
+  if (Src[0] == UNI_UTF16_BYTE_ORDER_MARK_SWAPPED) {
+    ByteSwapped.insert(ByteSwapped.end(), Src, SrcEnd);
+    for (size_t I = 0, E = ByteSwapped.size(); I != E; ++I)
+      ByteSwapped[I] = SwapByteOrder_16(ByteSwapped[I]);
+    Src = &ByteSwapped[0];  // from here on, read from the swapped copy
+    SrcEnd = &ByteSwapped[ByteSwapped.size() - 1] + 1;  // one past its last element
+  }
+
+  // Skip the BOM for conversion.
+  if (Src[0] == UNI_UTF16_BYTE_ORDER_MARK_NATIVE)
+    Src++;
+
+  // Just allocate enough space up front.  We'll shrink it later.
+  Out.resize(utf16.length() * UNI_MAX_UTF8_BYTES_PER_CODE_POINT + 1);
+  UTF8 *Dst = reinterpret_cast<UTF8 *>(&Out[0]);
+  UTF8 *DstEnd = Dst + Out.size();
+
+  ConversionResult CR =
+      ConvertUTF16toUTF8(&Src, SrcEnd, &Dst, DstEnd, strictConversion);
+  assert(CR != targetExhausted);
+
+  if (CR != conversionOK) {
+    Out.clear();  // leave no partial output behind on failure
+    return false;
+  }
+
+  Out.resize(reinterpret_cast<char *>(Dst) - &Out[0]);  // shrink to where Dst ended up
+  return true;
+}
+
+} // end namespace llvm
+
--- a/licenses/LICENSE_llvm.txt
+++ b/licenses/LICENSE_llvm.txt
@ -0,0 +1,43 @@
+==============================================================================
+LLVM Release License
+==============================================================================
+University of Illinois/NCSA
+Open Source License
+
+Copyright (c) 2003-2013 University of Illinois at Urbana-Champaign.
+All rights reserved.
+
+Developed by:
+
+    LLVM Team
+
+    University of Illinois at Urbana-Champaign
+
+    http://llvm.org
+
+Permission is hereby granted, free of charge, to any person obtaining a copy of
+this software and associated documentation files (the "Software"), to deal with
+the Software without restriction, including without limitation the rights to
+use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+of the Software, and to permit persons to whom the Software is furnished to do
+so, subject to the following conditions:
+
+    * Redistributions of source code must retain the above copyright notice,
+      this list of conditions and the following disclaimers.
+
+    * Redistributions in binary form must reproduce the above copyright notice,
+      this list of conditions and the following disclaimers in the
+      documentation and/or other materials provided with the distribution.
+
+    * Neither the names of the LLVM Team, University of Illinois at
+      Urbana-Champaign, nor the names of its contributors may be used to
+      endorse or promote products derived from this Software without specific
+      prior written permission.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
+FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE
+SOFTWARE.
--- a/licenses/LICENSE_unicode.txt
+++ b/licenses/LICENSE_unicode.txt
@ -0,0 +1,80 @@
+/*
+ * Copyright 2001-2004 Unicode, Inc.
+ *
+ * Disclaimer
+ *
+ * This source code is provided as is by Unicode, Inc. No claims are
+ * made as to fitness for any particular purpose. No warranties of any
+ * kind are expressed or implied. The recipient agrees to determine
+ * applicability of information provided. If this file has been
+ * purchased on magnetic or optical media from Unicode, Inc., the
+ * sole remedy for any claim will be exchange of defective media
+ * within 90 days of receipt.
+ *
+ * Limitations on Rights to Redistribute This Code
+ *
+ * Unicode, Inc. hereby grants the right to freely use the information
+ * supplied in this file in the creation of products supporting the
+ * Unicode Standard, and to make copies of this file in any form
+ * for internal or external distribution as long as this notice
+ * remains attached.
+ */
+
+/* ---------------------------------------------------------------------
+
+    Conversions between UTF32, UTF-16, and UTF-8.  Header file.
+
+    Several functions are included here, forming a complete set of
+    conversions between the three formats.  UTF-7 is not included
+    here, but is handled in a separate source file.
+
+    Each of these routines takes pointers to input buffers and output
+    buffers.  The input buffers are const.
+
+    Each routine converts the text between *sourceStart and sourceEnd,
+    putting the result into the buffer between *targetStart and
+    targetEnd. Note: the end pointers are *after* the last item: e.g.
+    *(sourceEnd - 1) is the last item.
+
+    The return result indicates whether the conversion was successful,
+    and if not, whether the problem was in the source or target buffers.
+    (Only the first encountered problem is indicated.)
+
+    After the conversion, *sourceStart and *targetStart are both
+    updated to point to the end of last text successfully converted in
+    the respective buffers.
+
+    Input parameters:
+        sourceStart - pointer to a pointer to the source buffer.
+                The contents of this are modified on return so that
+                it points at the next thing to be converted.
+        targetStart - similarly, pointer to pointer to the target buffer.
+        sourceEnd, targetEnd - respectively pointers to the ends of the
+                two buffers, for overflow checking only.
+
+    These conversion functions take a ConversionFlags argument. When this
+    flag is set to strict, both irregular sequences and isolated surrogates
+    will cause an error.  When the flag is set to lenient, both irregular
+    sequences and isolated surrogates are converted.
+
+    Whether the flag is strict or lenient, all illegal sequences will cause
+    an error return. This includes sequences such as: <F4 90 80 80>, <C0 80>,
+    or <A0> in UTF-8, and values above 0x10FFFF in UTF-32. Conformant code
+    must check for illegal sequences.
+
+    When the flag is set to lenient, characters over 0x10FFFF are converted
+    to the replacement character; otherwise (when the flag is set to strict)
+    they constitute an error.
+
+    Output parameters:
+        The value "sourceIllegal" is returned from some routines if the input
+        sequence is malformed.  When "sourceIllegal" is returned, the source
+        value will point to the illegal value that caused the problem. E.g.,
+        in UTF-8 when a sequence is malformed, it points to the start of the
+        malformed sequence.
+
+    Author: Mark E. Davis, 1994.
+    Rev History: Rick McGowan, fixes & updates May 2001.
+         Fixes & updates, Sept 2001.
+
+------------------------------------------------------------------------ */
--- a/tests/cpp-tests/Classes/UnitTest/UnitTest.cpp
+++ b/tests/cpp-tests/Classes/UnitTest/UnitTest.cpp
@ -7,7 +7,8 @@ static std::function<Layer*()> createFunctions[] = {
    CL(TemplateVectorTest),
    CL(TemplateMapTest),
    CL(ValueTest),
-    CL(RefPtrTest)
+    CL(RefPtrTest),
+    CL(UTFConversionTest)
 };

 static int sceneIdx = -1;
@ -659,3 +660,159 @@ void ValueTest::constFunc(const Value& value) const
 {
    
 }
+
+// UTFConversionTest
+
+static const int TEST_CODE_NUM = 11;
+
+static const char16_t __utf16Code[] =
+{
+    0x3042,
+    0x3044,
+    0x3046,
+    0x3048,
+    0x304A,
+    0x3042,
+    0x3044,
+    0x3046,
+    0x3048,
+    0x304A,
+    0x0041,
+    0x0000,
+};
+
+// to avoid Xcode error, char => unsigned char
+// If you use this table, please cast manually as (const char *).
+static const unsigned char __utf8Code[] =
+{
+    0xE3,0x81,0x82,
+    0xE3,0x81,0x84,
+    0xE3,0x81,0x86,
+    0xE3,0x81,0x88,
+    0xE3,0x81,0x8A,
+    0xE3,0x81,0x82,
+    0xE3,0x81,0x84,
+    0xE3,0x81,0x86,
+    0xE3,0x81,0x88,
+    0xE3,0x81,0x8A,
+    0x41,
+    0x00,
+};
+
+
+static const char16_t WHITE_SPACE_CODE[] =
+{
+    0x0009,
+    0x000A,
+    0x000B,
+    0x000C,
+    0x000D,
+    0x0020,
+    0x0085,
+    0x00A0,
+    0x1680,
+    0x2000,
+    0x2001,
+    0x2002,
+    0x2003,
+    0x2004,
+    0x2005,
+    0x2006,
+    0x2007,
+    0x2008,
+    0x2009,
+    0x200A,
+    0x2028,
+    0x2029,
+    0x202F,
+    0x205F,
+    0x3000
+};
+
+static void doUTFConversion()
+{
+    // Exercises the StringUtils UTF helpers against the fixed tables above.
+    bool isSuccess = false;
+    std::string originalUTF8 = (const char*)__utf8Code;
+    std::u16string originalUTF16 = __utf16Code;
+
+    // UTF-16 -> UTF-8 must reproduce the reference bytes.
+    std::string utf8Str;
+    isSuccess = StringUtils::UTF16ToUTF8(originalUTF16, utf8Str);
+    // Compare whole strings (length and content) rather than a raw byte prefix.
+    if (isSuccess)
+    {
+        isSuccess = (utf8Str == originalUTF8);
+    }
+
+    CCASSERT(isSuccess, "StringUtils::UTF16ToUTF8 failed");
+
+    // UTF-8 -> UTF-16 must reproduce the reference code units.
+    std::u16string utf16Str;
+    isSuccess = StringUtils::UTF8ToUTF16(originalUTF8, utf16Str);
+    // operator== covers every char16_t; a memcmp sized in elements would not.
+    if (isSuccess)
+    {
+        isSuccess = (utf16Str == originalUTF16);
+    }
+
+    CCASSERT(isSuccess && (utf16Str.length() == TEST_CODE_NUM), "StringUtils::UTF8ToUTF16 failed");
+
+    // The vector form must hold one element per UTF-16 code unit.
+    auto vec1 = StringUtils::getChar16VectorFromUTF16String(originalUTF16);
+
+    CCASSERT(vec1.size() == originalUTF16.length(), "StringUtils::getChar16VectorFromUTF16String failed");
+
+    // Trimming must strip the four appended U+2009 units and nothing else.
+    std::vector<char16_t> vec2( vec1 );
+    vec2.push_back(0x2009);
+    vec2.push_back(0x2009);
+    vec2.push_back(0x2009);
+    vec2.push_back(0x2009);
+
+    std::vector<char16_t> vec3( vec2 );
+    StringUtils::trimUTF16Vector(vec2);
+
+    CCASSERT(vec1.size() == vec2.size(), "StringUtils::trimUTF16Vector failed");
+
+    for (size_t i = 0; i < vec2.size(); i++ )
+    {
+        CCASSERT(vec1.at(i) == vec2.at(i), "StringUtils::trimUTF16Vector failed");
+    }
+
+    // Character count of the UTF-8 reference string.
+    CCASSERT(StringUtils::getCharacterCountInUTF8String(originalUTF8) == TEST_CODE_NUM, "StringUtils::getCharacterCountInUTF8String failed");
+
+    // vec3 still carries the U+2009 tail; compare as int to avoid a signed/unsigned mix.
+    int lastIndex = StringUtils::getIndexOfLastNotChar16(vec3, 0x2009);
+    CCASSERT(lastIndex == static_cast<int>(vec1.size()) - 1, "StringUtils::getIndexOfLastNotChar16 failed");
+
+    // Sanity check on the reference table itself.
+    CCASSERT(originalUTF16.length() == TEST_CODE_NUM, "The length of the original utf16 string isn't equal to TEST_CODE_NUM");
+
+    // Every entry of WHITE_SPACE_CODE must be classified as a space.
+    size_t whiteCodeNum = sizeof(WHITE_SPACE_CODE) / sizeof(WHITE_SPACE_CODE[0]);
+    for( size_t i = 0; i < whiteCodeNum; i++ )
+    {
+        CCASSERT(StringUtils::isUnicodeSpace(WHITE_SPACE_CODE[i]), "StringUtils::isUnicodeSpace failed");
+    }
+
+    CCASSERT(!StringUtils::isUnicodeSpace(0xFFFF), "StringUtils::isUnicodeSpace failed");
+
+    CCASSERT(!StringUtils::isCJKUnicode(0xFFFF) && StringUtils::isCJKUnicode(0x3100), "StringUtils::isCJKUnicode failed");
+}
+
+void UTFConversionTest::onEnter()
+{
+    UnitTestDemo::onEnter();
+    // Repeat the checks many times; presumably a stress run to surface crashes.
+    for (int i = 0; i < 10000; ++i)
+    {
+        doUTFConversion();
+    }
+}
+
+std::string UTFConversionTest::subtitle() const
+{
+    return "UTF8 <-> UTF16 Conversion Test, no crash";
+}
--- a/tests/cpp-tests/Classes/UnitTest/UnitTest.h
+++ b/tests/cpp-tests/Classes/UnitTest/UnitTest.h
@ -53,4 +53,12 @@ public:
    void constFunc(const Value& value) const;
 };

+class UTFConversionTest : public UnitTestDemo
+{
+public:
+    CREATE_FUNC(UTFConversionTest);
+    virtual void onEnter() override;
+    virtual std::string subtitle() const override;
+};
+
 #endif /* __UNIT_TEST__ */