mirror of https://github.com/axmolengine/axmol.git
Merge pull request #6635 from dumganhar/iss4660-unicode
issue #4660: ccUTF8 uses utf8-utf16 conversion in llvm3.4
This commit is contained in:
commit
9ab2dae212
|
@ -98,6 +98,10 @@
|
|||
1A12775A18DFCC4F0005F345 /* CCTweenFunction.h in Headers */ = {isa = PBXBuildFile; fileRef = 2986667918B1B079000E39CA /* CCTweenFunction.h */; };
|
||||
1A12775B18DFCC540005F345 /* CCTweenFunction.h in Headers */ = {isa = PBXBuildFile; fileRef = 2986667918B1B079000E39CA /* CCTweenFunction.h */; };
|
||||
1A12775C18DFCC590005F345 /* CCTweenFunction.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 2986667818B1B079000E39CA /* CCTweenFunction.cpp */; };
|
||||
1A1645B0191B726C008C7C7F /* ConvertUTF.c in Sources */ = {isa = PBXBuildFile; fileRef = 1A1645AE191B726C008C7C7F /* ConvertUTF.c */; };
|
||||
1A1645B1191B726C008C7C7F /* ConvertUTF.c in Sources */ = {isa = PBXBuildFile; fileRef = 1A1645AE191B726C008C7C7F /* ConvertUTF.c */; };
|
||||
1A1645B2191B726C008C7C7F /* ConvertUTFWrapper.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1A1645AF191B726C008C7C7F /* ConvertUTFWrapper.cpp */; };
|
||||
1A1645B3191B726C008C7C7F /* ConvertUTFWrapper.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1A1645AF191B726C008C7C7F /* ConvertUTFWrapper.cpp */; };
|
||||
1A570061180BC5A10088DEC7 /* CCAction.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1A570047180BC5A10088DEC7 /* CCAction.cpp */; };
|
||||
1A570062180BC5A10088DEC7 /* CCAction.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1A570047180BC5A10088DEC7 /* CCAction.cpp */; };
|
||||
1A570063180BC5A10088DEC7 /* CCAction.h in Headers */ = {isa = PBXBuildFile; fileRef = 1A570048180BC5A10088DEC7 /* CCAction.h */; };
|
||||
|
@ -765,6 +769,8 @@
|
|||
1ABA68AF1888D700007D1BB4 /* CCFontCharMap.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1ABA68AC1888D700007D1BB4 /* CCFontCharMap.cpp */; };
|
||||
1ABA68B01888D700007D1BB4 /* CCFontCharMap.h in Headers */ = {isa = PBXBuildFile; fileRef = 1ABA68AD1888D700007D1BB4 /* CCFontCharMap.h */; };
|
||||
1ABA68B11888D700007D1BB4 /* CCFontCharMap.h in Headers */ = {isa = PBXBuildFile; fileRef = 1ABA68AD1888D700007D1BB4 /* CCFontCharMap.h */; };
|
||||
1AC0269C1914068200FA920D /* ConvertUTF.h in Headers */ = {isa = PBXBuildFile; fileRef = 1AC026991914068200FA920D /* ConvertUTF.h */; };
|
||||
1AC0269D1914068200FA920D /* ConvertUTF.h in Headers */ = {isa = PBXBuildFile; fileRef = 1AC026991914068200FA920D /* ConvertUTF.h */; };
|
||||
1AD71DA9180E26E600808F54 /* CCBAnimationManager.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1AD71CFA180E26E600808F54 /* CCBAnimationManager.cpp */; };
|
||||
1AD71DAA180E26E600808F54 /* CCBAnimationManager.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1AD71CFA180E26E600808F54 /* CCBAnimationManager.cpp */; };
|
||||
1AD71DAB180E26E600808F54 /* CCBAnimationManager.h in Headers */ = {isa = PBXBuildFile; fileRef = 1AD71CFB180E26E600808F54 /* CCBAnimationManager.h */; };
|
||||
|
@ -1870,6 +1876,8 @@
|
|||
1A0DB7301823827C0025743D /* CCGL.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = CCGL.h; sourceTree = "<group>"; };
|
||||
1A0DB7311823827C0025743D /* CCEAGLView.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = CCEAGLView.h; sourceTree = "<group>"; };
|
||||
1A0DB7351823828F0025743D /* CCGL.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = CCGL.h; sourceTree = "<group>"; };
|
||||
1A1645AE191B726C008C7C7F /* ConvertUTF.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = ConvertUTF.c; sourceTree = "<group>"; };
|
||||
1A1645AF191B726C008C7C7F /* ConvertUTFWrapper.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = ConvertUTFWrapper.cpp; sourceTree = "<group>"; };
|
||||
1A570047180BC5A10088DEC7 /* CCAction.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = CCAction.cpp; sourceTree = "<group>"; };
|
||||
1A570048180BC5A10088DEC7 /* CCAction.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = CCAction.h; sourceTree = "<group>"; };
|
||||
1A570049180BC5A10088DEC7 /* CCActionCamera.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = CCActionCamera.cpp; sourceTree = "<group>"; };
|
||||
|
@ -2121,6 +2129,7 @@
|
|||
1AAF584E180E40B9000584C8 /* LocalStorageAndroid.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = LocalStorageAndroid.cpp; sourceTree = "<group>"; };
|
||||
1ABA68AC1888D700007D1BB4 /* CCFontCharMap.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = CCFontCharMap.cpp; sourceTree = "<group>"; };
|
||||
1ABA68AD1888D700007D1BB4 /* CCFontCharMap.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = CCFontCharMap.h; sourceTree = "<group>"; };
|
||||
1AC026991914068200FA920D /* ConvertUTF.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = ConvertUTF.h; sourceTree = "<group>"; };
|
||||
1AD71CFA180E26E600808F54 /* CCBAnimationManager.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = CCBAnimationManager.cpp; sourceTree = "<group>"; };
|
||||
1AD71CFB180E26E600808F54 /* CCBAnimationManager.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = CCBAnimationManager.h; sourceTree = "<group>"; };
|
||||
1AD71CFC180E26E600808F54 /* CCBFileLoader.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = CCBFileLoader.cpp; sourceTree = "<group>"; };
|
||||
|
@ -3367,6 +3376,7 @@
|
|||
1A57033E180BD0490088DEC7 /* external */ = {
|
||||
isa = PBXGroup;
|
||||
children = (
|
||||
1AC026971914068200FA920D /* ConvertUTF */,
|
||||
46C02E0418E91123004B7456 /* xxhash */,
|
||||
46A168B21807AF9C005B8026 /* Box2D */,
|
||||
46A1693A1807AFD6005B8026 /* chipmunk */,
|
||||
|
@ -3784,6 +3794,17 @@
|
|||
path = "local-storage";
|
||||
sourceTree = "<group>";
|
||||
};
|
||||
1AC026971914068200FA920D /* ConvertUTF */ = {
|
||||
isa = PBXGroup;
|
||||
children = (
|
||||
1A1645AE191B726C008C7C7F /* ConvertUTF.c */,
|
||||
1AC026991914068200FA920D /* ConvertUTF.h */,
|
||||
1A1645AF191B726C008C7C7F /* ConvertUTFWrapper.cpp */,
|
||||
);
|
||||
name = ConvertUTF;
|
||||
path = ../external/ConvertUTF;
|
||||
sourceTree = "<group>";
|
||||
};
|
||||
1AD71CF7180E26E600808F54 /* editor-support */ = {
|
||||
isa = PBXGroup;
|
||||
children = (
|
||||
|
@ -4992,6 +5013,7 @@
|
|||
50FCEB9918C72017004AD434 /* CheckBoxReader.h in Headers */,
|
||||
50E6D33E18E174130051CA34 /* UIVBox.h in Headers */,
|
||||
500DC9AC19106300007B91BF /* ZipUtils.h in Headers */,
|
||||
1AC0269C1914068200FA920D /* ConvertUTF.h in Headers */,
|
||||
1A57019F180BCB590088DEC7 /* CCFont.h in Headers */,
|
||||
1A5701A3180BCB590088DEC7 /* CCFontAtlas.h in Headers */,
|
||||
1A01C68618F57BE800EFE3A6 /* CCArray.h in Headers */,
|
||||
|
@ -5556,6 +5578,7 @@
|
|||
5034CA44191D591100CE6051 /* ccShader_Label.vert in Headers */,
|
||||
1A5702F5180BCE750088DEC7 /* CCTMXObjectGroup.h in Headers */,
|
||||
1A5702F9180BCE750088DEC7 /* CCTMXTiledMap.h in Headers */,
|
||||
1AC0269D1914068200FA920D /* ConvertUTF.h in Headers */,
|
||||
500DC9A119106300007B91BF /* CCVector.h in Headers */,
|
||||
1A5702FD180BCE750088DEC7 /* CCTMXXMLParser.h in Headers */,
|
||||
1A570303180BCE890088DEC7 /* CCParallaxNode.h in Headers */,
|
||||
|
@ -6085,6 +6108,7 @@
|
|||
2AC795DB1862870F005EC8E1 /* SkeletonBounds.cpp in Sources */,
|
||||
2AC795DC1862870F005EC8E1 /* Event.cpp in Sources */,
|
||||
1A01C68A18F57BE800EFE3A6 /* CCDeprecated.cpp in Sources */,
|
||||
1A1645B0191B726C008C7C7F /* ConvertUTF.c in Sources */,
|
||||
500DC93219106300007B91BF /* CCAutoreleasePool.cpp in Sources */,
|
||||
2905FA5618CF08D100240AA3 /* UILayout.cpp in Sources */,
|
||||
2AC795DD1862870F005EC8E1 /* EventData.cpp in Sources */,
|
||||
|
@ -6185,6 +6209,7 @@
|
|||
500DC9B619106E6D007B91BF /* TransformUtils.cpp in Sources */,
|
||||
1A5701EE180BCB8C0088DEC7 /* CCTransitionProgress.cpp in Sources */,
|
||||
1A5701F7180BCBAD0088DEC7 /* CCMenu.cpp in Sources */,
|
||||
1A1645B2191B726C008C7C7F /* ConvertUTFWrapper.cpp in Sources */,
|
||||
1A5701FB180BCBAD0088DEC7 /* CCMenuItem.cpp in Sources */,
|
||||
1A570202180BCBD40088DEC7 /* CCClippingNode.cpp in Sources */,
|
||||
06CAAACF186AD7FC0012A414 /* TriggerBase.cpp in Sources */,
|
||||
|
@ -6793,12 +6818,14 @@
|
|||
1A8C59EC180E930E00EF57C3 /* CCSkin.cpp in Sources */,
|
||||
2905FA4718CF08D100240AA3 /* UIButton.cpp in Sources */,
|
||||
1A8C59F0180E930E00EF57C3 /* CCSpriteFrameCacheHelper.cpp in Sources */,
|
||||
1A1645B1191B726C008C7C7F /* ConvertUTF.c in Sources */,
|
||||
B2AF2FA218EBAEAE00C5807C /* Vector2.cpp in Sources */,
|
||||
500DC8D219105F7D007B91BF /* CCAffineTransform.cpp in Sources */,
|
||||
1A8C59F4180E930E00EF57C3 /* CCSSceneReader.cpp in Sources */,
|
||||
1A8C59F8180E930E00EF57C3 /* CCTransformHelp.cpp in Sources */,
|
||||
1A8C59FC180E930E00EF57C3 /* CCTween.cpp in Sources */,
|
||||
2905FA5318CF08D100240AA3 /* UIImageView.cpp in Sources */,
|
||||
1A1645B3191B726C008C7C7F /* ConvertUTFWrapper.cpp in Sources */,
|
||||
1A8C5A04180E930E00EF57C3 /* CCUtilMath.cpp in Sources */,
|
||||
2905FA7518CF08D100240AA3 /* UIScrollView.cpp in Sources */,
|
||||
1A8C5A0E180E930E00EF57C3 /* DictionaryHelper.cpp in Sources */,
|
||||
|
|
|
@ -100,47 +100,6 @@ const char * Font::getCurrentGlyphCollection() const
|
|||
}
|
||||
}
|
||||
|
||||
// Converts the UTF-8 C string `text` to UTF-16 by delegating to cc_utf8_to_utf16().
//
// @param text           UTF-8 input, forwarded unchanged to cc_utf8_to_utf16().
// @param outNumLetters  Set to cc_wcslen() of the converted buffer on success.
//                       It is NOT written when the conversion returns null.
// @return The buffer returned by cc_utf8_to_utf16(), or 0 if that call returned null.
//
// NOTE(review): this function never frees the returned buffer; other callers of
// cc_utf8_to_utf16() release it with delete[], so the caller presumably owns it — confirm.
unsigned short* Font::getUTF16Text(const char *text, int &outNumLetters) const
|
||||
{
|
||||
unsigned short* utf16String = cc_utf8_to_utf16(text);
|
||||
|
||||
// Conversion failed: bail out without touching outNumLetters.
if(!utf16String)
|
||||
return 0;
|
||||
|
||||
// Length is measured in UTF-16 code units as counted by cc_wcslen().
outNumLetters = cc_wcslen(utf16String);
|
||||
return utf16String;
|
||||
}
|
||||
|
||||
// Returns the length of the UTF-16 buffer `text` exactly as reported by cc_wcslen().
// No null check is done here; `text` is passed straight through.
// The misspelling "Lenght" is part of the declared method name and is kept as-is.
int Font::getUTF16TextLenght(unsigned short int *text) const
|
||||
{
|
||||
return cc_wcslen(text);
|
||||
}
|
||||
|
||||
// Returns a newly allocated, zero-terminated copy of text[newBegin .. newEnd]
// (both indices inclusive).
//
// @param text      Source UTF-16 buffer; its length is obtained with cc_wcslen().
// @param newBegin  Index of the first code unit to keep; must be >= 0.
// @param newEnd    Index of the last code unit to keep; must be > 0, greater than
//                  newBegin, and strictly less than cc_wcslen(text).
// @return A buffer allocated with new[] holding (newEnd - newBegin + 1) code units
//         plus a terminating 0, or 0 when any of the range checks above fails.
//
// The buffer is allocated with new[] and not released here, so the caller is
// responsible for delete[]-ing it.
unsigned short * Font::trimUTF16Text(unsigned short int *text, int newBegin, int newEnd) const
|
||||
{
|
||||
// Reject negative start and non-positive end.
if ( newBegin < 0 || newEnd <= 0 )
|
||||
return 0;
|
||||
|
||||
// Reject empty or inverted ranges (a single-character range is also rejected).
if ( newBegin >= newEnd )
|
||||
return 0;
|
||||
|
||||
// newEnd must be a valid index strictly inside the string.
if (newEnd >= cc_wcslen(text))
|
||||
return 0;
|
||||
|
||||
// +1 because the range is inclusive, +1 for the terminating 0.
int newLenght = newEnd - newBegin + 2;
|
||||
unsigned short* trimmedString = new unsigned short[newLenght];
|
||||
|
||||
// Copy every slot except the last one, which is reserved for the terminator.
for(int c = 0; c < (newLenght - 1); ++c)
|
||||
{
|
||||
trimmedString[c] = text[newBegin + c];
|
||||
}
|
||||
|
||||
// last char
|
||||
trimmedString[newLenght-1] = 0x0000;
|
||||
|
||||
// done
|
||||
return trimmedString;
|
||||
}
|
||||
|
||||
NS_CC_END
|
||||
|
||||
|
|
|
@ -41,16 +41,12 @@ class CC_DLL Font : public Ref
|
|||
public:
|
||||
virtual FontAtlas *createFontAtlas() = 0;
|
||||
|
||||
virtual int* getHorizontalKerningForTextUTF16(unsigned short *text, int &outNumLetters) const = 0;
|
||||
virtual int* getHorizontalKerningForTextUTF16(const std::u16string& text, int &outNumLetters) const = 0;
|
||||
virtual const char* getCurrentGlyphCollection() const;
|
||||
|
||||
|
||||
virtual int getFontMaxHeight() const { return 0; }
|
||||
|
||||
virtual int getUTF16TextLenght(unsigned short int *text) const;
|
||||
virtual unsigned short * getUTF16Text(const char *text, int &outNumLetters) const;
|
||||
virtual unsigned short * trimUTF16Text(unsigned short int *text, int newBegin, int newEnd) const;
|
||||
|
||||
protected:
|
||||
|
||||
Font();
|
||||
|
|
|
@ -202,7 +202,7 @@ void FontAtlas::addLetterDefinition(const FontLetterDefinition &letterDefinition
|
|||
_fontLetterDefinitions[letterDefinition.letteCharUTF16] = letterDefinition;
|
||||
}
|
||||
|
||||
bool FontAtlas::getLetterDefinitionForChar(unsigned short letteCharUTF16, FontLetterDefinition &outDefinition)
|
||||
bool FontAtlas::getLetterDefinitionForChar(char16_t letteCharUTF16, FontLetterDefinition &outDefinition)
|
||||
{
|
||||
auto outIterator = _fontLetterDefinitions.find(letteCharUTF16);
|
||||
|
||||
|
@ -218,13 +218,13 @@ bool FontAtlas::getLetterDefinitionForChar(unsigned short letteCharUTF16, FontL
|
|||
}
|
||||
}
|
||||
|
||||
bool FontAtlas::prepareLetterDefinitions(unsigned short *utf16String)
|
||||
bool FontAtlas::prepareLetterDefinitions(const std::u16string& utf16String)
|
||||
{
|
||||
FontFreeType* fontTTf = dynamic_cast<FontFreeType*>(_font);
|
||||
if(fontTTf == nullptr || utf16String == nullptr)
|
||||
if(fontTTf == nullptr)
|
||||
return false;
|
||||
|
||||
int length = cc_wcslen(utf16String);
|
||||
size_t length = utf16String.length();
|
||||
|
||||
float offsetAdjust = _letterPadding / 2;
|
||||
long bitmapWidth;
|
||||
|
@ -240,7 +240,7 @@ bool FontAtlas::prepareLetterDefinitions(unsigned short *utf16String)
|
|||
|
||||
float startY = _currentPageOrigY;
|
||||
|
||||
for (int i = 0; i < length; ++i)
|
||||
for (size_t i = 0; i < length; ++i)
|
||||
{
|
||||
auto outIterator = _fontLetterDefinitions.find(utf16String[i]);
|
||||
|
||||
|
|
|
@ -25,10 +25,11 @@
|
|||
#ifndef _CCFontAtlas_h_
|
||||
#define _CCFontAtlas_h_
|
||||
|
||||
#include <unordered_map>
|
||||
#include "base/CCPlatformMacros.h"
|
||||
#include "base/CCRef.h"
|
||||
#include "CCStdC.h"
|
||||
#include <string>
|
||||
#include <unordered_map>
|
||||
|
||||
NS_CC_BEGIN
|
||||
|
||||
|
@ -71,9 +72,9 @@ public:
|
|||
virtual ~FontAtlas();
|
||||
|
||||
void addLetterDefinition(const FontLetterDefinition &letterDefinition);
|
||||
bool getLetterDefinitionForChar(unsigned short letteCharUTF16, FontLetterDefinition &outDefinition);
|
||||
bool getLetterDefinitionForChar(char16_t letteCharUTF16, FontLetterDefinition &outDefinition);
|
||||
|
||||
bool prepareLetterDefinitions(unsigned short *utf16String);
|
||||
bool prepareLetterDefinitions(const std::u16string& utf16String);
|
||||
|
||||
inline const std::unordered_map<ssize_t, Texture2D*>& getTextures() const{ return _atlasTextures;}
|
||||
void addTexture(Texture2D *texture, int slot);
|
||||
|
|
|
@ -99,12 +99,9 @@ FontCharMap::~FontCharMap()
|
|||
|
||||
}
|
||||
|
||||
int * FontCharMap::getHorizontalKerningForTextUTF16(unsigned short *text, int &outNumLetters) const
|
||||
int * FontCharMap::getHorizontalKerningForTextUTF16(const std::u16string& text, int &outNumLetters) const
|
||||
{
|
||||
if (!text)
|
||||
return 0;
|
||||
|
||||
outNumLetters = cc_wcslen(text);
|
||||
outNumLetters = static_cast<int>(text.length());
|
||||
|
||||
if (!outNumLetters)
|
||||
return 0;
|
||||
|
|
|
@ -37,7 +37,7 @@ public:
|
|||
static FontCharMap * create(Texture2D* texture, int itemWidth, int itemHeight, int startCharMap);
|
||||
static FontCharMap * create(const std::string& plistFile);
|
||||
|
||||
virtual int* getHorizontalKerningForTextUTF16(unsigned short *text, int &outNumLetters) const override;
|
||||
virtual int* getHorizontalKerningForTextUTF16(const std::u16string& text, int &outNumLetters) const override;
|
||||
virtual FontAtlas *createFontAtlas() override;
|
||||
|
||||
protected:
|
||||
|
|
|
@ -711,12 +711,9 @@ void FontFNT::purgeCachedData()
|
|||
}
|
||||
}
|
||||
|
||||
int * FontFNT::getHorizontalKerningForTextUTF16(unsigned short *text, int &outNumLetters) const
|
||||
int * FontFNT::getHorizontalKerningForTextUTF16(const std::u16string& text, int &outNumLetters) const
|
||||
{
|
||||
if (!text)
|
||||
return 0;
|
||||
|
||||
outNumLetters = cc_wcslen(text);
|
||||
outNumLetters = static_cast<int>(text.length());
|
||||
|
||||
if (!outNumLetters)
|
||||
return 0;
|
||||
|
|
|
@ -42,7 +42,7 @@ public:
|
|||
Removes from memory the cached configurations and the atlas name dictionary.
|
||||
*/
|
||||
static void purgeCachedData();
|
||||
virtual int* getHorizontalKerningForTextUTF16(unsigned short *text, int &outNumLetters) const override;
|
||||
virtual int* getHorizontalKerningForTextUTF16(const std::u16string& text, int &outNumLetters) const override;
|
||||
virtual FontAtlas *createFontAtlas() override;
|
||||
|
||||
protected:
|
||||
|
|
|
@ -175,20 +175,22 @@ FontAtlas * FontFreeType::createFontAtlas()
|
|||
FontAtlas *atlas = new FontAtlas(*this);
|
||||
if (_usedGlyphs != GlyphCollection::DYNAMIC)
|
||||
{
|
||||
unsigned short* utf16 = cc_utf8_to_utf16(getCurrentGlyphCollection());
|
||||
atlas->prepareLetterDefinitions(utf16);
|
||||
CC_SAFE_DELETE_ARRAY(utf16);
|
||||
std::u16string utf16;
|
||||
if (StringUtils::UTF8ToUTF16(getCurrentGlyphCollection(), utf16))
|
||||
{
|
||||
atlas->prepareLetterDefinitions(utf16);
|
||||
}
|
||||
}
|
||||
this->release();
|
||||
return atlas;
|
||||
}
|
||||
|
||||
int * FontFreeType::getHorizontalKerningForTextUTF16(unsigned short *text, int &outNumLetters) const
|
||||
int * FontFreeType::getHorizontalKerningForTextUTF16(const std::u16string& text, int &outNumLetters) const
|
||||
{
|
||||
if (!text || !_fontRef)
|
||||
if (!_fontRef)
|
||||
return nullptr;
|
||||
|
||||
outNumLetters = cc_wcslen(text);
|
||||
outNumLetters = static_cast<int>(text.length());
|
||||
|
||||
if (!outNumLetters)
|
||||
return nullptr;
|
||||
|
|
|
@ -62,7 +62,7 @@ public:
|
|||
void renderCharAt(unsigned char *dest,int posX, int posY, unsigned char* bitmap,long bitmapWidth,long bitmapHeight);
|
||||
|
||||
virtual FontAtlas * createFontAtlas() override;
|
||||
virtual int * getHorizontalKerningForTextUTF16(unsigned short *text, int &outNumLetters) const override;
|
||||
virtual int * getHorizontalKerningForTextUTF16(const std::u16string& text, int &outNumLetters) const override;
|
||||
|
||||
unsigned char * getGlyphBitmap(unsigned short theChar, long &outWidth, long &outHeight, Rect &outRect,int &xAdvance);
|
||||
|
||||
|
|
|
@ -247,8 +247,6 @@ Label::Label(FontAtlas *atlas /* = nullptr */, TextHAlignment hAlignment /* = Te
|
|||
, _labelDimensions(Size::ZERO)
|
||||
, _hAlignment(hAlignment)
|
||||
, _vAlignment(vAlignment)
|
||||
, _currentUTF16String(nullptr)
|
||||
, _originalUTF16String(nullptr)
|
||||
, _horizontalKernings(nullptr)
|
||||
, _fontAtlas(atlas)
|
||||
, _isOpacityModifyRGB(false)
|
||||
|
@ -287,9 +285,7 @@ Label::Label(FontAtlas *atlas /* = nullptr */, TextHAlignment hAlignment /* = Te
|
|||
}
|
||||
|
||||
Label::~Label()
|
||||
{
|
||||
delete [] _currentUTF16String;
|
||||
delete [] _originalUTF16String;
|
||||
{
|
||||
delete [] _horizontalKernings;
|
||||
|
||||
if (_fontAtlas)
|
||||
|
@ -305,7 +301,7 @@ void Label::reset()
|
|||
TTFConfig temp;
|
||||
_fontConfig = temp;
|
||||
|
||||
_fontDirty = false;
|
||||
_systemFontDirty = false;
|
||||
_systemFont = "Helvetica";
|
||||
_systemFontSize = 12;
|
||||
|
||||
|
@ -476,6 +472,12 @@ void Label::setString(const std::string& text)
|
|||
{
|
||||
_originalUTF8String = text;
|
||||
_contentDirty = true;
|
||||
|
||||
std::u16string utf16String;
|
||||
if (StringUtils::UTF8ToUTF16(_originalUTF8String, utf16String))
|
||||
{
|
||||
_currentUTF16String = utf16String;
|
||||
}
|
||||
}
|
||||
|
||||
void Label::setAlignment(TextHAlignment hAlignment,TextVAlignment vAlignment)
|
||||
|
@ -574,7 +576,7 @@ float Label::getScaleX() const
|
|||
|
||||
void Label::alignText()
|
||||
{
|
||||
if (_fontAtlas == nullptr || _currentUTF16String == nullptr)
|
||||
if (_fontAtlas == nullptr || _currentUTF16String.empty())
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
@ -603,7 +605,7 @@ void Label::alignText()
|
|||
if(_labelWidth > 0 || (_currNumLines > 1 && _hAlignment != TextHAlignment::LEFT))
|
||||
LabelTextFormatter::alignText(this);
|
||||
|
||||
int strLen = cc_wcslen(_currentUTF16String);
|
||||
int strLen = static_cast<int>(_currentUTF16String.length());
|
||||
Rect uvRect;
|
||||
Sprite* letterSprite;
|
||||
for(const auto &child : _children) {
|
||||
|
@ -633,7 +635,7 @@ void Label::alignText()
|
|||
updateColor();
|
||||
}
|
||||
|
||||
bool Label::computeHorizontalKernings(unsigned short int *stringToRender)
|
||||
bool Label::computeHorizontalKernings(const std::u16string& stringToRender)
|
||||
{
|
||||
if (_horizontalKernings)
|
||||
{
|
||||
|
@ -650,41 +652,6 @@ bool Label::computeHorizontalKernings(unsigned short int *stringToRender)
|
|||
return true;
|
||||
}
|
||||
|
||||
// Replaces _originalUTF16String with a private, zero-terminated copy of `stringToSet`.
//
// @param stringToSet  Zero-terminated UTF-16 buffer; measured with cc_wcslen() and
//                     copied, so the argument itself is not retained.
// @return Always true.
//
// Any previously stored buffer is released with delete[] before the new one is
// allocated.
bool Label::setOriginalString(unsigned short *stringToSet)
|
||||
{
|
||||
if (_originalUTF16String)
|
||||
{
|
||||
delete [] _originalUTF16String;
|
||||
}
|
||||
|
||||
int newStringLenght = cc_wcslen(stringToSet);
|
||||
_originalUTF16String = new unsigned short int [newStringLenght + 1];
|
||||
// Byte counts below use a literal 2, i.e. they assume a 2-byte unsigned short.
memset(_originalUTF16String, 0, (newStringLenght + 1) * 2);
|
||||
memcpy(_originalUTF16String, stringToSet, (newStringLenght * 2));
|
||||
// Explicit terminator (the slot was already zeroed by the memset above).
_originalUTF16String[newStringLenght] = 0;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
// Installs `stringToSet` as the label's current UTF-16 string and refreshes the
// data derived from it.
//
// @param stringToSet  UTF-16 buffer that is stored by pointer (no copy is made).
//                     The stored pointer is released with delete[] on the next call,
//                     so it must come from new[] and must not be freed by the caller.
// @return Always true.
//
// NOTE(review): if stringToSet is the same pointer as _currentUTF16String, the
// buffer is deleted and then stored again as a dangling pointer — confirm no caller
// does this.
bool Label::setCurrentString(unsigned short *stringToSet)
|
||||
{
|
||||
// set the new string
|
||||
if (_currentUTF16String)
|
||||
{
|
||||
delete [] _currentUTF16String;
|
||||
}
|
||||
|
||||
_currentUTF16String = stringToSet;
|
||||
// Recompute the line count for the newly stored string.
computeStringNumLines();
|
||||
|
||||
// compute the advances
|
||||
// Kerning is only computed when a font atlas is attached.
if (_fontAtlas)
|
||||
{
|
||||
computeHorizontalKernings(stringToSet);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
void Label::updateQuads()
|
||||
{
|
||||
int index;
|
||||
|
@ -959,9 +926,17 @@ void Label::setFontDefinition(const FontDefinition& textDefinition)
|
|||
|
||||
void Label::updateContent()
|
||||
{
|
||||
auto utf16String = cc_utf8_to_utf16(_originalUTF8String.c_str());
|
||||
setCurrentString(utf16String);
|
||||
setOriginalString(utf16String);
|
||||
std::u16string utf16String;
|
||||
if (StringUtils::UTF8ToUTF16(_originalUTF8String, utf16String))
|
||||
{
|
||||
_currentUTF16String = utf16String;
|
||||
}
|
||||
|
||||
computeStringNumLines();
|
||||
if (_fontAtlas)
|
||||
{
|
||||
computeHorizontalKernings(_currentUTF16String);
|
||||
}
|
||||
|
||||
if (_textSprite)
|
||||
{
|
||||
|
@ -1036,7 +1011,7 @@ void Label::updateFont()
|
|||
}
|
||||
|
||||
_contentDirty = true;
|
||||
_fontDirty = false;
|
||||
_systemFontDirty = false;
|
||||
}
|
||||
|
||||
void Label::drawTextSprite(Renderer *renderer, bool parentTransformUpdated)
|
||||
|
@ -1075,7 +1050,7 @@ void Label::visit(Renderer *renderer, const Matrix &parentTransform, bool parent
|
|||
{
|
||||
return;
|
||||
}
|
||||
if (_fontDirty)
|
||||
if (_systemFontDirty)
|
||||
{
|
||||
updateFont();
|
||||
}
|
||||
|
@ -1136,7 +1111,7 @@ void Label::setSystemFontName(const std::string& systemFont)
|
|||
if (systemFont != _systemFont)
|
||||
{
|
||||
_systemFont = systemFont;
|
||||
_fontDirty = true;
|
||||
_systemFontDirty = true;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1145,16 +1120,15 @@ void Label::setSystemFontSize(float fontSize)
|
|||
if (_systemFontSize != fontSize)
|
||||
{
|
||||
_systemFontSize = fontSize;
|
||||
_fontDirty = true;
|
||||
_systemFontDirty = true;
|
||||
}
|
||||
}
|
||||
|
||||
///// PROTOCOL STUFF
|
||||
Sprite * Label::getLetter(int letterIndex)
|
||||
{
|
||||
if (_fontDirty)
|
||||
if (_systemFontDirty || _currentLabelType == LabelType::STRING_TEXTURE)
|
||||
{
|
||||
updateFont();
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
|
@ -1203,15 +1177,15 @@ void Label::computeStringNumLines()
|
|||
{
|
||||
int quantityOfLines = 1;
|
||||
|
||||
int stringLen = _currentUTF16String ? cc_wcslen(_currentUTF16String) : -1;
|
||||
if (stringLen < 1)
|
||||
if (_currentUTF16String.empty())
|
||||
{
|
||||
_currNumLines = stringLen;
|
||||
_currNumLines = 0;
|
||||
return;
|
||||
}
|
||||
|
||||
// count number of lines
|
||||
for (int i = 0; i < stringLen - 1; ++i)
|
||||
size_t stringLen = _currentUTF16String.length();
|
||||
for (size_t i = 0; i < stringLen-1; ++i)
|
||||
{
|
||||
if (_currentUTF16String[i] == '\n')
|
||||
{
|
||||
|
@ -1224,7 +1198,7 @@ void Label::computeStringNumLines()
|
|||
|
||||
int Label::getStringLength() const
|
||||
{
|
||||
return _currentUTF16String ? cc_wcslen(_currentUTF16String) : (int)_originalUTF8String.length();
|
||||
return static_cast<int>(_currentUTF16String.length());
|
||||
}
|
||||
|
||||
// RGBA protocol
|
||||
|
@ -1323,12 +1297,14 @@ void Label::updateColor()
|
|||
|
||||
std::string Label::getDescription() const
|
||||
{
|
||||
return StringUtils::format("<Label | Tag = %d, Label = '%s'>", _tag, cc_utf16_to_utf8(_currentUTF16String,-1,nullptr,nullptr));
|
||||
std::string utf8str;
|
||||
StringUtils::UTF16ToUTF8(_currentUTF16String, utf8str);
|
||||
return StringUtils::format("<Label | Tag = %d, Label = '%s'>", _tag, utf8str.c_str());
|
||||
}
|
||||
|
||||
const Size& Label::getContentSize() const
|
||||
{
|
||||
if (_fontDirty)
|
||||
if (_systemFontDirty)
|
||||
{
|
||||
const_cast<Label*>(this)->updateFont();
|
||||
}
|
||||
|
|
|
@ -286,9 +286,8 @@ protected:
|
|||
|
||||
virtual void alignText();
|
||||
|
||||
bool computeHorizontalKernings(unsigned short int *stringToRender);
|
||||
bool setCurrentString(unsigned short *stringToSet);
|
||||
bool setOriginalString(unsigned short *stringToSet);
|
||||
bool computeHorizontalKernings(const std::u16string& stringToRender);
|
||||
|
||||
void computeStringNumLines();
|
||||
|
||||
void updateQuads();
|
||||
|
@ -311,7 +310,7 @@ protected:
|
|||
bool _isOpacityModifyRGB;
|
||||
bool _contentDirty;
|
||||
|
||||
bool _fontDirty;
|
||||
bool _systemFontDirty;
|
||||
std::string _systemFont;
|
||||
float _systemFontSize;
|
||||
LabelType _currentLabelType;
|
||||
|
@ -344,8 +343,7 @@ protected:
|
|||
TextVAlignment _vAlignment;
|
||||
|
||||
int _currNumLines;
|
||||
unsigned short int * _currentUTF16String;
|
||||
unsigned short int * _originalUTF16String;
|
||||
std::u16string _currentUTF16String;
|
||||
std::string _originalUTF8String;
|
||||
|
||||
float _fontScale;
|
||||
|
|
|
@ -30,21 +30,17 @@
|
|||
#include "base/CCDirector.h"
|
||||
#include "2d/CCLabel.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
NS_CC_BEGIN
|
||||
|
||||
bool LabelTextFormatter::multilineText(Label *theLabel)
|
||||
{
|
||||
//int strLen = theLabel->getStringLength();
|
||||
auto limit = theLabel->_limitShowCount;
|
||||
|
||||
auto strWhole = theLabel->_currentUTF16String;
|
||||
|
||||
vector<unsigned short> multiline_string;
|
||||
std::vector<char16_t> multiline_string;
|
||||
multiline_string.reserve( limit );
|
||||
|
||||
vector<unsigned short> last_word;
|
||||
std::vector<char16_t> last_word;
|
||||
last_word.reserve( 25 );
|
||||
|
||||
bool isStartOfLine = false, isStartOfWord = false;
|
||||
|
@ -70,7 +66,7 @@ bool LabelTextFormatter::multilineText(Label *theLabel)
|
|||
tIndex = j+skip+justSkipped;
|
||||
if (strWhole[tIndex-1] == '\n')
|
||||
{
|
||||
cc_utf8_trim_ws(&last_word);
|
||||
StringUtils::trimUTF16Vector(last_word);
|
||||
|
||||
last_word.push_back('\n');
|
||||
multiline_string.insert(multiline_string.end(), last_word.begin(), last_word.end());
|
||||
|
@ -93,7 +89,7 @@ bool LabelTextFormatter::multilineText(Label *theLabel)
|
|||
if (tIndex >= limit)
|
||||
break;
|
||||
|
||||
unsigned short character = strWhole[tIndex];
|
||||
char16_t character = strWhole[tIndex];
|
||||
|
||||
if (!isStartOfWord)
|
||||
{
|
||||
|
@ -109,15 +105,15 @@ bool LabelTextFormatter::multilineText(Label *theLabel)
|
|||
|
||||
// 1) Whitespace.
|
||||
// 2) This character is non-CJK, but the last character is CJK
|
||||
bool isspace = isspace_unicode(character);
|
||||
bool isspace = StringUtils::isUnicodeSpace(character);
|
||||
bool isCJK = false;
|
||||
if(!isspace)
|
||||
{
|
||||
isCJK = iscjk_unicode(character);
|
||||
isCJK = StringUtils::isCJKUnicode(character);
|
||||
}
|
||||
|
||||
if (isspace ||
|
||||
(!last_word.empty() && iscjk_unicode(last_word.back()) && !isCJK))
|
||||
(!last_word.empty() && StringUtils::isCJKUnicode(last_word.back()) && !isCJK))
|
||||
{
|
||||
// if current character is white space, put it into the current word
|
||||
if (isspace) last_word.push_back(character);
|
||||
|
@ -139,9 +135,9 @@ bool LabelTextFormatter::multilineText(Label *theLabel)
|
|||
{
|
||||
last_word.push_back(character);
|
||||
|
||||
int found = cc_utf8_find_last_not_char(multiline_string, ' ');
|
||||
int found = StringUtils::getIndexOfLastNotChar16(multiline_string, ' ');
|
||||
if (found != -1)
|
||||
cc_utf8_trim_ws(&multiline_string);
|
||||
StringUtils::trimUTF16Vector(multiline_string);
|
||||
else
|
||||
multiline_string.clear();
|
||||
|
||||
|
@ -153,7 +149,7 @@ bool LabelTextFormatter::multilineText(Label *theLabel)
|
|||
}
|
||||
else
|
||||
{
|
||||
cc_utf8_trim_ws(&last_word);
|
||||
StringUtils::trimUTF16Vector(last_word);
|
||||
|
||||
last_word.push_back('\n');
|
||||
|
||||
|
@ -175,16 +171,11 @@ bool LabelTextFormatter::multilineText(Label *theLabel)
|
|||
|
||||
multiline_string.insert(multiline_string.end(), last_word.begin(), last_word.end());
|
||||
|
||||
size_t size = multiline_string.size();
|
||||
unsigned short* strNew = new unsigned short[size + 1];
|
||||
|
||||
for (size_t j = 0; j < size; ++j)
|
||||
{
|
||||
strNew[j] = multiline_string[j];
|
||||
}
|
||||
|
||||
strNew[size] = 0;
|
||||
theLabel->setCurrentString(strNew);
|
||||
std::u16string strNew(multiline_string.begin(), multiline_string.end());
|
||||
|
||||
theLabel->_currentUTF16String = strNew;
|
||||
theLabel->computeStringNumLines();
|
||||
theLabel->computeHorizontalKernings(theLabel->_currentUTF16String);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
@ -194,8 +185,8 @@ bool LabelTextFormatter::alignText(Label *theLabel)
|
|||
int i = 0;
|
||||
|
||||
int lineNumber = 0;
|
||||
int strLen = cc_wcslen(theLabel->_currentUTF16String);
|
||||
vector<unsigned short> lastLine;
|
||||
int strLen = static_cast<int>(theLabel->_currentUTF16String.length());
|
||||
std::vector<char16_t> lastLine;
|
||||
auto strWhole = theLabel->_currentUTF16String;
|
||||
|
||||
if (theLabel->_labelWidth > theLabel->_contentSize.width)
|
||||
|
@ -205,7 +196,7 @@ bool LabelTextFormatter::alignText(Label *theLabel)
|
|||
|
||||
for (int ctr = 0; ctr <= strLen; ++ctr)
|
||||
{
|
||||
unsigned short currentChar = strWhole[ctr];
|
||||
char16_t currentChar = strWhole[ctr];
|
||||
|
||||
if (currentChar == '\n' || currentChar == 0)
|
||||
{
|
||||
|
@ -334,7 +325,7 @@ bool LabelTextFormatter::createStringSprites(Label *theLabel)
|
|||
|
||||
for (unsigned int i = 0; i < stringLen; i++)
|
||||
{
|
||||
unsigned short c = strWhole[i];
|
||||
char16_t c = strWhole[i];
|
||||
if (fontAtlas->getLetterDefinitionForChar(c, tempDefinition))
|
||||
{
|
||||
charXOffset = tempDefinition.offsetX;
|
||||
|
|
|
@ -26,6 +26,10 @@ set(COCOS_2D_PLATFORM_SRC
|
|||
|
||||
endif()
|
||||
|
||||
include_directories(
|
||||
../external/ConvertUTF
|
||||
)
|
||||
|
||||
|
||||
set(COCOS_2D_SRC
|
||||
2d/ccFPSImages.c
|
||||
|
@ -110,5 +114,7 @@ set(COCOS_2D_SRC
|
|||
2d/platform/CCFileUtils.cpp
|
||||
2d/platform/CCImage.cpp
|
||||
../external/edtaa3func/edtaa3func.cpp
|
||||
../external/ConvertUTF/ConvertUTFWrapper.cpp
|
||||
../external/ConvertUTF/ConvertUTF.c
|
||||
)
|
||||
|
||||
|
|
|
@ -1,127 +1,36 @@
|
|||
/*
|
||||
* This file uses some implementations of gutf8.c in glib.
|
||||
*
|
||||
* gutf8.c - Operations on UTF-8 strings.
|
||||
*
|
||||
* Copyright (C) 1999 Tom Tromey
|
||||
* Copyright (C) 2000 Red Hat, Inc.
|
||||
* Copyright (c) 2013-2014 Chukong Technologies Inc.
|
||||
*
|
||||
* This library is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2 of the License, or (at your option) any later version.
|
||||
*
|
||||
* This library is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with this library; if not, write to the
|
||||
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
|
||||
* Boston, MA 02111-1307, USA.
|
||||
*/
|
||||
/****************************************************************************
|
||||
Copyright (c) 2014 cocos2d-x.org
|
||||
Copyright (c) 2014 Chukong Technologies Inc.
|
||||
|
||||
http://www.cocos2d-x.org
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
****************************************************************************/
|
||||
|
||||
#include "ccUTF8.h"
|
||||
#include "2d/platform/CCCommon.h"
|
||||
#include "base/CCConsole.h"
|
||||
#include "ConvertUTF.h"
|
||||
|
||||
NS_CC_BEGIN
|
||||
|
||||
int cc_wcslen(const unsigned short* str)
|
||||
{
|
||||
int i=0;
|
||||
while(*str++) i++;
|
||||
return i;
|
||||
}
|
||||
|
||||
/* Code from GLIB gutf8.c starts here. */
|
||||
|
||||
#define UTF8_COMPUTE(Char, Mask, Len) \
|
||||
if (Char < 128) \
|
||||
{ \
|
||||
Len = 1; \
|
||||
Mask = 0x7f; \
|
||||
} \
|
||||
else if ((Char & 0xe0) == 0xc0) \
|
||||
{ \
|
||||
Len = 2; \
|
||||
Mask = 0x1f; \
|
||||
} \
|
||||
else if ((Char & 0xf0) == 0xe0) \
|
||||
{ \
|
||||
Len = 3; \
|
||||
Mask = 0x0f; \
|
||||
} \
|
||||
else if ((Char & 0xf8) == 0xf0) \
|
||||
{ \
|
||||
Len = 4; \
|
||||
Mask = 0x07; \
|
||||
} \
|
||||
else if ((Char & 0xfc) == 0xf8) \
|
||||
{ \
|
||||
Len = 5; \
|
||||
Mask = 0x03; \
|
||||
} \
|
||||
else if ((Char & 0xfe) == 0xfc) \
|
||||
{ \
|
||||
Len = 6; \
|
||||
Mask = 0x01; \
|
||||
} \
|
||||
else \
|
||||
Len = -1;
|
||||
|
||||
#define UTF8_LENGTH(Char) \
|
||||
((Char) < 0x80 ? 1 : \
|
||||
((Char) < 0x800 ? 2 : \
|
||||
((Char) < 0x10000 ? 3 : \
|
||||
((Char) < 0x200000 ? 4 : \
|
||||
((Char) < 0x4000000 ? 5 : 6)))))
|
||||
|
||||
|
||||
#define UTF8_GET(Result, Chars, Count, Mask, Len) \
|
||||
(Result) = (Chars)[0] & (Mask); \
|
||||
for ((Count) = 1; (Count) < (Len); ++(Count)) \
|
||||
{ \
|
||||
if (((Chars)[(Count)] & 0xc0) != 0x80) \
|
||||
{ \
|
||||
(Result) = -1; \
|
||||
break; \
|
||||
} \
|
||||
(Result) <<= 6; \
|
||||
(Result) |= ((Chars)[(Count)] & 0x3f); \
|
||||
}
|
||||
|
||||
#define UNICODE_VALID(Char) \
|
||||
((Char) < 0x110000 && \
|
||||
(((Char) & 0xFFFFF800) != 0xD800) && \
|
||||
((Char) < 0xFDD0 || (Char) > 0xFDEF) && \
|
||||
((Char) & 0xFFFE) != 0xFFFE)
|
||||
|
||||
|
||||
static const char utf8_skip_data[256] = {
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1,
|
||||
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
|
||||
2, 2, 2, 2, 2, 2, 2,
|
||||
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 5,
|
||||
5, 5, 5, 6, 6, 1, 1
|
||||
};
|
||||
|
||||
static const char *const g_utf8_skip = utf8_skip_data;
|
||||
|
||||
#define cc_utf8_next_char(p) (char *)((p) + g_utf8_skip[*(unsigned char *)(p)])
|
||||
namespace StringUtils {
|
||||
|
||||
/*
|
||||
* @str: the string to search through.
|
||||
|
@ -129,14 +38,14 @@ static const char *const g_utf8_skip = utf8_skip_data;
|
|||
*
|
||||
* Return value: the index of the last character that is not c.
|
||||
* */
|
||||
unsigned int cc_utf8_find_last_not_char(std::vector<unsigned short> str, unsigned short c)
|
||||
unsigned int getIndexOfLastNotChar16(const std::vector<char16_t>& str, char16_t c)
|
||||
{
|
||||
int len = static_cast<int>(str.size());
|
||||
|
||||
|
||||
int i = len - 1;
|
||||
for (; i >= 0; --i)
|
||||
if (str[i] != c) return i;
|
||||
|
||||
|
||||
return i;
|
||||
}
|
||||
|
||||
|
@ -148,13 +57,13 @@ unsigned int cc_utf8_find_last_not_char(std::vector<unsigned short> str, unsigne
|
|||
*
|
||||
* Return value: none; the string is trimmed in place.
|
||||
* */
|
||||
static void cc_utf8_trim_from(std::vector<unsigned short>* str, int index)
|
||||
static void trimUTF16VectorFromIndex(std::vector<char16_t>& str, int index)
|
||||
{
|
||||
int size = static_cast<int>(str->size());
|
||||
int size = static_cast<int>(str.size());
|
||||
if (index >= size || index < 0)
|
||||
return;
|
||||
|
||||
str->erase(str->begin() + index, str->begin() + size);
|
||||
|
||||
str.erase(str.begin() + index, str.begin() + size);
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -164,14 +73,14 @@ static void cc_utf8_trim_from(std::vector<unsigned short>* str, int index)
|
|||
*
|
||||
* Return value: whether the character is a whitespace character.
|
||||
* */
|
||||
bool isspace_unicode(unsigned short ch)
|
||||
bool isUnicodeSpace(char16_t ch)
|
||||
{
|
||||
return (ch >= 0x0009 && ch <= 0x000D) || ch == 0x0020 || ch == 0x0085 || ch == 0x00A0 || ch == 0x1680
|
||||
|| (ch >= 0x2000 && ch <= 0x200A) || ch == 0x2028 || ch == 0x2029 || ch == 0x202F
|
||||
|| ch == 0x205F || ch == 0x3000;
|
||||
}
|
||||
|
||||
bool iscjk_unicode(unsigned short ch)
|
||||
bool isCJKUnicode(char16_t ch)
|
||||
{
|
||||
return (ch >= 0x4E00 && ch <= 0x9FBF) // CJK Unified Ideographs
|
||||
|| (ch >= 0x2E80 && ch <= 0x2FDF) // CJK Radicals Supplement & Kangxi Radicals
|
||||
|
@ -183,136 +92,147 @@ bool iscjk_unicode(unsigned short ch)
|
|||
|| (ch >= 0x31C0 && ch <= 0x4DFF); // Other extensions
|
||||
}
|
||||
|
||||
void cc_utf8_trim_ws(std::vector<unsigned short>* str)
|
||||
void trimUTF16Vector(std::vector<char16_t>& str)
|
||||
{
|
||||
int len = static_cast<int>(str->size());
|
||||
|
||||
int len = static_cast<int>(str.size());
|
||||
|
||||
if ( len <= 0 )
|
||||
return;
|
||||
|
||||
|
||||
int last_index = len - 1;
|
||||
|
||||
|
||||
// Only start trimming if the last character is whitespace..
|
||||
if (isspace_unicode((*str)[last_index]))
|
||||
if (isUnicodeSpace(str[last_index]))
|
||||
{
|
||||
for (int i = last_index - 1; i >= 0; --i)
|
||||
{
|
||||
if (isspace_unicode((*str)[i]))
|
||||
if (isUnicodeSpace(str[i]))
|
||||
last_index = i;
|
||||
else
|
||||
break;
|
||||
}
|
||||
|
||||
cc_utf8_trim_from(str, last_index);
|
||||
|
||||
trimUTF16VectorFromIndex(str, last_index);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* cc_utf8_strlen:
|
||||
* @p: pointer to the start of a UTF-8 encoded string.
|
||||
* @max: the maximum number of bytes to examine. If @max
|
||||
* is less than 0, then the string is assumed to be
|
||||
* null-terminated. If @max is 0, @p will not be examined and
|
||||
* may be %nullptr.
|
||||
*
|
||||
* Returns the length of the string in characters.
|
||||
*
|
||||
* Return value: the length of the string in characters
|
||||
**/
|
||||
long
|
||||
cc_utf8_strlen (const char * p, int max)
|
||||
bool UTF8ToUTF16(const std::string& utf8, std::u16string& outUtf16)
|
||||
{
|
||||
long len = 0;
|
||||
const char *start = p;
|
||||
|
||||
if (!(p != nullptr || max == 0))
|
||||
if (utf8.empty())
|
||||
{
|
||||
return 0;
|
||||
outUtf16.clear();
|
||||
return true;
|
||||
}
|
||||
|
||||
bool ret = false;
|
||||
|
||||
if (max < 0)
|
||||
const size_t utf16Bytes = (utf8.length()+1) * sizeof(char16_t);
|
||||
char16_t* utf16 = (char16_t*)malloc(utf16Bytes);
|
||||
memset(utf16, 0, utf16Bytes);
|
||||
|
||||
char* utf16ptr = reinterpret_cast<char*>(utf16);
|
||||
const UTF8* error = nullptr;
|
||||
|
||||
if (llvm::ConvertUTF8toWide(2, utf8, utf16ptr, error))
|
||||
{
|
||||
while (*p)
|
||||
{
|
||||
p = cc_utf8_next_char (p);
|
||||
++len;
|
||||
}
|
||||
outUtf16 = utf16;
|
||||
ret = true;
|
||||
}
|
||||
else
|
||||
{
|
||||
if (max == 0 || !*p)
|
||||
return 0;
|
||||
|
||||
p = cc_utf8_next_char (p);
|
||||
|
||||
while (p - start < max && *p)
|
||||
{
|
||||
++len;
|
||||
p = cc_utf8_next_char (p);
|
||||
}
|
||||
|
||||
/* only do the last len increment if we got a complete
|
||||
* char (don't count partial chars)
|
||||
*/
|
||||
if (p - start == max)
|
||||
++len;
|
||||
}
|
||||
|
||||
return len;
|
||||
|
||||
free(utf16);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* g_utf8_get_char:
|
||||
* @p: a pointer to Unicode character encoded as UTF-8
|
||||
*
|
||||
* Converts a sequence of bytes encoded as UTF-8 to a Unicode character.
|
||||
* If @p does not point to a valid UTF-8 encoded character, results are
|
||||
* undefined. If you are not sure that the bytes are complete
|
||||
* valid Unicode characters, you should use g_utf8_get_char_validated()
|
||||
* instead.
|
||||
*
|
||||
* Return value: the resulting character
|
||||
**/
|
||||
static unsigned int
|
||||
cc_utf8_get_char (const char * p)
|
||||
bool UTF16ToUTF8(const std::u16string& utf16, std::string& outUtf8)
|
||||
{
|
||||
int i, mask = 0, len;
|
||||
unsigned int result;
|
||||
unsigned char c = (unsigned char) *p;
|
||||
|
||||
UTF8_COMPUTE (c, mask, len);
|
||||
if (len == -1)
|
||||
return (unsigned int) - 1;
|
||||
UTF8_GET (result, p, i, mask, len);
|
||||
|
||||
return result;
|
||||
if (utf16.empty())
|
||||
{
|
||||
outUtf8.clear();
|
||||
return true;
|
||||
}
|
||||
|
||||
return llvm::convertUTF16ToUTF8String(utf16, outUtf8);
|
||||
}
|
||||
|
||||
std::vector<char16_t> getChar16VectorFromUTF16String(const std::u16string& utf16)
|
||||
{
|
||||
std::vector<char16_t> ret;
|
||||
size_t len = utf16.length();
|
||||
ret.reserve(len);
|
||||
for (size_t i = 0; i < len; ++i)
|
||||
{
|
||||
ret.push_back(utf16[i]);
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
long getCharacterCountInUTF8String(const std::string& utf8)
|
||||
{
|
||||
return getUTF8StringLength((const UTF8*)utf8.c_str());
|
||||
}
|
||||
|
||||
} //namespace StringUtils {
|
||||
|
||||
|
||||
int cc_wcslen(const unsigned short* str)
|
||||
{
|
||||
if (str == nullptr)
|
||||
return -1;
|
||||
int i=0;
|
||||
while(*str++) i++;
|
||||
return i;
|
||||
}
|
||||
|
||||
void cc_utf8_trim_ws(std::vector<unsigned short>* str)
|
||||
{
|
||||
if (str == nullptr)
|
||||
return;
|
||||
// unsigned short and char16_t are both 2 bytes
|
||||
std::vector<char16_t>* ret = reinterpret_cast<std::vector<char16_t>*>(str);
|
||||
StringUtils::trimUTF16Vector(*ret);
|
||||
}
|
||||
|
||||
bool isspace_unicode(unsigned short ch)
|
||||
{
|
||||
return StringUtils::isUnicodeSpace(ch);
|
||||
}
|
||||
|
||||
|
||||
unsigned short* cc_utf8_to_utf16(const char* str_old, int length/* = -1 */, int* rUtf16Size/* = nullptr */)
|
||||
bool iscjk_unicode(unsigned short ch)
|
||||
{
|
||||
long len = cc_utf8_strlen(str_old, length);
|
||||
if (rUtf16Size != nullptr) {
|
||||
*rUtf16Size = static_cast<int>(len);
|
||||
}
|
||||
|
||||
unsigned short* str_new = new unsigned short[len + 1];
|
||||
str_new[len] = 0;
|
||||
|
||||
for (int i = 0; i < len; ++i)
|
||||
return StringUtils::isCJKUnicode(ch);
|
||||
}
|
||||
|
||||
|
||||
long cc_utf8_strlen (const char * p, int max)
|
||||
{
|
||||
CC_UNUSED_PARAM(max);
|
||||
if (p == nullptr)
|
||||
return -1;
|
||||
return StringUtils::getCharacterCountInUTF8String(p);
|
||||
}
|
||||
|
||||
unsigned int cc_utf8_find_last_not_char(const std::vector<unsigned short>& str, unsigned short c)
|
||||
{
|
||||
std::vector<char16_t> char16Vector;
|
||||
for (const auto& e : str)
|
||||
{
|
||||
str_new[i] = cc_utf8_get_char(str_old);
|
||||
str_old = cc_utf8_next_char(str_old);
|
||||
char16Vector.push_back(e);
|
||||
}
|
||||
|
||||
return str_new;
|
||||
return StringUtils::getIndexOfLastNotChar16(char16Vector, c);
|
||||
}
|
||||
|
||||
std::vector<unsigned short> cc_utf16_vec_from_utf16_str(const unsigned short* str)
|
||||
{
|
||||
int len = cc_wcslen(str);
|
||||
std::vector<unsigned short> str_new;
|
||||
|
||||
if (str == nullptr)
|
||||
return str_new;
|
||||
|
||||
int len = cc_wcslen(str);
|
||||
|
||||
for (int i = 0; i < len; ++i)
|
||||
{
|
||||
str_new.push_back(str[i]);
|
||||
|
@ -320,209 +240,60 @@ std::vector<unsigned short> cc_utf16_vec_from_utf16_str(const unsigned short* st
|
|||
return str_new;
|
||||
}
|
||||
|
||||
/**
|
||||
* cc_unichar_to_utf8:
|
||||
* @c: a ISO10646 character code
|
||||
* @outbuf: output buffer, must have at least 6 bytes of space.
|
||||
* If %nullptr, the length will be computed and returned
|
||||
* and nothing will be written to @outbuf.
|
||||
*
|
||||
* Converts a single character to UTF-8.
|
||||
*
|
||||
* Return value: number of bytes written
|
||||
**/
|
||||
int
|
||||
cc_unichar_to_utf8 (unsigned int c,
|
||||
char *outbuf)
|
||||
unsigned short* cc_utf8_to_utf16(const char* str_old, int length/* = -1*/, int* rUtf16Size/* = nullptr*/)
|
||||
{
|
||||
int len = 0;
|
||||
int first;
|
||||
int i;
|
||||
if (str_old == nullptr)
|
||||
return nullptr;
|
||||
|
||||
if (c < 0x80)
|
||||
{
|
||||
first = 0;
|
||||
len = 1;
|
||||
}
|
||||
else if (c < 0x800)
|
||||
{
|
||||
first = 0xc0;
|
||||
len = 2;
|
||||
}
|
||||
else if (c < 0x10000)
|
||||
{
|
||||
first = 0xe0;
|
||||
len = 3;
|
||||
}
|
||||
else if (c < 0x200000)
|
||||
{
|
||||
first = 0xf0;
|
||||
len = 4;
|
||||
}
|
||||
else if (c < 0x4000000)
|
||||
{
|
||||
first = 0xf8;
|
||||
len = 5;
|
||||
}
|
||||
else
|
||||
{
|
||||
first = 0xfc;
|
||||
len = 6;
|
||||
}
|
||||
unsigned short* ret = nullptr;
|
||||
|
||||
if (outbuf)
|
||||
std::u16string outUtf16;
|
||||
bool succeed = StringUtils::UTF8ToUTF16(str_old, outUtf16);
|
||||
|
||||
if (succeed)
|
||||
{
|
||||
for (i = len - 1; i > 0; --i)
|
||||
ret = new unsigned short[outUtf16.length() + 1];
|
||||
ret[outUtf16.length()] = 0;
|
||||
memcpy(ret, outUtf16.data(), outUtf16.length() * sizeof(unsigned short));
|
||||
if (rUtf16Size)
|
||||
{
|
||||
outbuf[i] = (c & 0x3f) | 0x80;
|
||||
c >>= 6;
|
||||
*rUtf16Size = static_cast<int>(outUtf16.length());
|
||||
}
|
||||
outbuf[0] = c | first;
|
||||
}
|
||||
|
||||
return len;
|
||||
return ret;
|
||||
}
|
||||
|
||||
#define SURROGATE_VALUE(h,l) (((h) - 0xd800) * 0x400 + (l) - 0xdc00 + 0x10000)
|
||||
|
||||
/**
|
||||
* cc_utf16_to_utf8:
|
||||
* @str: a UTF-16 encoded string
|
||||
* @len: the maximum length of @str to use. If @len < 0, then
|
||||
* the string is terminated with a 0 character.
|
||||
* @items_read: location to store number of words read, or %nullptr.
|
||||
* If %nullptr, then %G_CONVERT_ERROR_PARTIAL_INPUT will be
|
||||
* returned in case @str contains a trailing partial
|
||||
* character. If an error occurs then the index of the
|
||||
* invalid input is stored here.
|
||||
* @items_written: location to store number of bytes written, or %nullptr.
|
||||
* The value stored here does not include the trailing
|
||||
* 0 byte.
|
||||
* @error: location to store the error occuring, or %nullptr to ignore
|
||||
* errors. Any of the errors in #GConvertError other than
|
||||
* %G_CONVERT_ERROR_NO_CONVERSION may occur.
|
||||
*
|
||||
* Convert a string from UTF-16 to UTF-8. The result will be
|
||||
* terminated with a 0 byte.
|
||||
*
|
||||
* Return value: a pointer to a newly allocated UTF-8 string.
|
||||
* This value must be freed with free(). If an
|
||||
* error occurs, %nullptr will be returned and
|
||||
* @error set.
|
||||
**/
|
||||
char *
|
||||
cc_utf16_to_utf8 (const unsigned short *str,
|
||||
int len,
|
||||
long *items_read,
|
||||
long *items_written)
|
||||
char * cc_utf16_to_utf8 (const unsigned short *str,
|
||||
int len,
|
||||
long *items_read,
|
||||
long *items_written)
|
||||
{
|
||||
/* This function and g_utf16_to_ucs4 are almost exactly identical - The lines that differ
|
||||
* are marked.
|
||||
*/
|
||||
const unsigned short *in;
|
||||
char *out;
|
||||
char *result = nullptr;
|
||||
int n_bytes;
|
||||
unsigned int high_surrogate;
|
||||
if (str == nullptr)
|
||||
return nullptr;
|
||||
|
||||
if (str == 0) return nullptr;
|
||||
|
||||
n_bytes = 0;
|
||||
in = str;
|
||||
high_surrogate = 0;
|
||||
while ((len < 0 || in - str < len) && *in)
|
||||
std::u16string utf16;
|
||||
int utf16Len = len < 0 ? cc_wcslen(str) : len;
|
||||
|
||||
for (int i = 0; i < utf16Len; ++i)
|
||||
{
|
||||
unsigned short c = *in;
|
||||
unsigned int wc;
|
||||
|
||||
if (c >= 0xdc00 && c < 0xe000) /* low surrogate */
|
||||
{
|
||||
if (high_surrogate)
|
||||
{
|
||||
wc = SURROGATE_VALUE (high_surrogate, c);
|
||||
high_surrogate = 0;
|
||||
}
|
||||
else
|
||||
{
|
||||
CCLOGERROR("Invalid sequence in conversion input");
|
||||
goto err_out;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if (high_surrogate)
|
||||
{
|
||||
CCLOGERROR("Invalid sequence in conversion input");
|
||||
goto err_out;
|
||||
}
|
||||
|
||||
if (c >= 0xd800 && c < 0xdc00) /* high surrogate */
|
||||
{
|
||||
high_surrogate = c;
|
||||
goto next1;
|
||||
}
|
||||
else
|
||||
wc = c;
|
||||
}
|
||||
|
||||
/********** DIFFERENT for UTF8/UCS4 **********/
|
||||
n_bytes += UTF8_LENGTH (wc);
|
||||
|
||||
next1:
|
||||
in++;
|
||||
utf16.push_back(str[i]);
|
||||
}
|
||||
|
||||
if (high_surrogate && !items_read)
|
||||
{
|
||||
CCLOGERROR("Partial character sequence at end of input");
|
||||
goto err_out;
|
||||
}
|
||||
char* ret = nullptr;
|
||||
std::string outUtf8;
|
||||
bool succeed = StringUtils::UTF16ToUTF8(utf16, outUtf8);
|
||||
|
||||
/* At this point, everything is valid, and we just need to convert
|
||||
*/
|
||||
/********** DIFFERENT for UTF8/UCS4 **********/
|
||||
result = new char[n_bytes + 1];
|
||||
|
||||
high_surrogate = 0;
|
||||
out = result;
|
||||
in = str;
|
||||
while (out < result + n_bytes)
|
||||
if (succeed)
|
||||
{
|
||||
unsigned short c = *in;
|
||||
unsigned int wc;
|
||||
|
||||
if (c >= 0xdc00 && c < 0xe000) /* low surrogate */
|
||||
{
|
||||
wc = SURROGATE_VALUE (high_surrogate, c);
|
||||
high_surrogate = 0;
|
||||
}
|
||||
else if (c >= 0xd800 && c < 0xdc00) /* high surrogate */
|
||||
{
|
||||
high_surrogate = c;
|
||||
goto next2;
|
||||
}
|
||||
else
|
||||
wc = c;
|
||||
|
||||
/********** DIFFERENT for UTF8/UCS4 **********/
|
||||
out += cc_unichar_to_utf8 (wc, out);
|
||||
|
||||
next2:
|
||||
in++;
|
||||
ret = new char[outUtf8.length() + 1];
|
||||
ret[outUtf8.length()] = '\0';
|
||||
memcpy(ret, outUtf8.data(), outUtf8.length());
|
||||
}
|
||||
|
||||
/********** DIFFERENT for UTF8/UCS4 **********/
|
||||
*out = '\0';
|
||||
|
||||
if (items_written)
|
||||
/********** DIFFERENT for UTF8/UCS4 **********/
|
||||
*items_written = out - result;
|
||||
|
||||
err_out:
|
||||
if (items_read)
|
||||
*items_read = in - str;
|
||||
|
||||
return result;
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
||||
NS_CC_END
|
||||
|
|
|
@ -1,113 +1,217 @@
|
|||
/*
|
||||
* Copyright (C) 1999 Tom Tromey
|
||||
* Copyright (C) 2000 Red Hat, Inc.
|
||||
* Copyright (c) 2013-2014 Chukong Technologies Inc.
|
||||
*
|
||||
* This library is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2 of the License, or (at your option) any later version.
|
||||
*
|
||||
* This library is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with this library; if not, write to the
|
||||
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
|
||||
* Boston, MA 02111-1307, USA.
|
||||
*/
|
||||
/****************************************************************************
|
||||
Copyright (c) 2014 cocos2d-x.org
|
||||
Copyright (c) 2014 Chukong Technologies Inc.
|
||||
|
||||
http://www.cocos2d-x.org
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
****************************************************************************/
|
||||
|
||||
#ifndef __cocos2dx__ccUTF8__
|
||||
#define __cocos2dx__ccUTF8__
|
||||
|
||||
#include "base/CCPlatformMacros.h"
|
||||
#include <vector>
|
||||
#include <string>
|
||||
|
||||
NS_CC_BEGIN
|
||||
|
||||
CC_DLL int cc_wcslen(const unsigned short* str);
|
||||
namespace StringUtils {
|
||||
|
||||
CC_DLL void cc_utf8_trim_ws(std::vector<unsigned short>* str);
|
||||
/**
|
||||
* @brief Converts utf8 string to utf16 string
|
||||
* @param utf8 The utf8 string to be converted
|
||||
* @param outUtf16 The output utf16 string
|
||||
* @return true if succeed, otherwise false
|
||||
* @note Please check the return value before using \p outUtf16
|
||||
* e.g.
|
||||
* @code
|
||||
* std::u16string utf16;
|
||||
* bool ret = StringUtils::UTF8ToUTF16("你好hello", utf16);
|
||||
* if (ret) {
|
||||
* do_some_thing_with_utf16(utf16);
|
||||
* }
|
||||
* @endcode
|
||||
*/
|
||||
CC_DLL bool UTF8ToUTF16(const std::string& utf8, std::u16string& outUtf16);
|
||||
|
||||
/**
|
||||
* @brief Converts utf16 string to utf8 string
|
||||
* @param utf16 The utf16 string to be converted
|
||||
* @param outUtf8 The output utf8 string
|
||||
* @return true if succeed, otherwise false
|
||||
* @note Please check the return value before using \p outUtf8
|
||||
* e.g.
|
||||
* @code
|
||||
* std::string utf8;
|
||||
* bool ret = StringUtils::UTF16ToUTF8(u"\u4f60\u597d", utf8);
|
||||
* if (ret) {
|
||||
* do_some_thing_with_utf8(utf8);
|
||||
* }
|
||||
* @endcode
|
||||
*/
|
||||
CC_DLL bool UTF16ToUTF8(const std::u16string& utf16, std::string& outUtf8);
|
||||
|
||||
/**
|
||||
* @brief Trims the unicode spaces at the end of char16_t vector
|
||||
*/
|
||||
CC_DLL void trimUTF16Vector(std::vector<char16_t>& str);
|
||||
|
||||
/**
|
||||
* @brief Whether the character is a whitespace character.
|
||||
*
|
||||
* @param ch the unicode character
|
||||
* @returns whether the character is a white space character.
|
||||
*
|
||||
* @see http://en.wikipedia.org/wiki/Whitespace_character#Unicode
|
||||
*
|
||||
*/
|
||||
CC_DLL bool isUnicodeSpace(char16_t ch);
|
||||
|
||||
/**
|
||||
* @brief Whether the character is a Chinese/Japanese/Korean character.
|
||||
*
|
||||
* @param ch the unicode character
|
||||
* @returns whether the character is a Chinese character.
|
||||
*
|
||||
* @see http://www.searchtb.com/2012/04/chinese_encode.html
|
||||
* @see http://tieba.baidu.com/p/748765987
|
||||
*
|
||||
*/
|
||||
CC_DLL bool isCJKUnicode(char16_t ch);
|
||||
|
||||
/**
|
||||
* @brief Returns the length of the string in characters.
|
||||
*
|
||||
* @param utf8 an UTF-8 encoded string.
|
||||
* @returns the length of the string in characters
|
||||
*/
|
||||
CC_DLL long getCharacterCountInUTF8String(const std::string& utf8);
|
||||
|
||||
/**
|
||||
* @brief Gets the index of the last character that is not equal to the character given.
|
||||
*
|
||||
* @param str the string to be searched.
|
||||
* @param c the character to be searched for.
|
||||
*
|
||||
* @returns the index of the last character that is not \p c.
|
||||
*
|
||||
*/
|
||||
CC_DLL unsigned int getIndexOfLastNotChar16(const std::vector<char16_t>& str, char16_t c);
|
||||
|
||||
/**
|
||||
* @brief Gets char16_t vector from a given utf16 string
|
||||
*/
|
||||
CC_DLL std::vector<char16_t> getChar16VectorFromUTF16String(const std::u16string& utf16);
|
||||
|
||||
} // namespace StringUtils {
|
||||
|
||||
/**
|
||||
* Returns the character count in UTF16 string
|
||||
* @param str pointer to the start of a UTF-16 encoded string. It must be a null-terminated UTF-16 string.
|
||||
* @deprecated Please use c++11 `std::u16string::length` instead, don't use `unsigned short*` directly
|
||||
*/
|
||||
CC_DEPRECATED_ATTRIBUTE CC_DLL int cc_wcslen(const unsigned short* str);
|
||||
|
||||
/** Trims the space characters at the end of a vector of UTF-16 code units
|
||||
* @deprecated Please use `StringUtils::trimUTF16Vector` instead
|
||||
*/
|
||||
|
||||
CC_DEPRECATED_ATTRIBUTE void cc_utf8_trim_ws(std::vector<unsigned short>* str);
|
||||
|
||||
/**
|
||||
* Whether the character is a whitespace character.
|
||||
*
|
||||
* @param ch the unicode character
|
||||
* @returns whether the character is a white space character.
|
||||
* @deprecated Please use `StringUtils::isUnicodeSpace` instead
|
||||
*
|
||||
* @see http://en.wikipedia.org/wiki/Whitespace_character#Unicode
|
||||
* */
|
||||
CC_DLL bool isspace_unicode(unsigned short ch);
|
||||
CC_DEPRECATED_ATTRIBUTE bool isspace_unicode(unsigned short ch);
|
||||
|
||||
/**
|
||||
* Whether the character is a Chinese/Japanese/Korean character.
|
||||
*
|
||||
* @param ch the unicode character
|
||||
* @returns whether the character is a Chinese character.
|
||||
* @deprecated Please use `StringUtils::isCJKUnicode` instead
|
||||
*
|
||||
* @see http://www.searchtb.com/2012/04/chinese_encode.html
|
||||
* @see http://tieba.baidu.com/p/748765987
|
||||
* */
|
||||
CC_DLL bool iscjk_unicode(unsigned short ch);
|
||||
CC_DEPRECATED_ATTRIBUTE bool iscjk_unicode(unsigned short ch);
|
||||
|
||||
/**
|
||||
* Returns the length of the string in characters.
|
||||
*
|
||||
* @param p pointer to the start of a UTF-8 encoded string.
|
||||
* @param max the maximum number of bytes to examine. If \p max is less than
|
||||
* 0, then the string is assumed to be null-terminated. If \p max
|
||||
* is 0, \p p will not be examined and my be %nullptr.
|
||||
*
|
||||
* @param p pointer to the start of a UTF-8 encoded string. It must be a null-terminated UTF-8 string.
|
||||
* @param max Not used from 3.1, just keep it for backward compatibility
|
||||
* @deprecated Please use `StringUtils::getCharacterCountInUTF8String` instead
|
||||
* @returns the length of the string in characters
|
||||
**/
|
||||
CC_DLL long
|
||||
cc_utf8_strlen (const char * p, int max);
|
||||
CC_DEPRECATED_ATTRIBUTE long cc_utf8_strlen (const char * p, int max = -1);
|
||||
|
||||
/**
|
||||
* Find the last character that is not equal to the character given.
|
||||
*
|
||||
* @param str the string to be searched.
|
||||
* @param c the character to be searched for.
|
||||
*
|
||||
* @deprecated Please use `StringUtils::getIndexOfLastNotChar16` instead
|
||||
* @returns the index of the last character that is not \p c.
|
||||
* */
|
||||
CC_DLL unsigned int cc_utf8_find_last_not_char(std::vector<unsigned short> str, unsigned short c);
|
||||
|
||||
CC_DLL std::vector<unsigned short> cc_utf16_vec_from_utf16_str(const unsigned short* str);
|
||||
CC_DEPRECATED_ATTRIBUTE unsigned int cc_utf8_find_last_not_char(const std::vector<unsigned short>& str, unsigned short c);
|
||||
|
||||
/**
|
||||
* Creates a utf8 string from a cstring.
|
||||
* @brief Gets `unsigned short` vector from a given utf16 string
|
||||
* @deprecated Please use `StringUtils::getChar16VectorFromUTF16String` instead
|
||||
*/
|
||||
CC_DEPRECATED_ATTRIBUTE std::vector<unsigned short> cc_utf16_vec_from_utf16_str(const unsigned short* str);
|
||||
|
||||
/**
|
||||
* Creates a UTF-16 string from a UTF-8 encoded C string. The result will be null terminated.
|
||||
*
|
||||
* @param str_old pointer to the start of a C string.
|
||||
*
|
||||
* @returns the newly created utf8 string.
|
||||
* @param str_old pointer to the start of a C string. It must be a null-terminated UTF-8 string.
|
||||
* @param length not used from 3.1, keep it just for backward compatibility
|
||||
* @param rUtf16Size The character count in the return UTF16 string.
|
||||
* @deprecated Please use `StringUtils::UTF8ToUTF16` instead
|
||||
* @returns the newly created utf16 string, it must be released with `delete[]`,
|
||||
* If an error occurs, %NULL will be returned.
|
||||
* */
|
||||
CC_DLL unsigned short* cc_utf8_to_utf16(const char* str_old, int length = -1, int* rUtf16Size = nullptr);
|
||||
CC_DEPRECATED_ATTRIBUTE unsigned short* cc_utf8_to_utf16(const char* str_old, int length = -1, int* rUtf16Size = nullptr);
|
||||
|
||||
/**
|
||||
* Convert a string from UTF-16 to UTF-8. The result will be null terminated.
|
||||
*
|
||||
* @param str a UTF-16 encoded string
|
||||
* @param len the maximum length of \p str to use. If \p len < 0, then the
|
||||
* string is null terminated.
|
||||
* @param items_read location to store number of words read, or %nullptr.
|
||||
* If %nullptr, then %G_CONVERT_ERROR_PARTIAL_INPUT will be
|
||||
* returned in case \p str contains a trailing partial
|
||||
* character. If an error occurs then the index of the
|
||||
* invalid input is stored here.
|
||||
* @param items_written location to store number of bytes written, or %nullptr.
|
||||
* The value stored here does not include the trailing null.
|
||||
* Converts a string from UTF-16 to UTF-8. The result will be null terminated.
|
||||
*
|
||||
* @param str a UTF-16 encoded string. It must be null-terminated when \p len is negative.
|
||||
* @param len the number of UTF-16 code units of \p str to convert. If \p len < 0, the length is computed up to the terminating null.
|
||||
* @param items_read not used from 3.1, keep it just for backward compatibility
|
||||
* @param items_written not used from 3.1, keep it just for backward compatibility
|
||||
* @deprecated Please use `StringUtils::UTF16ToUTF8` instead
|
||||
* @returns a pointer to a newly allocated UTF-8 string. This value must be
|
||||
* freed with free(). If an error occurs, %nullptr will be returned.
|
||||
* released with `delete[]`. If an error occurs, %NULL will be returned.
|
||||
**/
|
||||
CC_DLL char *
|
||||
cc_utf16_to_utf8 (const unsigned short *str,
|
||||
int len,
|
||||
long *items_read,
|
||||
long *items_written);
|
||||
CC_DEPRECATED_ATTRIBUTE char * cc_utf16_to_utf8 (const unsigned short *str,
|
||||
int len = -1,
|
||||
long *items_read = nullptr,
|
||||
long *items_written = nullptr);
|
||||
|
||||
|
||||
NS_CC_END
|
||||
|
||||
|
|
|
@ -23,8 +23,8 @@
|
|||
<PlatformToolset Condition="'$(VisualStudioVersion)' == '10.0'">v100</PlatformToolset>
|
||||
<PlatformToolset Condition="'$(VisualStudioVersion)' == '11.0'">v110</PlatformToolset>
|
||||
<PlatformToolset Condition="'$(VisualStudioVersion)' == '11.0' and exists('$(MSBuildProgramFiles32)\Microsoft SDKs\Windows\v7.1A')">v110_xp</PlatformToolset>
|
||||
<PlatformToolset Condition="'$(VisualStudioVersion)' == '12.0'">v120</PlatformToolset>
|
||||
<PlatformToolset Condition="'$(VisualStudioVersion)' == '12.0' and exists('$(MSBuildProgramFiles32)\Microsoft SDKs\Windows\v7.1A')">v120_xp</PlatformToolset>
|
||||
<PlatformToolset Condition="'$(VisualStudioVersion)' == '12.0'">v120</PlatformToolset>
|
||||
<PlatformToolset Condition="'$(VisualStudioVersion)' == '12.0' and exists('$(MSBuildProgramFiles32)\Microsoft SDKs\Windows\v7.1A')">v120_xp</PlatformToolset>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
|
||||
<ConfigurationType>StaticLibrary</ConfigurationType>
|
||||
|
@ -32,8 +32,8 @@
|
|||
<PlatformToolset Condition="'$(VisualStudioVersion)' == '10.0'">v100</PlatformToolset>
|
||||
<PlatformToolset Condition="'$(VisualStudioVersion)' == '11.0'">v110</PlatformToolset>
|
||||
<PlatformToolset Condition="'$(VisualStudioVersion)' == '11.0' and exists('$(MSBuildProgramFiles32)\Microsoft SDKs\Windows\v7.1A')">v110_xp</PlatformToolset>
|
||||
<PlatformToolset Condition="'$(VisualStudioVersion)' == '12.0'">v120</PlatformToolset>
|
||||
<PlatformToolset Condition="'$(VisualStudioVersion)' == '12.0' and exists('$(MSBuildProgramFiles32)\Microsoft SDKs\Windows\v7.1A')">v120_xp</PlatformToolset>
|
||||
<PlatformToolset Condition="'$(VisualStudioVersion)' == '12.0'">v120</PlatformToolset>
|
||||
<PlatformToolset Condition="'$(VisualStudioVersion)' == '12.0' and exists('$(MSBuildProgramFiles32)\Microsoft SDKs\Windows\v7.1A')">v120_xp</PlatformToolset>
|
||||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||
<ImportGroup Label="ExtensionSettings">
|
||||
|
@ -77,7 +77,7 @@
|
|||
</PreBuildEvent>
|
||||
<ClCompile>
|
||||
<Optimization>Disabled</Optimization>
|
||||
<AdditionalIncludeDirectories>$(MSBuildProgramFiles32)\Microsoft SDKs\Windows\v7.1A\include;$(EngineRoot)external\sqlite3\include;$(EngineRoot)external\unzip;$(EngineRoot)external\edtaa3func;$(EngineRoot)external\tinyxml2;$(EngineRoot)external\png\include\win32;$(EngineRoot)external\jpeg\include\win32;$(EngineRoot)external\tiff\include\win32;$(EngineRoot)external\webp\include\win32;$(EngineRoot)external\freetype2\include\win32;$(EngineRoot)external\win32-specific\icon\include;$(EngineRoot)external\win32-specific\zlib\include;$(EngineRoot)external\chipmunk\include\chipmunk;$(EngineRoot)external\xxhash;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
|
||||
<AdditionalIncludeDirectories>$(MSBuildProgramFiles32)\Microsoft SDKs\Windows\v7.1A\include;$(EngineRoot)external\sqlite3\include;$(EngineRoot)external\unzip;$(EngineRoot)external\edtaa3func;$(EngineRoot)external\tinyxml2;$(EngineRoot)external\png\include\win32;$(EngineRoot)external\jpeg\include\win32;$(EngineRoot)external\tiff\include\win32;$(EngineRoot)external\webp\include\win32;$(EngineRoot)external\freetype2\include\win32;$(EngineRoot)external\win32-specific\icon\include;$(EngineRoot)external\win32-specific\zlib\include;$(EngineRoot)external\chipmunk\include\chipmunk;$(EngineRoot)external\xxhash;$(EngineRoot)external\ConvertUTF;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
|
||||
<PreprocessorDefinitions>WIN32;_DEBUG;_WINDOWS;_LIB;COCOS2DXWIN32_EXPORTS;GL_GLEXT_PROTOTYPES;COCOS2D_DEBUG=1;_CRT_SECURE_NO_WARNINGS;_SCL_SECURE_NO_WARNINGS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<MinimalRebuild>false</MinimalRebuild>
|
||||
<BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
|
||||
|
@ -125,7 +125,7 @@ xcopy /Y /Q "$(ProjectDir)..\..\external\win32-specific\gles\prebuilt\*.*" "$(Ou
|
|||
</Command>
|
||||
</PreBuildEvent>
|
||||
<ClCompile>
|
||||
<AdditionalIncludeDirectories>$(MSBuildProgramFiles32)\Microsoft SDKs\Windows\v7.1A\include;$(EngineRoot)external\sqlite3\include;$(EngineRoot)external\unzip;$(EngineRoot)external\edtaa3func;$(EngineRoot)external\tinyxml2;$(EngineRoot)external\png\include\win32;$(EngineRoot)external\jpeg\include\win32;$(EngineRoot)external\tiff\include\win32;$(EngineRoot)external\webp\include\win32;$(EngineRoot)external\freetype2\include\win32;$(EngineRoot)external\win32-specific\icon\include;$(EngineRoot)external\win32-specific\zlib\include;$(EngineRoot)external\chipmunk\include\chipmunk;$(EngineRoot)external\xxhash;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
|
||||
<AdditionalIncludeDirectories>$(MSBuildProgramFiles32)\Microsoft SDKs\Windows\v7.1A\include;$(EngineRoot)external\sqlite3\include;$(EngineRoot)external\unzip;$(EngineRoot)external\edtaa3func;$(EngineRoot)external\tinyxml2;$(EngineRoot)external\png\include\win32;$(EngineRoot)external\jpeg\include\win32;$(EngineRoot)external\tiff\include\win32;$(EngineRoot)external\webp\include\win32;$(EngineRoot)external\freetype2\include\win32;$(EngineRoot)external\win32-specific\icon\include;$(EngineRoot)external\win32-specific\zlib\include;$(EngineRoot)external\chipmunk\include\chipmunk;$(EngineRoot)external\xxhash;$(EngineRoot)external\ConvertUTF;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
|
||||
<PreprocessorDefinitions>WIN32;NDEBUG;_WINDOWS;_LIB;COCOS2DXWIN32_EXPORTS;GL_GLEXT_PROTOTYPES;_CRT_SECURE_NO_WARNINGS;_SCL_SECURE_NO_WARNINGS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
|
||||
<PrecompiledHeader>
|
||||
|
@ -168,6 +168,8 @@ xcopy /Y /Q "$(ProjectDir)..\..\external\win32-specific\gles\prebuilt\*.*" "$(Ou
|
|||
</PostBuildEvent>
|
||||
</ItemDefinitionGroup>
|
||||
<ItemGroup>
|
||||
<ClCompile Include="..\..\external\ConvertUTF\ConvertUTF.c" />
|
||||
<ClCompile Include="..\..\external\ConvertUTF\ConvertUTFWrapper.cpp" />
|
||||
<ClCompile Include="..\..\external\edtaa3func\edtaa3func.cpp" />
|
||||
<ClCompile Include="..\..\external\tinyxml2\tinyxml2.cpp" />
|
||||
<ClCompile Include="..\..\external\unzip\ioapi.cpp" />
|
||||
|
@ -331,6 +333,7 @@ xcopy /Y /Q "$(ProjectDir)..\..\external\win32-specific\gles\prebuilt\*.*" "$(Ou
|
|||
<ClCompile Include="TGAlib.cpp" />
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<ClInclude Include="..\..\external\ConvertUTF\ConvertUTF.h" />
|
||||
<ClInclude Include="..\..\external\edtaa3func\edtaa3func.h" />
|
||||
<ClInclude Include="..\..\external\tinyxml2\tinyxml2.h" />
|
||||
<ClInclude Include="..\..\external\unzip\ioapi.h" />
|
||||
|
|
|
@ -100,8 +100,8 @@
|
|||
<Filter Include="math">
|
||||
<UniqueIdentifier>{02a21a86-8f65-441b-ae13-11dec1c45ee5}</UniqueIdentifier>
|
||||
</Filter>
|
||||
<Filter Include="renderer\shaders">
|
||||
<UniqueIdentifier>{438e71df-3684-4619-9659-10e07ed6cd62}</UniqueIdentifier>
|
||||
<Filter Include="ConvertUTF">
|
||||
<UniqueIdentifier>{6c1e4a6b-c168-436b-aa63-0af7f4caebf9}</UniqueIdentifier>
|
||||
</Filter>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
|
@ -406,7 +406,6 @@
|
|||
<ClCompile Include="platform\CCImage.cpp">
|
||||
<Filter>platform</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="CCTweenFunction.cpp" />
|
||||
<ClCompile Include="..\..\external\xxhash\xxhash.c">
|
||||
<Filter>xxhash</Filter>
|
||||
</ClCompile>
|
||||
|
@ -582,6 +581,15 @@
|
|||
<ClCompile Include="..\renderer\CCGLProgramCache.cpp">
|
||||
<Filter>renderer</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\..\external\ConvertUTF\ConvertUTF.c">
|
||||
<Filter>ConvertUTF</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\..\external\ConvertUTF\ConvertUTFWrapper.cpp">
|
||||
<Filter>ConvertUTF</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="CCTweenFunction.cpp">
|
||||
<Filter>actions</Filter>
|
||||
</ClCompile>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<ClInclude Include="..\physics\CCPhysicsBody.h">
|
||||
|
@ -915,7 +923,6 @@
|
|||
<ClInclude Include="platform\desktop\CCGLView.h">
|
||||
<Filter>platform\desktop</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="CCTweenFunction.h" />
|
||||
<ClInclude Include="..\..\external\xxhash\xxhash.h">
|
||||
<Filter>xxhash</Filter>
|
||||
</ClInclude>
|
||||
|
@ -1130,6 +1137,12 @@
|
|||
<ClInclude Include="..\renderer\CCGLProgramCache.h">
|
||||
<Filter>renderer</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\..\external\ConvertUTF\ConvertUTF.h">
|
||||
<Filter>ConvertUTF</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="CCTweenFunction.h">
|
||||
<Filter>actions</Filter>
|
||||
</ClInclude>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<Text Include="..\math\CMakeLists.txt">
|
||||
|
|
|
@ -119,4 +119,4 @@ bool FileUtilsLinux::isFileExistInternal(const std::string& strFilePath) const
|
|||
|
||||
NS_CC_END
|
||||
|
||||
#endif CC_TARGET_PLATFORM == CC_PLATFORM_LINUX
|
||||
#endif // CC_TARGET_PLATFORM == CC_PLATFORM_LINUX
|
||||
|
|
|
@ -159,6 +159,8 @@ physics/chipmunk/CCPhysicsContactInfo_chipmunk.cpp \
|
|||
physics/chipmunk/CCPhysicsJointInfo_chipmunk.cpp \
|
||||
physics/chipmunk/CCPhysicsShapeInfo_chipmunk.cpp \
|
||||
physics/chipmunk/CCPhysicsWorldInfo_chipmunk.cpp \
|
||||
../external/ConvertUTF/ConvertUTFWrapper.cpp \
|
||||
../external/ConvertUTF/ConvertUTF.c \
|
||||
../external/tinyxml2/tinyxml2.cpp \
|
||||
../external/unzip/ioapi.cpp \
|
||||
../external/unzip/unzip.cpp \
|
||||
|
@ -182,7 +184,8 @@ LOCAL_C_INCLUDES := $(LOCAL_PATH) \
|
|||
$(LOCAL_PATH)/../external/unzip \
|
||||
$(LOCAL_PATH)/../external/chipmunk/include/chipmunk \
|
||||
$(LOCAL_PATH)/../external/edtaa3func \
|
||||
$(LOCAL_PATH)/../external/xxhash
|
||||
$(LOCAL_PATH)/../external/xxhash \
|
||||
$(LOCAL_PATH)/../external/ConvertUTF
|
||||
|
||||
|
||||
LOCAL_LDLIBS := -lGLESv2 \
|
||||
|
|
|
@ -272,4 +272,29 @@ __String* __String::clone() const
|
|||
return __String::create(_string);
|
||||
}
|
||||
|
||||
namespace StringUtils {

/**
 * Builds a std::string from a printf-style format string and its arguments.
 *
 * The text is formatted into a temporary heap buffer of CC_MAX_STRING_LENGTH
 * bytes, so results longer than CC_MAX_STRING_LENGTH - 1 characters are
 * truncated.
 *
 * @param format printf-style format string.
 * @param ...    values consumed by the conversion specifiers in \p format.
 * @returns the formatted text, or an empty string if the temporary buffer
 *          could not be allocated.
 */
std::string format(const char* format, ...)
{
#define CC_MAX_STRING_LENGTH (1024*100)

    std::string ret;

    va_list ap;
    va_start(ap, format);

    char* buf = (char*)malloc(CC_MAX_STRING_LENGTH);
    if (buf != nullptr)
    {
        // Start from an empty string so the buffer is never read while
        // uninitialized.
        buf[0] = '\0';
        vsnprintf(buf, CC_MAX_STRING_LENGTH, format, ap);
        // Not every C runtime guarantees a terminator when the output is
        // truncated (e.g. `_vsnprintf`-style semantics), so force one before
        // the buffer is scanned as a C string.
        buf[CC_MAX_STRING_LENGTH - 1] = '\0';
        ret = buf;
        free(buf);
    }
    va_end(ap);

    return ret;
}

} // namespace StringUtils
|
||||
|
||||
NS_CC_END
|
||||
|
|
|
@ -205,40 +205,19 @@ struct StringCompare : public std::binary_function<__String *, __String *, bool>
|
|||
#define StringMake(str) String::create(str)
|
||||
#define ccs StringMake
|
||||
|
||||
class StringUtils
|
||||
namespace StringUtils {
|
||||
|
||||
template<typename T>
|
||||
std::string toString(T arg)
|
||||
{
|
||||
public:
|
||||
std::stringstream ss;
|
||||
ss << arg;
|
||||
return ss.str();
|
||||
}
|
||||
|
||||
std::string format(const char* format, ...) CC_FORMAT_PRINTF(1, 2);
|
||||
|
||||
template<typename T>
|
||||
static std::string toString(T arg)
|
||||
{
|
||||
std::stringstream ss;
|
||||
ss << arg;
|
||||
return ss.str();
|
||||
}
|
||||
|
||||
static std::string format(const char* format, ...) CC_FORMAT_PRINTF(1, 2)
|
||||
{
|
||||
#define CC_MAX_STRING_LENGTH (1024*100)
|
||||
|
||||
std::string ret;
|
||||
|
||||
va_list ap;
|
||||
va_start(ap, format);
|
||||
|
||||
char* buf = (char*)malloc(CC_MAX_STRING_LENGTH);
|
||||
if (buf != nullptr)
|
||||
{
|
||||
vsnprintf(buf, CC_MAX_STRING_LENGTH, format, ap);
|
||||
ret = buf;
|
||||
free(buf);
|
||||
}
|
||||
va_end(ap);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
};
|
||||
} // namespace StringUtils {
|
||||
|
||||
// end of data_structure group
|
||||
/// @}
|
||||
|
|
|
@ -0,0 +1,589 @@
|
|||
/*===--- ConvertUTF.c - Universal Character Names conversions ---------------===
|
||||
*
|
||||
* The LLVM Compiler Infrastructure
|
||||
*
|
||||
* This file is distributed under the University of Illinois Open Source
|
||||
* License. See LICENSE.TXT for details.
|
||||
*
|
||||
*===------------------------------------------------------------------------=*/
|
||||
/*
|
||||
* Copyright 2001-2004 Unicode, Inc.
|
||||
*
|
||||
* Disclaimer
|
||||
*
|
||||
* This source code is provided as is by Unicode, Inc. No claims are
|
||||
* made as to fitness for any particular purpose. No warranties of any
|
||||
* kind are expressed or implied. The recipient agrees to determine
|
||||
* applicability of information provided. If this file has been
|
||||
* purchased on magnetic or optical media from Unicode, Inc., the
|
||||
* sole remedy for any claim will be exchange of defective media
|
||||
* within 90 days of receipt.
|
||||
*
|
||||
* Limitations on Rights to Redistribute This Code
|
||||
*
|
||||
* Unicode, Inc. hereby grants the right to freely use the information
|
||||
* supplied in this file in the creation of products supporting the
|
||||
* Unicode Standard, and to make copies of this file in any form
|
||||
* for internal or external distribution as long as this notice
|
||||
* remains attached.
|
||||
*/
|
||||
|
||||
/* ---------------------------------------------------------------------
|
||||
|
||||
Conversions between UTF32, UTF-16, and UTF-8. Source code file.
|
||||
Author: Mark E. Davis, 1994.
|
||||
Rev History: Rick McGowan, fixes & updates May 2001.
|
||||
Sept 2001: fixed const & error conditions per
|
||||
mods suggested by S. Parent & A. Lillich.
|
||||
June 2002: Tim Dodd added detection and handling of incomplete
|
||||
source sequences, enhanced error detection, added casts
|
||||
to eliminate compiler warnings.
|
||||
July 2003: slight mods to back out aggressive FFFE detection.
|
||||
Jan 2004: updated switches in from-UTF8 conversions.
|
||||
Oct 2004: updated to use UNI_MAX_LEGAL_UTF32 in UTF-32 conversions.
|
||||
|
||||
See the header file "ConvertUTF.h" for complete documentation.
|
||||
|
||||
------------------------------------------------------------------------ */
|
||||
|
||||
|
||||
#include "ConvertUTF.h"
|
||||
#ifdef CVTUTF_DEBUG
|
||||
#include <stdio.h>
|
||||
#endif
|
||||
|
||||
#include <string.h>
|
||||
|
||||
static const int halfShift = 10; /* used for shifting by 10 bits */
|
||||
|
||||
static const UTF32 halfBase = 0x0010000UL;
|
||||
static const UTF32 halfMask = 0x3FFUL;
|
||||
|
||||
#define UNI_SUR_HIGH_START (UTF32)0xD800
|
||||
#define UNI_SUR_HIGH_END (UTF32)0xDBFF
|
||||
#define UNI_SUR_LOW_START (UTF32)0xDC00
|
||||
#define UNI_SUR_LOW_END (UTF32)0xDFFF
|
||||
#define false 0
|
||||
#define true 1
|
||||
|
||||
/* --------------------------------------------------------------------- */
|
||||
|
||||
/*
|
||||
* Index into the table below with the first byte of a UTF-8 sequence to
|
||||
* get the number of trailing bytes that are supposed to follow it.
|
||||
* Note that *legal* UTF-8 values can't have 4 or 5 trailing bytes. The table is
|
||||
* left as-is for anyone who may want to do such conversion, which was
|
||||
* allowed in earlier algorithms.
|
||||
*/
|
||||
static const char trailingBytesForUTF8[256] = {
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
|
||||
2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5
|
||||
};
|
||||
|
||||
/*
|
||||
* Magic values subtracted from a buffer value during UTF8 conversion.
|
||||
* This table contains as many values as there might be trailing bytes
|
||||
* in a UTF-8 sequence.
|
||||
*/
|
||||
static const UTF32 offsetsFromUTF8[6] = { 0x00000000UL, 0x00003080UL, 0x000E2080UL,
|
||||
0x03C82080UL, 0xFA082080UL, 0x82082080UL };
|
||||
|
||||
/*
|
||||
* Once the bits are split out into bytes of UTF-8, this is a mask OR-ed
|
||||
* into the first byte, depending on how many bytes follow. There are
|
||||
* as many entries in this table as there are UTF-8 sequence types.
|
||||
* (I.e., one byte sequence, two byte... etc.). Remember that sequences
|
||||
* for *legal* UTF-8 will be 4 or fewer bytes total.
|
||||
*/
|
||||
static const UTF8 firstByteMark[7] = { 0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC };
|
||||
|
||||
/* --------------------------------------------------------------------- */
|
||||
|
||||
/* The interface converts a whole buffer to avoid function-call overhead.
|
||||
* Constants have been gathered. Loops & conditionals have been removed as
|
||||
* much as possible for efficiency, in favor of drop-through switches.
|
||||
* (See "Note A" at the bottom of the file for equivalent code.)
|
||||
* If your compiler supports it, the "isLegalUTF8" call can be turned
|
||||
* into an inline function.
|
||||
*/
|
||||
|
||||
|
||||
/* --------------------------------------------------------------------- */
|
||||
|
||||
/*
 * Converts UTF-32 code points in [*sourceStart, sourceEnd) to UTF-16 code
 * units written into [*targetStart, targetEnd).
 *
 * On return *sourceStart and *targetStart are advanced to the positions the
 * conversion reached. The result is conversionOK, targetExhausted or
 * sourceIllegal.
 */
ConversionResult ConvertUTF32toUTF16 (
        const UTF32** sourceStart, const UTF32* sourceEnd,
        UTF16** targetStart, UTF16* targetEnd, ConversionFlags flags) {
    ConversionResult status = conversionOK;
    const UTF32* in = *sourceStart;
    UTF16* out = *targetStart;

    while (in < sourceEnd) {
        UTF32 cp;

        if (out >= targetEnd) {
            status = targetExhausted;
            break;
        }
        cp = *in++;

        if (cp <= UNI_MAX_BMP) {
            /* Fits in a single UTF-16 unit unless it is a surrogate value. */
            if (cp < UNI_SUR_HIGH_START || cp > UNI_SUR_LOW_END) {
                *out++ = (UTF16)cp; /* normal case */
            } else if (flags == strictConversion) {
                --in; /* leave the input pointer on the illegal value */
                status = sourceIllegal;
                break;
            } else {
                *out++ = UNI_REPLACEMENT_CHAR;
            }
        } else if (cp > UNI_MAX_LEGAL_UTF32) {
            /*
             * NOTE(review): unlike the surrogate case above, strict mode only
             * records the error here and keeps converting; no break is taken.
             */
            if (flags == strictConversion) {
                status = sourceIllegal;
            } else {
                *out++ = UNI_REPLACEMENT_CHAR;
            }
        } else {
            /* Needs a surrogate pair, i.e. room for two output units. */
            if (out + 1 >= targetEnd) {
                --in; /* re-read this code point on the next call */
                status = targetExhausted;
                break;
            }
            cp -= halfBase;
            *out++ = (UTF16)((cp >> halfShift) + UNI_SUR_HIGH_START);
            *out++ = (UTF16)((cp & halfMask) + UNI_SUR_LOW_START);
        }
    }

    *sourceStart = in;
    *targetStart = out;
    return status;
}
|
||||
|
||||
/* --------------------------------------------------------------------- */
|
||||
|
||||
/*
 * Converts UTF-16 code units in [*sourceStart, sourceEnd) to UTF-32 code
 * points written into [*targetStart, targetEnd).
 *
 * Surrogate pairs are combined into one code point. In strict mode an
 * unpaired surrogate stops the conversion with sourceIllegal; otherwise it is
 * copied through unchanged. A high surrogate at the very end of the input
 * yields sourceExhausted. Both start pointers are updated on return.
 */
ConversionResult ConvertUTF16toUTF32 (
        const UTF16** sourceStart, const UTF16* sourceEnd,
        UTF32** targetStart, UTF32* targetEnd, ConversionFlags flags) {
    ConversionResult status = conversionOK;
    const UTF16* in = *sourceStart;
    UTF32* out = *targetStart;
    UTF32 lead, trail;

    while (in < sourceEnd) {
        /* Where this character began, so the input can be rewound. */
        const UTF16* unitStart = in;

        lead = *in++;
        if (lead >= UNI_SUR_HIGH_START && lead <= UNI_SUR_HIGH_END) {
            if (in >= sourceEnd) {
                /* The unit after the high surrogate is not available yet. */
                in = unitStart;
                status = sourceExhausted;
                break;
            }
            trail = *in;
            if (trail >= UNI_SUR_LOW_START && trail <= UNI_SUR_LOW_END) {
                lead = ((lead - UNI_SUR_HIGH_START) << halfShift)
                     + (trail - UNI_SUR_LOW_START) + halfBase;
                ++in;
            } else if (flags == strictConversion) {
                /* Unpaired high surrogate. */
                in = unitStart;
                status = sourceIllegal;
                break;
            }
        } else if (flags == strictConversion
                   && lead >= UNI_SUR_LOW_START && lead <= UNI_SUR_LOW_END) {
            /* Stray low surrogate. */
            in = unitStart;
            status = sourceIllegal;
            break;
        }

        if (out >= targetEnd) {
            in = unitStart;
            status = targetExhausted;
            break;
        }
        *out++ = lead;
    }

    *sourceStart = in;
    *targetStart = out;
#ifdef CVTUTF_DEBUG
    if (status == sourceIllegal) {
        fprintf(stderr, "ConvertUTF16toUTF32 illegal seq 0x%04x,%04x\n", lead, trail);
        fflush(stderr);
    }
#endif
    return status;
}
|
||||
/*
 * Converts UTF-16 code units in [*sourceStart, sourceEnd) to UTF-8 bytes
 * written into [*targetStart, targetEnd).
 *
 * Surrogate pairs are combined before encoding. In strict mode an unpaired
 * surrogate stops the conversion with sourceIllegal. Both start pointers are
 * updated on return.
 */
ConversionResult ConvertUTF16toUTF8 (
        const UTF16** sourceStart, const UTF16* sourceEnd,
        UTF8** targetStart, UTF8* targetEnd, ConversionFlags flags) {
    const UTF32 continuationMask = 0xBF;
    const UTF32 continuationMark = 0x80;
    ConversionResult status = conversionOK;
    const UTF16* in = *sourceStart;
    UTF8* out = *targetStart;

    while (in < sourceEnd) {
        /* Where this character began, so the input can be rewound. */
        const UTF16* unitStart = in;
        unsigned short byteCount = 0;
        unsigned short remaining;
        UTF32 cp = *in++;

        if (cp >= UNI_SUR_HIGH_START && cp <= UNI_SUR_HIGH_END) {
            UTF32 trail;
            if (in >= sourceEnd) {
                /* The unit after the high surrogate is not available yet. */
                in = unitStart;
                status = sourceExhausted;
                break;
            }
            trail = *in;
            if (trail >= UNI_SUR_LOW_START && trail <= UNI_SUR_LOW_END) {
                cp = ((cp - UNI_SUR_HIGH_START) << halfShift)
                   + (trail - UNI_SUR_LOW_START) + halfBase;
                ++in;
            } else if (flags == strictConversion) {
                /* Unpaired high surrogate. */
                in = unitStart;
                status = sourceIllegal;
                break;
            }
        } else if (flags == strictConversion
                   && cp >= UNI_SUR_LOW_START && cp <= UNI_SUR_LOW_END) {
            /* Stray low surrogate. */
            in = unitStart;
            status = sourceIllegal;
            break;
        }

        /* Work out how many bytes the encoded form needs. */
        if (cp < (UTF32)0x80) {
            byteCount = 1;
        } else if (cp < (UTF32)0x800) {
            byteCount = 2;
        } else if (cp < (UTF32)0x10000) {
            byteCount = 3;
        } else if (cp < (UTF32)0x110000) {
            byteCount = 4;
        } else {
            byteCount = 3;
            cp = UNI_REPLACEMENT_CHAR;
        }

        out += byteCount;
        if (out > targetEnd) {
            in = unitStart;
            out -= byteCount;
            status = targetExhausted;
            break;
        }
        /* Emit back to front: continuation bytes first, lead byte last. */
        for (remaining = byteCount; remaining > 1; --remaining) {
            *--out = (UTF8)((cp | continuationMark) & continuationMask);
            cp >>= 6;
        }
        *--out = (UTF8)(cp | firstByteMark[byteCount]);
        out += byteCount;
    }

    *sourceStart = in;
    *targetStart = out;
    return status;
}
|
||||
|
||||
/* --------------------------------------------------------------------- */
|
||||
|
||||
/*
 * Converts UTF-32 code points in [*sourceStart, sourceEnd) to UTF-8 bytes
 * written into [*targetStart, targetEnd).
 *
 * In strict mode a surrogate value stops the conversion with sourceIllegal.
 * A value above UNI_MAX_LEGAL_UTF32 is replaced by UNI_REPLACEMENT_CHAR and
 * recorded as sourceIllegal, but conversion continues. Both start pointers
 * are updated on return.
 */
ConversionResult ConvertUTF32toUTF8 (
        const UTF32** sourceStart, const UTF32* sourceEnd,
        UTF8** targetStart, UTF8* targetEnd, ConversionFlags flags) {
    const UTF32 continuationMask = 0xBF;
    const UTF32 continuationMark = 0x80;
    ConversionResult status = conversionOK;
    const UTF32* in = *sourceStart;
    UTF8* out = *targetStart;

    while (in < sourceEnd) {
        unsigned short byteCount = 0;
        unsigned short remaining;
        UTF32 cp = *in++;

        if (flags == strictConversion
            && cp >= UNI_SUR_HIGH_START && cp <= UNI_SUR_LOW_END) {
            /* UTF-16 surrogate values are illegal in UTF-32. */
            --in; /* leave the input pointer on the illegal value */
            status = sourceIllegal;
            break;
        }

        /* Work out how many bytes the encoded form needs. */
        if (cp < (UTF32)0x80) {
            byteCount = 1;
        } else if (cp < (UTF32)0x800) {
            byteCount = 2;
        } else if (cp < (UTF32)0x10000) {
            byteCount = 3;
        } else if (cp <= UNI_MAX_LEGAL_UTF32) {
            byteCount = 4;
        } else {
            byteCount = 3;
            cp = UNI_REPLACEMENT_CHAR;
            status = sourceIllegal;
        }

        out += byteCount;
        if (out > targetEnd) {
            --in; /* re-read this code point on the next call */
            out -= byteCount;
            status = targetExhausted;
            break;
        }
        /* Emit back to front: continuation bytes first, lead byte last. */
        for (remaining = byteCount; remaining > 1; --remaining) {
            *--out = (UTF8)((cp | continuationMark) & continuationMask);
            cp >>= 6;
        }
        *--out = (UTF8)(cp | firstByteMark[byteCount]);
        out += byteCount;
    }

    *sourceStart = in;
    *targetStart = out;
    return status;
}
|
||||
|
||||
/* --------------------------------------------------------------------- */
|
||||
|
||||
/*
|
||||
* Utility routine to tell whether a sequence of bytes is legal UTF-8.
|
||||
* This must be called with the length pre-determined by the first byte.
|
||||
* If not calling this from ConvertUTF8to*, then the length can be set by:
|
||||
* length = trailingBytesForUTF8[*source]+1;
|
||||
* and the sequence is illegal right away if there aren't that many bytes
|
||||
* available.
|
||||
* If presented with a length > 4, this returns false. The Unicode
|
||||
* definition of UTF-8 goes up to 4-byte sequences.
|
||||
*/
|
||||
|
||||
static Boolean isLegalUTF8(const UTF8 *source, int length) {
|
||||
UTF8 a;
|
||||
const UTF8 *srcptr = source+length;
|
||||
switch (length) {
|
||||
default: return false;
|
||||
/* Everything else falls through when "true"... */
|
||||
case 4: if ((a = (*--srcptr)) < 0x80 || a > 0xBF) return false;
|
||||
case 3: if ((a = (*--srcptr)) < 0x80 || a > 0xBF) return false;
|
||||
case 2: if ((a = (*--srcptr)) < 0x80 || a > 0xBF) return false;
|
||||
|
||||
switch (*source) {
|
||||
/* no fall-through in this inner switch */
|
||||
case 0xE0: if (a < 0xA0) return false; break;
|
||||
case 0xED: if (a > 0x9F) return false; break;
|
||||
case 0xF0: if (a < 0x90) return false; break;
|
||||
case 0xF4: if (a > 0x8F) return false; break;
|
||||
default: if (a < 0x80) return false;
|
||||
}
|
||||
|
||||
case 1: if (*source >= 0x80 && *source < 0xC2) return false;
|
||||
}
|
||||
if (*source > 0xF4) return false;
|
||||
return true;
|
||||
}
|
||||
|
||||
/* --------------------------------------------------------------------- */
|
||||
|
||||
/*
|
||||
* Exported function to return whether a UTF-8 sequence is legal or not.
|
||||
* This is not used here; it's just exported.
|
||||
*/
|
||||
Boolean isLegalUTF8Sequence(const UTF8 *source, const UTF8 *sourceEnd) {
|
||||
int length = trailingBytesForUTF8[*source]+1;
|
||||
if (length > sourceEnd - source) {
|
||||
return false;
|
||||
}
|
||||
return isLegalUTF8(source, length);
|
||||
}
|
||||
|
||||
/* --------------------------------------------------------------------- */
|
||||
|
||||
/*
|
||||
* Exported function to return the total number of bytes in a codepoint
|
||||
* represented in UTF-8, given the value of the first byte.
|
||||
*/
|
||||
unsigned getNumBytesForUTF8(UTF8 first) {
    /* Table value is the number of bytes that follow the first one. */
    const unsigned trailing = (unsigned)trailingBytesForUTF8[first];
    return trailing + 1;
}
|
||||
|
||||
int getUTF8StringLength(const UTF8* utf8)
|
||||
{
|
||||
const UTF8** source = &utf8;
|
||||
const UTF8* sourceEnd = utf8 + strlen((const char*)utf8);
|
||||
int ret = 0;
|
||||
while (*source != sourceEnd) {
|
||||
int length = trailingBytesForUTF8[**source] + 1;
|
||||
if (length > sourceEnd - *source || !isLegalUTF8(*source, length))
|
||||
return 0;
|
||||
*source += length;
|
||||
++ret;
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
||||
/* --------------------------------------------------------------------- */
|
||||
|
||||
/*
|
||||
* Exported function to return whether a UTF-8 string is legal or not.
|
||||
* This is not used here; it's just exported.
|
||||
*/
|
||||
Boolean isLegalUTF8String(const UTF8 **source, const UTF8 *sourceEnd) {
|
||||
while (*source != sourceEnd) {
|
||||
int length = trailingBytesForUTF8[**source] + 1;
|
||||
if (length > sourceEnd - *source || !isLegalUTF8(*source, length))
|
||||
return false;
|
||||
*source += length;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
/* --------------------------------------------------------------------- */
|
||||
|
||||
/*
 * Converts UTF-8 bytes in [*sourceStart, sourceEnd) to UTF-16 code units
 * written into [*targetStart, targetEnd).
 *
 * Each sequence is validated with isLegalUTF8() whatever the flags are. Code
 * points above UNI_MAX_BMP are written as a surrogate pair. Both start
 * pointers are updated on return.
 */
ConversionResult ConvertUTF8toUTF16 (
        const UTF8** sourceStart, const UTF8* sourceEnd,
        UTF16** targetStart, UTF16* targetEnd, ConversionFlags flags) {
    ConversionResult status = conversionOK;
    const UTF8* in = *sourceStart;
    UTF16* out = *targetStart;

    while (in < sourceEnd) {
        /* Where this sequence began, so the input can be rewound. */
        const UTF8* seqStart = in;
        unsigned short trailing = trailingBytesForUTF8[*in];
        unsigned short left;
        UTF32 cp = 0;

        if (trailing >= sourceEnd - in) {
            status = sourceExhausted;
            break;
        }
        /* Do this check whether lenient or strict */
        if (!isLegalUTF8(in, trailing + 1)) {
            status = sourceIllegal;
            break;
        }

        /* Accumulate all bytes, then subtract the combined marker bits. */
        for (left = trailing; left > 0; --left) {
            cp += *in++;
            cp <<= 6;
        }
        cp += *in++;
        cp -= offsetsFromUTF8[trailing];

        if (out >= targetEnd) {
            in = seqStart;
            status = targetExhausted;
            break;
        }

        if (cp <= UNI_MAX_BMP) {
            /* Fits in a single UTF-16 unit unless it is a surrogate value. */
            if (cp < UNI_SUR_HIGH_START || cp > UNI_SUR_LOW_END) {
                *out++ = (UTF16)cp; /* normal case */
            } else if (flags == strictConversion) {
                in = seqStart; /* leave the input pointer on the illegal value */
                status = sourceIllegal;
                break;
            } else {
                *out++ = UNI_REPLACEMENT_CHAR;
            }
        } else if (cp > UNI_MAX_UTF16) {
            if (flags == strictConversion) {
                status = sourceIllegal;
                in = seqStart;
                break; /* Bail out; shouldn't continue */
            }
            *out++ = UNI_REPLACEMENT_CHAR;
        } else {
            /* Needs a surrogate pair, i.e. room for two output units. */
            if (out + 1 >= targetEnd) {
                in = seqStart;
                status = targetExhausted;
                break;
            }
            cp -= halfBase;
            *out++ = (UTF16)((cp >> halfShift) + UNI_SUR_HIGH_START);
            *out++ = (UTF16)((cp & halfMask) + UNI_SUR_LOW_START);
        }
    }

    *sourceStart = in;
    *targetStart = out;
    return status;
}
|
||||
|
||||
/* --------------------------------------------------------------------- */
|
||||
|
||||
/*
 * Converts UTF-8 bytes in [*sourceStart, sourceEnd) to UTF-32 code points
 * written into [*targetStart, targetEnd).
 *
 * Each sequence is validated with isLegalUTF8() whatever the flags are. A
 * decoded value above UNI_MAX_LEGAL_UTF32 is replaced by
 * UNI_REPLACEMENT_CHAR and recorded as sourceIllegal, but conversion
 * continues. Both start pointers are updated on return.
 */
ConversionResult ConvertUTF8toUTF32 (
        const UTF8** sourceStart, const UTF8* sourceEnd,
        UTF32** targetStart, UTF32* targetEnd, ConversionFlags flags) {
    ConversionResult status = conversionOK;
    const UTF8* in = *sourceStart;
    UTF32* out = *targetStart;

    while (in < sourceEnd) {
        /* Where this sequence began, so the input can be rewound. */
        const UTF8* seqStart = in;
        unsigned short trailing = trailingBytesForUTF8[*in];
        unsigned short left;
        UTF32 cp = 0;

        if (trailing >= sourceEnd - in) {
            status = sourceExhausted;
            break;
        }
        /* Do this check whether lenient or strict */
        if (!isLegalUTF8(in, trailing + 1)) {
            status = sourceIllegal;
            break;
        }

        /* Accumulate all bytes, then subtract the combined marker bits. */
        for (left = trailing; left > 0; --left) {
            cp += *in++;
            cp <<= 6;
        }
        cp += *in++;
        cp -= offsetsFromUTF8[trailing];

        if (out >= targetEnd) {
            in = seqStart;
            status = targetExhausted;
            break;
        }

        if (cp > UNI_MAX_LEGAL_UTF32) {
            /* Beyond the last legal code point: substitute and flag it. */
            status = sourceIllegal;
            *out++ = UNI_REPLACEMENT_CHAR;
        } else if (cp < UNI_SUR_HIGH_START || cp > UNI_SUR_LOW_END) {
            *out++ = cp;
        } else if (flags == strictConversion) {
            /* UTF-16 surrogate values are illegal in UTF-32. */
            in = seqStart; /* leave the input pointer on the illegal value */
            status = sourceIllegal;
            break;
        } else {
            *out++ = UNI_REPLACEMENT_CHAR;
        }
    }

    *sourceStart = in;
    *targetStart = out;
    return status;
}
|
||||
|
||||
/* ---------------------------------------------------------------------
|
||||
|
||||
Note A.
|
||||
The fall-through switches in UTF-8 reading code save a
|
||||
temp variable, some decrements & conditionals. The switches
|
||||
are equivalent to the following loop:
|
||||
{
|
||||
int tmpBytesToRead = extraBytesToRead+1;
|
||||
do {
|
||||
ch += *source++;
|
||||
--tmpBytesToRead;
|
||||
if (tmpBytesToRead) ch <<= 6;
|
||||
} while (tmpBytesToRead > 0);
|
||||
}
|
||||
In UTF-8 writing code, the switches on "bytesToWrite" are
|
||||
similarly unrolled loops.
|
||||
|
||||
--------------------------------------------------------------------- */
|
|
@ -0,0 +1,254 @@
|
|||
/*===--- ConvertUTF.h - Universal Character Names conversions ---------------===
|
||||
*
|
||||
* The LLVM Compiler Infrastructure
|
||||
*
|
||||
* This file is distributed under the University of Illinois Open Source
|
||||
* License. See LICENSE.TXT for details.
|
||||
*
|
||||
*==------------------------------------------------------------------------==*/
|
||||
/*
|
||||
* Copyright 2001-2004 Unicode, Inc.
|
||||
*
|
||||
* Disclaimer
|
||||
*
|
||||
* This source code is provided as is by Unicode, Inc. No claims are
|
||||
* made as to fitness for any particular purpose. No warranties of any
|
||||
* kind are expressed or implied. The recipient agrees to determine
|
||||
* applicability of information provided. If this file has been
|
||||
* purchased on magnetic or optical media from Unicode, Inc., the
|
||||
* sole remedy for any claim will be exchange of defective media
|
||||
* within 90 days of receipt.
|
||||
*
|
||||
* Limitations on Rights to Redistribute This Code
|
||||
*
|
||||
* Unicode, Inc. hereby grants the right to freely use the information
|
||||
* supplied in this file in the creation of products supporting the
|
||||
* Unicode Standard, and to make copies of this file in any form
|
||||
* for internal or external distribution as long as this notice
|
||||
* remains attached.
|
||||
*/
|
||||
|
||||
/* ---------------------------------------------------------------------
|
||||
|
||||
Conversions between UTF32, UTF-16, and UTF-8. Header file.
|
||||
|
||||
Several functions are included here, forming a complete set of
|
||||
conversions between the three formats. UTF-7 is not included
|
||||
here, but is handled in a separate source file.
|
||||
|
||||
Each of these routines takes pointers to input buffers and output
|
||||
buffers. The input buffers are const.
|
||||
|
||||
Each routine converts the text between *sourceStart and sourceEnd,
|
||||
putting the result into the buffer between *targetStart and
|
||||
targetEnd. Note: the end pointers are *after* the last item: e.g.
|
||||
*(sourceEnd - 1) is the last item.
|
||||
|
||||
The return result indicates whether the conversion was successful,
|
||||
and if not, whether the problem was in the source or target buffers.
|
||||
(Only the first encountered problem is indicated.)
|
||||
|
||||
After the conversion, *sourceStart and *targetStart are both
|
||||
updated to point to the end of last text successfully converted in
|
||||
the respective buffers.
|
||||
|
||||
Input parameters:
|
||||
sourceStart - pointer to a pointer to the source buffer.
|
||||
The contents of this are modified on return so that
|
||||
it points at the next thing to be converted.
|
||||
targetStart - similarly, pointer to pointer to the target buffer.
|
||||
sourceEnd, targetEnd - respectively pointers to the ends of the
|
||||
two buffers, for overflow checking only.
|
||||
|
||||
These conversion functions take a ConversionFlags argument. When this
|
||||
flag is set to strict, both irregular sequences and isolated surrogates
|
||||
will cause an error. When the flag is set to lenient, both irregular
|
||||
sequences and isolated surrogates are converted.
|
||||
|
||||
Whether the flag is strict or lenient, all illegal sequences will cause
|
||||
an error return. This includes sequences such as: <F4 90 80 80>, <C0 80>,
|
||||
or <A0> in UTF-8, and values above 0x10FFFF in UTF-32. Conformant code
|
||||
must check for illegal sequences.
|
||||
|
||||
When the flag is set to lenient, characters over 0x10FFFF are converted
|
||||
to the replacement character; otherwise (when the flag is set to strict)
|
||||
they constitute an error.
|
||||
|
||||
Output parameters:
|
||||
The value "sourceIllegal" is returned from some routines if the input
|
||||
sequence is malformed. When "sourceIllegal" is returned, the source
|
||||
value will point to the illegal value that caused the problem. E.g.,
|
||||
in UTF-8 when a sequence is malformed, it points to the start of the
|
||||
malformed sequence.
|
||||
|
||||
Author: Mark E. Davis, 1994.
|
||||
Rev History: Rick McGowan, fixes & updates May 2001.
|
||||
Fixes & updates, Sept 2001.
|
||||
|
||||
------------------------------------------------------------------------ */
|
||||
|
||||
#ifndef LLVM_SUPPORT_CONVERTUTF_H
|
||||
#define LLVM_SUPPORT_CONVERTUTF_H
|
||||
|
||||
#include <stddef.h> /* ptrdiff_t */
|
||||
/* ---------------------------------------------------------------------
|
||||
The following 4 definitions are compiler-specific.
|
||||
The C standard does not guarantee that wchar_t has at least
|
||||
16 bits, so wchar_t is no less portable than unsigned short!
|
||||
All should be unsigned values to avoid sign extension during
|
||||
bit mask & shift operations.
|
||||
------------------------------------------------------------------------ */
|
||||
|
||||
/* Code-unit types. All are unsigned so that masks and shifts never
   sign-extend. */
typedef unsigned int    UTF32;   /* at least 32 bits */
typedef unsigned short  UTF16;   /* at least 16 bits */
typedef unsigned char   UTF8;    /* typically 8 bits */
typedef unsigned char   Boolean; /* 0 or 1 */

/* Some fundamental constants.
   Each replacement list is wrapped in parentheses so the cast stays attached
   to its literal wherever the macro is expanded (e.g. `sizeof UNI_MAX_BMP`). */
#define UNI_REPLACEMENT_CHAR ((UTF32)0x0000FFFD)
#define UNI_MAX_BMP          ((UTF32)0x0000FFFF)
#define UNI_MAX_UTF16        ((UTF32)0x0010FFFF)
#define UNI_MAX_UTF32        ((UTF32)0x7FFFFFFF)
#define UNI_MAX_LEGAL_UTF32  ((UTF32)0x0010FFFF)

/* Longest UTF-8 encoding, in bytes, of a single code point. */
#define UNI_MAX_UTF8_BYTES_PER_CODE_POINT 4

/* UTF-16 byte order mark as read in native order, and as it appears when the
   data has the opposite byte order. */
#define UNI_UTF16_BYTE_ORDER_MARK_NATIVE  0xFEFF
#define UNI_UTF16_BYTE_ORDER_MARK_SWAPPED 0xFFFE

/* Outcome reported by every conversion routine. */
typedef enum {
  conversionOK,           /* conversion successful */
  sourceExhausted,        /* partial character in source, but hit end */
  targetExhausted,        /* insuff. room in target for conversion */
  sourceIllegal           /* source sequence is illegal/malformed */
} ConversionResult;

/* Strictness selector passed to the conversion routines. */
typedef enum {
  strictConversion = 0,
  lenientConversion
} ConversionFlags;
|
||||
|
||||
/* This is for C++ and does no harm in C */
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
ConversionResult ConvertUTF8toUTF16 (
|
||||
const UTF8** sourceStart, const UTF8* sourceEnd,
|
||||
UTF16** targetStart, UTF16* targetEnd, ConversionFlags flags);
|
||||
|
||||
ConversionResult ConvertUTF8toUTF32 (
|
||||
const UTF8** sourceStart, const UTF8* sourceEnd,
|
||||
UTF32** targetStart, UTF32* targetEnd, ConversionFlags flags);
|
||||
|
||||
ConversionResult ConvertUTF16toUTF8 (
|
||||
const UTF16** sourceStart, const UTF16* sourceEnd,
|
||||
UTF8** targetStart, UTF8* targetEnd, ConversionFlags flags);
|
||||
|
||||
ConversionResult ConvertUTF32toUTF8 (
|
||||
const UTF32** sourceStart, const UTF32* sourceEnd,
|
||||
UTF8** targetStart, UTF8* targetEnd, ConversionFlags flags);
|
||||
|
||||
ConversionResult ConvertUTF16toUTF32 (
|
||||
const UTF16** sourceStart, const UTF16* sourceEnd,
|
||||
UTF32** targetStart, UTF32* targetEnd, ConversionFlags flags);
|
||||
|
||||
ConversionResult ConvertUTF32toUTF16 (
|
||||
const UTF32** sourceStart, const UTF32* sourceEnd,
|
||||
UTF16** targetStart, UTF16* targetEnd, ConversionFlags flags);
|
||||
|
||||
Boolean isLegalUTF8Sequence(const UTF8 *source, const UTF8 *sourceEnd);
|
||||
|
||||
Boolean isLegalUTF8String(const UTF8 **source, const UTF8 *sourceEnd);
|
||||
|
||||
unsigned getNumBytesForUTF8(UTF8 firstByte);
|
||||
|
||||
int getUTF8StringLength(const UTF8* utf8);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
|
||||
/*************************************************************************/
|
||||
/* Below are LLVM-specific wrappers of the functions above. */
|
||||
|
||||
//#include "llvm/ADT/ArrayRef.h"
|
||||
//#include "llvm/ADT/StringRef.h"
|
||||
|
||||
#include <vector>
|
||||
#include <string>
|
||||
|
||||
namespace llvm {
|
||||
|
||||
/**
|
||||
* Convert a UTF8 string to UTF8, UTF16, or UTF32 depending on
|
||||
* WideCharWidth. The converted data is written to ResultPtr, which needs to
|
||||
* point to at least WideCharWidth * (Source.size() + 1) bytes. On success,
|
||||
* ResultPtr will point one after the end of the copied string. On failure,
|
||||
* ResultPtr will not be changed, and ErrorPtr will be set to the location of
|
||||
* the first character which could not be converted.
|
||||
* \return true on success.
|
||||
*/
|
||||
bool ConvertUTF8toWide(unsigned WideCharWidth, const std::string& Source,
|
||||
char *&ResultPtr, const UTF8 *&ErrorPtr);
|
||||
|
||||
/**
|
||||
* Convert a Unicode code point to UTF8 sequence.
|
||||
*
|
||||
* \param Source a Unicode code point.
|
||||
* \param [in,out] ResultPtr pointer to the output buffer, needs to be at least
|
||||
* \c UNI_MAX_UTF8_BYTES_PER_CODE_POINT bytes. On success \c ResultPtr is
|
||||
* updated one past end of the converted sequence.
|
||||
*
|
||||
* \returns true on success.
|
||||
*/
|
||||
bool ConvertCodePointToUTF8(unsigned Source, char *&ResultPtr);
|
||||
|
||||
/**
|
||||
* Convert the first UTF8 sequence in the given source buffer to a UTF32
|
||||
* code point.
|
||||
*
|
||||
* \param [in,out] source A pointer to the source buffer. If the conversion
|
||||
* succeeds, this pointer will be updated to point to the byte just past the
|
||||
* end of the converted sequence.
|
||||
* \param sourceEnd A pointer just past the end of the source buffer.
|
||||
* \param [out] target The converted code
|
||||
* \param flags Whether the conversion is strict or lenient.
|
||||
*
|
||||
* \returns conversionOK on success
|
||||
*
|
||||
* \sa ConvertUTF8toUTF32
|
||||
*/
|
||||
static inline ConversionResult convertUTF8Sequence(const UTF8 **source,
                                                   const UTF8 *sourceEnd,
                                                   UTF32 *target,
                                                   ConversionFlags flags) {
  const UTF8 *const first = *source;

  // Nothing left to decode.
  if (first == sourceEnd)
    return sourceExhausted;

  // Length the lead byte announces, versus what the buffer still holds.
  const unsigned size = getNumBytesForUTF8(*first);
  const ptrdiff_t available = sourceEnd - first;
  if (static_cast<ptrdiff_t>(size) > available)
    return sourceExhausted;

  // Convert exactly one sequence into exactly one output slot.
  UTF32 *out = target;
  return ConvertUTF8toUTF32(source, first + size, &out, out + 1, flags);
}
|
||||
|
||||
/**
|
||||
* Returns true if a blob of text starts with a UTF-16 big or little endian byte
|
||||
* order mark.
|
||||
*/
|
||||
bool hasUTF16ByteOrderMark(const char* SrcBytes, size_t len);
|
||||
|
||||
/**
|
||||
* Converts a stream of raw bytes assumed to be UTF16 into a UTF8 std::string.
|
||||
*
|
||||
* \param [in] utf16 A string of what is assumed to be UTF-16 encoded text.
|
||||
* \param [out] Out Converted UTF-8 is stored here on success.
|
||||
* \returns true on success
|
||||
*/
|
||||
bool convertUTF16ToUTF8String(const std::u16string& utf16, std::string &Out);
|
||||
|
||||
} /* end namespace llvm */
|
||||
|
||||
#endif
|
||||
|
||||
/* --------------------------------------------------------------------- */
|
||||
|
||||
#endif
|
|
@ -0,0 +1,144 @@
|
|||
//===-- ConvertUTFWrapper.cpp - Wrap ConvertUTF.h with clang data types -----===
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "ConvertUTF.h"
|
||||
//#include "llvm/Support/SwapByteOrder.h"
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <stdint.h> // uint16_t
|
||||
#include <assert.h>
|
||||
#include <memory.h>
|
||||
|
||||
namespace llvm {
|
||||
|
||||
bool ConvertUTF8toWide(unsigned WideCharWidth, const std::string& Source,
|
||||
char *&ResultPtr, const UTF8 *&ErrorPtr) {
|
||||
assert(WideCharWidth == 1 || WideCharWidth == 2 || WideCharWidth == 4);
|
||||
ConversionResult result = conversionOK;
|
||||
// Copy the character span over.
|
||||
if (WideCharWidth == 1) {
|
||||
const UTF8 *Pos = reinterpret_cast<const UTF8*>(Source.data());
|
||||
if (!isLegalUTF8String(&Pos, reinterpret_cast<const UTF8*>(Source.data() + Source.length()))) {
|
||||
result = sourceIllegal;
|
||||
ErrorPtr = Pos;
|
||||
} else {
|
||||
memcpy(ResultPtr, Source.data(), Source.size());
|
||||
ResultPtr += Source.size();
|
||||
}
|
||||
} else if (WideCharWidth == 2) {
|
||||
const UTF8 *sourceStart = (const UTF8*)Source.data();
|
||||
// FIXME: Make the type of the result buffer correct instead of
|
||||
// using reinterpret_cast.
|
||||
UTF16 *targetStart = reinterpret_cast<UTF16*>(ResultPtr);
|
||||
ConversionFlags flags = strictConversion;
|
||||
result = ConvertUTF8toUTF16(
|
||||
&sourceStart, sourceStart + Source.size(),
|
||||
&targetStart, targetStart + 2*Source.size(), flags);
|
||||
if (result == conversionOK)
|
||||
ResultPtr = reinterpret_cast<char*>(targetStart);
|
||||
else
|
||||
ErrorPtr = sourceStart;
|
||||
} else if (WideCharWidth == 4) {
|
||||
const UTF8 *sourceStart = (const UTF8*)Source.data();
|
||||
// FIXME: Make the type of the result buffer correct instead of
|
||||
// using reinterpret_cast.
|
||||
UTF32 *targetStart = reinterpret_cast<UTF32*>(ResultPtr);
|
||||
ConversionFlags flags = strictConversion;
|
||||
result = ConvertUTF8toUTF32(
|
||||
&sourceStart, sourceStart + Source.size(),
|
||||
&targetStart, targetStart + 4*Source.size(), flags);
|
||||
if (result == conversionOK)
|
||||
ResultPtr = reinterpret_cast<char*>(targetStart);
|
||||
else
|
||||
ErrorPtr = sourceStart;
|
||||
}
|
||||
assert((result != targetExhausted)
|
||||
&& "ConvertUTF8toUTFXX exhausted target buffer");
|
||||
return result == conversionOK;
|
||||
}
|
||||
|
||||
bool ConvertCodePointToUTF8(unsigned Source, char *&ResultPtr) {
|
||||
const UTF32 *SourceStart = &Source;
|
||||
const UTF32 *SourceEnd = SourceStart + 1;
|
||||
UTF8 *TargetStart = reinterpret_cast<UTF8 *>(ResultPtr);
|
||||
UTF8 *TargetEnd = TargetStart + 4;
|
||||
ConversionResult CR = ConvertUTF32toUTF8(&SourceStart, SourceEnd,
|
||||
&TargetStart, TargetEnd,
|
||||
strictConversion);
|
||||
if (CR != conversionOK)
|
||||
return false;
|
||||
|
||||
ResultPtr = reinterpret_cast<char*>(TargetStart);
|
||||
return true;
|
||||
}
|
||||
|
||||
/// Reports whether the first two bytes of \p S are FF FE or FE FF.
/// Buffers shorter than two bytes never match.
bool hasUTF16ByteOrderMark(const char* S, size_t len) {
  if (len < 2)
    return false;

  const bool startsWithFFFE = S[0] == '\xff' && S[1] == '\xfe';
  const bool startsWithFEFF = S[0] == '\xfe' && S[1] == '\xff';
  return startsWithFFFE || startsWithFEFF;
}
|
||||
|
||||
/// SwapByteOrder_16 - Returns \p value with its two bytes exchanged.
inline uint16_t SwapByteOrder_16(uint16_t value) {
#if defined(_MSC_VER) && !defined(_DEBUG)
  // The DLL version of the runtime lacks these functions (bug!?), but in a
  // release build they're replaced with BSWAP instructions anyway.
  return _byteswap_ushort(value);
#else
  // Low byte moves up, high byte moves down; the cast drops the bits that
  // integer promotion lets the left shift carry above bit 15.
  return static_cast<uint16_t>((value << 8) | (value >> 8));
#endif
}
|
||||
|
||||
bool convertUTF16ToUTF8String(const std::u16string& utf16, std::string &Out) {
|
||||
assert(Out.empty());
|
||||
|
||||
// Avoid OOB by returning early on empty input.
|
||||
if (utf16.empty())
|
||||
return true;
|
||||
|
||||
const UTF16 *Src = reinterpret_cast<const UTF16 *>(utf16.data());
|
||||
const UTF16 *SrcEnd = reinterpret_cast<const UTF16 *>(utf16.data() + utf16.length());
|
||||
|
||||
// Byteswap if necessary.
|
||||
std::vector<UTF16> ByteSwapped;
|
||||
if (Src[0] == UNI_UTF16_BYTE_ORDER_MARK_SWAPPED) {
|
||||
ByteSwapped.insert(ByteSwapped.end(), Src, SrcEnd);
|
||||
for (size_t I = 0, E = ByteSwapped.size(); I != E; ++I)
|
||||
ByteSwapped[I] = SwapByteOrder_16(ByteSwapped[I]);
|
||||
Src = &ByteSwapped[0];
|
||||
SrcEnd = &ByteSwapped[ByteSwapped.size() - 1] + 1;
|
||||
}
|
||||
|
||||
// Skip the BOM for conversion.
|
||||
if (Src[0] == UNI_UTF16_BYTE_ORDER_MARK_NATIVE)
|
||||
Src++;
|
||||
|
||||
// Just allocate enough space up front. We'll shrink it later.
|
||||
Out.resize(utf16.length() * UNI_MAX_UTF8_BYTES_PER_CODE_POINT + 1);
|
||||
UTF8 *Dst = reinterpret_cast<UTF8 *>(&Out[0]);
|
||||
UTF8 *DstEnd = Dst + Out.size();
|
||||
|
||||
ConversionResult CR =
|
||||
ConvertUTF16toUTF8(&Src, SrcEnd, &Dst, DstEnd, strictConversion);
|
||||
assert(CR != targetExhausted);
|
||||
|
||||
if (CR != conversionOK) {
|
||||
Out.clear();
|
||||
return false;
|
||||
}
|
||||
|
||||
Out.resize(reinterpret_cast<char *>(Dst) - &Out[0]);
|
||||
return true;
|
||||
}
|
||||
|
||||
} // end namespace llvm
|
||||
|
|
@ -0,0 +1,43 @@
|
|||
==============================================================================
|
||||
LLVM Release License
|
||||
==============================================================================
|
||||
University of Illinois/NCSA
|
||||
Open Source License
|
||||
|
||||
Copyright (c) 2003-2013 University of Illinois at Urbana-Champaign.
|
||||
All rights reserved.
|
||||
|
||||
Developed by:
|
||||
|
||||
LLVM Team
|
||||
|
||||
University of Illinois at Urbana-Champaign
|
||||
|
||||
http://llvm.org
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy of
|
||||
this software and associated documentation files (the "Software"), to deal with
|
||||
the Software without restriction, including without limitation the rights to
|
||||
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
|
||||
of the Software, and to permit persons to whom the Software is furnished to do
|
||||
so, subject to the following conditions:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimers.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright notice,
|
||||
this list of conditions and the following disclaimers in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the names of the LLVM Team, University of Illinois at
|
||||
Urbana-Champaign, nor the names of its contributors may be used to
|
||||
endorse or promote products derived from this Software without specific
|
||||
prior written permission.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
|
||||
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE
|
||||
SOFTWARE.
|
|
@ -0,0 +1,80 @@
|
|||
/*
|
||||
* Copyright 2001-2004 Unicode, Inc.
|
||||
*
|
||||
* Disclaimer
|
||||
*
|
||||
* This source code is provided as is by Unicode, Inc. No claims are
|
||||
* made as to fitness for any particular purpose. No warranties of any
|
||||
* kind are expressed or implied. The recipient agrees to determine
|
||||
* applicability of information provided. If this file has been
|
||||
* purchased on magnetic or optical media from Unicode, Inc., the
|
||||
* sole remedy for any claim will be exchange of defective media
|
||||
* within 90 days of receipt.
|
||||
*
|
||||
* Limitations on Rights to Redistribute This Code
|
||||
*
|
||||
* Unicode, Inc. hereby grants the right to freely use the information
|
||||
* supplied in this file in the creation of products supporting the
|
||||
* Unicode Standard, and to make copies of this file in any form
|
||||
* for internal or external distribution as long as this notice
|
||||
* remains attached.
|
||||
*/
|
||||
|
||||
/* ---------------------------------------------------------------------
|
||||
|
||||
Conversions between UTF32, UTF-16, and UTF-8. Header file.
|
||||
|
||||
Several functions are included here, forming a complete set of
|
||||
conversions between the three formats. UTF-7 is not included
|
||||
here, but is handled in a separate source file.
|
||||
|
||||
Each of these routines takes pointers to input buffers and output
|
||||
buffers. The input buffers are const.
|
||||
|
||||
Each routine converts the text between *sourceStart and sourceEnd,
|
||||
putting the result into the buffer between *targetStart and
|
||||
targetEnd. Note: the end pointers are *after* the last item: e.g.
|
||||
*(sourceEnd - 1) is the last item.
|
||||
|
||||
The return result indicates whether the conversion was successful,
|
||||
and if not, whether the problem was in the source or target buffers.
|
||||
(Only the first encountered problem is indicated.)
|
||||
|
||||
After the conversion, *sourceStart and *targetStart are both
|
||||
updated to point to the end of last text successfully converted in
|
||||
the respective buffers.
|
||||
|
||||
Input parameters:
|
||||
sourceStart - pointer to a pointer to the source buffer.
|
||||
The contents of this are modified on return so that
|
||||
it points at the next thing to be converted.
|
||||
targetStart - similarly, pointer to pointer to the target buffer.
|
||||
sourceEnd, targetEnd - respectively pointers to the ends of the
|
||||
two buffers, for overflow checking only.
|
||||
|
||||
These conversion functions take a ConversionFlags argument. When this
|
||||
flag is set to strict, both irregular sequences and isolated surrogates
|
||||
will cause an error. When the flag is set to lenient, both irregular
|
||||
sequences and isolated surrogates are converted.
|
||||
|
||||
Whether the flag is strict or lenient, all illegal sequences will cause
|
||||
an error return. This includes sequences such as: <F4 90 80 80>, <C0 80>,
|
||||
or <A0> in UTF-8, and values above 0x10FFFF in UTF-32. Conformant code
|
||||
must check for illegal sequences.
|
||||
|
||||
When the flag is set to lenient, characters over 0x10FFFF are converted
|
||||
to the replacement character; otherwise (when the flag is set to strict)
|
||||
they constitute an error.
|
||||
|
||||
Output parameters:
|
||||
The value "sourceIllegal" is returned from some routines if the input
|
||||
sequence is malformed. When "sourceIllegal" is returned, the source
|
||||
value will point to the illegal value that caused the problem. E.g.,
|
||||
in UTF-8 when a sequence is malformed, it points to the start of the
|
||||
malformed sequence.
|
||||
|
||||
Author: Mark E. Davis, 1994.
|
||||
Rev History: Rick McGowan, fixes & updates May 2001.
|
||||
Fixes & updates, Sept 2001.
|
||||
|
||||
------------------------------------------------------------------------ */
|
|
@ -7,7 +7,8 @@ static std::function<Layer*()> createFunctions[] = {
|
|||
CL(TemplateVectorTest),
|
||||
CL(TemplateMapTest),
|
||||
CL(ValueTest),
|
||||
CL(RefPtrTest)
|
||||
CL(RefPtrTest),
|
||||
CL(UTFConversionTest)
|
||||
};
|
||||
|
||||
static int sceneIdx = -1;
|
||||
|
@ -659,3 +660,159 @@ void ValueTest::constFunc(const Value& value) const
|
|||
{
|
||||
|
||||
}
|
||||
|
||||
// UTFConversionTest
|
||||
|
||||
// Number of characters in each sample string below, not counting the
// terminating zero.
static const int TEST_CODE_NUM = 11;

// Sample text as UTF-16: U+3042 U+3044 U+3046 U+3048 U+304A twice, then 'A',
// then the terminator.
static const char16_t __utf16Code[] =
{
    0x3042, 0x3044, 0x3046, 0x3048, 0x304A,
    0x3042, 0x3044, 0x3046, 0x3048, 0x304A,
    0x0041,
    0x0000,
};

// The same sample text as UTF-8 bytes.
// Stored as unsigned char (plain char triggers an Xcode error), so cast to
// (const char *) at the point of use.
static const unsigned char __utf8Code[] =
{
    0xE3,0x81,0x82,  0xE3,0x81,0x84,  0xE3,0x81,0x86,  0xE3,0x81,0x88,  0xE3,0x81,0x8A,
    0xE3,0x81,0x82,  0xE3,0x81,0x84,  0xE3,0x81,0x86,  0xE3,0x81,0x88,  0xE3,0x81,0x8A,
    0x41,
    0x00,
};

// Code points that StringUtils::isUnicodeSpace is expected to accept.
static const char16_t WHITE_SPACE_CODE[] =
{
    0x0009, 0x000A, 0x000B, 0x000C, 0x000D,
    0x0020, 0x0085, 0x00A0, 0x1680,
    0x2000, 0x2001, 0x2002, 0x2003, 0x2004, 0x2005,
    0x2006, 0x2007, 0x2008, 0x2009, 0x200A,
    0x2028, 0x2029, 0x202F, 0x205F, 0x3000
};
|
||||
|
||||
static void doUTFConversion()
|
||||
{
|
||||
bool isSuccess = false;
|
||||
|
||||
std::string originalUTF8 = (const char*)__utf8Code;
|
||||
std::u16string originalUTF16 = __utf16Code;
|
||||
|
||||
//---------------------------
|
||||
std::string utf8Str;
|
||||
isSuccess = StringUtils::UTF16ToUTF8(originalUTF16, utf8Str);
|
||||
|
||||
if (isSuccess)
|
||||
{
|
||||
isSuccess = memcmp(utf8Str.data(), originalUTF8.data(), originalUTF8.length()+1)==0;
|
||||
}
|
||||
|
||||
CCASSERT(isSuccess, "StringUtils::UTF16ToUTF8 failed");
|
||||
|
||||
//---------------------------
|
||||
std::u16string utf16Str;
|
||||
isSuccess = StringUtils::UTF8ToUTF16(originalUTF8, utf16Str);
|
||||
|
||||
if (isSuccess)
|
||||
{
|
||||
isSuccess = memcmp(utf16Str.data(), originalUTF16.data(), originalUTF16.length()+1)==0;
|
||||
}
|
||||
|
||||
CCASSERT(isSuccess && (utf16Str.length() == TEST_CODE_NUM), "StringUtils::UTF8ToUTF16 failed");
|
||||
|
||||
//---------------------------
|
||||
auto vec1 = StringUtils::getChar16VectorFromUTF16String(originalUTF16);
|
||||
|
||||
CCASSERT(vec1.size() == originalUTF16.length(), "StringUtils::getChar16VectorFromUTF16String failed");
|
||||
|
||||
//---------------------------
|
||||
std::vector<char16_t> vec2( vec1 );
|
||||
vec2.push_back(0x2009);
|
||||
vec2.push_back(0x2009);
|
||||
vec2.push_back(0x2009);
|
||||
vec2.push_back(0x2009);
|
||||
|
||||
std::vector<char16_t> vec3( vec2 );
|
||||
StringUtils::trimUTF16Vector(vec2);
|
||||
|
||||
CCASSERT(vec1.size() == vec2.size(), "StringUtils::trimUTF16Vector failed");
|
||||
|
||||
for (size_t i = 0; i < vec2.size(); i++ )
|
||||
{
|
||||
CCASSERT(vec1.at(i) == vec2.at(i), "StringUtils::trimUTF16Vector failed");
|
||||
}
|
||||
|
||||
//---------------------------
|
||||
CCASSERT(StringUtils::getCharacterCountInUTF8String(originalUTF8) == TEST_CODE_NUM, "StringUtils::getCharacterCountInUTF8String failed");
|
||||
|
||||
//---------------------------
|
||||
int lastIndex = StringUtils::getIndexOfLastNotChar16(vec3, 0x2009);
|
||||
CCASSERT(lastIndex == (vec1.size()-1), "StringUtils::getIndexOfLastNotChar16 failed");
|
||||
|
||||
//---------------------------
|
||||
CCASSERT(originalUTF16.length() == TEST_CODE_NUM, "The length of the original utf16 string isn't equal to TEST_CODE_NUM");
|
||||
|
||||
//---------------------------
|
||||
size_t whiteCodeNum = sizeof(WHITE_SPACE_CODE) / sizeof(WHITE_SPACE_CODE[0]);
|
||||
for( size_t i = 0; i < whiteCodeNum; i++ )
|
||||
{
|
||||
CCASSERT(StringUtils::isUnicodeSpace(WHITE_SPACE_CODE[i]), "StringUtils::isUnicodeSpace failed");
|
||||
}
|
||||
|
||||
CCASSERT(!StringUtils::isUnicodeSpace(0xFFFF), "StringUtils::isUnicodeSpace failed");
|
||||
|
||||
CCASSERT(!StringUtils::isCJKUnicode(0xFFFF) && StringUtils::isCJKUnicode(0x3100), "StringUtils::isCJKUnicode failed");
|
||||
}
|
||||
|
||||
void UTFConversionTest::onEnter()
|
||||
{
|
||||
UnitTestDemo::onEnter();
|
||||
|
||||
for (int i = 0; i < 10000; ++i)
|
||||
{
|
||||
doUTFConversion();
|
||||
}
|
||||
}
|
||||
|
||||
std::string UTFConversionTest::subtitle() const
|
||||
{
|
||||
return "UTF8 <-> UTF16 Conversion Test, no crash";
|
||||
}
|
||||
|
|
|
@ -53,4 +53,12 @@ public:
|
|||
void constFunc(const Value& value) const;
|
||||
};
|
||||
|
||||
// Unit test for the UTF-8 <-> UTF-16 string helpers.
// Derives from UnitTestDemo and overrides onEnter() (which runs the
// conversion checks) and subtitle().
class UTFConversionTest : public UnitTestDemo
|
||||
{
|
||||
public:
|
||||
CREATE_FUNC(UTFConversionTest);
|
||||
virtual void onEnter() override;
|
||||
virtual std::string subtitle() const override;
|
||||
};
|
||||
|
||||
#endif /* __UNIT_TEST__ */
|
||||
|
|
Loading…
Reference in New Issue