mirror of https://github.com/axmolengine/axmol.git
issue #4660: Adds ConvertUTFWrapper.cpp, using llvm::ConvertUTF8toWide and llvm::convertUTF16ToUTF8String
This commit is contained in:
parent
5fda13639d
commit
a61b1097db
|
@ -98,6 +98,10 @@
|
|||
1A12775A18DFCC4F0005F345 /* CCTweenFunction.h in Headers */ = {isa = PBXBuildFile; fileRef = 2986667918B1B079000E39CA /* CCTweenFunction.h */; };
|
||||
1A12775B18DFCC540005F345 /* CCTweenFunction.h in Headers */ = {isa = PBXBuildFile; fileRef = 2986667918B1B079000E39CA /* CCTweenFunction.h */; };
|
||||
1A12775C18DFCC590005F345 /* CCTweenFunction.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 2986667818B1B079000E39CA /* CCTweenFunction.cpp */; };
|
||||
1A1645B0191B726C008C7C7F /* ConvertUTF.c in Sources */ = {isa = PBXBuildFile; fileRef = 1A1645AE191B726C008C7C7F /* ConvertUTF.c */; };
|
||||
1A1645B1191B726C008C7C7F /* ConvertUTF.c in Sources */ = {isa = PBXBuildFile; fileRef = 1A1645AE191B726C008C7C7F /* ConvertUTF.c */; };
|
||||
1A1645B2191B726C008C7C7F /* ConvertUTFWrapper.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1A1645AF191B726C008C7C7F /* ConvertUTFWrapper.cpp */; };
|
||||
1A1645B3191B726C008C7C7F /* ConvertUTFWrapper.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1A1645AF191B726C008C7C7F /* ConvertUTFWrapper.cpp */; };
|
||||
1A570061180BC5A10088DEC7 /* CCAction.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1A570047180BC5A10088DEC7 /* CCAction.cpp */; };
|
||||
1A570062180BC5A10088DEC7 /* CCAction.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1A570047180BC5A10088DEC7 /* CCAction.cpp */; };
|
||||
1A570063180BC5A10088DEC7 /* CCAction.h in Headers */ = {isa = PBXBuildFile; fileRef = 1A570048180BC5A10088DEC7 /* CCAction.h */; };
|
||||
|
@ -765,8 +769,6 @@
|
|||
1ABA68AF1888D700007D1BB4 /* CCFontCharMap.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1ABA68AC1888D700007D1BB4 /* CCFontCharMap.cpp */; };
|
||||
1ABA68B01888D700007D1BB4 /* CCFontCharMap.h in Headers */ = {isa = PBXBuildFile; fileRef = 1ABA68AD1888D700007D1BB4 /* CCFontCharMap.h */; };
|
||||
1ABA68B11888D700007D1BB4 /* CCFontCharMap.h in Headers */ = {isa = PBXBuildFile; fileRef = 1ABA68AD1888D700007D1BB4 /* CCFontCharMap.h */; };
|
||||
1AC0269A1914068200FA920D /* ConvertUTF.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1AC026981914068200FA920D /* ConvertUTF.cpp */; };
|
||||
1AC0269B1914068200FA920D /* ConvertUTF.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1AC026981914068200FA920D /* ConvertUTF.cpp */; };
|
||||
1AC0269C1914068200FA920D /* ConvertUTF.h in Headers */ = {isa = PBXBuildFile; fileRef = 1AC026991914068200FA920D /* ConvertUTF.h */; };
|
||||
1AC0269D1914068200FA920D /* ConvertUTF.h in Headers */ = {isa = PBXBuildFile; fileRef = 1AC026991914068200FA920D /* ConvertUTF.h */; };
|
||||
1AD71DA9180E26E600808F54 /* CCBAnimationManager.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1AD71CFA180E26E600808F54 /* CCBAnimationManager.cpp */; };
|
||||
|
@ -1878,6 +1880,8 @@
|
|||
1A0DB7301823827C0025743D /* CCGL.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = CCGL.h; sourceTree = "<group>"; };
|
||||
1A0DB7311823827C0025743D /* CCEAGLView.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = CCEAGLView.h; sourceTree = "<group>"; };
|
||||
1A0DB7351823828F0025743D /* CCGL.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = CCGL.h; sourceTree = "<group>"; };
|
||||
1A1645AE191B726C008C7C7F /* ConvertUTF.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = ConvertUTF.c; sourceTree = "<group>"; };
|
||||
1A1645AF191B726C008C7C7F /* ConvertUTFWrapper.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = ConvertUTFWrapper.cpp; sourceTree = "<group>"; };
|
||||
1A570047180BC5A10088DEC7 /* CCAction.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = CCAction.cpp; sourceTree = "<group>"; };
|
||||
1A570048180BC5A10088DEC7 /* CCAction.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = CCAction.h; sourceTree = "<group>"; };
|
||||
1A570049180BC5A10088DEC7 /* CCActionCamera.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = CCActionCamera.cpp; sourceTree = "<group>"; };
|
||||
|
@ -2129,7 +2133,6 @@
|
|||
1AAF584E180E40B9000584C8 /* LocalStorageAndroid.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = LocalStorageAndroid.cpp; sourceTree = "<group>"; };
|
||||
1ABA68AC1888D700007D1BB4 /* CCFontCharMap.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = CCFontCharMap.cpp; sourceTree = "<group>"; };
|
||||
1ABA68AD1888D700007D1BB4 /* CCFontCharMap.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = CCFontCharMap.h; sourceTree = "<group>"; };
|
||||
1AC026981914068200FA920D /* ConvertUTF.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = ConvertUTF.cpp; sourceTree = "<group>"; };
|
||||
1AC026991914068200FA920D /* ConvertUTF.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = ConvertUTF.h; sourceTree = "<group>"; };
|
||||
1AD71CFA180E26E600808F54 /* CCBAnimationManager.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = CCBAnimationManager.cpp; sourceTree = "<group>"; };
|
||||
1AD71CFB180E26E600808F54 /* CCBAnimationManager.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = CCBAnimationManager.h; sourceTree = "<group>"; };
|
||||
|
@ -3799,8 +3802,9 @@
|
|||
1AC026971914068200FA920D /* ConvertUTF */ = {
|
||||
isa = PBXGroup;
|
||||
children = (
|
||||
1AC026981914068200FA920D /* ConvertUTF.cpp */,
|
||||
1A1645AE191B726C008C7C7F /* ConvertUTF.c */,
|
||||
1AC026991914068200FA920D /* ConvertUTF.h */,
|
||||
1A1645AF191B726C008C7C7F /* ConvertUTFWrapper.cpp */,
|
||||
);
|
||||
name = ConvertUTF;
|
||||
path = ../external/ConvertUTF;
|
||||
|
@ -6112,7 +6116,7 @@
|
|||
2AC795DB1862870F005EC8E1 /* SkeletonBounds.cpp in Sources */,
|
||||
2AC795DC1862870F005EC8E1 /* Event.cpp in Sources */,
|
||||
1A01C68A18F57BE800EFE3A6 /* CCDeprecated.cpp in Sources */,
|
||||
1AC0269A1914068200FA920D /* ConvertUTF.cpp in Sources */,
|
||||
1A1645B0191B726C008C7C7F /* ConvertUTF.c in Sources */,
|
||||
500DC93219106300007B91BF /* CCAutoreleasePool.cpp in Sources */,
|
||||
2905FA5618CF08D100240AA3 /* UILayout.cpp in Sources */,
|
||||
2AC795DD1862870F005EC8E1 /* EventData.cpp in Sources */,
|
||||
|
@ -6214,6 +6218,7 @@
|
|||
500DC9B619106E6D007B91BF /* TransformUtils.cpp in Sources */,
|
||||
1A5701EE180BCB8C0088DEC7 /* CCTransitionProgress.cpp in Sources */,
|
||||
1A5701F7180BCBAD0088DEC7 /* CCMenu.cpp in Sources */,
|
||||
1A1645B2191B726C008C7C7F /* ConvertUTFWrapper.cpp in Sources */,
|
||||
1A5701FB180BCBAD0088DEC7 /* CCMenuItem.cpp in Sources */,
|
||||
1A570202180BCBD40088DEC7 /* CCClippingNode.cpp in Sources */,
|
||||
06CAAACF186AD7FC0012A414 /* TriggerBase.cpp in Sources */,
|
||||
|
@ -6823,13 +6828,14 @@
|
|||
1A8C59EC180E930E00EF57C3 /* CCSkin.cpp in Sources */,
|
||||
2905FA4718CF08D100240AA3 /* UIButton.cpp in Sources */,
|
||||
1A8C59F0180E930E00EF57C3 /* CCSpriteFrameCacheHelper.cpp in Sources */,
|
||||
1AC0269B1914068200FA920D /* ConvertUTF.cpp in Sources */,
|
||||
1A1645B1191B726C008C7C7F /* ConvertUTF.c in Sources */,
|
||||
B2AF2FA218EBAEAE00C5807C /* Vector2.cpp in Sources */,
|
||||
500DC8D219105F7D007B91BF /* CCAffineTransform.cpp in Sources */,
|
||||
1A8C59F4180E930E00EF57C3 /* CCSSceneReader.cpp in Sources */,
|
||||
1A8C59F8180E930E00EF57C3 /* CCTransformHelp.cpp in Sources */,
|
||||
1A8C59FC180E930E00EF57C3 /* CCTween.cpp in Sources */,
|
||||
2905FA5318CF08D100240AA3 /* UIImageView.cpp in Sources */,
|
||||
1A1645B3191B726C008C7C7F /* ConvertUTFWrapper.cpp in Sources */,
|
||||
1A8C5A04180E930E00EF57C3 /* CCUtilMath.cpp in Sources */,
|
||||
2905FA7518CF08D100240AA3 /* UIScrollView.cpp in Sources */,
|
||||
1A8C5A0E180E930E00EF57C3 /* DictionaryHelper.cpp in Sources */,
|
||||
|
|
|
@ -125,17 +125,15 @@ bool UTF8ToUTF16(const std::string& utf8, std::u16string& outUtf16)
|
|||
}
|
||||
|
||||
bool ret = false;
|
||||
const size_t utf16Bytes = (utf8.length()+1) << 1;
|
||||
|
||||
const size_t utf16Bytes = (utf8.length()+1) * sizeof(char16_t);
|
||||
char16_t* utf16 = (char16_t*)malloc(utf16Bytes);
|
||||
memset(utf16, 0, utf16Bytes);
|
||||
|
||||
UTF16* utf16Start = (UTF16*)utf16;
|
||||
UTF16* utf16End = ((UTF16*)utf16) + (utf8.length());
|
||||
char* utf16ptr = reinterpret_cast<char*>(utf16);
|
||||
const UTF8* error = nullptr;
|
||||
|
||||
const UTF8* utf8Start = (const UTF8*)utf8.data();
|
||||
const UTF8* utf8End = ((const UTF8*)utf8.data()) + utf8.length();
|
||||
|
||||
if (conversionOK == ConvertUTF8toUTF16((const UTF8 **) &utf8Start, utf8End, &utf16Start, utf16End, strictConversion))
|
||||
if (llvm::ConvertUTF8toWide(2, utf8, utf16ptr, error))
|
||||
{
|
||||
outUtf16 = utf16;
|
||||
ret = true;
|
||||
|
@ -154,26 +152,7 @@ bool UTF16ToUTF8(const std::u16string& utf16, std::string& outUtf8)
|
|||
return true;
|
||||
}
|
||||
|
||||
bool ret = false;
|
||||
const size_t utf8Bytes = (utf16.length() << 2) + 1;
|
||||
char* utf8 = (char*)malloc(utf8Bytes);
|
||||
memset(utf8, 0, utf8Bytes);
|
||||
|
||||
UTF8 *utf8Start = (UTF8*)utf8;
|
||||
UTF8 *utf8End = ((UTF8*)utf8) + (utf8Bytes -1);
|
||||
|
||||
const UTF16* utf16Start = (const UTF16*)utf16.data();
|
||||
const UTF16* utf16End = ((const UTF16*)utf16.data()) + utf16.length();
|
||||
|
||||
if (conversionOK == ConvertUTF16toUTF8(&utf16Start, utf16End, &utf8Start, utf8End, strictConversion))
|
||||
{
|
||||
outUtf8 = utf8;
|
||||
ret = true;
|
||||
}
|
||||
|
||||
free(utf8);
|
||||
|
||||
return ret;
|
||||
return llvm::convertUTF16ToUTF8String(utf16, outUtf8);
|
||||
}
|
||||
|
||||
std::vector<char16_t> getUTF16VectorFromUTF16String(const std::u16string& str)
|
||||
|
|
|
@ -51,7 +51,6 @@
|
|||
#ifdef CVTUTF_DEBUG
|
||||
#include <stdio.h>
|
||||
#endif
|
||||
#include <string.h>
|
||||
|
||||
static const int halfShift = 10; /* used for shifting by 10 bits */
|
||||
|
||||
|
@ -401,6 +400,22 @@ unsigned getNumBytesForUTF8(UTF8 first) {
|
|||
return trailingBytesForUTF8[first] + 1;
|
||||
}
|
||||
|
||||
int getUTF8StringLength(const UTF8* utf8)
|
||||
{
|
||||
const UTF8** source = &utf8;
|
||||
const UTF8* sourceEnd = utf8 + strlen((const char*)utf8);
|
||||
int ret = 0;
|
||||
while (*source != sourceEnd) {
|
||||
int length = trailingBytesForUTF8[**source] + 1;
|
||||
if (length > sourceEnd - *source || !isLegalUTF8(*source, length))
|
||||
return 0;
|
||||
*source += length;
|
||||
++ret;
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
||||
/* --------------------------------------------------------------------- */
|
||||
|
||||
/*
|
||||
|
@ -417,21 +432,6 @@ Boolean isLegalUTF8String(const UTF8 **source, const UTF8 *sourceEnd) {
|
|||
return true;
|
||||
}
|
||||
|
||||
int getUTF8StringLength(const UTF8* utf8)
|
||||
{
|
||||
const UTF8** source = &utf8;
|
||||
const UTF8* sourceEnd = utf8 + strlen((char*)utf8);
|
||||
int ret = 0;
|
||||
while (*source != sourceEnd) {
|
||||
int length = trailingBytesForUTF8[**source] + 1;
|
||||
if (length > sourceEnd - *source || !isLegalUTF8(*source, length))
|
||||
return 0;
|
||||
*source += length;
|
||||
++ret;
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
/* --------------------------------------------------------------------- */
|
||||
|
||||
ConversionResult ConvertUTF8toUTF16 (
|
|
@ -87,13 +87,13 @@
|
|||
|
||||
------------------------------------------------------------------------ */
|
||||
|
||||
#ifndef CC_LLVM_SUPPORT_CONVERTUTF_H
|
||||
#define CC_LLVM_SUPPORT_CONVERTUTF_H
|
||||
#ifndef LLVM_SUPPORT_CONVERTUTF_H
|
||||
#define LLVM_SUPPORT_CONVERTUTF_H
|
||||
|
||||
/* ---------------------------------------------------------------------
|
||||
The following 4 definitions are compiler-specific.
|
||||
The C standard does not guarantee that char16_t has at least
|
||||
16 bits, so char16_t is no less portable than unsigned short!
|
||||
The C standard does not guarantee that wchar_t has at least
|
||||
16 bits, so wchar_t is no less portable than unsigned short!
|
||||
All should be unsigned values to avoid sign extension during
|
||||
bit mask & shift operations.
|
||||
------------------------------------------------------------------------ */
|
||||
|
@ -127,6 +127,10 @@ typedef enum {
|
|||
lenientConversion
|
||||
} ConversionFlags;
|
||||
|
||||
/* This is for C++ and does no harm in C */
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
ConversionResult ConvertUTF8toUTF16 (
|
||||
const UTF8** sourceStart, const UTF8* sourceEnd,
|
||||
|
@ -157,10 +161,93 @@ Boolean isLegalUTF8Sequence(const UTF8 *source, const UTF8 *sourceEnd);
|
|||
Boolean isLegalUTF8String(const UTF8 **source, const UTF8 *sourceEnd);
|
||||
|
||||
unsigned getNumBytesForUTF8(UTF8 firstByte);
|
||||
|
||||
|
||||
int getUTF8StringLength(const UTF8* utf8);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
|
||||
/*************************************************************************/
|
||||
/* Below are LLVM-specific wrappers of the functions above. */
|
||||
|
||||
//#include "llvm/ADT/ArrayRef.h"
|
||||
//#include "llvm/ADT/StringRef.h"
|
||||
|
||||
#include <vector>
|
||||
#include <string>
|
||||
|
||||
namespace llvm {
|
||||
|
||||
/**
|
||||
* Convert a UTF-8 std::string to UTF-8, UTF-16, or UTF-32 depending on
|
||||
* WideCharWidth. The converted data is written to ResultPtr, which needs to
|
||||
* point to at least WideCharWidth * (Source.size() + 1) bytes. On success,
|
||||
* ResultPtr will point one after the end of the copied string. On failure,
|
||||
* ResultPtr will not be changed, and ErrorPtr will be set to the location of
|
||||
* the first character which could not be converted.
|
||||
* \return true on success.
|
||||
*/
|
||||
bool ConvertUTF8toWide(unsigned WideCharWidth, const std::string& Source,
|
||||
char *&ResultPtr, const UTF8 *&ErrorPtr);
|
||||
|
||||
/**
|
||||
* Convert a Unicode code point to a UTF-8 sequence.
|
||||
*
|
||||
* \param Source a Unicode code point.
|
||||
* \param [in,out] ResultPtr pointer to the output buffer, needs to be at least
|
||||
* \c UNI_MAX_UTF8_BYTES_PER_CODE_POINT bytes. On success \c ResultPtr is
|
||||
* updated one past end of the converted sequence.
|
||||
*
|
||||
* \returns true on success.
|
||||
*/
|
||||
bool ConvertCodePointToUTF8(unsigned Source, char *&ResultPtr);
|
||||
|
||||
/**
|
||||
* Convert the first UTF8 sequence in the given source buffer to a UTF32
|
||||
* code point.
|
||||
*
|
||||
* \param [in,out] source A pointer to the source buffer. If the conversion
|
||||
* succeeds, this pointer will be updated to point to the byte just past the
|
||||
* end of the converted sequence.
|
||||
* \param sourceEnd A pointer just past the end of the source buffer.
|
||||
* \param [out] target The converted code point.
|
||||
* \param flags Whether the conversion is strict or lenient.
|
||||
*
|
||||
* \returns conversionOK on success
|
||||
*
|
||||
* \sa ConvertUTF8toUTF32
|
||||
*/
|
||||
static inline ConversionResult convertUTF8Sequence(const UTF8 **source,
                                                   const UTF8 *sourceEnd,
                                                   UTF32 *target,
                                                   ConversionFlags flags) {
  const UTF8 *const first = *source;

  // Nothing left to read.
  if (first == sourceEnd)
    return sourceExhausted;

  // The lead byte tells how long the sequence is; make sure all of it is
  // inside the buffer before handing it to the converter.
  const unsigned sequenceLength = getNumBytesForUTF8(*first);
  if (static_cast<ptrdiff_t>(sequenceLength) > sourceEnd - first)
    return sourceExhausted;

  // Convert exactly one sequence into the single slot at `target`.
  return ConvertUTF8toUTF32(source, first + sequenceLength,
                            &target, target + 1, flags);
}
|
||||
|
||||
/**
|
||||
* Returns true if a blob of text starts with a UTF-16 big or little endian byte
|
||||
* order mark.
|
||||
*/
|
||||
bool hasUTF16ByteOrderMark(const char* SrcBytes, size_t len);
|
||||
|
||||
/**
|
||||
* Converts a std::u16string assumed to hold UTF-16 text into a UTF-8 std::string.
|
||||
*
|
||||
* \param [in] utf16 A string of what is assumed to be UTF-16 encoded text.
|
||||
* \param [out] Out Converted UTF-8 is stored here on success.
|
||||
* \returns true on success
|
||||
*/
|
||||
bool convertUTF16ToUTF8String(const std::u16string& utf16, std::string &Out);
|
||||
|
||||
} /* end namespace llvm */
|
||||
|
||||
#endif
|
||||
|
||||
/* --------------------------------------------------------------------- */
|
||||
|
||||
#endif // CC_LLVM_SUPPORT_CONVERTUTF_H
|
||||
#endif
|
||||
|
|
|
@ -0,0 +1,142 @@
|
|||
//===-- ConvertUTFWrapper.cpp - Wrap ConvertUTF.h with clang data types -----===
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "ConvertUTF.h"
|
||||
//#include "llvm/Support/SwapByteOrder.h"
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <assert.h>
|
||||
|
||||
namespace llvm {
|
||||
|
||||
bool ConvertUTF8toWide(unsigned WideCharWidth, const std::string& Source,
|
||||
char *&ResultPtr, const UTF8 *&ErrorPtr) {
|
||||
assert(WideCharWidth == 1 || WideCharWidth == 2 || WideCharWidth == 4);
|
||||
ConversionResult result = conversionOK;
|
||||
// Copy the character span over.
|
||||
if (WideCharWidth == 1) {
|
||||
const UTF8 *Pos = reinterpret_cast<const UTF8*>(Source.data());
|
||||
if (!isLegalUTF8String(&Pos, reinterpret_cast<const UTF8*>(Source.data() + Source.length()))) {
|
||||
result = sourceIllegal;
|
||||
ErrorPtr = Pos;
|
||||
} else {
|
||||
memcpy(ResultPtr, Source.data(), Source.size());
|
||||
ResultPtr += Source.size();
|
||||
}
|
||||
} else if (WideCharWidth == 2) {
|
||||
const UTF8 *sourceStart = (const UTF8*)Source.data();
|
||||
// FIXME: Make the type of the result buffer correct instead of
|
||||
// using reinterpret_cast.
|
||||
UTF16 *targetStart = reinterpret_cast<UTF16*>(ResultPtr);
|
||||
ConversionFlags flags = strictConversion;
|
||||
result = ConvertUTF8toUTF16(
|
||||
&sourceStart, sourceStart + Source.size(),
|
||||
&targetStart, targetStart + 2*Source.size(), flags);
|
||||
if (result == conversionOK)
|
||||
ResultPtr = reinterpret_cast<char*>(targetStart);
|
||||
else
|
||||
ErrorPtr = sourceStart;
|
||||
} else if (WideCharWidth == 4) {
|
||||
const UTF8 *sourceStart = (const UTF8*)Source.data();
|
||||
// FIXME: Make the type of the result buffer correct instead of
|
||||
// using reinterpret_cast.
|
||||
UTF32 *targetStart = reinterpret_cast<UTF32*>(ResultPtr);
|
||||
ConversionFlags flags = strictConversion;
|
||||
result = ConvertUTF8toUTF32(
|
||||
&sourceStart, sourceStart + Source.size(),
|
||||
&targetStart, targetStart + 4*Source.size(), flags);
|
||||
if (result == conversionOK)
|
||||
ResultPtr = reinterpret_cast<char*>(targetStart);
|
||||
else
|
||||
ErrorPtr = sourceStart;
|
||||
}
|
||||
assert((result != targetExhausted)
|
||||
&& "ConvertUTF8toUTFXX exhausted target buffer");
|
||||
return result == conversionOK;
|
||||
}
|
||||
|
||||
bool ConvertCodePointToUTF8(unsigned Source, char *&ResultPtr) {
|
||||
const UTF32 *SourceStart = &Source;
|
||||
const UTF32 *SourceEnd = SourceStart + 1;
|
||||
UTF8 *TargetStart = reinterpret_cast<UTF8 *>(ResultPtr);
|
||||
UTF8 *TargetEnd = TargetStart + 4;
|
||||
ConversionResult CR = ConvertUTF32toUTF8(&SourceStart, SourceEnd,
|
||||
&TargetStart, TargetEnd,
|
||||
strictConversion);
|
||||
if (CR != conversionOK)
|
||||
return false;
|
||||
|
||||
ResultPtr = reinterpret_cast<char*>(TargetStart);
|
||||
return true;
|
||||
}
|
||||
|
||||
/// Reports whether the first two bytes of \p S are a UTF-16 byte order mark in
/// either byte order (FF FE or FE FF). Buffers shorter than two bytes never
/// match.
bool hasUTF16ByteOrderMark(const char* S, size_t len) {
  if (len < 2)
    return false;

  const bool markFFFE = S[0] == '\xff' && S[1] == '\xfe';
  const bool markFEFF = S[0] == '\xfe' && S[1] == '\xff';
  return markFFFE || markFEFF;
}
|
||||
|
||||
/// SwapByteOrder_16 - Returns \p value with its high and low bytes exchanged.
inline uint16_t SwapByteOrder_16(uint16_t value) {
#if defined(_MSC_VER) && !defined(_DEBUG)
  // The DLL version of the runtime lacks these functions (bug!?), but in a
  // release build they're replaced with BSWAP instructions anyway.
  return _byteswap_ushort(value);
#else
  // The shifts happen after integer promotion; the cast drops the bits that
  // moved above bit 15.
  return static_cast<uint16_t>((value << 8) | (value >> 8));
#endif
}
|
||||
|
||||
bool convertUTF16ToUTF8String(const std::u16string& utf16, std::string &Out) {
|
||||
assert(Out.empty());
|
||||
|
||||
// Avoid OOB by returning early on empty input.
|
||||
if (utf16.empty())
|
||||
return true;
|
||||
|
||||
const UTF16 *Src = reinterpret_cast<const UTF16 *>(utf16.data());
|
||||
const UTF16 *SrcEnd = reinterpret_cast<const UTF16 *>(utf16.data() + utf16.length());
|
||||
|
||||
// Byteswap if necessary.
|
||||
std::vector<UTF16> ByteSwapped;
|
||||
if (Src[0] == UNI_UTF16_BYTE_ORDER_MARK_SWAPPED) {
|
||||
ByteSwapped.insert(ByteSwapped.end(), Src, SrcEnd);
|
||||
for (size_t I = 0, E = ByteSwapped.size(); I != E; ++I)
|
||||
ByteSwapped[I] = SwapByteOrder_16(ByteSwapped[I]);
|
||||
Src = &ByteSwapped[0];
|
||||
SrcEnd = &ByteSwapped[ByteSwapped.size() - 1] + 1;
|
||||
}
|
||||
|
||||
// Skip the BOM for conversion.
|
||||
if (Src[0] == UNI_UTF16_BYTE_ORDER_MARK_NATIVE)
|
||||
Src++;
|
||||
|
||||
// Just allocate enough space up front. We'll shrink it later.
|
||||
Out.resize(utf16.length() * UNI_MAX_UTF8_BYTES_PER_CODE_POINT + 1);
|
||||
UTF8 *Dst = reinterpret_cast<UTF8 *>(&Out[0]);
|
||||
UTF8 *DstEnd = Dst + Out.size();
|
||||
|
||||
ConversionResult CR =
|
||||
ConvertUTF16toUTF8(&Src, SrcEnd, &Dst, DstEnd, strictConversion);
|
||||
assert(CR != targetExhausted);
|
||||
|
||||
if (CR != conversionOK) {
|
||||
Out.clear();
|
||||
return false;
|
||||
}
|
||||
|
||||
Out.resize(reinterpret_cast<char *>(Dst) - &Out[0]);
|
||||
return true;
|
||||
}
|
||||
|
||||
} // end namespace llvm
|
||||
|
Loading…
Reference in New Issue