issue #4660: Adds ConvertUTFWrapper.cpp, using llvm::ConvertUTF8toWide and llvm::convertUTF16ToUTF8String

2014-05-08 16:58:16 +08:00 · 2014-05-08 16:58:16 +08:00 · a61b1097db
parent 5fda13639d
commit a61b1097db
5 changed files with 269 additions and 55 deletions
--- a/build/cocos2d_libs.xcodeproj/project.pbxproj
+++ b/build/cocos2d_libs.xcodeproj/project.pbxproj
@ -98,6 +98,10 @@
 		1A12775A18DFCC4F0005F345 /* CCTweenFunction.h in Headers */ = {isa = PBXBuildFile; fileRef = 2986667918B1B079000E39CA /* CCTweenFunction.h */; };
 		1A12775B18DFCC540005F345 /* CCTweenFunction.h in Headers */ = {isa = PBXBuildFile; fileRef = 2986667918B1B079000E39CA /* CCTweenFunction.h */; };
 		1A12775C18DFCC590005F345 /* CCTweenFunction.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 2986667818B1B079000E39CA /* CCTweenFunction.cpp */; };
+		1A1645B0191B726C008C7C7F /* ConvertUTF.c in Sources */ = {isa = PBXBuildFile; fileRef = 1A1645AE191B726C008C7C7F /* ConvertUTF.c */; };
+		1A1645B1191B726C008C7C7F /* ConvertUTF.c in Sources */ = {isa = PBXBuildFile; fileRef = 1A1645AE191B726C008C7C7F /* ConvertUTF.c */; };
+		1A1645B2191B726C008C7C7F /* ConvertUTFWrapper.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1A1645AF191B726C008C7C7F /* ConvertUTFWrapper.cpp */; };
+		1A1645B3191B726C008C7C7F /* ConvertUTFWrapper.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1A1645AF191B726C008C7C7F /* ConvertUTFWrapper.cpp */; };
 		1A570061180BC5A10088DEC7 /* CCAction.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1A570047180BC5A10088DEC7 /* CCAction.cpp */; };
 		1A570062180BC5A10088DEC7 /* CCAction.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1A570047180BC5A10088DEC7 /* CCAction.cpp */; };
 		1A570063180BC5A10088DEC7 /* CCAction.h in Headers */ = {isa = PBXBuildFile; fileRef = 1A570048180BC5A10088DEC7 /* CCAction.h */; };
@ -765,8 +769,6 @@
 		1ABA68AF1888D700007D1BB4 /* CCFontCharMap.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1ABA68AC1888D700007D1BB4 /* CCFontCharMap.cpp */; };
 		1ABA68B01888D700007D1BB4 /* CCFontCharMap.h in Headers */ = {isa = PBXBuildFile; fileRef = 1ABA68AD1888D700007D1BB4 /* CCFontCharMap.h */; };
 		1ABA68B11888D700007D1BB4 /* CCFontCharMap.h in Headers */ = {isa = PBXBuildFile; fileRef = 1ABA68AD1888D700007D1BB4 /* CCFontCharMap.h */; };
-		1AC0269A1914068200FA920D /* ConvertUTF.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1AC026981914068200FA920D /* ConvertUTF.cpp */; };
-		1AC0269B1914068200FA920D /* ConvertUTF.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1AC026981914068200FA920D /* ConvertUTF.cpp */; };
 		1AC0269C1914068200FA920D /* ConvertUTF.h in Headers */ = {isa = PBXBuildFile; fileRef = 1AC026991914068200FA920D /* ConvertUTF.h */; };
 		1AC0269D1914068200FA920D /* ConvertUTF.h in Headers */ = {isa = PBXBuildFile; fileRef = 1AC026991914068200FA920D /* ConvertUTF.h */; };
 		1AD71DA9180E26E600808F54 /* CCBAnimationManager.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1AD71CFA180E26E600808F54 /* CCBAnimationManager.cpp */; };
@ -1878,6 +1880,8 @@
 		1A0DB7301823827C0025743D /* CCGL.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = CCGL.h; sourceTree = "<group>"; };
 		1A0DB7311823827C0025743D /* CCEAGLView.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = CCEAGLView.h; sourceTree = "<group>"; };
 		1A0DB7351823828F0025743D /* CCGL.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = CCGL.h; sourceTree = "<group>"; };
+		1A1645AE191B726C008C7C7F /* ConvertUTF.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = ConvertUTF.c; sourceTree = "<group>"; };
+		1A1645AF191B726C008C7C7F /* ConvertUTFWrapper.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = ConvertUTFWrapper.cpp; sourceTree = "<group>"; };
 		1A570047180BC5A10088DEC7 /* CCAction.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = CCAction.cpp; sourceTree = "<group>"; };
 		1A570048180BC5A10088DEC7 /* CCAction.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = CCAction.h; sourceTree = "<group>"; };
 		1A570049180BC5A10088DEC7 /* CCActionCamera.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = CCActionCamera.cpp; sourceTree = "<group>"; };
@ -2129,7 +2133,6 @@
 		1AAF584E180E40B9000584C8 /* LocalStorageAndroid.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = LocalStorageAndroid.cpp; sourceTree = "<group>"; };
 		1ABA68AC1888D700007D1BB4 /* CCFontCharMap.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = CCFontCharMap.cpp; sourceTree = "<group>"; };
 		1ABA68AD1888D700007D1BB4 /* CCFontCharMap.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = CCFontCharMap.h; sourceTree = "<group>"; };
-		1AC026981914068200FA920D /* ConvertUTF.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = ConvertUTF.cpp; sourceTree = "<group>"; };
 		1AC026991914068200FA920D /* ConvertUTF.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = ConvertUTF.h; sourceTree = "<group>"; };
 		1AD71CFA180E26E600808F54 /* CCBAnimationManager.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = CCBAnimationManager.cpp; sourceTree = "<group>"; };
 		1AD71CFB180E26E600808F54 /* CCBAnimationManager.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = CCBAnimationManager.h; sourceTree = "<group>"; };
@ -3799,8 +3802,9 @@
 		1AC026971914068200FA920D /* ConvertUTF */ = {
 			isa = PBXGroup;
 			children = (
-				1AC026981914068200FA920D /* ConvertUTF.cpp */,
+				1A1645AE191B726C008C7C7F /* ConvertUTF.c */,
 				1AC026991914068200FA920D /* ConvertUTF.h */,
+				1A1645AF191B726C008C7C7F /* ConvertUTFWrapper.cpp */,
 			);
 			name = ConvertUTF;
 			path = ../external/ConvertUTF;
@ -6112,7 +6116,7 @@
 				2AC795DB1862870F005EC8E1 /* SkeletonBounds.cpp in Sources */,
 				2AC795DC1862870F005EC8E1 /* Event.cpp in Sources */,
 				1A01C68A18F57BE800EFE3A6 /* CCDeprecated.cpp in Sources */,
-				1AC0269A1914068200FA920D /* ConvertUTF.cpp in Sources */,
+				1A1645B0191B726C008C7C7F /* ConvertUTF.c in Sources */,
 				500DC93219106300007B91BF /* CCAutoreleasePool.cpp in Sources */,
 				2905FA5618CF08D100240AA3 /* UILayout.cpp in Sources */,
 				2AC795DD1862870F005EC8E1 /* EventData.cpp in Sources */,
@ -6214,6 +6218,7 @@
 				500DC9B619106E6D007B91BF /* TransformUtils.cpp in Sources */,
 				1A5701EE180BCB8C0088DEC7 /* CCTransitionProgress.cpp in Sources */,
 				1A5701F7180BCBAD0088DEC7 /* CCMenu.cpp in Sources */,
+				1A1645B2191B726C008C7C7F /* ConvertUTFWrapper.cpp in Sources */,
 				1A5701FB180BCBAD0088DEC7 /* CCMenuItem.cpp in Sources */,
 				1A570202180BCBD40088DEC7 /* CCClippingNode.cpp in Sources */,
 				06CAAACF186AD7FC0012A414 /* TriggerBase.cpp in Sources */,
@ -6823,13 +6828,14 @@
 				1A8C59EC180E930E00EF57C3 /* CCSkin.cpp in Sources */,
 				2905FA4718CF08D100240AA3 /* UIButton.cpp in Sources */,
 				1A8C59F0180E930E00EF57C3 /* CCSpriteFrameCacheHelper.cpp in Sources */,
-				1AC0269B1914068200FA920D /* ConvertUTF.cpp in Sources */,
+				1A1645B1191B726C008C7C7F /* ConvertUTF.c in Sources */,
 				B2AF2FA218EBAEAE00C5807C /* Vector2.cpp in Sources */,
 				500DC8D219105F7D007B91BF /* CCAffineTransform.cpp in Sources */,
 				1A8C59F4180E930E00EF57C3 /* CCSSceneReader.cpp in Sources */,
 				1A8C59F8180E930E00EF57C3 /* CCTransformHelp.cpp in Sources */,
 				1A8C59FC180E930E00EF57C3 /* CCTween.cpp in Sources */,
 				2905FA5318CF08D100240AA3 /* UIImageView.cpp in Sources */,
+				1A1645B3191B726C008C7C7F /* ConvertUTFWrapper.cpp in Sources */,
 				1A8C5A04180E930E00EF57C3 /* CCUtilMath.cpp in Sources */,
 				2905FA7518CF08D100240AA3 /* UIScrollView.cpp in Sources */,
 				1A8C5A0E180E930E00EF57C3 /* DictionaryHelper.cpp in Sources */,
--- a/cocos/2d/ccUTF8.cpp
+++ b/cocos/2d/ccUTF8.cpp
@ -125,17 +125,15 @@ bool UTF8ToUTF16(const std::string& utf8, std::u16string& outUtf16)
    }

    bool ret = false;
-    const size_t utf16Bytes = (utf8.length()+1) << 1;
+    
+    const size_t utf16Bytes = (utf8.length()+1) * sizeof(char16_t);
    char16_t* utf16 = (char16_t*)malloc(utf16Bytes);
    memset(utf16, 0, utf16Bytes);

-    UTF16* utf16Start = (UTF16*)utf16;
-    UTF16* utf16End = ((UTF16*)utf16) + (utf8.length());
+    char* utf16ptr = reinterpret_cast<char*>(utf16);
+    const UTF8* error = nullptr;

-    const UTF8* utf8Start = (const UTF8*)utf8.data();
-    const UTF8* utf8End = ((const UTF8*)utf8.data()) + utf8.length();
-
-    if (conversionOK == ConvertUTF8toUTF16((const UTF8 **) &utf8Start, utf8End, &utf16Start, utf16End, strictConversion))
+    if (llvm::ConvertUTF8toWide(2, utf8, utf16ptr, error))
    {
        outUtf16 = utf16;
        ret = true;
@ -154,26 +152,7 @@ bool UTF16ToUTF8(const std::u16string& utf16, std::string& outUtf8)
        return true;
    }

-    bool ret = false;
-    const size_t utf8Bytes = (utf16.length() << 2) + 1;
-    char* utf8 = (char*)malloc(utf8Bytes);
-    memset(utf8, 0, utf8Bytes);
-
-    UTF8 *utf8Start = (UTF8*)utf8;
-    UTF8 *utf8End = ((UTF8*)utf8) + (utf8Bytes -1);
-
-    const UTF16* utf16Start = (const UTF16*)utf16.data();
-    const UTF16* utf16End = ((const UTF16*)utf16.data()) + utf16.length();
-
-    if (conversionOK == ConvertUTF16toUTF8(&utf16Start, utf16End, &utf8Start, utf8End, strictConversion))
-    {
-        outUtf8 = utf8;
-        ret = true;
-    }
-
-    free(utf8);
-
-    return ret;
+    return llvm::convertUTF16ToUTF8String(utf16, outUtf8);
 }

 std::vector<char16_t> getUTF16VectorFromUTF16String(const std::u16string& str)
--- a/external/ConvertUTF/ConvertUTF.cpp
+++ b/external/ConvertUTF/ConvertUTF.cpp
@ -51,7 +51,6 @@
 #ifdef CVTUTF_DEBUG
 #include <stdio.h>
 #endif
-#include <string.h>

 static const int halfShift  = 10; /* used for shifting by 10 bits */

@ -401,6 +400,22 @@ unsigned getNumBytesForUTF8(UTF8 first) {
  return trailingBytesForUTF8[first] + 1;
 }

+/*
+ * Counts the UTF-8 sequences (code points) in a NUL-terminated string.
+ *
+ * utf8: NUL-terminated UTF-8 byte string; a null pointer is tolerated.
+ *
+ * Returns the number of sequences found. Returns 0 when the pointer is null,
+ * when the string is empty, or when any sequence is truncated or rejected by
+ * isLegalUTF8.
+ *
+ * NOTE(review): strlen needs <string.h>; confirm this file still includes it.
+ */
+int getUTF8StringLength(const UTF8* utf8)
+{
+    const UTF8* cursor;
+    const UTF8* sourceEnd;
+    int count = 0;
+
+    /* Guard: calling strlen on a null pointer is undefined behaviour. */
+    if (!utf8)
+        return 0;
+
+    cursor = utf8;
+    sourceEnd = utf8 + strlen((const char*)utf8);
+    while (cursor != sourceEnd) {
+        /* The lead byte selects the total length of the sequence. */
+        int length = trailingBytesForUTF8[*cursor] + 1;
+        if (length > sourceEnd - cursor || !isLegalUTF8(cursor, length))
+            return 0;
+        cursor += length;
+        ++count;
+    }
+    return count;
+}
+
+
 /* --------------------------------------------------------------------- */

 /*
@ -417,21 +432,6 @@ Boolean isLegalUTF8String(const UTF8 **source, const UTF8 *sourceEnd) {
    return true;
 }

-int getUTF8StringLength(const UTF8* utf8)
-{
-    const UTF8** source = &utf8;
-    const UTF8* sourceEnd = utf8 + strlen((char*)utf8);
-    int ret = 0;
-    while (*source != sourceEnd) {
-        int length = trailingBytesForUTF8[**source] + 1;
-        if (length > sourceEnd - *source || !isLegalUTF8(*source, length))
-            return 0;
-        *source += length;
-        ++ret;
-    }
-    return ret;
-}
-
 /* --------------------------------------------------------------------- */

 ConversionResult ConvertUTF8toUTF16 (
--- a/external/ConvertUTF/ConvertUTF.h
+++ b/external/ConvertUTF/ConvertUTF.h
@ -87,13 +87,13 @@

 ------------------------------------------------------------------------ */

-#ifndef CC_LLVM_SUPPORT_CONVERTUTF_H
-#define CC_LLVM_SUPPORT_CONVERTUTF_H
+#ifndef LLVM_SUPPORT_CONVERTUTF_H
+#define LLVM_SUPPORT_CONVERTUTF_H

 /* ---------------------------------------------------------------------
    The following 4 definitions are compiler-specific.
-    The C standard does not guarantee that char16_t has at least
-    16 bits, so char16_t is no less portable than unsigned short!
+    The C standard does not guarantee that wchar_t has at least
+    16 bits, so wchar_t is no less portable than unsigned short!
    All should be unsigned values to avoid sign extension during
    bit mask & shift operations.
 ------------------------------------------------------------------------ */
@ -127,6 +127,10 @@ typedef enum {
  lenientConversion
 } ConversionFlags;

+/* This is for C++ and does no harm in C */
+#ifdef __cplusplus
+extern "C" {
+#endif

 ConversionResult ConvertUTF8toUTF16 (
  const UTF8** sourceStart, const UTF8* sourceEnd,
@ -157,10 +161,93 @@ Boolean isLegalUTF8Sequence(const UTF8 *source, const UTF8 *sourceEnd);
 Boolean isLegalUTF8String(const UTF8 **source, const UTF8 *sourceEnd);

 unsigned getNumBytesForUTF8(UTF8 firstByte);
-
+    
 int getUTF8StringLength(const UTF8* utf8);

+#ifdef __cplusplus
+}
+
+/*************************************************************************/
+/* Below are LLVM-specific wrappers of the functions above. */
+
+//#include "llvm/ADT/ArrayRef.h"
+//#include "llvm/ADT/StringRef.h"
+
+#include <vector>
+#include <string>
+
+namespace llvm {
+
+/**
+ * Convert a UTF8 std::string to UTF8, UTF16, or UTF32 depending on
+ * WideCharWidth. The converted data is written to ResultPtr, which needs to
+ * point to at least WideCharWidth * (Source.size() + 1) bytes. On success,
+ * ResultPtr will point one after the end of the copied string. On failure,
+ * ResultPtr will not be changed, and ErrorPtr will be set to the location of
+ * the first character which could not be converted.
+ * \return true on success.
+ */
+bool ConvertUTF8toWide(unsigned WideCharWidth, const std::string& Source,
+                       char *&ResultPtr, const UTF8 *&ErrorPtr);
+
+/**
+ * Convert a Unicode code point to a UTF8 sequence.
+ *
+ * \param Source a Unicode code point.
+ * \param [in,out] ResultPtr pointer to the output buffer, needs to be at least
+ * \c UNI_MAX_UTF8_BYTES_PER_CODE_POINT bytes.  On success \c ResultPtr is
+ * updated one past end of the converted sequence.
+ *
+ * \returns true on success.
+ */
+bool ConvertCodePointToUTF8(unsigned Source, char *&ResultPtr);
+
+/**
+ * Convert the first UTF8 sequence in the given source buffer to a UTF32
+ * code point.
+ *
+ * \param [in,out] source A pointer to the source buffer. If the conversion
+ * succeeds, this pointer will be updated to point to the byte just past the
+ * end of the converted sequence.
+ * \param sourceEnd A pointer just past the end of the source buffer.
+ * \param [out] target The converted code
+ * \param flags Whether the conversion is strict or lenient.
+ *
+ * \returns conversionOK on success
+ *
+ * \sa ConvertUTF8toUTF32
+ */
+static inline ConversionResult convertUTF8Sequence(const UTF8 **source,
+                                                   const UTF8 *sourceEnd,
+                                                   UTF32 *target,
+                                                   ConversionFlags flags) {
+  // Nothing left to decode.
+  const UTF8 *const first = *source;
+  if (first == sourceEnd)
+    return sourceExhausted;
+
+  // The lead byte gives the sequence length; refuse a sequence that would
+  // run past the end of the buffer.
+  const ptrdiff_t sequenceLength =
+      static_cast<ptrdiff_t>(getNumBytesForUTF8(*first));
+  if (sequenceLength > sourceEnd - first)
+    return sourceExhausted;
+
+  // Decode exactly one sequence into the single UTF32 slot at target.
+  UTF32 *const targetEnd = target + 1;
+  return ConvertUTF8toUTF32(source, first + sequenceLength, &target, targetEnd,
+                            flags);
+}
+
+/**
+ * Returns true if a blob of text starts with a UTF-16 big or little endian byte
+ * order mark.
+ */
+bool hasUTF16ByteOrderMark(const char* SrcBytes, size_t len);
+
+/**
+ * Converts a UTF16 std::u16string into a UTF8 std::string.
+ *
+ * \param [in] utf16 UTF-16 encoded text, optionally starting with a byte order mark.
+ * \param [out] Out Converted UTF-8 is stored here on success.
+ * \returns true on success
+ */
+bool convertUTF16ToUTF8String(const std::u16string& utf16, std::string &Out);
+
+} /* end namespace llvm */
+
+#endif

 /* --------------------------------------------------------------------- */

-#endif // CC_LLVM_SUPPORT_CONVERTUTF_H
+#endif
--- a/external/ConvertUTF/ConvertUTFWrapper.cpp
+++ b/external/ConvertUTF/ConvertUTFWrapper.cpp
@ -0,0 +1,142 @@
+//===-- ConvertUTFWrapper.cpp - Wrap ConvertUTF.h with clang data types -----===
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ConvertUTF.h"
+//#include "llvm/Support/SwapByteOrder.h"
+#include <assert.h>
+#include <stdint.h>
+#include <string.h>
+#include <string>
+#include <vector>
+
+namespace llvm {
+
+/// Converts the UTF-8 text in \p Source to UTF-8, UTF-16 or UTF-32 code units,
+/// selected by \p WideCharWidth.
+///
+/// \param WideCharWidth size in bytes of one output code unit; must be 1, 2
+///        or 4 (asserted).
+/// \param Source UTF-8 input.
+/// \param [in,out] ResultPtr output buffer of at least
+///        WideCharWidth * (Source.size() + 1) bytes. On success it is advanced
+///        to one past the last code unit written; on failure it is unchanged.
+/// \param [out] ErrorPtr on failure, set to the source position reported by
+///        the underlying validator/converter; untouched on success.
+/// \returns true when the conversion result is conversionOK.
+bool ConvertUTF8toWide(unsigned WideCharWidth, const std::string& Source,
+                       char *&ResultPtr, const UTF8 *&ErrorPtr) {
+  assert(WideCharWidth == 1 || WideCharWidth == 2 || WideCharWidth == 4);
+  ConversionResult result = conversionOK;
+
+  const UTF8 *sourceStart = reinterpret_cast<const UTF8*>(Source.data());
+  const UTF8 *const sourceEnd = sourceStart + Source.size();
+
+  if (WideCharWidth == 1) {
+    // UTF-8 to UTF-8: validate, then copy the bytes through unchanged.
+    if (!isLegalUTF8String(&sourceStart, sourceEnd)) {
+      result = sourceIllegal;
+      ErrorPtr = sourceStart;
+    } else {
+      memcpy(ResultPtr, Source.data(), Source.size());
+      ResultPtr += Source.size();
+    }
+  } else if (WideCharWidth == 2) {
+    // FIXME: Make the type of the result buffer correct instead of
+    // using reinterpret_cast.
+    UTF16 *targetStart = reinterpret_cast<UTF16*>(ResultPtr);
+    // A UTF-8 sequence never yields more UTF-16 code units than it has bytes,
+    // so Source.size() units is a sufficient bound and, unlike the previous
+    // 2 * Source.size(), it stays inside the documented buffer size.
+    UTF16 *const targetEnd = targetStart + Source.size();
+    result = ConvertUTF8toUTF16(&sourceStart, sourceEnd,
+                                &targetStart, targetEnd, strictConversion);
+    if (result == conversionOK)
+      ResultPtr = reinterpret_cast<char*>(targetStart);
+    else
+      ErrorPtr = sourceStart;
+  } else if (WideCharWidth == 4) {
+    // FIXME: Make the type of the result buffer correct instead of
+    // using reinterpret_cast.
+    UTF32 *targetStart = reinterpret_cast<UTF32*>(ResultPtr);
+    // Each UTF-8 sequence yields one UTF-32 unit, so Source.size() units is a
+    // sufficient bound that stays inside the documented buffer size (the
+    // previous bound was 4 * Source.size() units).
+    UTF32 *const targetEnd = targetStart + Source.size();
+    result = ConvertUTF8toUTF32(&sourceStart, sourceEnd,
+                                &targetStart, targetEnd, strictConversion);
+    if (result == conversionOK)
+      ResultPtr = reinterpret_cast<char*>(targetStart);
+    else
+      ErrorPtr = sourceStart;
+  }
+  assert((result != targetExhausted)
+         && "ConvertUTF8toUTFXX exhausted target buffer");
+  return result == conversionOK;
+}
+
+/// Encodes the single code point \p Source as UTF-8 at \p ResultPtr.
+///
+/// \param Source the code point to encode.
+/// \param [in,out] ResultPtr output position; up to 4 bytes starting here may
+///        be written. Advanced past the written bytes on success, unchanged
+///        on failure.
+/// \returns true when the strict conversion reports conversionOK.
+bool ConvertCodePointToUTF8(unsigned Source, char *&ResultPtr) {
+  // Present the code point as a one-element UTF-32 range.
+  const UTF32 *codePoint = &Source;
+  const UTF32 *const codePointEnd = codePoint + 1;
+
+  // The output window is the 4 bytes starting at ResultPtr.
+  UTF8 *out = reinterpret_cast<UTF8 *>(ResultPtr);
+  UTF8 *const outEnd = out + 4;
+
+  const bool converted =
+      ConvertUTF32toUTF8(&codePoint, codePointEnd, &out, outEnd,
+                         strictConversion) == conversionOK;
+  if (converted)
+    ResultPtr = reinterpret_cast<char*>(out);
+  return converted;
+}
+
+/// Reports whether the first two bytes of \p S form a UTF-16 byte order mark
+/// in either byte order (FF FE or FE FF). Buffers shorter than two bytes
+/// never match.
+bool hasUTF16ByteOrderMark(const char* S, size_t len) {
+  if (len < 2)
+    return false;
+
+  const char first = S[0];
+  const char second = S[1];
+  if (first == '\xff')
+    return second == '\xfe';
+  if (first == '\xfe')
+    return second == '\xff';
+  return false;
+}
+    
+/// SwapByteOrder_16 - Exchanges the high and low bytes of the 16-bit argument
+/// and returns the result.
+inline uint16_t SwapByteOrder_16(uint16_t value) {
+#if defined(_MSC_VER) && !defined(_DEBUG)
+    // The DLL version of the runtime lacks these functions (bug!?), but in a
+    // release build they're replaced with BSWAP instructions anyway.
+    return _byteswap_ushort(value);
+#else
+    // Work in a wider type, then truncate back to 16 bits so the bits shifted
+    // above bit 15 are discarded.
+    const unsigned widened = value;
+    return static_cast<uint16_t>((widened << 8) | (widened >> 8));
+#endif
+}
+
+/// Converts UTF-16 text to UTF-8.
+///
+/// If the first code unit is the byte-swapped byte order mark, every code unit
+/// is byte-swapped before conversion. A leading native byte order mark is
+/// skipped and not emitted.
+///
+/// \param [in] utf16 UTF-16 code units to convert.
+/// \param [out] Out receives the UTF-8 result. Any previous contents are
+///        discarded; it is left empty when conversion fails.
+/// \returns true on success (including empty input), false when the strict
+///          conversion does not report conversionOK.
+bool convertUTF16ToUTF8String(const std::u16string& utf16, std::string &Out) {
+  // Start from a clean slate instead of asserting on it: a caller that
+  // forwards a user-supplied output string cannot guarantee it is empty.
+  Out.clear();
+
+  // Avoid OOB by returning early on empty input.
+  if (utf16.empty())
+    return true;
+
+  const UTF16 *Src = reinterpret_cast<const UTF16 *>(utf16.data());
+  const UTF16 *SrcEnd = Src + utf16.length();
+
+  // Byteswap if necessary. The swapped copy must outlive the conversion
+  // below, hence the function-scope vector.
+  std::vector<UTF16> ByteSwapped;
+  if (Src[0] == UNI_UTF16_BYTE_ORDER_MARK_SWAPPED) {
+    ByteSwapped.assign(Src, SrcEnd);
+    for (size_t I = 0, E = ByteSwapped.size(); I != E; ++I)
+      ByteSwapped[I] = SwapByteOrder_16(ByteSwapped[I]);
+    Src = &ByteSwapped[0];
+    SrcEnd = Src + ByteSwapped.size();
+  }
+
+  // Skip the BOM for conversion.
+  if (Src[0] == UNI_UTF16_BYTE_ORDER_MARK_NATIVE)
+    Src++;
+
+  // Just allocate enough space up front.  We'll shrink it later.
+  Out.resize(utf16.length() * UNI_MAX_UTF8_BYTES_PER_CODE_POINT + 1);
+  UTF8 *Dst = reinterpret_cast<UTF8 *>(&Out[0]);
+  UTF8 *DstEnd = Dst + Out.size();
+
+  ConversionResult CR =
+      ConvertUTF16toUTF8(&Src, SrcEnd, &Dst, DstEnd, strictConversion);
+  assert(CR != targetExhausted);
+
+  if (CR != conversionOK) {
+    Out.clear();
+    return false;
+  }
+
+  // Dst now points one past the last byte written; trim the unused tail.
+  Out.resize(reinterpret_cast<char *>(Dst) - &Out[0]);
+  return true;
+}
+
+} // end namespace llvm
+