Update ConvertUTF,poly2tri [ci build]

2021-06-01 11:47:19 +08:00 · 2021-06-01 11:47:19 +08:00 · e0beb6cf27
parent b30a6789b9
commit e0beb6cf27
15 changed files with 354 additions and 353 deletions
--- a/cocos/base/ccUTF8.cpp
+++ b/cocos/base/ccUTF8.cpp
@ -30,9 +30,12 @@
 #include "ConvertUTF.h"
 #include <limits>

+using namespace llvm;
+
 NS_CC_BEGIN

 namespace StringUtils {
+
 std::string CC_DLL format(const char* format, ...)
 {
    va_list args;
@ -393,6 +396,24 @@ long getCharacterCountInUTF8String(const std::string& utf8)
    return getUTF8StringLength((const UTF8*)utf8.c_str());
 }

+bool hasNonAsciiUTF8(const char* str, size_t len) {
+    for (size_t i = 0; i < len;)
+    {
+        int numByte = getNumBytesForUTF8(str[i]);
+        if (numByte > 1) 
+        { // byte=1, is ascii character
+            if (isLegalUTF8Sequence((const UTF8*) &str[i], (const UTF8*) &str[i] + numByte))
+                return true;
+        }
+        i += numByte;
+    }
+    return false;
+}
+
+
+bool isLegalUTF8String(const char* str, size_t len) {
+    return ::isLegalUTF8String((const UTF8**)&str, (const UTF8*)str + len);
+}

 StringUTF8::StringUTF8()
 {
--- a/cocos/base/ccUTF8.h
+++ b/cocos/base/ccUTF8.h
@ -213,7 +213,15 @@ CC_DLL unsigned int getIndexOfLastNotChar16(const std::vector<char16_t>& str, ch
 */
 CC_DLL std::vector<char16_t> getChar16VectorFromUTF16String(const std::u16string& utf16);

+/**
+ *  @brief Whether the string contains any non-ASCII (multi-byte) UTF-8 characters
+ */
+CC_DLL bool hasNonAsciiUTF8(const char* str, size_t len);

+/**
+ *  @brief Whether the string is legal UTF-8; pure-ASCII strings count as legal
+ */
+CC_DLL bool isLegalUTF8String(const char* str, size_t len);

 /**
 * Utf8 sequence
--- a/external/CMakeLists.txt
+++ b/external/CMakeLists.txt
@ -167,7 +167,7 @@ if(BUILD_EXT_EDTAA3FUNC)
 endif(BUILD_EXT_EDTAA3FUNC)
 if(BUILD_EXT_CONVERTUTF)
    add_subdirectory(ConvertUTF)
-    target_link_libraries(external convertUTF)
+    target_link_libraries(external ConvertUTF)
 endif(BUILD_EXT_CONVERTUTF)
 if(BUILD_EXT_POLY2TRI)
    add_subdirectory(poly2tri)
--- a/external/ConvertUTF/CMakeLists.txt
+++ b/external/ConvertUTF/CMakeLists.txt
@ -1,12 +1,11 @@

-set(lib_name convertUTF)
+set(lib_name ConvertUTF)
 set(target_name ${lib_name})

 project(${lib_name})

 add_library(${target_name} STATIC
-  ConvertUTF.c
-  ConvertUTFWrapper.cpp
+  ConvertUTF.cpp
 )

 target_include_directories(${target_name} PUBLIC .)
--- a/external/ConvertUTF/ConvertUTF.cpp
+++ b/external/ConvertUTF/ConvertUTF.cpp
@ -1,16 +1,15 @@
 /*===--- ConvertUTF.c - Universal Character Names conversions ---------------===
 *
- *                     The LLVM Compiler Infrastructure
- *
- * This file is distributed under the University of Illinois Open Source
- * License. See LICENSE.TXT for details.
+ * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+ * See https://llvm.org/LICENSE.txt for license information.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 *
 *===------------------------------------------------------------------------=*/
 /*
 * Copyright 2001-2004 Unicode, Inc.
- * 
+ *
 * Disclaimer
- * 
+ *
 * This source code is provided as is by Unicode, Inc. No claims are
 * made as to fitness for any particular purpose. No warranties of any
 * kind are expressed or implied. The recipient agrees to determine
@ -18,9 +17,9 @@
 * purchased on magnetic or optical media from Unicode, Inc., the
 * sole remedy for any claim will be exchange of defective media
 * within 90 days of receipt.
- * 
+ *
 * Limitations on Rights to Redistribute This Code
- * 
+ *
 * Unicode, Inc. hereby grants the right to freely use the information
 * supplied in this file in the creation of products supporting the
 * Unicode Standard, and to make copies of this file in any form
@ -46,13 +45,41 @@

 ------------------------------------------------------------------------ */

-
 #include "ConvertUTF.h"
 #ifdef CVTUTF_DEBUG
 #include <stdio.h>
 #endif
+#include <assert.h>

-#include <string.h>
+/*
+ * This code extensively uses fall-through switches.
+ * Keep the compiler from warning about that.
+ */
+#if defined(__clang__) && defined(__has_warning)
+# if __has_warning("-Wimplicit-fallthrough")
+#  define ConvertUTF_DISABLE_WARNINGS \
+    _Pragma("clang diagnostic push")  \
+    _Pragma("clang diagnostic ignored \"-Wimplicit-fallthrough\"")
+#  define ConvertUTF_RESTORE_WARNINGS \
+    _Pragma("clang diagnostic pop")
+# endif
+#elif defined(__GNUC__) && __GNUC__ > 6
+# define ConvertUTF_DISABLE_WARNINGS \
+   _Pragma("GCC diagnostic push")    \
+   _Pragma("GCC diagnostic ignored \"-Wimplicit-fallthrough\"")
+# define ConvertUTF_RESTORE_WARNINGS \
+   _Pragma("GCC diagnostic pop")
+#endif
+#ifndef ConvertUTF_DISABLE_WARNINGS
+# define ConvertUTF_DISABLE_WARNINGS
+#endif
+#ifndef ConvertUTF_RESTORE_WARNINGS
+# define ConvertUTF_RESTORE_WARNINGS
+#endif
+
+ConvertUTF_DISABLE_WARNINGS
+
+namespace llvm {

 static const int halfShift  = 10; /* used for shifting by 10 bits */

@ -63,8 +90,6 @@ static const UTF32 halfMask = 0x3FFUL;
 #define UNI_SUR_HIGH_END    (UTF32)0xDBFF
 #define UNI_SUR_LOW_START   (UTF32)0xDC00
 #define UNI_SUR_LOW_END     (UTF32)0xDFFF
-#define false      0
-#define true        1

 /* --------------------------------------------------------------------- */

@ -91,7 +116,7 @@ static const char trailingBytesForUTF8[256] = {
 * This table contains as many values as there might be trailing bytes
 * in a UTF-8 sequence.
 */
-static const UTF32 offsetsFromUTF8[6] = { 0x00000000UL, 0x00003080UL, 0x000E2080UL, 
+static const UTF32 offsetsFromUTF8[6] = { 0x00000000UL, 0x00003080UL, 0x000E2080UL,
                     0x03C82080UL, 0xFA082080UL, 0x82082080UL };

 /*
@ -117,7 +142,7 @@ static const UTF8 firstByteMark[7] = { 0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC
 /* --------------------------------------------------------------------- */

 ConversionResult ConvertUTF32toUTF16 (
-        const UTF32** sourceStart, const UTF32* sourceEnd, 
+        const UTF32** sourceStart, const UTF32* sourceEnd,
        UTF16** targetStart, UTF16* targetEnd, ConversionFlags flags) {
    ConversionResult result = conversionOK;
    const UTF32* source = *sourceStart;
@ -166,7 +191,7 @@ ConversionResult ConvertUTF32toUTF16 (
 /* --------------------------------------------------------------------- */

 ConversionResult ConvertUTF16toUTF32 (
-        const UTF16** sourceStart, const UTF16* sourceEnd, 
+        const UTF16** sourceStart, const UTF16* sourceEnd,
        UTF32** targetStart, UTF32* targetEnd, ConversionFlags flags) {
    ConversionResult result = conversionOK;
    const UTF16* source = *sourceStart;
@ -220,7 +245,7 @@ if (result == sourceIllegal) {
    return result;
 }
 ConversionResult ConvertUTF16toUTF8 (
-        const UTF16** sourceStart, const UTF16* sourceEnd, 
+        const UTF16** sourceStart, const UTF16* sourceEnd,
        UTF8** targetStart, UTF8* targetEnd, ConversionFlags flags) {
    ConversionResult result = conversionOK;
    const UTF16* source = *sourceStart;
@ -229,7 +254,7 @@ ConversionResult ConvertUTF16toUTF8 (
        UTF32 ch;
        unsigned short bytesToWrite = 0;
        const UTF32 byteMask = 0xBF;
-        const UTF32 byteMark = 0x80; 
+        const UTF32 byteMark = 0x80;
        const UTF16* oldSource = source; /* In case we have to back up because of target overflow. */
        ch = *source++;
        /* If we have a surrogate pair, convert to UTF32 first. */
@ -290,7 +315,7 @@ ConversionResult ConvertUTF16toUTF8 (
 /* --------------------------------------------------------------------- */

 ConversionResult ConvertUTF32toUTF8 (
-        const UTF32** sourceStart, const UTF32* sourceEnd, 
+        const UTF32** sourceStart, const UTF32* sourceEnd,
        UTF8** targetStart, UTF8* targetEnd, ConversionFlags flags) {
    ConversionResult result = conversionOK;
    const UTF32* source = *sourceStart;
@ -299,7 +324,7 @@ ConversionResult ConvertUTF32toUTF8 (
        UTF32 ch;
        unsigned short bytesToWrite = 0;
        const UTF32 byteMask = 0xBF;
-        const UTF32 byteMark = 0x80; 
+        const UTF32 byteMark = 0x80;
        ch = *source++;
        if (flags == strictConversion ) {
            /* UTF-16 surrogate values are illegal in UTF-32 */
@ -321,7 +346,7 @@ ConversionResult ConvertUTF32toUTF8 (
                                            ch = UNI_REPLACEMENT_CHAR;
                                            result = sourceIllegal;
        }
-        
+
        target += bytesToWrite;
        if (target > targetEnd) {
            --source; /* Back up source pointer! */
@ -394,6 +419,99 @@ Boolean isLegalUTF8Sequence(const UTF8 *source, const UTF8 *sourceEnd) {

 /* --------------------------------------------------------------------- */

+static unsigned
+findMaximalSubpartOfIllFormedUTF8Sequence(const UTF8 *source,
+                                          const UTF8 *sourceEnd) {
+  UTF8 b1, b2, b3;
+
+  assert(!isLegalUTF8Sequence(source, sourceEnd));
+
+  /*
+   * Unicode 6.3.0, D93b:
+   *
+   *   Maximal subpart of an ill-formed subsequence: The longest code unit
+   *   subsequence starting at an unconvertible offset that is either:
+   *   a. the initial subsequence of a well-formed code unit sequence, or
+   *   b. a subsequence of length one.
+   */
+
+  if (source == sourceEnd)
+    return 0;
+
+  /*
+   * Perform case analysis.  See Unicode 6.3.0, Table 3-7. Well-Formed UTF-8
+   * Byte Sequences.
+   */
+
+  b1 = *source;
+  ++source;
+  if (b1 >= 0xC2 && b1 <= 0xDF) {
+    /*
+     * First byte is valid, but we know that this code unit sequence is
+     * invalid, so the maximal subpart has to end after the first byte.
+     */
+    return 1;
+  }
+
+  if (source == sourceEnd)
+    return 1;
+
+  b2 = *source;
+  ++source;
+
+  if (b1 == 0xE0) {
+    return (b2 >= 0xA0 && b2 <= 0xBF) ? 2 : 1;
+  }
+  if (b1 >= 0xE1 && b1 <= 0xEC) {
+    return (b2 >= 0x80 && b2 <= 0xBF) ? 2 : 1;
+  }
+  if (b1 == 0xED) {
+    return (b2 >= 0x80 && b2 <= 0x9F) ? 2 : 1;
+  }
+  if (b1 >= 0xEE && b1 <= 0xEF) {
+    return (b2 >= 0x80 && b2 <= 0xBF) ? 2 : 1;
+  }
+  if (b1 == 0xF0) {
+    if (b2 >= 0x90 && b2 <= 0xBF) {
+      if (source == sourceEnd)
+        return 2;
+
+      b3 = *source;
+      return (b3 >= 0x80 && b3 <= 0xBF) ? 3 : 2;
+    }
+    return 1;
+  }
+  if (b1 >= 0xF1 && b1 <= 0xF3) {
+    if (b2 >= 0x80 && b2 <= 0xBF) {
+      if (source == sourceEnd)
+        return 2;
+
+      b3 = *source;
+      return (b3 >= 0x80 && b3 <= 0xBF) ? 3 : 2;
+    }
+    return 1;
+  }
+  if (b1 == 0xF4) {
+    if (b2 >= 0x80 && b2 <= 0x8F) {
+      if (source == sourceEnd)
+        return 2;
+
+      b3 = *source;
+      return (b3 >= 0x80 && b3 <= 0xBF) ? 3 : 2;
+    }
+    return 1;
+  }
+
+  assert((b1 >= 0x80 && b1 <= 0xC1) || b1 >= 0xF5);
+  /*
+   * There are no valid sequences that start with these bytes.  Maximal subpart
+   * is defined to have length 1 in these cases.
+   */
+  return 1;
+}
+
+/* --------------------------------------------------------------------- */
+
 /*
 * Exported function to return the total number of bytes in a codepoint
 * represented in UTF-8, given the value of the first byte.
@ -402,22 +520,6 @@ unsigned getNumBytesForUTF8(UTF8 first) {
  return trailingBytesForUTF8[first] + 1;
 }

-int getUTF8StringLength(const UTF8* utf8)
-{
-    const UTF8** source = &utf8;
-    const UTF8* sourceEnd = utf8 + strlen((const char*)utf8);
-    int ret = 0;
-    while (*source != sourceEnd) {
-        int length = trailingBytesForUTF8[**source] + 1;
-        if (length > sourceEnd - *source || !isLegalUTF8(*source, length))
-            return 0;
-        *source += length;
-        ++ret;
-    }
-    return ret;
-}
-
-
 /* --------------------------------------------------------------------- */

 /*
@ -437,7 +539,7 @@ Boolean isLegalUTF8String(const UTF8 **source, const UTF8 *sourceEnd) {
 /* --------------------------------------------------------------------- */

 ConversionResult ConvertUTF8toUTF16 (
-        const UTF8** sourceStart, const UTF8* sourceEnd, 
+        const UTF8** sourceStart, const UTF8* sourceEnd,
        UTF16** targetStart, UTF16* targetEnd, ConversionFlags flags) {
    ConversionResult result = conversionOK;
    const UTF8* source = *sourceStart;
@ -509,9 +611,10 @@ ConversionResult ConvertUTF8toUTF16 (

 /* --------------------------------------------------------------------- */

-ConversionResult ConvertUTF8toUTF32 (
-        const UTF8** sourceStart, const UTF8* sourceEnd, 
-        UTF32** targetStart, UTF32* targetEnd, ConversionFlags flags) {
+static ConversionResult ConvertUTF8toUTF32Impl(
+        const UTF8** sourceStart, const UTF8* sourceEnd,
+        UTF32** targetStart, UTF32* targetEnd, ConversionFlags flags,
+        Boolean InputIsPartial) {
    ConversionResult result = conversionOK;
    const UTF8* source = *sourceStart;
    UTF32* target = *targetStart;
@ -519,12 +622,42 @@ ConversionResult ConvertUTF8toUTF32 (
        UTF32 ch = 0;
        unsigned short extraBytesToRead = trailingBytesForUTF8[*source];
        if (extraBytesToRead >= sourceEnd - source) {
-            result = sourceExhausted; break;
+            if (flags == strictConversion || InputIsPartial) {
+                result = sourceExhausted;
+                break;
+            } else {
+                result = sourceIllegal;
+
+                /*
+                 * Replace the maximal subpart of ill-formed sequence with
+                 * replacement character.
+                 */
+                source += findMaximalSubpartOfIllFormedUTF8Sequence(source,
+                                                                    sourceEnd);
+                *target++ = UNI_REPLACEMENT_CHAR;
+                continue;
+            }
        }
+        if (target >= targetEnd) {
+            result = targetExhausted; break;
+        }
+
        /* Do this check whether lenient or strict */
        if (!isLegalUTF8(source, extraBytesToRead+1)) {
            result = sourceIllegal;
-            break;
+            if (flags == strictConversion) {
+                /* Abort conversion. */
+                break;
+            } else {
+                /*
+                 * Replace the maximal subpart of ill-formed sequence with
+                 * replacement character.
+                 */
+                source += findMaximalSubpartOfIllFormedUTF8Sequence(source,
+                                                                    sourceEnd);
+                *target++ = UNI_REPLACEMENT_CHAR;
+                continue;
+            }
        }
        /*
         * The cases all fall through. See "Note A" below.
@ -539,10 +672,6 @@ ConversionResult ConvertUTF8toUTF32 (
        }
        ch -= offsetsFromUTF8[extraBytesToRead];

-        if (target >= targetEnd) {
-            source -= (extraBytesToRead+1); /* Back up the source pointer! */
-            result = targetExhausted; break;
-        }
        if (ch <= UNI_MAX_LEGAL_UTF32) {
            /*
             * UTF-16 surrogate values are illegal in UTF-32, and anything
@ -569,6 +698,36 @@ ConversionResult ConvertUTF8toUTF32 (
    return result;
 }

+ConversionResult ConvertUTF8toUTF32Partial(const UTF8 **sourceStart,
+                                           const UTF8 *sourceEnd,
+                                           UTF32 **targetStart,
+                                           UTF32 *targetEnd,
+                                           ConversionFlags flags) {
+  return ConvertUTF8toUTF32Impl(sourceStart, sourceEnd, targetStart, targetEnd,
+                                flags, /*InputIsPartial=*/true);
+}
+
+ConversionResult ConvertUTF8toUTF32(const UTF8 **sourceStart,
+                                    const UTF8 *sourceEnd, UTF32 **targetStart,
+                                    UTF32 *targetEnd, ConversionFlags flags) {
+  return ConvertUTF8toUTF32Impl(sourceStart, sourceEnd, targetStart, targetEnd,
+                                flags, /*InputIsPartial=*/false);
+}
+
+int getUTF8StringLength(const UTF8* utf8) {
+    const UTF8** source   = &utf8;
+    const UTF8* sourceEnd = utf8 + strlen((const char*) utf8);
+    int ret               = 0;
+    while (*source != sourceEnd) {
+        int length = trailingBytesForUTF8[**source] + 1;
+        if (length > sourceEnd - *source || !isLegalUTF8(*source, length))
+            return 0;
+        *source += length;
+        ++ret;
+    }
+    return ret;
+}
+
 /* ---------------------------------------------------------------------

    Note A.
@ -587,3 +746,7 @@ ConversionResult ConvertUTF8toUTF32 (
    similarly unrolled loops.

   --------------------------------------------------------------------- */
+
+} // namespace llvm
+
+ConvertUTF_RESTORE_WARNINGS
--- a/external/ConvertUTF/ConvertUTF.h
+++ b/external/ConvertUTF/ConvertUTF.h
@ -1,9 +1,8 @@
 /*===--- ConvertUTF.h - Universal Character Names conversions ---------------===
 *
- *                     The LLVM Compiler Infrastructure
- *
- * This file is distributed under the University of Illinois Open Source
- * License. See LICENSE.TXT for details.
+ * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+ * See https://llvm.org/LICENSE.txt for license information.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 *
 *==------------------------------------------------------------------------==*/
 /*
@ -90,7 +89,15 @@
 #ifndef LLVM_SUPPORT_CONVERTUTF_H
 #define LLVM_SUPPORT_CONVERTUTF_H

-#include <stddef.h>   /* ptrdiff_t */
+#include <cstddef>
+#include <string>
+#include <system_error>
+
+// Wrap everything in namespace llvm so that programs can link with llvm and
+// their own version of the unicode libraries.
+
+namespace llvm {
+
 /* ---------------------------------------------------------------------
    The following 4 definitions are compiler-specific.
    The C standard does not guarantee that wchar_t has at least
@ -128,16 +135,23 @@ typedef enum {
  lenientConversion
 } ConversionFlags;

-/* This is for C++ and does no harm in C */
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 ConversionResult ConvertUTF8toUTF16 (
  const UTF8** sourceStart, const UTF8* sourceEnd,
  UTF16** targetStart, UTF16* targetEnd, ConversionFlags flags);

-ConversionResult ConvertUTF8toUTF32 (
+/**
+ * Convert a partial UTF8 sequence to UTF32.  If the sequence ends in an
+ * incomplete code unit sequence, returns \c sourceExhausted.
+ */
+ConversionResult ConvertUTF8toUTF32Partial(
+  const UTF8** sourceStart, const UTF8* sourceEnd,
+  UTF32** targetStart, UTF32* targetEnd, ConversionFlags flags);
+
+/**
+ * Convert a complete UTF8 sequence to UTF32.  If the sequence ends in an incomplete code
+ * unit sequence, returns \c sourceIllegal (\c sourceExhausted under \c strictConversion).
+ */
+ConversionResult ConvertUTF8toUTF32(
  const UTF8** sourceStart, const UTF8* sourceEnd,
  UTF32** targetStart, UTF32* targetEnd, ConversionFlags flags);

@ -162,93 +176,10 @@ Boolean isLegalUTF8Sequence(const UTF8 *source, const UTF8 *sourceEnd);
 Boolean isLegalUTF8String(const UTF8 **source, const UTF8 *sourceEnd);

 unsigned getNumBytesForUTF8(UTF8 firstByte);
-    
+
+// adxe-specific
 int getUTF8StringLength(const UTF8* utf8);

-#ifdef __cplusplus
-}
-
-/*************************************************************************/
-/* Below are LLVM-specific wrappers of the functions above. */
-
-//#include "llvm/ADT/ArrayRef.h"
-//#include "llvm/ADT/StringRef.h"
-
-#include <vector>
-#include <string>
-
-namespace llvm {
-
-/**
- * Convert an UTF8 StringRef to UTF8, UTF16, or UTF32 depending on
- * WideCharWidth. The converted data is written to ResultPtr, which needs to
- * point to at least WideCharWidth * (Source.Size() + 1) bytes. On success,
- * ResultPtr will point one after the end of the copied string. On failure,
- * ResultPtr will not be changed, and ErrorPtr will be set to the location of
- * the first character which could not be converted.
- * \return true on success.
- */
-bool ConvertUTF8toWide(unsigned WideCharWidth, const std::string& Source,
-                       char *&ResultPtr, const UTF8 *&ErrorPtr);
-
-/**
- * Convert an Unicode code point to UTF8 sequence.
- *
- * \param Source a Unicode code point.
- * \param [in,out] ResultPtr pointer to the output buffer, needs to be at least
- * \c UNI_MAX_UTF8_BYTES_PER_CODE_POINT bytes.  On success \c ResultPtr is
- * updated one past end of the converted sequence.
- *
- * \returns true on success.
- */
-bool ConvertCodePointToUTF8(unsigned Source, char *&ResultPtr);
-
-/**
- * Convert the first UTF8 sequence in the given source buffer to a UTF32
- * code point.
- *
- * \param [in,out] source A pointer to the source buffer. If the conversion
- * succeeds, this pointer will be updated to point to the byte just past the
- * end of the converted sequence.
- * \param sourceEnd A pointer just past the end of the source buffer.
- * \param [out] target The converted code
- * \param flags Whether the conversion is strict or lenient.
- *
- * \returns conversionOK on success
- *
- * \sa ConvertUTF8toUTF32
- */
-static inline ConversionResult convertUTF8Sequence(const UTF8 **source,
-                                                   const UTF8 *sourceEnd,
-                                                   UTF32 *target,
-                                                   ConversionFlags flags) {
-  if (*source == sourceEnd)
-    return sourceExhausted;
-  unsigned size = getNumBytesForUTF8(**source);
-  if ((ptrdiff_t)size > sourceEnd - *source)
-    return sourceExhausted;
-  return ConvertUTF8toUTF32(source, *source + size, &target, target + 1, flags);
-}
-
-/**
- * Returns true if a blob of text starts with a UTF-16 big or little endian byte
- * order mark.
- */
-bool hasUTF16ByteOrderMark(const char* SrcBytes, size_t len);
-
-/**
- * Converts a stream of raw bytes assumed to be UTF16 into a UTF8 std::string.
- *
- * \param [in] SrcBytes A buffer of what is assumed to be UTF-16 encoded text.
- * \param [out] Out Converted UTF-8 is stored here on success.
- * \returns true on success
- */
-bool convertUTF16ToUTF8String(const std::u16string& utf16, std::string &Out);
-
 } /* end namespace llvm */

 #endif
-
-/* --------------------------------------------------------------------- */
-
-#endif
--- a/external/ConvertUTF/ConvertUTFWrapper.cpp
+++ b/external/ConvertUTF/ConvertUTFWrapper.cpp
@ -1,144 +0,0 @@
-//===-- ConvertUTFWrapper.cpp - Wrap ConvertUTF.h with clang data types -----===
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#include "ConvertUTF.h"
-//#include "llvm/Support/SwapByteOrder.h"
-#include <string>
-#include <vector>
-#include <stdint.h>  // uint16_t
-#include <assert.h>
-#include <memory.h>
-
-namespace llvm {
-
-bool ConvertUTF8toWide(unsigned WideCharWidth, const std::string& Source,
-                       char *&ResultPtr, const UTF8 *&ErrorPtr) {
-  assert(WideCharWidth == 1 || WideCharWidth == 2 || WideCharWidth == 4);
-  ConversionResult result = conversionOK;
-  // Copy the character span over.
-  if (WideCharWidth == 1) {
-    const UTF8 *Pos = reinterpret_cast<const UTF8*>(Source.data());
-    if (!isLegalUTF8String(&Pos, reinterpret_cast<const UTF8*>(Source.data() + Source.length()))) {
-      result = sourceIllegal;
-      ErrorPtr = Pos;
-    } else {
-      memcpy(ResultPtr, Source.data(), Source.size());
-      ResultPtr += Source.size();
-    }
-  } else if (WideCharWidth == 2) {
-    const UTF8 *sourceStart = (const UTF8*)Source.data();
-    // FIXME: Make the type of the result buffer correct instead of
-    // using reinterpret_cast.
-    UTF16 *targetStart = reinterpret_cast<UTF16*>(ResultPtr);
-    ConversionFlags flags = strictConversion;
-    result = ConvertUTF8toUTF16(
-        &sourceStart, sourceStart + Source.size(),
-        &targetStart, targetStart + 2*Source.size(), flags);
-    if (result == conversionOK)
-      ResultPtr = reinterpret_cast<char*>(targetStart);
-    else
-      ErrorPtr = sourceStart;
-  } else if (WideCharWidth == 4) {
-    const UTF8 *sourceStart = (const UTF8*)Source.data();
-    // FIXME: Make the type of the result buffer correct instead of
-    // using reinterpret_cast.
-    UTF32 *targetStart = reinterpret_cast<UTF32*>(ResultPtr);
-    ConversionFlags flags = strictConversion;
-    result = ConvertUTF8toUTF32(
-        &sourceStart, sourceStart + Source.size(),
-        &targetStart, targetStart + 4*Source.size(), flags);
-    if (result == conversionOK)
-      ResultPtr = reinterpret_cast<char*>(targetStart);
-    else
-      ErrorPtr = sourceStart;
-  }
-  assert((result != targetExhausted)
-         && "ConvertUTF8toUTFXX exhausted target buffer");
-  return result == conversionOK;
-}
-
-bool ConvertCodePointToUTF8(unsigned Source, char *&ResultPtr) {
-  const UTF32 *SourceStart = &Source;
-  const UTF32 *SourceEnd = SourceStart + 1;
-  UTF8 *TargetStart = reinterpret_cast<UTF8 *>(ResultPtr);
-  UTF8 *TargetEnd = TargetStart + 4;
-  ConversionResult CR = ConvertUTF32toUTF8(&SourceStart, SourceEnd,
-                                           &TargetStart, TargetEnd,
-                                           strictConversion);
-  if (CR != conversionOK)
-    return false;
-
-  ResultPtr = reinterpret_cast<char*>(TargetStart);
-  return true;
-}
-
-bool hasUTF16ByteOrderMark(const char* S, size_t len) {
-  return (len >= 2 &&
-          ((S[0] == '\xff' && S[1] == '\xfe') ||
-           (S[0] == '\xfe' && S[1] == '\xff')));
-}
-    
-/// SwapByteOrder_16 - This function returns a byte-swapped representation of
-/// the 16-bit argument.
-inline uint16_t SwapByteOrder_16(uint16_t value) {
-#if defined(_MSC_VER) && !defined(_DEBUG)
-    // The DLL version of the runtime lacks these functions (bug!?), but in a
-    // release build they're replaced with BSWAP instructions anyway.
-    return _byteswap_ushort(value);
-#else
-    uint16_t Hi = value << 8;
-    uint16_t Lo = value >> 8;
-    return Hi | Lo;
-#endif
-}
-
-bool convertUTF16ToUTF8String(const std::u16string& utf16, std::string &Out) {
-  assert(Out.empty());
-
-  // Avoid OOB by returning early on empty input.
-  if (utf16.empty())
-    return true;
-
-  const UTF16 *Src = reinterpret_cast<const UTF16 *>(utf16.data());
-  const UTF16 *SrcEnd = reinterpret_cast<const UTF16 *>(utf16.data() + utf16.length());
-
-  // Byteswap if necessary.
-  std::vector<UTF16> ByteSwapped;
-  if (Src[0] == UNI_UTF16_BYTE_ORDER_MARK_SWAPPED) {
-    ByteSwapped.insert(ByteSwapped.end(), Src, SrcEnd);
-    for (size_t I = 0, E = ByteSwapped.size(); I != E; ++I)
-      ByteSwapped[I] = SwapByteOrder_16(ByteSwapped[I]);
-    Src = &ByteSwapped[0];
-    SrcEnd = &ByteSwapped[ByteSwapped.size() - 1] + 1;
-  }
-
-  // Skip the BOM for conversion.
-  if (Src[0] == UNI_UTF16_BYTE_ORDER_MARK_NATIVE)
-    Src++;
-
-  // Just allocate enough space up front.  We'll shrink it later.
-  Out.resize(utf16.length() * UNI_MAX_UTF8_BYTES_PER_CODE_POINT + 1);
-  UTF8 *Dst = reinterpret_cast<UTF8 *>(&Out[0]);
-  UTF8 *DstEnd = Dst + Out.size();
-
-  ConversionResult CR =
-      ConvertUTF16toUTF8(&Src, SrcEnd, &Dst, DstEnd, strictConversion);
-  assert(CR != targetExhausted);
-
-  if (CR != conversionOK) {
-    Out.clear();
-    return false;
-  }
-
-  Out.resize(reinterpret_cast<char *>(Dst) - &Out[0]);
-  return true;
-}
-
-} // end namespace llvm
-
--- a/external/README.md
+++ b/external/README.md
@ -36,7 +36,7 @@

 ## ConvertUTF
 - Upstream: https://github.com/llvm/llvm-project
- Version: NA
+- Version: git 2946cd7 (without the LLVM-specific wrapper functions)
 - License: Apache-2.0 WITH LLVM-exception

 ## curl
@ -153,7 +153,7 @@

 ## poly2tri
 - Upstream: https://github.com/jhasse/poly2tri
- Version: NA
+- Version: git 7f0487a
 - License: BSD-3-Clause

 ## pugixml
--- a/external/poly2tri/common/shapes.cc
+++ b/external/poly2tri/common/shapes.cc
@ -243,6 +243,17 @@ Point* Triangle::PointCCW(const Point& point)
  return nullptr;
 }

+// The neighbor across to given point
+Triangle* Triangle::NeighborAcross(const Point& point)
+{
+  if (&point == points_[0]) {
+    return neighbors_[0];
+  } else if (&point == points_[1]) {
+    return neighbors_[1];
+  }
+  return neighbors_[2];
+}
+
 // The neighbor clockwise to given point
 Triangle* Triangle::NeighborCW(const Point& point)
 {
@ -349,23 +360,6 @@ void Triangle::SetDelunayEdgeCW(const Point& p, bool e)
  }
 }

-// The neighbor across to given point
-Triangle& Triangle::NeighborAcross(const Point& opoint)
-{
-  Triangle* neighbor = nullptr;
-  if (&opoint == points_[0]) {
-    neighbor = neighbors_[0];
-  } else if (&opoint == points_[1]) {
-    neighbor = neighbors_[1];
-  } else {
-    neighbor = neighbors_[2];
-  }
-  if (neighbor == nullptr) {
-      throw std::runtime_error("NeighborAcross - null neighbor");
-  }
-  return *neighbor;
-}
-
 void Triangle::DebugPrint()
 {
  std::cout << *points_[0] << " " << *points_[1] << " " << *points_[2] << std::endl;
--- a/external/poly2tri/common/shapes.h
+++ b/external/poly2tri/common/shapes.h
@ -176,6 +176,7 @@ void MarkConstrainedEdge(Point* p, Point* q);
 int Index(const Point* p);
 int EdgeIndex(const Point* p1, const Point* p2);

+Triangle* NeighborAcross(const Point& point);
 Triangle* NeighborCW(const Point& point);
 Triangle* NeighborCCW(const Point& point);
 bool GetConstrainedEdgeCCW(const Point& p);
@ -203,8 +204,6 @@ void ClearDelunayEdges();
 inline bool IsInterior();
 inline void IsInterior(bool b);

-Triangle& NeighborAcross(const Point& opoint);
-
 void DebugPrint();

 bool CircumcicleContains(const Point&) const;
@ -260,7 +259,7 @@ inline bool operator ==(const Point& a, const Point& b)

 inline bool operator !=(const Point& a, const Point& b)
 {
-  return !(a.x == b.x) && !(a.y == b.y);
+  return !(a.x == b.x) || !(a.y == b.y);
 }

 /// Peform the dot product on two vectors.
--- a/external/poly2tri/sweep/cdt.cc
+++ b/external/poly2tri/sweep/cdt.cc
@ -1,5 +1,5 @@
 /*
- * Poly2Tri Copyright (c) 2009-2018, Poly2Tri Contributors
+ * Poly2Tri Copyright (c) 2009-2021, Poly2Tri Contributors
 * https://github.com/jhasse/poly2tri
 *
 * All rights reserved.
@ -68,4 +68,4 @@ CDT::~CDT()
  delete sweep_;
 }

-}
+} // namespace p2t
--- a/external/poly2tri/sweep/sweep.cc
+++ b/external/poly2tri/sweep/sweep.cc
@ -65,17 +65,25 @@ void Sweep::FinalizationPolygon(SweepContext& tcx)
  // Get an Internal triangle to start with
  Triangle* t = tcx.front()->head()->next->triangle;
  Point* p = tcx.front()->head()->next->point;
-  while (!t->GetConstrainedEdgeCW(*p)) {
+  while (t && !t->GetConstrainedEdgeCW(*p)) {
    t = t->NeighborCCW(*p);
  }

  // Collect interior triangles constrained by edges
-  tcx.MeshClean(*t);
+  if (t) {
+    tcx.MeshClean(*t);
+  }
 }

 Node& Sweep::PointEvent(SweepContext& tcx, Point& point)
 {
-  Node& node = tcx.LocateNode(point);
+  Node* node_ptr = tcx.LocateNode(point);
+  if (!node_ptr || !node_ptr->point || !node_ptr->next || !node_ptr->next->point)
+  {
+    throw std::runtime_error("PointEvent - null node");
+  }
+
+  Node& node = *node_ptr;
  Node& new_node = NewFrontTriangle(tcx, point, node);

  // Only need to check +epsilon since point never have smaller
@ -108,6 +116,9 @@ void Sweep::EdgeEvent(SweepContext& tcx, Edge* edge, Node* node)

 void Sweep::EdgeEvent(SweepContext& tcx, Point& ep, Point& eq, Triangle* triangle, Point& point)
 {
+  if (triangle == nullptr) {
+    throw std::runtime_error("EdgeEvent - null triangle");
+  }
  if (IsEdgeSideOfTriangle(*triangle, ep, eq)) {
    return;
  }
@ -115,13 +126,13 @@ void Sweep::EdgeEvent(SweepContext& tcx, Point& ep, Point& eq, Triangle* triangl
  Point* p1 = triangle->PointCCW(point);
  Orientation o1 = Orient2d(eq, *p1, ep);
  if (o1 == COLLINEAR) {
-    if( triangle->Contains(&eq, p1)) {
-      triangle->MarkConstrainedEdge(&eq, p1 );
+    if (triangle->Contains(&eq, p1)) {
+      triangle->MarkConstrainedEdge(&eq, p1);
      // We are modifying the constraint maybe it would be better to
      // not change the given constraint and just keep a variable for the new constraint
      tcx.edge_event.constrained_edge->q = p1;
-      triangle = &triangle->NeighborAcross(point);
-      EdgeEvent( tcx, ep, *p1, triangle, *p1 );
+      triangle = triangle->NeighborAcross(point);
+      EdgeEvent(tcx, ep, *p1, triangle, *p1);
    } else {
      throw std::runtime_error("EdgeEvent - collinear points not supported");
    }
@ -131,13 +142,13 @@ void Sweep::EdgeEvent(SweepContext& tcx, Point& ep, Point& eq, Triangle* triangl
  Point* p2 = triangle->PointCW(point);
  Orientation o2 = Orient2d(eq, *p2, ep);
  if (o2 == COLLINEAR) {
-    if( triangle->Contains(&eq, p2)) {
-      triangle->MarkConstrainedEdge(&eq, p2 );
+    if (triangle->Contains(&eq, p2)) {
+      triangle->MarkConstrainedEdge(&eq, p2);
      // We are modifying the constraint maybe it would be better to
      // not change the given constraint and just keep a variable for the new constraint
      tcx.edge_event.constrained_edge->q = p2;
-      triangle = &triangle->NeighborAcross(point);
-      EdgeEvent( tcx, ep, *p2, triangle, *p2 );
+      triangle = triangle->NeighborAcross(point);
+      EdgeEvent(tcx, ep, *p2, triangle, *p2);
    } else {
      throw std::runtime_error("EdgeEvent - collinear points not supported");
    }
@ -149,12 +160,13 @@ void Sweep::EdgeEvent(SweepContext& tcx, Point& ep, Point& eq, Triangle* triangl
    // that will cross edge
    if (o1 == CW) {
      triangle = triangle->NeighborCCW(point);
-    }       else{
+    } else {
      triangle = triangle->NeighborCW(point);
    }
    EdgeEvent(tcx, ep, eq, triangle, point);
  } else {
    // This triangle crosses constraint so lets flippin start!
+    assert(triangle);
    FlipEdgeEvent(tcx, ep, eq, triangle, point);
  }
 }
@ -215,7 +227,6 @@ void Sweep::Fill(SweepContext& tcx, Node& node)
  if (!Legalize(tcx, *triangle)) {
    tcx.MapTriangleToNodes(*triangle);
  }
-
 }

 void Sweep::FillAdvancingFront(SweepContext& tcx, Node& n)
@ -224,7 +235,7 @@ void Sweep::FillAdvancingFront(SweepContext& tcx, Node& n)
  // Fill right holes
  Node* node = n.next;

-  while (node->next) {
+  while (node && node->next) {
    // if HoleAngle exceeds 90 degrees then break.
    if (LargeHole_DontFill(node)) break;
    Fill(tcx, *node);
@ -234,7 +245,7 @@ void Sweep::FillAdvancingFront(SweepContext& tcx, Node& n)
  // Fill left holes
  node = n.prev;

-  while (node->prev) {
+  while (node && node->prev) {
    // if HoleAngle exceeds 90 degrees then break.
    if (LargeHole_DontFill(node)) break;
    Fill(tcx, *node);
@ -293,7 +304,7 @@ double Sweep::Angle(const Point* origin, const Point* pa, const Point* pb) const
   */
  const double px = origin->x;
  const double py = origin->y;
-  const double ax = pa->x- px;
+  const double ax = pa->x - px;
  const double ay = pa->y - py;
  const double bx = pb->x - px;
  const double by = pb->y - py;
@ -586,7 +597,7 @@ void Sweep::FillRightBelowEdgeEvent(SweepContext& tcx, Edge* edge, Node& node)
    if (Orient2d(*node.point, *node.next->point, *node.next->next->point) == CCW) {
      // Concave
      FillRightConcaveEdgeEvent(tcx, edge, node);
-    } else{
+    } else {
      // Convex
      FillRightConvexEdgeEvent(tcx, edge, node);
      // Retry this one
@ -610,7 +621,6 @@ void Sweep::FillRightConcaveEdgeEvent(SweepContext& tcx, Edge* edge, Node& node)
      }
    }
  }
-
 }

 void Sweep::FillRightConvexEdgeEvent(SweepContext& tcx, Edge* edge, Node& node)
@ -619,13 +629,13 @@ void Sweep::FillRightConvexEdgeEvent(SweepContext& tcx, Edge* edge, Node& node)
  if (Orient2d(*node.next->point, *node.next->next->point, *node.next->next->next->point) == CCW) {
    // Concave
    FillRightConcaveEdgeEvent(tcx, edge, *node.next);
-  } else{
+  } else {
    // Convex
    // Next above or below edge?
    if (Orient2d(*edge->q, *node.next->next->point, *edge->p) == CCW) {
      // Below
      FillRightConvexEdgeEvent(tcx, edge, *node.next);
-    } else{
+    } else {
      // Above
    }
  }
@ -664,13 +674,13 @@ void Sweep::FillLeftConvexEdgeEvent(SweepContext& tcx, Edge* edge, Node& node)
  if (Orient2d(*node.prev->point, *node.prev->prev->point, *node.prev->prev->prev->point) == CW) {
    // Concave
    FillLeftConcaveEdgeEvent(tcx, edge, *node.prev);
-  } else{
+  } else {
    // Convex
    // Next above or below edge?
    if (Orient2d(*edge->q, *node.prev->prev->point, *edge->p) == CW) {
      // Below
      FillLeftConvexEdgeEvent(tcx, edge, *node.prev);
-    } else{
+    } else {
      // Above
    }
  }
@ -686,17 +696,22 @@ void Sweep::FillLeftConcaveEdgeEvent(SweepContext& tcx, Edge* edge, Node& node)
      if (Orient2d(*node.point, *node.prev->point, *node.prev->prev->point) == CW) {
        // Next is concave
        FillLeftConcaveEdgeEvent(tcx, edge, node);
-      } else{
+      } else {
        // Next is convex
      }
    }
  }
-
 }

 void Sweep::FlipEdgeEvent(SweepContext& tcx, Point& ep, Point& eq, Triangle* t, Point& p)
 {
-  Triangle& ot = t->NeighborAcross(p);
+  assert(t);
+  Triangle* ot_ptr = t->NeighborAcross(p);
+  if (ot_ptr == nullptr)
+  {
+    throw std::runtime_error("FlipEdgeEvent - null neighbor across");
+  }
+  Triangle& ot = *ot_ptr;
  Point& op = *ot.OppositePoint(*t, p);

  if (InScanArea(p, *t->PointCCW(p), *t->PointCW(p), op)) {
@ -762,10 +777,26 @@ Point& Sweep::NextFlipPoint(Point& ep, Point& eq, Triangle& ot, Point& op)
 void Sweep::FlipScanEdgeEvent(SweepContext& tcx, Point& ep, Point& eq, Triangle& flip_triangle,
                              Triangle& t, Point& p)
 {
-  Triangle& ot = t.NeighborAcross(p);
-  Point& op = *ot.OppositePoint(t, p);
+  Triangle* ot_ptr = t.NeighborAcross(p);
+  if (ot_ptr == nullptr) {
+    throw std::runtime_error("FlipScanEdgeEvent - null neighbor across");
+  }

-  if (InScanArea(eq, *flip_triangle.PointCCW(eq), *flip_triangle.PointCW(eq), op)) {
+  Point* op_ptr = ot_ptr->OppositePoint(t, p);
+  if (op_ptr == nullptr) {
+    throw std::runtime_error("FlipScanEdgeEvent - null opposing point");
+  }
+
+  Point* p1 = flip_triangle.PointCCW(eq);
+  Point* p2 = flip_triangle.PointCW(eq);
+  if (p1 == nullptr || p2 == nullptr) {
+    throw std::runtime_error("FlipScanEdgeEvent - null on either of points");
+  }
+
+  Triangle& ot = *ot_ptr;
+  Point& op = *op_ptr;
+
+  if (InScanArea(eq, *p1, *p2, op)) {
    // flip with new edge op->eq
    FlipEdgeEvent(tcx, eq, op, &ot, op);
    // TODO: Actually I just figured out that it should be possible to
@ -775,7 +806,7 @@ void Sweep::FlipScanEdgeEvent(SweepContext& tcx, Point& ep, Point& eq, Triangle&
    // also need to set a new flip_triangle first
    // Turns out at first glance that this is somewhat complicated
    // so it will have to wait.
-  } else{
+  } else {
    Point& newP = NextFlipPoint(ep, eq, ot, op);
    FlipScanEdgeEvent(tcx, ep, eq, flip_triangle, ot, newP);
  }
@ -790,5 +821,4 @@ Sweep::~Sweep() {

 }

-}
-
+} // namespace p2t
--- a/external/poly2tri/sweep/sweep.h
+++ b/external/poly2tri/sweep/sweep.h
@ -33,7 +33,7 @@
 * Zalik, B.(2008)'Sweep-line algorithm for constrained Delaunay triangulation',
 * International Journal of Geographical Information Science
 *
- * "FlipScan" Constrained Edge Algorithm invented by Thomas ?hl?n, thahlen@gmail.com
+ * "FlipScan" Constrained Edge Algorithm invented by Thomas Åhlén, thahlen@gmail.com
 */

 #ifndef SWEEP_H
--- a/external/poly2tri/sweep/sweep_context.cc
+++ b/external/poly2tri/sweep/sweep_context.cc
@ -87,8 +87,8 @@ void SweepContext::InitTriangulation()

  double dx = kAlpha * (xmax - xmin);
  double dy = kAlpha * (ymax - ymin);
-  head_ = new Point(xmax + dx, ymin - dy);
-  tail_ = new Point(xmin - dx, ymin - dy);
+  head_ = new Point(xmin - dx, ymin - dy);
+  tail_ = new Point(xmax + dx, ymin - dy);

  // Sort points along y-axis
  std::sort(points_.begin(), points_.end(), cmp);
@ -114,17 +114,17 @@ void SweepContext::AddToMap(Triangle* triangle)
  map_.push_back(triangle);
 }

-Node& SweepContext::LocateNode(const Point& point)
+Node* SweepContext::LocateNode(const Point& point)
 {
  // TODO implement search tree
-  return *front_->LocateNode(point.x);
+  return front_->LocateNode(point.x);
 }

 void SweepContext::CreateAdvancingFront()
 {

  // Initial triangle
-  Triangle* triangle = new Triangle(*points_[0], *tail_, *head_);
+  Triangle* triangle = new Triangle(*points_[0], *head_, *tail_);

  map_.push_back(triangle);

@ -207,4 +207,4 @@ SweepContext::~SweepContext()

 }

-}
+} // namespace p2t
--- a/external/poly2tri/sweep/sweep_context.h
+++ b/external/poly2tri/sweep/sweep_context.h
@ -66,7 +66,7 @@ Point* tail() const;

 size_t point_count() const;

-Node& LocateNode(const Point& point);
+Node* LocateNode(const Point& point);

 void RemoveNode(Node* node);