/* auto-generated on 2023-12-07 12:42:28 -0500. Do not edit! */ /* including simdjson.h: */ /* begin file simdjson.h */ #ifndef SIMDJSON_H #define SIMDJSON_H /** * @mainpage * * Check the [README.md](https://github.com/simdjson/simdjson/blob/master/README.md#simdjson--parsing-gigabytes-of-json-per-second). * * Sample code. See https://github.com/simdjson/simdjson/blob/master/doc/basics.md for more examples. #include "simdjson.h" int main(void) { // load from `twitter.json` file: simdjson::dom::parser parser; simdjson::dom::element tweets = parser.load("twitter.json"); std::cout << tweets["search_metadata"]["count"] << " results." << std::endl; // Parse and iterate through an array of objects auto abstract_json = R"( [ { "12345" : {"a":12.34, "b":56.78, "c": 9998877} }, { "12545" : {"a":11.44, "b":12.78, "c": 11111111} } ] )"_padded; for (simdjson::dom::object obj : parser.parse(abstract_json)) { for(const auto key_value : obj) { std::cout << "key: " << key_value.key << " : "; simdjson::dom::object innerobj = key_value.value; std::cout << "a: " << double(innerobj["a"]) << ", "; std::cout << "b: " << double(innerobj["b"]) << ", "; std::cout << "c: " << int64_t(innerobj["c"]) << std::endl; } } } */ /* including simdjson/common_defs.h: #include "simdjson/common_defs.h" */ /* begin file simdjson/common_defs.h */ #ifndef SIMDJSON_COMMON_DEFS_H #define SIMDJSON_COMMON_DEFS_H #include /* including simdjson/compiler_check.h: #include "simdjson/compiler_check.h" */ /* begin file simdjson/compiler_check.h */ #ifndef SIMDJSON_COMPILER_CHECK_H #define SIMDJSON_COMPILER_CHECK_H #ifndef __cplusplus #error simdjson requires a C++ compiler #endif #ifndef SIMDJSON_CPLUSPLUS #if defined(_MSVC_LANG) && !defined(__clang__) #define SIMDJSON_CPLUSPLUS (_MSC_VER == 1900 ?
201103L : _MSVC_LANG) #else #define SIMDJSON_CPLUSPLUS __cplusplus #endif #endif // C++ 17 #if !defined(SIMDJSON_CPLUSPLUS17) && (SIMDJSON_CPLUSPLUS >= 201703L) #define SIMDJSON_CPLUSPLUS17 1 #endif // C++ 14 #if !defined(SIMDJSON_CPLUSPLUS14) && (SIMDJSON_CPLUSPLUS >= 201402L) #define SIMDJSON_CPLUSPLUS14 1 #endif // C++ 11 #if !defined(SIMDJSON_CPLUSPLUS11) && (SIMDJSON_CPLUSPLUS >= 201103L) #define SIMDJSON_CPLUSPLUS11 1 #endif #ifndef SIMDJSON_CPLUSPLUS11 #error simdjson requires a compiler compliant with the C++11 standard #endif #ifndef SIMDJSON_IF_CONSTEXPR #if SIMDJSON_CPLUSPLUS17 #define SIMDJSON_IF_CONSTEXPR if constexpr #else #define SIMDJSON_IF_CONSTEXPR if #endif #endif #endif // SIMDJSON_COMPILER_CHECK_H /* end file simdjson/compiler_check.h */ /* including simdjson/portability.h: #include "simdjson/portability.h" */ /* begin file simdjson/portability.h */ #ifndef SIMDJSON_PORTABILITY_H #define SIMDJSON_PORTABILITY_H #include #include #include #include #include #ifndef _WIN32 // strcasecmp, strncasecmp #include #endif #ifdef _MSC_VER #define SIMDJSON_VISUAL_STUDIO 1 /** * We want to differentiate carefully between * clang under visual studio and regular visual * studio. * * Under clang for Windows, we enable: * * target pragmas so that part and only part of the * code gets compiled for advanced instructions.
* */ #ifdef __clang__ // clang under visual studio #define SIMDJSON_CLANG_VISUAL_STUDIO 1 #else // just regular visual studio (best guess) #define SIMDJSON_REGULAR_VISUAL_STUDIO 1 #endif // __clang__ #endif // _MSC_VER #if defined(__x86_64__) || defined(_M_AMD64) #define SIMDJSON_IS_X86_64 1 #elif defined(__aarch64__) || defined(_M_ARM64) #define SIMDJSON_IS_ARM64 1 #elif defined(__riscv) && __riscv_xlen == 64 #define SIMDJSON_IS_RISCV64 1 #elif defined(__PPC64__) || defined(_M_PPC64) #if defined(__ALTIVEC__) #define SIMDJSON_IS_PPC64_VMX 1 #endif // defined(__ALTIVEC__) #else #define SIMDJSON_IS_32BITS 1 #if defined(_M_IX86) || defined(__i386__) #define SIMDJSON_IS_X86_32BITS 1 #elif defined(__arm__) || defined(_M_ARM) #define SIMDJSON_IS_ARM_32BITS 1 #elif defined(__PPC__) || defined(_M_PPC) #define SIMDJSON_IS_PPC_32BITS 1 #endif #endif // defined(__x86_64__) || defined(_M_AMD64) #ifndef SIMDJSON_IS_32BITS #define SIMDJSON_IS_32BITS 0 #endif #if SIMDJSON_IS_32BITS #ifndef SIMDJSON_NO_PORTABILITY_WARNING // In the future, we should allow programmers // to get a warning. #endif // SIMDJSON_NO_PORTABILITY_WARNING #endif // SIMDJSON_IS_32BITS #define SIMDJSON_CAT_IMPLEMENTATION_(a,...) a ## __VA_ARGS__ #define SIMDJSON_CAT(a,...) SIMDJSON_CAT_IMPLEMENTATION_(a, __VA_ARGS__) #define SIMDJSON_STRINGIFY_IMPLEMENTATION_(a,...) #a SIMDJSON_STRINGIFY(__VA_ARGS__) #define SIMDJSON_STRINGIFY(a,...) SIMDJSON_CAT_IMPLEMENTATION_(a, __VA_ARGS__) // this is almost standard? NOTE(review): the two variadic SIMDJSON_STRINGIFY definitions above are immediately undefined and redefined below, so only the single-argument versions take effect. #undef SIMDJSON_STRINGIFY_IMPLEMENTATION_ #undef SIMDJSON_STRINGIFY #define SIMDJSON_STRINGIFY_IMPLEMENTATION_(a) #a #define SIMDJSON_STRINGIFY(a) SIMDJSON_STRINGIFY_IMPLEMENTATION_(a) // Our fast kernels require 64-bit systems. // // On 32-bit x86, we lack 64-bit popcnt, lzcnt, blsr instructions. // Furthermore, the number of SIMD registers is reduced. // // On 32-bit ARM, we would have smaller registers. // // The simdjson users should still have the fallback kernel.
It is // slower, but it should run everywhere. // // Enable valid runtime implementations, and select SIMDJSON_BUILTIN_IMPLEMENTATION // // We are going to use runtime dispatch. #if SIMDJSON_IS_X86_64 #ifdef __clang__ // clang does not have GCC push pop // warning: clang attribute push can't be used within a namespace in clang up // til 8.0 so SIMDJSON_TARGET_REGION and SIMDJSON_UNTARGET_REGION must be *outside* of a // namespace. #define SIMDJSON_TARGET_REGION(T) \ _Pragma(SIMDJSON_STRINGIFY( \ clang attribute push(__attribute__((target(T))), apply_to = function))) #define SIMDJSON_UNTARGET_REGION _Pragma("clang attribute pop") #elif defined(__GNUC__) // GCC is easier #define SIMDJSON_TARGET_REGION(T) \ _Pragma("GCC push_options") _Pragma(SIMDJSON_STRINGIFY(GCC target(T))) #define SIMDJSON_UNTARGET_REGION _Pragma("GCC pop_options") #endif // clang then gcc #endif // x86 // Default target region macros don't do anything. #ifndef SIMDJSON_TARGET_REGION #define SIMDJSON_TARGET_REGION(T) #define SIMDJSON_UNTARGET_REGION #endif // Is threading enabled? #if defined(_REENTRANT) || defined(_MT) #ifndef SIMDJSON_THREADS_ENABLED #define SIMDJSON_THREADS_ENABLED #endif #endif // workaround for large stack sizes under -O0. // https://github.com/simdjson/simdjson/issues/691 #ifdef __APPLE__ #ifndef __OPTIMIZE__ // Apple systems have small stack sizes in secondary threads. // Lack of compiler optimization may generate high stack usage. // Users may want to disable threads for safety, but only when // in debug mode which we detect by the fact that the __OPTIMIZE__ // macro is not defined. 
#undef SIMDJSON_THREADS_ENABLED #endif #endif #if defined(__clang__) #define SIMDJSON_NO_SANITIZE_UNDEFINED __attribute__((no_sanitize("undefined"))) #elif defined(__GNUC__) #define SIMDJSON_NO_SANITIZE_UNDEFINED __attribute__((no_sanitize_undefined)) #else #define SIMDJSON_NO_SANITIZE_UNDEFINED #endif #if defined(__clang__) || defined(__GNUC__) #if defined(__has_feature) # if __has_feature(memory_sanitizer) #define SIMDJSON_NO_SANITIZE_MEMORY __attribute__((no_sanitize("memory"))) # endif // if __has_feature(memory_sanitizer) #endif // defined(__has_feature) #endif // make sure it is defined as 'nothing' if it is unapplicable. #ifndef SIMDJSON_NO_SANITIZE_MEMORY #define SIMDJSON_NO_SANITIZE_MEMORY #endif #if SIMDJSON_VISUAL_STUDIO // This is one case where we do not distinguish between // regular visual studio and clang under visual studio. // clang under Windows has _stricmp (like visual studio) but not strcasecmp (as clang normally has) #define simdjson_strcasecmp _stricmp #define simdjson_strncasecmp _strnicmp #else // The strcasecmp, strncasecmp, and strcasestr functions do not work with multibyte strings (e.g. UTF-8). // So they are only useful for ASCII in our context. // https://www.gnu.org/software/libunistring/manual/libunistring.html#char-_002a-strings #define simdjson_strcasecmp strcasecmp #define simdjson_strncasecmp strncasecmp #endif #if defined(NDEBUG) || defined(__OPTIMIZE__) || (defined(_MSC_VER) && !defined(_DEBUG)) // If NDEBUG is set, or __OPTIMIZE__ is set, or we are under MSVC in release mode, // then do away with asserts and use __assume. 
/* NOTE(review): the GCC/clang and debug definitions of SIMDJSON_UNREACHABLE() below end in ';' while the Visual Studio one does not, so portable callers need to write their own ';' after the macro. */ #if SIMDJSON_VISUAL_STUDIO #define SIMDJSON_UNREACHABLE() __assume(0) #define SIMDJSON_ASSUME(COND) __assume(COND) #else #define SIMDJSON_UNREACHABLE() __builtin_unreachable(); #define SIMDJSON_ASSUME(COND) do { if (!(COND)) __builtin_unreachable(); } while (0) #endif #else // defined(NDEBUG) || defined(__OPTIMIZE__) || (defined(_MSC_VER) && !defined(_DEBUG)) // This should only ever be enabled in debug mode. #define SIMDJSON_UNREACHABLE() assert(0); #define SIMDJSON_ASSUME(COND) assert(COND) #endif #endif // SIMDJSON_PORTABILITY_H /* end file simdjson/portability.h */ namespace simdjson { namespace internal { /** * @private * Our own implementation of the C++17 to_chars function. * Defined in src/to_chars */ char *to_chars(char *first, const char *last, double value); /** * @private * A number parsing routine. * Defined in src/from_chars */ double from_chars(const char *first) noexcept; double from_chars(const char *first, const char* end) noexcept; } // namespace internal #ifndef SIMDJSON_EXCEPTIONS #if __cpp_exceptions #define SIMDJSON_EXCEPTIONS 1 #else #define SIMDJSON_EXCEPTIONS 0 #endif #endif } // namespace simdjson #if defined(__GNUC__) // Marks a block with a name so that MCA analysis can see it.
#define SIMDJSON_BEGIN_DEBUG_BLOCK(name) __asm volatile("# LLVM-MCA-BEGIN " #name); #define SIMDJSON_END_DEBUG_BLOCK(name) __asm volatile("# LLVM-MCA-END " #name); /* Wraps `block` between the two markers defined above. The helpers carry the SIMDJSON_ prefix; the unprefixed BEGIN_DEBUG_BLOCK/END_DEBUG_BLOCK names are not defined in this header section, so the wrapper must call the prefixed ones. NOTE(review): the non-GNU fallback below expands to nothing, so `block` is discarded there -- confirm that is intended. */ #define SIMDJSON_DEBUG_BLOCK(name, block) SIMDJSON_BEGIN_DEBUG_BLOCK(name); block; SIMDJSON_END_DEBUG_BLOCK(name); #else #define SIMDJSON_BEGIN_DEBUG_BLOCK(name) #define SIMDJSON_END_DEBUG_BLOCK(name) #define SIMDJSON_DEBUG_BLOCK(name, block) #endif // Align to N-byte boundary (the masks below assume n is a power of two) #define SIMDJSON_ROUNDUP_N(a, n) (((a) + ((n)-1)) & ~((n)-1)) #define SIMDJSON_ROUNDDOWN_N(a, n) ((a) & ~((n)-1)) #define SIMDJSON_ISALIGNED_N(ptr, n) (((uintptr_t)(ptr) & ((n)-1)) == 0) #if SIMDJSON_REGULAR_VISUAL_STUDIO #define simdjson_really_inline __forceinline #define simdjson_never_inline __declspec(noinline) #define simdjson_unused #define simdjson_warn_unused #ifndef simdjson_likely #define simdjson_likely(x) x #endif #ifndef simdjson_unlikely #define simdjson_unlikely(x) x #endif #define SIMDJSON_PUSH_DISABLE_WARNINGS __pragma(warning( push )) #define SIMDJSON_PUSH_DISABLE_ALL_WARNINGS __pragma(warning( push, 0 )) #define SIMDJSON_DISABLE_VS_WARNING(WARNING_NUMBER) __pragma(warning( disable : WARNING_NUMBER )) // Get rid of Intellisense-only warnings (Code Analysis) // Though __has_include is C++17, it is supported in Visual Studio 2017 or better (_MSC_VER>=1910).
#ifdef __has_include #if __has_include() #include #define SIMDJSON_DISABLE_UNDESIRED_WARNINGS SIMDJSON_DISABLE_VS_WARNING(ALL_CPPCORECHECK_WARNINGS) #endif #endif #ifndef SIMDJSON_DISABLE_UNDESIRED_WARNINGS #define SIMDJSON_DISABLE_UNDESIRED_WARNINGS #endif #define SIMDJSON_DISABLE_DEPRECATED_WARNING SIMDJSON_DISABLE_VS_WARNING(4996) #define SIMDJSON_DISABLE_STRICT_OVERFLOW_WARNING #define SIMDJSON_POP_DISABLE_WARNINGS __pragma(warning( pop )) #define SIMDJSON_PUSH_DISABLE_UNUSED_WARNINGS #define SIMDJSON_POP_DISABLE_UNUSED_WARNINGS #else // SIMDJSON_REGULAR_VISUAL_STUDIO #define simdjson_really_inline inline __attribute__((always_inline)) #define simdjson_never_inline inline __attribute__((noinline)) #define simdjson_unused __attribute__((unused)) #define simdjson_warn_unused __attribute__((warn_unused_result)) #ifndef simdjson_likely #define simdjson_likely(x) __builtin_expect(!!(x), 1) #endif #ifndef simdjson_unlikely #define simdjson_unlikely(x) __builtin_expect(!!(x), 0) #endif #define SIMDJSON_PUSH_DISABLE_WARNINGS _Pragma("GCC diagnostic push") // gcc doesn't seem to disable all warnings with all and extra, add warnings here as necessary // We do it separately for clang since it has different warnings. #ifdef __clang__ // clang is missing -Wmaybe-uninitialized. 
#define SIMDJSON_PUSH_DISABLE_ALL_WARNINGS SIMDJSON_PUSH_DISABLE_WARNINGS \ SIMDJSON_DISABLE_GCC_WARNING(-Weffc++) \ SIMDJSON_DISABLE_GCC_WARNING(-Wall) \ SIMDJSON_DISABLE_GCC_WARNING(-Wconversion) \ SIMDJSON_DISABLE_GCC_WARNING(-Wextra) \ SIMDJSON_DISABLE_GCC_WARNING(-Wattributes) \ SIMDJSON_DISABLE_GCC_WARNING(-Wimplicit-fallthrough) \ SIMDJSON_DISABLE_GCC_WARNING(-Wnon-virtual-dtor) \ SIMDJSON_DISABLE_GCC_WARNING(-Wreturn-type) \ SIMDJSON_DISABLE_GCC_WARNING(-Wshadow) \ SIMDJSON_DISABLE_GCC_WARNING(-Wunused-parameter) \ SIMDJSON_DISABLE_GCC_WARNING(-Wunused-variable) #else // __clang__ #define SIMDJSON_PUSH_DISABLE_ALL_WARNINGS SIMDJSON_PUSH_DISABLE_WARNINGS \ SIMDJSON_DISABLE_GCC_WARNING(-Weffc++) \ SIMDJSON_DISABLE_GCC_WARNING(-Wall) \ SIMDJSON_DISABLE_GCC_WARNING(-Wconversion) \ SIMDJSON_DISABLE_GCC_WARNING(-Wextra) \ SIMDJSON_DISABLE_GCC_WARNING(-Wattributes) \ SIMDJSON_DISABLE_GCC_WARNING(-Wimplicit-fallthrough) \ SIMDJSON_DISABLE_GCC_WARNING(-Wnon-virtual-dtor) \ SIMDJSON_DISABLE_GCC_WARNING(-Wreturn-type) \ SIMDJSON_DISABLE_GCC_WARNING(-Wshadow) \ SIMDJSON_DISABLE_GCC_WARNING(-Wunused-parameter) \ SIMDJSON_DISABLE_GCC_WARNING(-Wunused-variable) \ SIMDJSON_DISABLE_GCC_WARNING(-Wmaybe-uninitialized) \ SIMDJSON_DISABLE_GCC_WARNING(-Wformat-security) #endif // __clang__ #define SIMDJSON_PRAGMA(P) _Pragma(#P) #define SIMDJSON_DISABLE_GCC_WARNING(WARNING) SIMDJSON_PRAGMA(GCC diagnostic ignored #WARNING) #if SIMDJSON_CLANG_VISUAL_STUDIO #define SIMDJSON_DISABLE_UNDESIRED_WARNINGS SIMDJSON_DISABLE_GCC_WARNING(-Wmicrosoft-include) #else #define SIMDJSON_DISABLE_UNDESIRED_WARNINGS #endif #define SIMDJSON_DISABLE_DEPRECATED_WARNING SIMDJSON_DISABLE_GCC_WARNING(-Wdeprecated-declarations) #define SIMDJSON_DISABLE_STRICT_OVERFLOW_WARNING SIMDJSON_DISABLE_GCC_WARNING(-Wstrict-overflow) #define SIMDJSON_POP_DISABLE_WARNINGS _Pragma("GCC diagnostic pop") #define SIMDJSON_PUSH_DISABLE_UNUSED_WARNINGS SIMDJSON_PUSH_DISABLE_WARNINGS \ SIMDJSON_DISABLE_GCC_WARNING(-Wunused) 
#define SIMDJSON_POP_DISABLE_UNUSED_WARNINGS SIMDJSON_POP_DISABLE_WARNINGS #endif // SIMDJSON_REGULAR_VISUAL_STUDIO #if defined(simdjson_inline) // Prefer the user's definition of simdjson_inline; don't define it ourselves. #elif defined(__GNUC__) && !defined(__OPTIMIZE__) // If optimizations are disabled, forcing inlining can lead to significant // code bloat and high compile times. Don't use simdjson_really_inline for // unoptimized builds. #define simdjson_inline inline #else // Force inlining for most simdjson functions. #define simdjson_inline simdjson_really_inline #endif #if SIMDJSON_VISUAL_STUDIO /** * Windows users need to do some extra work when building * or using a dynamic library (DLL). When building, we need * to set SIMDJSON_DLLIMPORTEXPORT to __declspec(dllexport). * When *using* the DLL, the user needs to set * SIMDJSON_DLLIMPORTEXPORT to __declspec(dllimport). * * Static libraries do not require such work. * * It does not matter here whether you are using * the regular visual studio or clang under visual * studio, you still need to handle these issues. * * Non-Windows systems do not have this complexity. */ #if SIMDJSON_BUILDING_WINDOWS_DYNAMIC_LIBRARY // We set SIMDJSON_BUILDING_WINDOWS_DYNAMIC_LIBRARY when we build a DLL under Windows. // It should never happen that both SIMDJSON_BUILDING_WINDOWS_DYNAMIC_LIBRARY and // SIMDJSON_USING_WINDOWS_DYNAMIC_LIBRARY are set. #define SIMDJSON_DLLIMPORTEXPORT __declspec(dllexport) #elif SIMDJSON_USING_WINDOWS_DYNAMIC_LIBRARY // Windows users who call a dynamic library should set SIMDJSON_USING_WINDOWS_DYNAMIC_LIBRARY to 1. #define SIMDJSON_DLLIMPORTEXPORT __declspec(dllimport) #else // We assume by default static linkage #define SIMDJSON_DLLIMPORTEXPORT #endif /** * Workaround for the vcpkg package manager. Only vcpkg should * ever touch the next line. The SIMDJSON_USING_LIBRARY macro is otherwise unused.
*/ #if SIMDJSON_USING_LIBRARY #define SIMDJSON_DLLIMPORTEXPORT __declspec(dllimport) #endif /** * End of workaround for the vcpkg package manager. */ #else #define SIMDJSON_DLLIMPORTEXPORT #endif // C++17 requires string_view. #if SIMDJSON_CPLUSPLUS17 #define SIMDJSON_HAS_STRING_VIEW #include // by the standard, this has to be safe. #endif // This macro (__cpp_lib_string_view) has to be defined // for C++17 and better, but if it is otherwise defined, // we are going to assume that string_view is available // even if we do not have C++17 support. #ifdef __cpp_lib_string_view #define SIMDJSON_HAS_STRING_VIEW #endif // Some systems have string_view even if we do not have C++17 support, // and even if __cpp_lib_string_view is undefined, it is the case // with Apple clang version 11. // We must handle it. *This is important.* #ifndef SIMDJSON_HAS_STRING_VIEW #if defined __has_include // do not combine the next #if with the previous one (unsafe) #if __has_include () // now it is safe to trigger the include #include // though the file is there, it does not follow that we got the implementation #if defined(_LIBCPP_STRING_VIEW) // Ah! So we under libc++ which under its Library Fundamentals Technical Specification, which preceded C++17, // included string_view. // This means that we have string_view *even though* we may not have C++17. #define SIMDJSON_HAS_STRING_VIEW #endif // _LIBCPP_STRING_VIEW #endif // __has_include () #endif // defined __has_include #endif // def SIMDJSON_HAS_STRING_VIEW // end of complicated but important routine to try to detect string_view. // // Backfill std::string_view using nonstd::string_view on systems where // we expect that string_view is missing. Important: if we get this wrong, // we will end up with two string_view definitions and potential trouble. // That is why we work so hard above to avoid it. 
// #ifndef SIMDJSON_HAS_STRING_VIEW SIMDJSON_PUSH_DISABLE_ALL_WARNINGS /* including simdjson/nonstd/string_view.hpp: #include "simdjson/nonstd/string_view.hpp" */ /* begin file simdjson/nonstd/string_view.hpp */ // Copyright 2017-2020 by Martin Moene // // string-view lite, a C++17-like string_view for C++98 and later. // For more information see https://github.com/martinmoene/string-view-lite // // Distributed under the Boost Software License, Version 1.0. // (See accompanying file LICENSE.txt or copy at http://www.boost.org/LICENSE_1_0.txt) #pragma once #ifndef NONSTD_SV_LITE_H_INCLUDED #define NONSTD_SV_LITE_H_INCLUDED #define string_view_lite_MAJOR 1 #define string_view_lite_MINOR 7 #define string_view_lite_PATCH 0 #define string_view_lite_VERSION nssv_STRINGIFY(string_view_lite_MAJOR) "." nssv_STRINGIFY(string_view_lite_MINOR) "." nssv_STRINGIFY(string_view_lite_PATCH) #define nssv_STRINGIFY( x ) nssv_STRINGIFY_( x ) #define nssv_STRINGIFY_( x ) #x // string-view lite configuration: #define nssv_STRING_VIEW_DEFAULT 0 #define nssv_STRING_VIEW_NONSTD 1 #define nssv_STRING_VIEW_STD 2 // tweak header support: #ifdef __has_include # if __has_include() # include # endif #define nssv_HAVE_TWEAK_HEADER 1 #else #define nssv_HAVE_TWEAK_HEADER 0 //# pragma message("string_view.hpp: Note: Tweak header not supported.") #endif // string_view selection and configuration: #if !defined( nssv_CONFIG_SELECT_STRING_VIEW ) # define nssv_CONFIG_SELECT_STRING_VIEW ( nssv_HAVE_STD_STRING_VIEW ? 
nssv_STRING_VIEW_STD : nssv_STRING_VIEW_NONSTD ) #endif #ifndef nssv_CONFIG_STD_SV_OPERATOR # define nssv_CONFIG_STD_SV_OPERATOR 0 #endif #ifndef nssv_CONFIG_USR_SV_OPERATOR # define nssv_CONFIG_USR_SV_OPERATOR 1 #endif #ifdef nssv_CONFIG_CONVERSION_STD_STRING # define nssv_CONFIG_CONVERSION_STD_STRING_CLASS_METHODS nssv_CONFIG_CONVERSION_STD_STRING # define nssv_CONFIG_CONVERSION_STD_STRING_FREE_FUNCTIONS nssv_CONFIG_CONVERSION_STD_STRING #endif #ifndef nssv_CONFIG_CONVERSION_STD_STRING_CLASS_METHODS # define nssv_CONFIG_CONVERSION_STD_STRING_CLASS_METHODS 1 #endif #ifndef nssv_CONFIG_CONVERSION_STD_STRING_FREE_FUNCTIONS # define nssv_CONFIG_CONVERSION_STD_STRING_FREE_FUNCTIONS 1 #endif #ifndef nssv_CONFIG_NO_STREAM_INSERTION # define nssv_CONFIG_NO_STREAM_INSERTION 0 #endif // Control presence of exception handling (try and auto discover): #ifndef nssv_CONFIG_NO_EXCEPTIONS # if defined(_MSC_VER) # include // for _HAS_EXCEPTIONS # endif # if defined(__cpp_exceptions) || defined(__EXCEPTIONS) || (_HAS_EXCEPTIONS) # define nssv_CONFIG_NO_EXCEPTIONS 0 # else # define nssv_CONFIG_NO_EXCEPTIONS 1 # endif #endif // C++ language version detection (C++23 is speculative): // Note: VC14.0/1900 (VS2015) lacks too much from C++14. #ifndef nssv_CPLUSPLUS # if defined(_MSVC_LANG ) && !defined(__clang__) # define nssv_CPLUSPLUS (_MSC_VER == 1900 ? 
201103L : _MSVC_LANG ) # else # define nssv_CPLUSPLUS __cplusplus # endif #endif #define nssv_CPP98_OR_GREATER ( nssv_CPLUSPLUS >= 199711L ) #define nssv_CPP11_OR_GREATER ( nssv_CPLUSPLUS >= 201103L ) #define nssv_CPP11_OR_GREATER_ ( nssv_CPLUSPLUS >= 201103L ) #define nssv_CPP14_OR_GREATER ( nssv_CPLUSPLUS >= 201402L ) #define nssv_CPP17_OR_GREATER ( nssv_CPLUSPLUS >= 201703L ) #define nssv_CPP20_OR_GREATER ( nssv_CPLUSPLUS >= 202002L ) #define nssv_CPP23_OR_GREATER ( nssv_CPLUSPLUS >= 202300L ) // use C++17 std::string_view if available and requested: #if nssv_CPP17_OR_GREATER && defined(__has_include ) # if __has_include( ) # define nssv_HAVE_STD_STRING_VIEW 1 # else # define nssv_HAVE_STD_STRING_VIEW 0 # endif #else # define nssv_HAVE_STD_STRING_VIEW 0 #endif #define nssv_USES_STD_STRING_VIEW ( (nssv_CONFIG_SELECT_STRING_VIEW == nssv_STRING_VIEW_STD) || ((nssv_CONFIG_SELECT_STRING_VIEW == nssv_STRING_VIEW_DEFAULT) && nssv_HAVE_STD_STRING_VIEW) ) #define nssv_HAVE_STARTS_WITH ( nssv_CPP20_OR_GREATER || !nssv_USES_STD_STRING_VIEW ) #define nssv_HAVE_ENDS_WITH nssv_HAVE_STARTS_WITH // // Use C++17 std::string_view: // #if nssv_USES_STD_STRING_VIEW #include // Extensions for std::string: #if nssv_CONFIG_CONVERSION_STD_STRING_FREE_FUNCTIONS namespace nonstd { template< class CharT, class Traits, class Allocator = std::allocator > std::basic_string to_string( std::basic_string_view v, Allocator const & a = Allocator() ) { return std::basic_string( v.begin(), v.end(), a ); } template< class CharT, class Traits, class Allocator > std::basic_string_view to_string_view( std::basic_string const & s ) { return std::basic_string_view( s.data(), s.size() ); } // Literal operators sv and _sv: #if nssv_CONFIG_STD_SV_OPERATOR using namespace std::literals::string_view_literals; #endif #if nssv_CONFIG_USR_SV_OPERATOR inline namespace literals { inline namespace string_view_literals { constexpr std::string_view operator "" _sv( const char* str, size_t len ) noexcept // (1) { 
return std::string_view{ str, len }; } constexpr std::u16string_view operator "" _sv( const char16_t* str, size_t len ) noexcept // (2) { return std::u16string_view{ str, len }; } constexpr std::u32string_view operator "" _sv( const char32_t* str, size_t len ) noexcept // (3) { return std::u32string_view{ str, len }; } constexpr std::wstring_view operator "" _sv( const wchar_t* str, size_t len ) noexcept // (4) { return std::wstring_view{ str, len }; } }} // namespace literals::string_view_literals #endif // nssv_CONFIG_USR_SV_OPERATOR } // namespace nonstd #endif // nssv_CONFIG_CONVERSION_STD_STRING_FREE_FUNCTIONS namespace nonstd { using std::string_view; using std::wstring_view; using std::u16string_view; using std::u32string_view; using std::basic_string_view; // literal "sv" and "_sv", see above using std::operator==; using std::operator!=; using std::operator<; using std::operator<=; using std::operator>; using std::operator>=; using std::operator<<; } // namespace nonstd #else // nssv_HAVE_STD_STRING_VIEW // // Before C++17: use string_view lite: // // Compiler versions: // // MSVC++ 6.0 _MSC_VER == 1200 nssv_COMPILER_MSVC_VERSION == 60 (Visual Studio 6.0) // MSVC++ 7.0 _MSC_VER == 1300 nssv_COMPILER_MSVC_VERSION == 70 (Visual Studio .NET 2002) // MSVC++ 7.1 _MSC_VER == 1310 nssv_COMPILER_MSVC_VERSION == 71 (Visual Studio .NET 2003) // MSVC++ 8.0 _MSC_VER == 1400 nssv_COMPILER_MSVC_VERSION == 80 (Visual Studio 2005) // MSVC++ 9.0 _MSC_VER == 1500 nssv_COMPILER_MSVC_VERSION == 90 (Visual Studio 2008) // MSVC++ 10.0 _MSC_VER == 1600 nssv_COMPILER_MSVC_VERSION == 100 (Visual Studio 2010) // MSVC++ 11.0 _MSC_VER == 1700 nssv_COMPILER_MSVC_VERSION == 110 (Visual Studio 2012) // MSVC++ 12.0 _MSC_VER == 1800 nssv_COMPILER_MSVC_VERSION == 120 (Visual Studio 2013) // MSVC++ 14.0 _MSC_VER == 1900 nssv_COMPILER_MSVC_VERSION == 140 (Visual Studio 2015) // MSVC++ 14.1 _MSC_VER >= 1910 nssv_COMPILER_MSVC_VERSION == 141 (Visual Studio 2017) // MSVC++ 14.2 _MSC_VER >= 1920 
nssv_COMPILER_MSVC_VERSION == 142 (Visual Studio 2019) #if defined(_MSC_VER ) && !defined(__clang__) # define nssv_COMPILER_MSVC_VER (_MSC_VER ) # define nssv_COMPILER_MSVC_VERSION (_MSC_VER / 10 - 10 * ( 5 + (_MSC_VER < 1900 ) ) ) #else # define nssv_COMPILER_MSVC_VER 0 # define nssv_COMPILER_MSVC_VERSION 0 #endif #define nssv_COMPILER_VERSION( major, minor, patch ) ( 10 * ( 10 * (major) + (minor) ) + (patch) ) #if defined( __apple_build_version__ ) # define nssv_COMPILER_APPLECLANG_VERSION nssv_COMPILER_VERSION(__clang_major__, __clang_minor__, __clang_patchlevel__) # define nssv_COMPILER_CLANG_VERSION 0 #elif defined( __clang__ ) # define nssv_COMPILER_APPLECLANG_VERSION 0 # define nssv_COMPILER_CLANG_VERSION nssv_COMPILER_VERSION(__clang_major__, __clang_minor__, __clang_patchlevel__) #else # define nssv_COMPILER_APPLECLANG_VERSION 0 # define nssv_COMPILER_CLANG_VERSION 0 #endif #if defined(__GNUC__) && !defined(__clang__) # define nssv_COMPILER_GNUC_VERSION nssv_COMPILER_VERSION(__GNUC__, __GNUC_MINOR__, __GNUC_PATCHLEVEL__) #else # define nssv_COMPILER_GNUC_VERSION 0 #endif // half-open range [lo..hi): #define nssv_BETWEEN( v, lo, hi ) ( (lo) <= (v) && (v) < (hi) ) // Presence of language and library features: #ifdef _HAS_CPP0X # define nssv_HAS_CPP0X _HAS_CPP0X #else # define nssv_HAS_CPP0X 0 #endif // Unless defined otherwise below, consider VC14 as C++11 for variant-lite: #if nssv_COMPILER_MSVC_VER >= 1900 # undef nssv_CPP11_OR_GREATER # define nssv_CPP11_OR_GREATER 1 #endif #define nssv_CPP11_90 (nssv_CPP11_OR_GREATER_ || nssv_COMPILER_MSVC_VER >= 1500) #define nssv_CPP11_100 (nssv_CPP11_OR_GREATER_ || nssv_COMPILER_MSVC_VER >= 1600) #define nssv_CPP11_110 (nssv_CPP11_OR_GREATER_ || nssv_COMPILER_MSVC_VER >= 1700) #define nssv_CPP11_120 (nssv_CPP11_OR_GREATER_ || nssv_COMPILER_MSVC_VER >= 1800) #define nssv_CPP11_140 (nssv_CPP11_OR_GREATER_ || nssv_COMPILER_MSVC_VER >= 1900) #define nssv_CPP11_141 (nssv_CPP11_OR_GREATER_ || nssv_COMPILER_MSVC_VER >= 1910) 
#define nssv_CPP14_000 (nssv_CPP14_OR_GREATER) #define nssv_CPP17_000 (nssv_CPP17_OR_GREATER) // Presence of C++11 language features: #define nssv_HAVE_CONSTEXPR_11 nssv_CPP11_140 #define nssv_HAVE_EXPLICIT_CONVERSION nssv_CPP11_140 #define nssv_HAVE_INLINE_NAMESPACE nssv_CPP11_140 #define nssv_HAVE_IS_DEFAULT nssv_CPP11_140 #define nssv_HAVE_IS_DELETE nssv_CPP11_140 #define nssv_HAVE_NOEXCEPT nssv_CPP11_140 #define nssv_HAVE_NULLPTR nssv_CPP11_100 #define nssv_HAVE_REF_QUALIFIER nssv_CPP11_140 #define nssv_HAVE_UNICODE_LITERALS nssv_CPP11_140 #define nssv_HAVE_USER_DEFINED_LITERALS nssv_CPP11_140 #define nssv_HAVE_WCHAR16_T nssv_CPP11_100 #define nssv_HAVE_WCHAR32_T nssv_CPP11_100 #if ! ( ( nssv_CPP11_OR_GREATER && nssv_COMPILER_CLANG_VERSION ) || nssv_BETWEEN( nssv_COMPILER_CLANG_VERSION, 300, 400 ) ) # define nssv_HAVE_STD_DEFINED_LITERALS nssv_CPP11_140 #else # define nssv_HAVE_STD_DEFINED_LITERALS 0 #endif // Presence of C++14 language features: #define nssv_HAVE_CONSTEXPR_14 nssv_CPP14_000 // Presence of C++17 language features: #define nssv_HAVE_NODISCARD nssv_CPP17_000 // Presence of C++ library features: #define nssv_HAVE_STD_HASH nssv_CPP11_120 // Presence of compiler intrinsics: // Providing char-type specializations for compare() and length() that // use compiler intrinsics can improve compile- and run-time performance. // // The challenge is in using the right combinations of builtin availability // and its constexpr-ness. // // | compiler | __builtin_memcmp (constexpr) | memcmp (constexpr) | // |----------|------------------------------|---------------------| // | clang | 4.0 (>= 4.0 ) | any (? ) | // | clang-a | 9.0 (>= 9.0 ) | any (? ) | // | gcc | any (constexpr) | any (? ) | // | msvc | >= 14.2 C++17 (>= 14.2 ) | any (? 
) | #define nssv_HAVE_BUILTIN_VER ( (nssv_CPP17_000 && nssv_COMPILER_MSVC_VERSION >= 142) || nssv_COMPILER_GNUC_VERSION > 0 || nssv_COMPILER_CLANG_VERSION >= 400 || nssv_COMPILER_APPLECLANG_VERSION >= 900 ) #define nssv_HAVE_BUILTIN_CE ( nssv_HAVE_BUILTIN_VER ) #define nssv_HAVE_BUILTIN_MEMCMP ( (nssv_HAVE_CONSTEXPR_14 && nssv_HAVE_BUILTIN_CE) || !nssv_HAVE_CONSTEXPR_14 ) #define nssv_HAVE_BUILTIN_STRLEN ( (nssv_HAVE_CONSTEXPR_11 && nssv_HAVE_BUILTIN_CE) || !nssv_HAVE_CONSTEXPR_11 ) #ifdef __has_builtin # define nssv_HAVE_BUILTIN( x ) __has_builtin( x ) #else # define nssv_HAVE_BUILTIN( x ) 0 #endif #if nssv_HAVE_BUILTIN(__builtin_memcmp) || nssv_HAVE_BUILTIN_VER # define nssv_BUILTIN_MEMCMP __builtin_memcmp #else # define nssv_BUILTIN_MEMCMP memcmp #endif #if nssv_HAVE_BUILTIN(__builtin_strlen) || nssv_HAVE_BUILTIN_VER # define nssv_BUILTIN_STRLEN __builtin_strlen #else # define nssv_BUILTIN_STRLEN strlen #endif // C++ feature usage: #if nssv_HAVE_CONSTEXPR_11 # define nssv_constexpr constexpr #else # define nssv_constexpr /*constexpr*/ #endif #if nssv_HAVE_CONSTEXPR_14 # define nssv_constexpr14 constexpr #else # define nssv_constexpr14 /*constexpr*/ #endif #if nssv_HAVE_EXPLICIT_CONVERSION # define nssv_explicit explicit #else # define nssv_explicit /*explicit*/ #endif #if nssv_HAVE_INLINE_NAMESPACE # define nssv_inline_ns inline #else # define nssv_inline_ns /*inline*/ #endif #if nssv_HAVE_NOEXCEPT # define nssv_noexcept noexcept #else # define nssv_noexcept /*noexcept*/ #endif //#if nssv_HAVE_REF_QUALIFIER //# define nssv_ref_qual & //# define nssv_refref_qual && //#else //# define nssv_ref_qual /*&*/ //# define nssv_refref_qual /*&&*/ //#endif #if nssv_HAVE_NULLPTR # define nssv_nullptr nullptr #else # define nssv_nullptr NULL #endif #if nssv_HAVE_NODISCARD # define nssv_nodiscard [[nodiscard]] #else # define nssv_nodiscard /*[[nodiscard]]*/ #endif // Additional includes: #include #include #include #include #include // std::char_traits<> #if ! 
nssv_CONFIG_NO_STREAM_INSERTION # include #endif #if ! nssv_CONFIG_NO_EXCEPTIONS # include #endif #if nssv_CPP11_OR_GREATER # include #endif // Clang, GNUC, MSVC warning suppression macros: #if defined(__clang__) # pragma clang diagnostic ignored "-Wreserved-user-defined-literal" # pragma clang diagnostic push # pragma clang diagnostic ignored "-Wuser-defined-literals" #elif defined(__GNUC__) # pragma GCC diagnostic push # pragma GCC diagnostic ignored "-Wliteral-suffix" #endif // __clang__ #if nssv_COMPILER_MSVC_VERSION >= 140 # define nssv_SUPPRESS_MSGSL_WARNING(expr) [[gsl::suppress(expr)]] # define nssv_SUPPRESS_MSVC_WARNING(code, descr) __pragma(warning(suppress: code) ) # define nssv_DISABLE_MSVC_WARNINGS(codes) __pragma(warning(push)) __pragma(warning(disable: codes)) #else # define nssv_SUPPRESS_MSGSL_WARNING(expr) # define nssv_SUPPRESS_MSVC_WARNING(code, descr) # define nssv_DISABLE_MSVC_WARNINGS(codes) #endif #if defined(__clang__) # define nssv_RESTORE_WARNINGS() _Pragma("clang diagnostic pop") #elif defined(__GNUC__) # define nssv_RESTORE_WARNINGS() _Pragma("GCC diagnostic pop") #elif nssv_COMPILER_MSVC_VERSION >= 140 # define nssv_RESTORE_WARNINGS() __pragma(warning(pop )) #else # define nssv_RESTORE_WARNINGS() #endif // Suppress the following MSVC (GSL) warnings: // - C4455, non-gsl : 'operator ""sv': literal suffix identifiers that do not // start with an underscore are reserved // - C26472, gsl::t.1 : don't use a static_cast for arithmetic conversions; // use brace initialization, gsl::narrow_cast or gsl::narow // - C26481: gsl::b.1 : don't use pointer arithmetic. 
Use span instead nssv_DISABLE_MSVC_WARNINGS( 4455 26481 26472 ) //nssv_DISABLE_CLANG_WARNINGS( "-Wuser-defined-literals" ) //nssv_DISABLE_GNUC_WARNINGS( -Wliteral-suffix ) namespace nonstd { namespace sv_lite { // // basic_string_view declaration: // template < class CharT, class Traits = std::char_traits > class basic_string_view; namespace detail { // support constexpr comparison in C++14; // for C++17 and later, use provided traits: template< typename CharT > inline nssv_constexpr14 int compare( CharT const * s1, CharT const * s2, std::size_t count ) { while ( count-- != 0 ) { if ( *s1 < *s2 ) return -1; if ( *s1 > *s2 ) return +1; ++s1; ++s2; } return 0; } #if nssv_HAVE_BUILTIN_MEMCMP // specialization of compare() for char, see also generic compare() above: inline nssv_constexpr14 int compare( char const * s1, char const * s2, std::size_t count ) { return nssv_BUILTIN_MEMCMP( s1, s2, count ); } #endif #if nssv_HAVE_BUILTIN_STRLEN // specialization of length() for char, see also generic length() further below: inline nssv_constexpr std::size_t length( char const * s ) { return nssv_BUILTIN_STRLEN( s ); } #endif #if defined(__OPTIMIZE__) // gcc, clang provide __OPTIMIZE__ // Expect tail call optimization to make length() non-recursive: template< typename CharT > inline nssv_constexpr std::size_t length( CharT * s, std::size_t result = 0 ) { return *s == '\0' ? result : length( s + 1, result + 1 ); } #else // OPTIMIZE // non-recursive: template< typename CharT > inline nssv_constexpr14 std::size_t length( CharT * s ) { std::size_t result = 0; while ( *s++ != '\0' ) { ++result; } return result; } #endif // OPTIMIZE #if nssv_CPP11_OR_GREATER && ! 
nssv_CPP17_OR_GREATER #if defined(__OPTIMIZE__) // gcc, clang provide __OPTIMIZE__ // Expect tail call optimization to make search() non-recursive: template< class CharT, class Traits = std::char_traits > constexpr const CharT* search( basic_string_view haystack, basic_string_view needle ) { return haystack.starts_with( needle ) ? haystack.begin() : haystack.empty() ? haystack.end() : search( haystack.substr(1), needle ); } #else // OPTIMIZE // non-recursive: template< class CharT, class Traits = std::char_traits > constexpr const CharT* search( basic_string_view haystack, basic_string_view needle ) { return std::search( haystack.begin(), haystack.end(), needle.begin(), needle.end() ); } #endif // OPTIMIZE #endif // nssv_CPP11_OR_GREATER && ! nssv_CPP17_OR_GREATER } // namespace detail // // basic_string_view: // template < class CharT, class Traits /* = std::char_traits */ > class basic_string_view { public: // Member types: typedef Traits traits_type; typedef CharT value_type; typedef CharT * pointer; typedef CharT const * const_pointer; typedef CharT & reference; typedef CharT const & const_reference; typedef const_pointer iterator; typedef const_pointer const_iterator; typedef std::reverse_iterator< const_iterator > reverse_iterator; typedef std::reverse_iterator< const_iterator > const_reverse_iterator; typedef std::size_t size_type; typedef std::ptrdiff_t difference_type; // 24.4.2.1 Construction and assignment: nssv_constexpr basic_string_view() nssv_noexcept : data_( nssv_nullptr ) , size_( 0 ) {} #if nssv_CPP11_OR_GREATER nssv_constexpr basic_string_view( basic_string_view const & other ) nssv_noexcept = default; #else nssv_constexpr basic_string_view( basic_string_view const & other ) nssv_noexcept : data_( other.data_) , size_( other.size_) {} #endif nssv_constexpr basic_string_view( CharT const * s, size_type count ) nssv_noexcept // non-standard noexcept : data_( s ) , size_( count ) {} nssv_constexpr basic_string_view( CharT const * s) nssv_noexcept 
// non-standard noexcept : data_( s ) #if nssv_CPP17_OR_GREATER , size_( Traits::length(s) ) #elif nssv_CPP11_OR_GREATER , size_( detail::length(s) ) #else , size_( Traits::length(s) ) #endif {} #if nssv_HAVE_NULLPTR # if nssv_HAVE_IS_DELETE nssv_constexpr basic_string_view( std::nullptr_t ) nssv_noexcept = delete; # else private: nssv_constexpr basic_string_view( std::nullptr_t ) nssv_noexcept; public: # endif #endif // Assignment: #if nssv_CPP11_OR_GREATER nssv_constexpr14 basic_string_view & operator=( basic_string_view const & other ) nssv_noexcept = default; #else nssv_constexpr14 basic_string_view & operator=( basic_string_view const & other ) nssv_noexcept { data_ = other.data_; size_ = other.size_; return *this; } #endif // 24.4.2.2 Iterator support: nssv_constexpr const_iterator begin() const nssv_noexcept { return data_; } nssv_constexpr const_iterator end() const nssv_noexcept { return data_ + size_; } nssv_constexpr const_iterator cbegin() const nssv_noexcept { return begin(); } nssv_constexpr const_iterator cend() const nssv_noexcept { return end(); } nssv_constexpr const_reverse_iterator rbegin() const nssv_noexcept { return const_reverse_iterator( end() ); } nssv_constexpr const_reverse_iterator rend() const nssv_noexcept { return const_reverse_iterator( begin() ); } nssv_constexpr const_reverse_iterator crbegin() const nssv_noexcept { return rbegin(); } nssv_constexpr const_reverse_iterator crend() const nssv_noexcept { return rend(); } // 24.4.2.3 Capacity: nssv_constexpr size_type size() const nssv_noexcept { return size_; } nssv_constexpr size_type length() const nssv_noexcept { return size_; } nssv_constexpr size_type max_size() const nssv_noexcept { return (std::numeric_limits< size_type >::max)(); } // since C++20 nssv_nodiscard nssv_constexpr bool empty() const nssv_noexcept { return 0 == size_; } // 24.4.2.4 Element access: nssv_constexpr const_reference operator[]( size_type pos ) const { return data_at( pos ); } nssv_constexpr14 
const_reference at( size_type pos ) const { #if nssv_CONFIG_NO_EXCEPTIONS assert( pos < size() ); #else if ( pos >= size() ) { throw std::out_of_range("nonstd::string_view::at()"); } #endif return data_at( pos ); } nssv_constexpr const_reference front() const { return data_at( 0 ); } nssv_constexpr const_reference back() const { return data_at( size() - 1 ); } nssv_constexpr const_pointer data() const nssv_noexcept { return data_; } // 24.4.2.5 Modifiers: nssv_constexpr14 void remove_prefix( size_type n ) { assert( n <= size() ); data_ += n; size_ -= n; } nssv_constexpr14 void remove_suffix( size_type n ) { assert( n <= size() ); size_ -= n; } nssv_constexpr14 void swap( basic_string_view & other ) nssv_noexcept { const basic_string_view tmp(other); other = *this; *this = tmp; } // 24.4.2.6 String operations: size_type copy( CharT * dest, size_type n, size_type pos = 0 ) const { #if nssv_CONFIG_NO_EXCEPTIONS assert( pos <= size() ); #else if ( pos > size() ) { throw std::out_of_range("nonstd::string_view::copy()"); } #endif const size_type rlen = (std::min)( n, size() - pos ); (void) Traits::copy( dest, data() + pos, rlen ); return rlen; } nssv_constexpr14 basic_string_view substr( size_type pos = 0, size_type n = npos ) const { #if nssv_CONFIG_NO_EXCEPTIONS assert( pos <= size() ); #else if ( pos > size() ) { throw std::out_of_range("nonstd::string_view::substr()"); } #endif return basic_string_view( data() + pos, (std::min)( n, size() - pos ) ); } // compare(), 6x: nssv_constexpr14 int compare( basic_string_view other ) const nssv_noexcept // (1) { #if nssv_CPP17_OR_GREATER if ( const int result = Traits::compare( data(), other.data(), (std::min)( size(), other.size() ) ) ) #else if ( const int result = detail::compare( data(), other.data(), (std::min)( size(), other.size() ) ) ) #endif { return result; } return size() == other.size() ? 0 : size() < other.size() ? 
-1 : 1; } nssv_constexpr int compare( size_type pos1, size_type n1, basic_string_view other ) const // (2) { return substr( pos1, n1 ).compare( other ); } nssv_constexpr int compare( size_type pos1, size_type n1, basic_string_view other, size_type pos2, size_type n2 ) const // (3) { return substr( pos1, n1 ).compare( other.substr( pos2, n2 ) ); } nssv_constexpr int compare( CharT const * s ) const // (4) { return compare( basic_string_view( s ) ); } nssv_constexpr int compare( size_type pos1, size_type n1, CharT const * s ) const // (5) { return substr( pos1, n1 ).compare( basic_string_view( s ) ); } nssv_constexpr int compare( size_type pos1, size_type n1, CharT const * s, size_type n2 ) const // (6) { return substr( pos1, n1 ).compare( basic_string_view( s, n2 ) ); } // 24.4.2.7 Searching: // starts_with(), 3x, since C++20: nssv_constexpr bool starts_with( basic_string_view v ) const nssv_noexcept // (1) { return size() >= v.size() && compare( 0, v.size(), v ) == 0; } nssv_constexpr bool starts_with( CharT c ) const nssv_noexcept // (2) { return starts_with( basic_string_view( &c, 1 ) ); } nssv_constexpr bool starts_with( CharT const * s ) const // (3) { return starts_with( basic_string_view( s ) ); } // ends_with(), 3x, since C++20: nssv_constexpr bool ends_with( basic_string_view v ) const nssv_noexcept // (1) { return size() >= v.size() && compare( size() - v.size(), npos, v ) == 0; } nssv_constexpr bool ends_with( CharT c ) const nssv_noexcept // (2) { return ends_with( basic_string_view( &c, 1 ) ); } nssv_constexpr bool ends_with( CharT const * s ) const // (3) { return ends_with( basic_string_view( s ) ); } // find(), 4x: nssv_constexpr size_type find( basic_string_view v, size_type pos = 0 ) const nssv_noexcept // (1) { return assert( v.size() == 0 || v.data() != nssv_nullptr ) , pos >= size() ? npos : to_pos( #if nssv_CPP11_OR_GREATER && ! 
nssv_CPP17_OR_GREATER detail::search( substr(pos), v ) #else std::search( cbegin() + pos, cend(), v.cbegin(), v.cend(), Traits::eq ) #endif ); } nssv_constexpr size_type find( CharT c, size_type pos = 0 ) const nssv_noexcept // (2) { return find( basic_string_view( &c, 1 ), pos ); } nssv_constexpr size_type find( CharT const * s, size_type pos, size_type n ) const // (3) { return find( basic_string_view( s, n ), pos ); } nssv_constexpr size_type find( CharT const * s, size_type pos = 0 ) const // (4) { return find( basic_string_view( s ), pos ); } // rfind(), 4x: nssv_constexpr14 size_type rfind( basic_string_view v, size_type pos = npos ) const nssv_noexcept // (1) { if ( size() < v.size() ) { return npos; } if ( v.empty() ) { return (std::min)( size(), pos ); } const_iterator last = cbegin() + (std::min)( size() - v.size(), pos ) + v.size(); const_iterator result = std::find_end( cbegin(), last, v.cbegin(), v.cend(), Traits::eq ); return result != last ? size_type( result - cbegin() ) : npos; } nssv_constexpr14 size_type rfind( CharT c, size_type pos = npos ) const nssv_noexcept // (2) { return rfind( basic_string_view( &c, 1 ), pos ); } nssv_constexpr14 size_type rfind( CharT const * s, size_type pos, size_type n ) const // (3) { return rfind( basic_string_view( s, n ), pos ); } nssv_constexpr14 size_type rfind( CharT const * s, size_type pos = npos ) const // (4) { return rfind( basic_string_view( s ), pos ); } // find_first_of(), 4x: nssv_constexpr size_type find_first_of( basic_string_view v, size_type pos = 0 ) const nssv_noexcept // (1) { return pos >= size() ? 
npos : to_pos( std::find_first_of( cbegin() + pos, cend(), v.cbegin(), v.cend(), Traits::eq ) ); } nssv_constexpr size_type find_first_of( CharT c, size_type pos = 0 ) const nssv_noexcept // (2) { return find_first_of( basic_string_view( &c, 1 ), pos ); } nssv_constexpr size_type find_first_of( CharT const * s, size_type pos, size_type n ) const // (3) { return find_first_of( basic_string_view( s, n ), pos ); } nssv_constexpr size_type find_first_of( CharT const * s, size_type pos = 0 ) const // (4) { return find_first_of( basic_string_view( s ), pos ); } // find_last_of(), 4x: nssv_constexpr size_type find_last_of( basic_string_view v, size_type pos = npos ) const nssv_noexcept // (1) { return empty() ? npos : pos >= size() ? find_last_of( v, size() - 1 ) : to_pos( std::find_first_of( const_reverse_iterator( cbegin() + pos + 1 ), crend(), v.cbegin(), v.cend(), Traits::eq ) ); } nssv_constexpr size_type find_last_of( CharT c, size_type pos = npos ) const nssv_noexcept // (2) { return find_last_of( basic_string_view( &c, 1 ), pos ); } nssv_constexpr size_type find_last_of( CharT const * s, size_type pos, size_type count ) const // (3) { return find_last_of( basic_string_view( s, count ), pos ); } nssv_constexpr size_type find_last_of( CharT const * s, size_type pos = npos ) const // (4) { return find_last_of( basic_string_view( s ), pos ); } // find_first_not_of(), 4x: nssv_constexpr size_type find_first_not_of( basic_string_view v, size_type pos = 0 ) const nssv_noexcept // (1) { return pos >= size() ? 
npos : to_pos( std::find_if( cbegin() + pos, cend(), not_in_view( v ) ) ); } nssv_constexpr size_type find_first_not_of( CharT c, size_type pos = 0 ) const nssv_noexcept // (2) { return find_first_not_of( basic_string_view( &c, 1 ), pos ); } nssv_constexpr size_type find_first_not_of( CharT const * s, size_type pos, size_type count ) const // (3) { return find_first_not_of( basic_string_view( s, count ), pos ); } nssv_constexpr size_type find_first_not_of( CharT const * s, size_type pos = 0 ) const // (4) { return find_first_not_of( basic_string_view( s ), pos ); } // find_last_not_of(), 4x: nssv_constexpr size_type find_last_not_of( basic_string_view v, size_type pos = npos ) const nssv_noexcept // (1) { return empty() ? npos : pos >= size() ? find_last_not_of( v, size() - 1 ) : to_pos( std::find_if( const_reverse_iterator( cbegin() + pos + 1 ), crend(), not_in_view( v ) ) ); } nssv_constexpr size_type find_last_not_of( CharT c, size_type pos = npos ) const nssv_noexcept // (2) { return find_last_not_of( basic_string_view( &c, 1 ), pos ); } nssv_constexpr size_type find_last_not_of( CharT const * s, size_type pos, size_type count ) const // (3) { return find_last_not_of( basic_string_view( s, count ), pos ); } nssv_constexpr size_type find_last_not_of( CharT const * s, size_type pos = npos ) const // (4) { return find_last_not_of( basic_string_view( s ), pos ); } // Constants: #if nssv_CPP17_OR_GREATER static nssv_constexpr size_type npos = size_type(-1); #elif nssv_CPP11_OR_GREATER enum : size_type { npos = size_type(-1) }; #else enum { npos = size_type(-1) }; #endif private: struct not_in_view { const basic_string_view v; nssv_constexpr explicit not_in_view( basic_string_view v_ ) : v( v_ ) {} nssv_constexpr bool operator()( CharT c ) const { return npos == v.find_first_of( c ); } }; nssv_constexpr size_type to_pos( const_iterator it ) const { return it == cend() ? 
npos : size_type( it - cbegin() ); } nssv_constexpr size_type to_pos( const_reverse_iterator it ) const { return it == crend() ? npos : size_type( crend() - it - 1 ); } nssv_constexpr const_reference data_at( size_type pos ) const { #if nssv_BETWEEN( nssv_COMPILER_GNUC_VERSION, 1, 500 ) return data_[pos]; #else return assert( pos < size() ), data_[pos]; #endif } private: const_pointer data_; size_type size_; public: #if nssv_CONFIG_CONVERSION_STD_STRING_CLASS_METHODS template< class Allocator > basic_string_view( std::basic_string const & s ) nssv_noexcept : data_( s.data() ) , size_( s.size() ) {} #if nssv_HAVE_EXPLICIT_CONVERSION template< class Allocator > explicit operator std::basic_string() const { return to_string( Allocator() ); } #endif // nssv_HAVE_EXPLICIT_CONVERSION #if nssv_CPP11_OR_GREATER template< class Allocator = std::allocator > std::basic_string to_string( Allocator const & a = Allocator() ) const { return std::basic_string( begin(), end(), a ); } #else std::basic_string to_string() const { return std::basic_string( begin(), end() ); } template< class Allocator > std::basic_string to_string( Allocator const & a ) const { return std::basic_string( begin(), end(), a ); } #endif // nssv_CPP11_OR_GREATER #endif // nssv_CONFIG_CONVERSION_STD_STRING_CLASS_METHODS }; // // Non-member functions: // // 24.4.3 Non-member comparison functions: // lexicographically compare two string views (function template): template< class CharT, class Traits > nssv_constexpr bool operator== ( basic_string_view lhs, basic_string_view rhs ) nssv_noexcept { return lhs.size() == rhs.size() && lhs.compare( rhs ) == 0; } template< class CharT, class Traits > nssv_constexpr bool operator!= ( basic_string_view lhs, basic_string_view rhs ) nssv_noexcept { return !( lhs == rhs ); } template< class CharT, class Traits > nssv_constexpr bool operator< ( basic_string_view lhs, basic_string_view rhs ) nssv_noexcept { return lhs.compare( rhs ) < 0; } template< class CharT, class Traits 
> nssv_constexpr bool operator<= ( basic_string_view lhs, basic_string_view rhs ) nssv_noexcept { return lhs.compare( rhs ) <= 0; } template< class CharT, class Traits > nssv_constexpr bool operator> ( basic_string_view lhs, basic_string_view rhs ) nssv_noexcept { return lhs.compare( rhs ) > 0; } template< class CharT, class Traits > nssv_constexpr bool operator>= ( basic_string_view lhs, basic_string_view rhs ) nssv_noexcept { return lhs.compare( rhs ) >= 0; } // Let S be basic_string_view, and sv be an instance of S. // Implementations shall provide sufficient additional overloads marked // constexpr and noexcept so that an object t with an implicit conversion // to S can be compared according to Table 67. #if ! nssv_CPP11_OR_GREATER || nssv_BETWEEN( nssv_COMPILER_MSVC_VERSION, 100, 141 ) // accommodate for older compilers: // == template< class CharT, class Traits> nssv_constexpr bool operator==( basic_string_view lhs, CharT const * rhs ) nssv_noexcept { return lhs.size() == detail::length( rhs ) && lhs.compare( rhs ) == 0; } template< class CharT, class Traits> nssv_constexpr bool operator==( CharT const * lhs, basic_string_view rhs ) nssv_noexcept { return detail::length( lhs ) == rhs.size() && rhs.compare( lhs ) == 0; } template< class CharT, class Traits> nssv_constexpr bool operator==( basic_string_view lhs, std::basic_string rhs ) nssv_noexcept { return lhs.size() == rhs.size() && lhs.compare( rhs ) == 0; } template< class CharT, class Traits> nssv_constexpr bool operator==( std::basic_string rhs, basic_string_view lhs ) nssv_noexcept { return lhs.size() == rhs.size() && lhs.compare( rhs ) == 0; } // != template< class CharT, class Traits> nssv_constexpr bool operator!=( basic_string_view lhs, CharT const * rhs ) nssv_noexcept { return !( lhs == rhs ); } template< class CharT, class Traits> nssv_constexpr bool operator!=( CharT const * lhs, basic_string_view rhs ) nssv_noexcept { return !( lhs == rhs ); } template< class CharT, class Traits> nssv_constexpr 
bool operator!=( basic_string_view lhs, std::basic_string rhs ) nssv_noexcept { return !( lhs == rhs ); } template< class CharT, class Traits> nssv_constexpr bool operator!=( std::basic_string rhs, basic_string_view lhs ) nssv_noexcept { return !( lhs == rhs ); } // < template< class CharT, class Traits> nssv_constexpr bool operator<( basic_string_view lhs, CharT const * rhs ) nssv_noexcept { return lhs.compare( rhs ) < 0; } template< class CharT, class Traits> nssv_constexpr bool operator<( CharT const * lhs, basic_string_view rhs ) nssv_noexcept { return rhs.compare( lhs ) > 0; } template< class CharT, class Traits> nssv_constexpr bool operator<( basic_string_view lhs, std::basic_string rhs ) nssv_noexcept { return lhs.compare( rhs ) < 0; } template< class CharT, class Traits> nssv_constexpr bool operator<( std::basic_string rhs, basic_string_view lhs ) nssv_noexcept { return rhs.compare( lhs ) > 0; } // <= template< class CharT, class Traits> nssv_constexpr bool operator<=( basic_string_view lhs, CharT const * rhs ) nssv_noexcept { return lhs.compare( rhs ) <= 0; } template< class CharT, class Traits> nssv_constexpr bool operator<=( CharT const * lhs, basic_string_view rhs ) nssv_noexcept { return rhs.compare( lhs ) >= 0; } template< class CharT, class Traits> nssv_constexpr bool operator<=( basic_string_view lhs, std::basic_string rhs ) nssv_noexcept { return lhs.compare( rhs ) <= 0; } template< class CharT, class Traits> nssv_constexpr bool operator<=( std::basic_string rhs, basic_string_view lhs ) nssv_noexcept { return rhs.compare( lhs ) >= 0; } // > template< class CharT, class Traits> nssv_constexpr bool operator>( basic_string_view lhs, CharT const * rhs ) nssv_noexcept { return lhs.compare( rhs ) > 0; } template< class CharT, class Traits> nssv_constexpr bool operator>( CharT const * lhs, basic_string_view rhs ) nssv_noexcept { return rhs.compare( lhs ) < 0; } template< class CharT, class Traits> nssv_constexpr bool operator>( basic_string_view lhs, 
std::basic_string rhs ) nssv_noexcept { return lhs.compare( rhs ) > 0; } template< class CharT, class Traits> nssv_constexpr bool operator>( std::basic_string rhs, basic_string_view lhs ) nssv_noexcept { return rhs.compare( lhs ) < 0; } // >= template< class CharT, class Traits> nssv_constexpr bool operator>=( basic_string_view lhs, CharT const * rhs ) nssv_noexcept { return lhs.compare( rhs ) >= 0; } template< class CharT, class Traits> nssv_constexpr bool operator>=( CharT const * lhs, basic_string_view rhs ) nssv_noexcept { return rhs.compare( lhs ) <= 0; } template< class CharT, class Traits> nssv_constexpr bool operator>=( basic_string_view lhs, std::basic_string rhs ) nssv_noexcept { return lhs.compare( rhs ) >= 0; } template< class CharT, class Traits> nssv_constexpr bool operator>=( std::basic_string rhs, basic_string_view lhs ) nssv_noexcept { return rhs.compare( lhs ) <= 0; } #else // newer compilers: #define nssv_BASIC_STRING_VIEW_I(T,U) typename std::decay< basic_string_view >::type #if defined(_MSC_VER) // issue 40 # define nssv_MSVC_ORDER(x) , int=x #else # define nssv_MSVC_ORDER(x) /*, int=x*/ #endif // == template< class CharT, class Traits nssv_MSVC_ORDER(1) > nssv_constexpr bool operator==( basic_string_view lhs, nssv_BASIC_STRING_VIEW_I(CharT, Traits) rhs ) nssv_noexcept { return lhs.size() == rhs.size() && lhs.compare( rhs ) == 0; } template< class CharT, class Traits nssv_MSVC_ORDER(2) > nssv_constexpr bool operator==( nssv_BASIC_STRING_VIEW_I(CharT, Traits) lhs, basic_string_view rhs ) nssv_noexcept { return lhs.size() == rhs.size() && lhs.compare( rhs ) == 0; } // != template< class CharT, class Traits nssv_MSVC_ORDER(1) > nssv_constexpr bool operator!= ( basic_string_view < CharT, Traits > lhs, nssv_BASIC_STRING_VIEW_I( CharT, Traits ) rhs ) nssv_noexcept { return !( lhs == rhs ); } template< class CharT, class Traits nssv_MSVC_ORDER(2) > nssv_constexpr bool operator!= ( nssv_BASIC_STRING_VIEW_I( CharT, Traits ) lhs, basic_string_view < 
CharT, Traits > rhs ) nssv_noexcept { return !( lhs == rhs ); } // < template< class CharT, class Traits nssv_MSVC_ORDER(1) > nssv_constexpr bool operator< ( basic_string_view < CharT, Traits > lhs, nssv_BASIC_STRING_VIEW_I( CharT, Traits ) rhs ) nssv_noexcept { return lhs.compare( rhs ) < 0; } template< class CharT, class Traits nssv_MSVC_ORDER(2) > nssv_constexpr bool operator< ( nssv_BASIC_STRING_VIEW_I( CharT, Traits ) lhs, basic_string_view < CharT, Traits > rhs ) nssv_noexcept { return lhs.compare( rhs ) < 0; } // <= template< class CharT, class Traits nssv_MSVC_ORDER(1) > nssv_constexpr bool operator<= ( basic_string_view < CharT, Traits > lhs, nssv_BASIC_STRING_VIEW_I( CharT, Traits ) rhs ) nssv_noexcept { return lhs.compare( rhs ) <= 0; } template< class CharT, class Traits nssv_MSVC_ORDER(2) > nssv_constexpr bool operator<= ( nssv_BASIC_STRING_VIEW_I( CharT, Traits ) lhs, basic_string_view < CharT, Traits > rhs ) nssv_noexcept { return lhs.compare( rhs ) <= 0; } // > template< class CharT, class Traits nssv_MSVC_ORDER(1) > nssv_constexpr bool operator> ( basic_string_view < CharT, Traits > lhs, nssv_BASIC_STRING_VIEW_I( CharT, Traits ) rhs ) nssv_noexcept { return lhs.compare( rhs ) > 0; } template< class CharT, class Traits nssv_MSVC_ORDER(2) > nssv_constexpr bool operator> ( nssv_BASIC_STRING_VIEW_I( CharT, Traits ) lhs, basic_string_view < CharT, Traits > rhs ) nssv_noexcept { return lhs.compare( rhs ) > 0; } // >= template< class CharT, class Traits nssv_MSVC_ORDER(1) > nssv_constexpr bool operator>= ( basic_string_view < CharT, Traits > lhs, nssv_BASIC_STRING_VIEW_I( CharT, Traits ) rhs ) nssv_noexcept { return lhs.compare( rhs ) >= 0; } template< class CharT, class Traits nssv_MSVC_ORDER(2) > nssv_constexpr bool operator>= ( nssv_BASIC_STRING_VIEW_I( CharT, Traits ) lhs, basic_string_view < CharT, Traits > rhs ) nssv_noexcept { return lhs.compare( rhs ) >= 0; } #undef nssv_MSVC_ORDER #undef nssv_BASIC_STRING_VIEW_I #endif // compiler-dependent 
approach to comparisons // 24.4.4 Inserters and extractors: #if ! nssv_CONFIG_NO_STREAM_INSERTION namespace detail { template< class Stream > void write_padding( Stream & os, std::streamsize n ) { for ( std::streamsize i = 0; i < n; ++i ) os.rdbuf()->sputc( os.fill() ); } template< class Stream, class View > Stream & write_to_stream( Stream & os, View const & sv ) { typename Stream::sentry sentry( os ); if ( !sentry ) return os; const std::streamsize length = static_cast( sv.length() ); // Whether, and how, to pad: const bool pad = ( length < os.width() ); const bool left_pad = pad && ( os.flags() & std::ios_base::adjustfield ) == std::ios_base::right; if ( left_pad ) write_padding( os, os.width() - length ); // Write span characters: os.rdbuf()->sputn( sv.begin(), length ); if ( pad && !left_pad ) write_padding( os, os.width() - length ); // Reset output stream width: os.width( 0 ); return os; } } // namespace detail template< class CharT, class Traits > std::basic_ostream & operator<<( std::basic_ostream& os, basic_string_view sv ) { return detail::write_to_stream( os, sv ); } #endif // nssv_CONFIG_NO_STREAM_INSERTION // Several typedefs for common character types are provided: typedef basic_string_view string_view; typedef basic_string_view wstring_view; #if nssv_HAVE_WCHAR16_T typedef basic_string_view u16string_view; typedef basic_string_view u32string_view; #endif }} // namespace nonstd::sv_lite // // 24.4.6 Suffix for basic_string_view literals: // #if nssv_HAVE_USER_DEFINED_LITERALS namespace nonstd { nssv_inline_ns namespace literals { nssv_inline_ns namespace string_view_literals { #if nssv_CONFIG_STD_SV_OPERATOR && nssv_HAVE_STD_DEFINED_LITERALS nssv_constexpr nonstd::sv_lite::string_view operator "" sv( const char* str, size_t len ) nssv_noexcept // (1) { return nonstd::sv_lite::string_view{ str, len }; } nssv_constexpr nonstd::sv_lite::u16string_view operator "" sv( const char16_t* str, size_t len ) nssv_noexcept // (2) { return 
nonstd::sv_lite::u16string_view{ str, len }; } nssv_constexpr nonstd::sv_lite::u32string_view operator "" sv( const char32_t* str, size_t len ) nssv_noexcept // (3) { return nonstd::sv_lite::u32string_view{ str, len }; } nssv_constexpr nonstd::sv_lite::wstring_view operator "" sv( const wchar_t* str, size_t len ) nssv_noexcept // (4) { return nonstd::sv_lite::wstring_view{ str, len }; } #endif // nssv_CONFIG_STD_SV_OPERATOR && nssv_HAVE_STD_DEFINED_LITERALS #if nssv_CONFIG_USR_SV_OPERATOR nssv_constexpr nonstd::sv_lite::string_view operator "" _sv( const char* str, size_t len ) nssv_noexcept // (1) { return nonstd::sv_lite::string_view{ str, len }; } nssv_constexpr nonstd::sv_lite::u16string_view operator "" _sv( const char16_t* str, size_t len ) nssv_noexcept // (2) { return nonstd::sv_lite::u16string_view{ str, len }; } nssv_constexpr nonstd::sv_lite::u32string_view operator "" _sv( const char32_t* str, size_t len ) nssv_noexcept // (3) { return nonstd::sv_lite::u32string_view{ str, len }; } nssv_constexpr nonstd::sv_lite::wstring_view operator "" _sv( const wchar_t* str, size_t len ) nssv_noexcept // (4) { return nonstd::sv_lite::wstring_view{ str, len }; } #endif // nssv_CONFIG_USR_SV_OPERATOR }}} // namespace nonstd::literals::string_view_literals #endif // // Extensions for std::string: // #if nssv_CONFIG_CONVERSION_STD_STRING_FREE_FUNCTIONS namespace nonstd { namespace sv_lite { // Exclude MSVC 14 (19.00): it yields ambiguous to_string(): #if nssv_CPP11_OR_GREATER && nssv_COMPILER_MSVC_VERSION != 140 template< class CharT, class Traits, class Allocator = std::allocator > std::basic_string to_string( basic_string_view v, Allocator const & a = Allocator() ) { return std::basic_string( v.begin(), v.end(), a ); } #else template< class CharT, class Traits > std::basic_string to_string( basic_string_view v ) { return std::basic_string( v.begin(), v.end() ); } template< class CharT, class Traits, class Allocator > std::basic_string to_string( basic_string_view v, 
Allocator const & a ) { return std::basic_string( v.begin(), v.end(), a ); } #endif // nssv_CPP11_OR_GREATER template< class CharT, class Traits, class Allocator > basic_string_view to_string_view( std::basic_string const & s ) { return basic_string_view( s.data(), s.size() ); } }} // namespace nonstd::sv_lite #endif // nssv_CONFIG_CONVERSION_STD_STRING_FREE_FUNCTIONS // // make types and algorithms available in namespace nonstd: // namespace nonstd { using sv_lite::basic_string_view; using sv_lite::string_view; using sv_lite::wstring_view; #if nssv_HAVE_WCHAR16_T using sv_lite::u16string_view; #endif #if nssv_HAVE_WCHAR32_T using sv_lite::u32string_view; #endif // literal "sv" using sv_lite::operator==; using sv_lite::operator!=; using sv_lite::operator<; using sv_lite::operator<=; using sv_lite::operator>; using sv_lite::operator>=; #if ! nssv_CONFIG_NO_STREAM_INSERTION using sv_lite::operator<<; #endif #if nssv_CONFIG_CONVERSION_STD_STRING_FREE_FUNCTIONS using sv_lite::to_string; using sv_lite::to_string_view; #endif } // namespace nonstd // 24.4.5 Hash support (C++11): // Note: The hash value of a string view object is equal to the hash value of // the corresponding string object. 
#if nssv_HAVE_STD_HASH #include namespace std { template<> struct hash< nonstd::string_view > { public: std::size_t operator()( nonstd::string_view v ) const nssv_noexcept { return std::hash()( std::string( v.data(), v.size() ) ); } }; template<> struct hash< nonstd::wstring_view > { public: std::size_t operator()( nonstd::wstring_view v ) const nssv_noexcept { return std::hash()( std::wstring( v.data(), v.size() ) ); } }; template<> struct hash< nonstd::u16string_view > { public: std::size_t operator()( nonstd::u16string_view v ) const nssv_noexcept { return std::hash()( std::u16string( v.data(), v.size() ) ); } }; template<> struct hash< nonstd::u32string_view > { public: std::size_t operator()( nonstd::u32string_view v ) const nssv_noexcept { return std::hash()( std::u32string( v.data(), v.size() ) ); } }; } // namespace std #endif // nssv_HAVE_STD_HASH nssv_RESTORE_WARNINGS() #endif // nssv_HAVE_STD_STRING_VIEW #endif // NONSTD_SV_LITE_H_INCLUDED /* end file simdjson/nonstd/string_view.hpp */ SIMDJSON_POP_DISABLE_WARNINGS namespace std { using string_view = nonstd::string_view; } #endif // SIMDJSON_HAS_STRING_VIEW #undef SIMDJSON_HAS_STRING_VIEW // We are not going to need this macro anymore. /// If EXPR is an error, returns it. #define SIMDJSON_TRY(EXPR) { auto _err = (EXPR); if (_err) { return _err; } } // Unless the programmer has already set SIMDJSON_DEVELOPMENT_CHECKS, // we want to set it under debug builds. We detect a debug build // under Visual Studio when the _DEBUG macro is set. Under the other // compilers, we use the fact that they define __OPTIMIZE__ whenever // they allow optimizations. // It is possible that this could miss some cases where SIMDJSON_DEVELOPMENT_CHECKS // is helpful, but the programmer can set the macro SIMDJSON_DEVELOPMENT_CHECKS. // It could also wrongly set SIMDJSON_DEVELOPMENT_CHECKS (e.g., if the programmer // sets _DEBUG in a release build under Visual Studio, or if some compiler fails to // set the __OPTIMIZE__ macro). 
#ifndef SIMDJSON_DEVELOPMENT_CHECKS
#ifdef _MSC_VER
// Visual Studio seems to set _DEBUG for debug builds.
#ifdef _DEBUG
#define SIMDJSON_DEVELOPMENT_CHECKS 1
#endif // _DEBUG
#else // _MSC_VER
// All other compilers appear to set __OPTIMIZE__ to a positive integer
// when the compiler is optimizing.
#ifndef __OPTIMIZE__
#define SIMDJSON_DEVELOPMENT_CHECKS 1
#endif // __OPTIMIZE__
#endif // _MSC_VER
#endif // SIMDJSON_DEVELOPMENT_CHECKS

// The SIMDJSON_CHECK_EOF macro is a feature flag for the "don't require padding"
// feature.

// simdjson_fallthrough: statement-like marker placed before a `case` label
// to document (and, where supported, tell the compiler) that falling through
// is intentional. Use as `simdjson_fallthrough;`.
#if SIMDJSON_CPLUSPLUS17
// if we have C++17, then fallthrough is a standard attribute
# define simdjson_fallthrough [[fallthrough]]
// check if we have __attribute__ support
#elif defined(__has_attribute)
// check if we have the __fallthrough__ attribute
#if __has_attribute(__fallthrough__)
// we are good to go:
# define simdjson_fallthrough                    __attribute__((__fallthrough__))
#endif // __has_attribute(__fallthrough__)
#endif // SIMDJSON_CPLUSPLUS17
// on some systems, we simply do not have support for fallthrough, so use a default:
#ifndef simdjson_fallthrough
# define simdjson_fallthrough do {} while (0)  /* fallthrough */
#endif // simdjson_fallthrough

// SIMDJSON_DEVELOPMENT_ASSERT(expr): assert(expr) when development checks
// are enabled, otherwise an empty statement (expr is not evaluated).
#if SIMDJSON_DEVELOPMENT_CHECKS
#define SIMDJSON_DEVELOPMENT_ASSERT(expr) do { assert ((expr)); } while (0)
#else
#define SIMDJSON_DEVELOPMENT_ASSERT(expr) do { } while (0)
#endif

#ifndef SIMDJSON_UTF8VALIDATION
#define SIMDJSON_UTF8VALIDATION 1
#endif

#ifdef __has_include
// How do we detect that a compiler supports vbmi2?
// For sure if the following header is found, we are ok?
#if __has_include(<avx512vbmi2intrin.h>)
#define SIMDJSON_COMPILER_SUPPORTS_VBMI2 1
#endif
#endif

#ifdef _MSC_VER
#if _MSC_VER >= 1920
// Visual Studio 2019 and up support VBMI2 under x64 even if the header
// avx512vbmi2intrin.h is not found.
#define SIMDJSON_COMPILER_SUPPORTS_VBMI2 1
#endif
#endif

// By default, we allow AVX512.
#ifndef SIMDJSON_AVX512_ALLOWED #define SIMDJSON_AVX512_ALLOWED 1 #endif #endif // SIMDJSON_COMMON_DEFS_H /* end file simdjson/common_defs.h */ // This provides the public API for simdjson. // DOM and ondemand are amalgamated separately, in simdjson.h /* including simdjson/simdjson_version.h: #include "simdjson/simdjson_version.h" */ /* begin file simdjson/simdjson_version.h */ // /include/simdjson/simdjson_version.h automatically generated by release.py, // do not change by hand #ifndef SIMDJSON_SIMDJSON_VERSION_H #define SIMDJSON_SIMDJSON_VERSION_H /** The version of simdjson being used (major.minor.revision) */ #define SIMDJSON_VERSION "3.6.3" namespace simdjson { enum { /** * The major version (MAJOR.minor.revision) of simdjson being used. */ SIMDJSON_VERSION_MAJOR = 3, /** * The minor version (major.MINOR.revision) of simdjson being used. */ SIMDJSON_VERSION_MINOR = 6, /** * The revision (major.minor.REVISION) of simdjson being used. */ SIMDJSON_VERSION_REVISION = 3 }; } // namespace simdjson #endif // SIMDJSON_SIMDJSON_VERSION_H /* end file simdjson/simdjson_version.h */ /* including simdjson/base.h: #include "simdjson/base.h" */ /* begin file simdjson/base.h */ /** * @file Base declarations for all simdjson headers * @private */ #ifndef SIMDJSON_BASE_H #define SIMDJSON_BASE_H /* skipped duplicate #include "simdjson/common_defs.h" */ /* skipped duplicate #include "simdjson/compiler_check.h" */ /* including simdjson/error.h: #include "simdjson/error.h" */ /* begin file simdjson/error.h */ #ifndef SIMDJSON_ERROR_H #define SIMDJSON_ERROR_H /* skipped duplicate #include "simdjson/base.h" */ #include #include namespace simdjson { /** * All possible errors returned by simdjson. These error codes are subject to change * and not all simdjson kernel returns the same error code given the same input: it is not * well defined which error a given input should produce. * * Only SUCCESS evaluates to false as a Boolean. 
All other error codes will evaluate * to true as a Boolean. */ enum error_code { SUCCESS = 0, ///< No error CAPACITY, ///< This parser can't support a document that big MEMALLOC, ///< Error allocating memory, most likely out of memory TAPE_ERROR, ///< Something went wrong, this is a generic error DEPTH_ERROR, ///< Your document exceeds the user-specified depth limitation STRING_ERROR, ///< Problem while parsing a string T_ATOM_ERROR, ///< Problem while parsing an atom starting with the letter 't' F_ATOM_ERROR, ///< Problem while parsing an atom starting with the letter 'f' N_ATOM_ERROR, ///< Problem while parsing an atom starting with the letter 'n' NUMBER_ERROR, ///< Problem while parsing a number UTF8_ERROR, ///< the input is not valid UTF-8 UNINITIALIZED, ///< unknown error, or uninitialized document EMPTY, ///< no structural element found UNESCAPED_CHARS, ///< found unescaped characters in a string. UNCLOSED_STRING, ///< missing quote at the end UNSUPPORTED_ARCHITECTURE, ///< unsupported architecture INCORRECT_TYPE, ///< JSON element has a different type than user expected NUMBER_OUT_OF_RANGE, ///< JSON number does not fit in 64 bits INDEX_OUT_OF_BOUNDS, ///< JSON array index too large NO_SUCH_FIELD, ///< JSON field not found in object IO_ERROR, ///< Error reading a file INVALID_JSON_POINTER, ///< Invalid JSON pointer reference INVALID_URI_FRAGMENT, ///< Invalid URI fragment UNEXPECTED_ERROR, ///< indicative of a bug in simdjson PARSER_IN_USE, ///< parser is already in use. OUT_OF_ORDER_ITERATION, ///< tried to iterate an array or object out of order (checked when SIMDJSON_DEVELOPMENT_CHECKS=1) INSUFFICIENT_PADDING, ///< The JSON doesn't have enough padding for simdjson to safely parse it. INCOMPLETE_ARRAY_OR_OBJECT, ///< The document ends early. SCALAR_DOCUMENT_AS_VALUE, ///< A scalar document is treated as a value. OUT_OF_BOUNDS, ///< Attempted to access location outside of document. 
TRAILING_CONTENT, ///< Unexpected trailing content in the JSON input NUM_ERROR_CODES }; /** * It is the convention throughout the code that the macro SIMDJSON_DEVELOPMENT_CHECKS determines whether * we check for OUT_OF_ORDER_ITERATION. The logic behind it is that these errors only occurs when the code * that was written while breaking some simdjson::ondemand requirement. They should not occur in released * code after these issues were fixed. */ /** * Get the error message for the given error code. * * dom::parser parser; * dom::element doc; * auto error = parser.parse("foo",3).get(doc); * if (error) { printf("Error: %s\n", error_message(error)); } * * @return The error message. */ inline const char *error_message(error_code error) noexcept; /** * Write the error message to the output stream */ inline std::ostream& operator<<(std::ostream& out, error_code error) noexcept; /** * Exception thrown when an exception-supporting simdjson method is called */ struct simdjson_error : public std::exception { /** * Create an exception from a simdjson error code. * @param error The error code */ simdjson_error(error_code error) noexcept : _error{error} { } /** The error message */ const char *what() const noexcept { return error_message(error()); } /** The error code */ error_code error() const noexcept { return _error; } private: /** The error code that was used */ error_code _error; }; namespace internal { /** * The result of a simdjson operation that could fail. * * Gives the option of reading error codes, or throwing an exception by casting to the desired result. * * This is a base class for implementations that want to add functions to the result type for * chaining. 
* * Override like: * * struct simdjson_result : public internal::simdjson_result_base { * simdjson_result() noexcept : internal::simdjson_result_base() {} * simdjson_result(error_code error) noexcept : internal::simdjson_result_base(error) {} * simdjson_result(T &&value) noexcept : internal::simdjson_result_base(std::forward(value)) {} * simdjson_result(T &&value, error_code error) noexcept : internal::simdjson_result_base(value, error) {} * // Your extra methods here * } * * Then any method returning simdjson_result will be chainable with your methods. */ template struct simdjson_result_base : protected std::pair { /** * Create a new empty result with error = UNINITIALIZED. */ simdjson_inline simdjson_result_base() noexcept; /** * Create a new error result. */ simdjson_inline simdjson_result_base(error_code error) noexcept; /** * Create a new successful result. */ simdjson_inline simdjson_result_base(T &&value) noexcept; /** * Create a new result with both things (use if you don't want to branch when creating the result). */ simdjson_inline simdjson_result_base(T &&value, error_code error) noexcept; /** * Move the value and the error to the provided variables. * * @param value The variable to assign the value to. May not be set if there is an error. * @param error The variable to assign the error to. Set to SUCCESS if there is no error. */ simdjson_inline void tie(T &value, error_code &error) && noexcept; /** * Move the value to the provided variable. * * @param value The variable to assign the value to. May not be set if there is an error. */ simdjson_inline error_code get(T &value) && noexcept; /** * The error. */ simdjson_inline error_code error() const noexcept; #if SIMDJSON_EXCEPTIONS /** * Get the result value. * * @throw simdjson_error if there was an error. */ simdjson_inline T& value() & noexcept(false); /** * Take the result value (move it). * * @throw simdjson_error if there was an error. 
*/ simdjson_inline T&& value() && noexcept(false); /** * Take the result value (move it). * * @throw simdjson_error if there was an error. */ simdjson_inline T&& take_value() && noexcept(false); /** * Cast to the value (will throw on error). * * @throw simdjson_error if there was an error. */ simdjson_inline operator T&&() && noexcept(false); #endif // SIMDJSON_EXCEPTIONS /** * Get the result value. This function is safe if and only * the error() method returns a value that evaluates to false. */ simdjson_inline const T& value_unsafe() const& noexcept; /** * Take the result value (move it). This function is safe if and only * the error() method returns a value that evaluates to false. */ simdjson_inline T&& value_unsafe() && noexcept; }; // struct simdjson_result_base } // namespace internal /** * The result of a simdjson operation that could fail. * * Gives the option of reading error codes, or throwing an exception by casting to the desired result. */ template struct simdjson_result : public internal::simdjson_result_base { /** * @private Create a new empty result with error = UNINITIALIZED. */ simdjson_inline simdjson_result() noexcept; /** * @private Create a new successful result. */ simdjson_inline simdjson_result(T &&value) noexcept; /** * @private Create a new error result. */ simdjson_inline simdjson_result(error_code error_code) noexcept; /** * @private Create a new result with both things (use if you don't want to branch when creating the result). */ simdjson_inline simdjson_result(T &&value, error_code error) noexcept; /** * Move the value and the error to the provided variables. * * @param value The variable to assign the value to. May not be set if there is an error. * @param error The variable to assign the error to. Set to SUCCESS if there is no error. */ simdjson_inline void tie(T &value, error_code &error) && noexcept; /** * Move the value to the provided variable. * * @param value The variable to assign the value to. 
May not be set if there is an error. */ simdjson_warn_unused simdjson_inline error_code get(T &value) && noexcept; /** * The error. */ simdjson_inline error_code error() const noexcept; #if SIMDJSON_EXCEPTIONS /** * Get the result value. * * @throw simdjson_error if there was an error. */ simdjson_inline T& value() & noexcept(false); /** * Take the result value (move it). * * @throw simdjson_error if there was an error. */ simdjson_inline T&& value() && noexcept(false); /** * Take the result value (move it). * * @throw simdjson_error if there was an error. */ simdjson_inline T&& take_value() && noexcept(false); /** * Cast to the value (will throw on error). * * @throw simdjson_error if there was an error. */ simdjson_inline operator T&&() && noexcept(false); #endif // SIMDJSON_EXCEPTIONS /** * Get the result value. This function is safe if and only * the error() method returns a value that evaluates to false. */ simdjson_inline const T& value_unsafe() const& noexcept; /** * Take the result value (move it). This function is safe if and only * the error() method returns a value that evaluates to false. */ simdjson_inline T&& value_unsafe() && noexcept; }; // struct simdjson_result #if SIMDJSON_EXCEPTIONS template inline std::ostream& operator<<(std::ostream& out, simdjson_result value) { return out << value.value(); } #endif // SIMDJSON_EXCEPTIONS #ifndef SIMDJSON_DISABLE_DEPRECATED_API /** * @deprecated This is an alias and will be removed, use error_code instead */ using ErrorValues [[deprecated("This is an alias and will be removed, use error_code instead")]] = error_code; /** * @deprecated Error codes should be stored and returned as `error_code`, use `error_message()` instead. 
*/ [[deprecated("Error codes should be stored and returned as `error_code`, use `error_message()` instead.")]] inline const std::string error_message(int error) noexcept; #endif // SIMDJSON_DISABLE_DEPRECATED_API } // namespace simdjson #endif // SIMDJSON_ERROR_H /* end file simdjson/error.h */ /* skipped duplicate #include "simdjson/portability.h" */ /** * @brief The top level simdjson namespace, containing everything the library provides. */ namespace simdjson { SIMDJSON_PUSH_DISABLE_UNUSED_WARNINGS /** The maximum document size supported by simdjson. */ constexpr size_t SIMDJSON_MAXSIZE_BYTES = 0xFFFFFFFF; /** * The amount of padding needed in a buffer to parse JSON. * * The input buf should be readable up to buf + SIMDJSON_PADDING * this is a stopgap; there should be a better description of the * main loop and its behavior that abstracts over this * See https://github.com/simdjson/simdjson/issues/174 */ constexpr size_t SIMDJSON_PADDING = 64; /** * By default, simdjson supports this many nested objects and arrays. * * This is the default for parser::max_depth(). 
*/ constexpr size_t DEFAULT_MAX_DEPTH = 1024; SIMDJSON_POP_DISABLE_UNUSED_WARNINGS class implementation; struct padded_string; class padded_string_view; enum class stage1_mode; namespace internal { template class atomic_ptr; class dom_parser_implementation; class escape_json_string; class tape_ref; struct value128; enum class tape_type; } // namespace internal } // namespace simdjson #endif // SIMDJSON_BASE_H /* end file simdjson/base.h */ /* skipped duplicate #include "simdjson/error.h" */ /* including simdjson/error-inl.h: #include "simdjson/error-inl.h" */ /* begin file simdjson/error-inl.h */ #ifndef SIMDJSON_ERROR_INL_H #define SIMDJSON_ERROR_INL_H /* skipped duplicate #include "simdjson/error.h" */ #include namespace simdjson { namespace internal { // We store the error code so we can validate the error message is associated with the right code struct error_code_info { error_code code; const char* message; // do not use a fancy std::string where a simple C string will do (no alloc, no destructor) }; // These MUST match the codes in error_code. We check this constraint in basictests. extern SIMDJSON_DLLIMPORTEXPORT const error_code_info error_codes[]; } // namespace internal inline const char *error_message(error_code error) noexcept { // If you're using error_code, we're trusting you got it from the enum. 
return internal::error_codes[int(error)].message; } // deprecated function #ifndef SIMDJSON_DISABLE_DEPRECATED_API inline const std::string error_message(int error) noexcept { if (error < 0 || error >= error_code::NUM_ERROR_CODES) { return internal::error_codes[UNEXPECTED_ERROR].message; } return internal::error_codes[error].message; } #endif // SIMDJSON_DISABLE_DEPRECATED_API inline std::ostream& operator<<(std::ostream& out, error_code error) noexcept { return out << error_message(error); } namespace internal { // // internal::simdjson_result_base inline implementation // template simdjson_inline void simdjson_result_base::tie(T &value, error_code &error) && noexcept { error = this->second; if (!error) { value = std::forward>(*this).first; } } template simdjson_warn_unused simdjson_inline error_code simdjson_result_base::get(T &value) && noexcept { error_code error; std::forward>(*this).tie(value, error); return error; } template simdjson_inline error_code simdjson_result_base::error() const noexcept { return this->second; } #if SIMDJSON_EXCEPTIONS template simdjson_inline T& simdjson_result_base::value() & noexcept(false) { if (error()) { throw simdjson_error(error()); } return this->first; } template simdjson_inline T&& simdjson_result_base::value() && noexcept(false) { return std::forward>(*this).take_value(); } template simdjson_inline T&& simdjson_result_base::take_value() && noexcept(false) { if (error()) { throw simdjson_error(error()); } return std::forward(this->first); } template simdjson_inline simdjson_result_base::operator T&&() && noexcept(false) { return std::forward>(*this).take_value(); } #endif // SIMDJSON_EXCEPTIONS template simdjson_inline const T& simdjson_result_base::value_unsafe() const& noexcept { return this->first; } template simdjson_inline T&& simdjson_result_base::value_unsafe() && noexcept { return std::forward(this->first); } template simdjson_inline simdjson_result_base::simdjson_result_base(T &&value, error_code error) noexcept : 
std::pair(std::forward(value), error) {} template simdjson_inline simdjson_result_base::simdjson_result_base(error_code error) noexcept : simdjson_result_base(T{}, error) {} template simdjson_inline simdjson_result_base::simdjson_result_base(T &&value) noexcept : simdjson_result_base(std::forward(value), SUCCESS) {} template simdjson_inline simdjson_result_base::simdjson_result_base() noexcept : simdjson_result_base(T{}, UNINITIALIZED) {} } // namespace internal /// /// simdjson_result inline implementation /// template simdjson_inline void simdjson_result::tie(T &value, error_code &error) && noexcept { std::forward>(*this).tie(value, error); } template simdjson_warn_unused simdjson_inline error_code simdjson_result::get(T &value) && noexcept { return std::forward>(*this).get(value); } template simdjson_inline error_code simdjson_result::error() const noexcept { return internal::simdjson_result_base::error(); } #if SIMDJSON_EXCEPTIONS template simdjson_inline T& simdjson_result::value() & noexcept(false) { return internal::simdjson_result_base::value(); } template simdjson_inline T&& simdjson_result::value() && noexcept(false) { return std::forward>(*this).value(); } template simdjson_inline T&& simdjson_result::take_value() && noexcept(false) { return std::forward>(*this).take_value(); } template simdjson_inline simdjson_result::operator T&&() && noexcept(false) { return std::forward>(*this).take_value(); } #endif // SIMDJSON_EXCEPTIONS template simdjson_inline const T& simdjson_result::value_unsafe() const& noexcept { return internal::simdjson_result_base::value_unsafe(); } template simdjson_inline T&& simdjson_result::value_unsafe() && noexcept { return std::forward>(*this).value_unsafe(); } template simdjson_inline simdjson_result::simdjson_result(T &&value, error_code error) noexcept : internal::simdjson_result_base(std::forward(value), error) {} template simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept : 
internal::simdjson_result_base(error) {} template simdjson_inline simdjson_result::simdjson_result(T &&value) noexcept : internal::simdjson_result_base(std::forward(value)) {} template simdjson_inline simdjson_result::simdjson_result() noexcept : internal::simdjson_result_base() {} } // namespace simdjson #endif // SIMDJSON_ERROR_INL_H /* end file simdjson/error-inl.h */ /* including simdjson/implementation.h: #include "simdjson/implementation.h" */ /* begin file simdjson/implementation.h */ #ifndef SIMDJSON_IMPLEMENTATION_H #define SIMDJSON_IMPLEMENTATION_H /* including simdjson/internal/atomic_ptr.h: #include "simdjson/internal/atomic_ptr.h" */ /* begin file simdjson/internal/atomic_ptr.h */ #ifndef SIMDJSON_INTERNAL_ATOMIC_PTR_H #define SIMDJSON_INTERNAL_ATOMIC_PTR_H /* skipped duplicate #include "simdjson/base.h" */ #include namespace simdjson { namespace internal { template class atomic_ptr { public: atomic_ptr(T *_ptr) : ptr{_ptr} {} operator const T*() const { return ptr.load(); } const T& operator*() const { return *ptr; } const T* operator->() const { return ptr.load(); } operator T*() { return ptr.load(); } T& operator*() { return *ptr; } T* operator->() { return ptr.load(); } atomic_ptr& operator=(T *_ptr) { ptr = _ptr; return *this; } private: std::atomic ptr; }; } // namespace internal } // namespace simdjson #endif // SIMDJSON_INTERNAL_ATOMIC_PTR_H /* end file simdjson/internal/atomic_ptr.h */ /* including simdjson/internal/dom_parser_implementation.h: #include "simdjson/internal/dom_parser_implementation.h" */ /* begin file simdjson/internal/dom_parser_implementation.h */ #ifndef SIMDJSON_INTERNAL_DOM_PARSER_IMPLEMENTATION_H #define SIMDJSON_INTERNAL_DOM_PARSER_IMPLEMENTATION_H /* skipped duplicate #include "simdjson/base.h" */ /* skipped duplicate #include "simdjson/error.h" */ #include namespace simdjson { namespace dom { class document; } // namespace dom /** * This enum is used with the dom_parser_implementation::stage1 function. 
* 1) The regular mode expects a fully formed JSON document. * 2) The streaming_partial mode expects a possibly truncated * input within a stream on JSON documents. * 3) The stream_final mode allows us to truncate final * unterminated strings. It is useful in conjunction with streaming_partial. */ enum class stage1_mode { regular, streaming_partial, streaming_final}; /** * Returns true if mode == streaming_partial or mode == streaming_final */ inline bool is_streaming(stage1_mode mode) { // performance note: it is probably faster to check that mode is different // from regular than checking that it is either streaming_partial or streaming_final. return (mode != stage1_mode::regular); // return (mode == stage1_mode::streaming_partial || mode == stage1_mode::streaming_final); } namespace internal { /** * An implementation of simdjson's DOM parser for a particular CPU architecture. * * This class is expected to be accessed only by pointer, and never move in memory (though the * pointer can move). */ class dom_parser_implementation { public: /** * @private For internal implementation use * * Run a full JSON parse on a single document (stage1 + stage2). * * Guaranteed only to be called when capacity > document length. * * Overridden by each implementation. * * @param buf The json document to parse. *MUST* be allocated up to len + SIMDJSON_PADDING bytes. * @param len The length of the json document. * @return The error code, or SUCCESS if there was no error. */ simdjson_warn_unused virtual error_code parse(const uint8_t *buf, size_t len, dom::document &doc) noexcept = 0; /** * @private For internal implementation use * * Stage 1 of the document parser. * * Guaranteed only to be called when capacity > document length. * * Overridden by each implementation. * * @param buf The json document to parse. * @param len The length of the json document. * @param streaming Whether this is being called by parser::parse_many. * @return The error code, or SUCCESS if there was no error. 
*/ simdjson_warn_unused virtual error_code stage1(const uint8_t *buf, size_t len, stage1_mode streaming) noexcept = 0; /** * @private For internal implementation use * * Stage 2 of the document parser. * * Called after stage1(). * * Overridden by each implementation. * * @param doc The document to output to. * @return The error code, or SUCCESS if there was no error. */ simdjson_warn_unused virtual error_code stage2(dom::document &doc) noexcept = 0; /** * @private For internal implementation use * * Stage 2 of the document parser for parser::parse_many. * * Guaranteed only to be called after stage1(). * Overridden by each implementation. * * @param doc The document to output to. * @return The error code, SUCCESS if there was no error, or EMPTY if all documents have been parsed. */ simdjson_warn_unused virtual error_code stage2_next(dom::document &doc) noexcept = 0; /** * Unescape a valid UTF-8 string from src to dst, stopping at a final unescaped quote. There * must be an unescaped quote terminating the string. It returns the final output * position as pointer. In case of error (e.g., the string has bad escaped codes), * then null_nullptrptr is returned. It is assumed that the output buffer is large * enough. E.g., if src points at 'joe"', then dst needs to have four free bytes + * SIMDJSON_PADDING bytes. * * Overridden by each implementation. * * @param str pointer to the beginning of a valid UTF-8 JSON string, must end with an unescaped quote. * @param dst pointer to a destination buffer, it must point a region in memory of sufficient size. * @param allow_replacement whether we allow a replacement character when the UTF-8 contains unmatched surrogate pairs. * @return end of the of the written region (exclusive) or nullptr in case of error. */ simdjson_warn_unused virtual uint8_t *parse_string(const uint8_t *src, uint8_t *dst, bool allow_replacement) const noexcept = 0; /** * Unescape a NON-valid UTF-8 string from src to dst, stopping at a final unescaped quote. 
There * must be an unescaped quote terminating the string. It returns the final output * position as pointer. In case of error (e.g., the string has bad escaped codes), * then null_nullptrptr is returned. It is assumed that the output buffer is large * enough. E.g., if src points at 'joe"', then dst needs to have four free bytes + * SIMDJSON_PADDING bytes. * * Overridden by each implementation. * * @param str pointer to the beginning of a possibly invalid UTF-8 JSON string, must end with an unescaped quote. * @param dst pointer to a destination buffer, it must point a region in memory of sufficient size. * @return end of the of the written region (exclusive) or nullptr in case of error. */ simdjson_warn_unused virtual uint8_t *parse_wobbly_string(const uint8_t *src, uint8_t *dst) const noexcept = 0; /** * Change the capacity of this parser. * * The capacity can never exceed SIMDJSON_MAXSIZE_BYTES (e.g., 4 GB) * and an CAPACITY error is returned if it is attempted. * * Generally used for reallocation. * * @param capacity The new capacity. * @param max_depth The new max_depth. * @return The error code, or SUCCESS if there was no error. */ virtual error_code set_capacity(size_t capacity) noexcept = 0; /** * Change the max depth of this parser. * * Generally used for reallocation. * * @param capacity The new capacity. * @param max_depth The new max_depth. * @return The error code, or SUCCESS if there was no error. */ virtual error_code set_max_depth(size_t max_depth) noexcept = 0; /** * Deallocate this parser. */ virtual ~dom_parser_implementation() = default; /** Number of structural indices passed from stage 1 to stage 2 */ uint32_t n_structural_indexes{0}; /** Structural indices passed from stage 1 to stage 2 */ std::unique_ptr structural_indexes{}; /** Next structural index to parse */ uint32_t next_structural_index{0}; /** * The largest document this parser can support without reallocating. * * @return Current capacity, in bytes. 
*/ simdjson_inline size_t capacity() const noexcept; /** * The maximum level of nested object and arrays supported by this parser. * * @return Maximum depth, in bytes. */ simdjson_inline size_t max_depth() const noexcept; /** * Ensure this parser has enough memory to process JSON documents up to `capacity` bytes in length * and `max_depth` depth. * * @param capacity The new capacity. * @param max_depth The new max_depth. Defaults to DEFAULT_MAX_DEPTH. * @return The error, if there is one. */ simdjson_warn_unused inline error_code allocate(size_t capacity, size_t max_depth) noexcept; protected: /** * The maximum document length this parser supports. * * Buffers are large enough to handle any document up to this length. */ size_t _capacity{0}; /** * The maximum depth (number of nested objects and arrays) supported by this parser. * * Defaults to DEFAULT_MAX_DEPTH. */ size_t _max_depth{0}; // Declaring these so that subclasses can use them to implement their constructors. simdjson_inline dom_parser_implementation() noexcept; simdjson_inline dom_parser_implementation(dom_parser_implementation &&other) noexcept; simdjson_inline dom_parser_implementation &operator=(dom_parser_implementation &&other) noexcept; simdjson_inline dom_parser_implementation(const dom_parser_implementation &) noexcept = delete; simdjson_inline dom_parser_implementation &operator=(const dom_parser_implementation &other) noexcept = delete; }; // class dom_parser_implementation simdjson_inline dom_parser_implementation::dom_parser_implementation() noexcept = default; simdjson_inline dom_parser_implementation::dom_parser_implementation(dom_parser_implementation &&other) noexcept = default; simdjson_inline dom_parser_implementation &dom_parser_implementation::operator=(dom_parser_implementation &&other) noexcept = default; simdjson_inline size_t dom_parser_implementation::capacity() const noexcept { return _capacity; } simdjson_inline size_t dom_parser_implementation::max_depth() const noexcept { 
return _max_depth; } simdjson_warn_unused inline error_code dom_parser_implementation::allocate(size_t capacity, size_t max_depth) noexcept { if (this->max_depth() != max_depth) { error_code err = set_max_depth(max_depth); if (err) { return err; } } if (_capacity != capacity) { error_code err = set_capacity(capacity); if (err) { return err; } } return SUCCESS; } } // namespace internal } // namespace simdjson #endif // SIMDJSON_INTERNAL_DOM_PARSER_IMPLEMENTATION_H /* end file simdjson/internal/dom_parser_implementation.h */ #include namespace simdjson { /** * Validate the UTF-8 string. * * @param buf the string to validate. * @param len the length of the string in bytes. * @return true if the string is valid UTF-8. */ simdjson_warn_unused bool validate_utf8(const char * buf, size_t len) noexcept; /** * Validate the UTF-8 string. * * @param sv the string_view to validate. * @return true if the string is valid UTF-8. */ simdjson_inline simdjson_warn_unused bool validate_utf8(const std::string_view sv) noexcept { return validate_utf8(sv.data(), sv.size()); } /** * Validate the UTF-8 string. * * @param p the string to validate. * @return true if the string is valid UTF-8. */ simdjson_inline simdjson_warn_unused bool validate_utf8(const std::string& s) noexcept { return validate_utf8(s.data(), s.size()); } /** * An implementation of simdjson for a particular CPU architecture. * * Also used to maintain the currently active implementation. The active implementation is * automatically initialized on first use to the most advanced implementation supported by the host. */ class implementation { public: /** * The name of this implementation. * * const implementation *impl = simdjson::get_active_implementation(); * cout << "simdjson is optimized for " << impl->name() << "(" << impl->description() << ")" << endl; * * @return the name of the implementation, e.g. "haswell", "westmere", "arm64". 
*/ virtual const std::string &name() const { return _name; } /** * The description of this implementation. * * const implementation *impl = simdjson::get_active_implementation(); * cout << "simdjson is optimized for " << impl->name() << "(" << impl->description() << ")" << endl; * * @return the description of the implementation, e.g. "Intel/AMD AVX2", "Intel/AMD SSE4.2", "ARM NEON". */ virtual const std::string &description() const { return _description; } /** * The instruction sets this implementation is compiled against * and the current CPU match. This function may poll the current CPU/system * and should therefore not be called too often if performance is a concern. * * @return true if the implementation can be safely used on the current system (determined at runtime). */ bool supported_by_runtime_system() const; /** * @private For internal implementation use * * The instruction sets this implementation is compiled against. * * @return a mask of all required `internal::instruction_set::` values. */ virtual uint32_t required_instruction_sets() const { return _required_instruction_sets; } /** * @private For internal implementation use * * const implementation *impl = simdjson::get_active_implementation(); * cout << "simdjson is optimized for " << impl->name() << "(" << impl->description() << ")" << endl; * * @param capacity The largest document that will be passed to the parser. * @param max_depth The maximum JSON object/array nesting this parser is expected to handle. * @param dst The place to put the resulting parser implementation. * @return the error code, or SUCCESS if there was no error. */ virtual error_code create_dom_parser_implementation( size_t capacity, size_t max_depth, std::unique_ptr &dst ) const noexcept = 0; /** * @private For internal implementation use * * Minify the input string assuming that it represents a JSON string, does not parse or validate. * * Overridden by each implementation. * * @param buf the json document to minify. 
* @param len the length of the json document. * @param dst the buffer to write the minified document to. *MUST* be allocated up to len + SIMDJSON_PADDING bytes. * @param dst_len the number of bytes written. Output only. * @return the error code, or SUCCESS if there was no error. */ simdjson_warn_unused virtual error_code minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) const noexcept = 0; /** * Validate the UTF-8 string. * * Overridden by each implementation. * * @param buf the string to validate. * @param len the length of the string in bytes. * @return true if and only if the string is valid UTF-8. */ simdjson_warn_unused virtual bool validate_utf8(const char *buf, size_t len) const noexcept = 0; protected: /** @private Construct an implementation with the given name and description. For subclasses. */ simdjson_inline implementation( std::string_view name, std::string_view description, uint32_t required_instruction_sets ) : _name(name), _description(description), _required_instruction_sets(required_instruction_sets) { } virtual ~implementation()=default; private: /** * The name of this implementation. */ const std::string _name; /** * The description of this implementation. */ const std::string _description; /** * Instruction sets required for this implementation. */ const uint32_t _required_instruction_sets; }; /** @private */ namespace internal { /** * The list of available implementations compiled into simdjson. */ class available_implementation_list { public: /** Get the list of available implementations compiled into simdjson */ simdjson_inline available_implementation_list() {} /** Number of implementations */ size_t size() const noexcept; /** STL const begin() iterator */ const implementation * const *begin() const noexcept; /** STL const end() iterator */ const implementation * const *end() const noexcept; /** * Get the implementation with the given name. * * Case sensitive. 
* * const implementation *impl = simdjson::get_available_implementations()["westmere"]; * if (!impl) { exit(1); } * if (!impl->supported_by_runtime_system()) { exit(1); } * simdjson::get_active_implementation() = impl; * * The lookup walks the list from begin() to end() and compares each name() with `name`. * * @param name the implementation to find, e.g. "westmere", "haswell", "arm64" * @return the implementation, or nullptr if no implementation in the list has that name. */ const implementation * operator[](const std::string_view &name) const noexcept { for (const implementation * impl : *this) { if (impl->name() == name) { return impl; } } return nullptr; } /** * Detect the most advanced implementation supported by the current host. * * This is used to initialize the implementation on startup. * * const implementation *impl = simdjson::get_available_implementations().detect_best_supported(); * simdjson::get_active_implementation() = impl; * * @return the most advanced supported implementation for the current host, or an * implementation that returns UNSUPPORTED_ARCHITECTURE if there is no supported * implementation. Will never return nullptr. */ const implementation *detect_best_supported() const noexcept; }; } // namespace internal /** * The list of available implementations compiled into simdjson. */ extern SIMDJSON_DLLIMPORTEXPORT const internal::available_implementation_list& get_available_implementations(); /** * The active implementation. * * Automatically initialized on first use to the most advanced implementation supported by this hardware. 
*/ extern SIMDJSON_DLLIMPORTEXPORT internal::atomic_ptr& get_active_implementation(); } // namespace simdjson #endif // SIMDJSON_IMPLEMENTATION_H /* end file simdjson/implementation.h */ /* including simdjson/minify.h: #include "simdjson/minify.h" */ /* begin file simdjson/minify.h */ #ifndef SIMDJSON_MINIFY_H #define SIMDJSON_MINIFY_H /* skipped duplicate #include "simdjson/base.h" */ /* including simdjson/padded_string.h: #include "simdjson/padded_string.h" */ /* begin file simdjson/padded_string.h */ #ifndef SIMDJSON_PADDED_STRING_H #define SIMDJSON_PADDED_STRING_H /* skipped duplicate #include "simdjson/base.h" */ /* skipped duplicate #include "simdjson/error.h" */ /* skipped duplicate #include "simdjson/error-inl.h" */ #include #include #include #include namespace simdjson { class padded_string_view; /** * String with extra allocation for ease of use with parser::parse() * * This is a move-only class, it cannot be copied. */ struct padded_string final { /** * Create a new, empty padded string. */ explicit inline padded_string() noexcept; /** * Create a new padded string buffer. * * @param length the size of the string. */ explicit inline padded_string(size_t length) noexcept; /** * Create a new padded string by copying the given input. * * @param data the buffer to copy * @param length the number of bytes to copy */ explicit inline padded_string(const char *data, size_t length) noexcept; /** * Create a new padded string by copying the given input. * * @param str_ the string to copy */ inline padded_string(const std::string & str_ ) noexcept; /** * Create a new padded string by copying the given input. * * @param sv_ the string to copy */ inline padded_string(std::string_view sv_) noexcept; /** * Move one padded string into another. * * The original padded string will be reduced to zero capacity. * * @param o the string to move. */ inline padded_string(padded_string &&o) noexcept; /** * Move one padded string into another. 
* * The original padded string will be reduced to zero capacity. * * @param o the string to move. */ inline padded_string &operator=(padded_string &&o) noexcept; inline void swap(padded_string &o) noexcept; ~padded_string() noexcept; /** * The length of the string. * * Does not include padding. */ size_t size() const noexcept; /** * The length of the string. * * Does not include padding. */ size_t length() const noexcept; /** * The string data. **/ const char *data() const noexcept; const uint8_t *u8data() const noexcept { return static_cast(static_cast(data_ptr));} /** * The string data. **/ char *data() noexcept; /** * Create a std::string_view with the same content. */ operator std::string_view() const; /** * Create a padded_string_view with the same content. */ operator padded_string_view() const noexcept; /** * Load this padded string from a file. * * @return IO_ERROR on error. Be mindful that on some 32-bit systems, * the file size might be limited to 2 GB. * * @param path the path to the file. **/ inline static simdjson_result load(std::string_view path) noexcept; private: padded_string &operator=(const padded_string &o) = delete; padded_string(const padded_string &o) = delete; size_t viable_size{0}; char *data_ptr{nullptr}; }; // padded_string /** * Send padded_string instance to an output stream. * * @param out The output stream. * @param s The padded_string instance. * @throw if there is an error with the underlying output stream. simdjson itself will not throw. */ inline std::ostream& operator<<(std::ostream& out, const padded_string& s) { return out << s.data(); } #if SIMDJSON_EXCEPTIONS /** * Send padded_string instance to an output stream. * * @param out The output stream. * @param s The padded_string instance. * @throw simdjson_error if the result being printed has an error. If there is an error with the * underlying output stream, that error will be propagated (simdjson_error will not be * thrown). 
*/ inline std::ostream& operator<<(std::ostream& out, simdjson_result &s) noexcept(false) { return out << s.value(); } #endif } // namespace simdjson // This is deliberately outside of simdjson so that people get it without having to use the namespace inline simdjson::padded_string operator "" _padded(const char *str, size_t len); namespace simdjson { namespace internal { // The allocate_padded_buffer function is a low-level function to allocate memory // with padding so we can read past the "length" bytes safely. It is used by // the padded_string class automatically. It returns nullptr in case // of error: the caller should check for a null pointer. // The length parameter is the maximum size in bytes of the string. // The caller is responsible to free the memory (e.g., delete[] (...)). inline char *allocate_padded_buffer(size_t length) noexcept; } // namespace internal } // namespace simdjson #endif // SIMDJSON_PADDED_STRING_H /* end file simdjson/padded_string.h */ #include #include #include namespace simdjson { /** * * Minify the input string assuming that it represents a JSON string, does not parse or validate. * This function is much faster than parsing a JSON string and then writing a minified version of it. * However, it does not validate the input. It will merely return an error in simple cases (e.g., if * there is a string that was never terminated). * * * @param buf the json document to minify. * @param len the length of the json document. * @param dst the buffer to write the minified document to. *MUST* be allocated up to len bytes. * @param dst_len the number of bytes written. Output only. * @return the error code, or SUCCESS if there was no error. 
*/ simdjson_warn_unused error_code minify(const char *buf, size_t len, char *dst, size_t &dst_len) noexcept; } // namespace simdjson #endif // SIMDJSON_MINIFY_H /* end file simdjson/minify.h */ /* skipped duplicate #include "simdjson/padded_string.h" */ /* including simdjson/padded_string-inl.h: #include "simdjson/padded_string-inl.h" */ /* begin file simdjson/padded_string-inl.h */ #ifndef SIMDJSON_PADDED_STRING_INL_H #define SIMDJSON_PADDED_STRING_INL_H /* skipped duplicate #include "simdjson/padded_string.h" */ /* including simdjson/padded_string_view.h: #include "simdjson/padded_string_view.h" */ /* begin file simdjson/padded_string_view.h */ #ifndef SIMDJSON_PADDED_STRING_VIEW_H #define SIMDJSON_PADDED_STRING_VIEW_H /* skipped duplicate #include "simdjson/portability.h" */ /* skipped duplicate #include "simdjson/base.h" // for SIMDJSON_PADDING */ /* skipped duplicate #include "simdjson/error.h" */ #include #include #include #include namespace simdjson { /** * User-provided string that promises it has extra padded bytes at the end for use with parser::parse(). */ class padded_string_view : public std::string_view { private: size_t _capacity; public: /** Create an empty padded_string_view. */ inline padded_string_view() noexcept = default; /** * Promise the given buffer has at least SIMDJSON_PADDING extra bytes allocated to it. * * @param s The string. * @param len The length of the string (not including padding). * @param capacity The allocated length of the string, including padding. */ explicit inline padded_string_view(const char* s, size_t len, size_t capacity) noexcept; /** overload explicit inline padded_string_view(const char* s, size_t len) noexcept */ explicit inline padded_string_view(const uint8_t* s, size_t len, size_t capacity) noexcept; /** * Promise the given string has at least SIMDJSON_PADDING extra bytes allocated to it. * * The capacity of the string will be used to determine its padding. * * @param s The string. 
*/ explicit inline padded_string_view(const std::string &s) noexcept; /** * Promise the given string_view has at least SIMDJSON_PADDING extra bytes allocated to it. * * @param s The string. * @param capacity The allocated length of the string, including padding. */ explicit inline padded_string_view(std::string_view s, size_t capacity) noexcept; /** The number of allocated bytes. */ inline size_t capacity() const noexcept; /** * Remove the UTF-8 Byte Order Mark (BOM) if it exists. * * @return whether a BOM was found and removed */ inline bool remove_utf8_bom() noexcept; /** The amount of padding on the string (capacity() - length()) */ inline size_t padding() const noexcept; }; // padded_string_view #if SIMDJSON_EXCEPTIONS /** * Send padded_string instance to an output stream. * * @param out The output stream. * @param s The padded_string_view. * @throw simdjson_error if the result being printed has an error. If there is an error with the * underlying output stream, that error will be propagated (simdjson_error will not be * thrown). 
*/ inline std::ostream& operator<<(std::ostream& out, simdjson_result &s) noexcept(false); #endif } // namespace simdjson #endif // SIMDJSON_PADDED_STRING_VIEW_H /* end file simdjson/padded_string_view.h */ /* skipped duplicate #include "simdjson/error-inl.h" */ /* including simdjson/padded_string_view-inl.h: #include "simdjson/padded_string_view-inl.h" */ /* begin file simdjson/padded_string_view-inl.h */ #ifndef SIMDJSON_PADDED_STRING_VIEW_INL_H #define SIMDJSON_PADDED_STRING_VIEW_INL_H /* skipped duplicate #include "simdjson/padded_string_view.h" */ /* skipped duplicate #include "simdjson/error-inl.h" */ #include /* memcmp */ namespace simdjson { inline padded_string_view::padded_string_view(const char* s, size_t len, size_t capacity) noexcept : std::string_view(s, len), _capacity(capacity) { } inline padded_string_view::padded_string_view(const uint8_t* s, size_t len, size_t capacity) noexcept : padded_string_view(reinterpret_cast(s), len, capacity) { } inline padded_string_view::padded_string_view(const std::string &s) noexcept : std::string_view(s), _capacity(s.capacity()) { } inline padded_string_view::padded_string_view(std::string_view s, size_t capacity) noexcept : std::string_view(s), _capacity(capacity) { } inline size_t padded_string_view::capacity() const noexcept { return _capacity; } inline size_t padded_string_view::padding() const noexcept { return capacity() - length(); } inline bool padded_string_view::remove_utf8_bom() noexcept { if(length() < 3) { return false; } if (std::memcmp(data(), "\xEF\xBB\xBF", 3) == 0) { remove_prefix(3); _capacity -= 3; return true; } return false; } #if SIMDJSON_EXCEPTIONS inline std::ostream& operator<<(std::ostream& out, simdjson_result &s) noexcept(false) { return out << s.value(); } #endif } // namespace simdjson #endif // SIMDJSON_PADDED_STRING_VIEW_INL_H /* end file simdjson/padded_string_view-inl.h */ #include namespace simdjson { namespace internal { // The allocate_padded_buffer function is a low-level 
function to allocate memory // with padding so we can read past the "length" bytes safely. It is used by // the padded_string class automatically. It returns nullptr in case // of error: the caller should check for a null pointer. // The length parameter is the maximum size in bytes of the string. // The caller is responsible to free the memory (e.g., delete[] (...)). inline char *allocate_padded_buffer(size_t length) noexcept { const size_t totalpaddedlength = length + SIMDJSON_PADDING; if(totalpaddedlength(1UL<<20)) { return nullptr; } #endif char *padded_buffer = new (std::nothrow) char[totalpaddedlength]; if (padded_buffer == nullptr) { return nullptr; } // We write nulls in the padded region to avoid having uninitialized // content which may trigger warning for some sanitizers std::memset(padded_buffer + length, 0, totalpaddedlength - length); return padded_buffer; } // allocate_padded_buffer() } // namespace internal inline padded_string::padded_string() noexcept = default; inline padded_string::padded_string(size_t length) noexcept : viable_size(length), data_ptr(internal::allocate_padded_buffer(length)) { } inline padded_string::padded_string(const char *data, size_t length) noexcept : viable_size(length), data_ptr(internal::allocate_padded_buffer(length)) { if ((data != nullptr) && (data_ptr != nullptr)) { std::memcpy(data_ptr, data, length); } } // note: do not pass std::string arguments by value inline padded_string::padded_string(const std::string & str_ ) noexcept : viable_size(str_.size()), data_ptr(internal::allocate_padded_buffer(str_.size())) { if (data_ptr != nullptr) { std::memcpy(data_ptr, str_.data(), str_.size()); } } // note: do pass std::string_view arguments by value inline padded_string::padded_string(std::string_view sv_) noexcept : viable_size(sv_.size()), data_ptr(internal::allocate_padded_buffer(sv_.size())) { if(simdjson_unlikely(!data_ptr)) { //allocation failed or zero size viable_size = 0; return; } if (sv_.size()) { 
std::memcpy(data_ptr, sv_.data(), sv_.size()); } } inline padded_string::padded_string(padded_string &&o) noexcept : viable_size(o.viable_size), data_ptr(o.data_ptr) { o.data_ptr = nullptr; // we take ownership } inline padded_string &padded_string::operator=(padded_string &&o) noexcept { delete[] data_ptr; data_ptr = o.data_ptr; viable_size = o.viable_size; o.data_ptr = nullptr; // we take ownership o.viable_size = 0; return *this; } inline void padded_string::swap(padded_string &o) noexcept { size_t tmp_viable_size = viable_size; char *tmp_data_ptr = data_ptr; viable_size = o.viable_size; data_ptr = o.data_ptr; o.data_ptr = tmp_data_ptr; o.viable_size = tmp_viable_size; } inline padded_string::~padded_string() noexcept { delete[] data_ptr; } inline size_t padded_string::size() const noexcept { return viable_size; } inline size_t padded_string::length() const noexcept { return viable_size; } inline const char *padded_string::data() const noexcept { return data_ptr; } inline char *padded_string::data() noexcept { return data_ptr; } inline padded_string::operator std::string_view() const { return std::string_view(data(), length()); } inline padded_string::operator padded_string_view() const noexcept { return padded_string_view(data(), length(), length() + SIMDJSON_PADDING); } inline simdjson_result padded_string::load(std::string_view filename) noexcept { // Open the file SIMDJSON_PUSH_DISABLE_WARNINGS SIMDJSON_DISABLE_DEPRECATED_WARNING // Disable CRT_SECURE warning on MSVC: manually verified this is safe std::FILE *fp = std::fopen(filename.data(), "rb"); SIMDJSON_POP_DISABLE_WARNINGS if (fp == nullptr) { return IO_ERROR; } // Get the file size int ret; #if SIMDJSON_VISUAL_STUDIO && !SIMDJSON_IS_32BITS ret = _fseeki64(fp, 0, SEEK_END); #else ret = std::fseek(fp, 0, SEEK_END); #endif // _WIN64 if(ret < 0) { std::fclose(fp); return IO_ERROR; } #if SIMDJSON_VISUAL_STUDIO && !SIMDJSON_IS_32BITS __int64 llen = _ftelli64(fp); if(llen == -1L) { std::fclose(fp); return 
IO_ERROR; } #else long llen = std::ftell(fp); if((llen < 0) || (llen == LONG_MAX)) { std::fclose(fp); return IO_ERROR; } #endif // Allocate the padded_string size_t len = static_cast(llen); padded_string s(len); if (s.data() == nullptr) { std::fclose(fp); return MEMALLOC; } // Read the padded_string std::rewind(fp); size_t bytes_read = std::fread(s.data(), 1, len, fp); if (std::fclose(fp) != 0 || bytes_read != len) { return IO_ERROR; } return s; } } // namespace simdjson inline simdjson::padded_string operator "" _padded(const char *str, size_t len) { return simdjson::padded_string(str, len); } #endif // SIMDJSON_PADDED_STRING_INL_H /* end file simdjson/padded_string-inl.h */ /* skipped duplicate #include "simdjson/padded_string_view.h" */ /* skipped duplicate #include "simdjson/padded_string_view-inl.h" */ /* including simdjson/dom.h: #include "simdjson/dom.h" */ /* begin file simdjson/dom.h */ #ifndef SIMDJSON_DOM_H #define SIMDJSON_DOM_H /* including simdjson/dom/base.h: #include "simdjson/dom/base.h" */ /* begin file simdjson/dom/base.h */ #ifndef SIMDJSON_DOM_BASE_H #define SIMDJSON_DOM_BASE_H /* skipped duplicate #include "simdjson/base.h" */ namespace simdjson { /** * @brief A DOM API on top of the simdjson parser. */ namespace dom { /** The default batch size for parser.parse_many() and parser.load_many() */ static constexpr size_t DEFAULT_BATCH_SIZE = 1000000; /** * Some adversary might try to set the batch size to 0 or 1, which might cause problems. * We set a minimum of 32B since anything else is highly likely to be an error. In practice, * most users will want a much larger batch size. * * All non-negative MINIMAL_BATCH_SIZE values should be 'safe' except that, obviously, no JSON * document can ever span 0 or 1 byte and that very large values would create memory allocation issues. */ static constexpr size_t MINIMAL_BATCH_SIZE = 32; /** * It is wasteful to allocate memory for tiny documents (e.g., 4 bytes). 
*/ static constexpr size_t MINIMAL_DOCUMENT_CAPACITY = 32; class array; class document; class document_stream; class element; class key_value_pair; class object; class parser; #ifdef SIMDJSON_THREADS_ENABLED struct stage1_worker; #endif // SIMDJSON_THREADS_ENABLED } // namespace dom namespace internal { template class string_builder; class tape_ref; } // namespace internal } // namespace simdjson #endif // SIMDJSON_DOM_BASE_H /* end file simdjson/dom/base.h */ /* including simdjson/dom/array.h: #include "simdjson/dom/array.h" */ /* begin file simdjson/dom/array.h */ #ifndef SIMDJSON_DOM_ARRAY_H #define SIMDJSON_DOM_ARRAY_H /* skipped duplicate #include "simdjson/dom/base.h" */ /* including simdjson/internal/tape_ref.h: #include "simdjson/internal/tape_ref.h" */ /* begin file simdjson/internal/tape_ref.h */ #ifndef SIMDJSON_INTERNAL_TAPE_REF_H #define SIMDJSON_INTERNAL_TAPE_REF_H /* skipped duplicate #include "simdjson/base.h" */ namespace simdjson { namespace dom { class document; } // namespace dom namespace internal { /** * A reference to an element on the tape. Internal only. */ class tape_ref { public: simdjson_inline tape_ref() noexcept; simdjson_inline tape_ref(const dom::document *doc, size_t json_index) noexcept; inline size_t after_element() const noexcept; simdjson_inline tape_type tape_ref_type() const noexcept; simdjson_inline uint64_t tape_value() const noexcept; simdjson_inline bool is_double() const noexcept; simdjson_inline bool is_int64() const noexcept; simdjson_inline bool is_uint64() const noexcept; simdjson_inline bool is_false() const noexcept; simdjson_inline bool is_true() const noexcept; simdjson_inline bool is_null_on_tape() const noexcept;// different name to avoid clash with is_null. 
simdjson_inline uint32_t matching_brace_index() const noexcept; simdjson_inline uint32_t scope_count() const noexcept; template simdjson_inline T next_tape_value() const noexcept; simdjson_inline uint32_t get_string_length() const noexcept; simdjson_inline const char * get_c_str() const noexcept; inline std::string_view get_string_view() const noexcept; simdjson_inline bool is_document_root() const noexcept; simdjson_inline bool usable() const noexcept; /** The document this element references. */ const dom::document *doc; /** The index of this element on `doc.tape[]` */ size_t json_index; }; } // namespace internal } // namespace simdjson #endif // SIMDJSON_INTERNAL_TAPE_REF_H /* end file simdjson/internal/tape_ref.h */ namespace simdjson { namespace dom { /** * JSON array. */ class array { public: /** Create a new, invalid array */ simdjson_inline array() noexcept; class iterator { public: using value_type = element; using difference_type = std::ptrdiff_t; /** * Get the actual value */ inline value_type operator*() const noexcept; /** * Get the next value. * * Part of the std::iterator interface. */ inline iterator& operator++() noexcept; /** * Get the next value. * * Part of the std::iterator interface. */ inline iterator operator++(int) noexcept; /** * Check if these values come from the same place in the JSON. * * Part of the std::iterator interface. 
*/ inline bool operator!=(const iterator& other) const noexcept; inline bool operator==(const iterator& other) const noexcept; inline bool operator<(const iterator& other) const noexcept; inline bool operator<=(const iterator& other) const noexcept; inline bool operator>=(const iterator& other) const noexcept; inline bool operator>(const iterator& other) const noexcept; iterator() noexcept = default; iterator(const iterator&) noexcept = default; iterator& operator=(const iterator&) noexcept = default; private: simdjson_inline iterator(const internal::tape_ref &tape) noexcept; internal::tape_ref tape; friend class array; }; /** * Return the first array element. * * Part of the std::iterable interface. */ inline iterator begin() const noexcept; /** * One past the last array element. * * Part of the std::iterable interface. */ inline iterator end() const noexcept; /** * Get the size of the array (number of immediate children). * It is a saturated value with a maximum of 0xFFFFFF: if the value * is 0xFFFFFF then the size is 0xFFFFFF or greater. */ inline size_t size() const noexcept; /** * Get the total number of slots used by this array on the tape. * * Note that this is not the same thing as `size()`, which reports the * number of actual elements within an array (not counting its children). * * Since an element can use 1 or 2 slots on the tape, you can only use this * to figure out the total size of an array (including its children, * recursively) if you know its structure ahead of time. **/ inline size_t number_of_slots() const noexcept; /** * Get the value associated with the given JSON pointer. We use the RFC 6901 * https://tools.ietf.org/html/rfc6901 standard, interpreting the current node * as the root of its own JSON document. 
* * dom::parser parser; * array a = parser.parse(R"([ { "foo": { "a": [ 10, 20, 30 ] }} ])"_padded); * a.at_pointer("/0/foo/a/1") == 20 * a.at_pointer("0")["foo"]["a"].at(1) == 20 * * @return The value associated with the given JSON pointer, or: * - NO_SUCH_FIELD if a field does not exist in an object * - INDEX_OUT_OF_BOUNDS if an array index is larger than an array length * - INCORRECT_TYPE if a non-integer is used to access an array * - INVALID_JSON_POINTER if the JSON pointer is invalid and cannot be parsed */ inline simdjson_result at_pointer(std::string_view json_pointer) const noexcept; /** * Get the value at the given index. This function has linear-time complexity and * is equivalent to the following: * * size_t i=0; * for (auto element : *this) { * if (i == index) { return element; } * i++; * } * return INDEX_OUT_OF_BOUNDS; * * Avoid calling the at() function repeatedly. * * @return The value at the given index, or: * - INDEX_OUT_OF_BOUNDS if the array index is larger than an array length */ inline simdjson_result at(size_t index) const noexcept; private: simdjson_inline array(const internal::tape_ref &tape) noexcept; internal::tape_ref tape; friend class element; friend struct simdjson_result; template friend class simdjson::internal::string_builder; }; } // namespace dom /** The result of a JSON conversion that may fail. 
*/ template<> struct simdjson_result : public internal::simdjson_result_base { public: simdjson_inline simdjson_result() noexcept; ///< @private simdjson_inline simdjson_result(dom::array value) noexcept; ///< @private simdjson_inline simdjson_result(error_code error) noexcept; ///< @private inline simdjson_result at_pointer(std::string_view json_pointer) const noexcept; inline simdjson_result at(size_t index) const noexcept; #if SIMDJSON_EXCEPTIONS inline dom::array::iterator begin() const noexcept(false); inline dom::array::iterator end() const noexcept(false); inline size_t size() const noexcept(false); #endif // SIMDJSON_EXCEPTIONS }; } // namespace simdjson #if defined(__cpp_lib_ranges) #include namespace std { namespace ranges { template<> inline constexpr bool enable_view = true; #if SIMDJSON_EXCEPTIONS template<> inline constexpr bool enable_view> = true; #endif // SIMDJSON_EXCEPTIONS } // namespace ranges } // namespace std #endif // defined(__cpp_lib_ranges) #endif // SIMDJSON_DOM_ARRAY_H /* end file simdjson/dom/array.h */ /* including simdjson/dom/document_stream.h: #include "simdjson/dom/document_stream.h" */ /* begin file simdjson/dom/document_stream.h */ #ifndef SIMDJSON_DOCUMENT_STREAM_H #define SIMDJSON_DOCUMENT_STREAM_H /* skipped duplicate #include "simdjson/dom/base.h" */ /* including simdjson/dom/parser.h: #include "simdjson/dom/parser.h" */ /* begin file simdjson/dom/parser.h */ #ifndef SIMDJSON_DOM_PARSER_H #define SIMDJSON_DOM_PARSER_H /* skipped duplicate #include "simdjson/dom/base.h" */ /* including simdjson/dom/document.h: #include "simdjson/dom/document.h" */ /* begin file simdjson/dom/document.h */ #ifndef SIMDJSON_DOM_DOCUMENT_H #define SIMDJSON_DOM_DOCUMENT_H /* skipped duplicate #include "simdjson/dom/base.h" */ #include namespace simdjson { namespace dom { /** * A parsed JSON document. * * This class cannot be copied, only moved, to avoid unintended allocations. 
*/ class document { public: /** * Create a document container with zero capacity. * * The parser will allocate capacity as needed. */ document() noexcept = default; ~document() noexcept = default; /** * Take another document's buffers. * * @param other The document to take. Its capacity is zeroed and it is invalidated. */ document(document &&other) noexcept = default; /** @private */ document(const document &) = delete; // Disallow copying /** * Take another document's buffers. * * @param other The document to take. Its capacity is zeroed. */ document &operator=(document &&other) noexcept = default; /** @private */ document &operator=(const document &) = delete; // Disallow copying /** * Get the root element of this document as a JSON array. */ element root() const noexcept; /** * @private Dump the raw tape for debugging. * * @param os the stream to output to. * @return false if the tape is likely wrong (e.g., you did not parse a valid JSON). */ bool dump_raw_tape(std::ostream &os) const noexcept; /** @private Structural values. */ std::unique_ptr tape{}; /** @private String values. * * Should be at least byte_capacity. */ std::unique_ptr string_buf{}; /** @private Allocate memory to support * input JSON documents of up to len bytes. * * When calling this function, you lose * all the data. * * The memory allocation is strict: you * can you use this function to increase * or lower the amount of allocated memory. * Passsing zero clears the memory. */ error_code allocate(size_t len) noexcept; /** @private Capacity in bytes, in terms * of how many bytes of input JSON we can * support. */ size_t capacity() const noexcept; private: size_t allocated_capacity{0}; friend class parser; }; // class document } // namespace dom } // namespace simdjson #endif // SIMDJSON_DOM_DOCUMENT_H /* end file simdjson/dom/document.h */ namespace simdjson { namespace dom { /** * A persistent document parser. 
* * The parser is designed to be reused, holding the internal buffers necessary to do parsing, * as well as memory for a single document. The parsed document is overwritten on each parse. * * This class cannot be copied, only moved, to avoid unintended allocations. * * @note Moving a parser instance may invalidate "dom::element" instances. If you need to * preserve both the "dom::element" instances and the parser, consider wrapping the parser * instance in a std::unique_ptr instance: * * std::unique_ptr parser(new dom::parser{}); * auto error = parser->load(f).get(root); * * You can then move std::unique_ptr safely. * * @note This is not thread safe: one parser cannot produce two documents at the same time! */ class parser { public: /** * Create a JSON parser. * * The new parser will have zero capacity. * * @param max_capacity The maximum document length the parser can automatically handle. The parser * will allocate more capacity on an as needed basis (when it sees documents too big to handle) * up to this amount. The parser still starts with zero capacity no matter what this number is: * to allocate an initial capacity, call allocate() after constructing the parser. * Defaults to SIMDJSON_MAXSIZE_BYTES (the largest single document simdjson can process). */ simdjson_inline explicit parser(size_t max_capacity = SIMDJSON_MAXSIZE_BYTES) noexcept; /** * Take another parser's buffers and state. * * @param other The parser to take. Its capacity is zeroed. */ simdjson_inline parser(parser &&other) noexcept; parser(const parser &) = delete; ///< @private Disallow copying /** * Take another parser's buffers and state. * * @param other The parser to take. Its capacity is zeroed. */ simdjson_inline parser &operator=(parser &&other) noexcept; parser &operator=(const parser &) = delete; ///< @private Disallow copying /** Deallocate the JSON parser. */ ~parser()=default; /** * Load a JSON document from a file and return a reference to it. 
* * dom::parser parser; * const element doc = parser.load("jsonexamples/twitter.json"); * * The function is eager: the file's content is loaded in memory inside the parser instance * and immediately parsed. The file can be deleted after the `parser.load` call. * * ### IMPORTANT: Document Lifetime * * The JSON document still lives in the parser: this is the most efficient way to parse JSON * documents because it reuses the same buffers, but you *must* use the document before you * destroy the parser or call parse() again. * * Moving the parser instance is safe, but it invalidates the element instances. You may store * the parser instance without moving it by wrapping it inside an `unique_ptr` instance like * so: `std::unique_ptr parser(new dom::parser{});`. * * ### Parser Capacity * * If the parser's current capacity is less than the file length, it will allocate enough capacity * to handle it (up to max_capacity). * * @param path The path to load. * @return The document, or an error: * - IO_ERROR if there was an error opening or reading the file. * Be mindful that on some 32-bit systems, * the file size might be limited to 2 GB. * - MEMALLOC if the parser does not have enough capacity and memory allocation fails. * - CAPACITY if the parser does not have enough capacity and len > max_capacity. * - other json errors if parsing fails. You should not rely on these errors to always the same for the * same document: they may vary under runtime dispatch (so they may vary depending on your system and hardware). */ inline simdjson_result load(const std::string &path) & noexcept; inline simdjson_result load(const std::string &path) && = delete ; /** * Parse a JSON document and return a temporary reference to it. * * dom::parser parser; * element doc_root = parser.parse(buf, len); * * The function eagerly parses the input: the input can be modified and discarded after * the `parser.parse(buf, len)` call has completed. 
* * ### IMPORTANT: Document Lifetime * * The JSON document still lives in the parser: this is the most efficient way to parse JSON * documents because it reuses the same buffers, but you *must* use the document before you * destroy the parser or call parse() again. * * Moving the parser instance is safe, but it invalidates the element instances. To hand the * parser around without moving the instance itself, wrap it inside a `std::unique_ptr` instance like * so: `std::unique_ptr<dom::parser> parser(new dom::parser{});`. * * ### REQUIRED: Buffer Padding * * The buffer must have at least SIMDJSON_PADDING extra allocated bytes. It does not matter what * those bytes are initialized to, as long as they are allocated. These bytes will be read: if you * are using a sanitizer that verifies that no uninitialized byte is read, then you should initialize the * SIMDJSON_PADDING bytes to avoid runtime warnings. * * If realloc_if_needed is true (the default), it is assumed that the buffer does *not* have enough padding, * and it is copied into an enlarged temporary buffer before parsing. Thus the following is safe: * * const char *json = R"({"key":"value"})"; * const size_t json_len = std::strlen(json); * simdjson::dom::parser parser; * simdjson::dom::element element = parser.parse(json, json_len); * * If you set realloc_if_needed to false (e.g., parser.parse(json, json_len, false)), * you must provide a buffer with at least SIMDJSON_PADDING extra bytes at the end. * The benefit of setting realloc_if_needed to false is that you avoid a temporary * memory allocation and a copy. * * The padded bytes may be read. It is not important how you initialize * these bytes, though we recommend a sensible default like null character values or spaces. 
* For example, the following low-level code is safe: * * const char *json = R"({"key":"value"})"; * const size_t json_len = std::strlen(json); * std::unique_ptr<char[]> padded_json_copy{new char[json_len + SIMDJSON_PADDING]}; * std::memcpy(padded_json_copy.get(), json, json_len); * std::memset(padded_json_copy.get() + json_len, '\0', SIMDJSON_PADDING); * simdjson::dom::parser parser; * simdjson::dom::element element = parser.parse(padded_json_copy.get(), json_len, false); * * ### Parser Capacity * * If the parser's current capacity is less than len, it will allocate enough capacity * to handle it (up to max_capacity). * * @param buf The JSON to parse. Must have at least len + SIMDJSON_PADDING allocated bytes, unless * realloc_if_needed is true. * @param len The length of the JSON. * @param realloc_if_needed Whether to reallocate and enlarge the JSON buffer to add padding. * @return An element pointing at the root of the document, or an error: * - MEMALLOC if realloc_if_needed is true or the parser does not have enough capacity, * and memory allocation fails. * - CAPACITY if the parser does not have enough capacity and len > max_capacity. * - other JSON errors if parsing fails. You should not rely on these errors to always be the same for the * same document: they may vary under runtime dispatch (so they may vary depending on your system and hardware). 
*/ inline simdjson_result parse(const uint8_t *buf, size_t len, bool realloc_if_needed = true) & noexcept; /** Deleted for rvalue parsers. NOTE(review): presumably because the returned element would refer to a document owned by the temporary parser; confirm. The same applies to the other `&& =delete` overloads of parse. */ inline simdjson_result parse(const uint8_t *buf, size_t len, bool realloc_if_needed = true) && =delete; /** @overload parse(const uint8_t *buf, size_t len, bool realloc_if_needed) */ simdjson_inline simdjson_result parse(const char *buf, size_t len, bool realloc_if_needed = true) & noexcept; simdjson_inline simdjson_result parse(const char *buf, size_t len, bool realloc_if_needed = true) && =delete; /** @overload parse(const uint8_t *buf, size_t len, bool realloc_if_needed) */ simdjson_inline simdjson_result parse(const std::string &s) & noexcept; simdjson_inline simdjson_result parse(const std::string &s) && =delete; /** @overload parse(const uint8_t *buf, size_t len, bool realloc_if_needed) */ simdjson_inline simdjson_result parse(const padded_string &s) & noexcept; simdjson_inline simdjson_result parse(const padded_string &s) && =delete; /** @overload parse(const uint8_t *buf, size_t len, bool realloc_if_needed) */ simdjson_inline simdjson_result parse(const padded_string_view &v) & noexcept; simdjson_inline simdjson_result parse(const padded_string_view &v) && =delete; /** @private We do not want to allow implicit conversion from C string to std::string. */ simdjson_inline simdjson_result parse(const char *buf) noexcept = delete; /** * Parse a JSON document into a provided document instance and return a temporary reference to it. * It is similar to the function `parse` except that instead of parsing into the internal * `document` instance associated with the parser, it allows the user to provide a document * instance. * * dom::parser parser; * dom::document doc; * element doc_root = parser.parse_into_document(doc, buf, len); * * The function eagerly parses the input: the input can be modified and discarded after * the `parser.parse_into_document(doc, buf, len)` call has completed. 
* * ### IMPORTANT: Document Lifetime * * After the call to parse_into_document, the parser is no longer needed. * * The JSON document lives in the document instance: you must keep the document * instance alive while you navigate through it (i.e., use the returned value from * parse_into_document). You are encouraged to reuse the document instance * many times with new data to avoid reallocations: * * dom::document doc; * element doc_root1 = parser.parse_into_document(doc, buf1, len); * //... doc_root1 is a pointer inside doc * element doc_root2 = parser.parse_into_document(doc, buf2, len); * //... doc_root2 is a pointer inside doc * // at this point doc_root1 is no longer safe * * Moving the document instance is safe, but it invalidates the element instances. After * moving a document, you can recover safe access to the document root with its `root()` method. * * @param doc The document instance where the parsed data will be stored (on success). * @param buf The JSON to parse. Must have at least len + SIMDJSON_PADDING allocated bytes, unless * realloc_if_needed is true. * @param len The length of the JSON. * @param realloc_if_needed Whether to reallocate and enlarge the JSON buffer to add padding. * @return An element pointing at the root of the document, or an error: * - MEMALLOC if realloc_if_needed is true or the parser does not have enough capacity, * and memory allocation fails. * - CAPACITY if the parser does not have enough capacity and len > max_capacity. * - other JSON errors if parsing fails. You should not rely on these errors to always be the same for the * same document: they may vary under runtime dispatch (so they may vary depending on your system and hardware). 
*/ inline simdjson_result parse_into_document(document& doc, const uint8_t *buf, size_t len, bool realloc_if_needed = true) & noexcept; inline simdjson_result parse_into_document(document& doc, const uint8_t *buf, size_t len, bool realloc_if_needed = true) && =delete; /** @overload parse_into_document(document& doc, const uint8_t *buf, size_t len, bool realloc_if_needed) */ simdjson_inline simdjson_result parse_into_document(document& doc, const char *buf, size_t len, bool realloc_if_needed = true) & noexcept; simdjson_inline simdjson_result parse_into_document(document& doc, const char *buf, size_t len, bool realloc_if_needed = true) && =delete; /** @overload parse_into_document(document& doc, const uint8_t *buf, size_t len, bool realloc_if_needed) */ simdjson_inline simdjson_result parse_into_document(document& doc, const std::string &s) & noexcept; simdjson_inline simdjson_result parse_into_document(document& doc, const std::string &s) && =delete; /** @overload parse_into_document(document& doc, const uint8_t *buf, size_t len, bool realloc_if_needed) */ simdjson_inline simdjson_result parse_into_document(document& doc, const padded_string &s) & noexcept; simdjson_inline simdjson_result parse_into_document(document& doc, const padded_string &s) && =delete; /** @private We do not want to allow implicit conversion from C string to std::string. */ simdjson_inline simdjson_result parse_into_document(document& doc, const char *buf) noexcept = delete; /** * Load a file containing many JSON documents. * * dom::parser parser; * for (const element doc : parser.load_many(path)) { * cout << std::string(doc["title"]) << endl; * } * * The file is loaded in memory and can be safely deleted after the `parser.load_many(path)` * function has returned. The memory is held by the `parser` instance. * * The function is lazy: it may be that no more than one JSON document at a time is parsed. * And, possibly, no document may have been parsed when the `parser.load_many(path)` function * returned. 
* * If there is a UTF-8 BOM, the parser skips it. * * ### Format * * The file must contain a series of one or more JSON documents, concatenated into a single * buffer, separated by whitespace. It effectively parses until it has a fully valid document, * then starts parsing the next document at that point. (It does this with more parallelism and * lookahead than you might think, though.) * * Documents that consist of an object or array may omit the whitespace between them, concatenating * with no separator. Documents that consist of a single primitive (i.e. documents that are not * arrays or objects) MUST be separated with whitespace. * * The documents must not exceed batch_size bytes (by default 1MB) or they will fail to parse. * Setting batch_size to excessively large or excessively small values may negatively impact * performance. * * ### Error Handling * * All errors are returned during iteration: if there is a global error such as memory allocation, * it will be yielded as the first result. Iteration always stops after the first error. * * As with all other simdjson methods, non-exception error handling is readily available through * the same interface, requiring you to check the error before using the document: * * dom::parser parser; * dom::document_stream docs; * auto error = parser.load_many(path).get(docs); * if (error) { cerr << error << endl; exit(1); } * for (auto doc : docs) { * std::string_view title; * if ((error = doc["title"].get(title))) { cerr << error << endl; exit(1); } * cout << title << endl; * } * * ### Threads * * When compiled with SIMDJSON_THREADS_ENABLED, this method will use a single thread under the * hood to do some lookahead. * * ### Parser Capacity * * If the parser's current capacity is less than batch_size, it will allocate enough capacity * to handle it (up to max_capacity). * * @param path File name pointing at the concatenated JSON to parse. * @param batch_size The batch size to use. MUST be larger than the largest document. 
The sweet * spot is cache-related: small enough to fit in cache, yet big enough to * parse as many documents as possible in one tight loop. * Defaults to 1MB (as simdjson::dom::DEFAULT_BATCH_SIZE), which has been a reasonable sweet * spot in our tests. * If you set the batch_size to a value smaller than simdjson::dom::MINIMAL_BATCH_SIZE * (currently 32B), it will be replaced by simdjson::dom::MINIMAL_BATCH_SIZE. * @return The stream, or an error. An empty input will yield 0 documents rather than an EMPTY error. Errors: * - IO_ERROR if there was an error opening or reading the file. * - MEMALLOC if the parser does not have enough capacity and memory allocation fails. * - CAPACITY if the parser does not have enough capacity and batch_size > max_capacity. * - other JSON errors if parsing fails. You should not rely on these errors to always be the same for the * same document: they may vary under runtime dispatch (so they may vary depending on your system and hardware). */ inline simdjson_result load_many(const std::string &path, size_t batch_size = dom::DEFAULT_BATCH_SIZE) noexcept; /** * Parse a buffer containing many JSON documents. * * dom::parser parser; * for (element doc : parser.parse_many(buf, len)) { * cout << std::string(doc["title"]) << endl; * } * * No copy of the input buffer is made. * * The function is lazy: it may be that no more than one JSON document at a time is parsed. * And, possibly, no document may have been parsed when the `parser.parse_many(buf, len)` function * returned. * * The caller is responsible for ensuring that the input string data remains unchanged and is * not deleted during the loop. 
In particular, the following is unsafe and will not compile: * * auto docs = parser.parse_many("[\"temporary data\"]"_padded); * // here the string "[\"temporary data\"]" may no longer exist in memory * // the parser instance may not have even accessed the input yet * for (element doc : docs) { * cout << std::string(doc["title"]) << endl; * } * * The following is safe: * * auto json = "[\"temporary data\"]"_padded; * auto docs = parser.parse_many(json); * for (element doc : docs) { * cout << std::string(doc["title"]) << endl; * } * * If there is a UTF-8 BOM, the parser skips it. * * ### Format * * The buffer must contain a series of one or more JSON documents, concatenated into a single * buffer, separated by whitespace. It effectively parses until it has a fully valid document, * then starts parsing the next document at that point. (It does this with more parallelism and * lookahead than you might think, though.) * * Documents that consist of an object or array may omit the whitespace between them, concatenating * with no separator. Documents that consist of a single primitive (i.e. documents that are not * arrays or objects) MUST be separated with whitespace. * * The documents must not exceed batch_size bytes (by default 1MB) or they will fail to parse. * Setting batch_size to excessively large or excessively small values may negatively impact * performance. * * ### Error Handling * * All errors are returned during iteration: if there is a global error such as memory allocation, * it will be yielded as the first result. Iteration always stops after the first error. 
* * As with all other simdjson methods, non-exception error handling is readily available through * the same interface, requiring you to check the error before using the document: * * dom::parser parser; * dom::document_stream docs; * auto error = parser.parse_many(buf, len).get(docs); * if (error) { cerr << error << endl; exit(1); } * for (auto doc : docs) { * std::string_view title; * if ((error = doc["title"].get(title))) { cerr << error << endl; exit(1); } * cout << title << endl; * } * * ### REQUIRED: Buffer Padding * * The buffer must have at least SIMDJSON_PADDING extra allocated bytes. It does not matter what * those bytes are initialized to, as long as they are allocated. These bytes will be read: if you * are using a sanitizer that verifies that no uninitialized byte is read, then you should initialize the * SIMDJSON_PADDING bytes to avoid runtime warnings. * * ### Threads * * When compiled with SIMDJSON_THREADS_ENABLED, this method will use a single thread under the * hood to do some lookahead. * * ### Parser Capacity * * If the parser's current capacity is less than batch_size, it will allocate enough capacity * to handle it (up to max_capacity). * * @param buf The concatenated JSON to parse. Must have at least len + SIMDJSON_PADDING allocated bytes. * @param len The length of the concatenated JSON. * @param batch_size The batch size to use. MUST be larger than the largest document. The sweet * spot is cache-related: small enough to fit in cache, yet big enough to * parse as many documents as possible in one tight loop. * Defaults to 1MB (as simdjson::dom::DEFAULT_BATCH_SIZE), which has been a reasonable sweet spot in our tests. * @return The stream, or an error. An empty input will yield 0 documents rather than an EMPTY error. Errors: * - MEMALLOC if the parser does not have enough capacity and memory allocation fails. * - CAPACITY if the parser does not have enough capacity and batch_size > max_capacity. * - other JSON errors if parsing fails. 
You should not rely on these errors to always be the same for the * same document: they may vary under runtime dispatch (so they may vary depending on your system and hardware). */ inline simdjson_result parse_many(const uint8_t *buf, size_t len, size_t batch_size = dom::DEFAULT_BATCH_SIZE) noexcept; /** @overload parse_many(const uint8_t *buf, size_t len, size_t batch_size) */ inline simdjson_result parse_many(const char *buf, size_t len, size_t batch_size = dom::DEFAULT_BATCH_SIZE) noexcept; /** @overload parse_many(const uint8_t *buf, size_t len, size_t batch_size) */ inline simdjson_result parse_many(const std::string &s, size_t batch_size = dom::DEFAULT_BATCH_SIZE) noexcept; inline simdjson_result parse_many(const std::string &&s, size_t batch_size) = delete;// unsafe: no copy of the input is made, so a temporary string must not be passed /** @overload parse_many(const uint8_t *buf, size_t len, size_t batch_size) */ inline simdjson_result parse_many(const padded_string &s, size_t batch_size = dom::DEFAULT_BATCH_SIZE) noexcept; inline simdjson_result parse_many(const padded_string &&s, size_t batch_size) = delete;// unsafe: no copy of the input is made, so a temporary padded_string must not be passed /** @private We do not want to allow implicit conversion from C string to std::string. */ simdjson_result parse_many(const char *buf, size_t batch_size = dom::DEFAULT_BATCH_SIZE) noexcept = delete; /** * Ensure this parser has enough memory to process JSON documents up to `capacity` bytes in length * and `max_depth` depth. * * @param capacity The new capacity, in bytes. * @param max_depth The new max_depth. Defaults to DEFAULT_MAX_DEPTH. * @return The error, if there is one. */ simdjson_warn_unused inline error_code allocate(size_t capacity, size_t max_depth = DEFAULT_MAX_DEPTH) noexcept; #ifndef SIMDJSON_DISABLE_DEPRECATED_API /** * @private deprecated because it returns bool instead of error_code, which is our standard for * failures. Use allocate() instead. * * Ensure this parser has enough memory to process JSON documents up to `capacity` bytes in length * and `max_depth` depth. * * @param capacity The new capacity. 
* @param max_depth The new max_depth. Defaults to DEFAULT_MAX_DEPTH. * @return true if successful, false if allocation failed. */ [[deprecated("Use allocate() instead.")]] simdjson_warn_unused inline bool allocate_capacity(size_t capacity, size_t max_depth = DEFAULT_MAX_DEPTH) noexcept; #endif // SIMDJSON_DISABLE_DEPRECATED_API /** * The largest document this parser can support without reallocating. * * @return Current capacity, in bytes. */ simdjson_inline size_t capacity() const noexcept; /** * The largest document this parser can automatically support. * * The parser may reallocate internal buffers as needed up to this amount. * * @return Maximum capacity, in bytes. */ simdjson_inline size_t max_capacity() const noexcept; /** * The maximum level of nested objects and arrays supported by this parser. * * @return Maximum depth, as a number of nesting levels. */ simdjson_inline size_t max_depth() const noexcept; /** * Set max_capacity. This is the largest document this parser can automatically support. * * The parser may reallocate internal buffers as needed up to this amount as documents are passed * to it. * * Note: To avoid limiting the memory to an absurd value, such as zero or two bytes, * if you try to set max_capacity to a value lower than MINIMAL_DOCUMENT_CAPACITY, * then the maximal capacity is set to MINIMAL_DOCUMENT_CAPACITY. * * This call will not allocate or deallocate, even if capacity is currently above max_capacity. * * @param max_capacity The new maximum capacity, in bytes. */ simdjson_inline void set_max_capacity(size_t max_capacity) noexcept; #ifdef SIMDJSON_THREADS_ENABLED /** * The parser instance can use threads when they are available to speed up some * operations. It is enabled by default. Changing this attribute will change the * behavior of the parser for future operations. 
*/ bool threaded{true}; #endif /** @private Use the new DOM API instead */ class Iterator; /** @private Use simdjson_error instead */ using InvalidJSON [[deprecated("Use simdjson_error instead")]] = simdjson_error; /** @private [for benchmarking access] The implementation to use */ std::unique_ptr implementation{}; /** @private Use `if (parser.parse(...).error())` instead */ bool valid{false}; /** @private Use `parser.parse(...).error()` instead */ error_code error{UNINITIALIZED}; /** @private Use `parser.parse(...).value()` instead */ document doc{}; /** @private Returns true if the document parsed was valid. */ [[deprecated("Use the result of parser.parse() instead")]] inline bool is_valid() const noexcept; /** * @private Returns an error code corresponding to the last parsing attempt (see * simdjson.h); returns UNINITIALIZED if no parsing was attempted. */ [[deprecated("Use the result of parser.parse() instead")]] inline int get_error_code() const noexcept; /** @private Returns the string equivalent of "get_error_code". */ [[deprecated("Use error_message() on the result of parser.parse() instead, or cout << error")]] inline std::string get_error_message() const noexcept; /** @private */ [[deprecated("Use cout << on the result of parser.parse() instead")]] inline bool print_json(std::ostream &os) const noexcept; /** @private Private and deprecated: use `parser.parse(...).doc.dump_raw_tape()` instead */ inline bool dump_raw_tape(std::ostream &os) const noexcept; private: /** * The maximum document length this parser will automatically support. * * The parser will not be automatically allocated above this amount. */ size_t _max_capacity; /** * The loaded buffer (reused each time load() is called) */ std::unique_ptr loaded_bytes; /** Capacity of loaded_bytes buffer. */ size_t _loaded_bytes_capacity{0}; // all nodes are stored on the doc.tape using a 64-bit word. 
// // strings, doubles and ints are stored as // a 64-bit word with a pointer to the actual value // // // // for objects or arrays, store [ or { at the beginning and } and ] at the // end. For the openings ([ or {), we annotate them with a reference to the // location on the doc.tape of the end, and for the closings (} and ]), we // annotate them with a reference to the location of the opening // // /** * Ensure we have enough capacity to handle at least desired_capacity bytes, * and auto-allocate if not. This also allocates memory if needed in the * internal document. */ inline error_code ensure_capacity(size_t desired_capacity) noexcept; /** * Ensure we have enough capacity to handle at least desired_capacity bytes, * and auto-allocate if not. This also allocates memory if needed in the * provided document. */ inline error_code ensure_capacity(document& doc, size_t desired_capacity) noexcept; /** Read the file into loaded_bytes */ inline simdjson_result read_file(const std::string &path) noexcept; friend class parser::Iterator; friend class document_stream; }; // class parser } // namespace dom } // namespace simdjson #endif // SIMDJSON_DOM_PARSER_H /* end file simdjson/dom/parser.h */ #ifdef SIMDJSON_THREADS_ENABLED #include #include #include #endif namespace simdjson { namespace dom { #ifdef SIMDJSON_THREADS_ENABLED /** @private Custom worker class used to run stage 1 jobs on its own thread. **/ struct stage1_worker { stage1_worker() noexcept = default; stage1_worker(const stage1_worker&) = delete; stage1_worker(stage1_worker&&) = delete; stage1_worker operator=(const stage1_worker&) = delete; ~stage1_worker(); /** * We only start the thread when it is needed, not at object construction. This may throw. * You should only call this once. **/ void start_thread(); /** * Start a stage 1 job. You should first call 'run', then 'finish'. * You must call start_thread once before. */ void run(document_stream * ds, dom::parser * stage1, size_t next_batch_start); /** Wait for the run to finish (blocking). 
You should first call 'run', then 'finish'. **/ void finish(); private: /** * Normally, we would never stop the thread. But we do in the destructor. * This function is only safe assuming that you are not waiting for results. You * should have called run, then finish, and be done. **/ void stop_thread(); std::thread thread{}; /** These three variables define the work done by the thread. **/ dom::parser * stage1_thread_parser{}; size_t _next_batch_start{}; document_stream * owner{}; /** * We have two state variables. This could be streamlined to one variable in the future but * we use two for clarity. */ bool has_work{false}; bool can_work{true}; /** * We lock using a mutex. */ std::mutex locking_mutex{}; std::condition_variable cond_var{}; }; #endif /** * A forward-only stream of documents. * * Produced by parser::parse_many and parser::load_many. * */ class document_stream { public: /** * Construct an uninitialized document_stream. * * ```c++ * document_stream docs; * error = parser.parse_many(json).get(docs); * ``` */ simdjson_inline document_stream() noexcept; /** Move-construct a document_stream from another. */ simdjson_inline document_stream(document_stream &&other) noexcept = default; /** Move-assign one document_stream to another. */ simdjson_inline document_stream &operator=(document_stream &&other) noexcept = default; simdjson_inline ~document_stream() noexcept; /** * Returns the input size in bytes. */ inline size_t size_in_bytes() const noexcept; /** * After iterating through the stream, this method * returns the number of bytes that were not parsed at the end * of the stream. If truncated_bytes() differs from zero, * then the input was truncated, possibly because incomplete JSON * documents were found at the end of the stream. You * may need to process the bytes in the interval [size_in_bytes()-truncated_bytes(), size_in_bytes()). 
* * You should only call truncated_bytes() after streaming through all * documents, like so: * * document_stream stream = parser.parse_many(json,window); * for(auto doc : stream) { * // do something with doc * } * size_t truncated = stream.truncated_bytes(); * */ inline size_t truncated_bytes() const noexcept; /** * An iterator through a forward-only stream of documents. */ class iterator { public: using value_type = simdjson_result; using reference = value_type; using difference_type = std::ptrdiff_t; using iterator_category = std::input_iterator_tag; /** * Default constructor. */ simdjson_inline iterator() noexcept; /** * Get the current document (or error). */ simdjson_inline reference operator*() noexcept; /** * Advance to the next document (prefix increment). */ inline iterator& operator++() noexcept; /** * Check if we're at the end yet. * @param other the end iterator to compare to. */ simdjson_inline bool operator!=(const iterator &other) const noexcept; /** * @private * * Gives the current index, in bytes, in the input document. * * document_stream stream = parser.parse_many(json,window); * for(auto i = stream.begin(); i != stream.end(); ++i) { * auto doc = *i; * size_t index = i.current_index(); * } * * This function (current_index()) is experimental and the usage * may change in future versions of simdjson: we find the API somewhat * awkward and we would like to offer something friendlier. */ simdjson_inline size_t current_index() const noexcept; /** * @private * * Gives a view of the current document. * * document_stream stream = parser.parse_many(json,window); * for(auto i = stream.begin(); i != stream.end(); ++i) { * auto doc = *i; * std::string_view v = i.source(); * } * * The returned string_view instance is simply a view into the (unparsed) * source string: it may thus include white-space characters and all manner * of padding. 
* * This function (source()) is experimental and the usage * may change in future versions of simdjson: we find the API somewhat * awkward and we would like to offer something friendlier. */ simdjson_inline std::string_view source() const noexcept; private: simdjson_inline iterator(document_stream *s, bool finished) noexcept; /** The document_stream we're iterating through. */ document_stream* stream; /** Whether we're finished or not. */ bool finished; friend class document_stream; }; /** * Start iterating the documents in the stream. */ simdjson_inline iterator begin() noexcept; /** * The end of the stream, for iterator comparison purposes. */ simdjson_inline iterator end() noexcept; private: document_stream &operator=(const document_stream &) = delete; // Disallow copying document_stream(const document_stream &other) = delete; // Disallow copying /** * Construct a document_stream. Does not allocate or parse anything until the iterator is * used. * * @param parser is a reference to the parser instance used to generate this document_stream * @param buf is the raw byte buffer we need to process * @param len is the length of the raw byte buffer in bytes * @param batch_size is the size of the windows (must be at least as large as the largest JSON document) */ simdjson_inline document_stream( dom::parser &parser, const uint8_t *buf, size_t len, size_t batch_size ) noexcept; /** * Parse the first document in the buffer. Used by begin(), to handle allocation and * initialization. */ inline void start() noexcept; /** * Parse the next document found in the buffer previously given to document_stream. * * The content should be a valid JSON document encoded as UTF-8. If there is a * UTF-8 BOM, the parser skips it. * * You do NOT need to pre-allocate a parser. This function takes care of * pre-allocating a capacity determined by the batch_size given when creating the * document_stream object. 
* * The function returns simdjson::EMPTY if there is no more data to be parsed. * * The function returns simdjson::SUCCESS (as integer = 0) in case of success * and indicates that the buffer has successfully been parsed to the end. * Every document it contained has been parsed without error. * * The function returns an error code from simdjson/simdjson.h in case of failure * such as simdjson::CAPACITY, simdjson::MEMALLOC, simdjson::DEPTH_ERROR and so forth * (the simdjson::error_message function converts these error codes into a string). * * You can also check validity by calling parser.is_valid(). The same parser can * and should be reused for the other documents in the buffer. * * NOTE(review): this function is declared `void`; presumably the codes described above are * reported through the `error` member rather than returned. Confirm against the implementation. */ inline void next() noexcept; /** * Pass the next batch through stage 1 and return when finished. * When threads are enabled, this may wait for the stage 1 thread to finish. */ inline void load_batch() noexcept; /** Get the next document index. */ inline size_t next_batch_start() const noexcept; /** Pass the next batch through stage 1 with the given parser. */ inline error_code run_stage1(dom::parser &p, size_t batch_start) noexcept; dom::parser *parser; const uint8_t *buf; size_t len; size_t batch_size; /** The error (or lack thereof) from the current document. */ error_code error; size_t batch_start{0}; size_t doc_index{}; #ifdef SIMDJSON_THREADS_ENABLED /** Indicates whether we use threads. Note that this needs to be a constant during the execution of the parsing. */ bool use_thread; inline void load_from_stage1_thread() noexcept; /** Start a thread to run stage 1 on the next batch. */ inline void start_stage1_thread() noexcept; /** Wait for the stage 1 thread to finish and capture the results. */ inline void finish_stage1_thread() noexcept; /** The error returned from the stage 1 thread. */ error_code stage1_thread_error{UNINITIALIZED}; /** The thread used to run stage 1 against the next batch in the background. 
*/ friend struct stage1_worker; std::unique_ptr worker{new(std::nothrow) stage1_worker()}; /** * The parser used to run stage 1 in the background. Will be swapped * with the regular parser when finished. */ dom::parser stage1_thread_parser{}; #endif // SIMDJSON_THREADS_ENABLED friend class dom::parser; friend struct simdjson_result; friend struct internal::simdjson_result_base; }; // class document_stream } // namespace dom /** simdjson_result specialization that can be constructed from an error_code or a dom::document_stream; it exposes begin()/end() returning dom::document_stream::iterator. */ template<> struct simdjson_result : public internal::simdjson_result_base { public: simdjson_inline simdjson_result() noexcept; ///< @private simdjson_inline simdjson_result(error_code error) noexcept; ///< @private simdjson_inline simdjson_result(dom::document_stream &&value) noexcept; ///< @private #if SIMDJSON_EXCEPTIONS /** Declared noexcept(false). NOTE(review): presumably throws when this result holds an error; confirm against the implementation. */ simdjson_inline dom::document_stream::iterator begin() noexcept(false); simdjson_inline dom::document_stream::iterator end() noexcept(false); #else // SIMDJSON_EXCEPTIONS #ifndef SIMDJSON_DISABLE_DEPRECATED_API [[deprecated("parse_many() and load_many() may return errors. 
Use document_stream stream; error = parser.parse_many().get(doc); instead.")]] simdjson_inline dom::document_stream::iterator end() noexcept; #endif // SIMDJSON_DISABLE_DEPRECATED_API #endif // SIMDJSON_EXCEPTIONS }; // struct simdjson_result } // namespace simdjson #endif // SIMDJSON_DOCUMENT_STREAM_H /* end file simdjson/dom/document_stream.h */ /* skipped duplicate #include "simdjson/dom/document.h" */ /* including simdjson/dom/element.h: #include "simdjson/dom/element.h" */ /* begin file simdjson/dom/element.h */ #ifndef SIMDJSON_DOM_ELEMENT_H #define SIMDJSON_DOM_ELEMENT_H /* skipped duplicate #include "simdjson/dom/base.h" */ /* skipped duplicate #include "simdjson/dom/array.h" */ namespace simdjson { namespace dom { /** * The actual concrete type of a JSON element * This is the type it is most easily cast to with get<>. */ enum class element_type { ARRAY = '[', ///< dom::array OBJECT = '{', ///< dom::object INT64 = 'l', ///< int64_t UINT64 = 'u', ///< uint64_t: any integer that fits in uint64_t but *not* int64_t DOUBLE = 'd', ///< double: Any number with a "." or "e" that fits in double. STRING = '"', ///< std::string_view BOOL = 't', ///< bool NULL_VALUE = 'n' ///< null }; /** * A JSON element. * * References an element in a JSON document, representing a JSON null, boolean, string, number, * array or object. */ class element { public: /** Create a new, invalid element. */ simdjson_inline element() noexcept; /** The type of this element. */ simdjson_inline element_type type() const noexcept; /** * Cast this element to an array. * * @returns An object that can be used to iterate the array, or: * INCORRECT_TYPE if the JSON element is not an array. */ inline simdjson_result get_array() const noexcept; /** * Cast this element to an object. * * @returns An object that can be used to look up or iterate the object's fields, or: * INCORRECT_TYPE if the JSON element is not an object. 
*/ inline simdjson_result get_object() const noexcept; /** * Cast this element to a null-terminated C string. * * The string is guaranteed to be valid UTF-8. * * The length of the string is given by get_string_length(). Because JSON strings * may contain null characters, it may be incorrect to use strlen to determine the * string length. * * It is possible to get a single string_view instance which represents both the string * content and its length: see get_string(). * * @returns A pointer to a null-terminated UTF-8 string. This string is stored in the parser and will * be invalidated the next time it parses a document or when it is destroyed. * Returns INCORRECT_TYPE if the JSON element is not a string. */ inline simdjson_result get_c_str() const noexcept; /** * Gives the length in bytes of the string. * * It is possible to get a single string_view instance which represents both the string * content and its length: see get_string(). * * @returns A string length in bytes. * Returns INCORRECT_TYPE if the JSON element is not a string. */ inline simdjson_result get_string_length() const noexcept; /** * Cast this element to a string. * * The string is guaranteed to be valid UTF-8. * * @returns A UTF-8 string. The string is stored in the parser and will be invalidated the next time it * parses a document or when it is destroyed. * Returns INCORRECT_TYPE if the JSON element is not a string. */ inline simdjson_result get_string() const noexcept; /** * Cast this element to a signed integer. * * @returns A signed 64-bit integer. * Returns INCORRECT_TYPE if the JSON element is not an integer, or NUMBER_OUT_OF_RANGE * if it does not fit in a signed 64-bit integer. */ inline simdjson_result get_int64() const noexcept; /** * Cast this element to an unsigned integer. * * @returns An unsigned 64-bit integer. * Returns INCORRECT_TYPE if the JSON element is not an integer, or NUMBER_OUT_OF_RANGE * if it is negative.
*/ inline simdjson_result get_uint64() const noexcept; /** * Cast this element to a double floating-point. * * @returns A double value. * Returns INCORRECT_TYPE if the JSON element is not a number. */ inline simdjson_result get_double() const noexcept; /** * Cast this element to a bool. * * @returns A bool value. * Returns INCORRECT_TYPE if the JSON element is not a boolean. */ inline simdjson_result get_bool() const noexcept; /** * Whether this element is a json array. * * Equivalent to is<dom::array>(). */ inline bool is_array() const noexcept; /** * Whether this element is a json object. * * Equivalent to is<dom::object>(). */ inline bool is_object() const noexcept; /** * Whether this element is a json string. * * Equivalent to is<std::string_view>() or is<const char *>(). */ inline bool is_string() const noexcept; /** * Whether this element is a json number that fits in a signed 64-bit integer. * * Equivalent to is<int64_t>(). */ inline bool is_int64() const noexcept; /** * Whether this element is a json number that fits in an unsigned 64-bit integer. * * Equivalent to is<uint64_t>(). */ inline bool is_uint64() const noexcept; /** * Whether this element is a json number that fits in a double. * * Equivalent to is<double>(). */ inline bool is_double() const noexcept; /** * Whether this element is a json number. * * Both integers and floating points will return true. */ inline bool is_number() const noexcept; /** * Whether this element is a json `true` or `false`. * * Equivalent to is<bool>(). */ inline bool is_bool() const noexcept; /** * Whether this element is a json `null`. */ inline bool is_null() const noexcept; /** * Tell whether the value can be cast to provided type (T). * * Supported types: * - Boolean: bool * - Number: double, uint64_t, int64_t * - String: std::string_view, const char * * - Array: dom::array * - Object: dom::object * * @tparam T bool, double, uint64_t, int64_t, std::string_view, const char *, dom::array, dom::object */ template simdjson_inline bool is() const noexcept; /** * Get the value as the provided type (T).
* * Supported types: * - Boolean: bool * - Number: double, uint64_t, int64_t * - String: std::string_view, const char * * - Array: dom::array * - Object: dom::object * * You may use get_double(), get_bool(), get_uint64(), get_int64(), * get_object(), get_array() or get_string() instead. * * @tparam T bool, double, uint64_t, int64_t, std::string_view, const char *, dom::array, dom::object * * @returns The value cast to the given type, or: * INCORRECT_TYPE if the value cannot be cast to the given type. */ template inline simdjson_result get() const noexcept { // Unless the simdjson library provides an inline implementation, calling this method should // immediately fail. static_assert(!sizeof(T), "The get method with given type is not implemented by the simdjson library. " "The supported types are Boolean (bool), numbers (double, uint64_t, int64_t), " "strings (std::string_view, const char *), arrays (dom::array) and objects (dom::object). " "We recommand you use get_double(), get_bool(), get_uint64(), get_int64(), " "get_object(), get_array() or get_string() instead of the get template."); } /** * Get the value as the provided type (T). * * Supported types: * - Boolean: bool * - Number: double, uint64_t, int64_t * - String: std::string_view, const char * * - Array: dom::array * - Object: dom::object * * @tparam T bool, double, uint64_t, int64_t, std::string_view, const char *, dom::array, dom::object * * @param value The variable to set to the value. May not be set if there is an error. * * @returns The error that occurred, or SUCCESS if there was no error. */ template simdjson_warn_unused simdjson_inline error_code get(T &value) const noexcept; /** * Get the value as the provided type (T), setting error if it's not the given type. 
* * Supported types: * - Boolean: bool * - Number: double, uint64_t, int64_t * - String: std::string_view, const char * * - Array: dom::array * - Object: dom::object * * @tparam T bool, double, uint64_t, int64_t, std::string_view, const char *, dom::array, dom::object * * @param value The variable to set to the given type. value is undefined if there is an error. * @param error The variable to store the error. error is set to SUCCESS if there is no error. */ template inline void tie(T &value, error_code &error) && noexcept; #if SIMDJSON_EXCEPTIONS /** * Read this element as a boolean. * * @return The boolean value * @exception simdjson_error(INCORRECT_TYPE) if the JSON element is not a boolean. */ inline operator bool() const noexcept(false); /** * Read this element as a null-terminated UTF-8 string. * * Be mindful that JSON allows strings to contain null characters. * * Does *not* convert other types to a string; requires that the JSON type of the element was * an actual string. * * @return The string value. * @exception simdjson_error(INCORRECT_TYPE) if the JSON element is not a string. */ inline explicit operator const char*() const noexcept(false); /** * Read this element as a UTF-8 string (std::string_view). * * Does *not* convert other types to a string; requires that the JSON type of the element was * an actual string. * * @return The string value. * @exception simdjson_error(INCORRECT_TYPE) if the JSON element is not a string. */ inline operator std::string_view() const noexcept(false); /** * Read this element as an unsigned integer. * * @return The integer value. * @exception simdjson_error(INCORRECT_TYPE) if the JSON element is not an integer * @exception simdjson_error(NUMBER_OUT_OF_RANGE) if the integer doesn't fit in 64 bits or is negative */ inline operator uint64_t() const noexcept(false); /** * Read this element as a signed integer. * * @return The integer value.
* @exception simdjson_error(INCORRECT_TYPE) if the JSON element is not an integer * @exception simdjson_error(NUMBER_OUT_OF_RANGE) if the integer doesn't fit in 64 bits */ inline operator int64_t() const noexcept(false); /** * Read this element as a double. * * @return The double value. * @exception simdjson_error(INCORRECT_TYPE) if the JSON element is not a number * @exception simdjson_error(NUMBER_OUT_OF_RANGE) if the integer doesn't fit in 64 bits or is negative */ inline operator double() const noexcept(false); /** * Read this element as a JSON array. * * @return The JSON array. * @exception simdjson_error(INCORRECT_TYPE) if the JSON element is not an array */ inline operator array() const noexcept(false); /** * Read this element as a JSON object (key/value pairs). * * @return The JSON object. * @exception simdjson_error(INCORRECT_TYPE) if the JSON element is not an object */ inline operator object() const noexcept(false); /** * Iterate over each element in this array. * * @return The beginning of the iteration. * @exception simdjson_error(INCORRECT_TYPE) if the JSON element is not an array */ inline dom::array::iterator begin() const noexcept(false); /** * Iterate over each element in this array. * * @return The end of the iteration. * @exception simdjson_error(INCORRECT_TYPE) if the JSON element is not an array */ inline dom::array::iterator end() const noexcept(false); #endif // SIMDJSON_EXCEPTIONS /** * Get the value associated with the given key. * * The key will be matched against **unescaped** JSON: * * dom::parser parser; * int64_t(parser.parse(R"({ "a\n": 1 })"_padded)["a\n"]) == 1 * parser.parse(R"({ "a\n": 1 })"_padded)["a\\n"].get_uint64().error() == NO_SUCH_FIELD * * @return The value associated with this field, or: * - NO_SUCH_FIELD if the field does not exist in the object * - INCORRECT_TYPE if this is not an object */ inline simdjson_result operator[](std::string_view key) const noexcept; /** * Get the value associated with the given key.
* * The key will be matched against **unescaped** JSON: * * dom::parser parser; * int64_t(parser.parse(R"({ "a\n": 1 })"_padded)["a\n"]) == 1 * parser.parse(R"({ "a\n": 1 })"_padded)["a\\n"].get_uint64().error() == NO_SUCH_FIELD * * @return The value associated with this field, or: * - NO_SUCH_FIELD if the field does not exist in the object * - INCORRECT_TYPE if this is not an object */ inline simdjson_result operator[](const char *key) const noexcept; /** * Get the value associated with the given JSON pointer. We use the RFC 6901 * https://tools.ietf.org/html/rfc6901 standard. * * dom::parser parser; * element doc = parser.parse(R"({ "foo": { "a": [ 10, 20, 30 ] }})"_padded); * doc.at_pointer("/foo/a/1") == 20 * doc.at_pointer("/foo")["a"].at(1) == 20 * doc.at_pointer("")["foo"]["a"].at(1) == 20 * * It is allowed for a key to be the empty string: * * dom::parser parser; * object obj = parser.parse(R"({ "": { "a": [ 10, 20, 30 ] }})"_padded); * obj.at_pointer("//a/1") == 20 * * @return The value associated with the given JSON pointer, or: * - NO_SUCH_FIELD if a field does not exist in an object * - INDEX_OUT_OF_BOUNDS if an array index is larger than an array length * - INCORRECT_TYPE if a non-integer is used to access an array * - INVALID_JSON_POINTER if the JSON pointer is invalid and cannot be parsed */ inline simdjson_result at_pointer(const std::string_view json_pointer) const noexcept; #ifndef SIMDJSON_DISABLE_DEPRECATED_API /** * * Version 0.4 of simdjson used an incorrect interpretation of the JSON Pointer standard * and allowed the following : * * dom::parser parser; * element doc = parser.parse(R"({ "foo": { "a": [ 10, 20, 30 ] }})"_padded); * doc.at("foo/a/1") == 20 * * Though it is intuitive, it is not compliant with RFC 6901 * https://tools.ietf.org/html/rfc6901 * * For standard compliance, use the at_pointer function instead. 
* * @return The value associated with the given JSON pointer, or: * - NO_SUCH_FIELD if a field does not exist in an object * - INDEX_OUT_OF_BOUNDS if an array index is larger than an array length * - INCORRECT_TYPE if a non-integer is used to access an array * - INVALID_JSON_POINTER if the JSON pointer is invalid and cannot be parsed */ [[deprecated("For standard compliance, use at_pointer instead, and prefix your pointers with a slash '/', see RFC6901 ")]] inline simdjson_result at(const std::string_view json_pointer) const noexcept; #endif // SIMDJSON_DISABLE_DEPRECATED_API /** * Get the value at the given index. * * @return The value at the given index, or: * - INDEX_OUT_OF_BOUNDS if the array index is larger than an array length */ inline simdjson_result at(size_t index) const noexcept; /** * Get the value associated with the given key. * * The key will be matched against **unescaped** JSON: * * dom::parser parser; * int64_t(parser.parse(R"({ "a\n": 1 })"_padded)["a\n"]) == 1 * parser.parse(R"({ "a\n": 1 })"_padded)["a\\n"].get_uint64().error() == NO_SUCH_FIELD * * @return The value associated with this field, or: * - NO_SUCH_FIELD if the field does not exist in the object */ inline simdjson_result at_key(std::string_view key) const noexcept; /** * Get the value associated with the given key in a case-insensitive manner. * * Note: The key will be matched against **unescaped** JSON. 
* * @return The value associated with this field, or: * - NO_SUCH_FIELD if the field does not exist in the object */ inline simdjson_result at_key_case_insensitive(std::string_view key) const noexcept; /** * operator< defines a total order for elements, allowing them to be used in * ordered C++ STL containers * * @return TRUE if the key appears before the other one in the tape */ inline bool operator<(const element &other) const noexcept; /** * operator== allows verifying whether two element values reference the * same JSON item * * @return TRUE if the two values reference the same JSON element */ inline bool operator==(const element &other) const noexcept; /** @private for debugging. Prints out the root element. */ inline bool dump_raw_tape(std::ostream &out) const noexcept; private: simdjson_inline element(const internal::tape_ref &tape) noexcept; internal::tape_ref tape; friend class document; friend class object; friend class array; friend struct simdjson_result; template friend class simdjson::internal::string_builder; }; } // namespace dom /** The result of a JSON navigation that may fail.
*/ template<> struct simdjson_result : public internal::simdjson_result_base { public: simdjson_inline simdjson_result() noexcept; ///< @private simdjson_inline simdjson_result(dom::element &&value) noexcept; ///< @private simdjson_inline simdjson_result(error_code error) noexcept; ///< @private simdjson_inline simdjson_result type() const noexcept; template simdjson_inline bool is() const noexcept; template simdjson_inline simdjson_result get() const noexcept; template simdjson_warn_unused simdjson_inline error_code get(T &value) const noexcept; simdjson_inline simdjson_result get_array() const noexcept; simdjson_inline simdjson_result get_object() const noexcept; simdjson_inline simdjson_result get_c_str() const noexcept; simdjson_inline simdjson_result get_string_length() const noexcept; simdjson_inline simdjson_result get_string() const noexcept; simdjson_inline simdjson_result get_int64() const noexcept; simdjson_inline simdjson_result get_uint64() const noexcept; simdjson_inline simdjson_result get_double() const noexcept; simdjson_inline simdjson_result get_bool() const noexcept; simdjson_inline bool is_array() const noexcept; simdjson_inline bool is_object() const noexcept; simdjson_inline bool is_string() const noexcept; simdjson_inline bool is_int64() const noexcept; simdjson_inline bool is_uint64() const noexcept; simdjson_inline bool is_double() const noexcept; simdjson_inline bool is_number() const noexcept; simdjson_inline bool is_bool() const noexcept; simdjson_inline bool is_null() const noexcept; simdjson_inline simdjson_result operator[](std::string_view key) const noexcept; simdjson_inline simdjson_result operator[](const char *key) const noexcept; simdjson_inline simdjson_result at_pointer(const std::string_view json_pointer) const noexcept; [[deprecated("For standard compliance, use at_pointer instead, and prefix your pointers with a slash '/', see RFC6901 ")]] simdjson_inline simdjson_result at(const std::string_view json_pointer) const 
noexcept; simdjson_inline simdjson_result at(size_t index) const noexcept; simdjson_inline simdjson_result at_key(std::string_view key) const noexcept; simdjson_inline simdjson_result at_key_case_insensitive(std::string_view key) const noexcept; #if SIMDJSON_EXCEPTIONS simdjson_inline operator bool() const noexcept(false); simdjson_inline explicit operator const char*() const noexcept(false); simdjson_inline operator std::string_view() const noexcept(false); simdjson_inline operator uint64_t() const noexcept(false); simdjson_inline operator int64_t() const noexcept(false); simdjson_inline operator double() const noexcept(false); simdjson_inline operator dom::array() const noexcept(false); simdjson_inline operator dom::object() const noexcept(false); simdjson_inline dom::array::iterator begin() const noexcept(false); simdjson_inline dom::array::iterator end() const noexcept(false); #endif // SIMDJSON_EXCEPTIONS }; } // namespace simdjson #endif // SIMDJSON_DOM_ELEMENT_H /* end file simdjson/dom/element.h */ /* including simdjson/dom/object.h: #include "simdjson/dom/object.h" */ /* begin file simdjson/dom/object.h */ #ifndef SIMDJSON_DOM_OBJECT_H #define SIMDJSON_DOM_OBJECT_H /* skipped duplicate #include "simdjson/dom/base.h" */ /* skipped duplicate #include "simdjson/dom/element.h" */ /* skipped duplicate #include "simdjson/internal/tape_ref.h" */ namespace simdjson { namespace dom { /** * JSON object. */ class object { public: /** Create a new, invalid object */ simdjson_inline object() noexcept; class iterator { public: using value_type = key_value_pair; using difference_type = std::ptrdiff_t; /** * Get the actual key/value pair */ inline const value_type operator*() const noexcept; /** * Get the next key/value pair. * * Part of the std::iterator interface. * */ inline iterator& operator++() noexcept; /** * Get the next key/value pair. * * Part of the std::iterator interface.
* */ inline iterator operator++(int) noexcept; /** * Check if these values come from the same place in the JSON. * * Part of the std::iterator interface. */ inline bool operator!=(const iterator& other) const noexcept; inline bool operator==(const iterator& other) const noexcept; inline bool operator<(const iterator& other) const noexcept; inline bool operator<=(const iterator& other) const noexcept; inline bool operator>=(const iterator& other) const noexcept; inline bool operator>(const iterator& other) const noexcept; /** * Get the key of this key/value pair. */ inline std::string_view key() const noexcept; /** * Get the length (in bytes) of the key in this key/value pair. * You should expect this function to be faster than key().size(). */ inline uint32_t key_length() const noexcept; /** * Returns true if the key in this key/value pair is equal * to the provided string_view. */ inline bool key_equals(std::string_view o) const noexcept; /** * Returns true if the key in this key/value pair is equal * to the provided string_view in a case-insensitive manner. * Case comparisons may only be handled correctly for ASCII strings. */ inline bool key_equals_case_insensitive(std::string_view o) const noexcept; /** * Get the key of this key/value pair. */ inline const char *key_c_str() const noexcept; /** * Get the value of this key/value pair. */ inline element value() const noexcept; iterator() noexcept = default; iterator(const iterator&) noexcept = default; iterator& operator=(const iterator&) noexcept = default; private: simdjson_inline iterator(const internal::tape_ref &tape) noexcept; internal::tape_ref tape; friend class object; }; /** * Return the first key/value pair. * * Part of the std::iterable interface. */ inline iterator begin() const noexcept; /** * One past the last key/value pair. * * Part of the std::iterable interface. */ inline iterator end() const noexcept; /** * Get the size of the object (number of keys). 
* It is a saturated value with a maximum of 0xFFFFFF: if the value * is 0xFFFFFF then the size is 0xFFFFFF or greater. */ inline size_t size() const noexcept; /** * Get the value associated with the given key. * * The key will be matched against **unescaped** JSON: * * dom::parser parser; * int64_t(parser.parse(R"({ "a\n": 1 })"_padded)["a\n"]) == 1 * parser.parse(R"({ "a\n": 1 })"_padded)["a\\n"].get_uint64().error() == NO_SUCH_FIELD * * This function has linear-time complexity: the keys are checked one by one. * * @return The value associated with this field, or: * - NO_SUCH_FIELD if the field does not exist in the object * - INCORRECT_TYPE if this is not an object */ inline simdjson_result operator[](std::string_view key) const noexcept; /** * Get the value associated with the given key. * * The key will be matched against **unescaped** JSON: * * dom::parser parser; * int64_t(parser.parse(R"({ "a\n": 1 })"_padded)["a\n"]) == 1 * parser.parse(R"({ "a\n": 1 })"_padded)["a\\n"].get_uint64().error() == NO_SUCH_FIELD * * This function has linear-time complexity: the keys are checked one by one. * * @return The value associated with this field, or: * - NO_SUCH_FIELD if the field does not exist in the object * - INCORRECT_TYPE if this is not an object */ inline simdjson_result operator[](const char *key) const noexcept; /** * Get the value associated with the given JSON pointer. We use the RFC 6901 * https://tools.ietf.org/html/rfc6901 standard, interpreting the current node * as the root of its own JSON document. 
* * dom::parser parser; * object obj = parser.parse(R"({ "foo": { "a": [ 10, 20, 30 ] }})"_padded); * obj.at_pointer("/foo/a/1") == 20 * obj.at_pointer("/foo")["a"].at(1) == 20 * * It is allowed for a key to be the empty string: * * dom::parser parser; * object obj = parser.parse(R"({ "": { "a": [ 10, 20, 30 ] }})"_padded); * obj.at_pointer("//a/1") == 20 * obj.at_pointer("/")["a"].at(1) == 20 * * @return The value associated with the given JSON pointer, or: * - NO_SUCH_FIELD if a field does not exist in an object * - INDEX_OUT_OF_BOUNDS if an array index is larger than an array length * - INCORRECT_TYPE if a non-integer is used to access an array * - INVALID_JSON_POINTER if the JSON pointer is invalid and cannot be parsed */ inline simdjson_result at_pointer(std::string_view json_pointer) const noexcept; /** * Get the value associated with the given key. * * The key will be matched against **unescaped** JSON: * * dom::parser parser; * int64_t(parser.parse(R"({ "a\n": 1 })"_padded)["a\n"]) == 1 * parser.parse(R"({ "a\n": 1 })"_padded)["a\\n"].get_uint64().error() == NO_SUCH_FIELD * * This function has linear-time complexity: the keys are checked one by one. * * @return The value associated with this field, or: * - NO_SUCH_FIELD if the field does not exist in the object */ inline simdjson_result at_key(std::string_view key) const noexcept; /** * Get the value associated with the given key in a case-insensitive manner. * It is only guaranteed to work over ASCII inputs. * * Note: The key will be matched against **unescaped** JSON. * * This function has linear-time complexity: the keys are checked one by one. 
* * @return The value associated with this field, or: * - NO_SUCH_FIELD if the field does not exist in the object */ inline simdjson_result at_key_case_insensitive(std::string_view key) const noexcept; private: simdjson_inline object(const internal::tape_ref &tape) noexcept; internal::tape_ref tape; friend class element; friend struct simdjson_result; template friend class simdjson::internal::string_builder; }; /** * Key/value pair in an object. */ class key_value_pair { public: /** key in the key-value pair **/ std::string_view key; /** value in the key-value pair **/ element value; private: simdjson_inline key_value_pair(std::string_view _key, element _value) noexcept; friend class object; }; } // namespace dom /** The result of a JSON conversion that may fail. */ template<> struct simdjson_result : public internal::simdjson_result_base { public: simdjson_inline simdjson_result() noexcept; ///< @private simdjson_inline simdjson_result(dom::object value) noexcept; ///< @private simdjson_inline simdjson_result(error_code error) noexcept; ///< @private inline simdjson_result operator[](std::string_view key) const noexcept; inline simdjson_result operator[](const char *key) const noexcept; inline simdjson_result at_pointer(std::string_view json_pointer) const noexcept; inline simdjson_result at_key(std::string_view key) const noexcept; inline simdjson_result at_key_case_insensitive(std::string_view key) const noexcept; #if SIMDJSON_EXCEPTIONS inline dom::object::iterator begin() const noexcept(false); inline dom::object::iterator end() const noexcept(false); inline size_t size() const noexcept(false); #endif // SIMDJSON_EXCEPTIONS }; } // namespace simdjson #if defined(__cpp_lib_ranges) #include namespace std { namespace ranges { template<> inline constexpr bool enable_view = true; #if SIMDJSON_EXCEPTIONS template<> inline constexpr bool enable_view> = true; #endif // SIMDJSON_EXCEPTIONS } // namespace ranges } // namespace std #endif // defined(__cpp_lib_ranges) 
#endif // SIMDJSON_DOM_OBJECT_H /* end file simdjson/dom/object.h */ /* skipped duplicate #include "simdjson/dom/parser.h" */ /* including simdjson/dom/serialization.h: #include "simdjson/dom/serialization.h" */ /* begin file simdjson/dom/serialization.h */ #ifndef SIMDJSON_SERIALIZATION_H #define SIMDJSON_SERIALIZATION_H /* skipped duplicate #include "simdjson/dom/base.h" */ /* skipped duplicate #include "simdjson/dom/element.h" */ /* skipped duplicate #include "simdjson/dom/object.h" */ #include namespace simdjson { /** * The string_builder template and mini_formatter class * are not part of our public API and are subject to change * at any time! */ namespace internal { /** @private Printing primitives shared by mini_formatter and pretty_formatter, which both derive from this class. **/ template class base_formatter { public: /** Add a comma **/ simdjson_inline void comma(); /** Start an array, prints [ **/ simdjson_inline void start_array(); /** End an array, prints ] **/ simdjson_inline void end_array(); /** Start an object, prints { **/ simdjson_inline void start_object(); /** End an object, prints } **/ simdjson_inline void end_object(); /** Prints a true **/ simdjson_inline void true_atom(); /** Prints a false **/ simdjson_inline void false_atom(); /** Prints a null **/ simdjson_inline void null_atom(); /** Prints a number **/ simdjson_inline void number(int64_t x); /** Prints a number **/ simdjson_inline void number(uint64_t x); /** Prints a number **/ simdjson_inline void number(double x); /** Prints a key (string + colon) **/ simdjson_inline void key(std::string_view unescaped); /** Prints a string. The string is escaped as needed. **/ simdjson_inline void string(std::string_view unescaped); /** Clears out the content. **/ simdjson_inline void clear(); /** * Get access to the buffer, it is owned by the instance, but * the user can make a copy.
**/ simdjson_inline std::string_view str() const; /** Prints one character **/ simdjson_inline void one_char(char c); simdjson_inline void call_print_newline() { this->print_newline(); } simdjson_inline void call_print_indents(size_t depth) { this->print_indents(depth); } simdjson_inline void call_print_space() { this->print_space(); } protected: // implementation details (subject to change) /** Backing buffer **/ std::vector buffer{}; // not ideal! }; /** * @private This is the class that we expect to use with the string_builder * template. It tries to produce a compact version of the JSON element * as quickly as possible. */ class mini_formatter : public base_formatter { public: simdjson_inline void print_newline(); simdjson_inline void print_indents(size_t depth); simdjson_inline void print_space(); }; class pretty_formatter : public base_formatter { public: simdjson_inline void print_newline(); simdjson_inline void print_indents(size_t depth); simdjson_inline void print_space(); protected: int indent_step = 4; }; /** * @private The string_builder template allows us to construct * a string from a document element. It is parametrized * by a "formatter" which handles the details. Thus * the string_builder template could support both minification * and prettification, and various other tradeoffs. */ template class string_builder { public: /** Construct an initially empty builder, would print the empty string **/ string_builder() = default; /** Append an element to the builder (to be printed) **/ inline void append(simdjson::dom::element value); /** Append an array to the builder (to be printed) **/ inline void append(simdjson::dom::array value); /** Append an object to the builder (to be printed) **/ inline void append(simdjson::dom::object value); /** Reset the builder (so that it would print the empty string) **/ simdjson_inline void clear(); /** * Get access to the string. 
The string_view is owned by the builder * and it is invalid to use it after the string_builder has been * destroyed. * However you can make a copy of the string_view on memory that you * own. */ simdjson_inline std::string_view str() const; /** Append a key_value_pair to the builder (to be printed) **/ simdjson_inline void append(simdjson::dom::key_value_pair value); private: formatter format{}; }; } // internal namespace dom { /** * Print JSON to an output stream. * * @param out The output stream. * @param value The element. * @throw if there is an error with the underlying output stream. simdjson itself will not throw. */ inline std::ostream& operator<<(std::ostream& out, simdjson::dom::element value); #if SIMDJSON_EXCEPTIONS inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x); #endif /** * Print JSON to an output stream. * * @param out The output stream. * @param value The array. * @throw if there is an error with the underlying output stream. simdjson itself will not throw. */ inline std::ostream& operator<<(std::ostream& out, simdjson::dom::array value); #if SIMDJSON_EXCEPTIONS inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x); #endif /** * Print JSON to an output stream. * * @param out The output stream. * @param value The object. * @throw if there is an error with the underlying output stream. simdjson itself will not throw. */ inline std::ostream& operator<<(std::ostream& out, simdjson::dom::object value); #if SIMDJSON_EXCEPTIONS inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x); #endif } // namespace dom /** * Converts JSON to a string. 
* * dom::parser parser; * element doc = parser.parse(" [ 1 , 2 , 3 ] "_padded); * cout << to_string(doc) << endl; // prints [1,2,3] * */ template std::string to_string(T x) { // in C++, to_string is standard: http://www.cplusplus.com/reference/string/to_string/ // Currently minify and to_string are identical but in the future, they may // differ. simdjson::internal::string_builder<> sb; sb.append(x); std::string_view answer = sb.str(); return std::string(answer.data(), answer.size()); } #if SIMDJSON_EXCEPTIONS template std::string to_string(simdjson_result x) { if (x.error()) { throw simdjson_error(x.error()); } return to_string(x.value()); } #endif /** * Minifies a JSON element or document, printing the smallest possible valid JSON. * * dom::parser parser; * element doc = parser.parse(" [ 1 , 2 , 3 ] "_padded); * cout << minify(doc) << endl; // prints [1,2,3] * */ template std::string minify(T x) { return to_string(x); } #if SIMDJSON_EXCEPTIONS template std::string minify(simdjson_result x) { if (x.error()) { throw simdjson_error(x.error()); } return to_string(x.value()); } #endif /** * Prettifies a JSON element or document, printing the valid JSON with indentation. 
* * dom::parser parser; * element doc = parser.parse(" [ 1 , 2 , 3 ] "_padded); * * // Prints: * // { * // [ * // 1, * // 2, * // 3 * // ] * // } * cout << prettify(doc) << endl; * */ template std::string prettify(T x) { simdjson::internal::string_builder sb; sb.append(x); std::string_view answer = sb.str(); return std::string(answer.data(), answer.size()); } #if SIMDJSON_EXCEPTIONS template std::string prettify(simdjson_result x) { if (x.error()) { throw simdjson_error(x.error()); } return to_string(x.value()); } #endif } // namespace simdjson #endif /* end file simdjson/dom/serialization.h */ // Deprecated API /* including simdjson/dom/jsonparser.h: #include "simdjson/dom/jsonparser.h" */ /* begin file simdjson/dom/jsonparser.h */ // TODO Remove this -- deprecated API and files #ifndef SIMDJSON_DOM_JSONPARSER_H #define SIMDJSON_DOM_JSONPARSER_H /* skipped duplicate #include "simdjson/dom/base.h" */ /* skipped duplicate #include "simdjson/dom/parser.h" */ /* skipped duplicate #include "simdjson/dom/element.h" */ /* including simdjson/dom/parser-inl.h: #include "simdjson/dom/parser-inl.h" */ /* begin file simdjson/dom/parser-inl.h */ #ifndef SIMDJSON_PARSER_INL_H #define SIMDJSON_PARSER_INL_H /* skipped duplicate #include "simdjson/dom/base.h" */ /* skipped duplicate #include "simdjson/dom/document_stream.h" */ /* skipped duplicate #include "simdjson/implementation.h" */ /* skipped duplicate #include "simdjson/internal/dom_parser_implementation.h" */ /* skipped duplicate #include "simdjson/error-inl.h" */ /* skipped duplicate #include "simdjson/padded_string-inl.h" */ /* including simdjson/dom/document_stream-inl.h: #include "simdjson/dom/document_stream-inl.h" */ /* begin file simdjson/dom/document_stream-inl.h */ #ifndef SIMDJSON_DOCUMENT_STREAM_INL_H #define SIMDJSON_DOCUMENT_STREAM_INL_H /* skipped duplicate #include "simdjson/dom/base.h" */ /* skipped duplicate #include "simdjson/dom/document_stream.h" */ /* including simdjson/dom/element-inl.h: #include 
"simdjson/dom/element-inl.h" */ /* begin file simdjson/dom/element-inl.h */ #ifndef SIMDJSON_ELEMENT_INL_H #define SIMDJSON_ELEMENT_INL_H /* skipped duplicate #include "simdjson/dom/base.h" */ /* skipped duplicate #include "simdjson/dom/element.h" */ /* skipped duplicate #include "simdjson/dom/document.h" */ /* skipped duplicate #include "simdjson/dom/object.h" */ /* including simdjson/internal/tape_type.h: #include "simdjson/internal/tape_type.h" */ /* begin file simdjson/internal/tape_type.h */ #ifndef SIMDJSON_INTERNAL_TAPE_TYPE_H #define SIMDJSON_INTERNAL_TAPE_TYPE_H namespace simdjson { namespace internal { /** * The possible types in the tape. */ enum class tape_type { ROOT = 'r', START_ARRAY = '[', START_OBJECT = '{', END_ARRAY = ']', END_OBJECT = '}', STRING = '"', INT64 = 'l', UINT64 = 'u', DOUBLE = 'd', TRUE_VALUE = 't', FALSE_VALUE = 'f', NULL_VALUE = 'n' }; // enum class tape_type } // namespace internal } // namespace simdjson #endif // SIMDJSON_INTERNAL_TAPE_TYPE_H /* end file simdjson/internal/tape_type.h */ /* including simdjson/dom/object-inl.h: #include "simdjson/dom/object-inl.h" */ /* begin file simdjson/dom/object-inl.h */ #ifndef SIMDJSON_OBJECT_INL_H #define SIMDJSON_OBJECT_INL_H /* skipped duplicate #include "simdjson/dom/base.h" */ /* skipped duplicate #include "simdjson/dom/object.h" */ /* skipped duplicate #include "simdjson/dom/document.h" */ /* skipped duplicate #include "simdjson/dom/element-inl.h" */ /* skipped duplicate #include "simdjson/error-inl.h" */ #include namespace simdjson { // // simdjson_result inline implementation // simdjson_inline simdjson_result::simdjson_result() noexcept : internal::simdjson_result_base() {} simdjson_inline simdjson_result::simdjson_result(dom::object value) noexcept : internal::simdjson_result_base(std::forward(value)) {} simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept : internal::simdjson_result_base(error) {} inline simdjson_result 
simdjson_result::operator[](std::string_view key) const noexcept { if (error()) { return error(); } return first[key]; } inline simdjson_result simdjson_result::operator[](const char *key) const noexcept { if (error()) { return error(); } return first[key]; } inline simdjson_result simdjson_result::at_pointer(std::string_view json_pointer) const noexcept { if (error()) { return error(); } return first.at_pointer(json_pointer); } inline simdjson_result simdjson_result::at_key(std::string_view key) const noexcept { if (error()) { return error(); } return first.at_key(key); } inline simdjson_result simdjson_result::at_key_case_insensitive(std::string_view key) const noexcept { if (error()) { return error(); } return first.at_key_case_insensitive(key); } #if SIMDJSON_EXCEPTIONS inline dom::object::iterator simdjson_result::begin() const noexcept(false) { if (error()) { throw simdjson_error(error()); } return first.begin(); } inline dom::object::iterator simdjson_result::end() const noexcept(false) { if (error()) { throw simdjson_error(error()); } return first.end(); } inline size_t simdjson_result::size() const noexcept(false) { if (error()) { throw simdjson_error(error()); } return first.size(); } #endif // SIMDJSON_EXCEPTIONS namespace dom { // // object inline implementation // simdjson_inline object::object() noexcept : tape{} {} simdjson_inline object::object(const internal::tape_ref &_tape) noexcept : tape{_tape} { } inline object::iterator object::begin() const noexcept { SIMDJSON_DEVELOPMENT_ASSERT(tape.usable()); // https://github.com/simdjson/simdjson/issues/1914 return internal::tape_ref(tape.doc, tape.json_index + 1); } inline object::iterator object::end() const noexcept { SIMDJSON_DEVELOPMENT_ASSERT(tape.usable()); // https://github.com/simdjson/simdjson/issues/1914 return internal::tape_ref(tape.doc, tape.after_element() - 1); } inline size_t object::size() const noexcept { SIMDJSON_DEVELOPMENT_ASSERT(tape.usable()); // 
https://github.com/simdjson/simdjson/issues/1914 return tape.scope_count(); } inline simdjson_result object::operator[](std::string_view key) const noexcept { return at_key(key); } inline simdjson_result object::operator[](const char *key) const noexcept { return at_key(key); } inline simdjson_result object::at_pointer(std::string_view json_pointer) const noexcept { SIMDJSON_DEVELOPMENT_ASSERT(tape.usable()); // https://github.com/simdjson/simdjson/issues/1914 if(json_pointer.empty()) { // an empty string means that we return the current node return element(this->tape); // copy the current node } else if(json_pointer[0] != '/') { // otherwise there is an error return INVALID_JSON_POINTER; } json_pointer = json_pointer.substr(1); size_t slash = json_pointer.find('/'); std::string_view key = json_pointer.substr(0, slash); // Grab the child with the given key simdjson_result child; // If there is an escape character in the key, unescape it and then get the child. size_t escape = key.find('~'); if (escape != std::string_view::npos) { // Unescape the key std::string unescaped(key); do { switch (unescaped[escape+1]) { case '0': unescaped.replace(escape, 2, "~"); break; case '1': unescaped.replace(escape, 2, "/"); break; default: return INVALID_JSON_POINTER; // "Unexpected ~ escape character in JSON pointer"); } escape = unescaped.find('~', escape+1); } while (escape != std::string::npos); child = at_key(unescaped); } else { child = at_key(key); } if(child.error()) { return child; // we do not continue if there was an error } // If there is a /, we have to recurse and look up more of the path if (slash != std::string_view::npos) { child = child.at_pointer(json_pointer.substr(slash)); } return child; } inline simdjson_result object::at_key(std::string_view key) const noexcept { iterator end_field = end(); for (iterator field = begin(); field != end_field; ++field) { if (field.key_equals(key)) { return field.value(); } } return NO_SUCH_FIELD; } // In case you wonder why we 
need this, please see // https://github.com/simdjson/simdjson/issues/323 // People do seek keys in a case-insensitive manner. inline simdjson_result object::at_key_case_insensitive(std::string_view key) const noexcept { iterator end_field = end(); for (iterator field = begin(); field != end_field; ++field) { if (field.key_equals_case_insensitive(key)) { return field.value(); } } return NO_SUCH_FIELD; } // // object::iterator inline implementation // simdjson_inline object::iterator::iterator(const internal::tape_ref &_tape) noexcept : tape{_tape} { } inline const key_value_pair object::iterator::operator*() const noexcept { return key_value_pair(key(), value()); } inline bool object::iterator::operator!=(const object::iterator& other) const noexcept { return tape.json_index != other.tape.json_index; } inline bool object::iterator::operator==(const object::iterator& other) const noexcept { return tape.json_index == other.tape.json_index; } inline bool object::iterator::operator<(const object::iterator& other) const noexcept { return tape.json_index < other.tape.json_index; } inline bool object::iterator::operator<=(const object::iterator& other) const noexcept { return tape.json_index <= other.tape.json_index; } inline bool object::iterator::operator>=(const object::iterator& other) const noexcept { return tape.json_index >= other.tape.json_index; } inline bool object::iterator::operator>(const object::iterator& other) const noexcept { return tape.json_index > other.tape.json_index; } inline object::iterator& object::iterator::operator++() noexcept { tape.json_index++; tape.json_index = tape.after_element(); return *this; } inline object::iterator object::iterator::operator++(int) noexcept { object::iterator out = *this; ++*this; return out; } inline std::string_view object::iterator::key() const noexcept { return tape.get_string_view(); } inline uint32_t object::iterator::key_length() const noexcept { return tape.get_string_length(); } inline const char* 
object::iterator::key_c_str() const noexcept { return reinterpret_cast(&tape.doc->string_buf[size_t(tape.tape_value()) + sizeof(uint32_t)]); } inline element object::iterator::value() const noexcept { return element(internal::tape_ref(tape.doc, tape.json_index + 1)); } /** * Design notes: * Instead of constructing a string_view and then comparing it with a * user-provided strings, it is probably more performant to have dedicated * functions taking as a parameter the string we want to compare against * and return true when they are equal. That avoids the creation of a temporary * std::string_view. Though it is possible for the compiler to avoid entirely * any overhead due to string_view, relying too much on compiler magic is * problematic: compiler magic sometimes fail, and then what do you do? * Also, enticing users to rely on high-performance function is probably better * on the long run. */ inline bool object::iterator::key_equals(std::string_view o) const noexcept { // We use the fact that the key length can be computed quickly // without access to the string buffer. const uint32_t len = key_length(); if(o.size() == len) { // We avoid construction of a temporary string_view instance. return (memcmp(o.data(), key_c_str(), len) == 0); } return false; } inline bool object::iterator::key_equals_case_insensitive(std::string_view o) const noexcept { // We use the fact that the key length can be computed quickly // without access to the string buffer. const uint32_t len = key_length(); if(o.size() == len) { // See For case-insensitive string comparisons, avoid char-by-char functions // https://lemire.me/blog/2020/04/30/for-case-insensitive-string-comparisons-avoid-char-by-char-functions/ // Note that it might be worth rolling our own strncasecmp function, with vectorization. 
return (simdjson_strncasecmp(o.data(), key_c_str(), len) == 0); } return false; } // // key_value_pair inline implementation // inline key_value_pair::key_value_pair(std::string_view _key, element _value) noexcept : key(_key), value(_value) {} } // namespace dom } // namespace simdjson #if defined(__cpp_lib_ranges) static_assert(std::ranges::view); static_assert(std::ranges::sized_range); #if SIMDJSON_EXCEPTIONS static_assert(std::ranges::view>); static_assert(std::ranges::sized_range>); #endif // SIMDJSON_EXCEPTIONS #endif // defined(__cpp_lib_ranges) #endif // SIMDJSON_OBJECT_INL_H /* end file simdjson/dom/object-inl.h */ /* skipped duplicate #include "simdjson/error-inl.h" */ #include #include namespace simdjson { // // simdjson_result inline implementation // simdjson_inline simdjson_result::simdjson_result() noexcept : internal::simdjson_result_base() {} simdjson_inline simdjson_result::simdjson_result(dom::element &&value) noexcept : internal::simdjson_result_base(std::forward(value)) {} simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept : internal::simdjson_result_base(error) {} inline simdjson_result simdjson_result::type() const noexcept { if (error()) { return error(); } return first.type(); } template simdjson_inline bool simdjson_result::is() const noexcept { return !error() && first.is(); } template simdjson_inline simdjson_result simdjson_result::get() const noexcept { if (error()) { return error(); } return first.get(); } template simdjson_warn_unused simdjson_inline error_code simdjson_result::get(T &value) const noexcept { if (error()) { return error(); } return first.get(value); } simdjson_inline simdjson_result simdjson_result::get_array() const noexcept { if (error()) { return error(); } return first.get_array(); } simdjson_inline simdjson_result simdjson_result::get_object() const noexcept { if (error()) { return error(); } return first.get_object(); } simdjson_inline simdjson_result simdjson_result::get_c_str() const 
noexcept { if (error()) { return error(); } return first.get_c_str(); } simdjson_inline simdjson_result simdjson_result::get_string_length() const noexcept { if (error()) { return error(); } return first.get_string_length(); } simdjson_inline simdjson_result simdjson_result::get_string() const noexcept { if (error()) { return error(); } return first.get_string(); } simdjson_inline simdjson_result simdjson_result::get_int64() const noexcept { if (error()) { return error(); } return first.get_int64(); } simdjson_inline simdjson_result simdjson_result::get_uint64() const noexcept { if (error()) { return error(); } return first.get_uint64(); } simdjson_inline simdjson_result simdjson_result::get_double() const noexcept { if (error()) { return error(); } return first.get_double(); } simdjson_inline simdjson_result simdjson_result::get_bool() const noexcept { if (error()) { return error(); } return first.get_bool(); } simdjson_inline bool simdjson_result::is_array() const noexcept { return !error() && first.is_array(); } simdjson_inline bool simdjson_result::is_object() const noexcept { return !error() && first.is_object(); } simdjson_inline bool simdjson_result::is_string() const noexcept { return !error() && first.is_string(); } simdjson_inline bool simdjson_result::is_int64() const noexcept { return !error() && first.is_int64(); } simdjson_inline bool simdjson_result::is_uint64() const noexcept { return !error() && first.is_uint64(); } simdjson_inline bool simdjson_result::is_double() const noexcept { return !error() && first.is_double(); } simdjson_inline bool simdjson_result::is_number() const noexcept { return !error() && first.is_number(); } simdjson_inline bool simdjson_result::is_bool() const noexcept { return !error() && first.is_bool(); } simdjson_inline bool simdjson_result::is_null() const noexcept { return !error() && first.is_null(); } simdjson_inline simdjson_result simdjson_result::operator[](std::string_view key) const noexcept { if (error()) { return 
error(); } return first[key]; } simdjson_inline simdjson_result simdjson_result::operator[](const char *key) const noexcept { if (error()) { return error(); } return first[key]; } simdjson_inline simdjson_result simdjson_result::at_pointer(const std::string_view json_pointer) const noexcept { if (error()) { return error(); } return first.at_pointer(json_pointer); } #ifndef SIMDJSON_DISABLE_DEPRECATED_API [[deprecated("For standard compliance, use at_pointer instead, and prefix your pointers with a slash '/', see RFC6901 ")]] simdjson_inline simdjson_result simdjson_result::at(const std::string_view json_pointer) const noexcept { SIMDJSON_PUSH_DISABLE_WARNINGS SIMDJSON_DISABLE_DEPRECATED_WARNING if (error()) { return error(); } return first.at(json_pointer); SIMDJSON_POP_DISABLE_WARNINGS } #endif // SIMDJSON_DISABLE_DEPRECATED_API simdjson_inline simdjson_result simdjson_result::at(size_t index) const noexcept { if (error()) { return error(); } return first.at(index); } simdjson_inline simdjson_result simdjson_result::at_key(std::string_view key) const noexcept { if (error()) { return error(); } return first.at_key(key); } simdjson_inline simdjson_result simdjson_result::at_key_case_insensitive(std::string_view key) const noexcept { if (error()) { return error(); } return first.at_key_case_insensitive(key); } #if SIMDJSON_EXCEPTIONS simdjson_inline simdjson_result::operator bool() const noexcept(false) { return get(); } simdjson_inline simdjson_result::operator const char *() const noexcept(false) { return get(); } simdjson_inline simdjson_result::operator std::string_view() const noexcept(false) { return get(); } simdjson_inline simdjson_result::operator uint64_t() const noexcept(false) { return get(); } simdjson_inline simdjson_result::operator int64_t() const noexcept(false) { return get(); } simdjson_inline simdjson_result::operator double() const noexcept(false) { return get(); } simdjson_inline simdjson_result::operator dom::array() const noexcept(false) { 
return get(); } simdjson_inline simdjson_result::operator dom::object() const noexcept(false) { return get(); } simdjson_inline dom::array::iterator simdjson_result::begin() const noexcept(false) { if (error()) { throw simdjson_error(error()); } return first.begin(); } simdjson_inline dom::array::iterator simdjson_result::end() const noexcept(false) { if (error()) { throw simdjson_error(error()); } return first.end(); } #endif // SIMDJSON_EXCEPTIONS namespace dom { // // element inline implementation // simdjson_inline element::element() noexcept : tape{} {} simdjson_inline element::element(const internal::tape_ref &_tape) noexcept : tape{_tape} { } inline element_type element::type() const noexcept { SIMDJSON_DEVELOPMENT_ASSERT(tape.usable()); // https://github.com/simdjson/simdjson/issues/1914 auto tape_type = tape.tape_ref_type(); return tape_type == internal::tape_type::FALSE_VALUE ? element_type::BOOL : static_cast(tape_type); } inline simdjson_result element::get_bool() const noexcept { SIMDJSON_DEVELOPMENT_ASSERT(tape.usable()); // https://github.com/simdjson/simdjson/issues/1914 if(tape.is_true()) { return true; } else if(tape.is_false()) { return false; } return INCORRECT_TYPE; } inline simdjson_result element::get_c_str() const noexcept { SIMDJSON_DEVELOPMENT_ASSERT(tape.usable()); // https://github.com/simdjson/simdjson/issues/1914 switch (tape.tape_ref_type()) { case internal::tape_type::STRING: { return tape.get_c_str(); } default: return INCORRECT_TYPE; } } inline simdjson_result element::get_string_length() const noexcept { SIMDJSON_DEVELOPMENT_ASSERT(tape.usable()); // https://github.com/simdjson/simdjson/issues/1914 switch (tape.tape_ref_type()) { case internal::tape_type::STRING: { return tape.get_string_length(); } default: return INCORRECT_TYPE; } } inline simdjson_result element::get_string() const noexcept { SIMDJSON_DEVELOPMENT_ASSERT(tape.usable()); // https://github.com/simdjson/simdjson/issues/1914 switch (tape.tape_ref_type()) { case 
internal::tape_type::STRING: return tape.get_string_view(); default: return INCORRECT_TYPE; } } inline simdjson_result element::get_uint64() const noexcept { SIMDJSON_DEVELOPMENT_ASSERT(tape.usable()); // https://github.com/simdjson/simdjson/issues/1914 if(simdjson_unlikely(!tape.is_uint64())) { // branch rarely taken if(tape.is_int64()) { int64_t result = tape.next_tape_value(); if (result < 0) { return NUMBER_OUT_OF_RANGE; } return uint64_t(result); } return INCORRECT_TYPE; } return tape.next_tape_value(); } inline simdjson_result element::get_int64() const noexcept { SIMDJSON_DEVELOPMENT_ASSERT(tape.usable()); // https://github.com/simdjson/simdjson/issues/1914 if(simdjson_unlikely(!tape.is_int64())) { // branch rarely taken if(tape.is_uint64()) { uint64_t result = tape.next_tape_value(); // Wrapping max in parens to handle Windows issue: https://stackoverflow.com/questions/11544073/how-do-i-deal-with-the-max-macro-in-windows-h-colliding-with-max-in-std if (result > uint64_t((std::numeric_limits::max)())) { return NUMBER_OUT_OF_RANGE; } return static_cast(result); } return INCORRECT_TYPE; } return tape.next_tape_value(); } inline simdjson_result element::get_double() const noexcept { SIMDJSON_DEVELOPMENT_ASSERT(tape.usable()); // https://github.com/simdjson/simdjson/issues/1914 // Performance considerations: // 1. Querying tape_ref_type() implies doing a shift, it is fast to just do a straight // comparison. // 2. Using a switch-case relies on the compiler guessing what kind of code generation // we want... But the compiler cannot know that we expect the type to be "double" // most of the time. // We can expect get to refer to a double type almost all the time. // It is important to craft the code accordingly so that the compiler can use this // information. (This could also be solved with profile-guided optimization.) 
if(simdjson_unlikely(!tape.is_double())) { // branch rarely taken if(tape.is_uint64()) { return double(tape.next_tape_value()); } else if(tape.is_int64()) { return double(tape.next_tape_value()); } return INCORRECT_TYPE; } // this is common: return tape.next_tape_value(); } inline simdjson_result element::get_array() const noexcept { SIMDJSON_DEVELOPMENT_ASSERT(tape.usable()); // https://github.com/simdjson/simdjson/issues/1914 switch (tape.tape_ref_type()) { case internal::tape_type::START_ARRAY: return array(tape); default: return INCORRECT_TYPE; } } inline simdjson_result element::get_object() const noexcept { SIMDJSON_DEVELOPMENT_ASSERT(tape.usable()); // https://github.com/simdjson/simdjson/issues/1914 switch (tape.tape_ref_type()) { case internal::tape_type::START_OBJECT: return object(tape); default: return INCORRECT_TYPE; } } template simdjson_warn_unused simdjson_inline error_code element::get(T &value) const noexcept { return get().get(value); } // An element-specific version prevents recursion with simdjson_result::get(value) template<> simdjson_warn_unused simdjson_inline error_code element::get(element &value) const noexcept { value = element(tape); return SUCCESS; } template inline void element::tie(T &value, error_code &error) && noexcept { error = get(value); } template simdjson_inline bool element::is() const noexcept { auto result = get(); return !result.error(); } template<> inline simdjson_result element::get() const noexcept { return get_array(); } template<> inline simdjson_result element::get() const noexcept { return get_object(); } template<> inline simdjson_result element::get() const noexcept { return get_c_str(); } template<> inline simdjson_result element::get() const noexcept { return get_string(); } template<> inline simdjson_result element::get() const noexcept { return get_int64(); } template<> inline simdjson_result element::get() const noexcept { return get_uint64(); } template<> inline simdjson_result element::get() const 
noexcept { return get_double(); } template<> inline simdjson_result element::get() const noexcept { return get_bool(); } inline bool element::is_array() const noexcept { return is(); } inline bool element::is_object() const noexcept { return is(); } inline bool element::is_string() const noexcept { return is(); } inline bool element::is_int64() const noexcept { return is(); } inline bool element::is_uint64() const noexcept { return is(); } inline bool element::is_double() const noexcept { return is(); } inline bool element::is_bool() const noexcept { return is(); } inline bool element::is_number() const noexcept { return is_int64() || is_uint64() || is_double(); } inline bool element::is_null() const noexcept { return tape.is_null_on_tape(); } #if SIMDJSON_EXCEPTIONS inline element::operator bool() const noexcept(false) { return get(); } inline element::operator const char*() const noexcept(false) { return get(); } inline element::operator std::string_view() const noexcept(false) { return get(); } inline element::operator uint64_t() const noexcept(false) { return get(); } inline element::operator int64_t() const noexcept(false) { return get(); } inline element::operator double() const noexcept(false) { return get(); } inline element::operator array() const noexcept(false) { return get(); } inline element::operator object() const noexcept(false) { return get(); } inline array::iterator element::begin() const noexcept(false) { return get().begin(); } inline array::iterator element::end() const noexcept(false) { return get().end(); } #endif // SIMDJSON_EXCEPTIONS inline simdjson_result element::operator[](std::string_view key) const noexcept { return at_key(key); } inline simdjson_result element::operator[](const char *key) const noexcept { return at_key(key); } inline simdjson_result element::at_pointer(std::string_view json_pointer) const noexcept { SIMDJSON_DEVELOPMENT_ASSERT(tape.usable()); // https://github.com/simdjson/simdjson/issues/1914 switch 
(tape.tape_ref_type()) { case internal::tape_type::START_OBJECT: return object(tape).at_pointer(json_pointer); case internal::tape_type::START_ARRAY: return array(tape).at_pointer(json_pointer); default: { if(!json_pointer.empty()) { // a non-empty string is invalid on an atom return INVALID_JSON_POINTER; } // an empty string means that we return the current node dom::element copy(*this); return simdjson_result(std::move(copy)); } } } #ifndef SIMDJSON_DISABLE_DEPRECATED_API [[deprecated("For standard compliance, use at_pointer instead, and prefix your pointers with a slash '/', see RFC6901 ")]] inline simdjson_result element::at(std::string_view json_pointer) const noexcept { // version 0.4 of simdjson allowed non-compliant pointers auto std_pointer = (json_pointer.empty() ? "" : "/") + std::string(json_pointer.begin(), json_pointer.end()); return at_pointer(std_pointer); } #endif // SIMDJSON_DISABLE_DEPRECATED_API inline simdjson_result element::at(size_t index) const noexcept { return get().at(index); } inline simdjson_result element::at_key(std::string_view key) const noexcept { return get().at_key(key); } inline simdjson_result element::at_key_case_insensitive(std::string_view key) const noexcept { return get().at_key_case_insensitive(key); } inline bool element::operator<(const element &other) const noexcept { return tape.json_index < other.tape.json_index; } inline bool element::operator==(const element &other) const noexcept { return tape.json_index == other.tape.json_index; } inline bool element::dump_raw_tape(std::ostream &out) const noexcept { SIMDJSON_DEVELOPMENT_ASSERT(tape.usable()); // https://github.com/simdjson/simdjson/issues/1914 return tape.doc->dump_raw_tape(out); } inline std::ostream& operator<<(std::ostream& out, element_type type) { switch (type) { case element_type::ARRAY: return out << "array"; case element_type::OBJECT: return out << "object"; case element_type::INT64: return out << "int64_t"; case element_type::UINT64: return out << 
"uint64_t"; case element_type::DOUBLE: return out << "double"; case element_type::STRING: return out << "string"; case element_type::BOOL: return out << "bool"; case element_type::NULL_VALUE: return out << "null"; default: return out << "unexpected content!!!"; // abort() usage is forbidden in the library } } } // namespace dom } // namespace simdjson #endif // SIMDJSON_ELEMENT_INL_H /* end file simdjson/dom/element-inl.h */ /* skipped duplicate #include "simdjson/dom/parser-inl.h" */ /* skipped duplicate #include "simdjson/error-inl.h" */ /* skipped duplicate #include "simdjson/internal/dom_parser_implementation.h" */ namespace simdjson { namespace dom { #ifdef SIMDJSON_THREADS_ENABLED inline void stage1_worker::finish() { // After calling "run" someone would call finish() to wait // for the end of the processing. // This function will wait until either the thread has done // the processing or, else, the destructor has been called. std::unique_lock lock(locking_mutex); cond_var.wait(lock, [this]{return has_work == false;}); } inline stage1_worker::~stage1_worker() { // The thread may never outlive the stage1_worker instance // and will always be stopped/joined before the stage1_worker // instance is gone. stop_thread(); } inline void stage1_worker::start_thread() { std::unique_lock lock(locking_mutex); if(thread.joinable()) { return; // This should never happen but we never want to create more than one thread. } thread = std::thread([this]{ while(true) { std::unique_lock thread_lock(locking_mutex); // We wait for either "run" or "stop_thread" to be called. cond_var.wait(thread_lock, [this]{return has_work || !can_work;}); // If, for some reason, the stop_thread() method was called (i.e., the // destructor of stage1_worker is called, then we want to immediately destroy // the thread (and not do any more processing). 
if(!can_work) { break; } this->owner->stage1_thread_error = this->owner->run_stage1(*this->stage1_thread_parser, this->_next_batch_start); this->has_work = false; // The condition variable call should be moved after thread_lock.unlock() for performance // reasons but thread sanitizers may report it as a data race if we do. // See https://stackoverflow.com/questions/35775501/c-should-condition-variable-be-notified-under-lock cond_var.notify_one(); // will notify "finish" thread_lock.unlock(); } } ); } inline void stage1_worker::stop_thread() { std::unique_lock lock(locking_mutex); // We have to make sure that all locks can be released. can_work = false; has_work = false; cond_var.notify_all(); lock.unlock(); if(thread.joinable()) { thread.join(); } } inline void stage1_worker::run(document_stream * ds, dom::parser * stage1, size_t next_batch_start) { std::unique_lock lock(locking_mutex); owner = ds; _next_batch_start = next_batch_start; stage1_thread_parser = stage1; has_work = true; // The condition variable call should be moved after thread_lock.unlock() for performance // reasons but thread sanitizers may report it as a data race if we do. // See https://stackoverflow.com/questions/35775501/c-should-condition-variable-be-notified-under-lock cond_var.notify_one(); // will notify the thread lock that we have work lock.unlock(); } #endif simdjson_inline document_stream::document_stream( dom::parser &_parser, const uint8_t *_buf, size_t _len, size_t _batch_size ) noexcept : parser{&_parser}, buf{_buf}, len{_len}, batch_size{_batch_size <= MINIMAL_BATCH_SIZE ? 
MINIMAL_BATCH_SIZE : _batch_size}, error{SUCCESS} #ifdef SIMDJSON_THREADS_ENABLED , use_thread(_parser.threaded) // we need to make a copy because _parser.threaded can change #endif { #ifdef SIMDJSON_THREADS_ENABLED if(worker.get() == nullptr) { error = MEMALLOC; } #endif } simdjson_inline document_stream::document_stream() noexcept : parser{nullptr}, buf{nullptr}, len{0}, batch_size{0}, error{UNINITIALIZED} #ifdef SIMDJSON_THREADS_ENABLED , use_thread(false) #endif { } simdjson_inline document_stream::~document_stream() noexcept { #ifdef SIMDJSON_THREADS_ENABLED worker.reset(); #endif } simdjson_inline document_stream::iterator::iterator() noexcept : stream{nullptr}, finished{true} { } simdjson_inline document_stream::iterator document_stream::begin() noexcept { start(); // If there are no documents, we're finished. return iterator(this, error == EMPTY); } simdjson_inline document_stream::iterator document_stream::end() noexcept { return iterator(this, true); } simdjson_inline document_stream::iterator::iterator(document_stream* _stream, bool is_end) noexcept : stream{_stream}, finished{is_end} { } simdjson_inline document_stream::iterator::reference document_stream::iterator::operator*() noexcept { // Note that in case of error, we do not yet mark // the iterator as "finished": this detection is done // in the operator++ function since it is possible // to call operator++ repeatedly while omitting // calls to operator*. if (stream->error) { return stream->error; } return stream->parser->doc.root(); } simdjson_inline document_stream::iterator& document_stream::iterator::operator++() noexcept { // If there is an error, then we want the iterator // to be finished, no matter what. (E.g., we do not // keep generating documents with errors, or go beyond // a document with errors.) // // Users do not have to call "operator*()" when they use operator++, // so we need to end the stream in the operator++ function. 
// // Note that setting finished = true is essential otherwise // we would enter an infinite loop. if (stream->error) { finished = true; } // Note that stream->error() is guarded against error conditions // (it will immediately return if stream->error casts to false). // In effect, this next function does nothing when (stream->error) // is true (hence the risk of an infinite loop). stream->next(); // If that was the last document, we're finished. // It is the only type of error we do not want to appear // in operator*. if (stream->error == EMPTY) { finished = true; } // If we had any other kind of error (not EMPTY) then we want // to pass it along to the operator* and we cannot mark the result // as "finished" just yet. return *this; } simdjson_inline bool document_stream::iterator::operator!=(const document_stream::iterator &other) const noexcept { return finished != other.finished; } inline void document_stream::start() noexcept { if (error) { return; } error = parser->ensure_capacity(batch_size); if (error) { return; } // Always run the first stage 1 parse immediately batch_start = 0; error = run_stage1(*parser, batch_start); while(error == EMPTY) { // In exceptional cases, we may start with an empty block batch_start = next_batch_start(); if (batch_start >= len) { return; } error = run_stage1(*parser, batch_start); } if (error) { return; } #ifdef SIMDJSON_THREADS_ENABLED if (use_thread && next_batch_start() < len) { // Kick off the first thread if needed error = stage1_thread_parser.ensure_capacity(batch_size); if (error) { return; } worker->start_thread(); start_stage1_thread(); if (error) { return; } } #endif // SIMDJSON_THREADS_ENABLED next(); } simdjson_inline size_t document_stream::iterator::current_index() const noexcept { return stream->doc_index; } simdjson_inline std::string_view document_stream::iterator::source() const noexcept { const char* start = reinterpret_cast(stream->buf) + current_index(); bool object_or_array = ((*start == '[') || (*start 
== '{')); if(object_or_array) { size_t next_doc_index = stream->batch_start + stream->parser->implementation->structural_indexes[stream->parser->implementation->next_structural_index - 1]; return std::string_view(start, next_doc_index - current_index() + 1); } else { size_t next_doc_index = stream->batch_start + stream->parser->implementation->structural_indexes[stream->parser->implementation->next_structural_index]; return std::string_view(reinterpret_cast(stream->buf) + current_index(), next_doc_index - current_index() - 1); } } inline void document_stream::next() noexcept { // We always exit at once, once in an error condition. if (error) { return; } // Load the next document from the batch doc_index = batch_start + parser->implementation->structural_indexes[parser->implementation->next_structural_index]; error = parser->implementation->stage2_next(parser->doc); // If that was the last document in the batch, load another batch (if available) while (error == EMPTY) { batch_start = next_batch_start(); if (batch_start >= len) { break; } #ifdef SIMDJSON_THREADS_ENABLED if(use_thread) { load_from_stage1_thread(); } else { error = run_stage1(*parser, batch_start); } #else error = run_stage1(*parser, batch_start); #endif if (error) { continue; } // If the error was EMPTY, we may want to load another batch. 
// Run stage 2 on the first document in the batch doc_index = batch_start + parser->implementation->structural_indexes[parser->implementation->next_structural_index]; error = parser->implementation->stage2_next(parser->doc); } } inline size_t document_stream::size_in_bytes() const noexcept { return len; } inline size_t document_stream::truncated_bytes() const noexcept { if(error == CAPACITY) { return len - batch_start; } return parser->implementation->structural_indexes[parser->implementation->n_structural_indexes] - parser->implementation->structural_indexes[parser->implementation->n_structural_indexes + 1]; } inline size_t document_stream::next_batch_start() const noexcept { return batch_start + parser->implementation->structural_indexes[parser->implementation->n_structural_indexes]; } inline error_code document_stream::run_stage1(dom::parser &p, size_t _batch_start) noexcept { size_t remaining = len - _batch_start; if (remaining <= batch_size) { return p.implementation->stage1(&buf[_batch_start], remaining, stage1_mode::streaming_final); } else { return p.implementation->stage1(&buf[_batch_start], batch_size, stage1_mode::streaming_partial); } } #ifdef SIMDJSON_THREADS_ENABLED inline void document_stream::load_from_stage1_thread() noexcept { worker->finish(); // Swap to the parser that was loaded up in the thread. Make sure the parser has // enough memory to swap to, as well. std::swap(*parser, stage1_thread_parser); error = stage1_thread_error; if (error) { return; } // If there's anything left, start the stage 1 thread! if (next_batch_start() < len) { start_stage1_thread(); } } inline void document_stream::start_stage1_thread() noexcept { // we call the thread on a lambda that will update // this->stage1_thread_error // there is only one thread that may write to this value // TODO this is NOT exception-safe. 
this->stage1_thread_error = UNINITIALIZED; // In case something goes wrong, make sure it's an error size_t _next_batch_start = this->next_batch_start(); worker->run(this, & this->stage1_thread_parser, _next_batch_start); } #endif // SIMDJSON_THREADS_ENABLED } // namespace dom simdjson_inline simdjson_result::simdjson_result() noexcept : simdjson_result_base() { } simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept : simdjson_result_base(error) { } simdjson_inline simdjson_result::simdjson_result(dom::document_stream &&value) noexcept : simdjson_result_base(std::forward(value)) { } #if SIMDJSON_EXCEPTIONS simdjson_inline dom::document_stream::iterator simdjson_result::begin() noexcept(false) { if (error()) { throw simdjson_error(error()); } return first.begin(); } simdjson_inline dom::document_stream::iterator simdjson_result::end() noexcept(false) { if (error()) { throw simdjson_error(error()); } return first.end(); } #else // SIMDJSON_EXCEPTIONS #ifndef SIMDJSON_DISABLE_DEPRECATED_API simdjson_inline dom::document_stream::iterator simdjson_result::begin() noexcept { first.error = error(); return first.begin(); } simdjson_inline dom::document_stream::iterator simdjson_result::end() noexcept { first.error = error(); return first.end(); } #endif // SIMDJSON_DISABLE_DEPRECATED_API #endif // SIMDJSON_EXCEPTIONS } // namespace simdjson #endif // SIMDJSON_DOCUMENT_STREAM_INL_H /* end file simdjson/dom/document_stream-inl.h */ /* skipped duplicate #include "simdjson/dom/element-inl.h" */ #include #include /* memcmp */ namespace simdjson { namespace dom { // // parser inline implementation // simdjson_inline parser::parser(size_t max_capacity) noexcept : _max_capacity{max_capacity}, loaded_bytes(nullptr) { } simdjson_inline parser::parser(parser &&other) noexcept = default; simdjson_inline parser &parser::operator=(parser &&other) noexcept = default; inline bool parser::is_valid() const noexcept { return valid; } inline int parser::get_error_code() 
const noexcept { return error; } inline std::string parser::get_error_message() const noexcept { return error_message(error); } inline bool parser::dump_raw_tape(std::ostream &os) const noexcept { return valid ? doc.dump_raw_tape(os) : false; } inline simdjson_result parser::read_file(const std::string &path) noexcept { // Open the file SIMDJSON_PUSH_DISABLE_WARNINGS SIMDJSON_DISABLE_DEPRECATED_WARNING // Disable CRT_SECURE warning on MSVC: manually verified this is safe std::FILE *fp = std::fopen(path.c_str(), "rb"); SIMDJSON_POP_DISABLE_WARNINGS if (fp == nullptr) { return IO_ERROR; } // Get the file size int ret; #if SIMDJSON_VISUAL_STUDIO && !SIMDJSON_IS_32BITS ret = _fseeki64(fp, 0, SEEK_END); #else ret = std::fseek(fp, 0, SEEK_END); #endif // _WIN64 if(ret < 0) { std::fclose(fp); return IO_ERROR; } #if SIMDJSON_VISUAL_STUDIO && !SIMDJSON_IS_32BITS __int64 len = _ftelli64(fp); if(len == -1L) { std::fclose(fp); return IO_ERROR; } #else long len = std::ftell(fp); if((len < 0) || (len == LONG_MAX)) { std::fclose(fp); return IO_ERROR; } #endif // Make sure we have enough capacity to load the file if (_loaded_bytes_capacity < size_t(len)) { loaded_bytes.reset( internal::allocate_padded_buffer(len) ); if (!loaded_bytes) { std::fclose(fp); return MEMALLOC; } _loaded_bytes_capacity = len; } // Read the string std::rewind(fp); size_t bytes_read = std::fread(loaded_bytes.get(), 1, len, fp); if (std::fclose(fp) != 0 || bytes_read != size_t(len)) { return IO_ERROR; } return bytes_read; } inline simdjson_result parser::load(const std::string &path) & noexcept { size_t len; auto _error = read_file(path).get(len); if (_error) { return _error; } return parse(loaded_bytes.get(), len, false); } inline simdjson_result parser::load_many(const std::string &path, size_t batch_size) noexcept { size_t len; auto _error = read_file(path).get(len); if (_error) { return _error; } if(batch_size < MINIMAL_BATCH_SIZE) { batch_size = MINIMAL_BATCH_SIZE; } return document_stream(*this, 
reinterpret_cast(loaded_bytes.get()), len, batch_size); } inline simdjson_result parser::parse_into_document(document& provided_doc, const uint8_t *buf, size_t len, bool realloc_if_needed) & noexcept { // Important: we need to ensure that document has enough capacity. // Important: It is possible that provided_doc is actually the internal 'doc' within the parser!!! error_code _error = ensure_capacity(provided_doc, len); if (_error) { return _error; } if (realloc_if_needed) { // Make sure we have enough capacity to copy len bytes if (!loaded_bytes || _loaded_bytes_capacity < len) { loaded_bytes.reset( internal::allocate_padded_buffer(len) ); if (!loaded_bytes) { return MEMALLOC; } _loaded_bytes_capacity = len; } std::memcpy(static_cast(loaded_bytes.get()), buf, len); buf = reinterpret_cast(loaded_bytes.get()); } if((len >= 3) && (std::memcmp(buf, "\xEF\xBB\xBF", 3) == 0)) { buf += 3; len -= 3; } _error = implementation->parse(buf, len, provided_doc); if (_error) { return _error; } return provided_doc.root(); } simdjson_inline simdjson_result parser::parse_into_document(document& provided_doc, const char *buf, size_t len, bool realloc_if_needed) & noexcept { return parse_into_document(provided_doc, reinterpret_cast(buf), len, realloc_if_needed); } simdjson_inline simdjson_result parser::parse_into_document(document& provided_doc, const std::string &s) & noexcept { return parse_into_document(provided_doc, s.data(), s.length(), s.capacity() - s.length() < SIMDJSON_PADDING); } simdjson_inline simdjson_result parser::parse_into_document(document& provided_doc, const padded_string &s) & noexcept { return parse_into_document(provided_doc, s.data(), s.length(), false); } inline simdjson_result parser::parse(const uint8_t *buf, size_t len, bool realloc_if_needed) & noexcept { return parse_into_document(doc, buf, len, realloc_if_needed); } simdjson_inline simdjson_result parser::parse(const char *buf, size_t len, bool realloc_if_needed) & noexcept { return 
parse(reinterpret_cast(buf), len, realloc_if_needed); } simdjson_inline simdjson_result parser::parse(const std::string &s) & noexcept { return parse(s.data(), s.length(), s.capacity() - s.length() < SIMDJSON_PADDING); } simdjson_inline simdjson_result parser::parse(const padded_string &s) & noexcept { return parse(s.data(), s.length(), false); } simdjson_inline simdjson_result parser::parse(const padded_string_view &v) & noexcept { return parse(v.data(), v.length(), false); } inline simdjson_result parser::parse_many(const uint8_t *buf, size_t len, size_t batch_size) noexcept { if(batch_size < MINIMAL_BATCH_SIZE) { batch_size = MINIMAL_BATCH_SIZE; } if((len >= 3) && (std::memcmp(buf, "\xEF\xBB\xBF", 3) == 0)) { buf += 3; len -= 3; } return document_stream(*this, buf, len, batch_size); } inline simdjson_result parser::parse_many(const char *buf, size_t len, size_t batch_size) noexcept { return parse_many(reinterpret_cast(buf), len, batch_size); } inline simdjson_result parser::parse_many(const std::string &s, size_t batch_size) noexcept { return parse_many(s.data(), s.length(), batch_size); } inline simdjson_result parser::parse_many(const padded_string &s, size_t batch_size) noexcept { return parse_many(s.data(), s.length(), batch_size); } simdjson_inline size_t parser::capacity() const noexcept { return implementation ? implementation->capacity() : 0; } simdjson_inline size_t parser::max_capacity() const noexcept { return _max_capacity; } simdjson_inline size_t parser::max_depth() const noexcept { return implementation ? 
implementation->max_depth() : DEFAULT_MAX_DEPTH; } simdjson_warn_unused inline error_code parser::allocate(size_t capacity, size_t max_depth) noexcept { // // Reallocate implementation if needed // error_code err; if (implementation) { err = implementation->allocate(capacity, max_depth); } else { err = simdjson::get_active_implementation()->create_dom_parser_implementation(capacity, max_depth, implementation); } if (err) { return err; } return SUCCESS; } #ifndef SIMDJSON_DISABLE_DEPRECATED_API simdjson_warn_unused inline bool parser::allocate_capacity(size_t capacity, size_t max_depth) noexcept { return !allocate(capacity, max_depth); } #endif // SIMDJSON_DISABLE_DEPRECATED_API inline error_code parser::ensure_capacity(size_t desired_capacity) noexcept { return ensure_capacity(doc, desired_capacity); } inline error_code parser::ensure_capacity(document& target_document, size_t desired_capacity) noexcept { // 1. It is wasteful to allocate a document and a parser for documents spanning less than MINIMAL_DOCUMENT_CAPACITY bytes. // 2. If we allow desired_capacity = 0 then it is possible to exit this function with implementation == nullptr. if(desired_capacity < MINIMAL_DOCUMENT_CAPACITY) { desired_capacity = MINIMAL_DOCUMENT_CAPACITY; } // If we don't have enough capacity, (try to) automatically bump it. // If the document needs allocation, do it too. // Both in one if statement to minimize unlikely branching. // // Note: we must make sure that this function is called if capacity() == 0. We do so because we // ensure that desired_capacity > 0. if (simdjson_unlikely(capacity() < desired_capacity || target_document.capacity() < desired_capacity)) { if (desired_capacity > max_capacity()) { return error = CAPACITY; } error_code err1 = target_document.capacity() < desired_capacity ? target_document.allocate(desired_capacity) : SUCCESS; error_code err2 = capacity() < desired_capacity ? 
allocate(desired_capacity, max_depth()) : SUCCESS; if(err1 != SUCCESS) { return error = err1; } if(err2 != SUCCESS) { return error = err2; } } return SUCCESS; } simdjson_inline void parser::set_max_capacity(size_t max_capacity) noexcept { if(max_capacity > MINIMAL_DOCUMENT_CAPACITY) { _max_capacity = max_capacity; } else { _max_capacity = MINIMAL_DOCUMENT_CAPACITY; } } } // namespace dom } // namespace simdjson #endif // SIMDJSON_PARSER_INL_H /* end file simdjson/dom/parser-inl.h */ namespace simdjson { // // C API (json_parse and build_parsed_json) declarations // #ifndef SIMDJSON_DISABLE_DEPRECATED_API [[deprecated("Use parser.parse() instead")]] inline int json_parse(const uint8_t *buf, size_t len, dom::parser &parser, bool realloc_if_needed = true) noexcept { error_code code = parser.parse(buf, len, realloc_if_needed).error(); // The deprecated json_parse API is a signal that the user plans to *use* the error code / valid // bits in the parser instead of heeding the result code. The normal parser unsets those in // anticipation of making the error code ephemeral. // Here we put the code back into the parser, until we've removed this method. parser.valid = code == SUCCESS; parser.error = code; return code; } [[deprecated("Use parser.parse() instead")]] inline int json_parse(const char *buf, size_t len, dom::parser &parser, bool realloc_if_needed = true) noexcept { error_code code = parser.parse(buf, len, realloc_if_needed).error(); // The deprecated json_parse API is a signal that the user plans to *use* the error code / valid // bits in the parser instead of heeding the result code. The normal parser unsets those in // anticipation of making the error code ephemeral. // Here we put the code back into the parser, until we've removed this method. 
parser.valid = code == SUCCESS; parser.error = code; return code; } [[deprecated("Use parser.parse() instead")]] inline int json_parse(const std::string &s, dom::parser &parser, bool realloc_if_needed = true) noexcept { error_code code = parser.parse(s.data(), s.length(), realloc_if_needed).error(); // The deprecated json_parse API is a signal that the user plans to *use* the error code / valid // bits in the parser instead of heeding the result code. The normal parser unsets those in // anticipation of making the error code ephemeral. // Here we put the code back into the parser, until we've removed this method. parser.valid = code == SUCCESS; parser.error = code; return code; } [[deprecated("Use parser.parse() instead")]] inline int json_parse(const padded_string &s, dom::parser &parser) noexcept { error_code code = parser.parse(s).error(); // The deprecated json_parse API is a signal that the user plans to *use* the error code / valid // bits in the parser instead of heeding the result code. The normal parser unsets those in // anticipation of making the error code ephemeral. // Here we put the code back into the parser, until we've removed this method. parser.valid = code == SUCCESS; parser.error = code; return code; } [[deprecated("Use parser.parse() instead")]] simdjson_warn_unused inline dom::parser build_parsed_json(const uint8_t *buf, size_t len, bool realloc_if_needed = true) noexcept { dom::parser parser; error_code code = parser.parse(buf, len, realloc_if_needed).error(); // The deprecated json_parse API is a signal that the user plans to *use* the error code / valid // bits in the parser instead of heeding the result code. The normal parser unsets those in // anticipation of making the error code ephemeral. // Here we put the code back into the parser, until we've removed this method. 
parser.valid = code == SUCCESS; parser.error = code; return parser; } [[deprecated("Use parser.parse() instead")]] simdjson_warn_unused inline dom::parser build_parsed_json(const char *buf, size_t len, bool realloc_if_needed = true) noexcept { dom::parser parser; error_code code = parser.parse(buf, len, realloc_if_needed).error(); // The deprecated json_parse API is a signal that the user plans to *use* the error code / valid // bits in the parser instead of heeding the result code. The normal parser unsets those in // anticipation of making the error code ephemeral. // Here we put the code back into the parser, until we've removed this method. parser.valid = code == SUCCESS; parser.error = code; return parser; } [[deprecated("Use parser.parse() instead")]] simdjson_warn_unused inline dom::parser build_parsed_json(const std::string &s, bool realloc_if_needed = true) noexcept { dom::parser parser; error_code code = parser.parse(s.data(), s.length(), realloc_if_needed).error(); // The deprecated json_parse API is a signal that the user plans to *use* the error code / valid // bits in the parser instead of heeding the result code. The normal parser unsets those in // anticipation of making the error code ephemeral. // Here we put the code back into the parser, until we've removed this method. parser.valid = code == SUCCESS; parser.error = code; return parser; } [[deprecated("Use parser.parse() instead")]] simdjson_warn_unused inline dom::parser build_parsed_json(const padded_string &s) noexcept { dom::parser parser; error_code code = parser.parse(s).error(); // The deprecated json_parse API is a signal that the user plans to *use* the error code / valid // bits in the parser instead of heeding the result code. The normal parser unsets those in // anticipation of making the error code ephemeral. // Here we put the code back into the parser, until we've removed this method. 
parser.valid = code == SUCCESS; parser.error = code; return parser; } #endif // SIMDJSON_DISABLE_DEPRECATED_API /** @private We do not want to allow implicit conversion from C string to std::string. */ int json_parse(const char *buf, dom::parser &parser) noexcept = delete; /** @private We do not want to allow implicit conversion from C string to std::string. */ dom::parser build_parsed_json(const char *buf) noexcept = delete; } // namespace simdjson #endif // SIMDJSON_DOM_JSONPARSER_H /* end file simdjson/dom/jsonparser.h */ /* including simdjson/dom/parsedjson.h: #include "simdjson/dom/parsedjson.h" */ /* begin file simdjson/dom/parsedjson.h */ // TODO Remove this -- deprecated API and files #ifndef SIMDJSON_DOM_PARSEDJSON_H #define SIMDJSON_DOM_PARSEDJSON_H /* skipped duplicate #include "simdjson/dom/base.h" */ namespace simdjson { /** * @deprecated Use `dom::parser` instead. */ using ParsedJson [[deprecated("Use dom::parser instead")]] = dom::parser; } // namespace simdjson #endif // SIMDJSON_DOM_PARSEDJSON_H /* end file simdjson/dom/parsedjson.h */ /* including simdjson/dom/parsedjson_iterator.h: #include "simdjson/dom/parsedjson_iterator.h" */ /* begin file simdjson/dom/parsedjson_iterator.h */ // TODO Remove this -- deprecated API and files #ifndef SIMDJSON_DOM_PARSEDJSON_ITERATOR_H #define SIMDJSON_DOM_PARSEDJSON_ITERATOR_H /* skipped duplicate #include "simdjson/dom/base.h" */ /* skipped duplicate #include "simdjson/dom/parser.h" */ #ifndef SIMDJSON_DISABLE_DEPRECATED_API namespace simdjson { /** @private **/ class [[deprecated("Use the new DOM navigation API instead (see doc/basics.md)")]] dom::parser::Iterator { public: inline Iterator(const dom::parser &parser) noexcept(false); inline Iterator(const Iterator &o) noexcept; inline ~Iterator() noexcept; inline Iterator& operator=(const Iterator&) = delete; inline bool is_ok() const; // useful for debugging purposes inline size_t get_tape_location() const; // useful for debugging purposes inline size_t 
get_tape_length() const; // returns the current depth (start at 1 with 0 reserved for the fictitious // root node) inline size_t get_depth() const; // A scope is a series of nodes at the same depth, typically it is either an // object ({) or an array ([). The root node has type 'r'. inline uint8_t get_scope_type() const; // move forward in document order inline bool move_forward(); // retrieve the character code of what we're looking at: // [{"slutfn are the possibilities inline uint8_t get_type() const { return current_type; // short functions should be inlined! } // get the int64_t value at this node; valid only if get_type is "l" inline int64_t get_integer() const; // get the value as uint64; valid only if if get_type is "u" inline uint64_t get_unsigned_integer() const; // get the string value at this node (NULL ended); valid only if get_type is " // note that tabs, and line endings are escaped in the returned value (see // print_with_escapes) return value is valid UTF-8, it may contain NULL chars // within the string: get_string_length determines the true string length. inline const char *get_string() const; // return the length of the string in bytes inline uint32_t get_string_length() const; // get the double value at this node; valid only if // get_type() is "d" inline double get_double() const; inline bool is_object_or_array() const { return is_object() || is_array(); } inline bool is_object() const { return get_type() == '{'; } inline bool is_array() const { return get_type() == '['; } inline bool is_string() const { return get_type() == '"'; } // Returns true if the current type of the node is an signed integer. // You can get its value with `get_integer()`. inline bool is_integer() const { return get_type() == 'l'; } // Returns true if the current type of the node is an unsigned integer. // You can get its value with `get_unsigned_integer()`. 
// // NOTE: // Only a large value, which is out of range of a 64-bit signed integer, is // represented internally as an unsigned node. On the other hand, a typical // positive integer, such as 1, 42, or 1000000, is as a signed node. // Be aware this function returns false for a signed node. inline bool is_unsigned_integer() const { return get_type() == 'u'; } // Returns true if the current type of the node is a double floating-point number. inline bool is_double() const { return get_type() == 'd'; } // Returns true if the current type of the node is a number (integer or floating-point). inline bool is_number() const { return is_integer() || is_unsigned_integer() || is_double(); } // Returns true if the current type of the node is a bool with true value. inline bool is_true() const { return get_type() == 't'; } // Returns true if the current type of the node is a bool with false value. inline bool is_false() const { return get_type() == 'f'; } // Returns true if the current type of the node is null. inline bool is_null() const { return get_type() == 'n'; } // Returns true if the type byte represents an object of an array static bool is_object_or_array(uint8_t type) { return ((type == '[') || (type == '{')); } // when at {, go one level deep, looking for a given key // if successful, we are left pointing at the value, // if not, we are still pointing at the object ({) // (in case of repeated keys, this only finds the first one). // We seek the key using C's strcmp so if your JSON strings contain // NULL chars, this would trigger a false positive: if you expect that // to be the case, take extra precautions. // Furthermore, we do the comparison character-by-character // without taking into account Unicode equivalence. 
inline bool move_to_key(const char *key); // as above, but case insensitive lookup (strcmpi instead of strcmp) inline bool move_to_key_insensitive(const char *key); // when at {, go one level deep, looking for a given key // if successful, we are left pointing at the value, // if not, we are still pointing at the object ({) // (in case of repeated keys, this only finds the first one). // The string we search for can contain NULL values. // Furthermore, we do the comparison character-by-character // without taking into account Unicode equivalence. inline bool move_to_key(const char *key, uint32_t length); // when at a key location within an object, this moves to the accompanying // value (located next to it). This is equivalent but much faster than // calling "next()". inline void move_to_value(); // when at [, go one level deep, and advance to the given index. // if successful, we are left pointing at the value, // if not, we are still pointing at the array ([) inline bool move_to_index(uint32_t index); // Moves the iterator to the value corresponding to the json pointer. // Always search from the root of the document. // if successful, we are left pointing at the value, // if not, we are still pointing the same value we were pointing before the // call. The json pointer follows the rfc6901 standard's syntax: // https://tools.ietf.org/html/rfc6901 However, the standard says "If a // referenced member name is not unique in an object, the member that is // referenced is undefined, and evaluation fails". Here we just return the // first corresponding value. The length parameter is the length of the // jsonpointer string ('pointer'). inline bool move_to(const char *pointer, uint32_t length); // Moves the iterator to the value corresponding to the json pointer. // Always search from the root of the document. // if successful, we are left pointing at the value, // if not, we are still pointing the same value we were pointing before the // call. 
The json pointer implementation follows the rfc6901 standard's // syntax: https://tools.ietf.org/html/rfc6901 However, the standard says // "If a referenced member name is not unique in an object, the member that // is referenced is undefined, and evaluation fails". Here we just return // the first corresponding value. inline bool move_to(const std::string &pointer); private: // Almost the same as move_to(), except it searches from the current // position. The pointer's syntax is identical, though that case is not // handled by the rfc6901 standard. The '/' is still required at the // beginning. However, contrary to move_to(), the URI Fragment Identifier // Representation is not supported here. Also, in case of failure, we are // left pointing at the closest value it could reach. For these reasons it // is private. It exists because it is used by move_to(). inline bool relative_move_to(const char *pointer, uint32_t length); public: // throughout return true if we can do the navigation, false // otherwise // Within a given scope (series of nodes at the same depth within either an // array or an object), we move forward. // Thus, given [true, null, {"a":1}, [1,2]], we would visit true, null, { // and [. At the object ({) or at the array ([), you can issue a "down" to // visit their content. valid if we're not at the end of a scope (returns // true). inline bool next(); // Within a given scope (series of nodes at the same depth within either an // array or an object), we move backward. // Thus, given [true, null, {"a":1}, [1,2]], we would visit ], }, null, true // when starting at the end of the scope. At the object ({) or at the array // ([), you can issue a "down" to visit their content. // Performance warning: This function is implemented by starting again // from the beginning of the scope and scanning forward. You should expect // it to be relatively slow. 
inline bool prev(); // Moves back to either the containing array or object (type { or [) from // within a contained scope. // Valid unless we are at the first level of the document inline bool up(); // Valid if we're at a [ or { and it starts a non-empty scope; moves us to // start of that deeper scope if it not empty. Thus, given [true, null, // {"a":1}, [1,2]], if we are at the { node, we would move to the "a" node. inline bool down(); // move us to the start of our current scope, // a scope is a series of nodes at the same level inline void to_start_scope(); inline void rewind(); // print the node we are currently pointing at inline bool print(std::ostream &os, bool escape_strings = true) const; private: const document &doc; size_t max_depth{}; size_t depth{}; size_t location{}; // our current location on a tape size_t tape_length{}; uint8_t current_type{}; uint64_t current_val{}; typedef struct { size_t start_of_scope; uint8_t scope_type; } scopeindex_t; scopeindex_t *depth_index{}; }; } // namespace simdjson #endif // SIMDJSON_DISABLE_DEPRECATED_API #endif // SIMDJSON_DOM_PARSEDJSON_ITERATOR_H /* end file simdjson/dom/parsedjson_iterator.h */ // Inline functions /* including simdjson/dom/array-inl.h: #include "simdjson/dom/array-inl.h" */ /* begin file simdjson/dom/array-inl.h */ #ifndef SIMDJSON_ARRAY_INL_H #define SIMDJSON_ARRAY_INL_H #include /* skipped duplicate #include "simdjson/dom/base.h" */ /* skipped duplicate #include "simdjson/dom/array.h" */ /* skipped duplicate #include "simdjson/dom/element.h" */ /* skipped duplicate #include "simdjson/error-inl.h" */ /* including simdjson/internal/tape_ref-inl.h: #include "simdjson/internal/tape_ref-inl.h" */ /* begin file simdjson/internal/tape_ref-inl.h */ #ifndef SIMDJSON_TAPE_REF_INL_H #define SIMDJSON_TAPE_REF_INL_H /* skipped duplicate #include "simdjson/dom/document.h" */ /* skipped duplicate #include "simdjson/internal/tape_ref.h" */ /* skipped duplicate #include "simdjson/internal/tape_type.h" */ 
#include namespace simdjson { namespace internal { constexpr const uint64_t JSON_VALUE_MASK = 0x00FFFFFFFFFFFFFF; constexpr const uint32_t JSON_COUNT_MASK = 0xFFFFFF; // // tape_ref inline implementation // simdjson_inline tape_ref::tape_ref() noexcept : doc{nullptr}, json_index{0} {} simdjson_inline tape_ref::tape_ref(const dom::document *_doc, size_t _json_index) noexcept : doc{_doc}, json_index{_json_index} {} simdjson_inline bool tape_ref::is_document_root() const noexcept { return json_index == 1; // should we ever change the structure of the tape, this should get updated. } simdjson_inline bool tape_ref::usable() const noexcept { return doc != nullptr; // when the document pointer is null, this tape_ref is uninitialized (should not be accessed). } // Some value types have a specific on-tape word value. It can be faster // to check the type by doing a word-to-word comparison instead of extracting the // most significant 8 bits. simdjson_inline bool tape_ref::is_double() const noexcept { constexpr uint64_t tape_double = uint64_t(tape_type::DOUBLE)<<56; return doc->tape[json_index] == tape_double; } simdjson_inline bool tape_ref::is_int64() const noexcept { constexpr uint64_t tape_int64 = uint64_t(tape_type::INT64)<<56; return doc->tape[json_index] == tape_int64; } simdjson_inline bool tape_ref::is_uint64() const noexcept { constexpr uint64_t tape_uint64 = uint64_t(tape_type::UINT64)<<56; return doc->tape[json_index] == tape_uint64; } simdjson_inline bool tape_ref::is_false() const noexcept { constexpr uint64_t tape_false = uint64_t(tape_type::FALSE_VALUE)<<56; return doc->tape[json_index] == tape_false; } simdjson_inline bool tape_ref::is_true() const noexcept { constexpr uint64_t tape_true = uint64_t(tape_type::TRUE_VALUE)<<56; return doc->tape[json_index] == tape_true; } simdjson_inline bool tape_ref::is_null_on_tape() const noexcept { constexpr uint64_t tape_null = uint64_t(tape_type::NULL_VALUE)<<56; return doc->tape[json_index] == tape_null; } inline 
size_t tape_ref::after_element() const noexcept { switch (tape_ref_type()) { case tape_type::START_ARRAY: case tape_type::START_OBJECT: return matching_brace_index(); case tape_type::UINT64: case tape_type::INT64: case tape_type::DOUBLE: return json_index + 2; default: return json_index + 1; } } simdjson_inline tape_type tape_ref::tape_ref_type() const noexcept { return static_cast(doc->tape[json_index] >> 56); } simdjson_inline uint64_t internal::tape_ref::tape_value() const noexcept { return doc->tape[json_index] & internal::JSON_VALUE_MASK; } simdjson_inline uint32_t internal::tape_ref::matching_brace_index() const noexcept { return uint32_t(doc->tape[json_index]); } simdjson_inline uint32_t internal::tape_ref::scope_count() const noexcept { return uint32_t((doc->tape[json_index] >> 32) & internal::JSON_COUNT_MASK); } template simdjson_inline T tape_ref::next_tape_value() const noexcept { static_assert(sizeof(T) == sizeof(uint64_t), "next_tape_value() template parameter must be 64-bit"); // Though the following is tempting... // return *reinterpret_cast(&doc->tape[json_index + 1]); // It is not generally safe. It is safer, and often faster to rely // on memcpy. Yes, it is uglier, but it is also encapsulated. 
T x; std::memcpy(&x,&doc->tape[json_index + 1],sizeof(uint64_t)); return x; } simdjson_inline uint32_t internal::tape_ref::get_string_length() const noexcept { size_t string_buf_index = size_t(tape_value()); uint32_t len; std::memcpy(&len, &doc->string_buf[string_buf_index], sizeof(len)); return len; } simdjson_inline const char * internal::tape_ref::get_c_str() const noexcept { size_t string_buf_index = size_t(tape_value()); return reinterpret_cast(&doc->string_buf[string_buf_index + sizeof(uint32_t)]); } inline std::string_view internal::tape_ref::get_string_view() const noexcept { return std::string_view( get_c_str(), get_string_length() ); } } // namespace internal } // namespace simdjson #endif // SIMDJSON_TAPE_REF_INL_H /* end file simdjson/internal/tape_ref-inl.h */ #include namespace simdjson { // // simdjson_result inline implementation // simdjson_inline simdjson_result::simdjson_result() noexcept : internal::simdjson_result_base() {} simdjson_inline simdjson_result::simdjson_result(dom::array value) noexcept : internal::simdjson_result_base(std::forward(value)) {} simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept : internal::simdjson_result_base(error) {} #if SIMDJSON_EXCEPTIONS inline dom::array::iterator simdjson_result::begin() const noexcept(false) { if (error()) { throw simdjson_error(error()); } return first.begin(); } inline dom::array::iterator simdjson_result::end() const noexcept(false) { if (error()) { throw simdjson_error(error()); } return first.end(); } inline size_t simdjson_result::size() const noexcept(false) { if (error()) { throw simdjson_error(error()); } return first.size(); } #endif // SIMDJSON_EXCEPTIONS inline simdjson_result simdjson_result::at_pointer(std::string_view json_pointer) const noexcept { if (error()) { return error(); } return first.at_pointer(json_pointer); } inline simdjson_result simdjson_result::at(size_t index) const noexcept { if (error()) { return error(); } return first.at(index); } 
namespace dom { // // array inline implementation // simdjson_inline array::array() noexcept : tape{} {} simdjson_inline array::array(const internal::tape_ref &_tape) noexcept : tape{_tape} {} inline array::iterator array::begin() const noexcept { SIMDJSON_DEVELOPMENT_ASSERT(tape.usable()); // https://github.com/simdjson/simdjson/issues/1914 return internal::tape_ref(tape.doc, tape.json_index + 1); } inline array::iterator array::end() const noexcept { SIMDJSON_DEVELOPMENT_ASSERT(tape.usable()); // https://github.com/simdjson/simdjson/issues/1914 return internal::tape_ref(tape.doc, tape.after_element() - 1); } inline size_t array::size() const noexcept { SIMDJSON_DEVELOPMENT_ASSERT(tape.usable()); // https://github.com/simdjson/simdjson/issues/1914 return tape.scope_count(); } inline size_t array::number_of_slots() const noexcept { SIMDJSON_DEVELOPMENT_ASSERT(tape.usable()); // https://github.com/simdjson/simdjson/issues/1914 return tape.matching_brace_index() - tape.json_index; } inline simdjson_result array::at_pointer(std::string_view json_pointer) const noexcept { SIMDJSON_DEVELOPMENT_ASSERT(tape.usable()); // https://github.com/simdjson/simdjson/issues/1914 if(json_pointer.empty()) { // an empty string means that we return the current node return element(this->tape); // copy the current node } else if(json_pointer[0] != '/') { // otherwise there is an error return INVALID_JSON_POINTER; } json_pointer = json_pointer.substr(1); // - means "the append position" or "the element after the end of the array" // We don't support this, because we're returning a real element, not a position. if (json_pointer == "-") { return INDEX_OUT_OF_BOUNDS; } // Read the array index size_t array_index = 0; size_t i; for (i = 0; i < json_pointer.length() && json_pointer[i] != '/'; i++) { uint8_t digit = uint8_t(json_pointer[i] - '0'); // Check for non-digit in array index. 
If it's there, we're trying to get a field in an object if (digit > 9) { return INCORRECT_TYPE; } array_index = array_index*10 + digit; } // 0 followed by other digits is invalid if (i > 1 && json_pointer[0] == '0') { return INVALID_JSON_POINTER; } // "JSON pointer array index has other characters after 0" // Empty string is invalid; so is a "/" with no digits before it if (i == 0) { return INVALID_JSON_POINTER; } // "Empty string in JSON pointer array index" // Get the child auto child = array(tape).at(array_index); // If there is an error, it ends here if(child.error()) { return child; } // If there is a /, we're not done yet, call recursively. if (i < json_pointer.length()) { child = child.at_pointer(json_pointer.substr(i)); } return child; } inline simdjson_result array::at(size_t index) const noexcept { SIMDJSON_DEVELOPMENT_ASSERT(tape.usable()); // https://github.com/simdjson/simdjson/issues/1914 size_t i=0; for (auto element : *this) { if (i == index) { return element; } i++; } return INDEX_OUT_OF_BOUNDS; } // // array::iterator inline implementation // simdjson_inline array::iterator::iterator(const internal::tape_ref &_tape) noexcept : tape{_tape} { } inline element array::iterator::operator*() const noexcept { return element(tape); } inline array::iterator& array::iterator::operator++() noexcept { tape.json_index = tape.after_element(); return *this; } inline array::iterator array::iterator::operator++(int) noexcept { array::iterator out = *this; ++*this; return out; } inline bool array::iterator::operator!=(const array::iterator& other) const noexcept { return tape.json_index != other.tape.json_index; } inline bool array::iterator::operator==(const array::iterator& other) const noexcept { return tape.json_index == other.tape.json_index; } inline bool array::iterator::operator<(const array::iterator& other) const noexcept { return tape.json_index < other.tape.json_index; } inline bool array::iterator::operator<=(const array::iterator& other) const 
noexcept { return tape.json_index <= other.tape.json_index; } inline bool array::iterator::operator>=(const array::iterator& other) const noexcept { return tape.json_index >= other.tape.json_index; } inline bool array::iterator::operator>(const array::iterator& other) const noexcept { return tape.json_index > other.tape.json_index; } } // namespace dom } // namespace simdjson /* skipped duplicate #include "simdjson/dom/element-inl.h" */ #if defined(__cpp_lib_ranges) static_assert(std::ranges::view); static_assert(std::ranges::sized_range); #if SIMDJSON_EXCEPTIONS static_assert(std::ranges::view>); static_assert(std::ranges::sized_range>); #endif // SIMDJSON_EXCEPTIONS #endif // defined(__cpp_lib_ranges) #endif // SIMDJSON_ARRAY_INL_H /* end file simdjson/dom/array-inl.h */ /* skipped duplicate #include "simdjson/dom/document_stream-inl.h" */ /* including simdjson/dom/document-inl.h: #include "simdjson/dom/document-inl.h" */ /* begin file simdjson/dom/document-inl.h */ #ifndef SIMDJSON_DOCUMENT_INL_H #define SIMDJSON_DOCUMENT_INL_H // Inline implementations go in here. 
/* skipped duplicate #include "simdjson/dom/base.h" */ /* skipped duplicate #include "simdjson/dom/document.h" */ /* skipped duplicate #include "simdjson/dom/element-inl.h" */ /* skipped duplicate #include "simdjson/internal/tape_ref-inl.h" */ /* including simdjson/internal/jsonformatutils.h: #include "simdjson/internal/jsonformatutils.h" */ /* begin file simdjson/internal/jsonformatutils.h */ #ifndef SIMDJSON_INTERNAL_JSONFORMATUTILS_H #define SIMDJSON_INTERNAL_JSONFORMATUTILS_H /* skipped duplicate #include "simdjson/base.h" */ #include #include #include namespace simdjson { namespace internal { inline std::ostream& operator<<(std::ostream& out, const escape_json_string &str); class escape_json_string { public: escape_json_string(std::string_view _str) noexcept : str{_str} {} operator std::string() const noexcept { std::stringstream s; s << *this; return s.str(); } private: std::string_view str; friend std::ostream& operator<<(std::ostream& out, const escape_json_string &unescaped); }; inline std::ostream& operator<<(std::ostream& out, const escape_json_string &unescaped) { for (size_t i=0; i(unescaped.str[i]) <= 0x1F) { // TODO can this be done once at the beginning, or will it mess up << char? 
std::ios::fmtflags f(out.flags()); out << "\\u" << std::hex << std::setw(4) << std::setfill('0') << int(unescaped.str[i]); out.flags(f); } else { out << unescaped.str[i]; } } } return out; } } // namespace internal } // namespace simdjson #endif // SIMDJSON_INTERNAL_JSONFORMATUTILS_H /* end file simdjson/internal/jsonformatutils.h */ #include namespace simdjson { namespace dom { // // document inline implementation // inline element document::root() const noexcept { return element(internal::tape_ref(this, 1)); } simdjson_warn_unused inline size_t document::capacity() const noexcept { return allocated_capacity; } simdjson_warn_unused inline error_code document::allocate(size_t capacity) noexcept { if (capacity == 0) { string_buf.reset(); tape.reset(); allocated_capacity = 0; return SUCCESS; } // a pathological input like "[[[[..." would generate capacity tape elements, so // need a capacity of at least capacity + 1, but it is also possible to do // worse with "[7,7,7,7,6,7,7,7,6,7,7,6,[7,7,7,7,6,7,7,7,6,7,7,6,7,7,7,7,7,7,6" //where capacity + 1 tape elements are // generated, see issue https://github.com/simdjson/simdjson/issues/345 size_t tape_capacity = SIMDJSON_ROUNDUP_N(capacity + 3, 64); // a document with only zero-length strings... could have capacity/3 string // and we would need capacity/3 * 5 bytes on the string buffer size_t string_capacity = SIMDJSON_ROUNDUP_N(5 * capacity / 3 + SIMDJSON_PADDING, 64); string_buf.reset( new (std::nothrow) uint8_t[string_capacity]); tape.reset(new (std::nothrow) uint64_t[tape_capacity]); if(!(string_buf && tape)) { allocated_capacity = 0; string_buf.reset(); tape.reset(); return MEMALLOC; } // Technically the allocated_capacity might be larger than capacity // so the next line is pessimistic. 
allocated_capacity = capacity; return SUCCESS; } inline bool document::dump_raw_tape(std::ostream &os) const noexcept { uint32_t string_length; size_t tape_idx = 0; uint64_t tape_val = tape[tape_idx]; uint8_t type = uint8_t(tape_val >> 56); os << tape_idx << " : " << type; tape_idx++; size_t how_many = 0; if (type == 'r') { how_many = size_t(tape_val & internal::JSON_VALUE_MASK); } else { // Error: no starting root node? return false; } os << "\t// pointing to " << how_many << " (right after last node)\n"; uint64_t payload; for (; tape_idx < how_many; tape_idx++) { os << tape_idx << " : "; tape_val = tape[tape_idx]; payload = tape_val & internal::JSON_VALUE_MASK; type = uint8_t(tape_val >> 56); switch (type) { case '"': // we have a string os << "string \""; std::memcpy(&string_length, string_buf.get() + payload, sizeof(uint32_t)); os << internal::escape_json_string(std::string_view( reinterpret_cast(string_buf.get() + payload + sizeof(uint32_t)), string_length )); os << '"'; os << '\n'; break; case 'l': // we have a long int if (tape_idx + 1 >= how_many) { return false; } os << "integer " << static_cast(tape[++tape_idx]) << "\n"; break; case 'u': // we have a long uint if (tape_idx + 1 >= how_many) { return false; } os << "unsigned integer " << tape[++tape_idx] << "\n"; break; case 'd': // we have a double os << "float "; if (tape_idx + 1 >= how_many) { return false; } double answer; std::memcpy(&answer, &tape[++tape_idx], sizeof(answer)); os << answer << '\n'; break; case 'n': // we have a null os << "null\n"; break; case 't': // we have a true os << "true\n"; break; case 'f': // we have a false os << "false\n"; break; case '{': // we have an object os << "{\t// pointing to next tape location " << uint32_t(payload) << " (first node after the scope), " << " saturated count " << ((payload >> 32) & internal::JSON_COUNT_MASK)<< "\n"; break; case '}': // we end an object os << "}\t// pointing to previous tape location " << uint32_t(payload) << " (start of the 
scope)\n"; break; case '[': // we start an array os << "[\t// pointing to next tape location " << uint32_t(payload) << " (first node after the scope), " << " saturated count " << ((payload >> 32) & internal::JSON_COUNT_MASK)<< "\n"; break; case ']': // we end an array os << "]\t// pointing to previous tape location " << uint32_t(payload) << " (start of the scope)\n"; break; case 'r': // we start and end with the root node // should we be hitting the root node? return false; default: return false; } } tape_val = tape[tape_idx]; payload = tape_val & internal::JSON_VALUE_MASK; type = uint8_t(tape_val >> 56); os << tape_idx << " : " << type << "\t// pointing to " << payload << " (start root)\n"; return true; } } // namespace dom } // namespace simdjson #endif // SIMDJSON_DOCUMENT_INL_H /* end file simdjson/dom/document-inl.h */ /* skipped duplicate #include "simdjson/dom/element-inl.h" */ /* skipped duplicate #include "simdjson/dom/object-inl.h" */ /* including simdjson/dom/parsedjson_iterator-inl.h: #include "simdjson/dom/parsedjson_iterator-inl.h" */ /* begin file simdjson/dom/parsedjson_iterator-inl.h */ #ifndef SIMDJSON_PARSEDJSON_ITERATOR_INL_H #define SIMDJSON_PARSEDJSON_ITERATOR_INL_H /* skipped duplicate #include "simdjson/dom/base.h" */ /* skipped duplicate #include "simdjson/dom/parsedjson_iterator.h" */ /* skipped duplicate #include "simdjson/internal/jsonformatutils.h" */ /* skipped duplicate #include "simdjson/dom/parser-inl.h" */ /* skipped duplicate #include "simdjson/internal/tape_ref-inl.h" */ #include #include #include #include #ifndef SIMDJSON_DISABLE_DEPRECATED_API namespace simdjson { // VS2017 reports deprecated warnings when you define a deprecated class's methods. 
SIMDJSON_PUSH_DISABLE_WARNINGS SIMDJSON_DISABLE_DEPRECATED_WARNING // Because of template weirdness, the actual class definition is inline in the document class simdjson_warn_unused bool dom::parser::Iterator::is_ok() const { return location < tape_length; } // useful for debugging purposes size_t dom::parser::Iterator::get_tape_location() const { return location; } // useful for debugging purposes size_t dom::parser::Iterator::get_tape_length() const { return tape_length; } // returns the current depth (start at 1 with 0 reserved for the fictitious root // node) size_t dom::parser::Iterator::get_depth() const { return depth; } // A scope is a series of nodes at the same depth, typically it is either an // object ({) or an array ([). The root node has type 'r'. uint8_t dom::parser::Iterator::get_scope_type() const { return depth_index[depth].scope_type; } bool dom::parser::Iterator::move_forward() { if (location + 1 >= tape_length) { return false; // we are at the end! } if ((current_type == '[') || (current_type == '{')) { // We are entering a new scope depth++; assert(depth < max_depth); depth_index[depth].start_of_scope = location; depth_index[depth].scope_type = current_type; } else if ((current_type == ']') || (current_type == '}')) { // Leaving a scope. depth--; } else if (is_number()) { // these types use 2 locations on the tape, not just one. location += 1; } location += 1; current_val = doc.tape[location]; current_type = uint8_t(current_val >> 56); return true; } void dom::parser::Iterator::move_to_value() { // assume that we are on a key, so move by 1. 
location += 1; current_val = doc.tape[location]; current_type = uint8_t(current_val >> 56); } bool dom::parser::Iterator::move_to_key(const char *key) { if (down()) { do { const bool right_key = (strcmp(get_string(), key) == 0); move_to_value(); if (right_key) { return true; } } while (next()); up(); } return false; } bool dom::parser::Iterator::move_to_key_insensitive( const char *key) { if (down()) { do { const bool right_key = (simdjson_strcasecmp(get_string(), key) == 0); move_to_value(); if (right_key) { return true; } } while (next()); up(); } return false; } bool dom::parser::Iterator::move_to_key(const char *key, uint32_t length) { if (down()) { do { bool right_key = ((get_string_length() == length) && (memcmp(get_string(), key, length) == 0)); move_to_value(); if (right_key) { return true; } } while (next()); up(); } return false; } bool dom::parser::Iterator::move_to_index(uint32_t index) { if (down()) { uint32_t i = 0; for (; i < index; i++) { if (!next()) { break; } } if (i == index) { return true; } up(); } return false; } bool dom::parser::Iterator::prev() { size_t target_location = location; to_start_scope(); size_t npos = location; if (target_location == npos) { return false; // we were already at the start } size_t oldnpos; // we have that npos < target_location here do { oldnpos = npos; if ((current_type == '[') || (current_type == '{')) { // we need to jump npos = uint32_t(current_val); } else { npos = npos + ((current_type == 'd' || current_type == 'l') ? 
2 : 1); } } while (npos < target_location); location = oldnpos; current_val = doc.tape[location]; current_type = uint8_t(current_val >> 56); return true; } bool dom::parser::Iterator::up() { if (depth == 1) { return false; // don't allow moving back to root } to_start_scope(); // next we just move to the previous value depth--; location -= 1; current_val = doc.tape[location]; current_type = uint8_t(current_val >> 56); return true; } bool dom::parser::Iterator::down() { if (location + 1 >= tape_length) { return false; } if ((current_type == '[') || (current_type == '{')) { size_t npos = uint32_t(current_val); if (npos == location + 2) { return false; // we have an empty scope } depth++; assert(depth < max_depth); location = location + 1; depth_index[depth].start_of_scope = location; depth_index[depth].scope_type = current_type; current_val = doc.tape[location]; current_type = uint8_t(current_val >> 56); return true; } return false; } void dom::parser::Iterator::to_start_scope() { location = depth_index[depth].start_of_scope; current_val = doc.tape[location]; current_type = uint8_t(current_val >> 56); } inline void dom::parser::Iterator::rewind() { while (up()) ; } bool dom::parser::Iterator::next() { size_t npos; if ((current_type == '[') || (current_type == '{')) { // we need to jump npos = uint32_t(current_val); } else { npos = location + (is_number() ? 
2 : 1); } uint64_t next_val = doc.tape[npos]; uint8_t next_type = uint8_t(next_val >> 56); if ((next_type == ']') || (next_type == '}')) { return false; // we reached the end of the scope } location = npos; current_val = next_val; current_type = next_type; return true; } dom::parser::Iterator::Iterator(const dom::parser &pj) noexcept(false) : doc(pj.doc) { #if SIMDJSON_EXCEPTIONS if (!pj.valid) { throw simdjson_error(pj.error); } #else if (!pj.valid) { return; } // abort() usage is forbidden in the library #endif max_depth = pj.max_depth(); depth_index = new scopeindex_t[max_depth + 1]; depth_index[0].start_of_scope = location; current_val = doc.tape[location++]; current_type = uint8_t(current_val >> 56); depth_index[0].scope_type = current_type; tape_length = size_t(current_val & internal::JSON_VALUE_MASK); if (location < tape_length) { // If we make it here, then depth_capacity must >=2, but the compiler // may not know this. current_val = doc.tape[location]; current_type = uint8_t(current_val >> 56); depth++; assert(depth < max_depth); depth_index[depth].start_of_scope = location; depth_index[depth].scope_type = current_type; } } dom::parser::Iterator::Iterator( const dom::parser::Iterator &o) noexcept : doc(o.doc), max_depth(o.depth), depth(o.depth), location(o.location), tape_length(o.tape_length), current_type(o.current_type), current_val(o.current_val) { depth_index = new scopeindex_t[max_depth+1]; std::memcpy(depth_index, o.depth_index, (depth + 1) * sizeof(depth_index[0])); } dom::parser::Iterator::~Iterator() noexcept { if (depth_index) { delete[] depth_index; } } bool dom::parser::Iterator::print(std::ostream &os, bool escape_strings) const { if (!is_ok()) { return false; } switch (current_type) { case '"': // we have a string os << '"'; if (escape_strings) { os << internal::escape_json_string(std::string_view(get_string(), get_string_length())); } else { // was: os << get_string();, but given that we can include null chars, we // have to do something 
crazier: std::copy(get_string(), get_string() + get_string_length(), std::ostream_iterator(os)); } os << '"'; break; case 'l': // we have a long int os << get_integer(); break; case 'u': os << get_unsigned_integer(); break; case 'd': os << get_double(); break; case 'n': // we have a null os << "null"; break; case 't': // we have a true os << "true"; break; case 'f': // we have a false os << "false"; break; case '{': // we have an object case '}': // we end an object case '[': // we start an array case ']': // we end an array os << char(current_type); break; default: return false; } return true; } bool dom::parser::Iterator::move_to(const char *pointer, uint32_t length) { char *new_pointer = nullptr; if (pointer[0] == '#') { // Converting fragment representation to string representation new_pointer = new char[length]; uint32_t new_length = 0; for (uint32_t i = 1; i < length; i++) { if (pointer[i] == '%' && pointer[i + 1] == 'x') { #if __cpp_exceptions try { #endif int fragment = std::stoi(std::string(&pointer[i + 2], 2), nullptr, 16); if (fragment == '\\' || fragment == '"' || (fragment <= 0x1F)) { // escaping the character new_pointer[new_length] = '\\'; new_length++; } new_pointer[new_length] = char(fragment); i += 3; #if __cpp_exceptions } catch (std::invalid_argument &) { delete[] new_pointer; return false; // the fragment is invalid } #endif } else { new_pointer[new_length] = pointer[i]; } new_length++; } length = new_length; pointer = new_pointer; } // saving the current state size_t depth_s = depth; size_t location_s = location; uint8_t current_type_s = current_type; uint64_t current_val_s = current_val; rewind(); // The json pointer is used from the root of the document. bool found = relative_move_to(pointer, length); delete[] new_pointer; if (!found) { // since the pointer has found nothing, we get back to the original // position. 
depth = depth_s; location = location_s; current_type = current_type_s; current_val = current_val_s; } return found; } inline bool dom::parser::Iterator::move_to(const std::string &pointer) { return move_to(pointer.c_str(), uint32_t(pointer.length())); } inline int64_t dom::parser::Iterator::get_integer() const { if (location + 1 >= tape_length) { return 0; // default value in case of error } return static_cast(doc.tape[location + 1]); } inline uint64_t dom::parser::Iterator::get_unsigned_integer() const { if (location + 1 >= tape_length) { return 0; // default value in case of error } return doc.tape[location + 1]; } inline const char * dom::parser::Iterator::get_string() const { return reinterpret_cast( doc.string_buf.get() + (current_val & internal::JSON_VALUE_MASK) + sizeof(uint32_t)); } inline uint32_t dom::parser::Iterator::get_string_length() const { uint32_t answer; std::memcpy(&answer, reinterpret_cast(doc.string_buf.get() + (current_val & internal::JSON_VALUE_MASK)), sizeof(uint32_t)); return answer; } inline double dom::parser::Iterator::get_double() const { if (location + 1 >= tape_length) { return std::numeric_limits::quiet_NaN(); // default value in // case of error } double answer; std::memcpy(&answer, &doc.tape[location + 1], sizeof(answer)); return answer; } bool dom::parser::Iterator::relative_move_to(const char *pointer, uint32_t length) { if (length == 0) { // returns the whole document return true; } if (pointer[0] != '/') { // '/' must be the first character return false; } // finding the key in an object or the index in an array std::string key_or_index; uint32_t offset = 1; // checking for the "-" case if (is_array() && pointer[1] == '-') { if (length != 2) { // the pointer must be exactly "/-" // there can't be anything more after '-' as an index return false; } key_or_index = '-'; offset = length; // will skip the loop coming right after } // We either transform the first reference token to a valid json key // or we make sure it is a valid 
index in an array. for (; offset < length; offset++) { if (pointer[offset] == '/') { // beginning of the next key or index break; } if (is_array() && (pointer[offset] < '0' || pointer[offset] > '9')) { // the index of an array must be an integer // we also make sure std::stoi won't discard whitespaces later return false; } if (pointer[offset] == '~') { // "~1" represents "/" if (pointer[offset + 1] == '1') { key_or_index += '/'; offset++; continue; } // "~0" represents "~" if (pointer[offset + 1] == '0') { key_or_index += '~'; offset++; continue; } } if (pointer[offset] == '\\') { if (pointer[offset + 1] == '\\' || pointer[offset + 1] == '"' || (pointer[offset + 1] <= 0x1F)) { key_or_index += pointer[offset + 1]; offset++; continue; } return false; // invalid escaped character } if (pointer[offset] == '\"') { // unescaped quote character. this is an invalid case. // lets do nothing and assume most pointers will be valid. // it won't find any corresponding json key anyway. // return false; } key_or_index += pointer[offset]; } bool found = false; if (is_object()) { if (move_to_key(key_or_index.c_str(), uint32_t(key_or_index.length()))) { found = relative_move_to(pointer + offset, length - offset); } } else if (is_array()) { if (key_or_index == "-") { // handling "-" case first if (down()) { while (next()) ; // moving to the end of the array // moving to the nonexistent value right after... size_t npos; if ((current_type == '[') || (current_type == '{')) { // we need to jump npos = uint32_t(current_val); } else { npos = location + ((current_type == 'd' || current_type == 'l') ? 2 : 1); } location = npos; current_val = doc.tape[npos]; current_type = uint8_t(current_val >> 56); return true; // how could it fail ? 
} } else { // regular numeric index // The index can't have a leading '0' if (key_or_index[0] == '0' && key_or_index.length() > 1) { return false; } // it cannot be empty if (key_or_index.length() == 0) { return false; } // we already checked the index contains only valid digits uint32_t index = std::stoi(key_or_index); if (move_to_index(index)) { found = relative_move_to(pointer + offset, length - offset); } } } return found; } SIMDJSON_POP_DISABLE_WARNINGS } // namespace simdjson #endif // SIMDJSON_DISABLE_DEPRECATED_API #endif // SIMDJSON_PARSEDJSON_ITERATOR_INL_H /* end file simdjson/dom/parsedjson_iterator-inl.h */ /* skipped duplicate #include "simdjson/dom/parser-inl.h" */ /* skipped duplicate #include "simdjson/internal/tape_ref-inl.h" */ /* including simdjson/dom/serialization-inl.h: #include "simdjson/dom/serialization-inl.h" */ /* begin file simdjson/dom/serialization-inl.h */ #ifndef SIMDJSON_SERIALIZATION_INL_H #define SIMDJSON_SERIALIZATION_INL_H /* skipped duplicate #include "simdjson/dom/base.h" */ /* skipped duplicate #include "simdjson/dom/serialization.h" */ /* skipped duplicate #include "simdjson/dom/parser.h" */ /* skipped duplicate #include "simdjson/internal/tape_type.h" */ /* skipped duplicate #include "simdjson/dom/array-inl.h" */ /* skipped duplicate #include "simdjson/dom/object-inl.h" */ /* skipped duplicate #include "simdjson/internal/tape_ref-inl.h" */ #include namespace simdjson { namespace dom { inline bool parser::print_json(std::ostream &os) const noexcept { if (!valid) { return false; } simdjson::internal::string_builder<> sb; sb.append(doc.root()); std::string_view answer = sb.str(); os << answer; return true; } inline std::ostream& operator<<(std::ostream& out, simdjson::dom::element value) { simdjson::internal::string_builder<> sb; sb.append(value); return (out << sb.str()); } #if SIMDJSON_EXCEPTIONS inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x) { if (x.error()) { throw 
simdjson::simdjson_error(x.error()); } return (out << x.value()); } #endif inline std::ostream& operator<<(std::ostream& out, simdjson::dom::array value) { simdjson::internal::string_builder<> sb; sb.append(value); return (out << sb.str()); } #if SIMDJSON_EXCEPTIONS inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x) { if (x.error()) { throw simdjson::simdjson_error(x.error()); } return (out << x.value()); } #endif inline std::ostream& operator<<(std::ostream& out, simdjson::dom::object value) { simdjson::internal::string_builder<> sb; sb.append(value); return (out << sb.str()); } #if SIMDJSON_EXCEPTIONS inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x) { if (x.error()) { throw simdjson::simdjson_error(x.error()); } return (out << x.value()); } #endif } // namespace dom /*** * Number utility functions **/ namespace { /**@private * Escape sequence like \b or \u0001 * We expect that most compilers will use 8 bytes for this data structure. **/ struct escape_sequence { uint8_t length; const char string[7]; // technically, we only ever need 6 characters, we pad to 8 }; /**@private * This converts a signed integer into a character sequence. * The caller is responsible for providing enough memory (at least * 20 characters.) * Though various runtime libraries provide itoa functions, * it is not part of the C++ standard. The C++17 standard * adds the to_chars functions which would do as well, but * we want to support C++11. */ static char *fast_itoa(char *output, int64_t value) noexcept { // This is a standard implementation of itoa. char buffer[20]; uint64_t value_positive; // In general, negating a signed integer is unsafe. if(value < 0) { *output++ = '-'; // Doing value_positive = -value; while avoiding // undefined behavior warnings. // It assumes two complement's which is universal at this // point in time. 
std::memcpy(&value_positive, &value, sizeof(value)); value_positive = (~value_positive) + 1; // this is a negation } else { value_positive = value; } // We work solely with value_positive. It *might* be easier // for an optimizing compiler to deal with an unsigned variable // as far as performance goes. const char *const end_buffer = buffer + 20; char *write_pointer = buffer + 19; // A faster approach is possible if we expect large integers: // unroll the loop (work in 100s, 1000s) and use some kind of // memoization. while(value_positive >= 10) { *write_pointer-- = char('0' + (value_positive % 10)); value_positive /= 10; } *write_pointer = char('0' + value_positive); size_t len = end_buffer - write_pointer; std::memcpy(output, write_pointer, len); return output + len; } /**@private * This converts an unsigned integer into a character sequence. * The caller is responsible for providing enough memory (at least * 19 characters.) * Though various runtime libraries provide itoa functions, * it is not part of the C++ standard. The C++17 standard * adds the to_chars functions which would do as well, but * we want to support C++11. */ static char *fast_itoa(char *output, uint64_t value) noexcept { // This is a standard implementation of itoa. char buffer[20]; const char *const end_buffer = buffer + 20; char *write_pointer = buffer + 19; // A faster approach is possible if we expect large integers: // unroll the loop (work in 100s, 1000s) and use some kind of // memoization. while(value >= 10) { *write_pointer-- = char('0' + (value % 10)); value /= 10; }; *write_pointer = char('0' + value); size_t len = end_buffer - write_pointer; std::memcpy(output, write_pointer, len); return output + len; } } // anonymous namespace namespace internal { /*** * Minifier/formatter code. 
**/ template simdjson_inline void base_formatter::number(uint64_t x) { char number_buffer[24]; char *newp = fast_itoa(number_buffer, x); buffer.insert(buffer.end(), number_buffer, newp); } template simdjson_inline void base_formatter::number(int64_t x) { char number_buffer[24]; char *newp = fast_itoa(number_buffer, x); buffer.insert(buffer.end(), number_buffer, newp); } template simdjson_inline void base_formatter::number(double x) { char number_buffer[24]; // Currently, passing the nullptr to the second argument is // safe because our implementation does not check the second // argument. char *newp = internal::to_chars(number_buffer, nullptr, x); buffer.insert(buffer.end(), number_buffer, newp); } template simdjson_inline void base_formatter::start_array() { one_char('['); } template simdjson_inline void base_formatter::end_array() { one_char(']'); } template simdjson_inline void base_formatter::start_object() { one_char('{'); } template simdjson_inline void base_formatter::end_object() { one_char('}'); } template simdjson_inline void base_formatter::comma() { one_char(','); } template simdjson_inline void base_formatter::true_atom() { const char * s = "true"; buffer.insert(buffer.end(), s, s + 4); } template simdjson_inline void base_formatter::false_atom() { const char * s = "false"; buffer.insert(buffer.end(), s, s + 5); } template simdjson_inline void base_formatter::null_atom() { const char * s = "null"; buffer.insert(buffer.end(), s, s + 4); } template simdjson_inline void base_formatter::one_char(char c) { buffer.push_back(c); } template simdjson_inline void base_formatter::key(std::string_view unescaped) { string(unescaped); one_char(':'); } template simdjson_inline void base_formatter::string(std::string_view unescaped) { one_char('\"'); size_t i = 0; // Fast path for the case where we have no control character, no ", and no backslash. // This should include most keys. 
// // We would like to use 'bool' but some compilers take offense to bitwise operation // with bool types. constexpr static char needs_escaping[] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; for(;i + 8 <= unescaped.length(); i += 8) { // Poor's man vectorization. This could get much faster if we used SIMD. // // It is not the case that replacing '|' with '||' would be neutral performance-wise. if(needs_escaping[uint8_t(unescaped[i])] | needs_escaping[uint8_t(unescaped[i+1])] | needs_escaping[uint8_t(unescaped[i+2])] | needs_escaping[uint8_t(unescaped[i+3])] | needs_escaping[uint8_t(unescaped[i+4])] | needs_escaping[uint8_t(unescaped[i+5])] | needs_escaping[uint8_t(unescaped[i+6])] | needs_escaping[uint8_t(unescaped[i+7])] ) { break; } } for(;i < unescaped.length(); i++) { if(needs_escaping[uint8_t(unescaped[i])]) { break; } } // The following is also possible and omits a 256-byte table, but it is slower: // for (; (i < unescaped.length()) && (uint8_t(unescaped[i]) > 0x1F) // && (unescaped[i] != '\"') && (unescaped[i] != '\\'); i++) {} // At least for long strings, the following should be fast. We could // do better by integrating the checks and the insertion. 
buffer.insert(buffer.end(), unescaped.data(), unescaped.data() + i); // We caught a control character if we enter this loop (slow). // Note that we are do not restart from the beginning, but rather we continue // from the point where we encountered something that requires escaping. for (; i < unescaped.length(); i++) { switch (unescaped[i]) { case '\"': { const char * s = "\\\""; buffer.insert(buffer.end(), s, s + 2); } break; case '\\': { const char * s = "\\\\"; buffer.insert(buffer.end(), s, s + 2); } break; default: if (uint8_t(unescaped[i]) <= 0x1F) { // If packed, this uses 8 * 32 bytes. // Note that we expect most compilers to embed this code in the data // section. constexpr static escape_sequence escaped[32] = { {6, "\\u0000"}, {6, "\\u0001"}, {6, "\\u0002"}, {6, "\\u0003"}, {6, "\\u0004"}, {6, "\\u0005"}, {6, "\\u0006"}, {6, "\\u0007"}, {2, "\\b"}, {2, "\\t"}, {2, "\\n"}, {6, "\\u000b"}, {2, "\\f"}, {2, "\\r"}, {6, "\\u000e"}, {6, "\\u000f"}, {6, "\\u0010"}, {6, "\\u0011"}, {6, "\\u0012"}, {6, "\\u0013"}, {6, "\\u0014"}, {6, "\\u0015"}, {6, "\\u0016"}, {6, "\\u0017"}, {6, "\\u0018"}, {6, "\\u0019"}, {6, "\\u001a"}, {6, "\\u001b"}, {6, "\\u001c"}, {6, "\\u001d"}, {6, "\\u001e"}, {6, "\\u001f"}}; auto u = escaped[uint8_t(unescaped[i])]; buffer.insert(buffer.end(), u.string, u.string + u.length); } else { one_char(unescaped[i]); } } // switch } // for one_char('\"'); } template inline void base_formatter::clear() { buffer.clear(); } template simdjson_inline std::string_view base_formatter::str() const { return std::string_view(buffer.data(), buffer.size()); } simdjson_inline void mini_formatter::print_newline() { return; } simdjson_inline void mini_formatter::print_indents(size_t depth) { (void)depth; return; } simdjson_inline void mini_formatter::print_space() { return; } simdjson_inline void pretty_formatter::print_newline() { one_char('\n'); } simdjson_inline void pretty_formatter::print_indents(size_t depth) { if(this->indent_step <= 0) { return; } 
for(size_t i = 0; i < this->indent_step * depth; i++) { one_char(' '); } } simdjson_inline void pretty_formatter::print_space() { one_char(' '); } /*** * String building code. **/ template inline void string_builder::append(simdjson::dom::element value) { // using tape_type = simdjson::internal::tape_type; size_t depth = 0; constexpr size_t MAX_DEPTH = 16; bool is_object[MAX_DEPTH]; is_object[0] = false; bool after_value = false; internal::tape_ref iter(value.tape); do { // print commas after each value if (after_value) { format.comma(); format.print_newline(); } format.print_indents(depth); // If we are in an object, print the next key and :, and skip to the next // value. if (is_object[depth]) { format.key(iter.get_string_view()); format.print_space(); iter.json_index++; } switch (iter.tape_ref_type()) { // Arrays case tape_type::START_ARRAY: { // If we're too deep, we need to recurse to go deeper. depth++; if (simdjson_unlikely(depth >= MAX_DEPTH)) { append(simdjson::dom::array(iter)); iter.json_index = iter.matching_brace_index() - 1; // Jump to the ] depth--; break; } // Output start [ format.start_array(); iter.json_index++; // Handle empty [] (we don't want to come back around and print commas) if (iter.tape_ref_type() == tape_type::END_ARRAY) { format.end_array(); depth--; break; } is_object[depth] = false; after_value = false; format.print_newline(); continue; } // Objects case tape_type::START_OBJECT: { // If we're too deep, we need to recurse to go deeper. 
depth++; if (simdjson_unlikely(depth >= MAX_DEPTH)) { append(simdjson::dom::object(iter)); iter.json_index = iter.matching_brace_index() - 1; // Jump to the } depth--; break; } // Output start { format.start_object(); iter.json_index++; // Handle empty {} (we don't want to come back around and print commas) if (iter.tape_ref_type() == tape_type::END_OBJECT) { format.end_object(); depth--; break; } is_object[depth] = true; after_value = false; format.print_newline(); continue; } // Scalars case tape_type::STRING: format.string(iter.get_string_view()); break; case tape_type::INT64: format.number(iter.next_tape_value()); iter.json_index++; // numbers take up 2 spots, so we need to increment // extra break; case tape_type::UINT64: format.number(iter.next_tape_value()); iter.json_index++; // numbers take up 2 spots, so we need to increment // extra break; case tape_type::DOUBLE: format.number(iter.next_tape_value()); iter.json_index++; // numbers take up 2 spots, so we need to increment // extra break; case tape_type::TRUE_VALUE: format.true_atom(); break; case tape_type::FALSE_VALUE: format.false_atom(); break; case tape_type::NULL_VALUE: format.null_atom(); break; // These are impossible case tape_type::END_ARRAY: case tape_type::END_OBJECT: case tape_type::ROOT: SIMDJSON_UNREACHABLE(); } iter.json_index++; after_value = true; // Handle multiple ends in a row while (depth != 0 && (iter.tape_ref_type() == tape_type::END_ARRAY || iter.tape_ref_type() == tape_type::END_OBJECT)) { format.print_newline(); depth--; format.print_indents(depth); if (iter.tape_ref_type() == tape_type::END_ARRAY) { format.end_array(); } else { format.end_object(); } iter.json_index++; } // Stop when we're at depth 0 } while (depth != 0); format.print_newline(); } template inline void string_builder::append(simdjson::dom::object value) { format.start_object(); auto pair = value.begin(); auto end = value.end(); if (pair != end) { append(*pair); for (++pair; pair != end; ++pair) { format.comma(); 
append(*pair); } } format.end_object(); } template inline void string_builder::append(simdjson::dom::array value) { format.start_array(); auto iter = value.begin(); auto end = value.end(); if (iter != end) { append(*iter); for (++iter; iter != end; ++iter) { format.comma(); append(*iter); } } format.end_array(); } template simdjson_inline void string_builder::append(simdjson::dom::key_value_pair kv) { format.key(kv.key); append(kv.value); } template simdjson_inline void string_builder::clear() { format.clear(); } template simdjson_inline std::string_view string_builder::str() const { return format.str(); } } // namespace internal } // namespace simdjson #endif /* end file simdjson/dom/serialization-inl.h */ #endif // SIMDJSON_DOM_H /* end file simdjson/dom.h */ /* including simdjson/ondemand.h: #include "simdjson/ondemand.h" */ /* begin file simdjson/ondemand.h */ #ifndef SIMDJSON_ONDEMAND_H #define SIMDJSON_ONDEMAND_H /* including simdjson/builtin/ondemand.h: #include "simdjson/builtin/ondemand.h" */ /* begin file simdjson/builtin/ondemand.h */ #ifndef SIMDJSON_BUILTIN_ONDEMAND_H #define SIMDJSON_BUILTIN_ONDEMAND_H /* including simdjson/builtin.h: #include "simdjson/builtin.h" */ /* begin file simdjson/builtin.h */ #ifndef SIMDJSON_BUILTIN_H #define SIMDJSON_BUILTIN_H /* including simdjson/builtin/base.h: #include "simdjson/builtin/base.h" */ /* begin file simdjson/builtin/base.h */ #ifndef SIMDJSON_BUILTIN_BASE_H #define SIMDJSON_BUILTIN_BASE_H /* skipped duplicate #include "simdjson/base.h" */ /* including simdjson/implementation_detection.h: #include "simdjson/implementation_detection.h" */ /* begin file simdjson/implementation_detection.h */ #ifndef SIMDJSON_IMPLEMENTATION_DETECTION_H #define SIMDJSON_IMPLEMENTATION_DETECTION_H /* skipped duplicate #include "simdjson/base.h" */ // 0 is reserved, because undefined SIMDJSON_IMPLEMENTATION equals 0 in preprocessor macros. 
#define SIMDJSON_IMPLEMENTATION_ID_arm64 1 #define SIMDJSON_IMPLEMENTATION_ID_fallback 2 #define SIMDJSON_IMPLEMENTATION_ID_haswell 3 #define SIMDJSON_IMPLEMENTATION_ID_icelake 4 #define SIMDJSON_IMPLEMENTATION_ID_ppc64 5 #define SIMDJSON_IMPLEMENTATION_ID_westmere 6 #define SIMDJSON_IMPLEMENTATION_ID_FOR(IMPL) SIMDJSON_CAT(SIMDJSON_IMPLEMENTATION_ID_, IMPL) #define SIMDJSON_IMPLEMENTATION_ID SIMDJSON_IMPLEMENTATION_ID_FOR(SIMDJSON_IMPLEMENTATION) #define SIMDJSON_IMPLEMENTATION_IS(IMPL) SIMDJSON_IMPLEMENTATION_ID == SIMDJSON_IMPLEMENTATION_ID_FOR(IMPL) // // First, figure out which implementations can be run. Doing it here makes it so we don't have to worry about the order // in which we include them. // #ifndef SIMDJSON_IMPLEMENTATION_ARM64 #define SIMDJSON_IMPLEMENTATION_ARM64 (SIMDJSON_IS_ARM64) #endif #define SIMDJSON_CAN_ALWAYS_RUN_ARM64 SIMDJSON_IMPLEMENTATION_ARM64 && SIMDJSON_IS_ARM64 // Default Icelake to on if this is x86-64. Even if we're not compiled for it, it could be selected // at runtime. #ifndef SIMDJSON_IMPLEMENTATION_ICELAKE #define SIMDJSON_IMPLEMENTATION_ICELAKE ((SIMDJSON_IS_X86_64) && (SIMDJSON_AVX512_ALLOWED) && (SIMDJSON_COMPILER_SUPPORTS_VBMI2)) #endif #ifdef _MSC_VER // To see why (__BMI__) && (__PCLMUL__) && (__LZCNT__) are not part of this next line, see // https://github.com/simdjson/simdjson/issues/1247 #define SIMDJSON_CAN_ALWAYS_RUN_ICELAKE ((SIMDJSON_IMPLEMENTATION_ICELAKE) && (__AVX2__) && (__AVX512F__) && (__AVX512DQ__) && (__AVX512CD__) && (__AVX512BW__) && (__AVX512VL__) && (__AVX512VBMI2__)) #else #define SIMDJSON_CAN_ALWAYS_RUN_ICELAKE ((SIMDJSON_IMPLEMENTATION_ICELAKE) && (__AVX2__) && (__BMI__) && (__PCLMUL__) && (__LZCNT__) && (__AVX512F__) && (__AVX512DQ__) && (__AVX512CD__) && (__AVX512BW__) && (__AVX512VL__) && (__AVX512VBMI2__)) #endif // Default Haswell to on if this is x86-64. Even if we're not compiled for it, it could be selected // at runtime. 
#ifndef SIMDJSON_IMPLEMENTATION_HASWELL #if SIMDJSON_CAN_ALWAYS_RUN_ICELAKE // if icelake is always available, never enable haswell. #define SIMDJSON_IMPLEMENTATION_HASWELL 0 #else #define SIMDJSON_IMPLEMENTATION_HASWELL SIMDJSON_IS_X86_64 #endif #endif #ifdef _MSC_VER // To see why (__BMI__) && (__PCLMUL__) && (__LZCNT__) are not part of this next line, see // https://github.com/simdjson/simdjson/issues/1247 #define SIMDJSON_CAN_ALWAYS_RUN_HASWELL ((SIMDJSON_IMPLEMENTATION_HASWELL) && (SIMDJSON_IS_X86_64) && (__AVX2__)) #else #define SIMDJSON_CAN_ALWAYS_RUN_HASWELL ((SIMDJSON_IMPLEMENTATION_HASWELL) && (SIMDJSON_IS_X86_64) && (__AVX2__) && (__BMI__) && (__PCLMUL__) && (__LZCNT__)) #endif // Default Westmere to on if this is x86-64. #ifndef SIMDJSON_IMPLEMENTATION_WESTMERE #if SIMDJSON_CAN_ALWAYS_RUN_ICELAKE || SIMDJSON_CAN_ALWAYS_RUN_HASWELL // if icelake or haswell are always available, never enable westmere. #define SIMDJSON_IMPLEMENTATION_WESTMERE 0 #else #define SIMDJSON_IMPLEMENTATION_WESTMERE SIMDJSON_IS_X86_64 #endif #endif #define SIMDJSON_CAN_ALWAYS_RUN_WESTMERE (SIMDJSON_IMPLEMENTATION_WESTMERE && SIMDJSON_IS_X86_64 && __SSE4_2__ && __PCLMUL__) #ifndef SIMDJSON_IMPLEMENTATION_PPC64 #define SIMDJSON_IMPLEMENTATION_PPC64 (SIMDJSON_IS_PPC64 && SIMDJSON_IS_PPC64_VMX) #endif #define SIMDJSON_CAN_ALWAYS_RUN_PPC64 SIMDJSON_IMPLEMENTATION_PPC64 && SIMDJSON_IS_PPC64 && SIMDJSON_IS_PPC64_VMX // Default Fallback to on unless a builtin implementation has already been selected. #ifndef SIMDJSON_IMPLEMENTATION_FALLBACK #if SIMDJSON_CAN_ALWAYS_RUN_ARM64 || SIMDJSON_CAN_ALWAYS_RUN_ICELAKE || SIMDJSON_CAN_ALWAYS_RUN_HASWELL || SIMDJSON_CAN_ALWAYS_RUN_WESTMERE || SIMDJSON_CAN_ALWAYS_RUN_PPC64 // if anything at all except fallback can always run, then disable fallback. 
#define SIMDJSON_IMPLEMENTATION_FALLBACK 0 #else #define SIMDJSON_IMPLEMENTATION_FALLBACK 1 #endif #endif #define SIMDJSON_CAN_ALWAYS_RUN_FALLBACK SIMDJSON_IMPLEMENTATION_FALLBACK // Determine the best builtin implementation #ifndef SIMDJSON_BUILTIN_IMPLEMENTATION #if SIMDJSON_CAN_ALWAYS_RUN_ICELAKE #define SIMDJSON_BUILTIN_IMPLEMENTATION icelake #elif SIMDJSON_CAN_ALWAYS_RUN_HASWELL #define SIMDJSON_BUILTIN_IMPLEMENTATION haswell #elif SIMDJSON_CAN_ALWAYS_RUN_WESTMERE #define SIMDJSON_BUILTIN_IMPLEMENTATION westmere #elif SIMDJSON_CAN_ALWAYS_RUN_ARM64 #define SIMDJSON_BUILTIN_IMPLEMENTATION arm64 #elif SIMDJSON_CAN_ALWAYS_RUN_PPC64 #define SIMDJSON_BUILTIN_IMPLEMENTATION ppc64 #elif SIMDJSON_CAN_ALWAYS_RUN_FALLBACK #define SIMDJSON_BUILTIN_IMPLEMENTATION fallback #else #error "All possible implementations (including fallback) have been disabled! simdjson will not run." #endif #endif // SIMDJSON_BUILTIN_IMPLEMENTATION #define SIMDJSON_BUILTIN_IMPLEMENTATION_ID SIMDJSON_IMPLEMENTATION_ID_FOR(SIMDJSON_BUILTIN_IMPLEMENTATION) #define SIMDJSON_BUILTIN_IMPLEMENTATION_IS(IMPL) SIMDJSON_BUILTIN_IMPLEMENTATION_ID == SIMDJSON_IMPLEMENTATION_ID_FOR(IMPL) #endif // SIMDJSON_IMPLEMENTATION_DETECTION_H /* end file simdjson/implementation_detection.h */ namespace simdjson { #if SIMDJSON_BUILTIN_IMPLEMENTATION_IS(arm64) namespace arm64 {} #elif SIMDJSON_BUILTIN_IMPLEMENTATION_IS(fallback) namespace fallback {} #elif SIMDJSON_BUILTIN_IMPLEMENTATION_IS(haswell) namespace haswell {} #elif SIMDJSON_BUILTIN_IMPLEMENTATION_IS(icelake) namespace icelake {} #elif SIMDJSON_BUILTIN_IMPLEMENTATION_IS(ppc64) namespace ppc64 {} #elif SIMDJSON_BUILTIN_IMPLEMENTATION_IS(westmere) namespace westmere {} #else #error Unknown SIMDJSON_BUILTIN_IMPLEMENTATION #endif /** * Represents the best statically linked simdjson implementation that can be used by the compiling * program. 
* * Detects what options the program is compiled against, and picks the minimum implementation that * will work on any computer that can run the program. For example, if you compile with g++ * -march=westmere, it will pick the westmere implementation. The haswell implementation will * still be available, and can be selected at runtime, but the builtin implementation (and any * code that uses it) will use westmere. */ namespace builtin = SIMDJSON_BUILTIN_IMPLEMENTATION; } // namespace simdjson #endif // SIMDJSON_BUILTIN_BASE_H /* end file simdjson/builtin/base.h */ /* including simdjson/builtin/implementation.h: #include "simdjson/builtin/implementation.h" */ /* begin file simdjson/builtin/implementation.h */ #ifndef SIMDJSON_BUILTIN_IMPLEMENTATION_H #define SIMDJSON_BUILTIN_IMPLEMENTATION_H /* skipped duplicate #include "simdjson/builtin/base.h" */ /* including simdjson/generic/dependencies.h: #include "simdjson/generic/dependencies.h" */ /* begin file simdjson/generic/dependencies.h */ #ifdef SIMDJSON_CONDITIONAL_INCLUDE #error simdjson/generic/dependencies.h must be included before defining SIMDJSON_CONDITIONAL_INCLUDE! #endif #ifndef SIMDJSON_GENERIC_DEPENDENCIES_H #define SIMDJSON_GENERIC_DEPENDENCIES_H // Internal headers needed for generics. // All includes referencing simdjson headers *not* under simdjson/generic must be here! // Otherwise, amalgamation will fail. /* skipped duplicate #include "simdjson/base.h" */ /* skipped duplicate #include "simdjson/implementation.h" */ /* skipped duplicate #include "simdjson/implementation_detection.h" */ /* including simdjson/internal/instruction_set.h: #include "simdjson/internal/instruction_set.h" */ /* begin file simdjson/internal/instruction_set.h */ /* From https://github.com/endorno/pytorch/blob/master/torch/lib/TH/generic/simd/simd.h Highly modified. 
Copyright (c) 2016- Facebook, Inc (Adam Paszke) Copyright (c) 2014- Facebook, Inc (Soumith Chintala) Copyright (c) 2011-2014 Idiap Research Institute (Ronan Collobert) Copyright (c) 2012-2014 Deepmind Technologies (Koray Kavukcuoglu) Copyright (c) 2011-2012 NEC Laboratories America (Koray Kavukcuoglu) Copyright (c) 2011-2013 NYU (Clement Farabet) Copyright (c) 2006-2010 NEC Laboratories America (Ronan Collobert, Leon Bottou, Iain Melvin, Jason Weston) Copyright (c) 2006 Idiap Research Institute (Samy Bengio) Copyright (c) 2001-2004 Idiap Research Institute (Ronan Collobert, Samy Bengio, Johnny Mariethoz) All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. Neither the names of Facebook, Deepmind Technologies, NYU, NEC Laboratories America and IDIAP Research Institute nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef SIMDJSON_INTERNAL_INSTRUCTION_SET_H #define SIMDJSON_INTERNAL_INSTRUCTION_SET_H namespace simdjson { namespace internal { enum instruction_set { DEFAULT = 0x0, NEON = 0x1, AVX2 = 0x4, SSE42 = 0x8, PCLMULQDQ = 0x10, BMI1 = 0x20, BMI2 = 0x40, ALTIVEC = 0x80, AVX512F = 0x100, AVX512DQ = 0x200, AVX512IFMA = 0x400, AVX512PF = 0x800, AVX512ER = 0x1000, AVX512CD = 0x2000, AVX512BW = 0x4000, AVX512VL = 0x8000, AVX512VBMI2 = 0x10000 }; } // namespace internal } // namespace simdjson #endif // SIMDJSON_INTERNAL_INSTRUCTION_SET_H /* end file simdjson/internal/instruction_set.h */ /* skipped duplicate #include "simdjson/internal/dom_parser_implementation.h" */ /* including simdjson/internal/jsoncharutils_tables.h: #include "simdjson/internal/jsoncharutils_tables.h" */ /* begin file simdjson/internal/jsoncharutils_tables.h */ #ifndef SIMDJSON_INTERNAL_JSONCHARUTILS_TABLES_H #define SIMDJSON_INTERNAL_JSONCHARUTILS_TABLES_H /* skipped duplicate #include "simdjson/base.h" */ #ifdef JSON_TEST_STRINGS void found_string(const uint8_t *buf, const uint8_t *parsed_begin, const uint8_t *parsed_end); void found_bad_string(const uint8_t *buf); #endif namespace simdjson { namespace internal { // structural chars here are // they are { 0x7b } 0x7d : 0x3a [ 0x5b ] 0x5d , 0x2c (and NULL) // we are also interested in the four whitespace characters // space 0x20, linefeed 0x0a, horizontal tab 0x09 and carriage return 0x0d extern SIMDJSON_DLLIMPORTEXPORT const bool 
structural_or_whitespace_negated[256]; extern SIMDJSON_DLLIMPORTEXPORT const bool structural_or_whitespace[256]; extern SIMDJSON_DLLIMPORTEXPORT const uint32_t digit_to_val32[886]; } // namespace internal } // namespace simdjson #endif // SIMDJSON_INTERNAL_JSONCHARUTILS_TABLES_H /* end file simdjson/internal/jsoncharutils_tables.h */ /* including simdjson/internal/numberparsing_tables.h: #include "simdjson/internal/numberparsing_tables.h" */ /* begin file simdjson/internal/numberparsing_tables.h */ #ifndef SIMDJSON_INTERNAL_NUMBERPARSING_TABLES_H #define SIMDJSON_INTERNAL_NUMBERPARSING_TABLES_H /* skipped duplicate #include "simdjson/base.h" */ namespace simdjson { namespace internal { /** * The smallest non-zero float (binary64) is 2^-1074. * We take as input numbers of the form w x 10^q where w < 2^64. * We have that w * 10^-343 < 2^(64-344) 5^-343 < 2^-1076. * However, we have that * (2^64-1) * 10^-342 = (2^64-1) * 2^-342 * 5^-342 > 2^-1074. * Thus it is possible for a number of the form w * 10^-342 where * w is a 64-bit value to be a non-zero floating-point number. ********* * Any number of form w * 10^309 where w>= 1 is going to be * infinite in binary64 so we never need to worry about powers * of 5 greater than 308. */ constexpr int smallest_power = -342; constexpr int largest_power = 308; /** * Represents a 128-bit value. * low: least significant 64 bits. * high: most significant 64 bits. */ struct value128 { uint64_t low; uint64_t high; }; // Precomputed powers of ten from 10^0 to 10^22. These // can be represented exactly using the double type. extern SIMDJSON_DLLIMPORTEXPORT const double power_of_ten[]; /** * When mapping numbers from decimal to binary, * we go from w * 10^q to m * 2^p but we have * 10^q = 5^q * 2^q, so effectively * we are trying to match * w * 2^q * 5^q to m * 2^p. Thus the powers of two * are not a concern since they can be represented * exactly using the binary notation, only the powers of five * affect the binary significand. 
*/ // The truncated powers of five from 5^-342 all the way to 5^308 // The mantissa is truncated to 128 bits, and // never rounded up. Uses about 10KB. extern SIMDJSON_DLLIMPORTEXPORT const uint64_t power_of_five_128[]; } // namespace internal } // namespace simdjson #endif // SIMDJSON_INTERNAL_NUMBERPARSING_TABLES_H /* end file simdjson/internal/numberparsing_tables.h */ /* including simdjson/internal/simdprune_tables.h: #include "simdjson/internal/simdprune_tables.h" */ /* begin file simdjson/internal/simdprune_tables.h */ #ifndef SIMDJSON_INTERNAL_SIMDPRUNE_TABLES_H #define SIMDJSON_INTERNAL_SIMDPRUNE_TABLES_H /* skipped duplicate #include "simdjson/base.h" */ #include namespace simdjson { // table modified and copied from namespace internal { // http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetTable extern SIMDJSON_DLLIMPORTEXPORT const unsigned char BitsSetTable256mul2[256]; extern SIMDJSON_DLLIMPORTEXPORT const uint8_t pshufb_combine_table[272]; // 256 * 8 bytes = 2kB, easily fits in cache. 
extern SIMDJSON_DLLIMPORTEXPORT const uint64_t thintable_epi8[256]; } // namespace internal } // namespace simdjson #endif // SIMDJSON_INTERNAL_SIMDPRUNE_TABLES_H /* end file simdjson/internal/simdprune_tables.h */ #endif // SIMDJSON_GENERIC_DEPENDENCIES_H /* end file simdjson/generic/dependencies.h */ /* defining SIMDJSON_CONDITIONAL_INCLUDE */ #define SIMDJSON_CONDITIONAL_INCLUDE #if SIMDJSON_BUILTIN_IMPLEMENTATION_IS(arm64) /* including simdjson/arm64/implementation.h: #include "simdjson/arm64/implementation.h" */ /* begin file simdjson/arm64/implementation.h */ #ifndef SIMDJSON_ARM64_IMPLEMENTATION_H #define SIMDJSON_ARM64_IMPLEMENTATION_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #include "simdjson/base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/implementation.h" */ /* amalgamation skipped (editor-only): #include "simdjson/internal/instruction_set.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { namespace arm64 { /** * @private */ class implementation final : public simdjson::implementation { public: simdjson_inline implementation() : simdjson::implementation("arm64", "ARM NEON", internal::instruction_set::NEON) {} simdjson_warn_unused error_code create_dom_parser_implementation( size_t capacity, size_t max_length, std::unique_ptr& dst ) const noexcept final; simdjson_warn_unused error_code minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) const noexcept final; simdjson_warn_unused bool validate_utf8(const char *buf, size_t len) const noexcept final; }; } // namespace arm64 } // namespace simdjson #endif // SIMDJSON_ARM64_IMPLEMENTATION_H /* end file simdjson/arm64/implementation.h */ #elif SIMDJSON_BUILTIN_IMPLEMENTATION_IS(fallback) /* including simdjson/fallback/implementation.h: #include "simdjson/fallback/implementation.h" */ /* begin file simdjson/fallback/implementation.h */ 
#ifndef SIMDJSON_FALLBACK_IMPLEMENTATION_H #define SIMDJSON_FALLBACK_IMPLEMENTATION_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #include "simdjson/fallback/base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/implementation.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { namespace fallback { /** * @private */ class implementation final : public simdjson::implementation { public: simdjson_inline implementation() : simdjson::implementation( "fallback", "Generic fallback implementation", 0 ) {} simdjson_warn_unused error_code create_dom_parser_implementation( size_t capacity, size_t max_length, std::unique_ptr& dst ) const noexcept final; simdjson_warn_unused error_code minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) const noexcept final; simdjson_warn_unused bool validate_utf8(const char *buf, size_t len) const noexcept final; }; } // namespace fallback } // namespace simdjson #endif // SIMDJSON_FALLBACK_IMPLEMENTATION_H /* end file simdjson/fallback/implementation.h */ #elif SIMDJSON_BUILTIN_IMPLEMENTATION_IS(haswell) /* including simdjson/haswell/implementation.h: #include "simdjson/haswell/implementation.h" */ /* begin file simdjson/haswell/implementation.h */ #ifndef SIMDJSON_HASWELL_IMPLEMENTATION_H #define SIMDJSON_HASWELL_IMPLEMENTATION_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #include "simdjson/haswell/base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/implementation.h" */ /* amalgamation skipped (editor-only): #include "simdjson/internal/instruction_set.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ // The constructor may be executed on any host, so we take care not to use SIMDJSON_TARGET_HASWELL namespace simdjson { namespace haswell { /** * @private 
*/ class implementation final : public simdjson::implementation { public: simdjson_inline implementation() : simdjson::implementation( "haswell", "Intel/AMD AVX2", internal::instruction_set::AVX2 | internal::instruction_set::PCLMULQDQ | internal::instruction_set::BMI1 | internal::instruction_set::BMI2 ) {} simdjson_warn_unused error_code create_dom_parser_implementation( size_t capacity, size_t max_length, std::unique_ptr& dst ) const noexcept final; simdjson_warn_unused error_code minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) const noexcept final; simdjson_warn_unused bool validate_utf8(const char *buf, size_t len) const noexcept final; }; } // namespace haswell } // namespace simdjson #endif // SIMDJSON_HASWELL_IMPLEMENTATION_H /* end file simdjson/haswell/implementation.h */ #elif SIMDJSON_BUILTIN_IMPLEMENTATION_IS(icelake) /* including simdjson/icelake/implementation.h: #include "simdjson/icelake/implementation.h" */ /* begin file simdjson/icelake/implementation.h */ #ifndef SIMDJSON_ICELAKE_IMPLEMENTATION_H #define SIMDJSON_ICELAKE_IMPLEMENTATION_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #include "simdjson/icelake/base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/implementation.h" */ /* amalgamation skipped (editor-only): #include "simdjson/internal/instruction_set.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ // The constructor may be executed on any host, so we take care not to use SIMDJSON_TARGET_ICELAKE namespace simdjson { namespace icelake { /** * @private */ class implementation final : public simdjson::implementation { public: simdjson_inline implementation() : simdjson::implementation( "icelake", "Intel/AMD AVX512", internal::instruction_set::AVX2 | internal::instruction_set::PCLMULQDQ | internal::instruction_set::BMI1 | internal::instruction_set::BMI2 | internal::instruction_set::AVX512F | 
internal::instruction_set::AVX512DQ | internal::instruction_set::AVX512CD | internal::instruction_set::AVX512BW | internal::instruction_set::AVX512VL | internal::instruction_set::AVX512VBMI2 ) {} simdjson_warn_unused error_code create_dom_parser_implementation( size_t capacity, size_t max_length, std::unique_ptr& dst ) const noexcept final; simdjson_warn_unused error_code minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) const noexcept final; simdjson_warn_unused bool validate_utf8(const char *buf, size_t len) const noexcept final; }; } // namespace icelake } // namespace simdjson #endif // SIMDJSON_ICELAKE_IMPLEMENTATION_H /* end file simdjson/icelake/implementation.h */ #elif SIMDJSON_BUILTIN_IMPLEMENTATION_IS(ppc64) /* including simdjson/ppc64/implementation.h: #include "simdjson/ppc64/implementation.h" */ /* begin file simdjson/ppc64/implementation.h */ #ifndef SIMDJSON_PPC64_IMPLEMENTATION_H #define SIMDJSON_PPC64_IMPLEMENTATION_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #include "simdjson/ppc64/base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/implementation.h" */ /* amalgamation skipped (editor-only): #include "simdjson/internal/instruction_set.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { /** * Implementation for ALTIVEC (PPC64). 
*/ namespace ppc64 { /** * @private */ class implementation final : public simdjson::implementation { public: simdjson_inline implementation() : simdjson::implementation("ppc64", "PPC64 ALTIVEC", internal::instruction_set::ALTIVEC) {} simdjson_warn_unused error_code create_dom_parser_implementation( size_t capacity, size_t max_length, std::unique_ptr &dst) const noexcept final; simdjson_warn_unused error_code minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) const noexcept final; simdjson_warn_unused bool validate_utf8(const char *buf, size_t len) const noexcept final; }; } // namespace ppc64 } // namespace simdjson #endif // SIMDJSON_PPC64_IMPLEMENTATION_H /* end file simdjson/ppc64/implementation.h */ #elif SIMDJSON_BUILTIN_IMPLEMENTATION_IS(westmere) /* including simdjson/westmere/implementation.h: #include "simdjson/westmere/implementation.h" */ /* begin file simdjson/westmere/implementation.h */ #ifndef SIMDJSON_WESTMERE_IMPLEMENTATION_H #define SIMDJSON_WESTMERE_IMPLEMENTATION_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #include "simdjson/westmere/base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/implementation.h" */ /* amalgamation skipped (editor-only): #include "simdjson/internal/instruction_set.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ // The constructor may be executed on any host, so we take care not to use SIMDJSON_TARGET_WESTMERE namespace simdjson { namespace westmere { /** * @private */ class implementation final : public simdjson::implementation { public: simdjson_inline implementation() : simdjson::implementation("westmere", "Intel/AMD SSE4.2", internal::instruction_set::SSE42 | internal::instruction_set::PCLMULQDQ) {} simdjson_warn_unused error_code create_dom_parser_implementation( size_t capacity, size_t max_length, std::unique_ptr& dst ) const noexcept final; simdjson_warn_unused 
error_code minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) const noexcept final; simdjson_warn_unused bool validate_utf8(const char *buf, size_t len) const noexcept final; }; } // namespace westmere } // namespace simdjson #endif // SIMDJSON_WESTMERE_IMPLEMENTATION_H /* end file simdjson/westmere/implementation.h */ #else #error Unknown SIMDJSON_BUILTIN_IMPLEMENTATION #endif /* undefining SIMDJSON_CONDITIONAL_INCLUDE */ #undef SIMDJSON_CONDITIONAL_INCLUDE namespace simdjson { /** * Function which returns a pointer to an implementation matching the "builtin" implementation. * The builtin implementation is the best statically linked simdjson implementation that can be used by the compiling * program. If you compile with g++ -march=haswell, this will return the haswell implementation. * It is handy to be able to check what builtin was used: builtin_implementation()->name(). */ const implementation * builtin_implementation(); } // namespace simdjson #endif // SIMDJSON_BUILTIN_IMPLEMENTATION_H /* end file simdjson/builtin/implementation.h */ /* skipped duplicate #include "simdjson/generic/dependencies.h" */ /* defining SIMDJSON_CONDITIONAL_INCLUDE */ #define SIMDJSON_CONDITIONAL_INCLUDE #if SIMDJSON_BUILTIN_IMPLEMENTATION_IS(arm64) /* including simdjson/arm64.h: #include "simdjson/arm64.h" */ /* begin file simdjson/arm64.h */ #ifndef SIMDJSON_ARM64_H #define SIMDJSON_ARM64_H /* including simdjson/arm64/begin.h: #include "simdjson/arm64/begin.h" */ /* begin file simdjson/arm64/begin.h */ /* defining SIMDJSON_IMPLEMENTATION to "arm64" */ #define SIMDJSON_IMPLEMENTATION arm64 /* including simdjson/arm64/base.h: #include "simdjson/arm64/base.h" */ /* begin file simdjson/arm64/base.h */ #ifndef SIMDJSON_ARM64_BASE_H #define SIMDJSON_ARM64_BASE_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #include "simdjson/base.h" */ /* amalgamation skipped (editor-only): #endif // 
SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { /** * Implementation for NEON (ARMv8). */ namespace arm64 { class implementation; namespace { namespace simd { template struct simd8; template struct simd8x64; } // namespace simd } // unnamed namespace } // namespace arm64 } // namespace simdjson #endif // SIMDJSON_ARM64_BASE_H /* end file simdjson/arm64/base.h */ /* including simdjson/arm64/intrinsics.h: #include "simdjson/arm64/intrinsics.h" */ /* begin file simdjson/arm64/intrinsics.h */ #ifndef SIMDJSON_ARM64_INTRINSICS_H #define SIMDJSON_ARM64_INTRINSICS_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #include "simdjson/arm64/base.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ // This should be the correct header whether // you use visual studio or other compilers. #include static_assert(sizeof(uint8x16_t) <= simdjson::SIMDJSON_PADDING, "insufficient padding for arm64"); #endif // SIMDJSON_ARM64_INTRINSICS_H /* end file simdjson/arm64/intrinsics.h */ /* including simdjson/arm64/bitmanipulation.h: #include "simdjson/arm64/bitmanipulation.h" */ /* begin file simdjson/arm64/bitmanipulation.h */ #ifndef SIMDJSON_ARM64_BITMANIPULATION_H #define SIMDJSON_ARM64_BITMANIPULATION_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #include "simdjson/arm64/base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/arm64/intrinsics.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { namespace arm64 { namespace { // We sometimes call trailing_zero on inputs that are zero, // but the algorithms do not end up using the returned value. // Sadly, sanitizers are not smart enough to figure it out. SIMDJSON_NO_SANITIZE_UNDEFINED // This function can be used safely even if not all bytes have been // initialized. 
// See issue https://github.com/simdjson/simdjson/issues/1965 SIMDJSON_NO_SANITIZE_MEMORY simdjson_inline int trailing_zeroes(uint64_t input_num) { #ifdef SIMDJSON_REGULAR_VISUAL_STUDIO unsigned long ret; // Search the mask data from least significant bit (LSB) // to the most significant bit (MSB) for a set bit (1). _BitScanForward64(&ret, input_num); return (int)ret; #else // SIMDJSON_REGULAR_VISUAL_STUDIO return __builtin_ctzll(input_num); #endif // SIMDJSON_REGULAR_VISUAL_STUDIO } /* result might be undefined when input_num is zero */ simdjson_inline uint64_t clear_lowest_bit(uint64_t input_num) { return input_num & (input_num-1); } /* result might be undefined when input_num is zero */ simdjson_inline int leading_zeroes(uint64_t input_num) { #ifdef SIMDJSON_REGULAR_VISUAL_STUDIO unsigned long leading_zero = 0; // Search the mask data from most significant bit (MSB) // to least significant bit (LSB) for a set bit (1). if (_BitScanReverse64(&leading_zero, input_num)) return (int)(63 - leading_zero); else return 64; #else return __builtin_clzll(input_num); #endif// SIMDJSON_REGULAR_VISUAL_STUDIO } /* result might be undefined when input_num is zero */ simdjson_inline int count_ones(uint64_t input_num) { return vaddv_u8(vcnt_u8(vcreate_u8(input_num))); } #if defined(__GNUC__) // catches clang and gcc /** * ARM has a fast 64-bit "bit reversal function" that is handy. However, * it is not generally available as an intrinsic function under Visual * Studio (though this might be changing). Even under clang/gcc, we * apparently need to invoke inline assembly. */ /* * We use SIMDJSON_PREFER_REVERSE_BITS as a hint that algorithms that * work well with bit reversal may use it. */ #define SIMDJSON_PREFER_REVERSE_BITS 1 /* reverse the bits */ simdjson_inline uint64_t reverse_bits(uint64_t input_num) { uint64_t rev_bits; __asm("rbit %0, %1" : "=r"(rev_bits) : "r"(input_num)); return rev_bits; } /** * Flips bit at index 63 - lz. 
Thus if you have 'leading_zeroes' leading zeroes, * then this will set to zero the leading bit. It is possible for leading_zeroes to be * greating or equal to 63 in which case we trigger undefined behavior, but the output * of such undefined behavior is never used. **/ SIMDJSON_NO_SANITIZE_UNDEFINED simdjson_inline uint64_t zero_leading_bit(uint64_t rev_bits, int leading_zeroes) { return rev_bits ^ (uint64_t(0x8000000000000000) >> leading_zeroes); } #endif simdjson_inline bool add_overflow(uint64_t value1, uint64_t value2, uint64_t *result) { #ifdef SIMDJSON_REGULAR_VISUAL_STUDIO *result = value1 + value2; return *result < value1; #else return __builtin_uaddll_overflow(value1, value2, reinterpret_cast(result)); #endif } } // unnamed namespace } // namespace arm64 } // namespace simdjson #endif // SIMDJSON_ARM64_BITMANIPULATION_H /* end file simdjson/arm64/bitmanipulation.h */ /* including simdjson/arm64/bitmask.h: #include "simdjson/arm64/bitmask.h" */ /* begin file simdjson/arm64/bitmask.h */ #ifndef SIMDJSON_ARM64_BITMASK_H #define SIMDJSON_ARM64_BITMASK_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #include "simdjson/arm64/base.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { namespace arm64 { namespace { // // Perform a "cumulative bitwise xor," flipping bits each time a 1 is encountered. // // For example, prefix_xor(00100100) == 00011100 // simdjson_inline uint64_t prefix_xor(uint64_t bitmask) { ///////////// // We could do this with PMULL, but it is apparently slow. 
// //#ifdef __ARM_FEATURE_CRYPTO // some ARM processors lack this extension //return vmull_p64(-1ULL, bitmask); //#else // Analysis by @sebpop: // When diffing the assembly for src/stage1_find_marks.cpp I see that the eors are all spread out // in between other vector code, so effectively the extra cycles of the sequence do not matter // because the GPR units are idle otherwise and the critical path is on the FP side. // Also the PMULL requires two extra fmovs: GPR->FP (3 cycles in N1, 5 cycles in A72 ) // and FP->GPR (2 cycles on N1 and 5 cycles on A72.) /////////// bitmask ^= bitmask << 1; bitmask ^= bitmask << 2; bitmask ^= bitmask << 4; bitmask ^= bitmask << 8; bitmask ^= bitmask << 16; bitmask ^= bitmask << 32; return bitmask; } } // unnamed namespace } // namespace arm64 } // namespace simdjson #endif /* end file simdjson/arm64/bitmask.h */ /* including simdjson/arm64/numberparsing_defs.h: #include "simdjson/arm64/numberparsing_defs.h" */ /* begin file simdjson/arm64/numberparsing_defs.h */ #ifndef SIMDJSON_ARM64_NUMBERPARSING_DEFS_H #define SIMDJSON_ARM64_NUMBERPARSING_DEFS_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #include "simdjson/arm64/base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/arm64/intrinsics.h" */ /* amalgamation skipped (editor-only): #include "simdjson/internal/numberparsing_tables.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ #include #if _M_ARM64 // __umulh requires intrin.h #include #endif // _M_ARM64 namespace simdjson { namespace arm64 { namespace numberparsing { // we don't have SSE, so let us use a scalar function // credit: https://johnnylee-sde.github.io/Fast-numeric-string-to-int/ /** @private */ static simdjson_inline uint32_t parse_eight_digits_unrolled(const uint8_t *chars) { uint64_t val; std::memcpy(&val, chars, sizeof(uint64_t)); val = (val & 0x0F0F0F0F0F0F0F0F) * 2561 >> 8; val = (val & 
0x00FF00FF00FF00FF) * 6553601 >> 16; return uint32_t((val & 0x0000FFFF0000FFFF) * 42949672960001 >> 32); } simdjson_inline internal::value128 full_multiplication(uint64_t value1, uint64_t value2) { internal::value128 answer; #if SIMDJSON_REGULAR_VISUAL_STUDIO || SIMDJSON_IS_32BITS #ifdef _M_ARM64 // ARM64 has native support for 64-bit multiplications, no need to emultate answer.high = __umulh(value1, value2); answer.low = value1 * value2; #else answer.low = _umul128(value1, value2, &answer.high); // _umul128 not available on ARM64 #endif // _M_ARM64 #else // SIMDJSON_REGULAR_VISUAL_STUDIO || SIMDJSON_IS_32BITS __uint128_t r = (static_cast<__uint128_t>(value1)) * value2; answer.low = uint64_t(r); answer.high = uint64_t(r >> 64); #endif return answer; } } // namespace numberparsing } // namespace arm64 } // namespace simdjson #define SIMDJSON_SWAR_NUMBER_PARSING 1 #endif // SIMDJSON_ARM64_NUMBERPARSING_DEFS_H /* end file simdjson/arm64/numberparsing_defs.h */ /* including simdjson/arm64/simd.h: #include "simdjson/arm64/simd.h" */ /* begin file simdjson/arm64/simd.h */ #ifndef SIMDJSON_ARM64_SIMD_H #define SIMDJSON_ARM64_SIMD_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #include "simdjson/arm64/base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/arm64/bitmanipulation.h" */ /* amalgamation skipped (editor-only): #include "simdjson/internal/simdprune_tables.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { namespace arm64 { namespace { namespace simd { #ifdef SIMDJSON_REGULAR_VISUAL_STUDIO namespace { // Start of private section with Visual Studio workaround #ifndef simdjson_make_uint8x16_t #define simdjson_make_uint8x16_t(x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, \ x13, x14, x15, x16) \ ([=]() { \ uint8_t array[16] = {x1, x2, x3, x4, x5, x6, x7, x8, \ x9, x10, x11, x12, x13, x14, x15, x16}; \ return 
vld1q_u8(array); \ }()) #endif #ifndef simdjson_make_int8x16_t #define simdjson_make_int8x16_t(x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, \ x13, x14, x15, x16) \ ([=]() { \ int8_t array[16] = {x1, x2, x3, x4, x5, x6, x7, x8, \ x9, x10, x11, x12, x13, x14, x15, x16}; \ return vld1q_s8(array); \ }()) #endif #ifndef simdjson_make_uint8x8_t #define simdjson_make_uint8x8_t(x1, x2, x3, x4, x5, x6, x7, x8) \ ([=]() { \ uint8_t array[8] = {x1, x2, x3, x4, x5, x6, x7, x8}; \ return vld1_u8(array); \ }()) #endif #ifndef simdjson_make_int8x8_t #define simdjson_make_int8x8_t(x1, x2, x3, x4, x5, x6, x7, x8) \ ([=]() { \ int8_t array[8] = {x1, x2, x3, x4, x5, x6, x7, x8}; \ return vld1_s8(array); \ }()) #endif #ifndef simdjson_make_uint16x8_t #define simdjson_make_uint16x8_t(x1, x2, x3, x4, x5, x6, x7, x8) \ ([=]() { \ uint16_t array[8] = {x1, x2, x3, x4, x5, x6, x7, x8}; \ return vld1q_u16(array); \ }()) #endif #ifndef simdjson_make_int16x8_t #define simdjson_make_int16x8_t(x1, x2, x3, x4, x5, x6, x7, x8) \ ([=]() { \ int16_t array[8] = {x1, x2, x3, x4, x5, x6, x7, x8}; \ return vld1q_s16(array); \ }()) #endif // End of private section with Visual Studio workaround } // namespace #endif // SIMDJSON_REGULAR_VISUAL_STUDIO template struct simd8; // // Base class of simd8 and simd8, both of which use uint8x16_t internally. 
// template> struct base_u8 { uint8x16_t value; static const int SIZE = sizeof(value); // Conversion from/to SIMD register simdjson_inline base_u8(const uint8x16_t _value) : value(_value) {} simdjson_inline operator const uint8x16_t&() const { return this->value; } simdjson_inline operator uint8x16_t&() { return this->value; } // Bit operations simdjson_inline simd8 operator|(const simd8 other) const { return vorrq_u8(*this, other); } simdjson_inline simd8 operator&(const simd8 other) const { return vandq_u8(*this, other); } simdjson_inline simd8 operator^(const simd8 other) const { return veorq_u8(*this, other); } simdjson_inline simd8 bit_andnot(const simd8 other) const { return vbicq_u8(*this, other); } simdjson_inline simd8 operator~() const { return *this ^ 0xFFu; } simdjson_inline simd8& operator|=(const simd8 other) { auto this_cast = static_cast*>(this); *this_cast = *this_cast | other; return *this_cast; } simdjson_inline simd8& operator&=(const simd8 other) { auto this_cast = static_cast*>(this); *this_cast = *this_cast & other; return *this_cast; } simdjson_inline simd8& operator^=(const simd8 other) { auto this_cast = static_cast*>(this); *this_cast = *this_cast ^ other; return *this_cast; } friend simdjson_inline Mask operator==(const simd8 lhs, const simd8 rhs) { return vceqq_u8(lhs, rhs); } template simdjson_inline simd8 prev(const simd8 prev_chunk) const { return vextq_u8(prev_chunk, *this, 16 - N); } }; // SIMD byte mask type (returned by things like eq and gt) template<> struct simd8: base_u8 { typedef uint16_t bitmask_t; typedef uint32_t bitmask2_t; static simdjson_inline simd8 splat(bool _value) { return vmovq_n_u8(uint8_t(-(!!_value))); } simdjson_inline simd8(const uint8x16_t _value) : base_u8(_value) {} // False constructor simdjson_inline simd8() : simd8(vdupq_n_u8(0)) {} // Splat constructor simdjson_inline simd8(bool _value) : simd8(splat(_value)) {} // We return uint32_t instead of uint16_t because that seems to be more efficient for most 
// purposes (cutting it down to uint16_t costs performance in some compilers). simdjson_inline uint32_t to_bitmask() const { #ifdef SIMDJSON_REGULAR_VISUAL_STUDIO const uint8x16_t bit_mask = simdjson_make_uint8x16_t(0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80, 0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80); #else const uint8x16_t bit_mask = {0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80, 0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80}; #endif auto minput = *this & bit_mask; uint8x16_t tmp = vpaddq_u8(minput, minput); tmp = vpaddq_u8(tmp, tmp); tmp = vpaddq_u8(tmp, tmp); return vgetq_lane_u16(vreinterpretq_u16_u8(tmp), 0); } simdjson_inline bool any() const { return vmaxvq_u8(*this) != 0; } }; // Unsigned bytes template<> struct simd8: base_u8 { static simdjson_inline uint8x16_t splat(uint8_t _value) { return vmovq_n_u8(_value); } static simdjson_inline uint8x16_t zero() { return vdupq_n_u8(0); } static simdjson_inline uint8x16_t load(const uint8_t* values) { return vld1q_u8(values); } simdjson_inline simd8(const uint8x16_t _value) : base_u8(_value) {} // Zero constructor simdjson_inline simd8() : simd8(zero()) {} // Array constructor simdjson_inline simd8(const uint8_t values[16]) : simd8(load(values)) {} // Splat constructor simdjson_inline simd8(uint8_t _value) : simd8(splat(_value)) {} // Member-by-member initialization #ifdef SIMDJSON_REGULAR_VISUAL_STUDIO simdjson_inline simd8( uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15 ) : simd8(simdjson_make_uint8x16_t( v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10,v11,v12,v13,v14,v15 )) {} #else simdjson_inline simd8( uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15 ) : simd8(uint8x16_t{ v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, 
v10,v11,v12,v13,v14,v15 }) {} #endif // Repeat 16 values as many times as necessary (usually for lookup tables) simdjson_inline static simd8 repeat_16( uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15 ) { return simd8( v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10,v11,v12,v13,v14,v15 ); } // Store to array simdjson_inline void store(uint8_t dst[16]) const { return vst1q_u8(dst, *this); } // Saturated math simdjson_inline simd8 saturating_add(const simd8 other) const { return vqaddq_u8(*this, other); } simdjson_inline simd8 saturating_sub(const simd8 other) const { return vqsubq_u8(*this, other); } // Addition/subtraction are the same for signed and unsigned simdjson_inline simd8 operator+(const simd8 other) const { return vaddq_u8(*this, other); } simdjson_inline simd8 operator-(const simd8 other) const { return vsubq_u8(*this, other); } simdjson_inline simd8& operator+=(const simd8 other) { *this = *this + other; return *this; } simdjson_inline simd8& operator-=(const simd8 other) { *this = *this - other; return *this; } // Order-specific operations simdjson_inline uint8_t max_val() const { return vmaxvq_u8(*this); } simdjson_inline uint8_t min_val() const { return vminvq_u8(*this); } simdjson_inline simd8 max_val(const simd8 other) const { return vmaxq_u8(*this, other); } simdjson_inline simd8 min_val(const simd8 other) const { return vminq_u8(*this, other); } simdjson_inline simd8 operator<=(const simd8 other) const { return vcleq_u8(*this, other); } simdjson_inline simd8 operator>=(const simd8 other) const { return vcgeq_u8(*this, other); } simdjson_inline simd8 operator<(const simd8 other) const { return vcltq_u8(*this, other); } simdjson_inline simd8 operator>(const simd8 other) const { return vcgtq_u8(*this, other); } // Same as >, but instead of guaranteeing all 1's == true, false = 0 and true = nonzero. 
For ARM, returns all 1's. simdjson_inline simd8 gt_bits(const simd8 other) const { return simd8(*this > other); } // Same as <, but instead of guaranteeing all 1's == true, false = 0 and true = nonzero. For ARM, returns all 1's. simdjson_inline simd8 lt_bits(const simd8 other) const { return simd8(*this < other); } // Bit-specific operations simdjson_inline simd8 any_bits_set(simd8 bits) const { return vtstq_u8(*this, bits); } simdjson_inline bool any_bits_set_anywhere() const { return this->max_val() != 0; } simdjson_inline bool any_bits_set_anywhere(simd8 bits) const { return (*this & bits).any_bits_set_anywhere(); } template simdjson_inline simd8 shr() const { return vshrq_n_u8(*this, N); } template simdjson_inline simd8 shl() const { return vshlq_n_u8(*this, N); } // Perform a lookup assuming the value is between 0 and 16 (undefined behavior for out of range values) template simdjson_inline simd8 lookup_16(simd8 lookup_table) const { return lookup_table.apply_lookup_16_to(*this); } // Copies to 'output" all bytes corresponding to a 0 in the mask (interpreted as a bitset). // Passing a 0 value for mask would be equivalent to writing out every byte to output. // Only the first 16 - count_ones(mask) bytes of the result are significant but 16 bytes // get written. // Design consideration: it seems like a function with the // signature simd8 compress(uint16_t mask) would be // sensible, but the AVX ISA makes this kind of approach difficult. 
template simdjson_inline void compress(uint16_t mask, L * output) const { using internal::thintable_epi8; using internal::BitsSetTable256mul2; using internal::pshufb_combine_table; // this particular implementation was inspired by work done by @animetosho // we do it in two steps, first 8 bytes and then second 8 bytes uint8_t mask1 = uint8_t(mask); // least significant 8 bits uint8_t mask2 = uint8_t(mask >> 8); // most significant 8 bits // next line just loads the 64-bit values thintable_epi8[mask1] and // thintable_epi8[mask2] into a 128-bit register, using only // two instructions on most compilers. uint64x2_t shufmask64 = {thintable_epi8[mask1], thintable_epi8[mask2]}; uint8x16_t shufmask = vreinterpretq_u8_u64(shufmask64); // we increment by 0x08 the second half of the mask #ifdef SIMDJSON_REGULAR_VISUAL_STUDIO uint8x16_t inc = simdjson_make_uint8x16_t(0, 0, 0, 0, 0, 0, 0, 0, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08); #else uint8x16_t inc = {0, 0, 0, 0, 0, 0, 0, 0, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08}; #endif shufmask = vaddq_u8(shufmask, inc); // this is the version "nearly pruned" uint8x16_t pruned = vqtbl1q_u8(*this, shufmask); // we still need to put the two halves together. // we compute the popcount of the first half: int pop1 = BitsSetTable256mul2[mask1]; // then load the corresponding mask, what it does is to write // only the first pop1 bytes from the first 8 bytes, and then // it fills in with the bytes from the second 8 bytes + some filling // at the end. uint8x16_t compactmask = vld1q_u8(reinterpret_cast(pshufb_combine_table + pop1 * 8)); uint8x16_t answer = vqtbl1q_u8(pruned, compactmask); vst1q_u8(reinterpret_cast(output), answer); } // Copies all bytes corresponding to a 0 in the low half of the mask (interpreted as a // bitset) to output1, then those corresponding to a 0 in the high half to output2. 
template simdjson_inline void compress_halves(uint16_t mask, L *output1, L *output2) const { using internal::thintable_epi8; uint8_t mask1 = uint8_t(mask); // least significant 8 bits uint8_t mask2 = uint8_t(mask >> 8); // most significant 8 bits uint8x8_t compactmask1 = vcreate_u8(thintable_epi8[mask1]); uint8x8_t compactmask2 = vcreate_u8(thintable_epi8[mask2]); // we increment by 0x08 the second half of the mask #ifdef SIMDJSON_REGULAR_VISUAL_STUDIO uint8x8_t inc = simdjson_make_uint8x8_t(0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08); #else uint8x8_t inc = {0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08}; #endif compactmask2 = vadd_u8(compactmask2, inc); // store each result (with the second store possibly overlapping the first) vst1_u8((uint8_t*)output1, vqtbl1_u8(*this, compactmask1)); vst1_u8((uint8_t*)output2, vqtbl1_u8(*this, compactmask2)); } template simdjson_inline simd8 lookup_16( L replace0, L replace1, L replace2, L replace3, L replace4, L replace5, L replace6, L replace7, L replace8, L replace9, L replace10, L replace11, L replace12, L replace13, L replace14, L replace15) const { return lookup_16(simd8::repeat_16( replace0, replace1, replace2, replace3, replace4, replace5, replace6, replace7, replace8, replace9, replace10, replace11, replace12, replace13, replace14, replace15 )); } template simdjson_inline simd8 apply_lookup_16_to(const simd8 original) { return vqtbl1q_u8(*this, simd8(original)); } }; // Signed bytes template<> struct simd8 { int8x16_t value; static simdjson_inline simd8 splat(int8_t _value) { return vmovq_n_s8(_value); } static simdjson_inline simd8 zero() { return vdupq_n_s8(0); } static simdjson_inline simd8 load(const int8_t values[16]) { return vld1q_s8(values); } // Conversion from/to SIMD register simdjson_inline simd8(const int8x16_t _value) : value{_value} {} simdjson_inline operator const int8x16_t&() const { return this->value; } simdjson_inline operator int8x16_t&() { return this->value; } // Zero constructor 
simdjson_inline simd8() : simd8(zero()) {} // Splat constructor simdjson_inline simd8(int8_t _value) : simd8(splat(_value)) {} // Array constructor simdjson_inline simd8(const int8_t* values) : simd8(load(values)) {} // Member-by-member initialization #ifdef SIMDJSON_REGULAR_VISUAL_STUDIO simdjson_inline simd8( int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15 ) : simd8(simdjson_make_int8x16_t( v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10,v11,v12,v13,v14,v15 )) {} #else simdjson_inline simd8( int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15 ) : simd8(int8x16_t{ v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10,v11,v12,v13,v14,v15 }) {} #endif // Repeat 16 values as many times as necessary (usually for lookup tables) simdjson_inline static simd8 repeat_16( int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15 ) { return simd8( v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10,v11,v12,v13,v14,v15 ); } // Store to array simdjson_inline void store(int8_t dst[16]) const { return vst1q_s8(dst, *this); } // Explicit conversion to/from unsigned // // Under Visual Studio/ARM64 uint8x16_t and int8x16_t are apparently the same type. // In theory, we could check this occurrence with std::same_as and std::enabled_if but it is C++14 // and relatively ugly and hard to read. 
#ifndef SIMDJSON_REGULAR_VISUAL_STUDIO simdjson_inline explicit simd8(const uint8x16_t other): simd8(vreinterpretq_s8_u8(other)) {} #endif simdjson_inline explicit operator simd8() const { return vreinterpretq_u8_s8(this->value); } // Math simdjson_inline simd8 operator+(const simd8 other) const { return vaddq_s8(*this, other); } simdjson_inline simd8 operator-(const simd8 other) const { return vsubq_s8(*this, other); } simdjson_inline simd8& operator+=(const simd8 other) { *this = *this + other; return *this; } simdjson_inline simd8& operator-=(const simd8 other) { *this = *this - other; return *this; } // Order-sensitive comparisons simdjson_inline simd8 max_val(const simd8 other) const { return vmaxq_s8(*this, other); } simdjson_inline simd8 min_val(const simd8 other) const { return vminq_s8(*this, other); } simdjson_inline simd8 operator>(const simd8 other) const { return vcgtq_s8(*this, other); } simdjson_inline simd8 operator<(const simd8 other) const { return vcltq_s8(*this, other); } simdjson_inline simd8 operator==(const simd8 other) const { return vceqq_s8(*this, other); } template simdjson_inline simd8 prev(const simd8 prev_chunk) const { return vextq_s8(prev_chunk, *this, 16 - N); } // Perform a lookup assuming no value is larger than 16 template simdjson_inline simd8 lookup_16(simd8 lookup_table) const { return lookup_table.apply_lookup_16_to(*this); } template simdjson_inline simd8 lookup_16( L replace0, L replace1, L replace2, L replace3, L replace4, L replace5, L replace6, L replace7, L replace8, L replace9, L replace10, L replace11, L replace12, L replace13, L replace14, L replace15) const { return lookup_16(simd8::repeat_16( replace0, replace1, replace2, replace3, replace4, replace5, replace6, replace7, replace8, replace9, replace10, replace11, replace12, replace13, replace14, replace15 )); } template simdjson_inline simd8 apply_lookup_16_to(const simd8 original) { return vqtbl1q_s8(*this, simd8(original)); } }; template struct simd8x64 { static 
constexpr int NUM_CHUNKS = 64 / sizeof(simd8); static_assert(NUM_CHUNKS == 4, "ARM kernel should use four registers per 64-byte block."); const simd8 chunks[NUM_CHUNKS]; simd8x64(const simd8x64& o) = delete; // no copy allowed simd8x64& operator=(const simd8& other) = delete; // no assignment allowed simd8x64() = delete; // no default constructor allowed simdjson_inline simd8x64(const simd8 chunk0, const simd8 chunk1, const simd8 chunk2, const simd8 chunk3) : chunks{chunk0, chunk1, chunk2, chunk3} {} simdjson_inline simd8x64(const T ptr[64]) : chunks{simd8::load(ptr), simd8::load(ptr+16), simd8::load(ptr+32), simd8::load(ptr+48)} {} simdjson_inline void store(T ptr[64]) const { this->chunks[0].store(ptr+sizeof(simd8)*0); this->chunks[1].store(ptr+sizeof(simd8)*1); this->chunks[2].store(ptr+sizeof(simd8)*2); this->chunks[3].store(ptr+sizeof(simd8)*3); } simdjson_inline simd8 reduce_or() const { return (this->chunks[0] | this->chunks[1]) | (this->chunks[2] | this->chunks[3]); } simdjson_inline uint64_t compress(uint64_t mask, T * output) const { uint64_t popcounts = vget_lane_u64(vreinterpret_u64_u8(vcnt_u8(vcreate_u8(~mask))), 0); // compute the prefix sum of the popcounts of each byte uint64_t offsets = popcounts * 0x0101010101010101; this->chunks[0].compress_halves(uint16_t(mask), output, &output[popcounts & 0xFF]); this->chunks[1].compress_halves(uint16_t(mask >> 16), &output[(offsets >> 8) & 0xFF], &output[(offsets >> 16) & 0xFF]); this->chunks[2].compress_halves(uint16_t(mask >> 32), &output[(offsets >> 24) & 0xFF], &output[(offsets >> 32) & 0xFF]); this->chunks[3].compress_halves(uint16_t(mask >> 48), &output[(offsets >> 40) & 0xFF], &output[(offsets >> 48) & 0xFF]); return offsets >> 56; } simdjson_inline uint64_t to_bitmask() const { #ifdef SIMDJSON_REGULAR_VISUAL_STUDIO const uint8x16_t bit_mask = simdjson_make_uint8x16_t( 0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80, 0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80 ); #else const uint8x16_t bit_mask = { 0x01, 
0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80, 0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80 }; #endif // Add each of the elements next to each other, successively, to stuff each 8 byte mask into one. uint8x16_t sum0 = vpaddq_u8(this->chunks[0] & bit_mask, this->chunks[1] & bit_mask); uint8x16_t sum1 = vpaddq_u8(this->chunks[2] & bit_mask, this->chunks[3] & bit_mask); sum0 = vpaddq_u8(sum0, sum1); sum0 = vpaddq_u8(sum0, sum0); return vgetq_lane_u64(vreinterpretq_u64_u8(sum0), 0); } simdjson_inline uint64_t eq(const T m) const { const simd8 mask = simd8::splat(m); return simd8x64( this->chunks[0] == mask, this->chunks[1] == mask, this->chunks[2] == mask, this->chunks[3] == mask ).to_bitmask(); } simdjson_inline uint64_t lteq(const T m) const { const simd8 mask = simd8::splat(m); return simd8x64( this->chunks[0] <= mask, this->chunks[1] <= mask, this->chunks[2] <= mask, this->chunks[3] <= mask ).to_bitmask(); } }; // struct simd8x64 } // namespace simd } // unnamed namespace } // namespace arm64 } // namespace simdjson #endif // SIMDJSON_ARM64_SIMD_H /* end file simdjson/arm64/simd.h */ /* including simdjson/arm64/stringparsing_defs.h: #include "simdjson/arm64/stringparsing_defs.h" */ /* begin file simdjson/arm64/stringparsing_defs.h */ #ifndef SIMDJSON_ARM64_STRINGPARSING_DEFS_H #define SIMDJSON_ARM64_STRINGPARSING_DEFS_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #include "simdjson/arm64/base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/arm64/simd.h" */ /* amalgamation skipped (editor-only): #include "simdjson/arm64/bitmanipulation.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { namespace arm64 { namespace { using namespace simd; // Holds backslashes and quotes locations. 
struct backslash_and_quote { public: static constexpr uint32_t BYTES_PROCESSED = 32; simdjson_inline static backslash_and_quote copy_and_find(const uint8_t *src, uint8_t *dst); simdjson_inline bool has_quote_first() { return ((bs_bits - 1) & quote_bits) != 0; } simdjson_inline bool has_backslash() { return bs_bits != 0; } simdjson_inline int quote_index() { return trailing_zeroes(quote_bits); } simdjson_inline int backslash_index() { return trailing_zeroes(bs_bits); } uint32_t bs_bits; uint32_t quote_bits; }; // struct backslash_and_quote simdjson_inline backslash_and_quote backslash_and_quote::copy_and_find(const uint8_t *src, uint8_t *dst) { // this can read up to 31 bytes beyond the buffer size, but we require // SIMDJSON_PADDING of padding static_assert(SIMDJSON_PADDING >= (BYTES_PROCESSED - 1), "backslash and quote finder must process fewer than SIMDJSON_PADDING bytes"); simd8 v0(src); simd8 v1(src + sizeof(v0)); v0.store(dst); v1.store(dst + sizeof(v0)); // Getting a 64-bit bitmask is much cheaper than multiple 16-bit bitmasks on ARM; therefore, we // smash them together into a 64-byte mask and get the bitmask from there. uint64_t bs_and_quote = simd8x64(v0 == '\\', v1 == '\\', v0 == '"', v1 == '"').to_bitmask(); return { uint32_t(bs_and_quote), // bs_bits uint32_t(bs_and_quote >> 32) // quote_bits }; } } // unnamed namespace } // namespace arm64 } // namespace simdjson #endif // SIMDJSON_ARM64_STRINGPARSING_DEFS_H /* end file simdjson/arm64/stringparsing_defs.h */ #define SIMDJSON_SKIP_BACKSLASH_SHORT_CIRCUIT 1 /* end file simdjson/arm64/begin.h */ /* including simdjson/generic/amalgamated.h for arm64: #include "simdjson/generic/amalgamated.h" */ /* begin file simdjson/generic/amalgamated.h for arm64 */ #if defined(SIMDJSON_CONDITIONAL_INCLUDE) && !defined(SIMDJSON_GENERIC_DEPENDENCIES_H) #error simdjson/generic/dependencies.h must be included before simdjson/generic/amalgamated.h! 
#endif /* including simdjson/generic/base.h for arm64: #include "simdjson/generic/base.h" */ /* begin file simdjson/generic/base.h for arm64 */ #ifndef SIMDJSON_GENERIC_BASE_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_BASE_H */ /* amalgamation skipped (editor-only): #include "simdjson/base.h" */ /* amalgamation skipped (editor-only): // If we haven't got an implementation yet, we're in the editor, editing a generic file! Just */ /* amalgamation skipped (editor-only): // use the most advanced one we can so the most possible stuff can be tested. */ /* amalgamation skipped (editor-only): #ifndef SIMDJSON_IMPLEMENTATION */ /* amalgamation skipped (editor-only): #include "simdjson/implementation_detection.h" */ /* amalgamation skipped (editor-only): #if SIMDJSON_IMPLEMENTATION_ICELAKE */ /* amalgamation skipped (editor-only): #include "simdjson/icelake/begin.h" */ /* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_HASWELL */ /* amalgamation skipped (editor-only): #include "simdjson/haswell/begin.h" */ /* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_WESTMERE */ /* amalgamation skipped (editor-only): #include "simdjson/westmere/begin.h" */ /* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_ARM64 */ /* amalgamation skipped (editor-only): #include "simdjson/arm64/begin.h" */ /* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_PPC64 */ /* amalgamation skipped (editor-only): #include "simdjson/ppc64/begin.h" */ /* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_FALLBACK */ /* amalgamation skipped (editor-only): #include "simdjson/fallback/begin.h" */ /* amalgamation skipped (editor-only): #else */ /* amalgamation skipped (editor-only): #error "All possible implementations (including fallback) have been disabled! simdjson will not run." 
*/ /* amalgamation skipped (editor-only): #endif */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_IMPLEMENTATION */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { namespace arm64 { struct open_container; class dom_parser_implementation; /** * The type of a JSON number */ enum class number_type { floating_point_number=1, /// a binary64 number signed_integer, /// a signed integer that fits in a 64-bit word using two's complement unsigned_integer /// a positive integer larger or equal to 1<<63 }; } // namespace arm64 } // namespace simdjson #endif // SIMDJSON_GENERIC_BASE_H /* end file simdjson/generic/base.h for arm64 */ /* including simdjson/generic/jsoncharutils.h for arm64: #include "simdjson/generic/jsoncharutils.h" */ /* begin file simdjson/generic/jsoncharutils.h for arm64 */ #ifndef SIMDJSON_GENERIC_JSONCHARUTILS_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_JSONCHARUTILS_H */ /* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/internal/jsoncharutils_tables.h" */ /* amalgamation skipped (editor-only): #include "simdjson/internal/numberparsing_tables.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { namespace arm64 { namespace { namespace jsoncharutils { // return non-zero if not a structural or whitespace char // zero otherwise simdjson_inline uint32_t is_not_structural_or_whitespace(uint8_t c) { return internal::structural_or_whitespace_negated[c]; } simdjson_inline uint32_t is_structural_or_whitespace(uint8_t c) { return internal::structural_or_whitespace[c]; } // returns a value with the high 16 bits set if not valid // otherwise returns the conversion of the 4 hex digits at src into the bottom // 16 bits of the 32-bit return register // // see // 
https://lemire.me/blog/2019/04/17/parsing-short-hexadecimal-strings-efficiently/ static inline uint32_t hex_to_u32_nocheck( const uint8_t *src) { // strictly speaking, static inline is a C-ism uint32_t v1 = internal::digit_to_val32[630 + src[0]]; uint32_t v2 = internal::digit_to_val32[420 + src[1]]; uint32_t v3 = internal::digit_to_val32[210 + src[2]]; uint32_t v4 = internal::digit_to_val32[0 + src[3]]; return v1 | v2 | v3 | v4; } // given a code point cp, writes to c // the utf-8 code, outputting the length in // bytes, if the length is zero, the code point // is invalid // // This can possibly be made faster using pdep // and clz and table lookups, but JSON documents // have few escaped code points, and the following // function looks cheap. // // Note: we assume that surrogates are treated separately // simdjson_inline size_t codepoint_to_utf8(uint32_t cp, uint8_t *c) { if (cp <= 0x7F) { c[0] = uint8_t(cp); return 1; // ascii } if (cp <= 0x7FF) { c[0] = uint8_t((cp >> 6) + 192); c[1] = uint8_t((cp & 63) + 128); return 2; // universal plane // Surrogates are treated elsewhere... //} //else if (0xd800 <= cp && cp <= 0xdfff) { // return 0; // surrogates // could put assert here } else if (cp <= 0xFFFF) { c[0] = uint8_t((cp >> 12) + 224); c[1] = uint8_t(((cp >> 6) & 63) + 128); c[2] = uint8_t((cp & 63) + 128); return 3; } else if (cp <= 0x10FFFF) { // if you know you have a valid code point, this // is not needed c[0] = uint8_t((cp >> 18) + 240); c[1] = uint8_t(((cp >> 12) & 63) + 128); c[2] = uint8_t(((cp >> 6) & 63) + 128); c[3] = uint8_t((cp & 63) + 128); return 4; } // will return 0 when the code point was too large. 
return 0; // bad r } #if SIMDJSON_IS_32BITS // _umul128 for x86, arm // this is a slow emulation routine for 32-bit // static simdjson_inline uint64_t __emulu(uint32_t x, uint32_t y) { return x * (uint64_t)y; } static simdjson_inline uint64_t _umul128(uint64_t ab, uint64_t cd, uint64_t *hi) { uint64_t ad = __emulu((uint32_t)(ab >> 32), (uint32_t)cd); uint64_t bd = __emulu((uint32_t)ab, (uint32_t)cd); uint64_t adbc = ad + __emulu((uint32_t)ab, (uint32_t)(cd >> 32)); uint64_t adbc_carry = !!(adbc < ad); uint64_t lo = bd + (adbc << 32); *hi = __emulu((uint32_t)(ab >> 32), (uint32_t)(cd >> 32)) + (adbc >> 32) + (adbc_carry << 32) + !!(lo < bd); return lo; } #endif } // namespace jsoncharutils } // unnamed namespace } // namespace arm64 } // namespace simdjson #endif // SIMDJSON_GENERIC_JSONCHARUTILS_H /* end file simdjson/generic/jsoncharutils.h for arm64 */ /* including simdjson/generic/atomparsing.h for arm64: #include "simdjson/generic/atomparsing.h" */ /* begin file simdjson/generic/atomparsing.h for arm64 */ #ifndef SIMDJSON_GENERIC_ATOMPARSING_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ATOMPARSING_H */ /* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/jsoncharutils.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ #include namespace simdjson { namespace arm64 { namespace { /// @private namespace atomparsing { // The string_to_uint32 is exclusively used to map literal strings to 32-bit values. // We use memcpy instead of a pointer cast to avoid undefined behaviors since we cannot // be certain that the character pointer will be properly aligned. // You might think that using memcpy makes this function expensive, but you'd be wrong. 
// All decent optimizing compilers (GCC, clang, Visual Studio) will compile string_to_uint32("false"); // to the compile-time constant 1936482662. simdjson_inline uint32_t string_to_uint32(const char* str) { uint32_t val; std::memcpy(&val, str, sizeof(uint32_t)); return val; } // Again in str4ncmp we use a memcpy to avoid undefined behavior. The memcpy may appear expensive. // Yet all decent optimizing compilers will compile memcpy to a single instruction, just about. simdjson_warn_unused simdjson_inline uint32_t str4ncmp(const uint8_t *src, const char* atom) { uint32_t srcval; // we want to avoid unaligned 32-bit loads (undefined in C/C++) static_assert(sizeof(uint32_t) <= SIMDJSON_PADDING, "SIMDJSON_PADDING must be larger than 4 bytes"); std::memcpy(&srcval, src, sizeof(uint32_t)); return srcval ^ string_to_uint32(atom); } simdjson_warn_unused simdjson_inline bool is_valid_true_atom(const uint8_t *src) { return (str4ncmp(src, "true") | jsoncharutils::is_not_structural_or_whitespace(src[4])) == 0; } simdjson_warn_unused simdjson_inline bool is_valid_true_atom(const uint8_t *src, size_t len) { if (len > 4) { return is_valid_true_atom(src); } else if (len == 4) { return !str4ncmp(src, "true"); } else { return false; } } simdjson_warn_unused simdjson_inline bool is_valid_false_atom(const uint8_t *src) { return (str4ncmp(src+1, "alse") | jsoncharutils::is_not_structural_or_whitespace(src[5])) == 0; } simdjson_warn_unused simdjson_inline bool is_valid_false_atom(const uint8_t *src, size_t len) { if (len > 5) { return is_valid_false_atom(src); } else if (len == 5) { return !str4ncmp(src+1, "alse"); } else { return false; } } simdjson_warn_unused simdjson_inline bool is_valid_null_atom(const uint8_t *src) { return (str4ncmp(src, "null") | jsoncharutils::is_not_structural_or_whitespace(src[4])) == 0; } simdjson_warn_unused simdjson_inline bool is_valid_null_atom(const uint8_t *src, size_t len) { if (len > 4) { return is_valid_null_atom(src); } else if (len == 4) { return 
!str4ncmp(src, "null"); } else { return false; } } } // namespace atomparsing } // unnamed namespace } // namespace arm64 } // namespace simdjson #endif // SIMDJSON_GENERIC_ATOMPARSING_H /* end file simdjson/generic/atomparsing.h for arm64 */ /* including simdjson/generic/dom_parser_implementation.h for arm64: #include "simdjson/generic/dom_parser_implementation.h" */ /* begin file simdjson/generic/dom_parser_implementation.h for arm64 */ #ifndef SIMDJSON_GENERIC_DOM_PARSER_IMPLEMENTATION_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_DOM_PARSER_IMPLEMENTATION_H */ /* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/internal/dom_parser_implementation.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { namespace arm64 { // expectation: sizeof(open_container) = 64/8. 
struct open_container { uint32_t tape_index; // where, on the tape, does the scope ([,{) begins uint32_t count; // how many elements in the scope }; // struct open_container static_assert(sizeof(open_container) == 64/8, "Open container must be 64 bits"); class dom_parser_implementation final : public internal::dom_parser_implementation { public: /** Tape location of each open { or [ */ std::unique_ptr open_containers{}; /** Whether each open container is a [ or { */ std::unique_ptr is_array{}; /** Buffer passed to stage 1 */ const uint8_t *buf{}; /** Length passed to stage 1 */ size_t len{0}; /** Document passed to stage 2 */ dom::document *doc{}; inline dom_parser_implementation() noexcept; inline dom_parser_implementation(dom_parser_implementation &&other) noexcept; inline dom_parser_implementation &operator=(dom_parser_implementation &&other) noexcept; dom_parser_implementation(const dom_parser_implementation &) = delete; dom_parser_implementation &operator=(const dom_parser_implementation &) = delete; simdjson_warn_unused error_code parse(const uint8_t *buf, size_t len, dom::document &doc) noexcept final; simdjson_warn_unused error_code stage1(const uint8_t *buf, size_t len, stage1_mode partial) noexcept final; simdjson_warn_unused error_code stage2(dom::document &doc) noexcept final; simdjson_warn_unused error_code stage2_next(dom::document &doc) noexcept final; simdjson_warn_unused uint8_t *parse_string(const uint8_t *src, uint8_t *dst, bool allow_replacement) const noexcept final; simdjson_warn_unused uint8_t *parse_wobbly_string(const uint8_t *src, uint8_t *dst) const noexcept final; inline simdjson_warn_unused error_code set_capacity(size_t capacity) noexcept final; inline simdjson_warn_unused error_code set_max_depth(size_t max_depth) noexcept final; private: simdjson_inline simdjson_warn_unused error_code set_capacity_stage1(size_t capacity); }; } // namespace arm64 } // namespace simdjson namespace simdjson { namespace arm64 { inline 
dom_parser_implementation::dom_parser_implementation() noexcept = default; inline dom_parser_implementation::dom_parser_implementation(dom_parser_implementation &&other) noexcept = default; inline dom_parser_implementation &dom_parser_implementation::operator=(dom_parser_implementation &&other) noexcept = default; // Leaving these here so they can be inlined if so desired inline simdjson_warn_unused error_code dom_parser_implementation::set_capacity(size_t capacity) noexcept { if(capacity > SIMDJSON_MAXSIZE_BYTES) { return CAPACITY; } // Stage 1 index output size_t max_structures = SIMDJSON_ROUNDUP_N(capacity, 64) + 2 + 7; structural_indexes.reset( new (std::nothrow) uint32_t[max_structures] ); if (!structural_indexes) { _capacity = 0; return MEMALLOC; } structural_indexes[0] = 0; n_structural_indexes = 0; _capacity = capacity; return SUCCESS; } inline simdjson_warn_unused error_code dom_parser_implementation::set_max_depth(size_t max_depth) noexcept { // Stage 2 stacks open_containers.reset(new (std::nothrow) open_container[max_depth]); is_array.reset(new (std::nothrow) bool[max_depth]); if (!is_array || !open_containers) { _max_depth = 0; return MEMALLOC; } _max_depth = max_depth; return SUCCESS; } } // namespace arm64 } // namespace simdjson #endif // SIMDJSON_GENERIC_DOM_PARSER_IMPLEMENTATION_H /* end file simdjson/generic/dom_parser_implementation.h for arm64 */ /* including simdjson/generic/implementation_simdjson_result_base.h for arm64: #include "simdjson/generic/implementation_simdjson_result_base.h" */ /* begin file simdjson/generic/implementation_simdjson_result_base.h for arm64 */ #ifndef SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_H */ /* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ /* amalgamation skipped (editor-only): #endif // 
SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { namespace arm64 { // This is a near copy of include/error.h's implementation_simdjson_result_base, except it doesn't use std::pair // so we can avoid inlining errors // TODO reconcile these! /** * The result of a simdjson operation that could fail. * * Gives the option of reading error codes, or throwing an exception by casting to the desired result. * * This is a base class for implementations that want to add functions to the result type for * chaining. * * Override like: * * struct simdjson_result : public internal::implementation_simdjson_result_base { * simdjson_result() noexcept : internal::implementation_simdjson_result_base() {} * simdjson_result(error_code error) noexcept : internal::implementation_simdjson_result_base(error) {} * simdjson_result(T &&value) noexcept : internal::implementation_simdjson_result_base(std::forward(value)) {} * simdjson_result(T &&value, error_code error) noexcept : internal::implementation_simdjson_result_base(value, error) {} * // Your extra methods here * } * * Then any method returning simdjson_result will be chainable with your methods. */ template struct implementation_simdjson_result_base { /** * Create a new empty result with error = UNINITIALIZED. */ simdjson_inline implementation_simdjson_result_base() noexcept = default; /** * Create a new error result. */ simdjson_inline implementation_simdjson_result_base(error_code error) noexcept; /** * Create a new successful result. */ simdjson_inline implementation_simdjson_result_base(T &&value) noexcept; /** * Create a new result with both things (use if you don't want to branch when creating the result). */ simdjson_inline implementation_simdjson_result_base(T &&value, error_code error) noexcept; /** * Move the value and the error to the provided variables. * * @param value The variable to assign the value to. May not be set if there is an error. * @param error The variable to assign the error to. 
Set to SUCCESS if there is no error. */ simdjson_inline void tie(T &value, error_code &error) && noexcept; /** * Move the value to the provided variable. * * @param value The variable to assign the value to. May not be set if there is an error. */ simdjson_inline error_code get(T &value) && noexcept; /** * The error. */ simdjson_inline error_code error() const noexcept; #if SIMDJSON_EXCEPTIONS /** * Get the result value. * * @throw simdjson_error if there was an error. */ simdjson_inline T& value() & noexcept(false); /** * Take the result value (move it). * * @throw simdjson_error if there was an error. */ simdjson_inline T&& value() && noexcept(false); /** * Take the result value (move it). * * @throw simdjson_error if there was an error. */ simdjson_inline T&& take_value() && noexcept(false); /** * Cast to the value (will throw on error). * * @throw simdjson_error if there was an error. */ simdjson_inline operator T&&() && noexcept(false); #endif // SIMDJSON_EXCEPTIONS /** * Get the result value. This function is safe if and only * the error() method returns a value that evaluates to false. */ simdjson_inline const T& value_unsafe() const& noexcept; /** * Get the result value. This function is safe if and only * the error() method returns a value that evaluates to false. */ simdjson_inline T& value_unsafe() & noexcept; /** * Take the result value (move it). This function is safe if and only * the error() method returns a value that evaluates to false. */ simdjson_inline T&& value_unsafe() && noexcept; protected: /** users should never directly access first and second. **/ T first{}; /** Users should never directly access 'first'. **/ error_code second{UNINITIALIZED}; /** Users should never directly access 'second'. 
**/ }; // struct implementation_simdjson_result_base } // namespace arm64 } // namespace simdjson #endif // SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_H /* end file simdjson/generic/implementation_simdjson_result_base.h for arm64 */ /* including simdjson/generic/numberparsing.h for arm64: #include "simdjson/generic/numberparsing.h" */ /* begin file simdjson/generic/numberparsing.h for arm64 */ #ifndef SIMDJSON_GENERIC_NUMBERPARSING_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_NUMBERPARSING_H */ /* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/jsoncharutils.h" */ /* amalgamation skipped (editor-only): #include "simdjson/internal/numberparsing_tables.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ #include #include #include namespace simdjson { namespace arm64 { namespace numberparsing { #ifdef JSON_TEST_NUMBERS #define INVALID_NUMBER(SRC) (found_invalid_number((SRC)), NUMBER_ERROR) #define WRITE_INTEGER(VALUE, SRC, WRITER) (found_integer((VALUE), (SRC)), (WRITER).append_s64((VALUE))) #define WRITE_UNSIGNED(VALUE, SRC, WRITER) (found_unsigned_integer((VALUE), (SRC)), (WRITER).append_u64((VALUE))) #define WRITE_DOUBLE(VALUE, SRC, WRITER) (found_float((VALUE), (SRC)), (WRITER).append_double((VALUE))) #else #define INVALID_NUMBER(SRC) (NUMBER_ERROR) #define WRITE_INTEGER(VALUE, SRC, WRITER) (WRITER).append_s64((VALUE)) #define WRITE_UNSIGNED(VALUE, SRC, WRITER) (WRITER).append_u64((VALUE)) #define WRITE_DOUBLE(VALUE, SRC, WRITER) (WRITER).append_double((VALUE)) #endif namespace { // Convert a mantissa, an exponent and a sign bit into an ieee64 double. // The real_exponent needs to be in [0, 2046] (technically real_exponent = 2047 would be acceptable). // The mantissa should be in [0,1<<53). 
The bit at index (1ULL << 52) while be zeroed. simdjson_inline double to_double(uint64_t mantissa, uint64_t real_exponent, bool negative) { double d; mantissa &= ~(1ULL << 52); mantissa |= real_exponent << 52; mantissa |= ((static_cast(negative)) << 63); std::memcpy(&d, &mantissa, sizeof(d)); return d; } // Attempts to compute i * 10^(power) exactly; and if "negative" is // true, negate the result. // This function will only work in some cases, when it does not work, success is // set to false. This should work *most of the time* (like 99% of the time). // We assume that power is in the [smallest_power, // largest_power] interval: the caller is responsible for this check. simdjson_inline bool compute_float_64(int64_t power, uint64_t i, bool negative, double &d) { // we start with a fast path // It was described in // Clinger WD. How to read floating point numbers accurately. // ACM SIGPLAN Notices. 1990 #ifndef FLT_EVAL_METHOD #error "FLT_EVAL_METHOD should be defined, please include cfloat." #endif #if (FLT_EVAL_METHOD != 1) && (FLT_EVAL_METHOD != 0) // We cannot be certain that x/y is rounded to nearest. if (0 <= power && power <= 22 && i <= 9007199254740991) #else if (-22 <= power && power <= 22 && i <= 9007199254740991) #endif { // convert the integer into a double. This is lossless since // 0 <= i <= 2^53 - 1. d = double(i); // // The general idea is as follows. // If 0 <= s < 2^53 and if 10^0 <= p <= 10^22 then // 1) Both s and p can be represented exactly as 64-bit floating-point // values // (binary64). // 2) Because s and p can be represented exactly as floating-point values, // then s * p // and s / p will produce correctly rounded values. // if (power < 0) { d = d / simdjson::internal::power_of_ten[-power]; } else { d = d * simdjson::internal::power_of_ten[power]; } if (negative) { d = -d; } return true; } // When 22 < power && power < 22 + 16, we could // hope for another, secondary fast path. It was // described by David M. 
Gay in "Correctly rounded // binary-decimal and decimal-binary conversions." (1990) // If you need to compute i * 10^(22 + x) for x < 16, // first compute i * 10^x, if you know that result is exact // (e.g., when i * 10^x < 2^53), // then you can still proceed and do (i * 10^x) * 10^22. // Is this worth your time? // You need 22 < power *and* power < 22 + 16 *and* (i * 10^(x-22) < 2^53) // for this second fast path to work. // If you you have 22 < power *and* power < 22 + 16, and then you // optimistically compute "i * 10^(x-22)", there is still a chance that you // have wasted your time if i * 10^(x-22) >= 2^53. It makes the use cases of // this optimization maybe less common than we would like. Source: // http://www.exploringbinary.com/fast-path-decimal-to-floating-point-conversion/ // also used in RapidJSON: https://rapidjson.org/strtod_8h_source.html // The fast path has now failed, so we are failing back on the slower path. // In the slow path, we need to adjust i so that it is > 1<<63 which is always // possible, except if i == 0, so we handle i == 0 separately. if(i == 0) { d = negative ? -0.0 : 0.0; return true; } // The exponent is 1024 + 63 + power // + floor(log(5**power)/log(2)). // The 1024 comes from the ieee64 standard. // The 63 comes from the fact that we use a 64-bit word. // // Computing floor(log(5**power)/log(2)) could be // slow. Instead we use a fast function. // // For power in (-400,350), we have that // (((152170 + 65536) * power ) >> 16); // is equal to // floor(log(5**power)/log(2)) + power when power >= 0 // and it is equal to // ceil(log(5**-power)/log(2)) + power when power < 0 // // The 65536 is (1<<16) and corresponds to // (65536 * power) >> 16 ---> power // // ((152170 * power ) >> 16) is equal to // floor(log(5**power)/log(2)) // // Note that this is not magic: 152170/(1<<16) is // approximatively equal to log(5)/log(2). // The 1<<16 value is a power of two; we could use a // larger power of 2 if we wanted to. 
// int64_t exponent = (((152170 + 65536) * power) >> 16) + 1024 + 63; // We want the most significant bit of i to be 1. Shift if needed. int lz = leading_zeroes(i); i <<= lz; // We are going to need to do some 64-bit arithmetic to get a precise product. // We use a table lookup approach. // It is safe because // power >= smallest_power // and power <= largest_power // We recover the mantissa of the power, it has a leading 1. It is always // rounded down. // // We want the most significant 64 bits of the product. We know // this will be non-zero because the most significant bit of i is // 1. const uint32_t index = 2 * uint32_t(power - simdjson::internal::smallest_power); // Optimization: It may be that materializing the index as a variable might confuse some compilers and prevent effective complex-addressing loads. (Done for code clarity.) // // The full_multiplication function computes the 128-bit product of two 64-bit words // with a returned value of type value128 with a "low component" corresponding to the // 64-bit least significant bits of the product and with a "high component" corresponding // to the 64-bit most significant bits of the product. simdjson::internal::value128 firstproduct = full_multiplication(i, simdjson::internal::power_of_five_128[index]); // Both i and power_of_five_128[index] have their most significant bit set to 1 which // implies that the either the most or the second most significant bit of the product // is 1. We pack values in this manner for efficiency reasons: it maximizes the use // we make of the product. It also makes it easy to reason about the product: there // is 0 or 1 leading zero in the product. // Unless the least significant 9 bits of the high (64-bit) part of the full // product are all 1s, then we know that the most significant 55 bits are // exact and no further work is needed. 
Having 55 bits is necessary because // we need 53 bits for the mantissa but we have to have one rounding bit and // we can waste a bit if the most significant bit of the product is zero. if((firstproduct.high & 0x1FF) == 0x1FF) { // We want to compute i * 5^q, but only care about the top 55 bits at most. // Consider the scenario where q>=0. Then 5^q may not fit in 64-bits. Doing // the full computation is wasteful. So we do what is called a "truncated // multiplication". // We take the most significant 64-bits, and we put them in // power_of_five_128[index]. Usually, that's good enough to approximate i * 5^q // to the desired approximation using one multiplication. Sometimes it does not suffice. // Then we store the next most significant 64 bits in power_of_five_128[index + 1], and // then we get a better approximation to i * 5^q. // // That's for when q>=0. The logic for q<0 is somewhat similar but it is somewhat // more complicated. // // There is an extra layer of complexity in that we need more than 55 bits of // accuracy in the round-to-even scenario. // // The full_multiplication function computes the 128-bit product of two 64-bit words // with a returned value of type value128 with a "low component" corresponding to the // 64-bit least significant bits of the product and with a "high component" corresponding // to the 64-bit most significant bits of the product. simdjson::internal::value128 secondproduct = full_multiplication(i, simdjson::internal::power_of_five_128[index + 1]); firstproduct.low += secondproduct.high; if(secondproduct.high > firstproduct.low) { firstproduct.high++; } // As it has been proven by Noble Mushtak and Daniel Lemire in "Fast Number Parsing Without // Fallback" (https://arxiv.org/abs/2212.06644), at this point we are sure that the product // is sufficiently accurate, and more computation is not needed. } uint64_t lower = firstproduct.low; uint64_t upper = firstproduct.high; // The final mantissa should be 53 bits with a leading 1. 
// We shift it so that it occupies 54 bits with a leading 1. /////// uint64_t upperbit = upper >> 63; uint64_t mantissa = upper >> (upperbit + 9); lz += int(1 ^ upperbit); // Here we have mantissa < (1<<54). int64_t real_exponent = exponent - lz; if (simdjson_unlikely(real_exponent <= 0)) { // we have a subnormal? // Here have that real_exponent <= 0 so -real_exponent >= 0 if(-real_exponent + 1 >= 64) { // if we have more than 64 bits below the minimum exponent, you have a zero for sure. d = negative ? -0.0 : 0.0; return true; } // next line is safe because -real_exponent + 1 < 0 mantissa >>= -real_exponent + 1; // Thankfully, we can't have both "round-to-even" and subnormals because // "round-to-even" only occurs for powers close to 0. mantissa += (mantissa & 1); // round up mantissa >>= 1; // There is a weird scenario where we don't have a subnormal but just. // Suppose we start with 2.2250738585072013e-308, we end up // with 0x3fffffffffffff x 2^-1023-53 which is technically subnormal // whereas 0x40000000000000 x 2^-1023-53 is normal. Now, we need to round // up 0x3fffffffffffff x 2^-1023-53 and once we do, we are no longer // subnormal, but we can only know this after rounding. // So we only declare a subnormal if we are smaller than the threshold. real_exponent = (mantissa < (uint64_t(1) << 52)) ? 0 : 1; d = to_double(mantissa, real_exponent, negative); return true; } // We have to round to even. The "to even" part // is only a problem when we are right in between two floats // which we guard against. // If we have lots of trailing zeros, we may fall right between two // floating-point values. // // The round-to-even cases take the form of a number 2m+1 which is in (2^53,2^54] // times a power of two. That is, it is right between a number with binary significand // m and another number with binary significand m+1; and it must be the case // that it cannot be represented by a float itself. 
// // We must have that w * 10 ^q == (2m+1) * 2^p for some power of two 2^p. // Recall that 10^q = 5^q * 2^q. // When q >= 0, we must have that (2m+1) is divible by 5^q, so 5^q <= 2^54. We have that // 5^23 <= 2^54 and it is the last power of five to qualify, so q <= 23. // When q<0, we have w >= (2m+1) x 5^{-q}. We must have that w<2^{64} so // (2m+1) x 5^{-q} < 2^{64}. We have that 2m+1>2^{53}. Hence, we must have // 2^{53} x 5^{-q} < 2^{64}. // Hence we have 5^{-q} < 2^{11}$ or q>= -4. // // We require lower <= 1 and not lower == 0 because we could not prove that // that lower == 0 is implied; but we could prove that lower <= 1 is a necessary and sufficient test. if (simdjson_unlikely((lower <= 1) && (power >= -4) && (power <= 23) && ((mantissa & 3) == 1))) { if((mantissa << (upperbit + 64 - 53 - 2)) == upper) { mantissa &= ~1; // flip it so that we do not round up } } mantissa += mantissa & 1; mantissa >>= 1; // Here we have mantissa < (1<<53), unless there was an overflow if (mantissa >= (1ULL << 53)) { ////////// // This will happen when parsing values such as 7.2057594037927933e+16 //////// mantissa = (1ULL << 52); real_exponent++; } mantissa &= ~(1ULL << 52); // we have to check that real_exponent is in range, otherwise we bail out if (simdjson_unlikely(real_exponent > 2046)) { // We have an infinite value!!! We could actually throw an error here if we could. return false; } d = to_double(mantissa, real_exponent, negative); return true; } // We call a fallback floating-point parser that might be slow. Note // it will accept JSON numbers, but the JSON spec. is more restrictive so // before you call parse_float_fallback, you need to have validated the input // string with the JSON grammar. // It will return an error (false) if the parsed number is infinite. // The string parsing itself always succeeds. We know that there is at least // one digit. 
static bool parse_float_fallback(const uint8_t *ptr, double *outDouble) { *outDouble = simdjson::internal::from_chars(reinterpret_cast(ptr)); // We do not accept infinite values. // Detecting finite values in a portable manner is ridiculously hard, ideally // we would want to do: // return !std::isfinite(*outDouble); // but that mysteriously fails under legacy/old libc++ libraries, see // https://github.com/simdjson/simdjson/issues/1286 // // Therefore, fall back to this solution (the extra parens are there // to handle that max may be a macro on windows). return !(*outDouble > (std::numeric_limits::max)() || *outDouble < std::numeric_limits::lowest()); } static bool parse_float_fallback(const uint8_t *ptr, const uint8_t *end_ptr, double *outDouble) { *outDouble = simdjson::internal::from_chars(reinterpret_cast(ptr), reinterpret_cast(end_ptr)); // We do not accept infinite values. // Detecting finite values in a portable manner is ridiculously hard, ideally // we would want to do: // return !std::isfinite(*outDouble); // but that mysteriously fails under legacy/old libc++ libraries, see // https://github.com/simdjson/simdjson/issues/1286 // // Therefore, fall back to this solution (the extra parens are there // to handle that max may be a macro on windows). 
return !(*outDouble > (std::numeric_limits::max)() || *outDouble < std::numeric_limits::lowest()); } // check quickly whether the next 8 chars are made of digits // at a glance, it looks better than Mula's // http://0x80.pl/articles/swar-digits-validate.html simdjson_inline bool is_made_of_eight_digits_fast(const uint8_t *chars) { uint64_t val; // this can read up to 7 bytes beyond the buffer size, but we require // SIMDJSON_PADDING of padding static_assert(7 <= SIMDJSON_PADDING, "SIMDJSON_PADDING must be bigger than 7"); std::memcpy(&val, chars, 8); // a branchy method might be faster: // return (( val & 0xF0F0F0F0F0F0F0F0 ) == 0x3030303030303030) // && (( (val + 0x0606060606060606) & 0xF0F0F0F0F0F0F0F0 ) == // 0x3030303030303030); return (((val & 0xF0F0F0F0F0F0F0F0) | (((val + 0x0606060606060606) & 0xF0F0F0F0F0F0F0F0) >> 4)) == 0x3333333333333333); } template SIMDJSON_NO_SANITIZE_UNDEFINED // We deliberately allow overflow here and check later simdjson_inline bool parse_digit(const uint8_t c, I &i) { const uint8_t digit = static_cast(c - '0'); if (digit > 9) { return false; } // PERF NOTE: multiplication by 10 is cheaper than arbitrary integer multiplication i = 10 * i + digit; // might overflow, we will handle the overflow later return true; } simdjson_inline error_code parse_decimal_after_separator(simdjson_unused const uint8_t *const src, const uint8_t *&p, uint64_t &i, int64_t &exponent) { // we continue with the fiction that we have an integer. If the // floating point number is representable as x * 10^z for some integer // z that fits in 53 bits, then we will be able to convert back the // the integer into a float in a lossless manner. const uint8_t *const first_after_period = p; #ifdef SIMDJSON_SWAR_NUMBER_PARSING #if SIMDJSON_SWAR_NUMBER_PARSING // this helps if we have lots of decimals! // this turns out to be frequent enough. 
if (is_made_of_eight_digits_fast(p)) { i = i * 100000000 + parse_eight_digits_unrolled(p); p += 8; } #endif // SIMDJSON_SWAR_NUMBER_PARSING #endif // #ifdef SIMDJSON_SWAR_NUMBER_PARSING // Unrolling the first digit makes a small difference on some implementations (e.g. westmere) if (parse_digit(*p, i)) { ++p; } while (parse_digit(*p, i)) { p++; } exponent = first_after_period - p; // Decimal without digits (123.) is illegal if (exponent == 0) { return INVALID_NUMBER(src); } return SUCCESS; } simdjson_inline error_code parse_exponent(simdjson_unused const uint8_t *const src, const uint8_t *&p, int64_t &exponent) { // Exp Sign: -123.456e[-]78 bool neg_exp = ('-' == *p); if (neg_exp || '+' == *p) { p++; } // Skip + as well // Exponent: -123.456e-[78] auto start_exp = p; int64_t exp_number = 0; while (parse_digit(*p, exp_number)) { ++p; } // It is possible for parse_digit to overflow. // In particular, it could overflow to INT64_MIN, and we cannot do - INT64_MIN. // Thus we *must* check for possible overflow before we negate exp_number. // Performance notes: it may seem like combining the two "simdjson_unlikely checks" below into // a single simdjson_unlikely path would be faster. The reasoning is sound, but the compiler may // not oblige and may, in fact, generate two distinct paths in any case. It might be // possible to do uint64_t(p - start_exp - 1) >= 18 but it could end up trading off // instructions for a simdjson_likely branch, an unconclusive gain. // If there were no digits, it's an error. if (simdjson_unlikely(p == start_exp)) { return INVALID_NUMBER(src); } // We have a valid positive exponent in exp_number at this point, except that // it may have overflowed. // If there were more than 18 digits, we may have overflowed the integer. We have to do // something!!!! 
if (simdjson_unlikely(p > start_exp+18)) { // Skip leading zeroes: 1e000000000000000000001 is technically valid and doesn't overflow while (*start_exp == '0') { start_exp++; } // 19 digits could overflow int64_t and is kind of absurd anyway. We don't // support exponents smaller than -999,999,999,999,999,999 and bigger // than 999,999,999,999,999,999. // We can truncate. // Note that 999999999999999999 is assuredly too large. The maximal ieee64 value before // infinity is ~1.8e308. The smallest subnormal is ~5e-324. So, actually, we could // truncate at 324. // Note that there is no reason to fail per se at this point in time. // E.g., 0e999999999999999999999 is a fine number. if (p > start_exp+18) { exp_number = 999999999999999999; } } // At this point, we know that exp_number is a sane, positive, signed integer. // It is <= 999,999,999,999,999,999. As long as 'exponent' is in // [-8223372036854775808, 8223372036854775808], we won't overflow. Because 'exponent' // is bounded in magnitude by the size of the JSON input, we are fine in this universe. // To sum it up: the next line should never overflow. exponent += (neg_exp ? -exp_number : exp_number); return SUCCESS; } simdjson_inline size_t significant_digits(const uint8_t * start_digits, size_t digit_count) { // It is possible that the integer had an overflow. // We have to handle the case where we have 0.0000somenumber. const uint8_t *start = start_digits; while ((*start == '0') || (*start == '.')) { ++start; } // we over-decrement by one when there is a '.' 
return digit_count - size_t(start - start_digits); } } // unnamed namespace /** @private */ template error_code slow_float_parsing(simdjson_unused const uint8_t * src, W writer) { double d; if (parse_float_fallback(src, &d)) { writer.append_double(d); return SUCCESS; } return INVALID_NUMBER(src); } /** @private */ template simdjson_inline error_code write_float(const uint8_t *const src, bool negative, uint64_t i, const uint8_t * start_digits, size_t digit_count, int64_t exponent, W &writer) { // If we frequently had to deal with long strings of digits, // we could extend our code by using a 128-bit integer instead // of a 64-bit integer. However, this is uncommon in practice. // // 9999999999999999999 < 2**64 so we can accommodate 19 digits. // If we have a decimal separator, then digit_count - 1 is the number of digits, but we // may not have a decimal separator! if (simdjson_unlikely(digit_count > 19 && significant_digits(start_digits, digit_count) > 19)) { // Ok, chances are good that we had an overflow! // this is almost never going to get called!!! // we start anew, going slowly!!! // This will happen in the following examples: // 10000000000000000000000000000000000000000000e+308 // 3.1415926535897932384626433832795028841971693993751 // // NOTE: This makes a *copy* of the writer and passes it to slow_float_parsing. This happens // because slow_float_parsing is a non-inlined function. If we passed our writer reference to // it, it would force it to be stored in memory, preventing the compiler from picking it apart // and putting into registers. i.e. if we pass it as reference, it gets slow. // This is what forces the skip_double, as well. 
error_code error = slow_float_parsing(src, writer); writer.skip_double(); return error; } // NOTE: it's weird that the simdjson_unlikely() only wraps half the if, but it seems to get slower any other // way we've tried: https://github.com/simdjson/simdjson/pull/990#discussion_r448497331 // To future reader: we'd love if someone found a better way, or at least could explain this result! if (simdjson_unlikely(exponent < simdjson::internal::smallest_power) || (exponent > simdjson::internal::largest_power)) { // // Important: smallest_power is such that it leads to a zero value. // Observe that 18446744073709551615e-343 == 0, i.e. (2**64 - 1) e -343 is zero // so something x 10^-343 goes to zero, but not so with something x 10^-342. static_assert(simdjson::internal::smallest_power <= -342, "smallest_power is not small enough"); // if((exponent < simdjson::internal::smallest_power) || (i == 0)) { // E.g. Parse "-0.0e-999" into the same value as "-0.0". See https://en.wikipedia.org/wiki/Signed_zero WRITE_DOUBLE(negative ? -0.0 : 0.0, src, writer); return SUCCESS; } else { // (exponent > largest_power) and (i != 0) // We have, for sure, an infinite value and simdjson refuses to parse infinite values. return INVALID_NUMBER(src); } } double d; if (!compute_float_64(exponent, i, negative, d)) { // we are almost never going to get here. 
if (!parse_float_fallback(src, &d)) { return INVALID_NUMBER(src); } } WRITE_DOUBLE(d, src, writer); return SUCCESS; } // for performance analysis, it is sometimes useful to skip parsing #ifdef SIMDJSON_SKIPNUMBERPARSING template simdjson_inline error_code parse_number(const uint8_t *const, W &writer) { writer.append_s64(0); // always write zero return SUCCESS; // always succeeds } simdjson_unused simdjson_inline simdjson_result parse_unsigned(const uint8_t * const src) noexcept { return 0; } simdjson_unused simdjson_inline simdjson_result parse_integer(const uint8_t * const src) noexcept { return 0; } simdjson_unused simdjson_inline simdjson_result parse_double(const uint8_t * const src) noexcept { return 0; } simdjson_unused simdjson_inline simdjson_result parse_unsigned_in_string(const uint8_t * const src) noexcept { return 0; } simdjson_unused simdjson_inline simdjson_result parse_integer_in_string(const uint8_t * const src) noexcept { return 0; } simdjson_unused simdjson_inline simdjson_result parse_double_in_string(const uint8_t * const src) noexcept { return 0; } simdjson_unused simdjson_inline bool is_negative(const uint8_t * src) noexcept { return false; } simdjson_unused simdjson_inline simdjson_result is_integer(const uint8_t * src) noexcept { return false; } simdjson_unused simdjson_inline simdjson_result get_number_type(const uint8_t * src) noexcept { return number_type::signed_integer; } #else // parse the number at src // define JSON_TEST_NUMBERS for unit testing // // It is assumed that the number is followed by a structural ({,},],[) character // or a white space character. If that is not the case (e.g., when the JSON // document is made of a single number), then it is necessary to copy the // content and append a space before calling this function. // // Our objective is accurate parsing (ULP of 0) at high speed. 
template simdjson_inline error_code parse_number(const uint8_t *const src, W &writer) { // // Check for minus sign // bool negative = (*src == '-'); const uint8_t *p = src + uint8_t(negative); // // Parse the integer part. // // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare const uint8_t *const start_digits = p; uint64_t i = 0; while (parse_digit(*p, i)) { p++; } // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. // Optimization note: size_t is expected to be unsigned. size_t digit_count = size_t(p - start_digits); if (digit_count == 0 || ('0' == *start_digits && digit_count > 1)) { return INVALID_NUMBER(src); } // // Handle floats if there is a . or e (or both) // int64_t exponent = 0; bool is_float = false; if ('.' == *p) { is_float = true; ++p; SIMDJSON_TRY( parse_decimal_after_separator(src, p, i, exponent) ); digit_count = int(p - start_digits); // used later to guard against overflows } if (('e' == *p) || ('E' == *p)) { is_float = true; ++p; SIMDJSON_TRY( parse_exponent(src, p, exponent) ); } if (is_float) { const bool dirty_end = jsoncharutils::is_not_structural_or_whitespace(*p); SIMDJSON_TRY( write_float(src, negative, i, start_digits, digit_count, exponent, writer) ); if (dirty_end) { return INVALID_NUMBER(src); } return SUCCESS; } // The longest negative 64-bit number is 19 digits. // The longest positive 64-bit number is 20 digits. // We do it this way so we don't trigger this branch unless we must. size_t longest_digit_count = negative ? 
19 : 20; if (digit_count > longest_digit_count) { return INVALID_NUMBER(src); } if (digit_count == longest_digit_count) { if (negative) { // Anything negative above INT64_MAX+1 is invalid if (i > uint64_t(INT64_MAX)+1) { return INVALID_NUMBER(src); } WRITE_INTEGER(~i+1, src, writer); if (jsoncharutils::is_not_structural_or_whitespace(*p)) { return INVALID_NUMBER(src); } return SUCCESS; // Positive overflow check: // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the // biggest uint64_t. // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. // If we got here, it's a 20 digit number starting with the digit "1". // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller // than 1,553,255,926,290,448,384. // - That is smaller than the smallest possible 20-digit number the user could write: // 10,000,000,000,000,000,000. // - Therefore, if the number is positive and lower than that, it's overflow. // - The value we are looking at is less than or equal to INT64_MAX. // } else if (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INVALID_NUMBER(src); } } // Write unsigned if it doesn't fit in a signed integer. if (i > uint64_t(INT64_MAX)) { WRITE_UNSIGNED(i, src, writer); } else { WRITE_INTEGER(negative ? (~i+1) : i, src, writer); } if (jsoncharutils::is_not_structural_or_whitespace(*p)) { return INVALID_NUMBER(src); } return SUCCESS; } // Inlineable functions namespace { // This table can be used to characterize the final character of an integer // string. For JSON structural character and allowable white space characters, // we return SUCCESS. For 'e', '.' and 'E', we return INCORRECT_TYPE. Otherwise // we return NUMBER_ERROR. // Optimization note: we could easily reduce the size of the table by half (to 128) // at the cost of an extra branch. 
// Optimization note: we want the values to use at most 8 bits (not, e.g., 32 bits): static_assert(error_code(uint8_t(NUMBER_ERROR))== NUMBER_ERROR, "bad NUMBER_ERROR cast"); static_assert(error_code(uint8_t(SUCCESS))== SUCCESS, "bad NUMBER_ERROR cast"); static_assert(error_code(uint8_t(INCORRECT_TYPE))== INCORRECT_TYPE, "bad NUMBER_ERROR cast"); const uint8_t integer_string_finisher[256] = { NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, SUCCESS, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, INCORRECT_TYPE, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, INCORRECT_TYPE, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, INCORRECT_TYPE, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, 
NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR}; // Parse any number from 0 to 18,446,744,073,709,551,615 
simdjson_unused simdjson_inline simdjson_result<uint64_t> parse_unsigned(const uint8_t * const src) noexcept {
  const uint8_t *p = src;
  //
  // Parse the integer part.
  //
  // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare
  const uint8_t *const start_digits = p;
  uint64_t i = 0;
  while (parse_digit(*p, i)) { p++; }

  // Optimization note: size_t is expected to be unsigned.
  size_t digit_count = size_t(p - start_digits);
  // If there were no digits, it's an error.
  // The longest positive 64-bit number is 20 digits.
  // We do it this way so we don't trigger this branch unless we must.
  // Optimization note: the compiler can probably merge
  // ((digit_count == 0) || (digit_count > 20))
  // into a single branch since digit_count is unsigned.
  if ((digit_count == 0) || (digit_count > 20)) { return INCORRECT_TYPE; }
  // Here digit_count > 0.
  // If the integer starts with 0 and has more than one digit, it's an error.
  if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; }
  // We can do the following...
  // if (!jsoncharutils::is_structural_or_whitespace(*p)) {
  //  return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR;
  // }
  // as a single table lookup:
  if (integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); }

  if (digit_count == 20) {
    // Positive overflow check:
    // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the
    //   biggest uint64_t.
    // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX.
    //   If we got here, it's a 20 digit number starting with the digit "1".
    // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller
    //   than 1,553,255,926,290,448,384.
    // - That is smaller than the smallest possible 20-digit number the user could write:
    //   10,000,000,000,000,000,000.
    // - Therefore, if the number is positive and lower than that, it's overflow.
    // - The value we are looking at is less than or equal to INT64_MAX.
    //
    if (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INCORRECT_TYPE; }
  }

  return i;
}


// Parse any number from 0 to 18,446,744,073,709,551,615
// Never read at src_end or beyond
simdjson_unused simdjson_inline simdjson_result<uint64_t> parse_unsigned(const uint8_t * const src, const uint8_t * const src_end) noexcept {
  const uint8_t *p = src;
  //
  // Parse the integer part.
  //
  // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare
  const uint8_t *const start_digits = p;
  uint64_t i = 0;
  while ((p != src_end) && parse_digit(*p, i)) { p++; }

  // Optimization note: size_t is expected to be unsigned.
  size_t digit_count = size_t(p - start_digits);
  // If there were no digits, it's an error.
  // The longest positive 64-bit number is 20 digits.
  // We do it this way so we don't trigger this branch unless we must.
  // Optimization note: the compiler can probably merge
  // ((digit_count == 0) || (digit_count > 20))
  // into a single branch since digit_count is unsigned.
  if ((digit_count == 0) || (digit_count > 20)) { return INCORRECT_TYPE; }
  // Here digit_count > 0.
  // If the integer starts with 0 and has more than one digit, it's an error.
  if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; }
  // We can do the following...
  // if (!jsoncharutils::is_structural_or_whitespace(*p)) {
  //  return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR;
  // }
  // as a single table lookup (skipped when the digits run right up to src_end):
  if ((p != src_end) && integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); }

  if (digit_count == 20) {
    // Positive overflow check:
    // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the
    //   biggest uint64_t.
    // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX.
    //   If we got here, it's a 20 digit number starting with the digit "1".
    // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller
    //   than 1,553,255,926,290,448,384.
    // - That is smaller than the smallest possible 20-digit number the user could write:
    //   10,000,000,000,000,000,000.
    // - Therefore, if the number is positive and lower than that, it's overflow.
    // - The value we are looking at is less than or equal to INT64_MAX.
    //
    if (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INCORRECT_TYPE; }
  }

  return i;
}

// Parse any number from 0 to 18,446,744,073,709,551,615
simdjson_unused simdjson_inline simdjson_result<uint64_t> parse_unsigned_in_string(const uint8_t * const src) noexcept {
  // src[0] is the opening quote character; the digits start right after it.
  const uint8_t *p = src + 1;
  //
  // Parse the integer part.
  //
  // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare
  const uint8_t *const start_digits = p;
  uint64_t i = 0;
  while (parse_digit(*p, i)) { p++; }

  // Optimization note: size_t is expected to be unsigned.
  size_t digit_count = size_t(p - start_digits);
  // If there were no digits, it's an error.
  // The longest positive 64-bit number is 20 digits.
  // We do it this way so we don't trigger this branch unless we must.
  // Optimization note: the compiler can probably merge
  // ((digit_count == 0) || (digit_count > 20))
  // into a single branch since digit_count is unsigned.
  if ((digit_count == 0) || (digit_count > 20)) { return INCORRECT_TYPE; }
  // Here digit_count > 0.
  // If the integer starts with 0 and has more than one digit, it's an error.
  if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; }
  // Inside a string, the digits must be immediately followed by the closing quote.
  if (*p != '"') { return NUMBER_ERROR; }

  if (digit_count == 20) {
    // Positive overflow check:
    // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the
    //   biggest uint64_t.
    // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX.
    //   If we got here, it's a 20 digit number starting with the digit "1".
    // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller
    //   than 1,553,255,926,290,448,384.
    // - That is smaller than the smallest possible 20-digit number the user could write:
    //   10,000,000,000,000,000,000.
    // - Therefore, if the number is positive and lower than that, it's overflow.
    // - The value we are looking at is less than or equal to INT64_MAX.
    //
    // Note: we use src[1] and not src[0] because src[0] is the quote character in this
    // instance.
    if (src[1] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INCORRECT_TYPE; }
  }

  return i;
}

// Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807
simdjson_unused simdjson_inline simdjson_result<int64_t> parse_integer(const uint8_t *src) noexcept {
  //
  // Check for minus sign
  //
  bool negative = (*src == '-');
  const uint8_t *p = src + uint8_t(negative);

  //
  // Parse the integer part.
  //
  // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare
  const uint8_t *const start_digits = p;
  uint64_t i = 0;
  while (parse_digit(*p, i)) { p++; }

  // Optimization note: size_t is expected to be unsigned.
  size_t digit_count = size_t(p - start_digits);
  // We go from
  // -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807
  // so we can never represent numbers that have more than 19 digits.
  size_t longest_digit_count = 19;
  // If there were no digits, it's an error.
  // Optimization note: the compiler can probably merge
  // ((digit_count == 0) || (digit_count > longest_digit_count))
  // into a single branch since digit_count is unsigned.
  if ((digit_count == 0) || (digit_count > longest_digit_count)) { return INCORRECT_TYPE; }
  // Here digit_count > 0.
  // If the integer starts with 0 and has more than one digit, it's an error.
  if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; }
  // We can do the following...
  // if (!jsoncharutils::is_structural_or_whitespace(*p)) {
  //  return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR;
  // }
  // as a single table lookup:
  if(integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); }
  // Negative numbers can go down to - INT64_MAX - 1 whereas positive numbers are limited to INT64_MAX.
  // Performance note: This check is only needed when digit_count == longest_digit_count but it is
  // so cheap that we might as well always make it.
  if(i > uint64_t(INT64_MAX) + uint64_t(negative)) { return INCORRECT_TYPE; }
  return negative ? (~i+1) : i;
}

// Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807
// Never read at src_end or beyond
simdjson_unused simdjson_inline simdjson_result<int64_t> parse_integer(const uint8_t * const src, const uint8_t * const src_end) noexcept {
  //
  // Check for minus sign
  //
  if(src == src_end) { return NUMBER_ERROR; }
  bool negative = (*src == '-');
  const uint8_t *p = src + uint8_t(negative);

  //
  // Parse the integer part.
  //
  // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare
  const uint8_t *const start_digits = p;
  uint64_t i = 0;
  while ((p != src_end) && parse_digit(*p, i)) { p++; }

  // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error.
  // Optimization note: size_t is expected to be unsigned.
  size_t digit_count = size_t(p - start_digits);
  // We go from
  // -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807
  // so we can never represent numbers that have more than 19 digits.
  size_t longest_digit_count = 19;
  // Optimization note: the compiler can probably merge
  // ((digit_count == 0) || (digit_count > longest_digit_count))
  // into a single branch since digit_count is unsigned.
if ((digit_count == 0) || (digit_count > longest_digit_count)) { return INCORRECT_TYPE; } // Here digit_count > 0. if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } // We can do the following... // if (!jsoncharutils::is_structural_or_whitespace(*p)) { // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; // } // as a single table lookup: if((p != src_end) && integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } // Negative numbers have can go down to - INT64_MAX - 1 whereas positive numbers are limited to INT64_MAX. // Performance note: This check is only needed when digit_count == longest_digit_count but it is // so cheap that we might as well always make it. if(i > uint64_t(INT64_MAX) + uint64_t(negative)) { return INCORRECT_TYPE; } return negative ? (~i+1) : i; } // Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 simdjson_unused simdjson_inline simdjson_result parse_integer_in_string(const uint8_t *src) noexcept { // // Check for minus sign // bool negative = (*(src + 1) == '-'); src += uint8_t(negative) + 1; // // Parse the integer part. // // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare const uint8_t *const start_digits = src; uint64_t i = 0; while (parse_digit(*src, i)) { src++; } // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. // Optimization note: size_t is expected to be unsigned. size_t digit_count = size_t(src - start_digits); // We go from // -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 // so we can never represent numbers that have more than 19 digits. size_t longest_digit_count = 19; // Optimization note: the compiler can probably merge // ((digit_count == 0) || (digit_count > longest_digit_count)) // into a single branch since digit_count is unsigned. 
if ((digit_count == 0) || (digit_count > longest_digit_count)) { return INCORRECT_TYPE; } // Here digit_count > 0. if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } // We can do the following... // if (!jsoncharutils::is_structural_or_whitespace(*src)) { // return (*src == '.' || *src == 'e' || *src == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; // } // as a single table lookup: if(*src != '"') { return NUMBER_ERROR; } // Negative numbers have can go down to - INT64_MAX - 1 whereas positive numbers are limited to INT64_MAX. // Performance note: This check is only needed when digit_count == longest_digit_count but it is // so cheap that we might as well always make it. if(i > uint64_t(INT64_MAX) + uint64_t(negative)) { return INCORRECT_TYPE; } return negative ? (~i+1) : i; } simdjson_unused simdjson_inline simdjson_result parse_double(const uint8_t * src) noexcept { // // Check for minus sign // bool negative = (*src == '-'); src += uint8_t(negative); // // Parse the integer part. // uint64_t i = 0; const uint8_t *p = src; p += parse_digit(*p, i); bool leading_zero = (i == 0); while (parse_digit(*p, i)) { p++; } // no integer digits, or 0123 (zero must be solo) if ( p == src ) { return INCORRECT_TYPE; } if ( (leading_zero && p != src+1)) { return NUMBER_ERROR; } // // Parse the decimal part. // int64_t exponent = 0; bool overflow; if (simdjson_likely(*p == '.')) { p++; const uint8_t *start_decimal_digits = p; if (!parse_digit(*p, i)) { return NUMBER_ERROR; } // no decimal digits p++; while (parse_digit(*p, i)) { p++; } exponent = -(p - start_decimal_digits); // Overflow check. More than 19 digits (minus the decimal) may be overflow. 
overflow = p-src-1 > 19; if (simdjson_unlikely(overflow && leading_zero)) { // Skip leading 0.00000 and see if it still overflows const uint8_t *start_digits = src + 2; while (*start_digits == '0') { start_digits++; } overflow = p-start_digits > 19; } } else { overflow = p-src > 19; } // // Parse the exponent // if (*p == 'e' || *p == 'E') { p++; bool exp_neg = *p == '-'; p += exp_neg || *p == '+'; uint64_t exp = 0; const uint8_t *start_exp_digits = p; while (parse_digit(*p, exp)) { p++; } // no exp digits, or 20+ exp digits if (p-start_exp_digits == 0 || p-start_exp_digits > 19) { return NUMBER_ERROR; } exponent += exp_neg ? 0-exp : exp; } if (jsoncharutils::is_not_structural_or_whitespace(*p)) { return NUMBER_ERROR; } overflow = overflow || exponent < simdjson::internal::smallest_power || exponent > simdjson::internal::largest_power; // // Assemble (or slow-parse) the float // double d; if (simdjson_likely(!overflow)) { if (compute_float_64(exponent, i, negative, d)) { return d; } } if (!parse_float_fallback(src - uint8_t(negative), &d)) { return NUMBER_ERROR; } return d; } simdjson_unused simdjson_inline bool is_negative(const uint8_t * src) noexcept { return (*src == '-'); } simdjson_unused simdjson_inline simdjson_result is_integer(const uint8_t * src) noexcept { bool negative = (*src == '-'); src += uint8_t(negative); const uint8_t *p = src; while(static_cast(*p - '0') <= 9) { p++; } if ( p == src ) { return NUMBER_ERROR; } if (jsoncharutils::is_structural_or_whitespace(*p)) { return true; } return false; } simdjson_unused simdjson_inline simdjson_result get_number_type(const uint8_t * src) noexcept { bool negative = (*src == '-'); src += uint8_t(negative); const uint8_t *p = src; while(static_cast(*p - '0') <= 9) { p++; } if ( p == src ) { return NUMBER_ERROR; } if (jsoncharutils::is_structural_or_whitespace(*p)) { // We have an integer. // If the number is negative and valid, it must be a signed integer. 
if(negative) { return number_type::signed_integer; } // We want values larger or equal to 9223372036854775808 to be unsigned // integers, and the other values to be signed integers. int digit_count = int(p - src); if(digit_count >= 19) { const uint8_t * smaller_big_integer = reinterpret_cast("9223372036854775808"); if((digit_count >= 20) || (memcmp(src, smaller_big_integer, 19) >= 0)) { return number_type::unsigned_integer; } } return number_type::signed_integer; } // Hopefully, we have 'e' or 'E' or '.'. return number_type::floating_point_number; } // Never read at src_end or beyond simdjson_unused simdjson_inline simdjson_result parse_double(const uint8_t * src, const uint8_t * const src_end) noexcept { if(src == src_end) { return NUMBER_ERROR; } // // Check for minus sign // bool negative = (*src == '-'); src += uint8_t(negative); // // Parse the integer part. // uint64_t i = 0; const uint8_t *p = src; if(p == src_end) { return NUMBER_ERROR; } p += parse_digit(*p, i); bool leading_zero = (i == 0); while ((p != src_end) && parse_digit(*p, i)) { p++; } // no integer digits, or 0123 (zero must be solo) if ( p == src ) { return INCORRECT_TYPE; } if ( (leading_zero && p != src+1)) { return NUMBER_ERROR; } // // Parse the decimal part. // int64_t exponent = 0; bool overflow; if (simdjson_likely((p != src_end) && (*p == '.'))) { p++; const uint8_t *start_decimal_digits = p; if ((p == src_end) || !parse_digit(*p, i)) { return NUMBER_ERROR; } // no decimal digits p++; while ((p != src_end) && parse_digit(*p, i)) { p++; } exponent = -(p - start_decimal_digits); // Overflow check. More than 19 digits (minus the decimal) may be overflow. 
overflow = p-src-1 > 19; if (simdjson_unlikely(overflow && leading_zero)) { // Skip leading 0.00000 and see if it still overflows const uint8_t *start_digits = src + 2; while (*start_digits == '0') { start_digits++; } overflow = start_digits-src > 19; } } else { overflow = p-src > 19; } // // Parse the exponent // if ((p != src_end) && (*p == 'e' || *p == 'E')) { p++; if(p == src_end) { return NUMBER_ERROR; } bool exp_neg = *p == '-'; p += exp_neg || *p == '+'; uint64_t exp = 0; const uint8_t *start_exp_digits = p; while ((p != src_end) && parse_digit(*p, exp)) { p++; } // no exp digits, or 20+ exp digits if (p-start_exp_digits == 0 || p-start_exp_digits > 19) { return NUMBER_ERROR; } exponent += exp_neg ? 0-exp : exp; } if ((p != src_end) && jsoncharutils::is_not_structural_or_whitespace(*p)) { return NUMBER_ERROR; } overflow = overflow || exponent < simdjson::internal::smallest_power || exponent > simdjson::internal::largest_power; // // Assemble (or slow-parse) the float // double d; if (simdjson_likely(!overflow)) { if (compute_float_64(exponent, i, negative, d)) { return d; } } if (!parse_float_fallback(src - uint8_t(negative), src_end, &d)) { return NUMBER_ERROR; } return d; } simdjson_unused simdjson_inline simdjson_result parse_double_in_string(const uint8_t * src) noexcept { // // Check for minus sign // bool negative = (*(src + 1) == '-'); src += uint8_t(negative) + 1; // // Parse the integer part. // uint64_t i = 0; const uint8_t *p = src; p += parse_digit(*p, i); bool leading_zero = (i == 0); while (parse_digit(*p, i)) { p++; } // no integer digits, or 0123 (zero must be solo) if ( p == src ) { return INCORRECT_TYPE; } if ( (leading_zero && p != src+1)) { return NUMBER_ERROR; } // // Parse the decimal part. 
// int64_t exponent = 0; bool overflow; if (simdjson_likely(*p == '.')) { p++; const uint8_t *start_decimal_digits = p; if (!parse_digit(*p, i)) { return NUMBER_ERROR; } // no decimal digits p++; while (parse_digit(*p, i)) { p++; } exponent = -(p - start_decimal_digits); // Overflow check. More than 19 digits (minus the decimal) may be overflow. overflow = p-src-1 > 19; if (simdjson_unlikely(overflow && leading_zero)) { // Skip leading 0.00000 and see if it still overflows const uint8_t *start_digits = src + 2; while (*start_digits == '0') { start_digits++; } overflow = p-start_digits > 19; } } else { overflow = p-src > 19; } // // Parse the exponent // if (*p == 'e' || *p == 'E') { p++; bool exp_neg = *p == '-'; p += exp_neg || *p == '+'; uint64_t exp = 0; const uint8_t *start_exp_digits = p; while (parse_digit(*p, exp)) { p++; } // no exp digits, or 20+ exp digits if (p-start_exp_digits == 0 || p-start_exp_digits > 19) { return NUMBER_ERROR; } exponent += exp_neg ? 0-exp : exp; } if (*p != '"') { return NUMBER_ERROR; } overflow = overflow || exponent < simdjson::internal::smallest_power || exponent > simdjson::internal::largest_power; // // Assemble (or slow-parse) the float // double d; if (simdjson_likely(!overflow)) { if (compute_float_64(exponent, i, negative, d)) { return d; } } if (!parse_float_fallback(src - uint8_t(negative), &d)) { return NUMBER_ERROR; } return d; } } // unnamed namespace #endif // SIMDJSON_SKIPNUMBERPARSING } // namespace numberparsing inline std::ostream& operator<<(std::ostream& out, number_type type) noexcept { switch (type) { case number_type::signed_integer: out << "integer in [-9223372036854775808,9223372036854775808)"; break; case number_type::unsigned_integer: out << "unsigned integer in [9223372036854775808,18446744073709551616)"; break; case number_type::floating_point_number: out << "floating-point number (binary64)"; break; default: SIMDJSON_UNREACHABLE(); } return out; } } // namespace arm64 } // namespace simdjson #endif 
// SIMDJSON_GENERIC_NUMBERPARSING_H /* end file simdjson/generic/numberparsing.h for arm64 */ /* including simdjson/generic/implementation_simdjson_result_base-inl.h for arm64: #include "simdjson/generic/implementation_simdjson_result_base-inl.h" */ /* begin file simdjson/generic/implementation_simdjson_result_base-inl.h for arm64 */ #ifndef SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_INL_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_INL_H */ /* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { namespace arm64 { // // internal::implementation_simdjson_result_base inline implementation // template simdjson_inline void implementation_simdjson_result_base::tie(T &value, error_code &error) && noexcept { error = this->second; if (!error) { value = std::forward>(*this).first; } } template simdjson_warn_unused simdjson_inline error_code implementation_simdjson_result_base::get(T &value) && noexcept { error_code error; std::forward>(*this).tie(value, error); return error; } template simdjson_inline error_code implementation_simdjson_result_base::error() const noexcept { return this->second; } #if SIMDJSON_EXCEPTIONS template simdjson_inline T& implementation_simdjson_result_base::value() & noexcept(false) { if (error()) { throw simdjson_error(error()); } return this->first; } template simdjson_inline T&& implementation_simdjson_result_base::value() && noexcept(false) { return std::forward>(*this).take_value(); } template simdjson_inline T&& implementation_simdjson_result_base::take_value() && noexcept(false) { if (error()) { throw simdjson_error(error()); } return std::forward(this->first); } template 
simdjson_inline implementation_simdjson_result_base::operator T&&() && noexcept(false) { return std::forward>(*this).take_value(); } #endif // SIMDJSON_EXCEPTIONS template simdjson_inline const T& implementation_simdjson_result_base::value_unsafe() const& noexcept { return this->first; } template simdjson_inline T& implementation_simdjson_result_base::value_unsafe() & noexcept { return this->first; } template simdjson_inline T&& implementation_simdjson_result_base::value_unsafe() && noexcept { return std::forward(this->first); } template simdjson_inline implementation_simdjson_result_base::implementation_simdjson_result_base(T &&value, error_code error) noexcept : first{std::forward(value)}, second{error} {} template simdjson_inline implementation_simdjson_result_base::implementation_simdjson_result_base(error_code error) noexcept : implementation_simdjson_result_base(T{}, error) {} template simdjson_inline implementation_simdjson_result_base::implementation_simdjson_result_base(T &&value) noexcept : implementation_simdjson_result_base(std::forward(value), SUCCESS) {} } // namespace arm64 } // namespace simdjson #endif // SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_INL_H /* end file simdjson/generic/implementation_simdjson_result_base-inl.h for arm64 */ /* end file simdjson/generic/amalgamated.h for arm64 */ /* including simdjson/arm64/end.h: #include "simdjson/arm64/end.h" */ /* begin file simdjson/arm64/end.h */ /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #include "simdjson/arm64/base.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ #undef SIMDJSON_SKIP_BACKSLASH_SHORT_CIRCUIT /* undefining SIMDJSON_IMPLEMENTATION from "arm64" */ #undef SIMDJSON_IMPLEMENTATION /* end file simdjson/arm64/end.h */ #endif // SIMDJSON_ARM64_H /* end file simdjson/arm64.h */ #elif SIMDJSON_BUILTIN_IMPLEMENTATION_IS(fallback) /* including simdjson/fallback.h: 
#include "simdjson/fallback.h" */
/* begin file simdjson/fallback.h */
#ifndef SIMDJSON_FALLBACK_H
#define SIMDJSON_FALLBACK_H

/* including simdjson/fallback/begin.h: #include "simdjson/fallback/begin.h" */
/* begin file simdjson/fallback/begin.h */
/* defining SIMDJSON_IMPLEMENTATION to "fallback" */
#define SIMDJSON_IMPLEMENTATION fallback
/* including simdjson/fallback/base.h: #include "simdjson/fallback/base.h" */
/* begin file simdjson/fallback/base.h */
#ifndef SIMDJSON_FALLBACK_BASE_H
#define SIMDJSON_FALLBACK_BASE_H

/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */
/* amalgamation skipped (editor-only): #include "simdjson/base.h" */
/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */

namespace simdjson {
/**
 * Fallback implementation (runs on any machine).
 */
namespace fallback {

class implementation;

} // namespace fallback
} // namespace simdjson

#endif // SIMDJSON_FALLBACK_BASE_H
/* end file simdjson/fallback/base.h */
/* including simdjson/fallback/bitmanipulation.h: #include "simdjson/fallback/bitmanipulation.h" */
/* begin file simdjson/fallback/bitmanipulation.h */
#ifndef SIMDJSON_FALLBACK_BITMANIPULATION_H
#define SIMDJSON_FALLBACK_BITMANIPULATION_H

/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */
/* amalgamation skipped (editor-only): #include "simdjson/fallback/base.h" */
/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */

namespace simdjson {
namespace fallback {
namespace {

#if defined(_MSC_VER) && !defined(_M_ARM64) && !defined(_M_X64)
// 64-bit bit scans built from the 32-bit intrinsics, for MSVC targets other
// than x64 and ARM64. Both return non-zero iff x != 0; *ret is only meaningful
// in that case.
static inline unsigned char _BitScanForward64(unsigned long* ret, uint64_t x) {
  unsigned long x0 = (unsigned long)x, top, bottom;
  _BitScanForward(&top, (unsigned long)(x >> 32));
  _BitScanForward(&bottom, x0);
  // Prefer the low half: its lowest set bit, when present, is the overall lowest.
  *ret = x0 ? bottom : 32 + top;
  return x != 0;
}
static inline unsigned char _BitScanReverse64(unsigned long* ret, uint64_t x) {
  unsigned long x1 = (unsigned long)(x >> 32), top, bottom;
  _BitScanReverse(&top, x1);
  _BitScanReverse(&bottom, (unsigned long)x);
  // Prefer the high half: its highest set bit, when present, is the overall highest.
  *ret = x1 ? top + 32 : bottom;
  return x != 0;
}
#endif

/* result might be undefined when input_num is zero */
simdjson_inline int leading_zeroes(uint64_t input_num) {
#ifdef _MSC_VER
  unsigned long leading_zero = 0;
  // Search the mask data from most significant bit (MSB)
  // to least significant bit (LSB) for a set bit (1).
  if (_BitScanReverse64(&leading_zero, input_num))
    return (int)(63 - leading_zero);
  else
    return 64;
#else
  return __builtin_clzll(input_num);
#endif// _MSC_VER
}

} // unnamed namespace
} // namespace fallback
} // namespace simdjson

#endif // SIMDJSON_FALLBACK_BITMANIPULATION_H
/* end file simdjson/fallback/bitmanipulation.h */
/* including simdjson/fallback/stringparsing_defs.h: #include "simdjson/fallback/stringparsing_defs.h" */
/* begin file simdjson/fallback/stringparsing_defs.h */
#ifndef SIMDJSON_FALLBACK_STRINGPARSING_DEFS_H
#define SIMDJSON_FALLBACK_STRINGPARSING_DEFS_H

/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */
/* amalgamation skipped (editor-only): #include "simdjson/fallback/base.h" */
/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */

namespace simdjson {
namespace fallback {
namespace {

// Holds backslashes and quotes locations.
// This implementation looks at one byte per step (BYTES_PROCESSED == 1), so the
// "locations" reduce to whether that single byte is a quote or a backslash.
struct backslash_and_quote {
public:
  static constexpr uint32_t BYTES_PROCESSED = 1;
  simdjson_inline static backslash_and_quote copy_and_find(const uint8_t *src, uint8_t *dst);

  simdjson_inline bool has_quote_first() { return c == '"'; }
  simdjson_inline bool has_backslash() { return c == '\\'; }
  // Index 0 when the byte matches; 1 (one past the processed byte) otherwise.
  simdjson_inline int quote_index() { return c == '"' ? 0 : 1; }
  simdjson_inline int backslash_index() { return c == '\\' ? 0 : 1; }

  uint8_t c;
}; // struct backslash_and_quote

// Copies one byte from src to dst and remembers it for the queries above.
simdjson_inline backslash_and_quote backslash_and_quote::copy_and_find(const uint8_t *src, uint8_t *dst) {
  // store to dest unconditionally - we can overwrite the bits we don't like later
  dst[0] = src[0];
  return { src[0] };
}

} // unnamed namespace
} // namespace fallback
} // namespace simdjson

#endif // SIMDJSON_FALLBACK_STRINGPARSING_DEFS_H
/* end file simdjson/fallback/stringparsing_defs.h */
/* including simdjson/fallback/numberparsing_defs.h: #include "simdjson/fallback/numberparsing_defs.h" */
/* begin file simdjson/fallback/numberparsing_defs.h */
#ifndef SIMDJSON_FALLBACK_NUMBERPARSING_DEFS_H
#define SIMDJSON_FALLBACK_NUMBERPARSING_DEFS_H

/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */
/* amalgamation skipped (editor-only): #include "simdjson/fallback/base.h" */
/* amalgamation skipped (editor-only): #include "simdjson/internal/numberparsing_tables.h" */
/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */

#include <cstring>

#ifdef JSON_TEST_NUMBERS // for unit testing
void found_invalid_number(const uint8_t *buf);
void found_integer(int64_t result, const uint8_t *buf);
void found_unsigned_integer(uint64_t result, const uint8_t *buf);
void found_float(double result, const uint8_t *buf);
#endif

namespace simdjson {
namespace fallback {
namespace numberparsing {

// credit: https://johnnylee-sde.github.io/Fast-numeric-string-to-int/
/** @private */
// Converts eight ASCII digits into their numeric value with three
// mask-and-multiply steps on a single 64-bit word. The bytes are loaded with
// memcpy in memory order. The input is not validated here.
static simdjson_inline uint32_t parse_eight_digits_unrolled(const char *chars) {
  uint64_t val;
  memcpy(&val, chars, sizeof(uint64_t));
  val = (val & 0x0F0F0F0F0F0F0F0F) * 2561 >> 8;
  val = (val & 0x00FF00FF00FF00FF) * 6553601 >> 16;
  return uint32_t((val & 0x0000FFFF0000FFFF) * 42949672960001 >> 32);
}

/** @private */
static simdjson_inline uint32_t parse_eight_digits_unrolled(const uint8_t *chars) {
  return parse_eight_digits_unrolled(reinterpret_cast<const char *>(chars));
}

#if SIMDJSON_IS_32BITS // _umul128 for x86, arm
// this is a slow emulation routine for 32-bit
//
// 32x32 -> 64-bit multiply.
static simdjson_inline uint64_t __emulu(uint32_t x, uint32_t y) {
  return x * (uint64_t)y;
}
// 64x64 -> 128-bit multiply from four 32x32 partial products: returns the low
// 64 bits and stores the high 64 bits in *hi.
static simdjson_inline uint64_t _umul128(uint64_t ab, uint64_t cd, uint64_t *hi) {
  uint64_t ad = __emulu((uint32_t)(ab >> 32), (uint32_t)cd);
  uint64_t bd = __emulu((uint32_t)ab, (uint32_t)cd);
  uint64_t adbc = ad + __emulu((uint32_t)ab, (uint32_t)(cd >> 32));
  uint64_t adbc_carry = !!(adbc < ad);
  uint64_t lo = bd + (adbc << 32);
  *hi = __emulu((uint32_t)(ab >> 32), (uint32_t)(cd >> 32)) + (adbc >> 32) +
        (adbc_carry << 32) + !!(lo < bd);
  return lo;
}
#endif

/** @private */
// Full 64x64 -> 128-bit product of value1 and value2, split into low/high words.
simdjson_inline internal::value128 full_multiplication(uint64_t value1, uint64_t value2) {
  internal::value128 answer;
#if SIMDJSON_REGULAR_VISUAL_STUDIO || SIMDJSON_IS_32BITS
#ifdef _M_ARM64
  // ARM64 has native support for 64-bit multiplications, no need to emulate
  answer.high = __umulh(value1, value2);
  answer.low = value1 * value2;
#else
  answer.low = _umul128(value1, value2, &answer.high); // _umul128 not available on ARM64
#endif // _M_ARM64
#else // SIMDJSON_REGULAR_VISUAL_STUDIO || SIMDJSON_IS_32BITS
  __uint128_t r = (static_cast<__uint128_t>(value1)) * value2;
  answer.low = uint64_t(r);
  answer.high = uint64_t(r >> 64);
#endif
  return answer;
}

} // namespace numberparsing
} // namespace fallback
} // namespace simdjson

#define SIMDJSON_SWAR_NUMBER_PARSING 1

#endif // SIMDJSON_FALLBACK_NUMBERPARSING_DEFS_H
/* end file simdjson/fallback/numberparsing_defs.h */
/* end file simdjson/fallback/begin.h */
/* including simdjson/generic/amalgamated.h for fallback: #include "simdjson/generic/amalgamated.h" */
/* begin file simdjson/generic/amalgamated.h for fallback */
#if defined(SIMDJSON_CONDITIONAL_INCLUDE) && !defined(SIMDJSON_GENERIC_DEPENDENCIES_H)
#error simdjson/generic/dependencies.h must be included before simdjson/generic/amalgamated.h!
#endif /* including simdjson/generic/base.h for fallback: #include "simdjson/generic/base.h" */ /* begin file simdjson/generic/base.h for fallback */ #ifndef SIMDJSON_GENERIC_BASE_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_BASE_H */ /* amalgamation skipped (editor-only): #include "simdjson/base.h" */ /* amalgamation skipped (editor-only): // If we haven't got an implementation yet, we're in the editor, editing a generic file! Just */ /* amalgamation skipped (editor-only): // use the most advanced one we can so the most possible stuff can be tested. */ /* amalgamation skipped (editor-only): #ifndef SIMDJSON_IMPLEMENTATION */ /* amalgamation skipped (editor-only): #include "simdjson/implementation_detection.h" */ /* amalgamation skipped (editor-only): #if SIMDJSON_IMPLEMENTATION_ICELAKE */ /* amalgamation skipped (editor-only): #include "simdjson/icelake/begin.h" */ /* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_HASWELL */ /* amalgamation skipped (editor-only): #include "simdjson/haswell/begin.h" */ /* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_WESTMERE */ /* amalgamation skipped (editor-only): #include "simdjson/westmere/begin.h" */ /* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_ARM64 */ /* amalgamation skipped (editor-only): #include "simdjson/arm64/begin.h" */ /* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_PPC64 */ /* amalgamation skipped (editor-only): #include "simdjson/ppc64/begin.h" */ /* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_FALLBACK */ /* amalgamation skipped (editor-only): #include "simdjson/fallback/begin.h" */ /* amalgamation skipped (editor-only): #else */ /* amalgamation skipped (editor-only): #error "All possible implementations (including fallback) have been disabled! simdjson will not run." 
*/ /* amalgamation skipped (editor-only): #endif */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_IMPLEMENTATION */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { namespace fallback { struct open_container; class dom_parser_implementation; /** * The type of a JSON number */ enum class number_type { floating_point_number=1, /// a binary64 number signed_integer, /// a signed integer that fits in a 64-bit word using two's complement unsigned_integer /// a positive integer larger or equal to 1<<63 }; } // namespace fallback } // namespace simdjson #endif // SIMDJSON_GENERIC_BASE_H /* end file simdjson/generic/base.h for fallback */ /* including simdjson/generic/jsoncharutils.h for fallback: #include "simdjson/generic/jsoncharutils.h" */ /* begin file simdjson/generic/jsoncharutils.h for fallback */ #ifndef SIMDJSON_GENERIC_JSONCHARUTILS_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_JSONCHARUTILS_H */ /* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/internal/jsoncharutils_tables.h" */ /* amalgamation skipped (editor-only): #include "simdjson/internal/numberparsing_tables.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { namespace fallback { namespace { namespace jsoncharutils { // return non-zero if not a structural or whitespace char // zero otherwise simdjson_inline uint32_t is_not_structural_or_whitespace(uint8_t c) { return internal::structural_or_whitespace_negated[c]; } simdjson_inline uint32_t is_structural_or_whitespace(uint8_t c) { return internal::structural_or_whitespace[c]; } // returns a value with the high 16 bits set if not valid // otherwise returns the conversion of the 4 hex digits at src into the bottom // 16 bits of the 32-bit return register // 
// see // https://lemire.me/blog/2019/04/17/parsing-short-hexadecimal-strings-efficiently/ static inline uint32_t hex_to_u32_nocheck( const uint8_t *src) { // strictly speaking, static inline is a C-ism uint32_t v1 = internal::digit_to_val32[630 + src[0]]; uint32_t v2 = internal::digit_to_val32[420 + src[1]]; uint32_t v3 = internal::digit_to_val32[210 + src[2]]; uint32_t v4 = internal::digit_to_val32[0 + src[3]]; return v1 | v2 | v3 | v4; } // given a code point cp, writes to c // the utf-8 code, outputting the length in // bytes, if the length is zero, the code point // is invalid // // This can possibly be made faster using pdep // and clz and table lookups, but JSON documents // have few escaped code points, and the following // function looks cheap. // // Note: we assume that surrogates are treated separately // simdjson_inline size_t codepoint_to_utf8(uint32_t cp, uint8_t *c) { if (cp <= 0x7F) { c[0] = uint8_t(cp); return 1; // ascii } if (cp <= 0x7FF) { c[0] = uint8_t((cp >> 6) + 192); c[1] = uint8_t((cp & 63) + 128); return 2; // universal plane // Surrogates are treated elsewhere... //} //else if (0xd800 <= cp && cp <= 0xdfff) { // return 0; // surrogates // could put assert here } else if (cp <= 0xFFFF) { c[0] = uint8_t((cp >> 12) + 224); c[1] = uint8_t(((cp >> 6) & 63) + 128); c[2] = uint8_t((cp & 63) + 128); return 3; } else if (cp <= 0x10FFFF) { // if you know you have a valid code point, this // is not needed c[0] = uint8_t((cp >> 18) + 240); c[1] = uint8_t(((cp >> 12) & 63) + 128); c[2] = uint8_t(((cp >> 6) & 63) + 128); c[3] = uint8_t((cp & 63) + 128); return 4; } // will return 0 when the code point was too large. 
return 0; // bad r } #if SIMDJSON_IS_32BITS // _umul128 for x86, arm // this is a slow emulation routine for 32-bit // static simdjson_inline uint64_t __emulu(uint32_t x, uint32_t y) { return x * (uint64_t)y; } static simdjson_inline uint64_t _umul128(uint64_t ab, uint64_t cd, uint64_t *hi) { uint64_t ad = __emulu((uint32_t)(ab >> 32), (uint32_t)cd); uint64_t bd = __emulu((uint32_t)ab, (uint32_t)cd); uint64_t adbc = ad + __emulu((uint32_t)ab, (uint32_t)(cd >> 32)); uint64_t adbc_carry = !!(adbc < ad); uint64_t lo = bd + (adbc << 32); *hi = __emulu((uint32_t)(ab >> 32), (uint32_t)(cd >> 32)) + (adbc >> 32) + (adbc_carry << 32) + !!(lo < bd); return lo; } #endif } // namespace jsoncharutils } // unnamed namespace } // namespace fallback } // namespace simdjson #endif // SIMDJSON_GENERIC_JSONCHARUTILS_H /* end file simdjson/generic/jsoncharutils.h for fallback */ /* including simdjson/generic/atomparsing.h for fallback: #include "simdjson/generic/atomparsing.h" */ /* begin file simdjson/generic/atomparsing.h for fallback */ #ifndef SIMDJSON_GENERIC_ATOMPARSING_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ATOMPARSING_H */ /* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/jsoncharutils.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ #include namespace simdjson { namespace fallback { namespace { /// @private namespace atomparsing { // The string_to_uint32 is exclusively used to map literal strings to 32-bit values. // We use memcpy instead of a pointer cast to avoid undefined behaviors since we cannot // be certain that the character pointer will be properly aligned. // You might think that using memcpy makes this function expensive, but you'd be wrong. 
// All decent optimizing compilers (GCC, clang, Visual Studio) will compile string_to_uint32("false"); // to the compile-time constant 1936482662. simdjson_inline uint32_t string_to_uint32(const char* str) { uint32_t val; std::memcpy(&val, str, sizeof(uint32_t)); return val; } // Again in str4ncmp we use a memcpy to avoid undefined behavior. The memcpy may appear expensive. // Yet all decent optimizing compilers will compile memcpy to a single instruction, just about. simdjson_warn_unused simdjson_inline uint32_t str4ncmp(const uint8_t *src, const char* atom) { uint32_t srcval; // we want to avoid unaligned 32-bit loads (undefined in C/C++) static_assert(sizeof(uint32_t) <= SIMDJSON_PADDING, "SIMDJSON_PADDING must be larger than 4 bytes"); std::memcpy(&srcval, src, sizeof(uint32_t)); return srcval ^ string_to_uint32(atom); } simdjson_warn_unused simdjson_inline bool is_valid_true_atom(const uint8_t *src) { return (str4ncmp(src, "true") | jsoncharutils::is_not_structural_or_whitespace(src[4])) == 0; } simdjson_warn_unused simdjson_inline bool is_valid_true_atom(const uint8_t *src, size_t len) { if (len > 4) { return is_valid_true_atom(src); } else if (len == 4) { return !str4ncmp(src, "true"); } else { return false; } } simdjson_warn_unused simdjson_inline bool is_valid_false_atom(const uint8_t *src) { return (str4ncmp(src+1, "alse") | jsoncharutils::is_not_structural_or_whitespace(src[5])) == 0; } simdjson_warn_unused simdjson_inline bool is_valid_false_atom(const uint8_t *src, size_t len) { if (len > 5) { return is_valid_false_atom(src); } else if (len == 5) { return !str4ncmp(src+1, "alse"); } else { return false; } } simdjson_warn_unused simdjson_inline bool is_valid_null_atom(const uint8_t *src) { return (str4ncmp(src, "null") | jsoncharutils::is_not_structural_or_whitespace(src[4])) == 0; } simdjson_warn_unused simdjson_inline bool is_valid_null_atom(const uint8_t *src, size_t len) { if (len > 4) { return is_valid_null_atom(src); } else if (len == 4) { return 
!str4ncmp(src, "null"); } else { return false; } } } // namespace atomparsing } // unnamed namespace } // namespace fallback } // namespace simdjson #endif // SIMDJSON_GENERIC_ATOMPARSING_H /* end file simdjson/generic/atomparsing.h for fallback */ /* including simdjson/generic/dom_parser_implementation.h for fallback: #include "simdjson/generic/dom_parser_implementation.h" */ /* begin file simdjson/generic/dom_parser_implementation.h for fallback */ #ifndef SIMDJSON_GENERIC_DOM_PARSER_IMPLEMENTATION_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_DOM_PARSER_IMPLEMENTATION_H */ /* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/internal/dom_parser_implementation.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { namespace fallback { // expectation: sizeof(open_container) = 64/8. 
struct open_container { uint32_t tape_index; // where, on the tape, does the scope ([,{) begins uint32_t count; // how many elements in the scope }; // struct open_container static_assert(sizeof(open_container) == 64/8, "Open container must be 64 bits"); class dom_parser_implementation final : public internal::dom_parser_implementation { public: /** Tape location of each open { or [ */ std::unique_ptr open_containers{}; /** Whether each open container is a [ or { */ std::unique_ptr is_array{}; /** Buffer passed to stage 1 */ const uint8_t *buf{}; /** Length passed to stage 1 */ size_t len{0}; /** Document passed to stage 2 */ dom::document *doc{}; inline dom_parser_implementation() noexcept; inline dom_parser_implementation(dom_parser_implementation &&other) noexcept; inline dom_parser_implementation &operator=(dom_parser_implementation &&other) noexcept; dom_parser_implementation(const dom_parser_implementation &) = delete; dom_parser_implementation &operator=(const dom_parser_implementation &) = delete; simdjson_warn_unused error_code parse(const uint8_t *buf, size_t len, dom::document &doc) noexcept final; simdjson_warn_unused error_code stage1(const uint8_t *buf, size_t len, stage1_mode partial) noexcept final; simdjson_warn_unused error_code stage2(dom::document &doc) noexcept final; simdjson_warn_unused error_code stage2_next(dom::document &doc) noexcept final; simdjson_warn_unused uint8_t *parse_string(const uint8_t *src, uint8_t *dst, bool allow_replacement) const noexcept final; simdjson_warn_unused uint8_t *parse_wobbly_string(const uint8_t *src, uint8_t *dst) const noexcept final; inline simdjson_warn_unused error_code set_capacity(size_t capacity) noexcept final; inline simdjson_warn_unused error_code set_max_depth(size_t max_depth) noexcept final; private: simdjson_inline simdjson_warn_unused error_code set_capacity_stage1(size_t capacity); }; } // namespace fallback } // namespace simdjson namespace simdjson { namespace fallback { inline 
dom_parser_implementation::dom_parser_implementation() noexcept = default; inline dom_parser_implementation::dom_parser_implementation(dom_parser_implementation &&other) noexcept = default; inline dom_parser_implementation &dom_parser_implementation::operator=(dom_parser_implementation &&other) noexcept = default; // Leaving these here so they can be inlined if so desired inline simdjson_warn_unused error_code dom_parser_implementation::set_capacity(size_t capacity) noexcept { if(capacity > SIMDJSON_MAXSIZE_BYTES) { return CAPACITY; } // Stage 1 index output size_t max_structures = SIMDJSON_ROUNDUP_N(capacity, 64) + 2 + 7; structural_indexes.reset( new (std::nothrow) uint32_t[max_structures] ); if (!structural_indexes) { _capacity = 0; return MEMALLOC; } structural_indexes[0] = 0; n_structural_indexes = 0; _capacity = capacity; return SUCCESS; } inline simdjson_warn_unused error_code dom_parser_implementation::set_max_depth(size_t max_depth) noexcept { // Stage 2 stacks open_containers.reset(new (std::nothrow) open_container[max_depth]); is_array.reset(new (std::nothrow) bool[max_depth]); if (!is_array || !open_containers) { _max_depth = 0; return MEMALLOC; } _max_depth = max_depth; return SUCCESS; } } // namespace fallback } // namespace simdjson #endif // SIMDJSON_GENERIC_DOM_PARSER_IMPLEMENTATION_H /* end file simdjson/generic/dom_parser_implementation.h for fallback */ /* including simdjson/generic/implementation_simdjson_result_base.h for fallback: #include "simdjson/generic/implementation_simdjson_result_base.h" */ /* begin file simdjson/generic/implementation_simdjson_result_base.h for fallback */ #ifndef SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_H */ /* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ /* amalgamation skipped (editor-only): 
#endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { namespace fallback { // This is a near copy of include/error.h's implementation_simdjson_result_base, except it doesn't use std::pair // so we can avoid inlining errors // TODO reconcile these! /** * The result of a simdjson operation that could fail. * * Gives the option of reading error codes, or throwing an exception by casting to the desired result. * * This is a base class for implementations that want to add functions to the result type for * chaining. * * Override like: * * struct simdjson_result : public internal::implementation_simdjson_result_base { * simdjson_result() noexcept : internal::implementation_simdjson_result_base() {} * simdjson_result(error_code error) noexcept : internal::implementation_simdjson_result_base(error) {} * simdjson_result(T &&value) noexcept : internal::implementation_simdjson_result_base(std::forward(value)) {} * simdjson_result(T &&value, error_code error) noexcept : internal::implementation_simdjson_result_base(value, error) {} * // Your extra methods here * } * * Then any method returning simdjson_result will be chainable with your methods. */ template struct implementation_simdjson_result_base { /** * Create a new empty result with error = UNINITIALIZED. */ simdjson_inline implementation_simdjson_result_base() noexcept = default; /** * Create a new error result. */ simdjson_inline implementation_simdjson_result_base(error_code error) noexcept; /** * Create a new successful result. */ simdjson_inline implementation_simdjson_result_base(T &&value) noexcept; /** * Create a new result with both things (use if you don't want to branch when creating the result). */ simdjson_inline implementation_simdjson_result_base(T &&value, error_code error) noexcept; /** * Move the value and the error to the provided variables. * * @param value The variable to assign the value to. May not be set if there is an error. * @param error The variable to assign the error to. 
Set to SUCCESS if there is no error. */ simdjson_inline void tie(T &value, error_code &error) && noexcept; /** * Move the value to the provided variable. * * @param value The variable to assign the value to. May not be set if there is an error. */ simdjson_inline error_code get(T &value) && noexcept; /** * The error. */ simdjson_inline error_code error() const noexcept; #if SIMDJSON_EXCEPTIONS /** * Get the result value. * * @throw simdjson_error if there was an error. */ simdjson_inline T& value() & noexcept(false); /** * Take the result value (move it). * * @throw simdjson_error if there was an error. */ simdjson_inline T&& value() && noexcept(false); /** * Take the result value (move it). * * @throw simdjson_error if there was an error. */ simdjson_inline T&& take_value() && noexcept(false); /** * Cast to the value (will throw on error). * * @throw simdjson_error if there was an error. */ simdjson_inline operator T&&() && noexcept(false); #endif // SIMDJSON_EXCEPTIONS /** * Get the result value. This function is safe if and only * the error() method returns a value that evaluates to false. */ simdjson_inline const T& value_unsafe() const& noexcept; /** * Get the result value. This function is safe if and only * the error() method returns a value that evaluates to false. */ simdjson_inline T& value_unsafe() & noexcept; /** * Take the result value (move it). This function is safe if and only * the error() method returns a value that evaluates to false. */ simdjson_inline T&& value_unsafe() && noexcept; protected: /** users should never directly access first and second. **/ T first{}; /** Users should never directly access 'first'. **/ error_code second{UNINITIALIZED}; /** Users should never directly access 'second'. 
**/ }; // struct implementation_simdjson_result_base } // namespace fallback } // namespace simdjson #endif // SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_H /* end file simdjson/generic/implementation_simdjson_result_base.h for fallback */ /* including simdjson/generic/numberparsing.h for fallback: #include "simdjson/generic/numberparsing.h" */ /* begin file simdjson/generic/numberparsing.h for fallback */ #ifndef SIMDJSON_GENERIC_NUMBERPARSING_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_NUMBERPARSING_H */ /* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/jsoncharutils.h" */ /* amalgamation skipped (editor-only): #include "simdjson/internal/numberparsing_tables.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ #include #include #include namespace simdjson { namespace fallback { namespace numberparsing { #ifdef JSON_TEST_NUMBERS #define INVALID_NUMBER(SRC) (found_invalid_number((SRC)), NUMBER_ERROR) #define WRITE_INTEGER(VALUE, SRC, WRITER) (found_integer((VALUE), (SRC)), (WRITER).append_s64((VALUE))) #define WRITE_UNSIGNED(VALUE, SRC, WRITER) (found_unsigned_integer((VALUE), (SRC)), (WRITER).append_u64((VALUE))) #define WRITE_DOUBLE(VALUE, SRC, WRITER) (found_float((VALUE), (SRC)), (WRITER).append_double((VALUE))) #else #define INVALID_NUMBER(SRC) (NUMBER_ERROR) #define WRITE_INTEGER(VALUE, SRC, WRITER) (WRITER).append_s64((VALUE)) #define WRITE_UNSIGNED(VALUE, SRC, WRITER) (WRITER).append_u64((VALUE)) #define WRITE_DOUBLE(VALUE, SRC, WRITER) (WRITER).append_double((VALUE)) #endif namespace { // Convert a mantissa, an exponent and a sign bit into an ieee64 double. // The real_exponent needs to be in [0, 2046] (technically real_exponent = 2047 would be acceptable). // The mantissa should be in [0,1<<53). 
The bit at index (1ULL << 52) while be zeroed. simdjson_inline double to_double(uint64_t mantissa, uint64_t real_exponent, bool negative) { double d; mantissa &= ~(1ULL << 52); mantissa |= real_exponent << 52; mantissa |= ((static_cast(negative)) << 63); std::memcpy(&d, &mantissa, sizeof(d)); return d; } // Attempts to compute i * 10^(power) exactly; and if "negative" is // true, negate the result. // This function will only work in some cases, when it does not work, success is // set to false. This should work *most of the time* (like 99% of the time). // We assume that power is in the [smallest_power, // largest_power] interval: the caller is responsible for this check. simdjson_inline bool compute_float_64(int64_t power, uint64_t i, bool negative, double &d) { // we start with a fast path // It was described in // Clinger WD. How to read floating point numbers accurately. // ACM SIGPLAN Notices. 1990 #ifndef FLT_EVAL_METHOD #error "FLT_EVAL_METHOD should be defined, please include cfloat." #endif #if (FLT_EVAL_METHOD != 1) && (FLT_EVAL_METHOD != 0) // We cannot be certain that x/y is rounded to nearest. if (0 <= power && power <= 22 && i <= 9007199254740991) #else if (-22 <= power && power <= 22 && i <= 9007199254740991) #endif { // convert the integer into a double. This is lossless since // 0 <= i <= 2^53 - 1. d = double(i); // // The general idea is as follows. // If 0 <= s < 2^53 and if 10^0 <= p <= 10^22 then // 1) Both s and p can be represented exactly as 64-bit floating-point // values // (binary64). // 2) Because s and p can be represented exactly as floating-point values, // then s * p // and s / p will produce correctly rounded values. // if (power < 0) { d = d / simdjson::internal::power_of_ten[-power]; } else { d = d * simdjson::internal::power_of_ten[power]; } if (negative) { d = -d; } return true; } // When 22 < power && power < 22 + 16, we could // hope for another, secondary fast path. It was // described by David M. 
Gay in "Correctly rounded // binary-decimal and decimal-binary conversions." (1990) // If you need to compute i * 10^(22 + x) for x < 16, // first compute i * 10^x, if you know that result is exact // (e.g., when i * 10^x < 2^53), // then you can still proceed and do (i * 10^x) * 10^22. // Is this worth your time? // You need 22 < power *and* power < 22 + 16 *and* (i * 10^(x-22) < 2^53) // for this second fast path to work. // If you you have 22 < power *and* power < 22 + 16, and then you // optimistically compute "i * 10^(x-22)", there is still a chance that you // have wasted your time if i * 10^(x-22) >= 2^53. It makes the use cases of // this optimization maybe less common than we would like. Source: // http://www.exploringbinary.com/fast-path-decimal-to-floating-point-conversion/ // also used in RapidJSON: https://rapidjson.org/strtod_8h_source.html // The fast path has now failed, so we are failing back on the slower path. // In the slow path, we need to adjust i so that it is > 1<<63 which is always // possible, except if i == 0, so we handle i == 0 separately. if(i == 0) { d = negative ? -0.0 : 0.0; return true; } // The exponent is 1024 + 63 + power // + floor(log(5**power)/log(2)). // The 1024 comes from the ieee64 standard. // The 63 comes from the fact that we use a 64-bit word. // // Computing floor(log(5**power)/log(2)) could be // slow. Instead we use a fast function. // // For power in (-400,350), we have that // (((152170 + 65536) * power ) >> 16); // is equal to // floor(log(5**power)/log(2)) + power when power >= 0 // and it is equal to // ceil(log(5**-power)/log(2)) + power when power < 0 // // The 65536 is (1<<16) and corresponds to // (65536 * power) >> 16 ---> power // // ((152170 * power ) >> 16) is equal to // floor(log(5**power)/log(2)) // // Note that this is not magic: 152170/(1<<16) is // approximatively equal to log(5)/log(2). // The 1<<16 value is a power of two; we could use a // larger power of 2 if we wanted to. 
// int64_t exponent = (((152170 + 65536) * power) >> 16) + 1024 + 63; // We want the most significant bit of i to be 1. Shift if needed. int lz = leading_zeroes(i); i <<= lz; // We are going to need to do some 64-bit arithmetic to get a precise product. // We use a table lookup approach. // It is safe because // power >= smallest_power // and power <= largest_power // We recover the mantissa of the power, it has a leading 1. It is always // rounded down. // // We want the most significant 64 bits of the product. We know // this will be non-zero because the most significant bit of i is // 1. const uint32_t index = 2 * uint32_t(power - simdjson::internal::smallest_power); // Optimization: It may be that materializing the index as a variable might confuse some compilers and prevent effective complex-addressing loads. (Done for code clarity.) // // The full_multiplication function computes the 128-bit product of two 64-bit words // with a returned value of type value128 with a "low component" corresponding to the // 64-bit least significant bits of the product and with a "high component" corresponding // to the 64-bit most significant bits of the product. simdjson::internal::value128 firstproduct = full_multiplication(i, simdjson::internal::power_of_five_128[index]); // Both i and power_of_five_128[index] have their most significant bit set to 1 which // implies that the either the most or the second most significant bit of the product // is 1. We pack values in this manner for efficiency reasons: it maximizes the use // we make of the product. It also makes it easy to reason about the product: there // is 0 or 1 leading zero in the product. // Unless the least significant 9 bits of the high (64-bit) part of the full // product are all 1s, then we know that the most significant 55 bits are // exact and no further work is needed. 
Having 55 bits is necessary because // we need 53 bits for the mantissa but we have to have one rounding bit and // we can waste a bit if the most significant bit of the product is zero. if((firstproduct.high & 0x1FF) == 0x1FF) { // We want to compute i * 5^q, but only care about the top 55 bits at most. // Consider the scenario where q>=0. Then 5^q may not fit in 64-bits. Doing // the full computation is wasteful. So we do what is called a "truncated // multiplication". // We take the most significant 64-bits, and we put them in // power_of_five_128[index]. Usually, that's good enough to approximate i * 5^q // to the desired approximation using one multiplication. Sometimes it does not suffice. // Then we store the next most significant 64 bits in power_of_five_128[index + 1], and // then we get a better approximation to i * 5^q. // // That's for when q>=0. The logic for q<0 is somewhat similar but it is somewhat // more complicated. // // There is an extra layer of complexity in that we need more than 55 bits of // accuracy in the round-to-even scenario. // // The full_multiplication function computes the 128-bit product of two 64-bit words // with a returned value of type value128 with a "low component" corresponding to the // 64-bit least significant bits of the product and with a "high component" corresponding // to the 64-bit most significant bits of the product. simdjson::internal::value128 secondproduct = full_multiplication(i, simdjson::internal::power_of_five_128[index + 1]); firstproduct.low += secondproduct.high; if(secondproduct.high > firstproduct.low) { firstproduct.high++; } // As it has been proven by Noble Mushtak and Daniel Lemire in "Fast Number Parsing Without // Fallback" (https://arxiv.org/abs/2212.06644), at this point we are sure that the product // is sufficiently accurate, and more computation is not needed. } uint64_t lower = firstproduct.low; uint64_t upper = firstproduct.high; // The final mantissa should be 53 bits with a leading 1. 
// We shift it so that it occupies 54 bits with a leading 1. /////// uint64_t upperbit = upper >> 63; uint64_t mantissa = upper >> (upperbit + 9); lz += int(1 ^ upperbit); // Here we have mantissa < (1<<54). int64_t real_exponent = exponent - lz; if (simdjson_unlikely(real_exponent <= 0)) { // we have a subnormal? // Here have that real_exponent <= 0 so -real_exponent >= 0 if(-real_exponent + 1 >= 64) { // if we have more than 64 bits below the minimum exponent, you have a zero for sure. d = negative ? -0.0 : 0.0; return true; } // next line is safe because -real_exponent + 1 < 0 mantissa >>= -real_exponent + 1; // Thankfully, we can't have both "round-to-even" and subnormals because // "round-to-even" only occurs for powers close to 0. mantissa += (mantissa & 1); // round up mantissa >>= 1; // There is a weird scenario where we don't have a subnormal but just. // Suppose we start with 2.2250738585072013e-308, we end up // with 0x3fffffffffffff x 2^-1023-53 which is technically subnormal // whereas 0x40000000000000 x 2^-1023-53 is normal. Now, we need to round // up 0x3fffffffffffff x 2^-1023-53 and once we do, we are no longer // subnormal, but we can only know this after rounding. // So we only declare a subnormal if we are smaller than the threshold. real_exponent = (mantissa < (uint64_t(1) << 52)) ? 0 : 1; d = to_double(mantissa, real_exponent, negative); return true; } // We have to round to even. The "to even" part // is only a problem when we are right in between two floats // which we guard against. // If we have lots of trailing zeros, we may fall right between two // floating-point values. // // The round-to-even cases take the form of a number 2m+1 which is in (2^53,2^54] // times a power of two. That is, it is right between a number with binary significand // m and another number with binary significand m+1; and it must be the case // that it cannot be represented by a float itself. 
// // We must have that w * 10 ^q == (2m+1) * 2^p for some power of two 2^p. // Recall that 10^q = 5^q * 2^q. // When q >= 0, we must have that (2m+1) is divible by 5^q, so 5^q <= 2^54. We have that // 5^23 <= 2^54 and it is the last power of five to qualify, so q <= 23. // When q<0, we have w >= (2m+1) x 5^{-q}. We must have that w<2^{64} so // (2m+1) x 5^{-q} < 2^{64}. We have that 2m+1>2^{53}. Hence, we must have // 2^{53} x 5^{-q} < 2^{64}. // Hence we have 5^{-q} < 2^{11}$ or q>= -4. // // We require lower <= 1 and not lower == 0 because we could not prove that // that lower == 0 is implied; but we could prove that lower <= 1 is a necessary and sufficient test. if (simdjson_unlikely((lower <= 1) && (power >= -4) && (power <= 23) && ((mantissa & 3) == 1))) { if((mantissa << (upperbit + 64 - 53 - 2)) == upper) { mantissa &= ~1; // flip it so that we do not round up } } mantissa += mantissa & 1; mantissa >>= 1; // Here we have mantissa < (1<<53), unless there was an overflow if (mantissa >= (1ULL << 53)) { ////////// // This will happen when parsing values such as 7.2057594037927933e+16 //////// mantissa = (1ULL << 52); real_exponent++; } mantissa &= ~(1ULL << 52); // we have to check that real_exponent is in range, otherwise we bail out if (simdjson_unlikely(real_exponent > 2046)) { // We have an infinite value!!! We could actually throw an error here if we could. return false; } d = to_double(mantissa, real_exponent, negative); return true; } // We call a fallback floating-point parser that might be slow. Note // it will accept JSON numbers, but the JSON spec. is more restrictive so // before you call parse_float_fallback, you need to have validated the input // string with the JSON grammar. // It will return an error (false) if the parsed number is infinite. // The string parsing itself always succeeds. We know that there is at least // one digit. 
static bool parse_float_fallback(const uint8_t *ptr, double *outDouble) { *outDouble = simdjson::internal::from_chars(reinterpret_cast(ptr)); // We do not accept infinite values. // Detecting finite values in a portable manner is ridiculously hard, ideally // we would want to do: // return !std::isfinite(*outDouble); // but that mysteriously fails under legacy/old libc++ libraries, see // https://github.com/simdjson/simdjson/issues/1286 // // Therefore, fall back to this solution (the extra parens are there // to handle that max may be a macro on windows). return !(*outDouble > (std::numeric_limits::max)() || *outDouble < std::numeric_limits::lowest()); } static bool parse_float_fallback(const uint8_t *ptr, const uint8_t *end_ptr, double *outDouble) { *outDouble = simdjson::internal::from_chars(reinterpret_cast(ptr), reinterpret_cast(end_ptr)); // We do not accept infinite values. // Detecting finite values in a portable manner is ridiculously hard, ideally // we would want to do: // return !std::isfinite(*outDouble); // but that mysteriously fails under legacy/old libc++ libraries, see // https://github.com/simdjson/simdjson/issues/1286 // // Therefore, fall back to this solution (the extra parens are there // to handle that max may be a macro on windows). 
return !(*outDouble > (std::numeric_limits::max)() || *outDouble < std::numeric_limits::lowest()); } // check quickly whether the next 8 chars are made of digits // at a glance, it looks better than Mula's // http://0x80.pl/articles/swar-digits-validate.html simdjson_inline bool is_made_of_eight_digits_fast(const uint8_t *chars) { uint64_t val; // this can read up to 7 bytes beyond the buffer size, but we require // SIMDJSON_PADDING of padding static_assert(7 <= SIMDJSON_PADDING, "SIMDJSON_PADDING must be bigger than 7"); std::memcpy(&val, chars, 8); // a branchy method might be faster: // return (( val & 0xF0F0F0F0F0F0F0F0 ) == 0x3030303030303030) // && (( (val + 0x0606060606060606) & 0xF0F0F0F0F0F0F0F0 ) == // 0x3030303030303030); return (((val & 0xF0F0F0F0F0F0F0F0) | (((val + 0x0606060606060606) & 0xF0F0F0F0F0F0F0F0) >> 4)) == 0x3333333333333333); } template SIMDJSON_NO_SANITIZE_UNDEFINED // We deliberately allow overflow here and check later simdjson_inline bool parse_digit(const uint8_t c, I &i) { const uint8_t digit = static_cast(c - '0'); if (digit > 9) { return false; } // PERF NOTE: multiplication by 10 is cheaper than arbitrary integer multiplication i = 10 * i + digit; // might overflow, we will handle the overflow later return true; } simdjson_inline error_code parse_decimal_after_separator(simdjson_unused const uint8_t *const src, const uint8_t *&p, uint64_t &i, int64_t &exponent) { // we continue with the fiction that we have an integer. If the // floating point number is representable as x * 10^z for some integer // z that fits in 53 bits, then we will be able to convert back the // the integer into a float in a lossless manner. const uint8_t *const first_after_period = p; #ifdef SIMDJSON_SWAR_NUMBER_PARSING #if SIMDJSON_SWAR_NUMBER_PARSING // this helps if we have lots of decimals! // this turns out to be frequent enough. 
if (is_made_of_eight_digits_fast(p)) { i = i * 100000000 + parse_eight_digits_unrolled(p); p += 8; } #endif // SIMDJSON_SWAR_NUMBER_PARSING #endif // #ifdef SIMDJSON_SWAR_NUMBER_PARSING // Unrolling the first digit makes a small difference on some implementations (e.g. westmere) if (parse_digit(*p, i)) { ++p; } while (parse_digit(*p, i)) { p++; } exponent = first_after_period - p; // Decimal without digits (123.) is illegal if (exponent == 0) { return INVALID_NUMBER(src); } return SUCCESS; } simdjson_inline error_code parse_exponent(simdjson_unused const uint8_t *const src, const uint8_t *&p, int64_t &exponent) { // Exp Sign: -123.456e[-]78 bool neg_exp = ('-' == *p); if (neg_exp || '+' == *p) { p++; } // Skip + as well // Exponent: -123.456e-[78] auto start_exp = p; int64_t exp_number = 0; while (parse_digit(*p, exp_number)) { ++p; } // It is possible for parse_digit to overflow. // In particular, it could overflow to INT64_MIN, and we cannot do - INT64_MIN. // Thus we *must* check for possible overflow before we negate exp_number. // Performance notes: it may seem like combining the two "simdjson_unlikely checks" below into // a single simdjson_unlikely path would be faster. The reasoning is sound, but the compiler may // not oblige and may, in fact, generate two distinct paths in any case. It might be // possible to do uint64_t(p - start_exp - 1) >= 18 but it could end up trading off // instructions for a simdjson_likely branch, an unconclusive gain. // If there were no digits, it's an error. if (simdjson_unlikely(p == start_exp)) { return INVALID_NUMBER(src); } // We have a valid positive exponent in exp_number at this point, except that // it may have overflowed. // If there were more than 18 digits, we may have overflowed the integer. We have to do // something!!!! 
if (simdjson_unlikely(p > start_exp+18)) { // Skip leading zeroes: 1e000000000000000000001 is technically valid and doesn't overflow while (*start_exp == '0') { start_exp++; } // 19 digits could overflow int64_t and is kind of absurd anyway. We don't // support exponents smaller than -999,999,999,999,999,999 and bigger // than 999,999,999,999,999,999. // We can truncate. // Note that 999999999999999999 is assuredly too large. The maximal ieee64 value before // infinity is ~1.8e308. The smallest subnormal is ~5e-324. So, actually, we could // truncate at 324. // Note that there is no reason to fail per se at this point in time. // E.g., 0e999999999999999999999 is a fine number. if (p > start_exp+18) { exp_number = 999999999999999999; } } // At this point, we know that exp_number is a sane, positive, signed integer. // It is <= 999,999,999,999,999,999. As long as 'exponent' is in // [-8223372036854775808, 8223372036854775808], we won't overflow. Because 'exponent' // is bounded in magnitude by the size of the JSON input, we are fine in this universe. // To sum it up: the next line should never overflow. exponent += (neg_exp ? -exp_number : exp_number); return SUCCESS; } simdjson_inline size_t significant_digits(const uint8_t * start_digits, size_t digit_count) { // It is possible that the integer had an overflow. // We have to handle the case where we have 0.0000somenumber. const uint8_t *start = start_digits; while ((*start == '0') || (*start == '.')) { ++start; } // we over-decrement by one when there is a '.' 
return digit_count - size_t(start - start_digits); } } // unnamed namespace /** @private */ template error_code slow_float_parsing(simdjson_unused const uint8_t * src, W writer) { double d; if (parse_float_fallback(src, &d)) { writer.append_double(d); return SUCCESS; } return INVALID_NUMBER(src); } /** @private */ template simdjson_inline error_code write_float(const uint8_t *const src, bool negative, uint64_t i, const uint8_t * start_digits, size_t digit_count, int64_t exponent, W &writer) { // If we frequently had to deal with long strings of digits, // we could extend our code by using a 128-bit integer instead // of a 64-bit integer. However, this is uncommon in practice. // // 9999999999999999999 < 2**64 so we can accommodate 19 digits. // If we have a decimal separator, then digit_count - 1 is the number of digits, but we // may not have a decimal separator! if (simdjson_unlikely(digit_count > 19 && significant_digits(start_digits, digit_count) > 19)) { // Ok, chances are good that we had an overflow! // this is almost never going to get called!!! // we start anew, going slowly!!! // This will happen in the following examples: // 10000000000000000000000000000000000000000000e+308 // 3.1415926535897932384626433832795028841971693993751 // // NOTE: This makes a *copy* of the writer and passes it to slow_float_parsing. This happens // because slow_float_parsing is a non-inlined function. If we passed our writer reference to // it, it would force it to be stored in memory, preventing the compiler from picking it apart // and putting into registers. i.e. if we pass it as reference, it gets slow. // This is what forces the skip_double, as well. 
error_code error = slow_float_parsing(src, writer); writer.skip_double(); return error; } // NOTE: it's weird that the simdjson_unlikely() only wraps half the if, but it seems to get slower any other // way we've tried: https://github.com/simdjson/simdjson/pull/990#discussion_r448497331 // To future reader: we'd love if someone found a better way, or at least could explain this result! if (simdjson_unlikely(exponent < simdjson::internal::smallest_power) || (exponent > simdjson::internal::largest_power)) { // // Important: smallest_power is such that it leads to a zero value. // Observe that 18446744073709551615e-343 == 0, i.e. (2**64 - 1) e -343 is zero // so something x 10^-343 goes to zero, but not so with something x 10^-342. static_assert(simdjson::internal::smallest_power <= -342, "smallest_power is not small enough"); // if((exponent < simdjson::internal::smallest_power) || (i == 0)) { // E.g. Parse "-0.0e-999" into the same value as "-0.0". See https://en.wikipedia.org/wiki/Signed_zero WRITE_DOUBLE(negative ? -0.0 : 0.0, src, writer); return SUCCESS; } else { // (exponent > largest_power) and (i != 0) // We have, for sure, an infinite value and simdjson refuses to parse infinite values. return INVALID_NUMBER(src); } } double d; if (!compute_float_64(exponent, i, negative, d)) { // we are almost never going to get here. 
if (!parse_float_fallback(src, &d)) { return INVALID_NUMBER(src); } } WRITE_DOUBLE(d, src, writer); return SUCCESS; } // for performance analysis, it is sometimes useful to skip parsing #ifdef SIMDJSON_SKIPNUMBERPARSING template simdjson_inline error_code parse_number(const uint8_t *const, W &writer) { writer.append_s64(0); // always write zero return SUCCESS; // always succeeds } simdjson_unused simdjson_inline simdjson_result parse_unsigned(const uint8_t * const src) noexcept { return 0; } simdjson_unused simdjson_inline simdjson_result parse_integer(const uint8_t * const src) noexcept { return 0; } simdjson_unused simdjson_inline simdjson_result parse_double(const uint8_t * const src) noexcept { return 0; } simdjson_unused simdjson_inline simdjson_result parse_unsigned_in_string(const uint8_t * const src) noexcept { return 0; } simdjson_unused simdjson_inline simdjson_result parse_integer_in_string(const uint8_t * const src) noexcept { return 0; } simdjson_unused simdjson_inline simdjson_result parse_double_in_string(const uint8_t * const src) noexcept { return 0; } simdjson_unused simdjson_inline bool is_negative(const uint8_t * src) noexcept { return false; } simdjson_unused simdjson_inline simdjson_result is_integer(const uint8_t * src) noexcept { return false; } simdjson_unused simdjson_inline simdjson_result get_number_type(const uint8_t * src) noexcept { return number_type::signed_integer; } #else // parse the number at src // define JSON_TEST_NUMBERS for unit testing // // It is assumed that the number is followed by a structural ({,},],[) character // or a white space character. If that is not the case (e.g., when the JSON // document is made of a single number), then it is necessary to copy the // content and append a space before calling this function. // // Our objective is accurate parsing (ULP of 0) at high speed. 
template simdjson_inline error_code parse_number(const uint8_t *const src, W &writer) { // // Check for minus sign // bool negative = (*src == '-'); const uint8_t *p = src + uint8_t(negative); // // Parse the integer part. // // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare const uint8_t *const start_digits = p; uint64_t i = 0; while (parse_digit(*p, i)) { p++; } // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. // Optimization note: size_t is expected to be unsigned. size_t digit_count = size_t(p - start_digits); if (digit_count == 0 || ('0' == *start_digits && digit_count > 1)) { return INVALID_NUMBER(src); } // // Handle floats if there is a . or e (or both) // int64_t exponent = 0; bool is_float = false; if ('.' == *p) { is_float = true; ++p; SIMDJSON_TRY( parse_decimal_after_separator(src, p, i, exponent) ); digit_count = int(p - start_digits); // used later to guard against overflows } if (('e' == *p) || ('E' == *p)) { is_float = true; ++p; SIMDJSON_TRY( parse_exponent(src, p, exponent) ); } if (is_float) { const bool dirty_end = jsoncharutils::is_not_structural_or_whitespace(*p); SIMDJSON_TRY( write_float(src, negative, i, start_digits, digit_count, exponent, writer) ); if (dirty_end) { return INVALID_NUMBER(src); } return SUCCESS; } // The longest negative 64-bit number is 19 digits. // The longest positive 64-bit number is 20 digits. // We do it this way so we don't trigger this branch unless we must. size_t longest_digit_count = negative ? 
19 : 20; if (digit_count > longest_digit_count) { return INVALID_NUMBER(src); } if (digit_count == longest_digit_count) { if (negative) { // Anything negative above INT64_MAX+1 is invalid if (i > uint64_t(INT64_MAX)+1) { return INVALID_NUMBER(src); } WRITE_INTEGER(~i+1, src, writer); if (jsoncharutils::is_not_structural_or_whitespace(*p)) { return INVALID_NUMBER(src); } return SUCCESS; // Positive overflow check: // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the // biggest uint64_t. // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. // If we got here, it's a 20 digit number starting with the digit "1". // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller // than 1,553,255,926,290,448,384. // - That is smaller than the smallest possible 20-digit number the user could write: // 10,000,000,000,000,000,000. // - Therefore, if the number is positive and lower than that, it's overflow. // - The value we are looking at is less than or equal to INT64_MAX. // } else if (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INVALID_NUMBER(src); } } // Write unsigned if it doesn't fit in a signed integer. if (i > uint64_t(INT64_MAX)) { WRITE_UNSIGNED(i, src, writer); } else { WRITE_INTEGER(negative ? (~i+1) : i, src, writer); } if (jsoncharutils::is_not_structural_or_whitespace(*p)) { return INVALID_NUMBER(src); } return SUCCESS; } // Inlineable functions namespace { // This table can be used to characterize the final character of an integer // string. For JSON structural character and allowable white space characters, // we return SUCCESS. For 'e', '.' and 'E', we return INCORRECT_TYPE. Otherwise // we return NUMBER_ERROR. // Optimization note: we could easily reduce the size of the table by half (to 128) // at the cost of an extra branch. 
// Optimization note: we want the values to use at most 8 bits (not, e.g., 32 bits): static_assert(error_code(uint8_t(NUMBER_ERROR))== NUMBER_ERROR, "bad NUMBER_ERROR cast"); static_assert(error_code(uint8_t(SUCCESS))== SUCCESS, "bad NUMBER_ERROR cast"); static_assert(error_code(uint8_t(INCORRECT_TYPE))== INCORRECT_TYPE, "bad NUMBER_ERROR cast"); const uint8_t integer_string_finisher[256] = { NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, SUCCESS, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, INCORRECT_TYPE, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, INCORRECT_TYPE, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, INCORRECT_TYPE, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, 
NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR}; // Parse any number from 0 to 18,446,744,073,709,551,615 
simdjson_unused simdjson_inline simdjson_result<uint64_t> parse_unsigned(const uint8_t * const src) noexcept {
  const uint8_t *p = src;
  //
  // Parse the integer part.
  //
  // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare
  const uint8_t *const start_digits = p;
  uint64_t i = 0;
  while (parse_digit(*p, i)) { p++; }

  // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error.
  // Optimization note: size_t is expected to be unsigned.
  size_t digit_count = size_t(p - start_digits);
  // The longest positive 64-bit number is 20 digits.
  // We do it this way so we don't trigger this branch unless we must.
  // Optimization note: the compiler can probably merge
  // ((digit_count == 0) || (digit_count > 20))
  // into a single  branch since digit_count is unsigned.
  if ((digit_count == 0) || (digit_count > 20)) { return INCORRECT_TYPE; }
  // Here digit_count > 0.
  if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; }
  // We can do the following...
  // if (!jsoncharutils::is_structural_or_whitespace(*p)) {
  //  return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR;
  // }
  // as a single table lookup:
  if (integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); }

  if (digit_count == 20) {
    // Positive overflow check:
    // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the
    //   biggest uint64_t.
    // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX.
    //   If we got here, it's a 20 digit number starting with the digit "1".
    // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller
    //   than 1,553,255,926,290,448,384.
    // - That is smaller than the smallest possible 20-digit number the user could write:
    //   10,000,000,000,000,000,000.
    // - Therefore, if the number is positive and lower than that, it's overflow.
    // - The value we are looking at is less than or equal to INT64_MAX.
    //
    if (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INCORRECT_TYPE; }
  }

  return i;
}


// Parse any number from 0 to 18,446,744,073,709,551,615
// Never read at src_end or beyond
simdjson_unused simdjson_inline simdjson_result<uint64_t> parse_unsigned(const uint8_t * const src, const uint8_t * const src_end) noexcept {
  const uint8_t *p = src;
  //
  // Parse the integer part.
  //
  // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare
  const uint8_t *const start_digits = p;
  uint64_t i = 0;
  while ((p != src_end) && parse_digit(*p, i)) { p++; }

  // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error.
  // Optimization note: size_t is expected to be unsigned.
  size_t digit_count = size_t(p - start_digits);
  // The longest positive 64-bit number is 20 digits.
  // We do it this way so we don't trigger this branch unless we must.
  // Optimization note: the compiler can probably merge
  // ((digit_count == 0) || (digit_count > 20))
  // into a single  branch since digit_count is unsigned.
  if ((digit_count == 0) || (digit_count > 20)) { return INCORRECT_TYPE; }
  // Here digit_count > 0.
  if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; }
  // We can do the following...
  // if (!jsoncharutils::is_structural_or_whitespace(*p)) {
  //  return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR;
  // }
  // as a single table lookup (skipped when the digits run up to src_end):
  if ((p != src_end) && integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); }

  if (digit_count == 20) {
    // Positive overflow check:
    // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the
    //   biggest uint64_t.
    // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX.
    //   If we got here, it's a 20 digit number starting with the digit "1".
    // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller
    //   than 1,553,255,926,290,448,384.
    // - That is smaller than the smallest possible 20-digit number the user could write:
    //   10,000,000,000,000,000,000.
    // - Therefore, if the number is positive and lower than that, it's overflow.
    // - The value we are looking at is less than or equal to INT64_MAX.
    //
    if (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INCORRECT_TYPE; }
  }

  return i;
}

// Parse any number from 0 to 18,446,744,073,709,551,615
// The digits are read starting at src + 1 (src[0] is the opening quote) and
// must be followed by a closing quote.
simdjson_unused simdjson_inline simdjson_result<uint64_t> parse_unsigned_in_string(const uint8_t * const src) noexcept {
  const uint8_t *p = src + 1;
  //
  // Parse the integer part.
  //
  // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare
  const uint8_t *const start_digits = p;
  uint64_t i = 0;
  while (parse_digit(*p, i)) { p++; }

  // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error.
  // Optimization note: size_t is expected to be unsigned.
  size_t digit_count = size_t(p - start_digits);
  // The longest positive 64-bit number is 20 digits.
  // We do it this way so we don't trigger this branch unless we must.
  // Optimization note: the compiler can probably merge
  // ((digit_count == 0) || (digit_count > 20))
  // into a single  branch since digit_count is unsigned.
  if ((digit_count == 0) || (digit_count > 20)) { return INCORRECT_TYPE; }
  // Here digit_count > 0.
  if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; }
  // Inside a string, the only acceptable character after the digits is the
  // closing quote; anything else (including '.', 'e' and 'E') is a NUMBER_ERROR.
  if (*p != '"') { return NUMBER_ERROR; }

  if (digit_count == 20) {
    // Positive overflow check:
    // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the
    //   biggest uint64_t.
    // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX.
    //   If we got here, it's a 20 digit number starting with the digit "1".
    // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller
    //   than 1,553,255,926,290,448,384.
    // - That is smaller than the smallest possible 20-digit number the user could write:
    //   10,000,000,000,000,000,000.
    // - Therefore, if the number is positive and lower than that, it's overflow.
    // - The value we are looking at is less than or equal to INT64_MAX.
    //
    // Note: we use src[1] and not src[0] because src[0] is the quote character in this
    // instance.
    if (src[1] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INCORRECT_TYPE; }
  }

  return i;
}

// Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807
simdjson_unused simdjson_inline simdjson_result<int64_t> parse_integer(const uint8_t *src) noexcept {
  //
  // Check for minus sign
  //
  bool negative = (*src == '-');
  const uint8_t *p = src + uint8_t(negative);

  //
  // Parse the integer part.
  //
  // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare
  const uint8_t *const start_digits = p;
  uint64_t i = 0;
  while (parse_digit(*p, i)) { p++; }

  // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error.
  // Optimization note: size_t is expected to be unsigned.
  size_t digit_count = size_t(p - start_digits);
  // We go from
  // -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807
  // so we can never represent numbers that have more than 19 digits.
  size_t longest_digit_count = 19;
  // Optimization note: the compiler can probably merge
  // ((digit_count == 0) || (digit_count > longest_digit_count))
  // into a single  branch since digit_count is unsigned.
  if ((digit_count == 0) || (digit_count > longest_digit_count)) { return INCORRECT_TYPE; }
  // Here digit_count > 0.
  if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; }
  // We can do the following...
  // if (!jsoncharutils::is_structural_or_whitespace(*p)) {
  //  return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR;
  // }
  // as a single table lookup:
  if(integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); }
  // Negative numbers can go down to - INT64_MAX - 1 whereas positive numbers are limited to INT64_MAX.
  // Performance note: This check is only needed when digit_count == longest_digit_count but it is
  // so cheap that we might as well always make it.
  if(i > uint64_t(INT64_MAX) + uint64_t(negative)) { return INCORRECT_TYPE; }
  return negative ? (~i+1) : i;
}

// Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807
// Never read at src_end or beyond
simdjson_unused simdjson_inline simdjson_result<int64_t> parse_integer(const uint8_t * const src, const uint8_t * const src_end) noexcept {
  //
  // Check for minus sign
  //
  if(src == src_end) { return NUMBER_ERROR; }
  bool negative = (*src == '-');
  const uint8_t *p = src + uint8_t(negative);

  //
  // Parse the integer part.
  //
  // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare
  const uint8_t *const start_digits = p;
  uint64_t i = 0;
  while ((p != src_end) && parse_digit(*p, i)) { p++; }

  // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error.
  // Optimization note: size_t is expected to be unsigned.
  size_t digit_count = size_t(p - start_digits);
  // We go from
  // -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807
  // so we can never represent numbers that have more than 19 digits.
  size_t longest_digit_count = 19;
  // Optimization note: the compiler can probably merge
  // ((digit_count == 0) || (digit_count > longest_digit_count))
  // into a single  branch since digit_count is unsigned.
  if ((digit_count == 0) || (digit_count > longest_digit_count)) { return INCORRECT_TYPE; }
  // Here digit_count > 0.
  if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; }
  // We can do the following...
  // if (!jsoncharutils::is_structural_or_whitespace(*p)) {
  //  return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR;
  // }
  // as a single table lookup (skipped when the digits run up to src_end):
  if((p != src_end) && integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); }
  // Negative numbers can go down to - INT64_MAX - 1 whereas positive numbers are limited to INT64_MAX.
  // Performance note: This check is only needed when digit_count == longest_digit_count but it is
  // so cheap that we might as well always make it.
  if(i > uint64_t(INT64_MAX) + uint64_t(negative)) { return INCORRECT_TYPE; }
  return negative ? (~i+1) : i;
}

// Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807
// The sign and digits are read starting at src + 1 (src[0] is the opening
// quote) and must be followed by a closing quote.
simdjson_unused simdjson_inline simdjson_result<int64_t> parse_integer_in_string(const uint8_t *src) noexcept {
  //
  // Check for minus sign
  //
  bool negative = (*(src + 1) == '-');
  src += uint8_t(negative) + 1;

  //
  // Parse the integer part.
  //
  // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare
  const uint8_t *const start_digits = src;
  uint64_t i = 0;
  while (parse_digit(*src, i)) { src++; }

  // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error.
  // Optimization note: size_t is expected to be unsigned.
  size_t digit_count = size_t(src - start_digits);
  // We go from
  // -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807
  // so we can never represent numbers that have more than 19 digits.
  size_t longest_digit_count = 19;
  // Optimization note: the compiler can probably merge
  // ((digit_count == 0) || (digit_count > longest_digit_count))
  // into a single  branch since digit_count is unsigned.
  if ((digit_count == 0) || (digit_count > longest_digit_count)) { return INCORRECT_TYPE; }
  // Here digit_count > 0.
  if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; }
  // Inside a string, the only acceptable character after the digits is the
  // closing quote; anything else (including '.', 'e' and 'E') is a NUMBER_ERROR.
  if(*src != '"') { return NUMBER_ERROR; }
  // Negative numbers can go down to - INT64_MAX - 1 whereas positive numbers are limited to INT64_MAX.
  // Performance note: This check is only needed when digit_count == longest_digit_count but it is
  // so cheap that we might as well always make it.
  if(i > uint64_t(INT64_MAX) + uint64_t(negative)) { return INCORRECT_TYPE; }
  return negative ? (~i+1) : i;
}

// Parses the number at src as a double. Returns INCORRECT_TYPE when there are
// no integer digits, and NUMBER_ERROR for a malformed number, a number not
// followed by a structural or whitespace character, or a value rejected by
// parse_float_fallback.
simdjson_unused simdjson_inline simdjson_result<double> parse_double(const uint8_t * src) noexcept {
  //
  // Check for minus sign
  //
  bool negative = (*src == '-');
  src += uint8_t(negative);

  //
  // Parse the integer part.
  //
  uint64_t i = 0;
  const uint8_t *p = src;
  p += parse_digit(*p, i);
  bool leading_zero = (i == 0);
  while (parse_digit(*p, i)) { p++; }
  // no integer digits, or 0123 (zero must be solo)
  if ( p == src ) { return INCORRECT_TYPE; }
  if ( (leading_zero && p != src+1)) { return NUMBER_ERROR; }

  //
  // Parse the decimal part.
  //
  int64_t exponent = 0;
  bool overflow;
  if (simdjson_likely(*p == '.')) {
    p++;
    const uint8_t *start_decimal_digits = p;
    if (!parse_digit(*p, i)) { return NUMBER_ERROR; } // no decimal digits
    p++;
    while (parse_digit(*p, i)) { p++; }
    exponent = -(p - start_decimal_digits);

    // Overflow check. More than 19 digits (minus the decimal) may be overflow.
    overflow = p-src-1 > 19;
    if (simdjson_unlikely(overflow && leading_zero)) {
      // Skip leading 0.00000 and see if it still overflows
      const uint8_t *start_digits = src + 2;
      while (*start_digits == '0') { start_digits++; }
      overflow = p-start_digits > 19;
    }
  } else {
    overflow = p-src > 19;
  }

  //
  // Parse the exponent
  //
  if (*p == 'e' || *p == 'E') {
    p++;
    bool exp_neg = *p == '-';
    p += exp_neg || *p == '+';

    uint64_t exp = 0;
    const uint8_t *start_exp_digits = p;
    while (parse_digit(*p, exp)) { p++; }
    // no exp digits, or 20+ exp digits
    if (p-start_exp_digits == 0 || p-start_exp_digits > 19) { return NUMBER_ERROR; }

    exponent += exp_neg ? 0-exp : exp;
  }

  if (jsoncharutils::is_not_structural_or_whitespace(*p)) { return NUMBER_ERROR; }

  overflow = overflow || exponent < simdjson::internal::smallest_power || exponent > simdjson::internal::largest_power;

  //
  // Assemble (or slow-parse) the float
  //
  double d;
  if (simdjson_likely(!overflow)) {
    if (compute_float_64(exponent, i, negative, d)) { return d; }
  }
  // src was advanced past the sign above; step back so the fallback sees it.
  if (!parse_float_fallback(src - uint8_t(negative), &d)) {
    return NUMBER_ERROR;
  }
  return d;
}

// Returns true when the first character at src is a minus sign.
simdjson_unused simdjson_inline bool is_negative(const uint8_t * src) noexcept {
  return (*src == '-');
}

// Skips an optional '-' and a run of digits. Returns NUMBER_ERROR when there is
// no digit, true when the digits are followed by a structural or whitespace
// character, and false otherwise.
simdjson_unused simdjson_inline simdjson_result<bool> is_integer(const uint8_t * src) noexcept {
  bool negative = (*src == '-');
  src += uint8_t(negative);
  const uint8_t *p = src;
  while(static_cast<uint8_t>(*p - '0') <= 9) { p++; }
  if ( p == src ) { return NUMBER_ERROR; }
  if (jsoncharutils::is_structural_or_whitespace(*p)) { return true; }
  return false;
}

// Classifies the number at src as a signed integer, an unsigned integer or a
// floating-point number without computing its value. Returns NUMBER_ERROR when
// no digit follows the optional '-'.
simdjson_unused simdjson_inline simdjson_result<number_type> get_number_type(const uint8_t * src) noexcept {
  bool negative = (*src == '-');
  src += uint8_t(negative);
  const uint8_t *p = src;
  while(static_cast<uint8_t>(*p - '0') <= 9) { p++; }
  if ( p == src ) { return NUMBER_ERROR; }
  if (jsoncharutils::is_structural_or_whitespace(*p)) {
    // We have an integer.
    // If the number is negative and valid, it must be a signed integer.
    if(negative) { return number_type::signed_integer; }
    // We want values larger or equal to 9223372036854775808 to be unsigned
    // integers, and the other values to be signed integers.
    int digit_count = int(p - src);
    if(digit_count >= 19) {
      const uint8_t * smaller_big_integer = reinterpret_cast<const uint8_t *>("9223372036854775808");
      if((digit_count >= 20) || (memcmp(src, smaller_big_integer, 19) >= 0)) {
        return number_type::unsigned_integer;
      }
    }
    return number_type::signed_integer;
  }
  // Hopefully, we have 'e' or 'E' or '.'.
  return number_type::floating_point_number;
}

// Never read at src_end or beyond
simdjson_unused simdjson_inline simdjson_result<double> parse_double(const uint8_t * src, const uint8_t * const src_end) noexcept {
  if(src == src_end) { return NUMBER_ERROR; }
  //
  // Check for minus sign
  //
  bool negative = (*src == '-');
  src += uint8_t(negative);

  //
  // Parse the integer part.
  //
  uint64_t i = 0;
  const uint8_t *p = src;
  if(p == src_end) { return NUMBER_ERROR; }
  p += parse_digit(*p, i);
  bool leading_zero = (i == 0);
  while ((p != src_end) && parse_digit(*p, i)) { p++; }
  // no integer digits, or 0123 (zero must be solo)
  if ( p == src ) { return INCORRECT_TYPE; }
  if ( (leading_zero && p != src+1)) { return NUMBER_ERROR; }

  //
  // Parse the decimal part.
  //
  int64_t exponent = 0;
  bool overflow;
  if (simdjson_likely((p != src_end) && (*p == '.'))) {
    p++;
    const uint8_t *start_decimal_digits = p;
    if ((p == src_end) || !parse_digit(*p, i)) { return NUMBER_ERROR; } // no decimal digits
    p++;
    while ((p != src_end) && parse_digit(*p, i)) { p++; }
    exponent = -(p - start_decimal_digits);

    // Overflow check. More than 19 digits (minus the decimal) may be overflow.
overflow = p-src-1 > 19; if (simdjson_unlikely(overflow && leading_zero)) { // Skip leading 0.00000 and see if it still overflows const uint8_t *start_digits = src + 2; while (*start_digits == '0') { start_digits++; } overflow = start_digits-src > 19; } } else { overflow = p-src > 19; } // // Parse the exponent // if ((p != src_end) && (*p == 'e' || *p == 'E')) { p++; if(p == src_end) { return NUMBER_ERROR; } bool exp_neg = *p == '-'; p += exp_neg || *p == '+'; uint64_t exp = 0; const uint8_t *start_exp_digits = p; while ((p != src_end) && parse_digit(*p, exp)) { p++; } // no exp digits, or 20+ exp digits if (p-start_exp_digits == 0 || p-start_exp_digits > 19) { return NUMBER_ERROR; } exponent += exp_neg ? 0-exp : exp; } if ((p != src_end) && jsoncharutils::is_not_structural_or_whitespace(*p)) { return NUMBER_ERROR; } overflow = overflow || exponent < simdjson::internal::smallest_power || exponent > simdjson::internal::largest_power; // // Assemble (or slow-parse) the float // double d; if (simdjson_likely(!overflow)) { if (compute_float_64(exponent, i, negative, d)) { return d; } } if (!parse_float_fallback(src - uint8_t(negative), src_end, &d)) { return NUMBER_ERROR; } return d; } simdjson_unused simdjson_inline simdjson_result parse_double_in_string(const uint8_t * src) noexcept { // // Check for minus sign // bool negative = (*(src + 1) == '-'); src += uint8_t(negative) + 1; // // Parse the integer part. // uint64_t i = 0; const uint8_t *p = src; p += parse_digit(*p, i); bool leading_zero = (i == 0); while (parse_digit(*p, i)) { p++; } // no integer digits, or 0123 (zero must be solo) if ( p == src ) { return INCORRECT_TYPE; } if ( (leading_zero && p != src+1)) { return NUMBER_ERROR; } // // Parse the decimal part. 
// int64_t exponent = 0; bool overflow; if (simdjson_likely(*p == '.')) { p++; const uint8_t *start_decimal_digits = p; if (!parse_digit(*p, i)) { return NUMBER_ERROR; } // no decimal digits p++; while (parse_digit(*p, i)) { p++; } exponent = -(p - start_decimal_digits); // Overflow check. More than 19 digits (minus the decimal) may be overflow. overflow = p-src-1 > 19; if (simdjson_unlikely(overflow && leading_zero)) { // Skip leading 0.00000 and see if it still overflows const uint8_t *start_digits = src + 2; while (*start_digits == '0') { start_digits++; } overflow = p-start_digits > 19; } } else { overflow = p-src > 19; } // // Parse the exponent // if (*p == 'e' || *p == 'E') { p++; bool exp_neg = *p == '-'; p += exp_neg || *p == '+'; uint64_t exp = 0; const uint8_t *start_exp_digits = p; while (parse_digit(*p, exp)) { p++; } // no exp digits, or 20+ exp digits if (p-start_exp_digits == 0 || p-start_exp_digits > 19) { return NUMBER_ERROR; } exponent += exp_neg ? 0-exp : exp; } if (*p != '"') { return NUMBER_ERROR; } overflow = overflow || exponent < simdjson::internal::smallest_power || exponent > simdjson::internal::largest_power; // // Assemble (or slow-parse) the float // double d; if (simdjson_likely(!overflow)) { if (compute_float_64(exponent, i, negative, d)) { return d; } } if (!parse_float_fallback(src - uint8_t(negative), &d)) { return NUMBER_ERROR; } return d; } } // unnamed namespace #endif // SIMDJSON_SKIPNUMBERPARSING } // namespace numberparsing inline std::ostream& operator<<(std::ostream& out, number_type type) noexcept { switch (type) { case number_type::signed_integer: out << "integer in [-9223372036854775808,9223372036854775808)"; break; case number_type::unsigned_integer: out << "unsigned integer in [9223372036854775808,18446744073709551616)"; break; case number_type::floating_point_number: out << "floating-point number (binary64)"; break; default: SIMDJSON_UNREACHABLE(); } return out; } } // namespace fallback } // namespace simdjson 
#endif // SIMDJSON_GENERIC_NUMBERPARSING_H /* end file simdjson/generic/numberparsing.h for fallback */ /* including simdjson/generic/implementation_simdjson_result_base-inl.h for fallback: #include "simdjson/generic/implementation_simdjson_result_base-inl.h" */ /* begin file simdjson/generic/implementation_simdjson_result_base-inl.h for fallback */ #ifndef SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_INL_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_INL_H */ /* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { namespace fallback { // // internal::implementation_simdjson_result_base inline implementation // template simdjson_inline void implementation_simdjson_result_base::tie(T &value, error_code &error) && noexcept { error = this->second; if (!error) { value = std::forward>(*this).first; } } template simdjson_warn_unused simdjson_inline error_code implementation_simdjson_result_base::get(T &value) && noexcept { error_code error; std::forward>(*this).tie(value, error); return error; } template simdjson_inline error_code implementation_simdjson_result_base::error() const noexcept { return this->second; } #if SIMDJSON_EXCEPTIONS template simdjson_inline T& implementation_simdjson_result_base::value() & noexcept(false) { if (error()) { throw simdjson_error(error()); } return this->first; } template simdjson_inline T&& implementation_simdjson_result_base::value() && noexcept(false) { return std::forward>(*this).take_value(); } template simdjson_inline T&& implementation_simdjson_result_base::take_value() && noexcept(false) { if (error()) { throw simdjson_error(error()); } return 
std::forward(this->first); } template simdjson_inline implementation_simdjson_result_base::operator T&&() && noexcept(false) { return std::forward>(*this).take_value(); } #endif // SIMDJSON_EXCEPTIONS template simdjson_inline const T& implementation_simdjson_result_base::value_unsafe() const& noexcept { return this->first; } template simdjson_inline T& implementation_simdjson_result_base::value_unsafe() & noexcept { return this->first; } template simdjson_inline T&& implementation_simdjson_result_base::value_unsafe() && noexcept { return std::forward(this->first); } template simdjson_inline implementation_simdjson_result_base::implementation_simdjson_result_base(T &&value, error_code error) noexcept : first{std::forward(value)}, second{error} {} template simdjson_inline implementation_simdjson_result_base::implementation_simdjson_result_base(error_code error) noexcept : implementation_simdjson_result_base(T{}, error) {} template simdjson_inline implementation_simdjson_result_base::implementation_simdjson_result_base(T &&value) noexcept : implementation_simdjson_result_base(std::forward(value), SUCCESS) {} } // namespace fallback } // namespace simdjson #endif // SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_INL_H /* end file simdjson/generic/implementation_simdjson_result_base-inl.h for fallback */ /* end file simdjson/generic/amalgamated.h for fallback */ /* including simdjson/fallback/end.h: #include "simdjson/fallback/end.h" */ /* begin file simdjson/fallback/end.h */ /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #include "simdjson/fallback/base.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ /* undefining SIMDJSON_IMPLEMENTATION from "fallback" */ #undef SIMDJSON_IMPLEMENTATION /* end file simdjson/fallback/end.h */ #endif // SIMDJSON_FALLBACK_H /* end file simdjson/fallback.h */ #elif SIMDJSON_BUILTIN_IMPLEMENTATION_IS(haswell) /* including 
simdjson/haswell.h: #include "simdjson/haswell.h" */ /* begin file simdjson/haswell.h */ #ifndef SIMDJSON_HASWELL_H #define SIMDJSON_HASWELL_H /* including simdjson/haswell/begin.h: #include "simdjson/haswell/begin.h" */ /* begin file simdjson/haswell/begin.h */ /* defining SIMDJSON_IMPLEMENTATION to "haswell" */ #define SIMDJSON_IMPLEMENTATION haswell /* including simdjson/haswell/base.h: #include "simdjson/haswell/base.h" */ /* begin file simdjson/haswell/base.h */ #ifndef SIMDJSON_HASWELL_BASE_H #define SIMDJSON_HASWELL_BASE_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #include "simdjson/base.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ // The constructor may be executed on any host, so we take care not to use SIMDJSON_TARGET_HASWELL namespace simdjson { /** * Implementation for Haswell (Intel AVX2). */ namespace haswell { class implementation; namespace { namespace simd { template struct simd8; template struct simd8x64; } // namespace simd } // unnamed namespace } // namespace haswell } // namespace simdjson #endif // SIMDJSON_HASWELL_BASE_H /* end file simdjson/haswell/base.h */ /* including simdjson/haswell/intrinsics.h: #include "simdjson/haswell/intrinsics.h" */ /* begin file simdjson/haswell/intrinsics.h */ #ifndef SIMDJSON_HASWELL_INTRINSICS_H #define SIMDJSON_HASWELL_INTRINSICS_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #include "simdjson/haswell/base.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ #if SIMDJSON_VISUAL_STUDIO // under clang within visual studio, this will include #include // visual studio or clang #else #include // elsewhere #endif // SIMDJSON_VISUAL_STUDIO #if SIMDJSON_CLANG_VISUAL_STUDIO /** * You are not supposed, normally, to include these * headers directly. 
Instead you should either include intrin.h * or x86intrin.h. However, when compiling with clang * under Windows (i.e., when _MSC_VER is set), these headers * only get included *if* the corresponding features are detected * from macros: * e.g., if __AVX2__ is set... in turn, we normally set these * macros by compiling against the corresponding architecture * (e.g., arch:AVX2, -mavx2, etc.) which compiles the whole * software with these advanced instructions. In simdjson, we * want to compile the whole program for a generic target, * and only target our specific kernels. As a workaround, * we directly include the needed headers. These headers would * normally guard against such usage, but we carefully included * (or ) before, so the headers * are fooled. */ #include // for _blsr_u64 #include // for __lzcnt64 #include // for most things (AVX2, AVX512, _popcnt64) #include #include #include #include #include // for _mm_clmulepi64_si128 // unfortunately, we may not get _blsr_u64, but, thankfully, clang // has it as a macro. 
#ifndef _blsr_u64 // we roll our own #define _blsr_u64(n) ((n - 1) & n) #endif // _blsr_u64 #endif // SIMDJSON_CLANG_VISUAL_STUDIO static_assert(sizeof(__m256i) <= simdjson::SIMDJSON_PADDING, "insufficient padding for haswell kernel."); #endif // SIMDJSON_HASWELL_INTRINSICS_H /* end file simdjson/haswell/intrinsics.h */ #if !SIMDJSON_CAN_ALWAYS_RUN_HASWELL SIMDJSON_TARGET_REGION("avx2,bmi,pclmul,lzcnt,popcnt") #endif /* including simdjson/haswell/bitmanipulation.h: #include "simdjson/haswell/bitmanipulation.h" */ /* begin file simdjson/haswell/bitmanipulation.h */ #ifndef SIMDJSON_HASWELL_BITMANIPULATION_H #define SIMDJSON_HASWELL_BITMANIPULATION_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #include "simdjson/haswell/base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/haswell/intrinsics.h" */ /* amalgamation skipped (editor-only): #include "simdjson/haswell/bitmask.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { namespace haswell { namespace { // We sometimes call trailing_zero on inputs that are zero, // but the algorithms do not end up using the returned value. // Sadly, sanitizers are not smart enough to figure it out. SIMDJSON_NO_SANITIZE_UNDEFINED // This function can be used safely even if not all bytes have been // initialized. // See issue https://github.com/simdjson/simdjson/issues/1965 SIMDJSON_NO_SANITIZE_MEMORY simdjson_inline int trailing_zeroes(uint64_t input_num) { #if SIMDJSON_REGULAR_VISUAL_STUDIO return (int)_tzcnt_u64(input_num); #else // SIMDJSON_REGULAR_VISUAL_STUDIO //////// // You might expect the next line to be equivalent to // return (int)_tzcnt_u64(input_num); // but the generated code differs and might be less efficient? 
//////// return __builtin_ctzll(input_num); #endif // SIMDJSON_REGULAR_VISUAL_STUDIO } /* result might be undefined when input_num is zero */ simdjson_inline uint64_t clear_lowest_bit(uint64_t input_num) { return _blsr_u64(input_num); } /* result might be undefined when input_num is zero */ simdjson_inline int leading_zeroes(uint64_t input_num) { return int(_lzcnt_u64(input_num)); } #if SIMDJSON_REGULAR_VISUAL_STUDIO simdjson_inline unsigned __int64 count_ones(uint64_t input_num) { // note: we do not support legacy 32-bit Windows in this kernel return __popcnt64(input_num);// Visual Studio wants two underscores } #else simdjson_inline long long int count_ones(uint64_t input_num) { return _popcnt64(input_num); } #endif simdjson_inline bool add_overflow(uint64_t value1, uint64_t value2, uint64_t *result) { #if SIMDJSON_REGULAR_VISUAL_STUDIO return _addcarry_u64(0, value1, value2, reinterpret_cast(result)); #else return __builtin_uaddll_overflow(value1, value2, reinterpret_cast(result)); #endif } } // unnamed namespace } // namespace haswell } // namespace simdjson #endif // SIMDJSON_HASWELL_BITMANIPULATION_H /* end file simdjson/haswell/bitmanipulation.h */ /* including simdjson/haswell/bitmask.h: #include "simdjson/haswell/bitmask.h" */ /* begin file simdjson/haswell/bitmask.h */ #ifndef SIMDJSON_HASWELL_BITMASK_H #define SIMDJSON_HASWELL_BITMASK_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #include "simdjson/haswell/base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/haswell/intrinsics.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { namespace haswell { namespace { // // Perform a "cumulative bitwise xor," flipping bits each time a 1 is encountered. 
// // For example, prefix_xor(00100100) == 00011100 // simdjson_inline uint64_t prefix_xor(const uint64_t bitmask) { // There should be no such thing with a processor supporting avx2 // but not clmul. __m128i all_ones = _mm_set1_epi8('\xFF'); __m128i result = _mm_clmulepi64_si128(_mm_set_epi64x(0ULL, bitmask), all_ones, 0); return _mm_cvtsi128_si64(result); } } // unnamed namespace } // namespace haswell } // namespace simdjson #endif // SIMDJSON_HASWELL_BITMASK_H /* end file simdjson/haswell/bitmask.h */ /* including simdjson/haswell/numberparsing_defs.h: #include "simdjson/haswell/numberparsing_defs.h" */ /* begin file simdjson/haswell/numberparsing_defs.h */ #ifndef SIMDJSON_HASWELL_NUMBERPARSING_DEFS_H #define SIMDJSON_HASWELL_NUMBERPARSING_DEFS_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #include "simdjson/haswell/base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/haswell/intrinsics.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #include "simdjson/internal/numberparsing_tables.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { namespace haswell { namespace numberparsing { /** @private */ static simdjson_inline uint32_t parse_eight_digits_unrolled(const uint8_t *chars) { // this actually computes *16* values so we are being wasteful. 
const __m128i ascii0 = _mm_set1_epi8('0'); const __m128i mul_1_10 = _mm_setr_epi8(10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1); const __m128i mul_1_100 = _mm_setr_epi16(100, 1, 100, 1, 100, 1, 100, 1); const __m128i mul_1_10000 = _mm_setr_epi16(10000, 1, 10000, 1, 10000, 1, 10000, 1); const __m128i input = _mm_sub_epi8( _mm_loadu_si128(reinterpret_cast(chars)), ascii0); const __m128i t1 = _mm_maddubs_epi16(input, mul_1_10); const __m128i t2 = _mm_madd_epi16(t1, mul_1_100); const __m128i t3 = _mm_packus_epi32(t2, t2); const __m128i t4 = _mm_madd_epi16(t3, mul_1_10000); return _mm_cvtsi128_si32( t4); // only captures the sum of the first 8 digits, drop the rest } /** @private */ simdjson_inline internal::value128 full_multiplication(uint64_t value1, uint64_t value2) { internal::value128 answer; #if SIMDJSON_REGULAR_VISUAL_STUDIO || SIMDJSON_IS_32BITS #ifdef _M_ARM64 // ARM64 has native support for 64-bit multiplications, no need to emultate answer.high = __umulh(value1, value2); answer.low = value1 * value2; #else answer.low = _umul128(value1, value2, &answer.high); // _umul128 not available on ARM64 #endif // _M_ARM64 #else // SIMDJSON_REGULAR_VISUAL_STUDIO || SIMDJSON_IS_32BITS __uint128_t r = (static_cast<__uint128_t>(value1)) * value2; answer.low = uint64_t(r); answer.high = uint64_t(r >> 64); #endif return answer; } } // namespace numberparsing } // namespace haswell } // namespace simdjson #define SIMDJSON_SWAR_NUMBER_PARSING 1 #endif // SIMDJSON_HASWELL_NUMBERPARSING_DEFS_H /* end file simdjson/haswell/numberparsing_defs.h */ /* including simdjson/haswell/simd.h: #include "simdjson/haswell/simd.h" */ /* begin file simdjson/haswell/simd.h */ #ifndef SIMDJSON_HASWELL_SIMD_H #define SIMDJSON_HASWELL_SIMD_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #include "simdjson/haswell/base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/haswell/intrinsics.h" */ /* 
amalgamation skipped (editor-only): #include "simdjson/haswell/bitmanipulation.h" */
/* amalgamation skipped (editor-only): #include "simdjson/internal/simdprune_tables.h" */
/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */

namespace simdjson {
namespace haswell {
namespace {
namespace simd {

  // Forward-declared so they can be used by splat and friends.
  //
  // Wrapper around one 256-bit register. Child is the concrete type deriving
  // from this base: the bitwise operators take and return Child, and the
  // compound assignments cast `this` to Child* to do so.
  template<typename Child>
  struct base {
    __m256i value;

    // Zero constructor
    simdjson_inline base() : value{__m256i()} {}

    // Conversion from SIMD register
    simdjson_inline base(const __m256i _value) : value(_value) {}

    // Conversion to SIMD register
    simdjson_inline operator const __m256i&() const { return this->value; }
    simdjson_inline operator __m256i&() { return this->value; }

    // Bit operations
    simdjson_inline Child operator|(const Child other) const { return _mm256_or_si256(*this, other); }
    simdjson_inline Child operator&(const Child other) const { return _mm256_and_si256(*this, other); }
    simdjson_inline Child operator^(const Child other) const { return _mm256_xor_si256(*this, other); }
    // The intrinsic negates its FIRST operand, hence the swapped arguments:
    // the result is (*this) & ~other.
    simdjson_inline Child bit_andnot(const Child other) const { return _mm256_andnot_si256(other, *this); }
    simdjson_inline Child& operator|=(const Child other) { auto this_cast = static_cast<Child*>(this); *this_cast = *this_cast | other; return *this_cast; }
    simdjson_inline Child& operator&=(const Child other) { auto this_cast = static_cast<Child*>(this); *this_cast = *this_cast & other; return *this_cast; }
    simdjson_inline Child& operator^=(const Child other) { auto this_cast = static_cast<Child*>(this); *this_cast = *this_cast ^ other; return *this_cast; }
  };

  // Forward-declared so they can be used by splat and friends.
template struct simd8; template> struct base8: base> { typedef uint32_t bitmask_t; typedef uint64_t bitmask2_t; simdjson_inline base8() : base>() {} simdjson_inline base8(const __m256i _value) : base>(_value) {} friend simdjson_really_inline Mask operator==(const simd8 lhs, const simd8 rhs) { return _mm256_cmpeq_epi8(lhs, rhs); } static const int SIZE = sizeof(base::value); template simdjson_inline simd8 prev(const simd8 prev_chunk) const { return _mm256_alignr_epi8(*this, _mm256_permute2x128_si256(prev_chunk, *this, 0x21), 16 - N); } }; // SIMD byte mask type (returned by things like eq and gt) template<> struct simd8: base8 { static simdjson_inline simd8 splat(bool _value) { return _mm256_set1_epi8(uint8_t(-(!!_value))); } simdjson_inline simd8() : base8() {} simdjson_inline simd8(const __m256i _value) : base8(_value) {} // Splat constructor simdjson_inline simd8(bool _value) : base8(splat(_value)) {} simdjson_inline int to_bitmask() const { return _mm256_movemask_epi8(*this); } simdjson_inline bool any() const { return !_mm256_testz_si256(*this, *this); } simdjson_inline simd8 operator~() const { return *this ^ true; } }; template struct base8_numeric: base8 { static simdjson_inline simd8 splat(T _value) { return _mm256_set1_epi8(_value); } static simdjson_inline simd8 zero() { return _mm256_setzero_si256(); } static simdjson_inline simd8 load(const T values[32]) { return _mm256_loadu_si256(reinterpret_cast(values)); } // Repeat 16 values as many times as necessary (usually for lookup tables) static simdjson_inline simd8 repeat_16( T v0, T v1, T v2, T v3, T v4, T v5, T v6, T v7, T v8, T v9, T v10, T v11, T v12, T v13, T v14, T v15 ) { return simd8( v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10,v11,v12,v13,v14,v15, v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10,v11,v12,v13,v14,v15 ); } simdjson_inline base8_numeric() : base8() {} simdjson_inline base8_numeric(const __m256i _value) : base8(_value) {} // Store to array simdjson_inline void store(T dst[32]) const { 
return _mm256_storeu_si256(reinterpret_cast<__m256i *>(dst), *this); } // Addition/subtraction are the same for signed and unsigned simdjson_inline simd8 operator+(const simd8 other) const { return _mm256_add_epi8(*this, other); } simdjson_inline simd8 operator-(const simd8 other) const { return _mm256_sub_epi8(*this, other); } simdjson_inline simd8& operator+=(const simd8 other) { *this = *this + other; return *static_cast*>(this); } simdjson_inline simd8& operator-=(const simd8 other) { *this = *this - other; return *static_cast*>(this); } // Override to distinguish from bool version simdjson_inline simd8 operator~() const { return *this ^ 0xFFu; } // Perform a lookup assuming the value is between 0 and 16 (undefined behavior for out of range values) template simdjson_inline simd8 lookup_16(simd8 lookup_table) const { return _mm256_shuffle_epi8(lookup_table, *this); } // Copies to 'output" all bytes corresponding to a 0 in the mask (interpreted as a bitset). // Passing a 0 value for mask would be equivalent to writing out every byte to output. // Only the first 32 - count_ones(mask) bytes of the result are significant but 32 bytes // get written. // Design consideration: it seems like a function with the // signature simd8 compress(uint32_t mask) would be // sensible, but the AVX ISA makes this kind of approach difficult. template simdjson_inline void compress(uint32_t mask, L * output) const { using internal::thintable_epi8; using internal::BitsSetTable256mul2; using internal::pshufb_combine_table; // this particular implementation was inspired by work done by @animetosho // we do it in four steps, first 8 bytes and then second 8 bytes... uint8_t mask1 = uint8_t(mask); // least significant 8 bits uint8_t mask2 = uint8_t(mask >> 8); // second least significant 8 bits uint8_t mask3 = uint8_t(mask >> 16); // ... uint8_t mask4 = uint8_t(mask >> 24); // ... 
// next line just loads the 64-bit values thintable_epi8[mask1] and // thintable_epi8[mask2] into a 128-bit register, using only // two instructions on most compilers. __m256i shufmask = _mm256_set_epi64x(thintable_epi8[mask4], thintable_epi8[mask3], thintable_epi8[mask2], thintable_epi8[mask1]); // we increment by 0x08 the second half of the mask and so forth shufmask = _mm256_add_epi8(shufmask, _mm256_set_epi32(0x18181818, 0x18181818, 0x10101010, 0x10101010, 0x08080808, 0x08080808, 0, 0)); // this is the version "nearly pruned" __m256i pruned = _mm256_shuffle_epi8(*this, shufmask); // we still need to put the pieces back together. // we compute the popcount of the first words: int pop1 = BitsSetTable256mul2[mask1]; int pop3 = BitsSetTable256mul2[mask3]; // then load the corresponding mask // could be done with _mm256_loadu2_m128i but many standard libraries omit this intrinsic. __m256i v256 = _mm256_castsi128_si256( _mm_loadu_si128(reinterpret_cast(pshufb_combine_table + pop1 * 8))); __m256i compactmask = _mm256_insertf128_si256(v256, _mm_loadu_si128(reinterpret_cast(pshufb_combine_table + pop3 * 8)), 1); __m256i almostthere = _mm256_shuffle_epi8(pruned, compactmask); // We just need to write out the result. // This is the tricky bit that is hard to do // if we want to return a SIMD register, since there // is no single-instruction approach to recombine // the two 128-bit lanes with an offset. 
__m128i v128; v128 = _mm256_castsi256_si128(almostthere); _mm_storeu_si128( reinterpret_cast<__m128i *>(output), v128); v128 = _mm256_extractf128_si256(almostthere, 1); _mm_storeu_si128( reinterpret_cast<__m128i *>(output + 16 - count_ones(mask & 0xFFFF)), v128); } template simdjson_inline simd8 lookup_16( L replace0, L replace1, L replace2, L replace3, L replace4, L replace5, L replace6, L replace7, L replace8, L replace9, L replace10, L replace11, L replace12, L replace13, L replace14, L replace15) const { return lookup_16(simd8::repeat_16( replace0, replace1, replace2, replace3, replace4, replace5, replace6, replace7, replace8, replace9, replace10, replace11, replace12, replace13, replace14, replace15 )); } }; // Signed bytes template<> struct simd8 : base8_numeric { simdjson_inline simd8() : base8_numeric() {} simdjson_inline simd8(const __m256i _value) : base8_numeric(_value) {} // Splat constructor simdjson_inline simd8(int8_t _value) : simd8(splat(_value)) {} // Array constructor simdjson_inline simd8(const int8_t values[32]) : simd8(load(values)) {} // Member-by-member initialization simdjson_inline simd8( int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15, int8_t v16, int8_t v17, int8_t v18, int8_t v19, int8_t v20, int8_t v21, int8_t v22, int8_t v23, int8_t v24, int8_t v25, int8_t v26, int8_t v27, int8_t v28, int8_t v29, int8_t v30, int8_t v31 ) : simd8(_mm256_setr_epi8( v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10,v11,v12,v13,v14,v15, v16,v17,v18,v19,v20,v21,v22,v23, v24,v25,v26,v27,v28,v29,v30,v31 )) {} // Repeat 16 values as many times as necessary (usually for lookup tables) simdjson_inline static simd8 repeat_16( int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15 ) { return simd8( v0, v1, v2, v3, 
v4, v5, v6, v7, v8, v9, v10,v11,v12,v13,v14,v15, v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10,v11,v12,v13,v14,v15 ); } // Order-sensitive comparisons simdjson_inline simd8 max_val(const simd8 other) const { return _mm256_max_epi8(*this, other); } simdjson_inline simd8 min_val(const simd8 other) const { return _mm256_min_epi8(*this, other); } simdjson_inline simd8 operator>(const simd8 other) const { return _mm256_cmpgt_epi8(*this, other); } simdjson_inline simd8 operator<(const simd8 other) const { return _mm256_cmpgt_epi8(other, *this); } }; // Unsigned bytes template<> struct simd8: base8_numeric { simdjson_inline simd8() : base8_numeric() {} simdjson_inline simd8(const __m256i _value) : base8_numeric(_value) {} // Splat constructor simdjson_inline simd8(uint8_t _value) : simd8(splat(_value)) {} // Array constructor simdjson_inline simd8(const uint8_t values[32]) : simd8(load(values)) {} // Member-by-member initialization simdjson_inline simd8( uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15, uint8_t v16, uint8_t v17, uint8_t v18, uint8_t v19, uint8_t v20, uint8_t v21, uint8_t v22, uint8_t v23, uint8_t v24, uint8_t v25, uint8_t v26, uint8_t v27, uint8_t v28, uint8_t v29, uint8_t v30, uint8_t v31 ) : simd8(_mm256_setr_epi8( v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10,v11,v12,v13,v14,v15, v16,v17,v18,v19,v20,v21,v22,v23, v24,v25,v26,v27,v28,v29,v30,v31 )) {} // Repeat 16 values as many times as necessary (usually for lookup tables) simdjson_inline static simd8 repeat_16( uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15 ) { return simd8( v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10,v11,v12,v13,v14,v15, v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10,v11,v12,v13,v14,v15 ); } // 
Saturated math simdjson_inline simd8 saturating_add(const simd8 other) const { return _mm256_adds_epu8(*this, other); } simdjson_inline simd8 saturating_sub(const simd8 other) const { return _mm256_subs_epu8(*this, other); } // Order-specific operations simdjson_inline simd8 max_val(const simd8 other) const { return _mm256_max_epu8(*this, other); } simdjson_inline simd8 min_val(const simd8 other) const { return _mm256_min_epu8(other, *this); } // Same as >, but only guarantees true is nonzero (< guarantees true = -1) simdjson_inline simd8 gt_bits(const simd8 other) const { return this->saturating_sub(other); } // Same as <, but only guarantees true is nonzero (< guarantees true = -1) simdjson_inline simd8 lt_bits(const simd8 other) const { return other.saturating_sub(*this); } simdjson_inline simd8 operator<=(const simd8 other) const { return other.max_val(*this) == other; } simdjson_inline simd8 operator>=(const simd8 other) const { return other.min_val(*this) == other; } simdjson_inline simd8 operator>(const simd8 other) const { return this->gt_bits(other).any_bits_set(); } simdjson_inline simd8 operator<(const simd8 other) const { return this->lt_bits(other).any_bits_set(); } // Bit-specific operations simdjson_inline simd8 bits_not_set() const { return *this == uint8_t(0); } simdjson_inline simd8 bits_not_set(simd8 bits) const { return (*this & bits).bits_not_set(); } simdjson_inline simd8 any_bits_set() const { return ~this->bits_not_set(); } simdjson_inline simd8 any_bits_set(simd8 bits) const { return ~this->bits_not_set(bits); } simdjson_inline bool is_ascii() const { return _mm256_movemask_epi8(*this) == 0; } simdjson_inline bool bits_not_set_anywhere() const { return _mm256_testz_si256(*this, *this); } simdjson_inline bool any_bits_set_anywhere() const { return !bits_not_set_anywhere(); } simdjson_inline bool bits_not_set_anywhere(simd8 bits) const { return _mm256_testz_si256(*this, bits); } simdjson_inline bool any_bits_set_anywhere(simd8 bits) const { 
return !bits_not_set_anywhere(bits); } template simdjson_inline simd8 shr() const { return simd8(_mm256_srli_epi16(*this, N)) & uint8_t(0xFFu >> N); } template simdjson_inline simd8 shl() const { return simd8(_mm256_slli_epi16(*this, N)) & uint8_t(0xFFu << N); } // Get one of the bits and make a bitmask out of it. // e.g. value.get_bit<7>() gets the high bit template simdjson_inline int get_bit() const { return _mm256_movemask_epi8(_mm256_slli_epi16(*this, 7-N)); } }; template struct simd8x64 { static constexpr int NUM_CHUNKS = 64 / sizeof(simd8); static_assert(NUM_CHUNKS == 2, "Haswell kernel should use two registers per 64-byte block."); const simd8 chunks[NUM_CHUNKS]; simd8x64(const simd8x64& o) = delete; // no copy allowed simd8x64& operator=(const simd8& other) = delete; // no assignment allowed simd8x64() = delete; // no default constructor allowed simdjson_inline simd8x64(const simd8 chunk0, const simd8 chunk1) : chunks{chunk0, chunk1} {} simdjson_inline simd8x64(const T ptr[64]) : chunks{simd8::load(ptr), simd8::load(ptr+32)} {} simdjson_inline uint64_t compress(uint64_t mask, T * output) const { uint32_t mask1 = uint32_t(mask); uint32_t mask2 = uint32_t(mask >> 32); this->chunks[0].compress(mask1, output); this->chunks[1].compress(mask2, output + 32 - count_ones(mask1)); return 64 - count_ones(mask); } simdjson_inline void store(T ptr[64]) const { this->chunks[0].store(ptr+sizeof(simd8)*0); this->chunks[1].store(ptr+sizeof(simd8)*1); } simdjson_inline uint64_t to_bitmask() const { uint64_t r_lo = uint32_t(this->chunks[0].to_bitmask()); uint64_t r_hi = this->chunks[1].to_bitmask(); return r_lo | (r_hi << 32); } simdjson_inline simd8 reduce_or() const { return this->chunks[0] | this->chunks[1]; } simdjson_inline simd8x64 bit_or(const T m) const { const simd8 mask = simd8::splat(m); return simd8x64( this->chunks[0] | mask, this->chunks[1] | mask ); } simdjson_inline uint64_t eq(const T m) const { const simd8 mask = simd8::splat(m); return simd8x64( 
this->chunks[0] == mask, this->chunks[1] == mask ).to_bitmask(); } simdjson_inline uint64_t eq(const simd8x64 &other) const { return simd8x64( this->chunks[0] == other.chunks[0], this->chunks[1] == other.chunks[1] ).to_bitmask(); } simdjson_inline uint64_t lteq(const T m) const { const simd8 mask = simd8::splat(m); return simd8x64( this->chunks[0] <= mask, this->chunks[1] <= mask ).to_bitmask(); } }; // struct simd8x64 } // namespace simd } // unnamed namespace } // namespace haswell } // namespace simdjson #endif // SIMDJSON_HASWELL_SIMD_H /* end file simdjson/haswell/simd.h */ /* including simdjson/haswell/stringparsing_defs.h: #include "simdjson/haswell/stringparsing_defs.h" */ /* begin file simdjson/haswell/stringparsing_defs.h */ #ifndef SIMDJSON_HASWELL_STRINGPARSING_DEFS_H #define SIMDJSON_HASWELL_STRINGPARSING_DEFS_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #include "simdjson/haswell/base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/haswell/simd.h" */ /* amalgamation skipped (editor-only): #include "simdjson/haswell/bitmanipulation.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { namespace haswell { namespace { using namespace simd; // Holds backslashes and quotes locations. 
struct backslash_and_quote { public: static constexpr uint32_t BYTES_PROCESSED = 32; simdjson_inline static backslash_and_quote copy_and_find(const uint8_t *src, uint8_t *dst); simdjson_inline bool has_quote_first() { return ((bs_bits - 1) & quote_bits) != 0; } simdjson_inline bool has_backslash() { return ((quote_bits - 1) & bs_bits) != 0; } simdjson_inline int quote_index() { return trailing_zeroes(quote_bits); } simdjson_inline int backslash_index() { return trailing_zeroes(bs_bits); } uint32_t bs_bits; uint32_t quote_bits; }; // struct backslash_and_quote simdjson_inline backslash_and_quote backslash_and_quote::copy_and_find(const uint8_t *src, uint8_t *dst) { // this can read up to 15 bytes beyond the buffer size, but we require // SIMDJSON_PADDING of padding static_assert(SIMDJSON_PADDING >= (BYTES_PROCESSED - 1), "backslash and quote finder must process fewer than SIMDJSON_PADDING bytes"); simd8 v(src); // store to dest unconditionally - we can overwrite the bits we don't like later v.store(dst); return { static_cast((v == '\\').to_bitmask()), // bs_bits static_cast((v == '"').to_bitmask()), // quote_bits }; } } // unnamed namespace } // namespace haswell } // namespace simdjson #endif // SIMDJSON_HASWELL_STRINGPARSING_DEFS_H /* end file simdjson/haswell/stringparsing_defs.h */ /* end file simdjson/haswell/begin.h */ /* including simdjson/generic/amalgamated.h for haswell: #include "simdjson/generic/amalgamated.h" */ /* begin file simdjson/generic/amalgamated.h for haswell */ #if defined(SIMDJSON_CONDITIONAL_INCLUDE) && !defined(SIMDJSON_GENERIC_DEPENDENCIES_H) #error simdjson/generic/dependencies.h must be included before simdjson/generic/amalgamated.h! 
#endif /* including simdjson/generic/base.h for haswell: #include "simdjson/generic/base.h" */ /* begin file simdjson/generic/base.h for haswell */ #ifndef SIMDJSON_GENERIC_BASE_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_BASE_H */ /* amalgamation skipped (editor-only): #include "simdjson/base.h" */ /* amalgamation skipped (editor-only): // If we haven't got an implementation yet, we're in the editor, editing a generic file! Just */ /* amalgamation skipped (editor-only): // use the most advanced one we can so the most possible stuff can be tested. */ /* amalgamation skipped (editor-only): #ifndef SIMDJSON_IMPLEMENTATION */ /* amalgamation skipped (editor-only): #include "simdjson/implementation_detection.h" */ /* amalgamation skipped (editor-only): #if SIMDJSON_IMPLEMENTATION_ICELAKE */ /* amalgamation skipped (editor-only): #include "simdjson/icelake/begin.h" */ /* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_HASWELL */ /* amalgamation skipped (editor-only): #include "simdjson/haswell/begin.h" */ /* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_WESTMERE */ /* amalgamation skipped (editor-only): #include "simdjson/westmere/begin.h" */ /* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_ARM64 */ /* amalgamation skipped (editor-only): #include "simdjson/arm64/begin.h" */ /* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_PPC64 */ /* amalgamation skipped (editor-only): #include "simdjson/ppc64/begin.h" */ /* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_FALLBACK */ /* amalgamation skipped (editor-only): #include "simdjson/fallback/begin.h" */ /* amalgamation skipped (editor-only): #else */ /* amalgamation skipped (editor-only): #error "All possible implementations (including fallback) have been disabled! simdjson will not run." 
*/ /* amalgamation skipped (editor-only): #endif */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_IMPLEMENTATION */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { namespace haswell { struct open_container; class dom_parser_implementation; /** * The type of a JSON number */ enum class number_type { floating_point_number=1, /// a binary64 number signed_integer, /// a signed integer that fits in a 64-bit word using two's complement unsigned_integer /// a positive integer larger or equal to 1<<63 }; } // namespace haswell } // namespace simdjson #endif // SIMDJSON_GENERIC_BASE_H /* end file simdjson/generic/base.h for haswell */ /* including simdjson/generic/jsoncharutils.h for haswell: #include "simdjson/generic/jsoncharutils.h" */ /* begin file simdjson/generic/jsoncharutils.h for haswell */ #ifndef SIMDJSON_GENERIC_JSONCHARUTILS_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_JSONCHARUTILS_H */ /* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/internal/jsoncharutils_tables.h" */ /* amalgamation skipped (editor-only): #include "simdjson/internal/numberparsing_tables.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { namespace haswell { namespace { namespace jsoncharutils { // return non-zero if not a structural or whitespace char // zero otherwise simdjson_inline uint32_t is_not_structural_or_whitespace(uint8_t c) { return internal::structural_or_whitespace_negated[c]; } simdjson_inline uint32_t is_structural_or_whitespace(uint8_t c) { return internal::structural_or_whitespace[c]; } // returns a value with the high 16 bits set if not valid // otherwise returns the conversion of the 4 hex digits at src into the bottom // 16 bits of the 32-bit return register // // see 
// https://lemire.me/blog/2019/04/17/parsing-short-hexadecimal-strings-efficiently/ static inline uint32_t hex_to_u32_nocheck( const uint8_t *src) { // strictly speaking, static inline is a C-ism uint32_t v1 = internal::digit_to_val32[630 + src[0]]; uint32_t v2 = internal::digit_to_val32[420 + src[1]]; uint32_t v3 = internal::digit_to_val32[210 + src[2]]; uint32_t v4 = internal::digit_to_val32[0 + src[3]]; return v1 | v2 | v3 | v4; } // given a code point cp, writes to c // the utf-8 code, outputting the length in // bytes, if the length is zero, the code point // is invalid // // This can possibly be made faster using pdep // and clz and table lookups, but JSON documents // have few escaped code points, and the following // function looks cheap. // // Note: we assume that surrogates are treated separately // simdjson_inline size_t codepoint_to_utf8(uint32_t cp, uint8_t *c) { if (cp <= 0x7F) { c[0] = uint8_t(cp); return 1; // ascii } if (cp <= 0x7FF) { c[0] = uint8_t((cp >> 6) + 192); c[1] = uint8_t((cp & 63) + 128); return 2; // universal plane // Surrogates are treated elsewhere... //} //else if (0xd800 <= cp && cp <= 0xdfff) { // return 0; // surrogates // could put assert here } else if (cp <= 0xFFFF) { c[0] = uint8_t((cp >> 12) + 224); c[1] = uint8_t(((cp >> 6) & 63) + 128); c[2] = uint8_t((cp & 63) + 128); return 3; } else if (cp <= 0x10FFFF) { // if you know you have a valid code point, this // is not needed c[0] = uint8_t((cp >> 18) + 240); c[1] = uint8_t(((cp >> 12) & 63) + 128); c[2] = uint8_t(((cp >> 6) & 63) + 128); c[3] = uint8_t((cp & 63) + 128); return 4; } // will return 0 when the code point was too large. 
return 0; // bad r } #if SIMDJSON_IS_32BITS // _umul128 for x86, arm // this is a slow emulation routine for 32-bit // static simdjson_inline uint64_t __emulu(uint32_t x, uint32_t y) { return x * (uint64_t)y; } static simdjson_inline uint64_t _umul128(uint64_t ab, uint64_t cd, uint64_t *hi) { uint64_t ad = __emulu((uint32_t)(ab >> 32), (uint32_t)cd); uint64_t bd = __emulu((uint32_t)ab, (uint32_t)cd); uint64_t adbc = ad + __emulu((uint32_t)ab, (uint32_t)(cd >> 32)); uint64_t adbc_carry = !!(adbc < ad); uint64_t lo = bd + (adbc << 32); *hi = __emulu((uint32_t)(ab >> 32), (uint32_t)(cd >> 32)) + (adbc >> 32) + (adbc_carry << 32) + !!(lo < bd); return lo; } #endif } // namespace jsoncharutils } // unnamed namespace } // namespace haswell } // namespace simdjson #endif // SIMDJSON_GENERIC_JSONCHARUTILS_H /* end file simdjson/generic/jsoncharutils.h for haswell */ /* including simdjson/generic/atomparsing.h for haswell: #include "simdjson/generic/atomparsing.h" */ /* begin file simdjson/generic/atomparsing.h for haswell */ #ifndef SIMDJSON_GENERIC_ATOMPARSING_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ATOMPARSING_H */ /* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/jsoncharutils.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ #include namespace simdjson { namespace haswell { namespace { /// @private namespace atomparsing { // The string_to_uint32 is exclusively used to map literal strings to 32-bit values. // We use memcpy instead of a pointer cast to avoid undefined behaviors since we cannot // be certain that the character pointer will be properly aligned. // You might think that using memcpy makes this function expensive, but you'd be wrong. 
// All decent optimizing compilers (GCC, clang, Visual Studio) will compile string_to_uint32("false"); // to the compile-time constant 1936482662. simdjson_inline uint32_t string_to_uint32(const char* str) { uint32_t val; std::memcpy(&val, str, sizeof(uint32_t)); return val; } // Again in str4ncmp we use a memcpy to avoid undefined behavior. The memcpy may appear expensive. // Yet all decent optimizing compilers will compile memcpy to a single instruction, just about. simdjson_warn_unused simdjson_inline uint32_t str4ncmp(const uint8_t *src, const char* atom) { uint32_t srcval; // we want to avoid unaligned 32-bit loads (undefined in C/C++) static_assert(sizeof(uint32_t) <= SIMDJSON_PADDING, "SIMDJSON_PADDING must be larger than 4 bytes"); std::memcpy(&srcval, src, sizeof(uint32_t)); return srcval ^ string_to_uint32(atom); } simdjson_warn_unused simdjson_inline bool is_valid_true_atom(const uint8_t *src) { return (str4ncmp(src, "true") | jsoncharutils::is_not_structural_or_whitespace(src[4])) == 0; } simdjson_warn_unused simdjson_inline bool is_valid_true_atom(const uint8_t *src, size_t len) { if (len > 4) { return is_valid_true_atom(src); } else if (len == 4) { return !str4ncmp(src, "true"); } else { return false; } } simdjson_warn_unused simdjson_inline bool is_valid_false_atom(const uint8_t *src) { return (str4ncmp(src+1, "alse") | jsoncharutils::is_not_structural_or_whitespace(src[5])) == 0; } simdjson_warn_unused simdjson_inline bool is_valid_false_atom(const uint8_t *src, size_t len) { if (len > 5) { return is_valid_false_atom(src); } else if (len == 5) { return !str4ncmp(src+1, "alse"); } else { return false; } } simdjson_warn_unused simdjson_inline bool is_valid_null_atom(const uint8_t *src) { return (str4ncmp(src, "null") | jsoncharutils::is_not_structural_or_whitespace(src[4])) == 0; } simdjson_warn_unused simdjson_inline bool is_valid_null_atom(const uint8_t *src, size_t len) { if (len > 4) { return is_valid_null_atom(src); } else if (len == 4) { return 
!str4ncmp(src, "null"); } else { return false; } } } // namespace atomparsing } // unnamed namespace } // namespace haswell } // namespace simdjson #endif // SIMDJSON_GENERIC_ATOMPARSING_H /* end file simdjson/generic/atomparsing.h for haswell */ /* including simdjson/generic/dom_parser_implementation.h for haswell: #include "simdjson/generic/dom_parser_implementation.h" */ /* begin file simdjson/generic/dom_parser_implementation.h for haswell */ #ifndef SIMDJSON_GENERIC_DOM_PARSER_IMPLEMENTATION_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_DOM_PARSER_IMPLEMENTATION_H */ /* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/internal/dom_parser_implementation.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { namespace haswell { // expectation: sizeof(open_container) = 64/8. 
struct open_container { uint32_t tape_index; // where, on the tape, does the scope ([,{) begins uint32_t count; // how many elements in the scope }; // struct open_container static_assert(sizeof(open_container) == 64/8, "Open container must be 64 bits"); class dom_parser_implementation final : public internal::dom_parser_implementation { public: /** Tape location of each open { or [ */ std::unique_ptr open_containers{}; /** Whether each open container is a [ or { */ std::unique_ptr is_array{}; /** Buffer passed to stage 1 */ const uint8_t *buf{}; /** Length passed to stage 1 */ size_t len{0}; /** Document passed to stage 2 */ dom::document *doc{}; inline dom_parser_implementation() noexcept; inline dom_parser_implementation(dom_parser_implementation &&other) noexcept; inline dom_parser_implementation &operator=(dom_parser_implementation &&other) noexcept; dom_parser_implementation(const dom_parser_implementation &) = delete; dom_parser_implementation &operator=(const dom_parser_implementation &) = delete; simdjson_warn_unused error_code parse(const uint8_t *buf, size_t len, dom::document &doc) noexcept final; simdjson_warn_unused error_code stage1(const uint8_t *buf, size_t len, stage1_mode partial) noexcept final; simdjson_warn_unused error_code stage2(dom::document &doc) noexcept final; simdjson_warn_unused error_code stage2_next(dom::document &doc) noexcept final; simdjson_warn_unused uint8_t *parse_string(const uint8_t *src, uint8_t *dst, bool allow_replacement) const noexcept final; simdjson_warn_unused uint8_t *parse_wobbly_string(const uint8_t *src, uint8_t *dst) const noexcept final; inline simdjson_warn_unused error_code set_capacity(size_t capacity) noexcept final; inline simdjson_warn_unused error_code set_max_depth(size_t max_depth) noexcept final; private: simdjson_inline simdjson_warn_unused error_code set_capacity_stage1(size_t capacity); }; } // namespace haswell } // namespace simdjson namespace simdjson { namespace haswell { inline 
dom_parser_implementation::dom_parser_implementation() noexcept = default; inline dom_parser_implementation::dom_parser_implementation(dom_parser_implementation &&other) noexcept = default; inline dom_parser_implementation &dom_parser_implementation::operator=(dom_parser_implementation &&other) noexcept = default; // Leaving these here so they can be inlined if so desired inline simdjson_warn_unused error_code dom_parser_implementation::set_capacity(size_t capacity) noexcept { if(capacity > SIMDJSON_MAXSIZE_BYTES) { return CAPACITY; } // Stage 1 index output size_t max_structures = SIMDJSON_ROUNDUP_N(capacity, 64) + 2 + 7; structural_indexes.reset( new (std::nothrow) uint32_t[max_structures] ); if (!structural_indexes) { _capacity = 0; return MEMALLOC; } structural_indexes[0] = 0; n_structural_indexes = 0; _capacity = capacity; return SUCCESS; } inline simdjson_warn_unused error_code dom_parser_implementation::set_max_depth(size_t max_depth) noexcept { // Stage 2 stacks open_containers.reset(new (std::nothrow) open_container[max_depth]); is_array.reset(new (std::nothrow) bool[max_depth]); if (!is_array || !open_containers) { _max_depth = 0; return MEMALLOC; } _max_depth = max_depth; return SUCCESS; } } // namespace haswell } // namespace simdjson #endif // SIMDJSON_GENERIC_DOM_PARSER_IMPLEMENTATION_H /* end file simdjson/generic/dom_parser_implementation.h for haswell */ /* including simdjson/generic/implementation_simdjson_result_base.h for haswell: #include "simdjson/generic/implementation_simdjson_result_base.h" */ /* begin file simdjson/generic/implementation_simdjson_result_base.h for haswell */ #ifndef SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_H */ /* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ /* amalgamation skipped (editor-only): #endif 
// SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { namespace haswell { // This is a near copy of include/error.h's implementation_simdjson_result_base, except it doesn't use std::pair // so we can avoid inlining errors // TODO reconcile these! /** * The result of a simdjson operation that could fail. * * Gives the option of reading error codes, or throwing an exception by casting to the desired result. * * This is a base class for implementations that want to add functions to the result type for * chaining. * * Override like: * * struct simdjson_result : public internal::implementation_simdjson_result_base { * simdjson_result() noexcept : internal::implementation_simdjson_result_base() {} * simdjson_result(error_code error) noexcept : internal::implementation_simdjson_result_base(error) {} * simdjson_result(T &&value) noexcept : internal::implementation_simdjson_result_base(std::forward(value)) {} * simdjson_result(T &&value, error_code error) noexcept : internal::implementation_simdjson_result_base(value, error) {} * // Your extra methods here * } * * Then any method returning simdjson_result will be chainable with your methods. */ template struct implementation_simdjson_result_base { /** * Create a new empty result with error = UNINITIALIZED. */ simdjson_inline implementation_simdjson_result_base() noexcept = default; /** * Create a new error result. */ simdjson_inline implementation_simdjson_result_base(error_code error) noexcept; /** * Create a new successful result. */ simdjson_inline implementation_simdjson_result_base(T &&value) noexcept; /** * Create a new result with both things (use if you don't want to branch when creating the result). */ simdjson_inline implementation_simdjson_result_base(T &&value, error_code error) noexcept; /** * Move the value and the error to the provided variables. * * @param value The variable to assign the value to. May not be set if there is an error. * @param error The variable to assign the error to. 
Set to SUCCESS if there is no error. */ simdjson_inline void tie(T &value, error_code &error) && noexcept; /** * Move the value to the provided variable. * * @param value The variable to assign the value to. May not be set if there is an error. */ simdjson_inline error_code get(T &value) && noexcept; /** * The error. */ simdjson_inline error_code error() const noexcept; #if SIMDJSON_EXCEPTIONS /** * Get the result value. * * @throw simdjson_error if there was an error. */ simdjson_inline T& value() & noexcept(false); /** * Take the result value (move it). * * @throw simdjson_error if there was an error. */ simdjson_inline T&& value() && noexcept(false); /** * Take the result value (move it). * * @throw simdjson_error if there was an error. */ simdjson_inline T&& take_value() && noexcept(false); /** * Cast to the value (will throw on error). * * @throw simdjson_error if there was an error. */ simdjson_inline operator T&&() && noexcept(false); #endif // SIMDJSON_EXCEPTIONS /** * Get the result value. This function is safe if and only * the error() method returns a value that evaluates to false. */ simdjson_inline const T& value_unsafe() const& noexcept; /** * Get the result value. This function is safe if and only * the error() method returns a value that evaluates to false. */ simdjson_inline T& value_unsafe() & noexcept; /** * Take the result value (move it). This function is safe if and only * the error() method returns a value that evaluates to false. */ simdjson_inline T&& value_unsafe() && noexcept; protected: /** users should never directly access first and second. **/ T first{}; /** Users should never directly access 'first'. **/ error_code second{UNINITIALIZED}; /** Users should never directly access 'second'. 
**/ }; // struct implementation_simdjson_result_base } // namespace haswell } // namespace simdjson #endif // SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_H /* end file simdjson/generic/implementation_simdjson_result_base.h for haswell */ /* including simdjson/generic/numberparsing.h for haswell: #include "simdjson/generic/numberparsing.h" */ /* begin file simdjson/generic/numberparsing.h for haswell */ #ifndef SIMDJSON_GENERIC_NUMBERPARSING_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_NUMBERPARSING_H */ /* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/jsoncharutils.h" */ /* amalgamation skipped (editor-only): #include "simdjson/internal/numberparsing_tables.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ #include #include #include namespace simdjson { namespace haswell { namespace numberparsing { #ifdef JSON_TEST_NUMBERS #define INVALID_NUMBER(SRC) (found_invalid_number((SRC)), NUMBER_ERROR) #define WRITE_INTEGER(VALUE, SRC, WRITER) (found_integer((VALUE), (SRC)), (WRITER).append_s64((VALUE))) #define WRITE_UNSIGNED(VALUE, SRC, WRITER) (found_unsigned_integer((VALUE), (SRC)), (WRITER).append_u64((VALUE))) #define WRITE_DOUBLE(VALUE, SRC, WRITER) (found_float((VALUE), (SRC)), (WRITER).append_double((VALUE))) #else #define INVALID_NUMBER(SRC) (NUMBER_ERROR) #define WRITE_INTEGER(VALUE, SRC, WRITER) (WRITER).append_s64((VALUE)) #define WRITE_UNSIGNED(VALUE, SRC, WRITER) (WRITER).append_u64((VALUE)) #define WRITE_DOUBLE(VALUE, SRC, WRITER) (WRITER).append_double((VALUE)) #endif namespace { // Convert a mantissa, an exponent and a sign bit into an ieee64 double. // The real_exponent needs to be in [0, 2046] (technically real_exponent = 2047 would be acceptable). // The mantissa should be in [0,1<<53). 
The bit at index (1ULL << 52) while be zeroed. simdjson_inline double to_double(uint64_t mantissa, uint64_t real_exponent, bool negative) { double d; mantissa &= ~(1ULL << 52); mantissa |= real_exponent << 52; mantissa |= ((static_cast(negative)) << 63); std::memcpy(&d, &mantissa, sizeof(d)); return d; } // Attempts to compute i * 10^(power) exactly; and if "negative" is // true, negate the result. // This function will only work in some cases, when it does not work, success is // set to false. This should work *most of the time* (like 99% of the time). // We assume that power is in the [smallest_power, // largest_power] interval: the caller is responsible for this check. simdjson_inline bool compute_float_64(int64_t power, uint64_t i, bool negative, double &d) { // we start with a fast path // It was described in // Clinger WD. How to read floating point numbers accurately. // ACM SIGPLAN Notices. 1990 #ifndef FLT_EVAL_METHOD #error "FLT_EVAL_METHOD should be defined, please include cfloat." #endif #if (FLT_EVAL_METHOD != 1) && (FLT_EVAL_METHOD != 0) // We cannot be certain that x/y is rounded to nearest. if (0 <= power && power <= 22 && i <= 9007199254740991) #else if (-22 <= power && power <= 22 && i <= 9007199254740991) #endif { // convert the integer into a double. This is lossless since // 0 <= i <= 2^53 - 1. d = double(i); // // The general idea is as follows. // If 0 <= s < 2^53 and if 10^0 <= p <= 10^22 then // 1) Both s and p can be represented exactly as 64-bit floating-point // values // (binary64). // 2) Because s and p can be represented exactly as floating-point values, // then s * p // and s / p will produce correctly rounded values. // if (power < 0) { d = d / simdjson::internal::power_of_ten[-power]; } else { d = d * simdjson::internal::power_of_ten[power]; } if (negative) { d = -d; } return true; } // When 22 < power && power < 22 + 16, we could // hope for another, secondary fast path. It was // described by David M. 
Gay in "Correctly rounded // binary-decimal and decimal-binary conversions." (1990) // If you need to compute i * 10^(22 + x) for x < 16, // first compute i * 10^x, if you know that result is exact // (e.g., when i * 10^x < 2^53), // then you can still proceed and do (i * 10^x) * 10^22. // Is this worth your time? // You need 22 < power *and* power < 22 + 16 *and* (i * 10^(x-22) < 2^53) // for this second fast path to work. // If you you have 22 < power *and* power < 22 + 16, and then you // optimistically compute "i * 10^(x-22)", there is still a chance that you // have wasted your time if i * 10^(x-22) >= 2^53. It makes the use cases of // this optimization maybe less common than we would like. Source: // http://www.exploringbinary.com/fast-path-decimal-to-floating-point-conversion/ // also used in RapidJSON: https://rapidjson.org/strtod_8h_source.html // The fast path has now failed, so we are failing back on the slower path. // In the slow path, we need to adjust i so that it is > 1<<63 which is always // possible, except if i == 0, so we handle i == 0 separately. if(i == 0) { d = negative ? -0.0 : 0.0; return true; } // The exponent is 1024 + 63 + power // + floor(log(5**power)/log(2)). // The 1024 comes from the ieee64 standard. // The 63 comes from the fact that we use a 64-bit word. // // Computing floor(log(5**power)/log(2)) could be // slow. Instead we use a fast function. // // For power in (-400,350), we have that // (((152170 + 65536) * power ) >> 16); // is equal to // floor(log(5**power)/log(2)) + power when power >= 0 // and it is equal to // ceil(log(5**-power)/log(2)) + power when power < 0 // // The 65536 is (1<<16) and corresponds to // (65536 * power) >> 16 ---> power // // ((152170 * power ) >> 16) is equal to // floor(log(5**power)/log(2)) // // Note that this is not magic: 152170/(1<<16) is // approximatively equal to log(5)/log(2). // The 1<<16 value is a power of two; we could use a // larger power of 2 if we wanted to. 
// int64_t exponent = (((152170 + 65536) * power) >> 16) + 1024 + 63; // We want the most significant bit of i to be 1. Shift if needed. int lz = leading_zeroes(i); i <<= lz; // We are going to need to do some 64-bit arithmetic to get a precise product. // We use a table lookup approach. // It is safe because // power >= smallest_power // and power <= largest_power // We recover the mantissa of the power, it has a leading 1. It is always // rounded down. // // We want the most significant 64 bits of the product. We know // this will be non-zero because the most significant bit of i is // 1. const uint32_t index = 2 * uint32_t(power - simdjson::internal::smallest_power); // Optimization: It may be that materializing the index as a variable might confuse some compilers and prevent effective complex-addressing loads. (Done for code clarity.) // // The full_multiplication function computes the 128-bit product of two 64-bit words // with a returned value of type value128 with a "low component" corresponding to the // 64-bit least significant bits of the product and with a "high component" corresponding // to the 64-bit most significant bits of the product. simdjson::internal::value128 firstproduct = full_multiplication(i, simdjson::internal::power_of_five_128[index]); // Both i and power_of_five_128[index] have their most significant bit set to 1 which // implies that the either the most or the second most significant bit of the product // is 1. We pack values in this manner for efficiency reasons: it maximizes the use // we make of the product. It also makes it easy to reason about the product: there // is 0 or 1 leading zero in the product. // Unless the least significant 9 bits of the high (64-bit) part of the full // product are all 1s, then we know that the most significant 55 bits are // exact and no further work is needed. 
Having 55 bits is necessary because // we need 53 bits for the mantissa but we have to have one rounding bit and // we can waste a bit if the most significant bit of the product is zero. if((firstproduct.high & 0x1FF) == 0x1FF) { // We want to compute i * 5^q, but only care about the top 55 bits at most. // Consider the scenario where q>=0. Then 5^q may not fit in 64-bits. Doing // the full computation is wasteful. So we do what is called a "truncated // multiplication". // We take the most significant 64-bits, and we put them in // power_of_five_128[index]. Usually, that's good enough to approximate i * 5^q // to the desired approximation using one multiplication. Sometimes it does not suffice. // Then we store the next most significant 64 bits in power_of_five_128[index + 1], and // then we get a better approximation to i * 5^q. // // That's for when q>=0. The logic for q<0 is somewhat similar but it is somewhat // more complicated. // // There is an extra layer of complexity in that we need more than 55 bits of // accuracy in the round-to-even scenario. // // The full_multiplication function computes the 128-bit product of two 64-bit words // with a returned value of type value128 with a "low component" corresponding to the // 64-bit least significant bits of the product and with a "high component" corresponding // to the 64-bit most significant bits of the product. simdjson::internal::value128 secondproduct = full_multiplication(i, simdjson::internal::power_of_five_128[index + 1]); firstproduct.low += secondproduct.high; if(secondproduct.high > firstproduct.low) { firstproduct.high++; } // As it has been proven by Noble Mushtak and Daniel Lemire in "Fast Number Parsing Without // Fallback" (https://arxiv.org/abs/2212.06644), at this point we are sure that the product // is sufficiently accurate, and more computation is not needed. } uint64_t lower = firstproduct.low; uint64_t upper = firstproduct.high; // The final mantissa should be 53 bits with a leading 1. 
// We shift it so that it occupies 54 bits with a leading 1. /////// uint64_t upperbit = upper >> 63; uint64_t mantissa = upper >> (upperbit + 9); lz += int(1 ^ upperbit); // Here we have mantissa < (1<<54). int64_t real_exponent = exponent - lz; if (simdjson_unlikely(real_exponent <= 0)) { // we have a subnormal? // Here have that real_exponent <= 0 so -real_exponent >= 0 if(-real_exponent + 1 >= 64) { // if we have more than 64 bits below the minimum exponent, you have a zero for sure. d = negative ? -0.0 : 0.0; return true; } // next line is safe because -real_exponent + 1 < 0 mantissa >>= -real_exponent + 1; // Thankfully, we can't have both "round-to-even" and subnormals because // "round-to-even" only occurs for powers close to 0. mantissa += (mantissa & 1); // round up mantissa >>= 1; // There is a weird scenario where we don't have a subnormal but just. // Suppose we start with 2.2250738585072013e-308, we end up // with 0x3fffffffffffff x 2^-1023-53 which is technically subnormal // whereas 0x40000000000000 x 2^-1023-53 is normal. Now, we need to round // up 0x3fffffffffffff x 2^-1023-53 and once we do, we are no longer // subnormal, but we can only know this after rounding. // So we only declare a subnormal if we are smaller than the threshold. real_exponent = (mantissa < (uint64_t(1) << 52)) ? 0 : 1; d = to_double(mantissa, real_exponent, negative); return true; } // We have to round to even. The "to even" part // is only a problem when we are right in between two floats // which we guard against. // If we have lots of trailing zeros, we may fall right between two // floating-point values. // // The round-to-even cases take the form of a number 2m+1 which is in (2^53,2^54] // times a power of two. That is, it is right between a number with binary significand // m and another number with binary significand m+1; and it must be the case // that it cannot be represented by a float itself. 
// // We must have that w * 10 ^q == (2m+1) * 2^p for some power of two 2^p. // Recall that 10^q = 5^q * 2^q. // When q >= 0, we must have that (2m+1) is divible by 5^q, so 5^q <= 2^54. We have that // 5^23 <= 2^54 and it is the last power of five to qualify, so q <= 23. // When q<0, we have w >= (2m+1) x 5^{-q}. We must have that w<2^{64} so // (2m+1) x 5^{-q} < 2^{64}. We have that 2m+1>2^{53}. Hence, we must have // 2^{53} x 5^{-q} < 2^{64}. // Hence we have 5^{-q} < 2^{11}$ or q>= -4. // // We require lower <= 1 and not lower == 0 because we could not prove that // that lower == 0 is implied; but we could prove that lower <= 1 is a necessary and sufficient test. if (simdjson_unlikely((lower <= 1) && (power >= -4) && (power <= 23) && ((mantissa & 3) == 1))) { if((mantissa << (upperbit + 64 - 53 - 2)) == upper) { mantissa &= ~1; // flip it so that we do not round up } } mantissa += mantissa & 1; mantissa >>= 1; // Here we have mantissa < (1<<53), unless there was an overflow if (mantissa >= (1ULL << 53)) { ////////// // This will happen when parsing values such as 7.2057594037927933e+16 //////// mantissa = (1ULL << 52); real_exponent++; } mantissa &= ~(1ULL << 52); // we have to check that real_exponent is in range, otherwise we bail out if (simdjson_unlikely(real_exponent > 2046)) { // We have an infinite value!!! We could actually throw an error here if we could. return false; } d = to_double(mantissa, real_exponent, negative); return true; } // We call a fallback floating-point parser that might be slow. Note // it will accept JSON numbers, but the JSON spec. is more restrictive so // before you call parse_float_fallback, you need to have validated the input // string with the JSON grammar. // It will return an error (false) if the parsed number is infinite. // The string parsing itself always succeeds. We know that there is at least // one digit. 
static bool parse_float_fallback(const uint8_t *ptr, double *outDouble) { *outDouble = simdjson::internal::from_chars(reinterpret_cast(ptr)); // We do not accept infinite values. // Detecting finite values in a portable manner is ridiculously hard, ideally // we would want to do: // return !std::isfinite(*outDouble); // but that mysteriously fails under legacy/old libc++ libraries, see // https://github.com/simdjson/simdjson/issues/1286 // // Therefore, fall back to this solution (the extra parens are there // to handle that max may be a macro on windows). return !(*outDouble > (std::numeric_limits::max)() || *outDouble < std::numeric_limits::lowest()); } static bool parse_float_fallback(const uint8_t *ptr, const uint8_t *end_ptr, double *outDouble) { *outDouble = simdjson::internal::from_chars(reinterpret_cast(ptr), reinterpret_cast(end_ptr)); // We do not accept infinite values. // Detecting finite values in a portable manner is ridiculously hard, ideally // we would want to do: // return !std::isfinite(*outDouble); // but that mysteriously fails under legacy/old libc++ libraries, see // https://github.com/simdjson/simdjson/issues/1286 // // Therefore, fall back to this solution (the extra parens are there // to handle that max may be a macro on windows). 
return !(*outDouble > (std::numeric_limits::max)() || *outDouble < std::numeric_limits::lowest()); } // check quickly whether the next 8 chars are made of digits // at a glance, it looks better than Mula's // http://0x80.pl/articles/swar-digits-validate.html simdjson_inline bool is_made_of_eight_digits_fast(const uint8_t *chars) { uint64_t val; // this can read up to 7 bytes beyond the buffer size, but we require // SIMDJSON_PADDING of padding static_assert(7 <= SIMDJSON_PADDING, "SIMDJSON_PADDING must be bigger than 7"); std::memcpy(&val, chars, 8); // a branchy method might be faster: // return (( val & 0xF0F0F0F0F0F0F0F0 ) == 0x3030303030303030) // && (( (val + 0x0606060606060606) & 0xF0F0F0F0F0F0F0F0 ) == // 0x3030303030303030); return (((val & 0xF0F0F0F0F0F0F0F0) | (((val + 0x0606060606060606) & 0xF0F0F0F0F0F0F0F0) >> 4)) == 0x3333333333333333); } template SIMDJSON_NO_SANITIZE_UNDEFINED // We deliberately allow overflow here and check later simdjson_inline bool parse_digit(const uint8_t c, I &i) { const uint8_t digit = static_cast(c - '0'); if (digit > 9) { return false; } // PERF NOTE: multiplication by 10 is cheaper than arbitrary integer multiplication i = 10 * i + digit; // might overflow, we will handle the overflow later return true; } simdjson_inline error_code parse_decimal_after_separator(simdjson_unused const uint8_t *const src, const uint8_t *&p, uint64_t &i, int64_t &exponent) { // we continue with the fiction that we have an integer. If the // floating point number is representable as x * 10^z for some integer // z that fits in 53 bits, then we will be able to convert back the // the integer into a float in a lossless manner. const uint8_t *const first_after_period = p; #ifdef SIMDJSON_SWAR_NUMBER_PARSING #if SIMDJSON_SWAR_NUMBER_PARSING // this helps if we have lots of decimals! // this turns out to be frequent enough. 
if (is_made_of_eight_digits_fast(p)) { i = i * 100000000 + parse_eight_digits_unrolled(p); p += 8; } #endif // SIMDJSON_SWAR_NUMBER_PARSING #endif // #ifdef SIMDJSON_SWAR_NUMBER_PARSING // Unrolling the first digit makes a small difference on some implementations (e.g. westmere) if (parse_digit(*p, i)) { ++p; } while (parse_digit(*p, i)) { p++; } exponent = first_after_period - p; // Decimal without digits (123.) is illegal if (exponent == 0) { return INVALID_NUMBER(src); } return SUCCESS; } simdjson_inline error_code parse_exponent(simdjson_unused const uint8_t *const src, const uint8_t *&p, int64_t &exponent) { // Exp Sign: -123.456e[-]78 bool neg_exp = ('-' == *p); if (neg_exp || '+' == *p) { p++; } // Skip + as well // Exponent: -123.456e-[78] auto start_exp = p; int64_t exp_number = 0; while (parse_digit(*p, exp_number)) { ++p; } // It is possible for parse_digit to overflow. // In particular, it could overflow to INT64_MIN, and we cannot do - INT64_MIN. // Thus we *must* check for possible overflow before we negate exp_number. // Performance notes: it may seem like combining the two "simdjson_unlikely checks" below into // a single simdjson_unlikely path would be faster. The reasoning is sound, but the compiler may // not oblige and may, in fact, generate two distinct paths in any case. It might be // possible to do uint64_t(p - start_exp - 1) >= 18 but it could end up trading off // instructions for a simdjson_likely branch, an unconclusive gain. // If there were no digits, it's an error. if (simdjson_unlikely(p == start_exp)) { return INVALID_NUMBER(src); } // We have a valid positive exponent in exp_number at this point, except that // it may have overflowed. // If there were more than 18 digits, we may have overflowed the integer. We have to do // something!!!! 
if (simdjson_unlikely(p > start_exp+18)) { // Skip leading zeroes: 1e000000000000000000001 is technically valid and doesn't overflow while (*start_exp == '0') { start_exp++; } // 19 digits could overflow int64_t and is kind of absurd anyway. We don't // support exponents smaller than -999,999,999,999,999,999 and bigger // than 999,999,999,999,999,999. // We can truncate. // Note that 999999999999999999 is assuredly too large. The maximal ieee64 value before // infinity is ~1.8e308. The smallest subnormal is ~5e-324. So, actually, we could // truncate at 324. // Note that there is no reason to fail per se at this point in time. // E.g., 0e999999999999999999999 is a fine number. if (p > start_exp+18) { exp_number = 999999999999999999; } } // At this point, we know that exp_number is a sane, positive, signed integer. // It is <= 999,999,999,999,999,999. As long as 'exponent' is in // [-8223372036854775808, 8223372036854775808], we won't overflow. Because 'exponent' // is bounded in magnitude by the size of the JSON input, we are fine in this universe. // To sum it up: the next line should never overflow. exponent += (neg_exp ? -exp_number : exp_number); return SUCCESS; } simdjson_inline size_t significant_digits(const uint8_t * start_digits, size_t digit_count) { // It is possible that the integer had an overflow. // We have to handle the case where we have 0.0000somenumber. const uint8_t *start = start_digits; while ((*start == '0') || (*start == '.')) { ++start; } // we over-decrement by one when there is a '.' 
return digit_count - size_t(start - start_digits); } } // unnamed namespace /** @private */ template error_code slow_float_parsing(simdjson_unused const uint8_t * src, W writer) { double d; if (parse_float_fallback(src, &d)) { writer.append_double(d); return SUCCESS; } return INVALID_NUMBER(src); } /** @private */ template simdjson_inline error_code write_float(const uint8_t *const src, bool negative, uint64_t i, const uint8_t * start_digits, size_t digit_count, int64_t exponent, W &writer) { // If we frequently had to deal with long strings of digits, // we could extend our code by using a 128-bit integer instead // of a 64-bit integer. However, this is uncommon in practice. // // 9999999999999999999 < 2**64 so we can accommodate 19 digits. // If we have a decimal separator, then digit_count - 1 is the number of digits, but we // may not have a decimal separator! if (simdjson_unlikely(digit_count > 19 && significant_digits(start_digits, digit_count) > 19)) { // Ok, chances are good that we had an overflow! // this is almost never going to get called!!! // we start anew, going slowly!!! // This will happen in the following examples: // 10000000000000000000000000000000000000000000e+308 // 3.1415926535897932384626433832795028841971693993751 // // NOTE: This makes a *copy* of the writer and passes it to slow_float_parsing. This happens // because slow_float_parsing is a non-inlined function. If we passed our writer reference to // it, it would force it to be stored in memory, preventing the compiler from picking it apart // and putting into registers. i.e. if we pass it as reference, it gets slow. // This is what forces the skip_double, as well. 
error_code error = slow_float_parsing(src, writer); writer.skip_double(); return error; } // NOTE: it's weird that the simdjson_unlikely() only wraps half the if, but it seems to get slower any other // way we've tried: https://github.com/simdjson/simdjson/pull/990#discussion_r448497331 // To future reader: we'd love if someone found a better way, or at least could explain this result! if (simdjson_unlikely(exponent < simdjson::internal::smallest_power) || (exponent > simdjson::internal::largest_power)) { // // Important: smallest_power is such that it leads to a zero value. // Observe that 18446744073709551615e-343 == 0, i.e. (2**64 - 1) e -343 is zero // so something x 10^-343 goes to zero, but not so with something x 10^-342. static_assert(simdjson::internal::smallest_power <= -342, "smallest_power is not small enough"); // if((exponent < simdjson::internal::smallest_power) || (i == 0)) { // E.g. Parse "-0.0e-999" into the same value as "-0.0". See https://en.wikipedia.org/wiki/Signed_zero WRITE_DOUBLE(negative ? -0.0 : 0.0, src, writer); return SUCCESS; } else { // (exponent > largest_power) and (i != 0) // We have, for sure, an infinite value and simdjson refuses to parse infinite values. return INVALID_NUMBER(src); } } double d; if (!compute_float_64(exponent, i, negative, d)) { // we are almost never going to get here. 
if (!parse_float_fallback(src, &d)) { return INVALID_NUMBER(src); } } WRITE_DOUBLE(d, src, writer); return SUCCESS; } // for performance analysis, it is sometimes useful to skip parsing #ifdef SIMDJSON_SKIPNUMBERPARSING template simdjson_inline error_code parse_number(const uint8_t *const, W &writer) { writer.append_s64(0); // always write zero return SUCCESS; // always succeeds } simdjson_unused simdjson_inline simdjson_result parse_unsigned(const uint8_t * const src) noexcept { return 0; } simdjson_unused simdjson_inline simdjson_result parse_integer(const uint8_t * const src) noexcept { return 0; } simdjson_unused simdjson_inline simdjson_result parse_double(const uint8_t * const src) noexcept { return 0; } simdjson_unused simdjson_inline simdjson_result parse_unsigned_in_string(const uint8_t * const src) noexcept { return 0; } simdjson_unused simdjson_inline simdjson_result parse_integer_in_string(const uint8_t * const src) noexcept { return 0; } simdjson_unused simdjson_inline simdjson_result parse_double_in_string(const uint8_t * const src) noexcept { return 0; } simdjson_unused simdjson_inline bool is_negative(const uint8_t * src) noexcept { return false; } simdjson_unused simdjson_inline simdjson_result is_integer(const uint8_t * src) noexcept { return false; } simdjson_unused simdjson_inline simdjson_result get_number_type(const uint8_t * src) noexcept { return number_type::signed_integer; } #else // parse the number at src // define JSON_TEST_NUMBERS for unit testing // // It is assumed that the number is followed by a structural ({,},],[) character // or a white space character. If that is not the case (e.g., when the JSON // document is made of a single number), then it is necessary to copy the // content and append a space before calling this function. // // Our objective is accurate parsing (ULP of 0) at high speed. 
template simdjson_inline error_code parse_number(const uint8_t *const src, W &writer) { // // Check for minus sign // bool negative = (*src == '-'); const uint8_t *p = src + uint8_t(negative); // // Parse the integer part. // // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare const uint8_t *const start_digits = p; uint64_t i = 0; while (parse_digit(*p, i)) { p++; } // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. // Optimization note: size_t is expected to be unsigned. size_t digit_count = size_t(p - start_digits); if (digit_count == 0 || ('0' == *start_digits && digit_count > 1)) { return INVALID_NUMBER(src); } // // Handle floats if there is a . or e (or both) // int64_t exponent = 0; bool is_float = false; if ('.' == *p) { is_float = true; ++p; SIMDJSON_TRY( parse_decimal_after_separator(src, p, i, exponent) ); digit_count = int(p - start_digits); // used later to guard against overflows } if (('e' == *p) || ('E' == *p)) { is_float = true; ++p; SIMDJSON_TRY( parse_exponent(src, p, exponent) ); } if (is_float) { const bool dirty_end = jsoncharutils::is_not_structural_or_whitespace(*p); SIMDJSON_TRY( write_float(src, negative, i, start_digits, digit_count, exponent, writer) ); if (dirty_end) { return INVALID_NUMBER(src); } return SUCCESS; } // The longest negative 64-bit number is 19 digits. // The longest positive 64-bit number is 20 digits. // We do it this way so we don't trigger this branch unless we must. size_t longest_digit_count = negative ? 
19 : 20; if (digit_count > longest_digit_count) { return INVALID_NUMBER(src); } if (digit_count == longest_digit_count) { if (negative) { // Anything negative above INT64_MAX+1 is invalid if (i > uint64_t(INT64_MAX)+1) { return INVALID_NUMBER(src); } WRITE_INTEGER(~i+1, src, writer); if (jsoncharutils::is_not_structural_or_whitespace(*p)) { return INVALID_NUMBER(src); } return SUCCESS; // Positive overflow check: // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the // biggest uint64_t. // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. // If we got here, it's a 20 digit number starting with the digit "1". // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller // than 1,553,255,926,290,448,384. // - That is smaller than the smallest possible 20-digit number the user could write: // 10,000,000,000,000,000,000. // - Therefore, if the number is positive and lower than that, it's overflow. // - The value we are looking at is less than or equal to INT64_MAX. // } else if (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INVALID_NUMBER(src); } } // Write unsigned if it doesn't fit in a signed integer. if (i > uint64_t(INT64_MAX)) { WRITE_UNSIGNED(i, src, writer); } else { WRITE_INTEGER(negative ? (~i+1) : i, src, writer); } if (jsoncharutils::is_not_structural_or_whitespace(*p)) { return INVALID_NUMBER(src); } return SUCCESS; } // Inlineable functions namespace { // This table can be used to characterize the final character of an integer // string. For JSON structural character and allowable white space characters, // we return SUCCESS. For 'e', '.' and 'E', we return INCORRECT_TYPE. Otherwise // we return NUMBER_ERROR. // Optimization note: we could easily reduce the size of the table by half (to 128) // at the cost of an extra branch. 
// Optimization note: we want the values to use at most 8 bits (not, e.g., 32 bits): static_assert(error_code(uint8_t(NUMBER_ERROR))== NUMBER_ERROR, "bad NUMBER_ERROR cast"); static_assert(error_code(uint8_t(SUCCESS))== SUCCESS, "bad NUMBER_ERROR cast"); static_assert(error_code(uint8_t(INCORRECT_TYPE))== INCORRECT_TYPE, "bad NUMBER_ERROR cast"); const uint8_t integer_string_finisher[256] = { NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, SUCCESS, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, INCORRECT_TYPE, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, INCORRECT_TYPE, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, INCORRECT_TYPE, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, 
NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR}; // Parse any number from 0 to 18,446,744,073,709,551,615 
simdjson_unused simdjson_inline simdjson_result parse_unsigned(const uint8_t * const src) noexcept { const uint8_t *p = src; // // Parse the integer part. // // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare const uint8_t *const start_digits = p; uint64_t i = 0; while (parse_digit(*p, i)) { p++; } // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. // Optimization note: size_t is expected to be unsigned. size_t digit_count = size_t(p - start_digits); // The longest positive 64-bit number is 20 digits. // We do it this way so we don't trigger this branch unless we must. // Optimization note: the compiler can probably merge // ((digit_count == 0) || (digit_count > 20)) // into a single branch since digit_count is unsigned. if ((digit_count == 0) || (digit_count > 20)) { return INCORRECT_TYPE; } // Here digit_count > 0. if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } // We can do the following... // if (!jsoncharutils::is_structural_or_whitespace(*p)) { // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; // } // as a single table lookup: if (integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } if (digit_count == 20) { // Positive overflow check: // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the // biggest uint64_t. // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. // If we got here, it's a 20 digit number starting with the digit "1". // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller // than 1,553,255,926,290,448,384. // - That is smaller than the smallest possible 20-digit number the user could write: // 10,000,000,000,000,000,000. // - Therefore, if the number is positive and lower than that, it's overflow. 
// - The value we are looking at is less than or equal to INT64_MAX. // if (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INCORRECT_TYPE; } } return i; } // Parse any number from 0 to 18,446,744,073,709,551,615 // Never read at src_end or beyond simdjson_unused simdjson_inline simdjson_result parse_unsigned(const uint8_t * const src, const uint8_t * const src_end) noexcept { const uint8_t *p = src; // // Parse the integer part. // // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare const uint8_t *const start_digits = p; uint64_t i = 0; while ((p != src_end) && parse_digit(*p, i)) { p++; } // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. // Optimization note: size_t is expected to be unsigned. size_t digit_count = size_t(p - start_digits); // The longest positive 64-bit number is 20 digits. // We do it this way so we don't trigger this branch unless we must. // Optimization note: the compiler can probably merge // ((digit_count == 0) || (digit_count > 20)) // into a single branch since digit_count is unsigned. if ((digit_count == 0) || (digit_count > 20)) { return INCORRECT_TYPE; } // Here digit_count > 0. if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } // We can do the following... // if (!jsoncharutils::is_structural_or_whitespace(*p)) { // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; // } // as a single table lookup: if ((p != src_end) && integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } if (digit_count == 20) { // Positive overflow check: // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the // biggest uint64_t. // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. // If we got here, it's a 20 digit number starting with the digit "1". 
// - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller // than 1,553,255,926,290,448,384. // - That is smaller than the smallest possible 20-digit number the user could write: // 10,000,000,000,000,000,000. // - Therefore, if the number is positive and lower than that, it's overflow. // - The value we are looking at is less than or equal to INT64_MAX. // if (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INCORRECT_TYPE; } } return i; } // Parse any number from 0 to 18,446,744,073,709,551,615 simdjson_unused simdjson_inline simdjson_result parse_unsigned_in_string(const uint8_t * const src) noexcept { const uint8_t *p = src + 1; // // Parse the integer part. // // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare const uint8_t *const start_digits = p; uint64_t i = 0; while (parse_digit(*p, i)) { p++; } // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. // Optimization note: size_t is expected to be unsigned. size_t digit_count = size_t(p - start_digits); // The longest positive 64-bit number is 20 digits. // We do it this way so we don't trigger this branch unless we must. // Optimization note: the compiler can probably merge // ((digit_count == 0) || (digit_count > 20)) // into a single branch since digit_count is unsigned. if ((digit_count == 0) || (digit_count > 20)) { return INCORRECT_TYPE; } // Here digit_count > 0. if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } // We can do the following... // if (!jsoncharutils::is_structural_or_whitespace(*p)) { // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; // } // as a single table lookup: if (*p != '"') { return NUMBER_ERROR; } if (digit_count == 20) { // Positive overflow check: // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the // biggest uint64_t. 
// - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. // If we got here, it's a 20 digit number starting with the digit "1". // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller // than 1,553,255,926,290,448,384. // - That is smaller than the smallest possible 20-digit number the user could write: // 10,000,000,000,000,000,000. // - Therefore, if the number is positive and lower than that, it's overflow. // - The value we are looking at is less than or equal to INT64_MAX. // // Note: we use src[1] and not src[0] because src[0] is the quote character in this // instance. if (src[1] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INCORRECT_TYPE; } } return i; } // Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 simdjson_unused simdjson_inline simdjson_result parse_integer(const uint8_t *src) noexcept { // // Check for minus sign // bool negative = (*src == '-'); const uint8_t *p = src + uint8_t(negative); // // Parse the integer part. // // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare const uint8_t *const start_digits = p; uint64_t i = 0; while (parse_digit(*p, i)) { p++; } // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. // Optimization note: size_t is expected to be unsigned. size_t digit_count = size_t(p - start_digits); // We go from // -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 // so we can never represent numbers that have more than 19 digits. size_t longest_digit_count = 19; // Optimization note: the compiler can probably merge // ((digit_count == 0) || (digit_count > longest_digit_count)) // into a single branch since digit_count is unsigned. if ((digit_count == 0) || (digit_count > longest_digit_count)) { return INCORRECT_TYPE; } // Here digit_count > 0. 
if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } // We can do the following... // if (!jsoncharutils::is_structural_or_whitespace(*p)) { // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; // } // as a single table lookup: if(integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } // Negative numbers have can go down to - INT64_MAX - 1 whereas positive numbers are limited to INT64_MAX. // Performance note: This check is only needed when digit_count == longest_digit_count but it is // so cheap that we might as well always make it. if(i > uint64_t(INT64_MAX) + uint64_t(negative)) { return INCORRECT_TYPE; } return negative ? (~i+1) : i; } // Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 // Never read at src_end or beyond simdjson_unused simdjson_inline simdjson_result parse_integer(const uint8_t * const src, const uint8_t * const src_end) noexcept { // // Check for minus sign // if(src == src_end) { return NUMBER_ERROR; } bool negative = (*src == '-'); const uint8_t *p = src + uint8_t(negative); // // Parse the integer part. // // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare const uint8_t *const start_digits = p; uint64_t i = 0; while ((p != src_end) && parse_digit(*p, i)) { p++; } // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. // Optimization note: size_t is expected to be unsigned. size_t digit_count = size_t(p - start_digits); // We go from // -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 // so we can never represent numbers that have more than 19 digits. size_t longest_digit_count = 19; // Optimization note: the compiler can probably merge // ((digit_count == 0) || (digit_count > longest_digit_count)) // into a single branch since digit_count is unsigned. 
if ((digit_count == 0) || (digit_count > longest_digit_count)) { return INCORRECT_TYPE; } // Here digit_count > 0. if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } // We can do the following... // if (!jsoncharutils::is_structural_or_whitespace(*p)) { // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; // } // as a single table lookup: if((p != src_end) && integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } // Negative numbers have can go down to - INT64_MAX - 1 whereas positive numbers are limited to INT64_MAX. // Performance note: This check is only needed when digit_count == longest_digit_count but it is // so cheap that we might as well always make it. if(i > uint64_t(INT64_MAX) + uint64_t(negative)) { return INCORRECT_TYPE; } return negative ? (~i+1) : i; } // Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 simdjson_unused simdjson_inline simdjson_result parse_integer_in_string(const uint8_t *src) noexcept { // // Check for minus sign // bool negative = (*(src + 1) == '-'); src += uint8_t(negative) + 1; // // Parse the integer part. // // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare const uint8_t *const start_digits = src; uint64_t i = 0; while (parse_digit(*src, i)) { src++; } // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. // Optimization note: size_t is expected to be unsigned. size_t digit_count = size_t(src - start_digits); // We go from // -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 // so we can never represent numbers that have more than 19 digits. size_t longest_digit_count = 19; // Optimization note: the compiler can probably merge // ((digit_count == 0) || (digit_count > longest_digit_count)) // into a single branch since digit_count is unsigned. 
if ((digit_count == 0) || (digit_count > longest_digit_count)) { return INCORRECT_TYPE; } // Here digit_count > 0. if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } // We can do the following... // if (!jsoncharutils::is_structural_or_whitespace(*src)) { // return (*src == '.' || *src == 'e' || *src == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; // } // as a single table lookup: if(*src != '"') { return NUMBER_ERROR; } // Negative numbers have can go down to - INT64_MAX - 1 whereas positive numbers are limited to INT64_MAX. // Performance note: This check is only needed when digit_count == longest_digit_count but it is // so cheap that we might as well always make it. if(i > uint64_t(INT64_MAX) + uint64_t(negative)) { return INCORRECT_TYPE; } return negative ? (~i+1) : i; } simdjson_unused simdjson_inline simdjson_result parse_double(const uint8_t * src) noexcept { // // Check for minus sign // bool negative = (*src == '-'); src += uint8_t(negative); // // Parse the integer part. // uint64_t i = 0; const uint8_t *p = src; p += parse_digit(*p, i); bool leading_zero = (i == 0); while (parse_digit(*p, i)) { p++; } // no integer digits, or 0123 (zero must be solo) if ( p == src ) { return INCORRECT_TYPE; } if ( (leading_zero && p != src+1)) { return NUMBER_ERROR; } // // Parse the decimal part. // int64_t exponent = 0; bool overflow; if (simdjson_likely(*p == '.')) { p++; const uint8_t *start_decimal_digits = p; if (!parse_digit(*p, i)) { return NUMBER_ERROR; } // no decimal digits p++; while (parse_digit(*p, i)) { p++; } exponent = -(p - start_decimal_digits); // Overflow check. More than 19 digits (minus the decimal) may be overflow. 
overflow = p-src-1 > 19; if (simdjson_unlikely(overflow && leading_zero)) { // Skip leading 0.00000 and see if it still overflows const uint8_t *start_digits = src + 2; while (*start_digits == '0') { start_digits++; } overflow = p-start_digits > 19; } } else { overflow = p-src > 19; } // // Parse the exponent // if (*p == 'e' || *p == 'E') { p++; bool exp_neg = *p == '-'; p += exp_neg || *p == '+'; uint64_t exp = 0; const uint8_t *start_exp_digits = p; while (parse_digit(*p, exp)) { p++; } // no exp digits, or 20+ exp digits if (p-start_exp_digits == 0 || p-start_exp_digits > 19) { return NUMBER_ERROR; } exponent += exp_neg ? 0-exp : exp; } if (jsoncharutils::is_not_structural_or_whitespace(*p)) { return NUMBER_ERROR; } overflow = overflow || exponent < simdjson::internal::smallest_power || exponent > simdjson::internal::largest_power; // // Assemble (or slow-parse) the float // double d; if (simdjson_likely(!overflow)) { if (compute_float_64(exponent, i, negative, d)) { return d; } } if (!parse_float_fallback(src - uint8_t(negative), &d)) { return NUMBER_ERROR; } return d; } simdjson_unused simdjson_inline bool is_negative(const uint8_t * src) noexcept { return (*src == '-'); } simdjson_unused simdjson_inline simdjson_result is_integer(const uint8_t * src) noexcept { bool negative = (*src == '-'); src += uint8_t(negative); const uint8_t *p = src; while(static_cast(*p - '0') <= 9) { p++; } if ( p == src ) { return NUMBER_ERROR; } if (jsoncharutils::is_structural_or_whitespace(*p)) { return true; } return false; } simdjson_unused simdjson_inline simdjson_result get_number_type(const uint8_t * src) noexcept { bool negative = (*src == '-'); src += uint8_t(negative); const uint8_t *p = src; while(static_cast(*p - '0') <= 9) { p++; } if ( p == src ) { return NUMBER_ERROR; } if (jsoncharutils::is_structural_or_whitespace(*p)) { // We have an integer. // If the number is negative and valid, it must be a signed integer. 
if(negative) { return number_type::signed_integer; } // We want values larger or equal to 9223372036854775808 to be unsigned // integers, and the other values to be signed integers. int digit_count = int(p - src); if(digit_count >= 19) { const uint8_t * smaller_big_integer = reinterpret_cast("9223372036854775808"); if((digit_count >= 20) || (memcmp(src, smaller_big_integer, 19) >= 0)) { return number_type::unsigned_integer; } } return number_type::signed_integer; } // Hopefully, we have 'e' or 'E' or '.'. return number_type::floating_point_number; } // Never read at src_end or beyond simdjson_unused simdjson_inline simdjson_result parse_double(const uint8_t * src, const uint8_t * const src_end) noexcept { if(src == src_end) { return NUMBER_ERROR; } // // Check for minus sign // bool negative = (*src == '-'); src += uint8_t(negative); // // Parse the integer part. // uint64_t i = 0; const uint8_t *p = src; if(p == src_end) { return NUMBER_ERROR; } p += parse_digit(*p, i); bool leading_zero = (i == 0); while ((p != src_end) && parse_digit(*p, i)) { p++; } // no integer digits, or 0123 (zero must be solo) if ( p == src ) { return INCORRECT_TYPE; } if ( (leading_zero && p != src+1)) { return NUMBER_ERROR; } // // Parse the decimal part. // int64_t exponent = 0; bool overflow; if (simdjson_likely((p != src_end) && (*p == '.'))) { p++; const uint8_t *start_decimal_digits = p; if ((p == src_end) || !parse_digit(*p, i)) { return NUMBER_ERROR; } // no decimal digits p++; while ((p != src_end) && parse_digit(*p, i)) { p++; } exponent = -(p - start_decimal_digits); // Overflow check. More than 19 digits (minus the decimal) may be overflow. 
overflow = p-src-1 > 19; if (simdjson_unlikely(overflow && leading_zero)) { // Skip leading 0.00000 and see if it still overflows const uint8_t *start_digits = src + 2; while (*start_digits == '0') { start_digits++; } overflow = start_digits-src > 19; } } else { overflow = p-src > 19; } // // Parse the exponent // if ((p != src_end) && (*p == 'e' || *p == 'E')) { p++; if(p == src_end) { return NUMBER_ERROR; } bool exp_neg = *p == '-'; p += exp_neg || *p == '+'; uint64_t exp = 0; const uint8_t *start_exp_digits = p; while ((p != src_end) && parse_digit(*p, exp)) { p++; } // no exp digits, or 20+ exp digits if (p-start_exp_digits == 0 || p-start_exp_digits > 19) { return NUMBER_ERROR; } exponent += exp_neg ? 0-exp : exp; } if ((p != src_end) && jsoncharutils::is_not_structural_or_whitespace(*p)) { return NUMBER_ERROR; } overflow = overflow || exponent < simdjson::internal::smallest_power || exponent > simdjson::internal::largest_power; // // Assemble (or slow-parse) the float // double d; if (simdjson_likely(!overflow)) { if (compute_float_64(exponent, i, negative, d)) { return d; } } if (!parse_float_fallback(src - uint8_t(negative), src_end, &d)) { return NUMBER_ERROR; } return d; } simdjson_unused simdjson_inline simdjson_result parse_double_in_string(const uint8_t * src) noexcept { // // Check for minus sign // bool negative = (*(src + 1) == '-'); src += uint8_t(negative) + 1; // // Parse the integer part. // uint64_t i = 0; const uint8_t *p = src; p += parse_digit(*p, i); bool leading_zero = (i == 0); while (parse_digit(*p, i)) { p++; } // no integer digits, or 0123 (zero must be solo) if ( p == src ) { return INCORRECT_TYPE; } if ( (leading_zero && p != src+1)) { return NUMBER_ERROR; } // // Parse the decimal part. 
// int64_t exponent = 0; bool overflow; if (simdjson_likely(*p == '.')) { p++; const uint8_t *start_decimal_digits = p; if (!parse_digit(*p, i)) { return NUMBER_ERROR; } // no decimal digits p++; while (parse_digit(*p, i)) { p++; } exponent = -(p - start_decimal_digits); // Overflow check. More than 19 digits (minus the decimal) may be overflow. overflow = p-src-1 > 19; if (simdjson_unlikely(overflow && leading_zero)) { // Skip leading 0.00000 and see if it still overflows const uint8_t *start_digits = src + 2; while (*start_digits == '0') { start_digits++; } overflow = p-start_digits > 19; } } else { overflow = p-src > 19; } // // Parse the exponent // if (*p == 'e' || *p == 'E') { p++; bool exp_neg = *p == '-'; p += exp_neg || *p == '+'; uint64_t exp = 0; const uint8_t *start_exp_digits = p; while (parse_digit(*p, exp)) { p++; } // no exp digits, or 20+ exp digits if (p-start_exp_digits == 0 || p-start_exp_digits > 19) { return NUMBER_ERROR; } exponent += exp_neg ? 0-exp : exp; } if (*p != '"') { return NUMBER_ERROR; } overflow = overflow || exponent < simdjson::internal::smallest_power || exponent > simdjson::internal::largest_power; // // Assemble (or slow-parse) the float // double d; if (simdjson_likely(!overflow)) { if (compute_float_64(exponent, i, negative, d)) { return d; } } if (!parse_float_fallback(src - uint8_t(negative), &d)) { return NUMBER_ERROR; } return d; } } // unnamed namespace #endif // SIMDJSON_SKIPNUMBERPARSING } // namespace numberparsing inline std::ostream& operator<<(std::ostream& out, number_type type) noexcept { switch (type) { case number_type::signed_integer: out << "integer in [-9223372036854775808,9223372036854775808)"; break; case number_type::unsigned_integer: out << "unsigned integer in [9223372036854775808,18446744073709551616)"; break; case number_type::floating_point_number: out << "floating-point number (binary64)"; break; default: SIMDJSON_UNREACHABLE(); } return out; } } // namespace haswell } // namespace simdjson #endif 
// SIMDJSON_GENERIC_NUMBERPARSING_H /* end file simdjson/generic/numberparsing.h for haswell */ /* including simdjson/generic/implementation_simdjson_result_base-inl.h for haswell: #include "simdjson/generic/implementation_simdjson_result_base-inl.h" */ /* begin file simdjson/generic/implementation_simdjson_result_base-inl.h for haswell */ #ifndef SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_INL_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_INL_H */ /* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { namespace haswell { // // internal::implementation_simdjson_result_base inline implementation // template simdjson_inline void implementation_simdjson_result_base::tie(T &value, error_code &error) && noexcept { error = this->second; if (!error) { value = std::forward>(*this).first; } } template simdjson_warn_unused simdjson_inline error_code implementation_simdjson_result_base::get(T &value) && noexcept { error_code error; std::forward>(*this).tie(value, error); return error; } template simdjson_inline error_code implementation_simdjson_result_base::error() const noexcept { return this->second; } #if SIMDJSON_EXCEPTIONS template simdjson_inline T& implementation_simdjson_result_base::value() & noexcept(false) { if (error()) { throw simdjson_error(error()); } return this->first; } template simdjson_inline T&& implementation_simdjson_result_base::value() && noexcept(false) { return std::forward>(*this).take_value(); } template simdjson_inline T&& implementation_simdjson_result_base::take_value() && noexcept(false) { if (error()) { throw simdjson_error(error()); } return std::forward(this->first); } 
template simdjson_inline implementation_simdjson_result_base::operator T&&() && noexcept(false) { return std::forward>(*this).take_value(); } #endif // SIMDJSON_EXCEPTIONS template simdjson_inline const T& implementation_simdjson_result_base::value_unsafe() const& noexcept { return this->first; } template simdjson_inline T& implementation_simdjson_result_base::value_unsafe() & noexcept { return this->first; } template simdjson_inline T&& implementation_simdjson_result_base::value_unsafe() && noexcept { return std::forward(this->first); } template simdjson_inline implementation_simdjson_result_base::implementation_simdjson_result_base(T &&value, error_code error) noexcept : first{std::forward(value)}, second{error} {} template simdjson_inline implementation_simdjson_result_base::implementation_simdjson_result_base(error_code error) noexcept : implementation_simdjson_result_base(T{}, error) {} template simdjson_inline implementation_simdjson_result_base::implementation_simdjson_result_base(T &&value) noexcept : implementation_simdjson_result_base(std::forward(value), SUCCESS) {} } // namespace haswell } // namespace simdjson #endif // SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_INL_H /* end file simdjson/generic/implementation_simdjson_result_base-inl.h for haswell */ /* end file simdjson/generic/amalgamated.h for haswell */ /* including simdjson/haswell/end.h: #include "simdjson/haswell/end.h" */ /* begin file simdjson/haswell/end.h */ /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #include "simdjson/haswell/base.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ #if !SIMDJSON_CAN_ALWAYS_RUN_HASWELL SIMDJSON_UNTARGET_REGION #endif /* undefining SIMDJSON_IMPLEMENTATION from "haswell" */ #undef SIMDJSON_IMPLEMENTATION /* end file simdjson/haswell/end.h */ #endif // SIMDJSON_HASWELL_H /* end file simdjson/haswell.h */ #elif 
SIMDJSON_BUILTIN_IMPLEMENTATION_IS(icelake) /* including simdjson/icelake.h: #include "simdjson/icelake.h" */ /* begin file simdjson/icelake.h */ #ifndef SIMDJSON_ICELAKE_H #define SIMDJSON_ICELAKE_H /* including simdjson/icelake/begin.h: #include "simdjson/icelake/begin.h" */ /* begin file simdjson/icelake/begin.h */ /* defining SIMDJSON_IMPLEMENTATION to "icelake" */ #define SIMDJSON_IMPLEMENTATION icelake /* including simdjson/icelake/base.h: #include "simdjson/icelake/base.h" */ /* begin file simdjson/icelake/base.h */ #ifndef SIMDJSON_ICELAKE_BASE_H #define SIMDJSON_ICELAKE_BASE_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #include "simdjson/base.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ // The constructor may be executed on any host, so we take care not to use SIMDJSON_TARGET_ICELAKE namespace simdjson { /** * Implementation for Icelake (Intel AVX512). */ namespace icelake { class implementation; } // namespace icelake } // namespace simdjson #endif // SIMDJSON_ICELAKE_BASE_H /* end file simdjson/icelake/base.h */ /* including simdjson/icelake/intrinsics.h: #include "simdjson/icelake/intrinsics.h" */ /* begin file simdjson/icelake/intrinsics.h */ #ifndef SIMDJSON_ICELAKE_INTRINSICS_H #define SIMDJSON_ICELAKE_INTRINSICS_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #include "simdjson/icelake/base.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ #if SIMDJSON_VISUAL_STUDIO // under clang within visual studio, this will include #include // visual studio or clang #else #include // elsewhere #endif // SIMDJSON_VISUAL_STUDIO #if SIMDJSON_CLANG_VISUAL_STUDIO /** * You are not supposed, normally, to include these * headers directly. Instead you should either include intrin.h * or x86intrin.h. 
However, when compiling with clang * under Windows (i.e., when _MSC_VER is set), these headers * only get included *if* the corresponding features are detected * from macros: * e.g., if __AVX2__ is set... in turn, we normally set these * macros by compiling against the corresponding architecture * (e.g., arch:AVX2, -mavx2, etc.) which compiles the whole * software with these advanced instructions. In simdjson, we * want to compile the whole program for a generic target, * and only target our specific kernels. As a workaround, * we directly include the needed headers. These headers would * normally guard against such usage, but we carefully included * (or ) before, so the headers * are fooled. */ #include // for _blsr_u64 #include // for __lzcnt64 #include // for most things (AVX2, AVX512, _popcnt64) #include #include #include #include #include // for _mm_clmulepi64_si128 // Important: we need the AVX-512 headers: #include #include #include #include #include #include #include // unfortunately, we may not get _blsr_u64, but, thankfully, clang // has it as a macro. 
#ifndef _blsr_u64 // we roll our own #define _blsr_u64(n) ((n - 1) & n) #endif // _blsr_u64 #endif // SIMDJSON_CLANG_VISUAL_STUDIO static_assert(sizeof(__m512i) <= simdjson::SIMDJSON_PADDING, "insufficient padding for icelake"); #endif // SIMDJSON_ICELAKE_INTRINSICS_H /* end file simdjson/icelake/intrinsics.h */ #if !SIMDJSON_CAN_ALWAYS_RUN_ICELAKE SIMDJSON_TARGET_REGION("avx512f,avx512dq,avx512cd,avx512bw,avx512vbmi,avx512vbmi2,avx512vl,avx2,bmi,pclmul,lzcnt,popcnt") #endif /* including simdjson/icelake/bitmanipulation.h: #include "simdjson/icelake/bitmanipulation.h" */ /* begin file simdjson/icelake/bitmanipulation.h */ #ifndef SIMDJSON_ICELAKE_BITMANIPULATION_H #define SIMDJSON_ICELAKE_BITMANIPULATION_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #include "simdjson/icelake/base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/icelake/intrinsics.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { namespace icelake { namespace { // We sometimes call trailing_zero on inputs that are zero, // but the algorithms do not end up using the returned value. // Sadly, sanitizers are not smart enough to figure it out. SIMDJSON_NO_SANITIZE_UNDEFINED // This function can be used safely even if not all bytes have been // initialized. // See issue https://github.com/simdjson/simdjson/issues/1965 SIMDJSON_NO_SANITIZE_MEMORY simdjson_inline int trailing_zeroes(uint64_t input_num) { #if SIMDJSON_REGULAR_VISUAL_STUDIO return (int)_tzcnt_u64(input_num); #else // SIMDJSON_REGULAR_VISUAL_STUDIO //////// // You might expect the next line to be equivalent to // return (int)_tzcnt_u64(input_num); // but the generated code differs and might be less efficient? 
//////// return __builtin_ctzll(input_num); #endif // SIMDJSON_REGULAR_VISUAL_STUDIO } /* result might be undefined when input_num is zero */ simdjson_inline uint64_t clear_lowest_bit(uint64_t input_num) { return _blsr_u64(input_num); } /* result might be undefined when input_num is zero */ simdjson_inline int leading_zeroes(uint64_t input_num) { return int(_lzcnt_u64(input_num)); } #if SIMDJSON_REGULAR_VISUAL_STUDIO simdjson_inline unsigned __int64 count_ones(uint64_t input_num) { // note: we do not support legacy 32-bit Windows return __popcnt64(input_num);// Visual Studio wants two underscores } #else simdjson_inline long long int count_ones(uint64_t input_num) { return _popcnt64(input_num); } #endif simdjson_inline bool add_overflow(uint64_t value1, uint64_t value2, uint64_t *result) { #if SIMDJSON_REGULAR_VISUAL_STUDIO return _addcarry_u64(0, value1, value2, reinterpret_cast(result)); #else return __builtin_uaddll_overflow(value1, value2, reinterpret_cast(result)); #endif } } // unnamed namespace } // namespace icelake } // namespace simdjson #endif // SIMDJSON_ICELAKE_BITMANIPULATION_H /* end file simdjson/icelake/bitmanipulation.h */ /* including simdjson/icelake/bitmask.h: #include "simdjson/icelake/bitmask.h" */ /* begin file simdjson/icelake/bitmask.h */ #ifndef SIMDJSON_ICELAKE_BITMASK_H #define SIMDJSON_ICELAKE_BITMASK_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #include "simdjson/icelake/base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/icelake/intrinsics.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { namespace icelake { namespace { // // Perform a "cumulative bitwise xor," flipping bits each time a 1 is encountered. 
// // For example, prefix_xor(00100100) == 00011100 // simdjson_inline uint64_t prefix_xor(const uint64_t bitmask) { // There should be no such thing with a processor supporting avx2 // but not clmul. __m128i all_ones = _mm_set1_epi8('\xFF'); __m128i result = _mm_clmulepi64_si128(_mm_set_epi64x(0ULL, bitmask), all_ones, 0); return _mm_cvtsi128_si64(result); } } // unnamed namespace } // namespace icelake } // namespace simdjson #endif // SIMDJSON_ICELAKE_BITMASK_H /* end file simdjson/icelake/bitmask.h */ /* including simdjson/icelake/simd.h: #include "simdjson/icelake/simd.h" */ /* begin file simdjson/icelake/simd.h */ #ifndef SIMDJSON_ICELAKE_SIMD_H #define SIMDJSON_ICELAKE_SIMD_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #include "simdjson/icelake/base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/icelake/intrinsics.h" */ /* amalgamation skipped (editor-only): #include "simdjson/icelake/bitmanipulation.h" */ /* amalgamation skipped (editor-only): #include "simdjson/internal/simdprune_tables.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ #if defined(__GNUC__) && !defined(__clang__) #if __GNUC__ == 8 #define SIMDJSON_GCC8 1 #endif // __GNUC__ == 8 #endif // defined(__GNUC__) && !defined(__clang__) #if SIMDJSON_GCC8 /** * GCC 8 fails to provide _mm512_set_epi8. We roll our own. 
*/ inline __m512i _mm512_set_epi8(uint8_t a0, uint8_t a1, uint8_t a2, uint8_t a3, uint8_t a4, uint8_t a5, uint8_t a6, uint8_t a7, uint8_t a8, uint8_t a9, uint8_t a10, uint8_t a11, uint8_t a12, uint8_t a13, uint8_t a14, uint8_t a15, uint8_t a16, uint8_t a17, uint8_t a18, uint8_t a19, uint8_t a20, uint8_t a21, uint8_t a22, uint8_t a23, uint8_t a24, uint8_t a25, uint8_t a26, uint8_t a27, uint8_t a28, uint8_t a29, uint8_t a30, uint8_t a31, uint8_t a32, uint8_t a33, uint8_t a34, uint8_t a35, uint8_t a36, uint8_t a37, uint8_t a38, uint8_t a39, uint8_t a40, uint8_t a41, uint8_t a42, uint8_t a43, uint8_t a44, uint8_t a45, uint8_t a46, uint8_t a47, uint8_t a48, uint8_t a49, uint8_t a50, uint8_t a51, uint8_t a52, uint8_t a53, uint8_t a54, uint8_t a55, uint8_t a56, uint8_t a57, uint8_t a58, uint8_t a59, uint8_t a60, uint8_t a61, uint8_t a62, uint8_t a63) { return _mm512_set_epi64(uint64_t(a7) + (uint64_t(a6) << 8) + (uint64_t(a5) << 16) + (uint64_t(a4) << 24) + (uint64_t(a3) << 32) + (uint64_t(a2) << 40) + (uint64_t(a1) << 48) + (uint64_t(a0) << 56), uint64_t(a15) + (uint64_t(a14) << 8) + (uint64_t(a13) << 16) + (uint64_t(a12) << 24) + (uint64_t(a11) << 32) + (uint64_t(a10) << 40) + (uint64_t(a9) << 48) + (uint64_t(a8) << 56), uint64_t(a23) + (uint64_t(a22) << 8) + (uint64_t(a21) << 16) + (uint64_t(a20) << 24) + (uint64_t(a19) << 32) + (uint64_t(a18) << 40) + (uint64_t(a17) << 48) + (uint64_t(a16) << 56), uint64_t(a31) + (uint64_t(a30) << 8) + (uint64_t(a29) << 16) + (uint64_t(a28) << 24) + (uint64_t(a27) << 32) + (uint64_t(a26) << 40) + (uint64_t(a25) << 48) + (uint64_t(a24) << 56), uint64_t(a39) + (uint64_t(a38) << 8) + (uint64_t(a37) << 16) + (uint64_t(a36) << 24) + (uint64_t(a35) << 32) + (uint64_t(a34) << 40) + (uint64_t(a33) << 48) + (uint64_t(a32) << 56), uint64_t(a47) + (uint64_t(a46) << 8) + (uint64_t(a45) << 16) + (uint64_t(a44) << 24) + (uint64_t(a43) << 32) + (uint64_t(a42) << 40) + (uint64_t(a41) << 48) + (uint64_t(a40) << 56), uint64_t(a55) + (uint64_t(a54) << 
8) + (uint64_t(a53) << 16) + (uint64_t(a52) << 24) + (uint64_t(a51) << 32) + (uint64_t(a50) << 40) + (uint64_t(a49) << 48) + (uint64_t(a48) << 56), uint64_t(a63) + (uint64_t(a62) << 8) + (uint64_t(a61) << 16) + (uint64_t(a60) << 24) + (uint64_t(a59) << 32) + (uint64_t(a58) << 40) + (uint64_t(a57) << 48) + (uint64_t(a56) << 56)); } #endif // SIMDJSON_GCC8 namespace simdjson { namespace icelake { namespace { namespace simd { // Forward-declared so they can be used by splat and friends. template struct base { __m512i value; // Zero constructor simdjson_inline base() : value{__m512i()} {} // Conversion from SIMD register simdjson_inline base(const __m512i _value) : value(_value) {} // Conversion to SIMD register simdjson_inline operator const __m512i&() const { return this->value; } simdjson_inline operator __m512i&() { return this->value; } // Bit operations simdjson_inline Child operator|(const Child other) const { return _mm512_or_si512(*this, other); } simdjson_inline Child operator&(const Child other) const { return _mm512_and_si512(*this, other); } simdjson_inline Child operator^(const Child other) const { return _mm512_xor_si512(*this, other); } simdjson_inline Child bit_andnot(const Child other) const { return _mm512_andnot_si512(other, *this); } simdjson_inline Child& operator|=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast | other; return *this_cast; } simdjson_inline Child& operator&=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast & other; return *this_cast; } simdjson_inline Child& operator^=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast ^ other; return *this_cast; } }; // Forward-declared so they can be used by splat and friends. 
template struct simd8; template> struct base8: base> { typedef uint32_t bitmask_t; typedef uint64_t bitmask2_t; simdjson_inline base8() : base>() {} simdjson_inline base8(const __m512i _value) : base>(_value) {} friend simdjson_really_inline uint64_t operator==(const simd8 lhs, const simd8 rhs) { return _mm512_cmpeq_epi8_mask(lhs, rhs); } static const int SIZE = sizeof(base::value); template simdjson_inline simd8 prev(const simd8 prev_chunk) const { // workaround for compilers unable to figure out that 16 - N is a constant (GCC 8) constexpr int shift = 16 - N; return _mm512_alignr_epi8(*this, _mm512_permutex2var_epi64(prev_chunk, _mm512_set_epi64(13, 12, 11, 10, 9, 8, 7, 6), *this), shift); } }; // SIMD byte mask type (returned by things like eq and gt) template<> struct simd8: base8 { static simdjson_inline simd8 splat(bool _value) { return _mm512_set1_epi8(uint8_t(-(!!_value))); } simdjson_inline simd8() : base8() {} simdjson_inline simd8(const __m512i _value) : base8(_value) {} // Splat constructor simdjson_inline simd8(bool _value) : base8(splat(_value)) {} simdjson_inline bool any() const { return !!_mm512_test_epi8_mask (*this, *this); } simdjson_inline simd8 operator~() const { return *this ^ true; } }; template struct base8_numeric: base8 { static simdjson_inline simd8 splat(T _value) { return _mm512_set1_epi8(_value); } static simdjson_inline simd8 zero() { return _mm512_setzero_si512(); } static simdjson_inline simd8 load(const T values[64]) { return _mm512_loadu_si512(reinterpret_cast(values)); } // Repeat 16 values as many times as necessary (usually for lookup tables) static simdjson_inline simd8 repeat_16( T v0, T v1, T v2, T v3, T v4, T v5, T v6, T v7, T v8, T v9, T v10, T v11, T v12, T v13, T v14, T v15 ) { return simd8( v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10,v11,v12,v13,v14,v15, v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10,v11,v12,v13,v14,v15, v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10,v11,v12,v13,v14,v15, v0, v1, v2, v3, v4, v5, v6, v7, v8, 
v9, v10,v11,v12,v13,v14,v15 ); } simdjson_inline base8_numeric() : base8() {} simdjson_inline base8_numeric(const __m512i _value) : base8(_value) {} // Store to array simdjson_inline void store(T dst[64]) const { return _mm512_storeu_si512(reinterpret_cast<__m512i *>(dst), *this); } // Addition/subtraction are the same for signed and unsigned simdjson_inline simd8 operator+(const simd8 other) const { return _mm512_add_epi8(*this, other); } simdjson_inline simd8 operator-(const simd8 other) const { return _mm512_sub_epi8(*this, other); } simdjson_inline simd8& operator+=(const simd8 other) { *this = *this + other; return *static_cast*>(this); } simdjson_inline simd8& operator-=(const simd8 other) { *this = *this - other; return *static_cast*>(this); } // Override to distinguish from bool version simdjson_inline simd8 operator~() const { return *this ^ 0xFFu; } // Perform a lookup assuming the value is between 0 and 16 (undefined behavior for out of range values) template simdjson_inline simd8 lookup_16(simd8 lookup_table) const { return _mm512_shuffle_epi8(lookup_table, *this); } // Copies to 'output" all bytes corresponding to a 0 in the mask (interpreted as a bitset). // Passing a 0 value for mask would be equivalent to writing out every byte to output. // Only the first 32 - count_ones(mask) bytes of the result are significant but 32 bytes // get written. // Design consideration: it seems like a function with the // signature simd8 compress(uint32_t mask) would be // sensible, but the AVX ISA makes this kind of approach difficult. 
template simdjson_inline void compress(uint64_t mask, L * output) const { _mm512_mask_compressstoreu_epi8 (output,~mask,*this); } template simdjson_inline simd8 lookup_16( L replace0, L replace1, L replace2, L replace3, L replace4, L replace5, L replace6, L replace7, L replace8, L replace9, L replace10, L replace11, L replace12, L replace13, L replace14, L replace15) const { return lookup_16(simd8::repeat_16( replace0, replace1, replace2, replace3, replace4, replace5, replace6, replace7, replace8, replace9, replace10, replace11, replace12, replace13, replace14, replace15 )); } }; // Signed bytes template<> struct simd8 : base8_numeric { simdjson_inline simd8() : base8_numeric() {} simdjson_inline simd8(const __m512i _value) : base8_numeric(_value) {} // Splat constructor simdjson_inline simd8(int8_t _value) : simd8(splat(_value)) {} // Array constructor simdjson_inline simd8(const int8_t values[64]) : simd8(load(values)) {} // Member-by-member initialization simdjson_inline simd8( int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15, int8_t v16, int8_t v17, int8_t v18, int8_t v19, int8_t v20, int8_t v21, int8_t v22, int8_t v23, int8_t v24, int8_t v25, int8_t v26, int8_t v27, int8_t v28, int8_t v29, int8_t v30, int8_t v31, int8_t v32, int8_t v33, int8_t v34, int8_t v35, int8_t v36, int8_t v37, int8_t v38, int8_t v39, int8_t v40, int8_t v41, int8_t v42, int8_t v43, int8_t v44, int8_t v45, int8_t v46, int8_t v47, int8_t v48, int8_t v49, int8_t v50, int8_t v51, int8_t v52, int8_t v53, int8_t v54, int8_t v55, int8_t v56, int8_t v57, int8_t v58, int8_t v59, int8_t v60, int8_t v61, int8_t v62, int8_t v63 ) : simd8(_mm512_set_epi8( v63, v62, v61, v60, v59, v58, v57, v56, v55, v54, v53, v52, v51, v50, v49, v48, v47, v46, v45, v44, v43, v42, v41, v40, v39, v38, v37, v36, v35, v34, v33, v32, v31, v30, v29, v28, v27, v26, v25, v24, v23, v22, v21, v20, 
v19, v18, v17, v16, v15, v14, v13, v12, v11, v10, v9, v8, v7, v6, v5, v4, v3, v2, v1, v0 )) {} // Repeat 16 values as many times as necessary (usually for lookup tables) simdjson_inline static simd8 repeat_16( int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15 ) { return simd8( v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10,v11,v12,v13,v14,v15, v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10,v11,v12,v13,v14,v15, v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10,v11,v12,v13,v14,v15, v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10,v11,v12,v13,v14,v15 ); } // Order-sensitive comparisons simdjson_inline simd8 max_val(const simd8 other) const { return _mm512_max_epi8(*this, other); } simdjson_inline simd8 min_val(const simd8 other) const { return _mm512_min_epi8(*this, other); } simdjson_inline simd8 operator>(const simd8 other) const { return _mm512_maskz_abs_epi8(_mm512_cmpgt_epi8_mask(*this, other),_mm512_set1_epi8(uint8_t(0x80))); } simdjson_inline simd8 operator<(const simd8 other) const { return _mm512_maskz_abs_epi8(_mm512_cmpgt_epi8_mask(other, *this),_mm512_set1_epi8(uint8_t(0x80))); } }; // Unsigned bytes template<> struct simd8: base8_numeric { simdjson_inline simd8() : base8_numeric() {} simdjson_inline simd8(const __m512i _value) : base8_numeric(_value) {} // Splat constructor simdjson_inline simd8(uint8_t _value) : simd8(splat(_value)) {} // Array constructor simdjson_inline simd8(const uint8_t values[64]) : simd8(load(values)) {} // Member-by-member initialization simdjson_inline simd8( uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15, uint8_t v16, uint8_t v17, uint8_t v18, uint8_t v19, uint8_t v20, uint8_t v21, uint8_t v22, uint8_t v23, uint8_t v24, uint8_t v25, uint8_t v26, uint8_t v27, uint8_t 
v28, uint8_t v29, uint8_t v30, uint8_t v31, uint8_t v32, uint8_t v33, uint8_t v34, uint8_t v35, uint8_t v36, uint8_t v37, uint8_t v38, uint8_t v39, uint8_t v40, uint8_t v41, uint8_t v42, uint8_t v43, uint8_t v44, uint8_t v45, uint8_t v46, uint8_t v47, uint8_t v48, uint8_t v49, uint8_t v50, uint8_t v51, uint8_t v52, uint8_t v53, uint8_t v54, uint8_t v55, uint8_t v56, uint8_t v57, uint8_t v58, uint8_t v59, uint8_t v60, uint8_t v61, uint8_t v62, uint8_t v63 ) : simd8(_mm512_set_epi8( v63, v62, v61, v60, v59, v58, v57, v56, v55, v54, v53, v52, v51, v50, v49, v48, v47, v46, v45, v44, v43, v42, v41, v40, v39, v38, v37, v36, v35, v34, v33, v32, v31, v30, v29, v28, v27, v26, v25, v24, v23, v22, v21, v20, v19, v18, v17, v16, v15, v14, v13, v12, v11, v10, v9, v8, v7, v6, v5, v4, v3, v2, v1, v0 )) {} // Repeat 16 values as many times as necessary (usually for lookup tables) simdjson_inline static simd8 repeat_16( uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15 ) { return simd8( v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10,v11,v12,v13,v14,v15, v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10,v11,v12,v13,v14,v15, v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10,v11,v12,v13,v14,v15, v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10,v11,v12,v13,v14,v15 ); } // Saturated math simdjson_inline simd8 saturating_add(const simd8 other) const { return _mm512_adds_epu8(*this, other); } simdjson_inline simd8 saturating_sub(const simd8 other) const { return _mm512_subs_epu8(*this, other); } // Order-specific operations simdjson_inline simd8 max_val(const simd8 other) const { return _mm512_max_epu8(*this, other); } simdjson_inline simd8 min_val(const simd8 other) const { return _mm512_min_epu8(other, *this); } // Same as >, but only guarantees true is nonzero (< guarantees true = -1) simdjson_inline simd8 gt_bits(const simd8 other) const { return 
this->saturating_sub(other); } // Same as <, but only guarantees true is nonzero (< guarantees true = -1) simdjson_inline simd8 lt_bits(const simd8 other) const { return other.saturating_sub(*this); } simdjson_inline uint64_t operator<=(const simd8 other) const { return other.max_val(*this) == other; } simdjson_inline uint64_t operator>=(const simd8 other) const { return other.min_val(*this) == other; } simdjson_inline simd8 operator>(const simd8 other) const { return this->gt_bits(other).any_bits_set(); } simdjson_inline simd8 operator<(const simd8 other) const { return this->lt_bits(other).any_bits_set(); } // Bit-specific operations simdjson_inline simd8 bits_not_set() const { return _mm512_mask_blend_epi8(*this == uint8_t(0), _mm512_set1_epi8(0), _mm512_set1_epi8(-1)); } simdjson_inline simd8 bits_not_set(simd8 bits) const { return (*this & bits).bits_not_set(); } simdjson_inline simd8 any_bits_set() const { return ~this->bits_not_set(); } simdjson_inline simd8 any_bits_set(simd8 bits) const { return ~this->bits_not_set(bits); } simdjson_inline bool is_ascii() const { return _mm512_movepi8_mask(*this) == 0; } simdjson_inline bool bits_not_set_anywhere() const { return !_mm512_test_epi8_mask(*this, *this); } simdjson_inline bool any_bits_set_anywhere() const { return !bits_not_set_anywhere(); } simdjson_inline bool bits_not_set_anywhere(simd8 bits) const { return !_mm512_test_epi8_mask(*this, bits); } simdjson_inline bool any_bits_set_anywhere(simd8 bits) const { return !bits_not_set_anywhere(bits); } template simdjson_inline simd8 shr() const { return simd8(_mm512_srli_epi16(*this, N)) & uint8_t(0xFFu >> N); } template simdjson_inline simd8 shl() const { return simd8(_mm512_slli_epi16(*this, N)) & uint8_t(0xFFu << N); } // Get one of the bits and make a bitmask out of it. // e.g. 
value.get_bit<7>() gets the high bit template simdjson_inline uint64_t get_bit() const { return _mm512_movepi8_mask(_mm512_slli_epi16(*this, 7-N)); } }; template struct simd8x64 { static constexpr int NUM_CHUNKS = 64 / sizeof(simd8); static_assert(NUM_CHUNKS == 1, "Icelake kernel should use one register per 64-byte block."); const simd8 chunks[NUM_CHUNKS]; simd8x64(const simd8x64& o) = delete; // no copy allowed simd8x64& operator=(const simd8& other) = delete; // no assignment allowed simd8x64() = delete; // no default constructor allowed simdjson_inline simd8x64(const simd8 chunk0, const simd8 chunk1) : chunks{chunk0, chunk1} {} simdjson_inline simd8x64(const simd8 chunk0) : chunks{chunk0} {} simdjson_inline simd8x64(const T ptr[64]) : chunks{simd8::load(ptr)} {} simdjson_inline uint64_t compress(uint64_t mask, T * output) const { this->chunks[0].compress(mask, output); return 64 - count_ones(mask); } simdjson_inline void store(T ptr[64]) const { this->chunks[0].store(ptr+sizeof(simd8)*0); } simdjson_inline simd8 reduce_or() const { return this->chunks[0]; } simdjson_inline simd8x64 bit_or(const T m) const { const simd8 mask = simd8::splat(m); return simd8x64( this->chunks[0] | mask ); } simdjson_inline uint64_t eq(const T m) const { const simd8 mask = simd8::splat(m); return this->chunks[0] == mask; } simdjson_inline uint64_t eq(const simd8x64 &other) const { return this->chunks[0] == other.chunks[0]; } simdjson_inline uint64_t lteq(const T m) const { const simd8 mask = simd8::splat(m); return this->chunks[0] <= mask; } }; // struct simd8x64 } // namespace simd } // unnamed namespace } // namespace icelake } // namespace simdjson #endif // SIMDJSON_ICELAKE_SIMD_H /* end file simdjson/icelake/simd.h */ /* including simdjson/icelake/stringparsing_defs.h: #include "simdjson/icelake/stringparsing_defs.h" */ /* begin file simdjson/icelake/stringparsing_defs.h */ #ifndef SIMDJSON_ICELAKE_STRINGPARSING_DEFS_H #define SIMDJSON_ICELAKE_STRINGPARSING_DEFS_H /* 
amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #include "simdjson/icelake/base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/icelake/simd.h" */ /* amalgamation skipped (editor-only): #include "simdjson/icelake/bitmanipulation.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { namespace icelake { namespace { using namespace simd; // Holds backslashes and quotes locations. struct backslash_and_quote { public: static constexpr uint32_t BYTES_PROCESSED = 64; simdjson_inline static backslash_and_quote copy_and_find(const uint8_t *src, uint8_t *dst); simdjson_inline bool has_quote_first() { return ((bs_bits - 1) & quote_bits) != 0; } simdjson_inline bool has_backslash() { return ((quote_bits - 1) & bs_bits) != 0; } simdjson_inline int quote_index() { return trailing_zeroes(quote_bits); } simdjson_inline int backslash_index() { return trailing_zeroes(bs_bits); } uint64_t bs_bits; uint64_t quote_bits; }; // struct backslash_and_quote simdjson_inline backslash_and_quote backslash_and_quote::copy_and_find(const uint8_t *src, uint8_t *dst) { // this can read up to 15 bytes beyond the buffer size, but we require // SIMDJSON_PADDING of padding static_assert(SIMDJSON_PADDING >= (BYTES_PROCESSED - 1), "backslash and quote finder must process fewer than SIMDJSON_PADDING bytes"); simd8 v(src); // store to dest unconditionally - we can overwrite the bits we don't like later v.store(dst); return { static_cast(v == '\\'), // bs_bits static_cast(v == '"'), // quote_bits }; } } // unnamed namespace } // namespace icelake } // namespace simdjson #endif // SIMDJSON_ICELAKE_STRINGPARSING_DEFS_H /* end file simdjson/icelake/stringparsing_defs.h */ /* including simdjson/icelake/numberparsing_defs.h: #include "simdjson/icelake/numberparsing_defs.h" */ /* begin file simdjson/icelake/numberparsing_defs.h */ #ifndef SIMDJSON_ICELAKE_NUMBERPARSING_DEFS_H 
#define SIMDJSON_ICELAKE_NUMBERPARSING_DEFS_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #include "simdjson/icelake/base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/icelake/intrinsics.h" */ /* amalgamation skipped (editor-only): #include "simdjson/internal/numberparsing_tables.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { namespace icelake { namespace numberparsing { static simdjson_inline uint32_t parse_eight_digits_unrolled(const uint8_t *chars) { // this actually computes *16* values so we are being wasteful. const __m128i ascii0 = _mm_set1_epi8('0'); const __m128i mul_1_10 = _mm_setr_epi8(10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1); const __m128i mul_1_100 = _mm_setr_epi16(100, 1, 100, 1, 100, 1, 100, 1); const __m128i mul_1_10000 = _mm_setr_epi16(10000, 1, 10000, 1, 10000, 1, 10000, 1); const __m128i input = _mm_sub_epi8( _mm_loadu_si128(reinterpret_cast(chars)), ascii0); const __m128i t1 = _mm_maddubs_epi16(input, mul_1_10); const __m128i t2 = _mm_madd_epi16(t1, mul_1_100); const __m128i t3 = _mm_packus_epi32(t2, t2); const __m128i t4 = _mm_madd_epi16(t3, mul_1_10000); return _mm_cvtsi128_si32( t4); // only captures the sum of the first 8 digits, drop the rest } /** @private */ simdjson_inline internal::value128 full_multiplication(uint64_t value1, uint64_t value2) { internal::value128 answer; #if SIMDJSON_REGULAR_VISUAL_STUDIO || SIMDJSON_IS_32BITS #ifdef _M_ARM64 // ARM64 has native support for 64-bit multiplications, no need to emultate answer.high = __umulh(value1, value2); answer.low = value1 * value2; #else answer.low = _umul128(value1, value2, &answer.high); // _umul128 not available on ARM64 #endif // _M_ARM64 #else // SIMDJSON_REGULAR_VISUAL_STUDIO || SIMDJSON_IS_32BITS __uint128_t r = (static_cast<__uint128_t>(value1)) * value2; answer.low = uint64_t(r); answer.high = uint64_t(r >> 
64); #endif return answer; } } // namespace numberparsing } // namespace icelake } // namespace simdjson #define SIMDJSON_SWAR_NUMBER_PARSING 1 #endif // SIMDJSON_ICELAKE_NUMBERPARSING_DEFS_H /* end file simdjson/icelake/numberparsing_defs.h */ /* end file simdjson/icelake/begin.h */ /* including simdjson/generic/amalgamated.h for icelake: #include "simdjson/generic/amalgamated.h" */ /* begin file simdjson/generic/amalgamated.h for icelake */ #if defined(SIMDJSON_CONDITIONAL_INCLUDE) && !defined(SIMDJSON_GENERIC_DEPENDENCIES_H) #error simdjson/generic/dependencies.h must be included before simdjson/generic/amalgamated.h! #endif /* including simdjson/generic/base.h for icelake: #include "simdjson/generic/base.h" */ /* begin file simdjson/generic/base.h for icelake */ #ifndef SIMDJSON_GENERIC_BASE_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_BASE_H */ /* amalgamation skipped (editor-only): #include "simdjson/base.h" */ /* amalgamation skipped (editor-only): // If we haven't got an implementation yet, we're in the editor, editing a generic file! Just */ /* amalgamation skipped (editor-only): // use the most advanced one we can so the most possible stuff can be tested. 
*/ /* amalgamation skipped (editor-only): #ifndef SIMDJSON_IMPLEMENTATION */ /* amalgamation skipped (editor-only): #include "simdjson/implementation_detection.h" */ /* amalgamation skipped (editor-only): #if SIMDJSON_IMPLEMENTATION_ICELAKE */ /* amalgamation skipped (editor-only): #include "simdjson/icelake/begin.h" */ /* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_HASWELL */ /* amalgamation skipped (editor-only): #include "simdjson/haswell/begin.h" */ /* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_WESTMERE */ /* amalgamation skipped (editor-only): #include "simdjson/westmere/begin.h" */ /* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_ARM64 */ /* amalgamation skipped (editor-only): #include "simdjson/arm64/begin.h" */ /* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_PPC64 */ /* amalgamation skipped (editor-only): #include "simdjson/ppc64/begin.h" */ /* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_FALLBACK */ /* amalgamation skipped (editor-only): #include "simdjson/fallback/begin.h" */ /* amalgamation skipped (editor-only): #else */ /* amalgamation skipped (editor-only): #error "All possible implementations (including fallback) have been disabled! simdjson will not run." 
*/ /* amalgamation skipped (editor-only): #endif */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_IMPLEMENTATION */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { namespace icelake { struct open_container; class dom_parser_implementation; /** * The type of a JSON number */ enum class number_type { floating_point_number=1, /// a binary64 number signed_integer, /// a signed integer that fits in a 64-bit word using two's complement unsigned_integer /// a positive integer larger or equal to 1<<63 }; } // namespace icelake } // namespace simdjson #endif // SIMDJSON_GENERIC_BASE_H /* end file simdjson/generic/base.h for icelake */ /* including simdjson/generic/jsoncharutils.h for icelake: #include "simdjson/generic/jsoncharutils.h" */ /* begin file simdjson/generic/jsoncharutils.h for icelake */ #ifndef SIMDJSON_GENERIC_JSONCHARUTILS_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_JSONCHARUTILS_H */ /* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/internal/jsoncharutils_tables.h" */ /* amalgamation skipped (editor-only): #include "simdjson/internal/numberparsing_tables.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { namespace icelake { namespace { namespace jsoncharutils { // return non-zero if not a structural or whitespace char // zero otherwise simdjson_inline uint32_t is_not_structural_or_whitespace(uint8_t c) { return internal::structural_or_whitespace_negated[c]; } simdjson_inline uint32_t is_structural_or_whitespace(uint8_t c) { return internal::structural_or_whitespace[c]; } // returns a value with the high 16 bits set if not valid // otherwise returns the conversion of the 4 hex digits at src into the bottom // 16 bits of the 32-bit return register // // see 
// https://lemire.me/blog/2019/04/17/parsing-short-hexadecimal-strings-efficiently/ static inline uint32_t hex_to_u32_nocheck( const uint8_t *src) { // strictly speaking, static inline is a C-ism uint32_t v1 = internal::digit_to_val32[630 + src[0]]; uint32_t v2 = internal::digit_to_val32[420 + src[1]]; uint32_t v3 = internal::digit_to_val32[210 + src[2]]; uint32_t v4 = internal::digit_to_val32[0 + src[3]]; return v1 | v2 | v3 | v4; } // given a code point cp, writes to c // the utf-8 code, outputting the length in // bytes, if the length is zero, the code point // is invalid // // This can possibly be made faster using pdep // and clz and table lookups, but JSON documents // have few escaped code points, and the following // function looks cheap. // // Note: we assume that surrogates are treated separately // simdjson_inline size_t codepoint_to_utf8(uint32_t cp, uint8_t *c) { if (cp <= 0x7F) { c[0] = uint8_t(cp); return 1; // ascii } if (cp <= 0x7FF) { c[0] = uint8_t((cp >> 6) + 192); c[1] = uint8_t((cp & 63) + 128); return 2; // universal plane // Surrogates are treated elsewhere... //} //else if (0xd800 <= cp && cp <= 0xdfff) { // return 0; // surrogates // could put assert here } else if (cp <= 0xFFFF) { c[0] = uint8_t((cp >> 12) + 224); c[1] = uint8_t(((cp >> 6) & 63) + 128); c[2] = uint8_t((cp & 63) + 128); return 3; } else if (cp <= 0x10FFFF) { // if you know you have a valid code point, this // is not needed c[0] = uint8_t((cp >> 18) + 240); c[1] = uint8_t(((cp >> 12) & 63) + 128); c[2] = uint8_t(((cp >> 6) & 63) + 128); c[3] = uint8_t((cp & 63) + 128); return 4; } // will return 0 when the code point was too large. 
return 0; // bad r } #if SIMDJSON_IS_32BITS // _umul128 for x86, arm // this is a slow emulation routine for 32-bit // static simdjson_inline uint64_t __emulu(uint32_t x, uint32_t y) { return x * (uint64_t)y; } static simdjson_inline uint64_t _umul128(uint64_t ab, uint64_t cd, uint64_t *hi) { uint64_t ad = __emulu((uint32_t)(ab >> 32), (uint32_t)cd); uint64_t bd = __emulu((uint32_t)ab, (uint32_t)cd); uint64_t adbc = ad + __emulu((uint32_t)ab, (uint32_t)(cd >> 32)); uint64_t adbc_carry = !!(adbc < ad); uint64_t lo = bd + (adbc << 32); *hi = __emulu((uint32_t)(ab >> 32), (uint32_t)(cd >> 32)) + (adbc >> 32) + (adbc_carry << 32) + !!(lo < bd); return lo; } #endif } // namespace jsoncharutils } // unnamed namespace } // namespace icelake } // namespace simdjson #endif // SIMDJSON_GENERIC_JSONCHARUTILS_H /* end file simdjson/generic/jsoncharutils.h for icelake */ /* including simdjson/generic/atomparsing.h for icelake: #include "simdjson/generic/atomparsing.h" */ /* begin file simdjson/generic/atomparsing.h for icelake */ #ifndef SIMDJSON_GENERIC_ATOMPARSING_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ATOMPARSING_H */ /* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/jsoncharutils.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ #include namespace simdjson { namespace icelake { namespace { /// @private namespace atomparsing { // The string_to_uint32 is exclusively used to map literal strings to 32-bit values. // We use memcpy instead of a pointer cast to avoid undefined behaviors since we cannot // be certain that the character pointer will be properly aligned. // You might think that using memcpy makes this function expensive, but you'd be wrong. 
// All decent optimizing compilers (GCC, clang, Visual Studio) will compile string_to_uint32("false"); // to the compile-time constant 1936482662. simdjson_inline uint32_t string_to_uint32(const char* str) { uint32_t val; std::memcpy(&val, str, sizeof(uint32_t)); return val; } // Again in str4ncmp we use a memcpy to avoid undefined behavior. The memcpy may appear expensive. // Yet all decent optimizing compilers will compile memcpy to a single instruction, just about. simdjson_warn_unused simdjson_inline uint32_t str4ncmp(const uint8_t *src, const char* atom) { uint32_t srcval; // we want to avoid unaligned 32-bit loads (undefined in C/C++) static_assert(sizeof(uint32_t) <= SIMDJSON_PADDING, "SIMDJSON_PADDING must be larger than 4 bytes"); std::memcpy(&srcval, src, sizeof(uint32_t)); return srcval ^ string_to_uint32(atom); } simdjson_warn_unused simdjson_inline bool is_valid_true_atom(const uint8_t *src) { return (str4ncmp(src, "true") | jsoncharutils::is_not_structural_or_whitespace(src[4])) == 0; } simdjson_warn_unused simdjson_inline bool is_valid_true_atom(const uint8_t *src, size_t len) { if (len > 4) { return is_valid_true_atom(src); } else if (len == 4) { return !str4ncmp(src, "true"); } else { return false; } } simdjson_warn_unused simdjson_inline bool is_valid_false_atom(const uint8_t *src) { return (str4ncmp(src+1, "alse") | jsoncharutils::is_not_structural_or_whitespace(src[5])) == 0; } simdjson_warn_unused simdjson_inline bool is_valid_false_atom(const uint8_t *src, size_t len) { if (len > 5) { return is_valid_false_atom(src); } else if (len == 5) { return !str4ncmp(src+1, "alse"); } else { return false; } } simdjson_warn_unused simdjson_inline bool is_valid_null_atom(const uint8_t *src) { return (str4ncmp(src, "null") | jsoncharutils::is_not_structural_or_whitespace(src[4])) == 0; } simdjson_warn_unused simdjson_inline bool is_valid_null_atom(const uint8_t *src, size_t len) { if (len > 4) { return is_valid_null_atom(src); } else if (len == 4) { return 
!str4ncmp(src, "null"); } else { return false; } } } // namespace atomparsing } // unnamed namespace } // namespace icelake } // namespace simdjson #endif // SIMDJSON_GENERIC_ATOMPARSING_H /* end file simdjson/generic/atomparsing.h for icelake */ /* including simdjson/generic/dom_parser_implementation.h for icelake: #include "simdjson/generic/dom_parser_implementation.h" */ /* begin file simdjson/generic/dom_parser_implementation.h for icelake */ #ifndef SIMDJSON_GENERIC_DOM_PARSER_IMPLEMENTATION_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_DOM_PARSER_IMPLEMENTATION_H */ /* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/internal/dom_parser_implementation.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { namespace icelake { // expectation: sizeof(open_container) = 64/8. 
struct open_container { uint32_t tape_index; // where, on the tape, does the scope ([,{) begins uint32_t count; // how many elements in the scope }; // struct open_container static_assert(sizeof(open_container) == 64/8, "Open container must be 64 bits"); class dom_parser_implementation final : public internal::dom_parser_implementation { public: /** Tape location of each open { or [ */ std::unique_ptr open_containers{}; /** Whether each open container is a [ or { */ std::unique_ptr is_array{}; /** Buffer passed to stage 1 */ const uint8_t *buf{}; /** Length passed to stage 1 */ size_t len{0}; /** Document passed to stage 2 */ dom::document *doc{}; inline dom_parser_implementation() noexcept; inline dom_parser_implementation(dom_parser_implementation &&other) noexcept; inline dom_parser_implementation &operator=(dom_parser_implementation &&other) noexcept; dom_parser_implementation(const dom_parser_implementation &) = delete; dom_parser_implementation &operator=(const dom_parser_implementation &) = delete; simdjson_warn_unused error_code parse(const uint8_t *buf, size_t len, dom::document &doc) noexcept final; simdjson_warn_unused error_code stage1(const uint8_t *buf, size_t len, stage1_mode partial) noexcept final; simdjson_warn_unused error_code stage2(dom::document &doc) noexcept final; simdjson_warn_unused error_code stage2_next(dom::document &doc) noexcept final; simdjson_warn_unused uint8_t *parse_string(const uint8_t *src, uint8_t *dst, bool allow_replacement) const noexcept final; simdjson_warn_unused uint8_t *parse_wobbly_string(const uint8_t *src, uint8_t *dst) const noexcept final; inline simdjson_warn_unused error_code set_capacity(size_t capacity) noexcept final; inline simdjson_warn_unused error_code set_max_depth(size_t max_depth) noexcept final; private: simdjson_inline simdjson_warn_unused error_code set_capacity_stage1(size_t capacity); }; } // namespace icelake } // namespace simdjson namespace simdjson { namespace icelake { inline 
dom_parser_implementation::dom_parser_implementation() noexcept = default; inline dom_parser_implementation::dom_parser_implementation(dom_parser_implementation &&other) noexcept = default; inline dom_parser_implementation &dom_parser_implementation::operator=(dom_parser_implementation &&other) noexcept = default; // Leaving these here so they can be inlined if so desired inline simdjson_warn_unused error_code dom_parser_implementation::set_capacity(size_t capacity) noexcept { if(capacity > SIMDJSON_MAXSIZE_BYTES) { return CAPACITY; } // Stage 1 index output size_t max_structures = SIMDJSON_ROUNDUP_N(capacity, 64) + 2 + 7; structural_indexes.reset( new (std::nothrow) uint32_t[max_structures] ); if (!structural_indexes) { _capacity = 0; return MEMALLOC; } structural_indexes[0] = 0; n_structural_indexes = 0; _capacity = capacity; return SUCCESS; } inline simdjson_warn_unused error_code dom_parser_implementation::set_max_depth(size_t max_depth) noexcept { // Stage 2 stacks open_containers.reset(new (std::nothrow) open_container[max_depth]); is_array.reset(new (std::nothrow) bool[max_depth]); if (!is_array || !open_containers) { _max_depth = 0; return MEMALLOC; } _max_depth = max_depth; return SUCCESS; } } // namespace icelake } // namespace simdjson #endif // SIMDJSON_GENERIC_DOM_PARSER_IMPLEMENTATION_H /* end file simdjson/generic/dom_parser_implementation.h for icelake */ /* including simdjson/generic/implementation_simdjson_result_base.h for icelake: #include "simdjson/generic/implementation_simdjson_result_base.h" */ /* begin file simdjson/generic/implementation_simdjson_result_base.h for icelake */ #ifndef SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_H */ /* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ /* amalgamation skipped (editor-only): #endif 
// SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { namespace icelake { // This is a near copy of include/error.h's implementation_simdjson_result_base, except it doesn't use std::pair // so we can avoid inlining errors // TODO reconcile these! /** * The result of a simdjson operation that could fail. * * Gives the option of reading error codes, or throwing an exception by casting to the desired result. * * This is a base class for implementations that want to add functions to the result type for * chaining. * * Override like: * * struct simdjson_result : public internal::implementation_simdjson_result_base { * simdjson_result() noexcept : internal::implementation_simdjson_result_base() {} * simdjson_result(error_code error) noexcept : internal::implementation_simdjson_result_base(error) {} * simdjson_result(T &&value) noexcept : internal::implementation_simdjson_result_base(std::forward(value)) {} * simdjson_result(T &&value, error_code error) noexcept : internal::implementation_simdjson_result_base(value, error) {} * // Your extra methods here * } * * Then any method returning simdjson_result will be chainable with your methods. */ template struct implementation_simdjson_result_base { /** * Create a new empty result with error = UNINITIALIZED. */ simdjson_inline implementation_simdjson_result_base() noexcept = default; /** * Create a new error result. */ simdjson_inline implementation_simdjson_result_base(error_code error) noexcept; /** * Create a new successful result. */ simdjson_inline implementation_simdjson_result_base(T &&value) noexcept; /** * Create a new result with both things (use if you don't want to branch when creating the result). */ simdjson_inline implementation_simdjson_result_base(T &&value, error_code error) noexcept; /** * Move the value and the error to the provided variables. * * @param value The variable to assign the value to. May not be set if there is an error. * @param error The variable to assign the error to. 
Set to SUCCESS if there is no error. */ simdjson_inline void tie(T &value, error_code &error) && noexcept; /** * Move the value to the provided variable. * * @param value The variable to assign the value to. May not be set if there is an error. */ simdjson_inline error_code get(T &value) && noexcept; /** * The error. */ simdjson_inline error_code error() const noexcept; #if SIMDJSON_EXCEPTIONS /** * Get the result value. * * @throw simdjson_error if there was an error. */ simdjson_inline T& value() & noexcept(false); /** * Take the result value (move it). * * @throw simdjson_error if there was an error. */ simdjson_inline T&& value() && noexcept(false); /** * Take the result value (move it). * * @throw simdjson_error if there was an error. */ simdjson_inline T&& take_value() && noexcept(false); /** * Cast to the value (will throw on error). * * @throw simdjson_error if there was an error. */ simdjson_inline operator T&&() && noexcept(false); #endif // SIMDJSON_EXCEPTIONS /** * Get the result value. This function is safe if and only * the error() method returns a value that evaluates to false. */ simdjson_inline const T& value_unsafe() const& noexcept; /** * Get the result value. This function is safe if and only * the error() method returns a value that evaluates to false. */ simdjson_inline T& value_unsafe() & noexcept; /** * Take the result value (move it). This function is safe if and only * the error() method returns a value that evaluates to false. */ simdjson_inline T&& value_unsafe() && noexcept; protected: /** users should never directly access first and second. **/ T first{}; /** Users should never directly access 'first'. **/ error_code second{UNINITIALIZED}; /** Users should never directly access 'second'. 
**/ }; // struct implementation_simdjson_result_base } // namespace icelake } // namespace simdjson #endif // SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_H /* end file simdjson/generic/implementation_simdjson_result_base.h for icelake */ /* including simdjson/generic/numberparsing.h for icelake: #include "simdjson/generic/numberparsing.h" */ /* begin file simdjson/generic/numberparsing.h for icelake */ #ifndef SIMDJSON_GENERIC_NUMBERPARSING_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_NUMBERPARSING_H */ /* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/jsoncharutils.h" */ /* amalgamation skipped (editor-only): #include "simdjson/internal/numberparsing_tables.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ #include #include #include namespace simdjson { namespace icelake { namespace numberparsing { #ifdef JSON_TEST_NUMBERS #define INVALID_NUMBER(SRC) (found_invalid_number((SRC)), NUMBER_ERROR) #define WRITE_INTEGER(VALUE, SRC, WRITER) (found_integer((VALUE), (SRC)), (WRITER).append_s64((VALUE))) #define WRITE_UNSIGNED(VALUE, SRC, WRITER) (found_unsigned_integer((VALUE), (SRC)), (WRITER).append_u64((VALUE))) #define WRITE_DOUBLE(VALUE, SRC, WRITER) (found_float((VALUE), (SRC)), (WRITER).append_double((VALUE))) #else #define INVALID_NUMBER(SRC) (NUMBER_ERROR) #define WRITE_INTEGER(VALUE, SRC, WRITER) (WRITER).append_s64((VALUE)) #define WRITE_UNSIGNED(VALUE, SRC, WRITER) (WRITER).append_u64((VALUE)) #define WRITE_DOUBLE(VALUE, SRC, WRITER) (WRITER).append_double((VALUE)) #endif namespace { // Convert a mantissa, an exponent and a sign bit into an ieee64 double. // The real_exponent needs to be in [0, 2046] (technically real_exponent = 2047 would be acceptable). // The mantissa should be in [0,1<<53). 
The bit at index (1ULL << 52) while be zeroed. simdjson_inline double to_double(uint64_t mantissa, uint64_t real_exponent, bool negative) { double d; mantissa &= ~(1ULL << 52); mantissa |= real_exponent << 52; mantissa |= ((static_cast(negative)) << 63); std::memcpy(&d, &mantissa, sizeof(d)); return d; } // Attempts to compute i * 10^(power) exactly; and if "negative" is // true, negate the result. // This function will only work in some cases, when it does not work, success is // set to false. This should work *most of the time* (like 99% of the time). // We assume that power is in the [smallest_power, // largest_power] interval: the caller is responsible for this check. simdjson_inline bool compute_float_64(int64_t power, uint64_t i, bool negative, double &d) { // we start with a fast path // It was described in // Clinger WD. How to read floating point numbers accurately. // ACM SIGPLAN Notices. 1990 #ifndef FLT_EVAL_METHOD #error "FLT_EVAL_METHOD should be defined, please include cfloat." #endif #if (FLT_EVAL_METHOD != 1) && (FLT_EVAL_METHOD != 0) // We cannot be certain that x/y is rounded to nearest. if (0 <= power && power <= 22 && i <= 9007199254740991) #else if (-22 <= power && power <= 22 && i <= 9007199254740991) #endif { // convert the integer into a double. This is lossless since // 0 <= i <= 2^53 - 1. d = double(i); // // The general idea is as follows. // If 0 <= s < 2^53 and if 10^0 <= p <= 10^22 then // 1) Both s and p can be represented exactly as 64-bit floating-point // values // (binary64). // 2) Because s and p can be represented exactly as floating-point values, // then s * p // and s / p will produce correctly rounded values. // if (power < 0) { d = d / simdjson::internal::power_of_ten[-power]; } else { d = d * simdjson::internal::power_of_ten[power]; } if (negative) { d = -d; } return true; } // When 22 < power && power < 22 + 16, we could // hope for another, secondary fast path. It was // described by David M. 
Gay in "Correctly rounded // binary-decimal and decimal-binary conversions." (1990) // If you need to compute i * 10^(22 + x) for x < 16, // first compute i * 10^x, if you know that result is exact // (e.g., when i * 10^x < 2^53), // then you can still proceed and do (i * 10^x) * 10^22. // Is this worth your time? // You need 22 < power *and* power < 22 + 16 *and* (i * 10^(x-22) < 2^53) // for this second fast path to work. // If you you have 22 < power *and* power < 22 + 16, and then you // optimistically compute "i * 10^(x-22)", there is still a chance that you // have wasted your time if i * 10^(x-22) >= 2^53. It makes the use cases of // this optimization maybe less common than we would like. Source: // http://www.exploringbinary.com/fast-path-decimal-to-floating-point-conversion/ // also used in RapidJSON: https://rapidjson.org/strtod_8h_source.html // The fast path has now failed, so we are failing back on the slower path. // In the slow path, we need to adjust i so that it is > 1<<63 which is always // possible, except if i == 0, so we handle i == 0 separately. if(i == 0) { d = negative ? -0.0 : 0.0; return true; } // The exponent is 1024 + 63 + power // + floor(log(5**power)/log(2)). // The 1024 comes from the ieee64 standard. // The 63 comes from the fact that we use a 64-bit word. // // Computing floor(log(5**power)/log(2)) could be // slow. Instead we use a fast function. // // For power in (-400,350), we have that // (((152170 + 65536) * power ) >> 16); // is equal to // floor(log(5**power)/log(2)) + power when power >= 0 // and it is equal to // ceil(log(5**-power)/log(2)) + power when power < 0 // // The 65536 is (1<<16) and corresponds to // (65536 * power) >> 16 ---> power // // ((152170 * power ) >> 16) is equal to // floor(log(5**power)/log(2)) // // Note that this is not magic: 152170/(1<<16) is // approximatively equal to log(5)/log(2). // The 1<<16 value is a power of two; we could use a // larger power of 2 if we wanted to. 
// int64_t exponent = (((152170 + 65536) * power) >> 16) + 1024 + 63; // We want the most significant bit of i to be 1. Shift if needed. int lz = leading_zeroes(i); i <<= lz; // We are going to need to do some 64-bit arithmetic to get a precise product. // We use a table lookup approach. // It is safe because // power >= smallest_power // and power <= largest_power // We recover the mantissa of the power, it has a leading 1. It is always // rounded down. // // We want the most significant 64 bits of the product. We know // this will be non-zero because the most significant bit of i is // 1. const uint32_t index = 2 * uint32_t(power - simdjson::internal::smallest_power); // Optimization: It may be that materializing the index as a variable might confuse some compilers and prevent effective complex-addressing loads. (Done for code clarity.) // // The full_multiplication function computes the 128-bit product of two 64-bit words // with a returned value of type value128 with a "low component" corresponding to the // 64-bit least significant bits of the product and with a "high component" corresponding // to the 64-bit most significant bits of the product. simdjson::internal::value128 firstproduct = full_multiplication(i, simdjson::internal::power_of_five_128[index]); // Both i and power_of_five_128[index] have their most significant bit set to 1 which // implies that the either the most or the second most significant bit of the product // is 1. We pack values in this manner for efficiency reasons: it maximizes the use // we make of the product. It also makes it easy to reason about the product: there // is 0 or 1 leading zero in the product. // Unless the least significant 9 bits of the high (64-bit) part of the full // product are all 1s, then we know that the most significant 55 bits are // exact and no further work is needed. 
Having 55 bits is necessary because // we need 53 bits for the mantissa but we have to have one rounding bit and // we can waste a bit if the most significant bit of the product is zero. if((firstproduct.high & 0x1FF) == 0x1FF) { // We want to compute i * 5^q, but only care about the top 55 bits at most. // Consider the scenario where q>=0. Then 5^q may not fit in 64-bits. Doing // the full computation is wasteful. So we do what is called a "truncated // multiplication". // We take the most significant 64-bits, and we put them in // power_of_five_128[index]. Usually, that's good enough to approximate i * 5^q // to the desired approximation using one multiplication. Sometimes it does not suffice. // Then we store the next most significant 64 bits in power_of_five_128[index + 1], and // then we get a better approximation to i * 5^q. // // That's for when q>=0. The logic for q<0 is somewhat similar but it is somewhat // more complicated. // // There is an extra layer of complexity in that we need more than 55 bits of // accuracy in the round-to-even scenario. // // The full_multiplication function computes the 128-bit product of two 64-bit words // with a returned value of type value128 with a "low component" corresponding to the // 64-bit least significant bits of the product and with a "high component" corresponding // to the 64-bit most significant bits of the product. simdjson::internal::value128 secondproduct = full_multiplication(i, simdjson::internal::power_of_five_128[index + 1]); firstproduct.low += secondproduct.high; if(secondproduct.high > firstproduct.low) { firstproduct.high++; } // As it has been proven by Noble Mushtak and Daniel Lemire in "Fast Number Parsing Without // Fallback" (https://arxiv.org/abs/2212.06644), at this point we are sure that the product // is sufficiently accurate, and more computation is not needed. } uint64_t lower = firstproduct.low; uint64_t upper = firstproduct.high; // The final mantissa should be 53 bits with a leading 1. 
// We shift it so that it occupies 54 bits with a leading 1. /////// uint64_t upperbit = upper >> 63; uint64_t mantissa = upper >> (upperbit + 9); lz += int(1 ^ upperbit); // Here we have mantissa < (1<<54). int64_t real_exponent = exponent - lz; if (simdjson_unlikely(real_exponent <= 0)) { // we have a subnormal? // Here have that real_exponent <= 0 so -real_exponent >= 0 if(-real_exponent + 1 >= 64) { // if we have more than 64 bits below the minimum exponent, you have a zero for sure. d = negative ? -0.0 : 0.0; return true; } // next line is safe because -real_exponent + 1 < 0 mantissa >>= -real_exponent + 1; // Thankfully, we can't have both "round-to-even" and subnormals because // "round-to-even" only occurs for powers close to 0. mantissa += (mantissa & 1); // round up mantissa >>= 1; // There is a weird scenario where we don't have a subnormal but just. // Suppose we start with 2.2250738585072013e-308, we end up // with 0x3fffffffffffff x 2^-1023-53 which is technically subnormal // whereas 0x40000000000000 x 2^-1023-53 is normal. Now, we need to round // up 0x3fffffffffffff x 2^-1023-53 and once we do, we are no longer // subnormal, but we can only know this after rounding. // So we only declare a subnormal if we are smaller than the threshold. real_exponent = (mantissa < (uint64_t(1) << 52)) ? 0 : 1; d = to_double(mantissa, real_exponent, negative); return true; } // We have to round to even. The "to even" part // is only a problem when we are right in between two floats // which we guard against. // If we have lots of trailing zeros, we may fall right between two // floating-point values. // // The round-to-even cases take the form of a number 2m+1 which is in (2^53,2^54] // times a power of two. That is, it is right between a number with binary significand // m and another number with binary significand m+1; and it must be the case // that it cannot be represented by a float itself. 
// // We must have that w * 10 ^q == (2m+1) * 2^p for some power of two 2^p. // Recall that 10^q = 5^q * 2^q. // When q >= 0, we must have that (2m+1) is divible by 5^q, so 5^q <= 2^54. We have that // 5^23 <= 2^54 and it is the last power of five to qualify, so q <= 23. // When q<0, we have w >= (2m+1) x 5^{-q}. We must have that w<2^{64} so // (2m+1) x 5^{-q} < 2^{64}. We have that 2m+1>2^{53}. Hence, we must have // 2^{53} x 5^{-q} < 2^{64}. // Hence we have 5^{-q} < 2^{11}$ or q>= -4. // // We require lower <= 1 and not lower == 0 because we could not prove that // that lower == 0 is implied; but we could prove that lower <= 1 is a necessary and sufficient test. if (simdjson_unlikely((lower <= 1) && (power >= -4) && (power <= 23) && ((mantissa & 3) == 1))) { if((mantissa << (upperbit + 64 - 53 - 2)) == upper) { mantissa &= ~1; // flip it so that we do not round up } } mantissa += mantissa & 1; mantissa >>= 1; // Here we have mantissa < (1<<53), unless there was an overflow if (mantissa >= (1ULL << 53)) { ////////// // This will happen when parsing values such as 7.2057594037927933e+16 //////// mantissa = (1ULL << 52); real_exponent++; } mantissa &= ~(1ULL << 52); // we have to check that real_exponent is in range, otherwise we bail out if (simdjson_unlikely(real_exponent > 2046)) { // We have an infinite value!!! We could actually throw an error here if we could. return false; } d = to_double(mantissa, real_exponent, negative); return true; } // We call a fallback floating-point parser that might be slow. Note // it will accept JSON numbers, but the JSON spec. is more restrictive so // before you call parse_float_fallback, you need to have validated the input // string with the JSON grammar. // It will return an error (false) if the parsed number is infinite. // The string parsing itself always succeeds. We know that there is at least // one digit. 
static bool parse_float_fallback(const uint8_t *ptr, double *outDouble) { *outDouble = simdjson::internal::from_chars(reinterpret_cast(ptr)); // We do not accept infinite values. // Detecting finite values in a portable manner is ridiculously hard, ideally // we would want to do: // return !std::isfinite(*outDouble); // but that mysteriously fails under legacy/old libc++ libraries, see // https://github.com/simdjson/simdjson/issues/1286 // // Therefore, fall back to this solution (the extra parens are there // to handle that max may be a macro on windows). return !(*outDouble > (std::numeric_limits::max)() || *outDouble < std::numeric_limits::lowest()); } static bool parse_float_fallback(const uint8_t *ptr, const uint8_t *end_ptr, double *outDouble) { *outDouble = simdjson::internal::from_chars(reinterpret_cast(ptr), reinterpret_cast(end_ptr)); // We do not accept infinite values. // Detecting finite values in a portable manner is ridiculously hard, ideally // we would want to do: // return !std::isfinite(*outDouble); // but that mysteriously fails under legacy/old libc++ libraries, see // https://github.com/simdjson/simdjson/issues/1286 // // Therefore, fall back to this solution (the extra parens are there // to handle that max may be a macro on windows). 
return !(*outDouble > (std::numeric_limits::max)() || *outDouble < std::numeric_limits::lowest()); } // check quickly whether the next 8 chars are made of digits // at a glance, it looks better than Mula's // http://0x80.pl/articles/swar-digits-validate.html simdjson_inline bool is_made_of_eight_digits_fast(const uint8_t *chars) { uint64_t val; // this can read up to 7 bytes beyond the buffer size, but we require // SIMDJSON_PADDING of padding static_assert(7 <= SIMDJSON_PADDING, "SIMDJSON_PADDING must be bigger than 7"); std::memcpy(&val, chars, 8); // a branchy method might be faster: // return (( val & 0xF0F0F0F0F0F0F0F0 ) == 0x3030303030303030) // && (( (val + 0x0606060606060606) & 0xF0F0F0F0F0F0F0F0 ) == // 0x3030303030303030); return (((val & 0xF0F0F0F0F0F0F0F0) | (((val + 0x0606060606060606) & 0xF0F0F0F0F0F0F0F0) >> 4)) == 0x3333333333333333); } template SIMDJSON_NO_SANITIZE_UNDEFINED // We deliberately allow overflow here and check later simdjson_inline bool parse_digit(const uint8_t c, I &i) { const uint8_t digit = static_cast(c - '0'); if (digit > 9) { return false; } // PERF NOTE: multiplication by 10 is cheaper than arbitrary integer multiplication i = 10 * i + digit; // might overflow, we will handle the overflow later return true; } simdjson_inline error_code parse_decimal_after_separator(simdjson_unused const uint8_t *const src, const uint8_t *&p, uint64_t &i, int64_t &exponent) { // we continue with the fiction that we have an integer. If the // floating point number is representable as x * 10^z for some integer // z that fits in 53 bits, then we will be able to convert back the // the integer into a float in a lossless manner. const uint8_t *const first_after_period = p; #ifdef SIMDJSON_SWAR_NUMBER_PARSING #if SIMDJSON_SWAR_NUMBER_PARSING // this helps if we have lots of decimals! // this turns out to be frequent enough. 
if (is_made_of_eight_digits_fast(p)) { i = i * 100000000 + parse_eight_digits_unrolled(p); p += 8; } #endif // SIMDJSON_SWAR_NUMBER_PARSING #endif // #ifdef SIMDJSON_SWAR_NUMBER_PARSING // Unrolling the first digit makes a small difference on some implementations (e.g. westmere) if (parse_digit(*p, i)) { ++p; } while (parse_digit(*p, i)) { p++; } exponent = first_after_period - p; // Decimal without digits (123.) is illegal if (exponent == 0) { return INVALID_NUMBER(src); } return SUCCESS; } simdjson_inline error_code parse_exponent(simdjson_unused const uint8_t *const src, const uint8_t *&p, int64_t &exponent) { // Exp Sign: -123.456e[-]78 bool neg_exp = ('-' == *p); if (neg_exp || '+' == *p) { p++; } // Skip + as well // Exponent: -123.456e-[78] auto start_exp = p; int64_t exp_number = 0; while (parse_digit(*p, exp_number)) { ++p; } // It is possible for parse_digit to overflow. // In particular, it could overflow to INT64_MIN, and we cannot do - INT64_MIN. // Thus we *must* check for possible overflow before we negate exp_number. // Performance notes: it may seem like combining the two "simdjson_unlikely checks" below into // a single simdjson_unlikely path would be faster. The reasoning is sound, but the compiler may // not oblige and may, in fact, generate two distinct paths in any case. It might be // possible to do uint64_t(p - start_exp - 1) >= 18 but it could end up trading off // instructions for a simdjson_likely branch, an unconclusive gain. // If there were no digits, it's an error. if (simdjson_unlikely(p == start_exp)) { return INVALID_NUMBER(src); } // We have a valid positive exponent in exp_number at this point, except that // it may have overflowed. // If there were more than 18 digits, we may have overflowed the integer. We have to do // something!!!! 
if (simdjson_unlikely(p > start_exp+18)) { // Skip leading zeroes: 1e000000000000000000001 is technically valid and doesn't overflow while (*start_exp == '0') { start_exp++; } // 19 digits could overflow int64_t and is kind of absurd anyway. We don't // support exponents smaller than -999,999,999,999,999,999 and bigger // than 999,999,999,999,999,999. // We can truncate. // Note that 999999999999999999 is assuredly too large. The maximal ieee64 value before // infinity is ~1.8e308. The smallest subnormal is ~5e-324. So, actually, we could // truncate at 324. // Note that there is no reason to fail per se at this point in time. // E.g., 0e999999999999999999999 is a fine number. if (p > start_exp+18) { exp_number = 999999999999999999; } } // At this point, we know that exp_number is a sane, positive, signed integer. // It is <= 999,999,999,999,999,999. As long as 'exponent' is in // [-8223372036854775808, 8223372036854775808], we won't overflow. Because 'exponent' // is bounded in magnitude by the size of the JSON input, we are fine in this universe. // To sum it up: the next line should never overflow. exponent += (neg_exp ? -exp_number : exp_number); return SUCCESS; } simdjson_inline size_t significant_digits(const uint8_t * start_digits, size_t digit_count) { // It is possible that the integer had an overflow. // We have to handle the case where we have 0.0000somenumber. const uint8_t *start = start_digits; while ((*start == '0') || (*start == '.')) { ++start; } // we over-decrement by one when there is a '.' 
return digit_count - size_t(start - start_digits); } } // unnamed namespace /** @private */ template error_code slow_float_parsing(simdjson_unused const uint8_t * src, W writer) { double d; if (parse_float_fallback(src, &d)) { writer.append_double(d); return SUCCESS; } return INVALID_NUMBER(src); } /** @private */ template simdjson_inline error_code write_float(const uint8_t *const src, bool negative, uint64_t i, const uint8_t * start_digits, size_t digit_count, int64_t exponent, W &writer) { // If we frequently had to deal with long strings of digits, // we could extend our code by using a 128-bit integer instead // of a 64-bit integer. However, this is uncommon in practice. // // 9999999999999999999 < 2**64 so we can accommodate 19 digits. // If we have a decimal separator, then digit_count - 1 is the number of digits, but we // may not have a decimal separator! if (simdjson_unlikely(digit_count > 19 && significant_digits(start_digits, digit_count) > 19)) { // Ok, chances are good that we had an overflow! // this is almost never going to get called!!! // we start anew, going slowly!!! // This will happen in the following examples: // 10000000000000000000000000000000000000000000e+308 // 3.1415926535897932384626433832795028841971693993751 // // NOTE: This makes a *copy* of the writer and passes it to slow_float_parsing. This happens // because slow_float_parsing is a non-inlined function. If we passed our writer reference to // it, it would force it to be stored in memory, preventing the compiler from picking it apart // and putting into registers. i.e. if we pass it as reference, it gets slow. // This is what forces the skip_double, as well. 
error_code error = slow_float_parsing(src, writer); writer.skip_double(); return error; } // NOTE: it's weird that the simdjson_unlikely() only wraps half the if, but it seems to get slower any other // way we've tried: https://github.com/simdjson/simdjson/pull/990#discussion_r448497331 // To future reader: we'd love if someone found a better way, or at least could explain this result! if (simdjson_unlikely(exponent < simdjson::internal::smallest_power) || (exponent > simdjson::internal::largest_power)) { // // Important: smallest_power is such that it leads to a zero value. // Observe that 18446744073709551615e-343 == 0, i.e. (2**64 - 1) e -343 is zero // so something x 10^-343 goes to zero, but not so with something x 10^-342. static_assert(simdjson::internal::smallest_power <= -342, "smallest_power is not small enough"); // if((exponent < simdjson::internal::smallest_power) || (i == 0)) { // E.g. Parse "-0.0e-999" into the same value as "-0.0". See https://en.wikipedia.org/wiki/Signed_zero WRITE_DOUBLE(negative ? -0.0 : 0.0, src, writer); return SUCCESS; } else { // (exponent > largest_power) and (i != 0) // We have, for sure, an infinite value and simdjson refuses to parse infinite values. return INVALID_NUMBER(src); } } double d; if (!compute_float_64(exponent, i, negative, d)) { // we are almost never going to get here. 
if (!parse_float_fallback(src, &d)) { return INVALID_NUMBER(src); } } WRITE_DOUBLE(d, src, writer); return SUCCESS; } // for performance analysis, it is sometimes useful to skip parsing #ifdef SIMDJSON_SKIPNUMBERPARSING template simdjson_inline error_code parse_number(const uint8_t *const, W &writer) { writer.append_s64(0); // always write zero return SUCCESS; // always succeeds } simdjson_unused simdjson_inline simdjson_result parse_unsigned(const uint8_t * const src) noexcept { return 0; } simdjson_unused simdjson_inline simdjson_result parse_integer(const uint8_t * const src) noexcept { return 0; } simdjson_unused simdjson_inline simdjson_result parse_double(const uint8_t * const src) noexcept { return 0; } simdjson_unused simdjson_inline simdjson_result parse_unsigned_in_string(const uint8_t * const src) noexcept { return 0; } simdjson_unused simdjson_inline simdjson_result parse_integer_in_string(const uint8_t * const src) noexcept { return 0; } simdjson_unused simdjson_inline simdjson_result parse_double_in_string(const uint8_t * const src) noexcept { return 0; } simdjson_unused simdjson_inline bool is_negative(const uint8_t * src) noexcept { return false; } simdjson_unused simdjson_inline simdjson_result is_integer(const uint8_t * src) noexcept { return false; } simdjson_unused simdjson_inline simdjson_result get_number_type(const uint8_t * src) noexcept { return number_type::signed_integer; } #else // parse the number at src // define JSON_TEST_NUMBERS for unit testing // // It is assumed that the number is followed by a structural ({,},],[) character // or a white space character. If that is not the case (e.g., when the JSON // document is made of a single number), then it is necessary to copy the // content and append a space before calling this function. // // Our objective is accurate parsing (ULP of 0) at high speed. 
template simdjson_inline error_code parse_number(const uint8_t *const src, W &writer) { // // Check for minus sign // bool negative = (*src == '-'); const uint8_t *p = src + uint8_t(negative); // // Parse the integer part. // // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare const uint8_t *const start_digits = p; uint64_t i = 0; while (parse_digit(*p, i)) { p++; } // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. // Optimization note: size_t is expected to be unsigned. size_t digit_count = size_t(p - start_digits); if (digit_count == 0 || ('0' == *start_digits && digit_count > 1)) { return INVALID_NUMBER(src); } // // Handle floats if there is a . or e (or both) // int64_t exponent = 0; bool is_float = false; if ('.' == *p) { is_float = true; ++p; SIMDJSON_TRY( parse_decimal_after_separator(src, p, i, exponent) ); digit_count = int(p - start_digits); // used later to guard against overflows } if (('e' == *p) || ('E' == *p)) { is_float = true; ++p; SIMDJSON_TRY( parse_exponent(src, p, exponent) ); } if (is_float) { const bool dirty_end = jsoncharutils::is_not_structural_or_whitespace(*p); SIMDJSON_TRY( write_float(src, negative, i, start_digits, digit_count, exponent, writer) ); if (dirty_end) { return INVALID_NUMBER(src); } return SUCCESS; } // The longest negative 64-bit number is 19 digits. // The longest positive 64-bit number is 20 digits. // We do it this way so we don't trigger this branch unless we must. size_t longest_digit_count = negative ? 
19 : 20; if (digit_count > longest_digit_count) { return INVALID_NUMBER(src); } if (digit_count == longest_digit_count) { if (negative) { // Anything negative above INT64_MAX+1 is invalid if (i > uint64_t(INT64_MAX)+1) { return INVALID_NUMBER(src); } WRITE_INTEGER(~i+1, src, writer); if (jsoncharutils::is_not_structural_or_whitespace(*p)) { return INVALID_NUMBER(src); } return SUCCESS; // Positive overflow check: // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the // biggest uint64_t. // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. // If we got here, it's a 20 digit number starting with the digit "1". // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller // than 1,553,255,926,290,448,384. // - That is smaller than the smallest possible 20-digit number the user could write: // 10,000,000,000,000,000,000. // - Therefore, if the number is positive and lower than that, it's overflow. // - The value we are looking at is less than or equal to INT64_MAX. // } else if (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INVALID_NUMBER(src); } } // Write unsigned if it doesn't fit in a signed integer. if (i > uint64_t(INT64_MAX)) { WRITE_UNSIGNED(i, src, writer); } else { WRITE_INTEGER(negative ? (~i+1) : i, src, writer); } if (jsoncharutils::is_not_structural_or_whitespace(*p)) { return INVALID_NUMBER(src); } return SUCCESS; } // Inlineable functions namespace { // This table can be used to characterize the final character of an integer // string. For JSON structural character and allowable white space characters, // we return SUCCESS. For 'e', '.' and 'E', we return INCORRECT_TYPE. Otherwise // we return NUMBER_ERROR. // Optimization note: we could easily reduce the size of the table by half (to 128) // at the cost of an extra branch. 
// Optimization note: we want the values to use at most 8 bits (not, e.g., 32 bits): static_assert(error_code(uint8_t(NUMBER_ERROR))== NUMBER_ERROR, "bad NUMBER_ERROR cast"); static_assert(error_code(uint8_t(SUCCESS))== SUCCESS, "bad NUMBER_ERROR cast"); static_assert(error_code(uint8_t(INCORRECT_TYPE))== INCORRECT_TYPE, "bad NUMBER_ERROR cast"); const uint8_t integer_string_finisher[256] = { NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, SUCCESS, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, INCORRECT_TYPE, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, INCORRECT_TYPE, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, INCORRECT_TYPE, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, 
NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR}; // Parse any number from 0 to 18,446,744,073,709,551,615 
simdjson_unused simdjson_inline simdjson_result parse_unsigned(const uint8_t * const src) noexcept { const uint8_t *p = src; // // Parse the integer part. // // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare const uint8_t *const start_digits = p; uint64_t i = 0; while (parse_digit(*p, i)) { p++; } // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. // Optimization note: size_t is expected to be unsigned. size_t digit_count = size_t(p - start_digits); // The longest positive 64-bit number is 20 digits. // We do it this way so we don't trigger this branch unless we must. // Optimization note: the compiler can probably merge // ((digit_count == 0) || (digit_count > 20)) // into a single branch since digit_count is unsigned. if ((digit_count == 0) || (digit_count > 20)) { return INCORRECT_TYPE; } // Here digit_count > 0. if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } // We can do the following... // if (!jsoncharutils::is_structural_or_whitespace(*p)) { // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; // } // as a single table lookup: if (integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } if (digit_count == 20) { // Positive overflow check: // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the // biggest uint64_t. // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. // If we got here, it's a 20 digit number starting with the digit "1". // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller // than 1,553,255,926,290,448,384. // - That is smaller than the smallest possible 20-digit number the user could write: // 10,000,000,000,000,000,000. // - Therefore, if the number is positive and lower than that, it's overflow. 
// - The value we are looking at is less than or equal to INT64_MAX. // if (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INCORRECT_TYPE; } } return i; } // Parse any number from 0 to 18,446,744,073,709,551,615 // Never read at src_end or beyond simdjson_unused simdjson_inline simdjson_result parse_unsigned(const uint8_t * const src, const uint8_t * const src_end) noexcept { const uint8_t *p = src; // // Parse the integer part. // // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare const uint8_t *const start_digits = p; uint64_t i = 0; while ((p != src_end) && parse_digit(*p, i)) { p++; } // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. // Optimization note: size_t is expected to be unsigned. size_t digit_count = size_t(p - start_digits); // The longest positive 64-bit number is 20 digits. // We do it this way so we don't trigger this branch unless we must. // Optimization note: the compiler can probably merge // ((digit_count == 0) || (digit_count > 20)) // into a single branch since digit_count is unsigned. if ((digit_count == 0) || (digit_count > 20)) { return INCORRECT_TYPE; } // Here digit_count > 0. if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } // We can do the following... // if (!jsoncharutils::is_structural_or_whitespace(*p)) { // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; // } // as a single table lookup: if ((p != src_end) && integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } if (digit_count == 20) { // Positive overflow check: // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the // biggest uint64_t. // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. // If we got here, it's a 20 digit number starting with the digit "1". 
// - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller // than 1,553,255,926,290,448,384. // - That is smaller than the smallest possible 20-digit number the user could write: // 10,000,000,000,000,000,000. // - Therefore, if the number is positive and lower than that, it's overflow. // - The value we are looking at is less than or equal to INT64_MAX. // if (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INCORRECT_TYPE; } } return i; } // Parse any number from 0 to 18,446,744,073,709,551,615 simdjson_unused simdjson_inline simdjson_result parse_unsigned_in_string(const uint8_t * const src) noexcept { const uint8_t *p = src + 1; // // Parse the integer part. // // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare const uint8_t *const start_digits = p; uint64_t i = 0; while (parse_digit(*p, i)) { p++; } // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. // Optimization note: size_t is expected to be unsigned. size_t digit_count = size_t(p - start_digits); // The longest positive 64-bit number is 20 digits. // We do it this way so we don't trigger this branch unless we must. // Optimization note: the compiler can probably merge // ((digit_count == 0) || (digit_count > 20)) // into a single branch since digit_count is unsigned. if ((digit_count == 0) || (digit_count > 20)) { return INCORRECT_TYPE; } // Here digit_count > 0. if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } // We can do the following... // if (!jsoncharutils::is_structural_or_whitespace(*p)) { // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; // } // as a single table lookup: if (*p != '"') { return NUMBER_ERROR; } if (digit_count == 20) { // Positive overflow check: // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the // biggest uint64_t. 
// - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. // If we got here, it's a 20 digit number starting with the digit "1". // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller // than 1,553,255,926,290,448,384. // - That is smaller than the smallest possible 20-digit number the user could write: // 10,000,000,000,000,000,000. // - Therefore, if the number is positive and lower than that, it's overflow. // - The value we are looking at is less than or equal to INT64_MAX. // // Note: we use src[1] and not src[0] because src[0] is the quote character in this // instance. if (src[1] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INCORRECT_TYPE; } } return i; } // Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 simdjson_unused simdjson_inline simdjson_result parse_integer(const uint8_t *src) noexcept { // // Check for minus sign // bool negative = (*src == '-'); const uint8_t *p = src + uint8_t(negative); // // Parse the integer part. // // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare const uint8_t *const start_digits = p; uint64_t i = 0; while (parse_digit(*p, i)) { p++; } // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. // Optimization note: size_t is expected to be unsigned. size_t digit_count = size_t(p - start_digits); // We go from // -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 // so we can never represent numbers that have more than 19 digits. size_t longest_digit_count = 19; // Optimization note: the compiler can probably merge // ((digit_count == 0) || (digit_count > longest_digit_count)) // into a single branch since digit_count is unsigned. if ((digit_count == 0) || (digit_count > longest_digit_count)) { return INCORRECT_TYPE; } // Here digit_count > 0. 
if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } // We can do the following... // if (!jsoncharutils::is_structural_or_whitespace(*p)) { // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; // } // as a single table lookup: if(integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } // Negative numbers have can go down to - INT64_MAX - 1 whereas positive numbers are limited to INT64_MAX. // Performance note: This check is only needed when digit_count == longest_digit_count but it is // so cheap that we might as well always make it. if(i > uint64_t(INT64_MAX) + uint64_t(negative)) { return INCORRECT_TYPE; } return negative ? (~i+1) : i; } // Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 // Never read at src_end or beyond simdjson_unused simdjson_inline simdjson_result parse_integer(const uint8_t * const src, const uint8_t * const src_end) noexcept { // // Check for minus sign // if(src == src_end) { return NUMBER_ERROR; } bool negative = (*src == '-'); const uint8_t *p = src + uint8_t(negative); // // Parse the integer part. // // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare const uint8_t *const start_digits = p; uint64_t i = 0; while ((p != src_end) && parse_digit(*p, i)) { p++; } // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. // Optimization note: size_t is expected to be unsigned. size_t digit_count = size_t(p - start_digits); // We go from // -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 // so we can never represent numbers that have more than 19 digits. size_t longest_digit_count = 19; // Optimization note: the compiler can probably merge // ((digit_count == 0) || (digit_count > longest_digit_count)) // into a single branch since digit_count is unsigned. 
if ((digit_count == 0) || (digit_count > longest_digit_count)) { return INCORRECT_TYPE; } // Here digit_count > 0. if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } // We can do the following... // if (!jsoncharutils::is_structural_or_whitespace(*p)) { // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; // } // as a single table lookup: if((p != src_end) && integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } // Negative numbers have can go down to - INT64_MAX - 1 whereas positive numbers are limited to INT64_MAX. // Performance note: This check is only needed when digit_count == longest_digit_count but it is // so cheap that we might as well always make it. if(i > uint64_t(INT64_MAX) + uint64_t(negative)) { return INCORRECT_TYPE; } return negative ? (~i+1) : i; } // Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 simdjson_unused simdjson_inline simdjson_result parse_integer_in_string(const uint8_t *src) noexcept { // // Check for minus sign // bool negative = (*(src + 1) == '-'); src += uint8_t(negative) + 1; // // Parse the integer part. // // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare const uint8_t *const start_digits = src; uint64_t i = 0; while (parse_digit(*src, i)) { src++; } // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. // Optimization note: size_t is expected to be unsigned. size_t digit_count = size_t(src - start_digits); // We go from // -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 // so we can never represent numbers that have more than 19 digits. size_t longest_digit_count = 19; // Optimization note: the compiler can probably merge // ((digit_count == 0) || (digit_count > longest_digit_count)) // into a single branch since digit_count is unsigned. 
if ((digit_count == 0) || (digit_count > longest_digit_count)) { return INCORRECT_TYPE; } // Here digit_count > 0. if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } // We can do the following... // if (!jsoncharutils::is_structural_or_whitespace(*src)) { // return (*src == '.' || *src == 'e' || *src == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; // } // as a single table lookup: if(*src != '"') { return NUMBER_ERROR; } // Negative numbers have can go down to - INT64_MAX - 1 whereas positive numbers are limited to INT64_MAX. // Performance note: This check is only needed when digit_count == longest_digit_count but it is // so cheap that we might as well always make it. if(i > uint64_t(INT64_MAX) + uint64_t(negative)) { return INCORRECT_TYPE; } return negative ? (~i+1) : i; } simdjson_unused simdjson_inline simdjson_result parse_double(const uint8_t * src) noexcept { // // Check for minus sign // bool negative = (*src == '-'); src += uint8_t(negative); // // Parse the integer part. // uint64_t i = 0; const uint8_t *p = src; p += parse_digit(*p, i); bool leading_zero = (i == 0); while (parse_digit(*p, i)) { p++; } // no integer digits, or 0123 (zero must be solo) if ( p == src ) { return INCORRECT_TYPE; } if ( (leading_zero && p != src+1)) { return NUMBER_ERROR; } // // Parse the decimal part. // int64_t exponent = 0; bool overflow; if (simdjson_likely(*p == '.')) { p++; const uint8_t *start_decimal_digits = p; if (!parse_digit(*p, i)) { return NUMBER_ERROR; } // no decimal digits p++; while (parse_digit(*p, i)) { p++; } exponent = -(p - start_decimal_digits); // Overflow check. More than 19 digits (minus the decimal) may be overflow. 
overflow = p-src-1 > 19; if (simdjson_unlikely(overflow && leading_zero)) { // Skip leading 0.00000 and see if it still overflows const uint8_t *start_digits = src + 2; while (*start_digits == '0') { start_digits++; } overflow = p-start_digits > 19; } } else { overflow = p-src > 19; } // // Parse the exponent // if (*p == 'e' || *p == 'E') { p++; bool exp_neg = *p == '-'; p += exp_neg || *p == '+'; uint64_t exp = 0; const uint8_t *start_exp_digits = p; while (parse_digit(*p, exp)) { p++; } // no exp digits, or 20+ exp digits if (p-start_exp_digits == 0 || p-start_exp_digits > 19) { return NUMBER_ERROR; } exponent += exp_neg ? 0-exp : exp; } if (jsoncharutils::is_not_structural_or_whitespace(*p)) { return NUMBER_ERROR; } overflow = overflow || exponent < simdjson::internal::smallest_power || exponent > simdjson::internal::largest_power; // // Assemble (or slow-parse) the float // double d; if (simdjson_likely(!overflow)) { if (compute_float_64(exponent, i, negative, d)) { return d; } } if (!parse_float_fallback(src - uint8_t(negative), &d)) { return NUMBER_ERROR; } return d; } simdjson_unused simdjson_inline bool is_negative(const uint8_t * src) noexcept { return (*src == '-'); } simdjson_unused simdjson_inline simdjson_result is_integer(const uint8_t * src) noexcept { bool negative = (*src == '-'); src += uint8_t(negative); const uint8_t *p = src; while(static_cast(*p - '0') <= 9) { p++; } if ( p == src ) { return NUMBER_ERROR; } if (jsoncharutils::is_structural_or_whitespace(*p)) { return true; } return false; } simdjson_unused simdjson_inline simdjson_result get_number_type(const uint8_t * src) noexcept { bool negative = (*src == '-'); src += uint8_t(negative); const uint8_t *p = src; while(static_cast(*p - '0') <= 9) { p++; } if ( p == src ) { return NUMBER_ERROR; } if (jsoncharutils::is_structural_or_whitespace(*p)) { // We have an integer. // If the number is negative and valid, it must be a signed integer. 
if(negative) { return number_type::signed_integer; } // We want values larger or equal to 9223372036854775808 to be unsigned // integers, and the other values to be signed integers. int digit_count = int(p - src); if(digit_count >= 19) { const uint8_t * smaller_big_integer = reinterpret_cast("9223372036854775808"); if((digit_count >= 20) || (memcmp(src, smaller_big_integer, 19) >= 0)) { return number_type::unsigned_integer; } } return number_type::signed_integer; } // Hopefully, we have 'e' or 'E' or '.'. return number_type::floating_point_number; } // Never read at src_end or beyond simdjson_unused simdjson_inline simdjson_result parse_double(const uint8_t * src, const uint8_t * const src_end) noexcept { if(src == src_end) { return NUMBER_ERROR; } // // Check for minus sign // bool negative = (*src == '-'); src += uint8_t(negative); // // Parse the integer part. // uint64_t i = 0; const uint8_t *p = src; if(p == src_end) { return NUMBER_ERROR; } p += parse_digit(*p, i); bool leading_zero = (i == 0); while ((p != src_end) && parse_digit(*p, i)) { p++; } // no integer digits, or 0123 (zero must be solo) if ( p == src ) { return INCORRECT_TYPE; } if ( (leading_zero && p != src+1)) { return NUMBER_ERROR; } // // Parse the decimal part. // int64_t exponent = 0; bool overflow; if (simdjson_likely((p != src_end) && (*p == '.'))) { p++; const uint8_t *start_decimal_digits = p; if ((p == src_end) || !parse_digit(*p, i)) { return NUMBER_ERROR; } // no decimal digits p++; while ((p != src_end) && parse_digit(*p, i)) { p++; } exponent = -(p - start_decimal_digits); // Overflow check. More than 19 digits (minus the decimal) may be overflow. 
overflow = p-src-1 > 19; if (simdjson_unlikely(overflow && leading_zero)) { // Skip leading 0.00000 and see if it still overflows const uint8_t *start_digits = src + 2; while (*start_digits == '0') { start_digits++; } overflow = start_digits-src > 19; } } else { overflow = p-src > 19; } // // Parse the exponent // if ((p != src_end) && (*p == 'e' || *p == 'E')) { p++; if(p == src_end) { return NUMBER_ERROR; } bool exp_neg = *p == '-'; p += exp_neg || *p == '+'; uint64_t exp = 0; const uint8_t *start_exp_digits = p; while ((p != src_end) && parse_digit(*p, exp)) { p++; } // no exp digits, or 20+ exp digits if (p-start_exp_digits == 0 || p-start_exp_digits > 19) { return NUMBER_ERROR; } exponent += exp_neg ? 0-exp : exp; } if ((p != src_end) && jsoncharutils::is_not_structural_or_whitespace(*p)) { return NUMBER_ERROR; } overflow = overflow || exponent < simdjson::internal::smallest_power || exponent > simdjson::internal::largest_power; // // Assemble (or slow-parse) the float // double d; if (simdjson_likely(!overflow)) { if (compute_float_64(exponent, i, negative, d)) { return d; } } if (!parse_float_fallback(src - uint8_t(negative), src_end, &d)) { return NUMBER_ERROR; } return d; } simdjson_unused simdjson_inline simdjson_result parse_double_in_string(const uint8_t * src) noexcept { // // Check for minus sign // bool negative = (*(src + 1) == '-'); src += uint8_t(negative) + 1; // // Parse the integer part. // uint64_t i = 0; const uint8_t *p = src; p += parse_digit(*p, i); bool leading_zero = (i == 0); while (parse_digit(*p, i)) { p++; } // no integer digits, or 0123 (zero must be solo) if ( p == src ) { return INCORRECT_TYPE; } if ( (leading_zero && p != src+1)) { return NUMBER_ERROR; } // // Parse the decimal part. 
// int64_t exponent = 0; bool overflow; if (simdjson_likely(*p == '.')) { p++; const uint8_t *start_decimal_digits = p; if (!parse_digit(*p, i)) { return NUMBER_ERROR; } // no decimal digits p++; while (parse_digit(*p, i)) { p++; } exponent = -(p - start_decimal_digits); // Overflow check. More than 19 digits (minus the decimal) may be overflow. overflow = p-src-1 > 19; if (simdjson_unlikely(overflow && leading_zero)) { // Skip leading 0.00000 and see if it still overflows const uint8_t *start_digits = src + 2; while (*start_digits == '0') { start_digits++; } overflow = p-start_digits > 19; } } else { overflow = p-src > 19; } // // Parse the exponent // if (*p == 'e' || *p == 'E') { p++; bool exp_neg = *p == '-'; p += exp_neg || *p == '+'; uint64_t exp = 0; const uint8_t *start_exp_digits = p; while (parse_digit(*p, exp)) { p++; } // no exp digits, or 20+ exp digits if (p-start_exp_digits == 0 || p-start_exp_digits > 19) { return NUMBER_ERROR; } exponent += exp_neg ? 0-exp : exp; } if (*p != '"') { return NUMBER_ERROR; } overflow = overflow || exponent < simdjson::internal::smallest_power || exponent > simdjson::internal::largest_power; // // Assemble (or slow-parse) the float // double d; if (simdjson_likely(!overflow)) { if (compute_float_64(exponent, i, negative, d)) { return d; } } if (!parse_float_fallback(src - uint8_t(negative), &d)) { return NUMBER_ERROR; } return d; } } // unnamed namespace #endif // SIMDJSON_SKIPNUMBERPARSING } // namespace numberparsing inline std::ostream& operator<<(std::ostream& out, number_type type) noexcept { switch (type) { case number_type::signed_integer: out << "integer in [-9223372036854775808,9223372036854775808)"; break; case number_type::unsigned_integer: out << "unsigned integer in [9223372036854775808,18446744073709551616)"; break; case number_type::floating_point_number: out << "floating-point number (binary64)"; break; default: SIMDJSON_UNREACHABLE(); } return out; } } // namespace icelake } // namespace simdjson #endif 
// SIMDJSON_GENERIC_NUMBERPARSING_H /* end file simdjson/generic/numberparsing.h for icelake */ /* including simdjson/generic/implementation_simdjson_result_base-inl.h for icelake: #include "simdjson/generic/implementation_simdjson_result_base-inl.h" */ /* begin file simdjson/generic/implementation_simdjson_result_base-inl.h for icelake */ #ifndef SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_INL_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_INL_H */ /* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { namespace icelake { // // internal::implementation_simdjson_result_base inline implementation // template simdjson_inline void implementation_simdjson_result_base::tie(T &value, error_code &error) && noexcept { error = this->second; if (!error) { value = std::forward>(*this).first; } } template simdjson_warn_unused simdjson_inline error_code implementation_simdjson_result_base::get(T &value) && noexcept { error_code error; std::forward>(*this).tie(value, error); return error; } template simdjson_inline error_code implementation_simdjson_result_base::error() const noexcept { return this->second; } #if SIMDJSON_EXCEPTIONS template simdjson_inline T& implementation_simdjson_result_base::value() & noexcept(false) { if (error()) { throw simdjson_error(error()); } return this->first; } template simdjson_inline T&& implementation_simdjson_result_base::value() && noexcept(false) { return std::forward>(*this).take_value(); } template simdjson_inline T&& implementation_simdjson_result_base::take_value() && noexcept(false) { if (error()) { throw simdjson_error(error()); } return std::forward(this->first); } 
template simdjson_inline implementation_simdjson_result_base::operator T&&() && noexcept(false) { return std::forward>(*this).take_value(); } #endif // SIMDJSON_EXCEPTIONS template simdjson_inline const T& implementation_simdjson_result_base::value_unsafe() const& noexcept { return this->first; } template simdjson_inline T& implementation_simdjson_result_base::value_unsafe() & noexcept { return this->first; } template simdjson_inline T&& implementation_simdjson_result_base::value_unsafe() && noexcept { return std::forward(this->first); } template simdjson_inline implementation_simdjson_result_base::implementation_simdjson_result_base(T &&value, error_code error) noexcept : first{std::forward(value)}, second{error} {} template simdjson_inline implementation_simdjson_result_base::implementation_simdjson_result_base(error_code error) noexcept : implementation_simdjson_result_base(T{}, error) {} template simdjson_inline implementation_simdjson_result_base::implementation_simdjson_result_base(T &&value) noexcept : implementation_simdjson_result_base(std::forward(value), SUCCESS) {} } // namespace icelake } // namespace simdjson #endif // SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_INL_H /* end file simdjson/generic/implementation_simdjson_result_base-inl.h for icelake */ /* end file simdjson/generic/amalgamated.h for icelake */ /* including simdjson/icelake/end.h: #include "simdjson/icelake/end.h" */ /* begin file simdjson/icelake/end.h */ /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #include "simdjson/icelake/base.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ #if !SIMDJSON_CAN_ALWAYS_RUN_ICELAKE SIMDJSON_UNTARGET_REGION #endif /* undefining SIMDJSON_IMPLEMENTATION from "icelake" */ #undef SIMDJSON_IMPLEMENTATION /* end file simdjson/icelake/end.h */ #endif // SIMDJSON_ICELAKE_H /* end file simdjson/icelake.h */ #elif 
SIMDJSON_BUILTIN_IMPLEMENTATION_IS(ppc64) /* including simdjson/ppc64.h: #include "simdjson/ppc64.h" */ /* begin file simdjson/ppc64.h */ #ifndef SIMDJSON_PPC64_H #define SIMDJSON_PPC64_H /* including simdjson/ppc64/begin.h: #include "simdjson/ppc64/begin.h" */ /* begin file simdjson/ppc64/begin.h */ /* defining SIMDJSON_IMPLEMENTATION to "ppc64" */ #define SIMDJSON_IMPLEMENTATION ppc64 /* including simdjson/ppc64/base.h: #include "simdjson/ppc64/base.h" */ /* begin file simdjson/ppc64/base.h */ #ifndef SIMDJSON_PPC64_BASE_H #define SIMDJSON_PPC64_BASE_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #include "simdjson/base.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { /** * Implementation for ALTIVEC (PPC64). */ namespace ppc64 { class implementation; namespace { namespace simd { template struct simd8; template struct simd8x64; } // namespace simd } // unnamed namespace } // namespace ppc64 } // namespace simdjson #endif // SIMDJSON_PPC64_BASE_H /* end file simdjson/ppc64/base.h */ /* including simdjson/ppc64/intrinsics.h: #include "simdjson/ppc64/intrinsics.h" */ /* begin file simdjson/ppc64/intrinsics.h */ #ifndef SIMDJSON_PPC64_INTRINSICS_H #define SIMDJSON_PPC64_INTRINSICS_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #include "simdjson/ppc64/base.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ // This should be the correct header whether // you use visual studio or other compilers. #include // These are defined by altivec.h in GCC toolchain, it is safe to undef them. 
#ifdef bool #undef bool #endif #ifdef vector #undef vector #endif static_assert(sizeof(__vector unsigned char) <= simdjson::SIMDJSON_PADDING, "insufficient padding for ppc64"); #endif // SIMDJSON_PPC64_INTRINSICS_H /* end file simdjson/ppc64/intrinsics.h */ /* including simdjson/ppc64/bitmanipulation.h: #include "simdjson/ppc64/bitmanipulation.h" */ /* begin file simdjson/ppc64/bitmanipulation.h */ #ifndef SIMDJSON_PPC64_BITMANIPULATION_H #define SIMDJSON_PPC64_BITMANIPULATION_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #include "simdjson/ppc64/base.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { namespace ppc64 { namespace { // We sometimes call trailing_zero on inputs that are zero, // but the algorithms do not end up using the returned value. // Sadly, sanitizers are not smart enough to figure it out. SIMDJSON_NO_SANITIZE_UNDEFINED // This function can be used safely even if not all bytes have been // initialized. // See issue https://github.com/simdjson/simdjson/issues/1965 SIMDJSON_NO_SANITIZE_MEMORY simdjson_inline int trailing_zeroes(uint64_t input_num) { #if SIMDJSON_REGULAR_VISUAL_STUDIO unsigned long ret; // Search the mask data from least significant bit (LSB) // to the most significant bit (MSB) for a set bit (1). _BitScanForward64(&ret, input_num); return (int)ret; #else // SIMDJSON_REGULAR_VISUAL_STUDIO return __builtin_ctzll(input_num); #endif // SIMDJSON_REGULAR_VISUAL_STUDIO } /* result might be undefined when input_num is zero */ simdjson_inline uint64_t clear_lowest_bit(uint64_t input_num) { return input_num & (input_num - 1); } /* result might be undefined when input_num is zero */ simdjson_inline int leading_zeroes(uint64_t input_num) { #if SIMDJSON_REGULAR_VISUAL_STUDIO unsigned long leading_zero = 0; // Search the mask data from most significant bit (MSB) // to least significant bit (LSB) for a set bit (1). 
if (_BitScanReverse64(&leading_zero, input_num)) return (int)(63 - leading_zero); else return 64; #else return __builtin_clzll(input_num); #endif // SIMDJSON_REGULAR_VISUAL_STUDIO } #if SIMDJSON_REGULAR_VISUAL_STUDIO simdjson_inline int count_ones(uint64_t input_num) { // note: we do not support legacy 32-bit Windows in this kernel return __popcnt64(input_num); // Visual Studio wants two underscores } #else simdjson_inline int count_ones(uint64_t input_num) { return __builtin_popcountll(input_num); } #endif simdjson_inline bool add_overflow(uint64_t value1, uint64_t value2, uint64_t *result) { #if SIMDJSON_REGULAR_VISUAL_STUDIO *result = value1 + value2; return *result < value1; #else return __builtin_uaddll_overflow(value1, value2, reinterpret_cast(result)); #endif } } // unnamed namespace } // namespace ppc64 } // namespace simdjson #endif // SIMDJSON_PPC64_BITMANIPULATION_H /* end file simdjson/ppc64/bitmanipulation.h */ /* including simdjson/ppc64/bitmask.h: #include "simdjson/ppc64/bitmask.h" */ /* begin file simdjson/ppc64/bitmask.h */ #ifndef SIMDJSON_PPC64_BITMASK_H #define SIMDJSON_PPC64_BITMASK_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #include "simdjson/ppc64/base.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { namespace ppc64 { namespace { // // Perform a "cumulative bitwise xor," flipping bits each time a 1 is // encountered. // // For example, prefix_xor(00100100) == 00011100 // simdjson_inline uint64_t prefix_xor(uint64_t bitmask) { // You can use the version below, however gcc sometimes miscompiles // vec_pmsum_be, it happens somewhere around between 8 and 9th version. // The performance boost was not noticeable, falling back to a usual // implementation. 
// __vector unsigned long long all_ones = {~0ull, ~0ull}; // __vector unsigned long long mask = {bitmask, 0}; // // Clang and GCC return different values for pmsum for ull so cast it to one. // // Generally it is not specified by ALTIVEC ISA what is returned by // // vec_pmsum_be. // #if defined(__LITTLE_ENDIAN__) // return (uint64_t)(((__vector unsigned long long)vec_pmsum_be(all_ones, mask))[0]); // #else // return (uint64_t)(((__vector unsigned long long)vec_pmsum_be(all_ones, mask))[1]); // #endif bitmask ^= bitmask << 1; bitmask ^= bitmask << 2; bitmask ^= bitmask << 4; bitmask ^= bitmask << 8; bitmask ^= bitmask << 16; bitmask ^= bitmask << 32; return bitmask; } } // unnamed namespace } // namespace ppc64 } // namespace simdjson #endif /* end file simdjson/ppc64/bitmask.h */ /* including simdjson/ppc64/numberparsing_defs.h: #include "simdjson/ppc64/numberparsing_defs.h" */ /* begin file simdjson/ppc64/numberparsing_defs.h */ #ifndef SIMDJSON_PPC64_NUMBERPARSING_DEFS_H #define SIMDJSON_PPC64_NUMBERPARSING_DEFS_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #include "simdjson/ppc64/base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/ppc64/intrinsics.h" */ /* amalgamation skipped (editor-only): #include "simdjson/internal/numberparsing_tables.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ #include #if defined(__linux__) #include #elif defined(__FreeBSD__) #include #endif namespace simdjson { namespace ppc64 { namespace numberparsing { // we don't have appropriate instructions, so let us use a scalar function // credit: https://johnnylee-sde.github.io/Fast-numeric-string-to-int/ /** @private */ static simdjson_inline uint32_t parse_eight_digits_unrolled(const uint8_t *chars) { uint64_t val; std::memcpy(&val, chars, sizeof(uint64_t)); #ifdef __BIG_ENDIAN__ #if defined(__linux__) val = bswap_64(val); #elif defined(__FreeBSD__) val = 
bswap64(val); #endif #endif val = (val & 0x0F0F0F0F0F0F0F0F) * 2561 >> 8; val = (val & 0x00FF00FF00FF00FF) * 6553601 >> 16; return uint32_t((val & 0x0000FFFF0000FFFF) * 42949672960001 >> 32); } /** @private */ simdjson_inline internal::value128 full_multiplication(uint64_t value1, uint64_t value2) { internal::value128 answer; #if SIMDJSON_REGULAR_VISUAL_STUDIO || SIMDJSON_IS_32BITS #ifdef _M_ARM64 // ARM64 has native support for 64-bit multiplications, no need to emultate answer.high = __umulh(value1, value2); answer.low = value1 * value2; #else answer.low = _umul128(value1, value2, &answer.high); // _umul128 not available on ARM64 #endif // _M_ARM64 #else // SIMDJSON_REGULAR_VISUAL_STUDIO || SIMDJSON_IS_32BITS __uint128_t r = (static_cast<__uint128_t>(value1)) * value2; answer.low = uint64_t(r); answer.high = uint64_t(r >> 64); #endif return answer; } } // namespace numberparsing } // namespace ppc64 } // namespace simdjson #define SIMDJSON_SWAR_NUMBER_PARSING 1 #endif // SIMDJSON_PPC64_NUMBERPARSING_DEFS_H /* end file simdjson/ppc64/numberparsing_defs.h */ /* including simdjson/ppc64/simd.h: #include "simdjson/ppc64/simd.h" */ /* begin file simdjson/ppc64/simd.h */ #ifndef SIMDJSON_PPC64_SIMD_H #define SIMDJSON_PPC64_SIMD_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #include "simdjson/ppc64/base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/ppc64/bitmanipulation.h" */ /* amalgamation skipped (editor-only): #include "simdjson/internal/simdprune_tables.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ #include namespace simdjson { namespace ppc64 { namespace { namespace simd { using __m128i = __vector unsigned char; template struct base { __m128i value; // Zero constructor simdjson_inline base() : value{__m128i()} {} // Conversion from SIMD register simdjson_inline base(const __m128i _value) : value(_value) {} // Conversion to SIMD 
register simdjson_inline operator const __m128i &() const { return this->value; } simdjson_inline operator __m128i &() { return this->value; } // Bit operations simdjson_inline Child operator|(const Child other) const { return vec_or(this->value, (__m128i)other); } simdjson_inline Child operator&(const Child other) const { return vec_and(this->value, (__m128i)other); } simdjson_inline Child operator^(const Child other) const { return vec_xor(this->value, (__m128i)other); } simdjson_inline Child bit_andnot(const Child other) const { return vec_andc(this->value, (__m128i)other); } simdjson_inline Child &operator|=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast | other; return *this_cast; } simdjson_inline Child &operator&=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast & other; return *this_cast; } simdjson_inline Child &operator^=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast ^ other; return *this_cast; } }; template > struct base8 : base> { typedef uint16_t bitmask_t; typedef uint32_t bitmask2_t; simdjson_inline base8() : base>() {} simdjson_inline base8(const __m128i _value) : base>(_value) {} friend simdjson_inline Mask operator==(const simd8 lhs, const simd8 rhs) { return (__m128i)vec_cmpeq(lhs.value, (__m128i)rhs); } static const int SIZE = sizeof(base>::value); template simdjson_inline simd8 prev(simd8 prev_chunk) const { __m128i chunk = this->value; #ifdef __LITTLE_ENDIAN__ chunk = (__m128i)vec_reve(this->value); prev_chunk = (__m128i)vec_reve((__m128i)prev_chunk); #endif chunk = (__m128i)vec_sld((__m128i)prev_chunk, (__m128i)chunk, 16 - N); #ifdef __LITTLE_ENDIAN__ chunk = (__m128i)vec_reve((__m128i)chunk); #endif return chunk; } }; // SIMD byte mask type (returned by things like eq and gt) template <> struct simd8 : base8 { static simdjson_inline simd8 splat(bool _value) { return (__m128i)vec_splats((unsigned char)(-(!!_value))); } simdjson_inline simd8() 
: base8() {} simdjson_inline simd8(const __m128i _value) : base8(_value) {} // Splat constructor simdjson_inline simd8(bool _value) : base8(splat(_value)) {} simdjson_inline int to_bitmask() const { __vector unsigned long long result; const __m128i perm_mask = {0x78, 0x70, 0x68, 0x60, 0x58, 0x50, 0x48, 0x40, 0x38, 0x30, 0x28, 0x20, 0x18, 0x10, 0x08, 0x00}; result = ((__vector unsigned long long)vec_vbpermq((__m128i)this->value, (__m128i)perm_mask)); #ifdef __LITTLE_ENDIAN__ return static_cast(result[1]); #else return static_cast(result[0]); #endif } simdjson_inline bool any() const { return !vec_all_eq(this->value, (__m128i)vec_splats(0)); } simdjson_inline simd8 operator~() const { return this->value ^ (__m128i)splat(true); } }; template struct base8_numeric : base8 { static simdjson_inline simd8 splat(T value) { (void)value; return (__m128i)vec_splats(value); } static simdjson_inline simd8 zero() { return splat(0); } static simdjson_inline simd8 load(const T values[16]) { return (__m128i)(vec_vsx_ld(0, reinterpret_cast(values))); } // Repeat 16 values as many times as necessary (usually for lookup tables) static simdjson_inline simd8 repeat_16(T v0, T v1, T v2, T v3, T v4, T v5, T v6, T v7, T v8, T v9, T v10, T v11, T v12, T v13, T v14, T v15) { return simd8(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15); } simdjson_inline base8_numeric() : base8() {} simdjson_inline base8_numeric(const __m128i _value) : base8(_value) {} // Store to array simdjson_inline void store(T dst[16]) const { vec_vsx_st(this->value, 0, reinterpret_cast<__m128i *>(dst)); } // Override to distinguish from bool version simdjson_inline simd8 operator~() const { return *this ^ 0xFFu; } // Addition/subtraction are the same for signed and unsigned simdjson_inline simd8 operator+(const simd8 other) const { return (__m128i)((__m128i)this->value + (__m128i)other); } simdjson_inline simd8 operator-(const simd8 other) const { return (__m128i)((__m128i)this->value - 
(__m128i)other); } simdjson_inline simd8 &operator+=(const simd8 other) { *this = *this + other; return *static_cast *>(this); } simdjson_inline simd8 &operator-=(const simd8 other) { *this = *this - other; return *static_cast *>(this); } // Perform a lookup assuming the value is between 0 and 16 (undefined behavior // for out of range values) template simdjson_inline simd8 lookup_16(simd8 lookup_table) const { return (__m128i)vec_perm((__m128i)lookup_table, (__m128i)lookup_table, this->value); } // Copies to 'output" all bytes corresponding to a 0 in the mask (interpreted // as a bitset). Passing a 0 value for mask would be equivalent to writing out // every byte to output. Only the first 16 - count_ones(mask) bytes of the // result are significant but 16 bytes get written. Design consideration: it // seems like a function with the signature simd8 compress(uint32_t mask) // would be sensible, but the AVX ISA makes this kind of approach difficult. template simdjson_inline void compress(uint16_t mask, L *output) const { using internal::BitsSetTable256mul2; using internal::pshufb_combine_table; using internal::thintable_epi8; // this particular implementation was inspired by work done by @animetosho // we do it in two steps, first 8 bytes and then second 8 bytes uint8_t mask1 = uint8_t(mask); // least significant 8 bits uint8_t mask2 = uint8_t(mask >> 8); // most significant 8 bits // next line just loads the 64-bit values thintable_epi8[mask1] and // thintable_epi8[mask2] into a 128-bit register, using only // two instructions on most compilers. 
#ifdef __LITTLE_ENDIAN__ __m128i shufmask = (__m128i)(__vector unsigned long long){ thintable_epi8[mask1], thintable_epi8[mask2]}; #else __m128i shufmask = (__m128i)(__vector unsigned long long){ thintable_epi8[mask2], thintable_epi8[mask1]}; shufmask = (__m128i)vec_reve((__m128i)shufmask); #endif // we increment by 0x08 the second half of the mask shufmask = ((__m128i)shufmask) + ((__m128i)(__vector int){0, 0, 0x08080808, 0x08080808}); // this is the version "nearly pruned" __m128i pruned = vec_perm(this->value, this->value, shufmask); // we still need to put the two halves together. // we compute the popcount of the first half: int pop1 = BitsSetTable256mul2[mask1]; // then load the corresponding mask, what it does is to write // only the first pop1 bytes from the first 8 bytes, and then // it fills in with the bytes from the second 8 bytes + some filling // at the end. __m128i compactmask = vec_vsx_ld(0, reinterpret_cast(pshufb_combine_table + pop1 * 8)); __m128i answer = vec_perm(pruned, (__m128i)vec_splats(0), compactmask); vec_vsx_st(answer, 0, reinterpret_cast<__m128i *>(output)); } template simdjson_inline simd8 lookup_16(L replace0, L replace1, L replace2, L replace3, L replace4, L replace5, L replace6, L replace7, L replace8, L replace9, L replace10, L replace11, L replace12, L replace13, L replace14, L replace15) const { return lookup_16(simd8::repeat_16( replace0, replace1, replace2, replace3, replace4, replace5, replace6, replace7, replace8, replace9, replace10, replace11, replace12, replace13, replace14, replace15)); } }; // Signed bytes template <> struct simd8 : base8_numeric { simdjson_inline simd8() : base8_numeric() {} simdjson_inline simd8(const __m128i _value) : base8_numeric(_value) {} // Splat constructor simdjson_inline simd8(int8_t _value) : simd8(splat(_value)) {} // Array constructor simdjson_inline simd8(const int8_t *values) : simd8(load(values)) {} // Member-by-member initialization simdjson_inline simd8(int8_t v0, int8_t v1, int8_t 
v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15) : simd8((__m128i)(__vector signed char){v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15}) {} // Repeat 16 values as many times as necessary (usually for lookup tables) simdjson_inline static simd8 repeat_16(int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15) { return simd8(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15); } // Order-sensitive comparisons simdjson_inline simd8 max_val(const simd8 other) const { return (__m128i)vec_max((__vector signed char)this->value, (__vector signed char)(__m128i)other); } simdjson_inline simd8 min_val(const simd8 other) const { return (__m128i)vec_min((__vector signed char)this->value, (__vector signed char)(__m128i)other); } simdjson_inline simd8 operator>(const simd8 other) const { return (__m128i)vec_cmpgt((__vector signed char)this->value, (__vector signed char)(__m128i)other); } simdjson_inline simd8 operator<(const simd8 other) const { return (__m128i)vec_cmplt((__vector signed char)this->value, (__vector signed char)(__m128i)other); } }; // Unsigned bytes template <> struct simd8 : base8_numeric { simdjson_inline simd8() : base8_numeric() {} simdjson_inline simd8(const __m128i _value) : base8_numeric(_value) {} // Splat constructor simdjson_inline simd8(uint8_t _value) : simd8(splat(_value)) {} // Array constructor simdjson_inline simd8(const uint8_t *values) : simd8(load(values)) {} // Member-by-member initialization simdjson_inline simd8(uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15) : simd8((__m128i){v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, 
v12, v13, v14, v15}) {} // Repeat 16 values as many times as necessary (usually for lookup tables) simdjson_inline static simd8 repeat_16(uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15) { return simd8(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15); } // Saturated math simdjson_inline simd8 saturating_add(const simd8 other) const { return (__m128i)vec_adds(this->value, (__m128i)other); } simdjson_inline simd8 saturating_sub(const simd8 other) const { return (__m128i)vec_subs(this->value, (__m128i)other); } // Order-specific operations simdjson_inline simd8 max_val(const simd8 other) const { return (__m128i)vec_max(this->value, (__m128i)other); } simdjson_inline simd8 min_val(const simd8 other) const { return (__m128i)vec_min(this->value, (__m128i)other); } // Same as >, but only guarantees true is nonzero (< guarantees true = -1) simdjson_inline simd8 gt_bits(const simd8 other) const { return this->saturating_sub(other); } // Same as <, but only guarantees true is nonzero (< guarantees true = -1) simdjson_inline simd8 lt_bits(const simd8 other) const { return other.saturating_sub(*this); } simdjson_inline simd8 operator<=(const simd8 other) const { return other.max_val(*this) == other; } simdjson_inline simd8 operator>=(const simd8 other) const { return other.min_val(*this) == other; } simdjson_inline simd8 operator>(const simd8 other) const { return this->gt_bits(other).any_bits_set(); } simdjson_inline simd8 operator<(const simd8 other) const { return this->gt_bits(other).any_bits_set(); } // Bit-specific operations simdjson_inline simd8 bits_not_set() const { return (__m128i)vec_cmpeq(this->value, (__m128i)vec_splats(uint8_t(0))); } simdjson_inline simd8 bits_not_set(simd8 bits) const { return (*this & bits).bits_not_set(); } simdjson_inline simd8 any_bits_set() const { return 
~this->bits_not_set(); } simdjson_inline simd8 any_bits_set(simd8 bits) const { return ~this->bits_not_set(bits); } simdjson_inline bool bits_not_set_anywhere() const { return vec_all_eq(this->value, (__m128i)vec_splats(0)); } simdjson_inline bool any_bits_set_anywhere() const { return !bits_not_set_anywhere(); } simdjson_inline bool bits_not_set_anywhere(simd8 bits) const { return vec_all_eq(vec_and(this->value, (__m128i)bits), (__m128i)vec_splats(0)); } simdjson_inline bool any_bits_set_anywhere(simd8 bits) const { return !bits_not_set_anywhere(bits); } template simdjson_inline simd8 shr() const { return simd8( (__m128i)vec_sr(this->value, (__m128i)vec_splat_u8(N))); } template simdjson_inline simd8 shl() const { return simd8( (__m128i)vec_sl(this->value, (__m128i)vec_splat_u8(N))); } }; template struct simd8x64 { static constexpr int NUM_CHUNKS = 64 / sizeof(simd8); static_assert(NUM_CHUNKS == 4, "PPC64 kernel should use four registers per 64-byte block."); const simd8 chunks[NUM_CHUNKS]; simd8x64(const simd8x64 &o) = delete; // no copy allowed simd8x64 & operator=(const simd8& other) = delete; // no assignment allowed simd8x64() = delete; // no default constructor allowed simdjson_inline simd8x64(const simd8 chunk0, const simd8 chunk1, const simd8 chunk2, const simd8 chunk3) : chunks{chunk0, chunk1, chunk2, chunk3} {} simdjson_inline simd8x64(const T ptr[64]) : chunks{simd8::load(ptr), simd8::load(ptr + 16), simd8::load(ptr + 32), simd8::load(ptr + 48)} {} simdjson_inline void store(T ptr[64]) const { this->chunks[0].store(ptr + sizeof(simd8) * 0); this->chunks[1].store(ptr + sizeof(simd8) * 1); this->chunks[2].store(ptr + sizeof(simd8) * 2); this->chunks[3].store(ptr + sizeof(simd8) * 3); } simdjson_inline simd8 reduce_or() const { return (this->chunks[0] | this->chunks[1]) | (this->chunks[2] | this->chunks[3]); } simdjson_inline uint64_t compress(uint64_t mask, T *output) const { this->chunks[0].compress(uint16_t(mask), output); 
#endif // SIMDJSON_PPC64_SIMD_H
"simdjson/ppc64/simd.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { namespace ppc64 { namespace { using namespace simd; // Holds backslashes and quotes locations. struct backslash_and_quote { public: static constexpr uint32_t BYTES_PROCESSED = 32; simdjson_inline static backslash_and_quote copy_and_find(const uint8_t *src, uint8_t *dst); simdjson_inline bool has_quote_first() { return ((bs_bits - 1) & quote_bits) != 0; } simdjson_inline bool has_backslash() { return bs_bits != 0; } simdjson_inline int quote_index() { return trailing_zeroes(quote_bits); } simdjson_inline int backslash_index() { return trailing_zeroes(bs_bits); } uint32_t bs_bits; uint32_t quote_bits; }; // struct backslash_and_quote simdjson_inline backslash_and_quote backslash_and_quote::copy_and_find(const uint8_t *src, uint8_t *dst) { // this can read up to 31 bytes beyond the buffer size, but we require // SIMDJSON_PADDING of padding static_assert(SIMDJSON_PADDING >= (BYTES_PROCESSED - 1), "backslash and quote finder must process fewer than " "SIMDJSON_PADDING bytes"); simd8 v0(src); simd8 v1(src + sizeof(v0)); v0.store(dst); v1.store(dst + sizeof(v0)); // Getting a 64-bit bitmask is much cheaper than multiple 16-bit bitmasks on // PPC; therefore, we smash them together into a 64-byte mask and get the // bitmask from there. 
uint64_t bs_and_quote = simd8x64(v0 == '\\', v1 == '\\', v0 == '"', v1 == '"').to_bitmask(); return { uint32_t(bs_and_quote), // bs_bits uint32_t(bs_and_quote >> 32) // quote_bits }; } } // unnamed namespace } // namespace ppc64 } // namespace simdjson #endif // SIMDJSON_PPC64_STRINGPARSING_DEFS_H /* end file simdjson/ppc64/stringparsing_defs.h */ #define SIMDJSON_SKIP_BACKSLASH_SHORT_CIRCUIT 1 /* end file simdjson/ppc64/begin.h */ /* including simdjson/generic/amalgamated.h for ppc64: #include "simdjson/generic/amalgamated.h" */ /* begin file simdjson/generic/amalgamated.h for ppc64 */ #if defined(SIMDJSON_CONDITIONAL_INCLUDE) && !defined(SIMDJSON_GENERIC_DEPENDENCIES_H) #error simdjson/generic/dependencies.h must be included before simdjson/generic/amalgamated.h! #endif /* including simdjson/generic/base.h for ppc64: #include "simdjson/generic/base.h" */ /* begin file simdjson/generic/base.h for ppc64 */ #ifndef SIMDJSON_GENERIC_BASE_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_BASE_H */ /* amalgamation skipped (editor-only): #include "simdjson/base.h" */ /* amalgamation skipped (editor-only): // If we haven't got an implementation yet, we're in the editor, editing a generic file! Just */ /* amalgamation skipped (editor-only): // use the most advanced one we can so the most possible stuff can be tested. 
*/ /* amalgamation skipped (editor-only): #ifndef SIMDJSON_IMPLEMENTATION */ /* amalgamation skipped (editor-only): #include "simdjson/implementation_detection.h" */ /* amalgamation skipped (editor-only): #if SIMDJSON_IMPLEMENTATION_ICELAKE */ /* amalgamation skipped (editor-only): #include "simdjson/icelake/begin.h" */ /* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_HASWELL */ /* amalgamation skipped (editor-only): #include "simdjson/haswell/begin.h" */ /* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_WESTMERE */ /* amalgamation skipped (editor-only): #include "simdjson/westmere/begin.h" */ /* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_ARM64 */ /* amalgamation skipped (editor-only): #include "simdjson/arm64/begin.h" */ /* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_PPC64 */ /* amalgamation skipped (editor-only): #include "simdjson/ppc64/begin.h" */ /* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_FALLBACK */ /* amalgamation skipped (editor-only): #include "simdjson/fallback/begin.h" */ /* amalgamation skipped (editor-only): #else */ /* amalgamation skipped (editor-only): #error "All possible implementations (including fallback) have been disabled! simdjson will not run." 
/**
 * The type of a JSON number
 */
enum class number_type {
    floating_point_number=1, ///< a binary64 number
    signed_integer,          ///< a signed integer that fits in a 64-bit word using two's complement
    unsigned_integer         ///< a positive integer larger or equal to 1<<63
};
https://lemire.me/blog/2019/04/17/parsing-short-hexadecimal-strings-efficiently/ static inline uint32_t hex_to_u32_nocheck( const uint8_t *src) { // strictly speaking, static inline is a C-ism uint32_t v1 = internal::digit_to_val32[630 + src[0]]; uint32_t v2 = internal::digit_to_val32[420 + src[1]]; uint32_t v3 = internal::digit_to_val32[210 + src[2]]; uint32_t v4 = internal::digit_to_val32[0 + src[3]]; return v1 | v2 | v3 | v4; } // given a code point cp, writes to c // the utf-8 code, outputting the length in // bytes, if the length is zero, the code point // is invalid // // This can possibly be made faster using pdep // and clz and table lookups, but JSON documents // have few escaped code points, and the following // function looks cheap. // // Note: we assume that surrogates are treated separately // simdjson_inline size_t codepoint_to_utf8(uint32_t cp, uint8_t *c) { if (cp <= 0x7F) { c[0] = uint8_t(cp); return 1; // ascii } if (cp <= 0x7FF) { c[0] = uint8_t((cp >> 6) + 192); c[1] = uint8_t((cp & 63) + 128); return 2; // universal plane // Surrogates are treated elsewhere... //} //else if (0xd800 <= cp && cp <= 0xdfff) { // return 0; // surrogates // could put assert here } else if (cp <= 0xFFFF) { c[0] = uint8_t((cp >> 12) + 224); c[1] = uint8_t(((cp >> 6) & 63) + 128); c[2] = uint8_t((cp & 63) + 128); return 3; } else if (cp <= 0x10FFFF) { // if you know you have a valid code point, this // is not needed c[0] = uint8_t((cp >> 18) + 240); c[1] = uint8_t(((cp >> 12) & 63) + 128); c[2] = uint8_t(((cp >> 6) & 63) + 128); c[3] = uint8_t((cp & 63) + 128); return 4; } // will return 0 when the code point was too large. 
return 0; // bad r } #if SIMDJSON_IS_32BITS // _umul128 for x86, arm // this is a slow emulation routine for 32-bit // static simdjson_inline uint64_t __emulu(uint32_t x, uint32_t y) { return x * (uint64_t)y; } static simdjson_inline uint64_t _umul128(uint64_t ab, uint64_t cd, uint64_t *hi) { uint64_t ad = __emulu((uint32_t)(ab >> 32), (uint32_t)cd); uint64_t bd = __emulu((uint32_t)ab, (uint32_t)cd); uint64_t adbc = ad + __emulu((uint32_t)ab, (uint32_t)(cd >> 32)); uint64_t adbc_carry = !!(adbc < ad); uint64_t lo = bd + (adbc << 32); *hi = __emulu((uint32_t)(ab >> 32), (uint32_t)(cd >> 32)) + (adbc >> 32) + (adbc_carry << 32) + !!(lo < bd); return lo; } #endif } // namespace jsoncharutils } // unnamed namespace } // namespace ppc64 } // namespace simdjson #endif // SIMDJSON_GENERIC_JSONCHARUTILS_H /* end file simdjson/generic/jsoncharutils.h for ppc64 */ /* including simdjson/generic/atomparsing.h for ppc64: #include "simdjson/generic/atomparsing.h" */ /* begin file simdjson/generic/atomparsing.h for ppc64 */ #ifndef SIMDJSON_GENERIC_ATOMPARSING_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ATOMPARSING_H */ /* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/jsoncharutils.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ #include namespace simdjson { namespace ppc64 { namespace { /// @private namespace atomparsing { // The string_to_uint32 is exclusively used to map literal strings to 32-bit values. // We use memcpy instead of a pointer cast to avoid undefined behaviors since we cannot // be certain that the character pointer will be properly aligned. // You might think that using memcpy makes this function expensive, but you'd be wrong. 
// All decent optimizing compilers (GCC, clang, Visual Studio) will compile string_to_uint32("false"); // to the compile-time constant 1936482662. simdjson_inline uint32_t string_to_uint32(const char* str) { uint32_t val; std::memcpy(&val, str, sizeof(uint32_t)); return val; } // Again in str4ncmp we use a memcpy to avoid undefined behavior. The memcpy may appear expensive. // Yet all decent optimizing compilers will compile memcpy to a single instruction, just about. simdjson_warn_unused simdjson_inline uint32_t str4ncmp(const uint8_t *src, const char* atom) { uint32_t srcval; // we want to avoid unaligned 32-bit loads (undefined in C/C++) static_assert(sizeof(uint32_t) <= SIMDJSON_PADDING, "SIMDJSON_PADDING must be larger than 4 bytes"); std::memcpy(&srcval, src, sizeof(uint32_t)); return srcval ^ string_to_uint32(atom); } simdjson_warn_unused simdjson_inline bool is_valid_true_atom(const uint8_t *src) { return (str4ncmp(src, "true") | jsoncharutils::is_not_structural_or_whitespace(src[4])) == 0; } simdjson_warn_unused simdjson_inline bool is_valid_true_atom(const uint8_t *src, size_t len) { if (len > 4) { return is_valid_true_atom(src); } else if (len == 4) { return !str4ncmp(src, "true"); } else { return false; } } simdjson_warn_unused simdjson_inline bool is_valid_false_atom(const uint8_t *src) { return (str4ncmp(src+1, "alse") | jsoncharutils::is_not_structural_or_whitespace(src[5])) == 0; } simdjson_warn_unused simdjson_inline bool is_valid_false_atom(const uint8_t *src, size_t len) { if (len > 5) { return is_valid_false_atom(src); } else if (len == 5) { return !str4ncmp(src+1, "alse"); } else { return false; } } simdjson_warn_unused simdjson_inline bool is_valid_null_atom(const uint8_t *src) { return (str4ncmp(src, "null") | jsoncharutils::is_not_structural_or_whitespace(src[4])) == 0; } simdjson_warn_unused simdjson_inline bool is_valid_null_atom(const uint8_t *src, size_t len) { if (len > 4) { return is_valid_null_atom(src); } else if (len == 4) { return 
!str4ncmp(src, "null"); } else { return false; } } } // namespace atomparsing } // unnamed namespace } // namespace ppc64 } // namespace simdjson #endif // SIMDJSON_GENERIC_ATOMPARSING_H /* end file simdjson/generic/atomparsing.h for ppc64 */ /* including simdjson/generic/dom_parser_implementation.h for ppc64: #include "simdjson/generic/dom_parser_implementation.h" */ /* begin file simdjson/generic/dom_parser_implementation.h for ppc64 */ #ifndef SIMDJSON_GENERIC_DOM_PARSER_IMPLEMENTATION_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_DOM_PARSER_IMPLEMENTATION_H */ /* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/internal/dom_parser_implementation.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { namespace ppc64 { // expectation: sizeof(open_container) = 64/8. 
struct open_container { uint32_t tape_index; // where, on the tape, does the scope ([,{) begins uint32_t count; // how many elements in the scope }; // struct open_container static_assert(sizeof(open_container) == 64/8, "Open container must be 64 bits"); class dom_parser_implementation final : public internal::dom_parser_implementation { public: /** Tape location of each open { or [ */ std::unique_ptr open_containers{}; /** Whether each open container is a [ or { */ std::unique_ptr is_array{}; /** Buffer passed to stage 1 */ const uint8_t *buf{}; /** Length passed to stage 1 */ size_t len{0}; /** Document passed to stage 2 */ dom::document *doc{}; inline dom_parser_implementation() noexcept; inline dom_parser_implementation(dom_parser_implementation &&other) noexcept; inline dom_parser_implementation &operator=(dom_parser_implementation &&other) noexcept; dom_parser_implementation(const dom_parser_implementation &) = delete; dom_parser_implementation &operator=(const dom_parser_implementation &) = delete; simdjson_warn_unused error_code parse(const uint8_t *buf, size_t len, dom::document &doc) noexcept final; simdjson_warn_unused error_code stage1(const uint8_t *buf, size_t len, stage1_mode partial) noexcept final; simdjson_warn_unused error_code stage2(dom::document &doc) noexcept final; simdjson_warn_unused error_code stage2_next(dom::document &doc) noexcept final; simdjson_warn_unused uint8_t *parse_string(const uint8_t *src, uint8_t *dst, bool allow_replacement) const noexcept final; simdjson_warn_unused uint8_t *parse_wobbly_string(const uint8_t *src, uint8_t *dst) const noexcept final; inline simdjson_warn_unused error_code set_capacity(size_t capacity) noexcept final; inline simdjson_warn_unused error_code set_max_depth(size_t max_depth) noexcept final; private: simdjson_inline simdjson_warn_unused error_code set_capacity_stage1(size_t capacity); }; } // namespace ppc64 } // namespace simdjson namespace simdjson { namespace ppc64 { inline 
dom_parser_implementation::dom_parser_implementation() noexcept = default; inline dom_parser_implementation::dom_parser_implementation(dom_parser_implementation &&other) noexcept = default; inline dom_parser_implementation &dom_parser_implementation::operator=(dom_parser_implementation &&other) noexcept = default; // Leaving these here so they can be inlined if so desired inline simdjson_warn_unused error_code dom_parser_implementation::set_capacity(size_t capacity) noexcept { if(capacity > SIMDJSON_MAXSIZE_BYTES) { return CAPACITY; } // Stage 1 index output size_t max_structures = SIMDJSON_ROUNDUP_N(capacity, 64) + 2 + 7; structural_indexes.reset( new (std::nothrow) uint32_t[max_structures] ); if (!structural_indexes) { _capacity = 0; return MEMALLOC; } structural_indexes[0] = 0; n_structural_indexes = 0; _capacity = capacity; return SUCCESS; } inline simdjson_warn_unused error_code dom_parser_implementation::set_max_depth(size_t max_depth) noexcept { // Stage 2 stacks open_containers.reset(new (std::nothrow) open_container[max_depth]); is_array.reset(new (std::nothrow) bool[max_depth]); if (!is_array || !open_containers) { _max_depth = 0; return MEMALLOC; } _max_depth = max_depth; return SUCCESS; } } // namespace ppc64 } // namespace simdjson #endif // SIMDJSON_GENERIC_DOM_PARSER_IMPLEMENTATION_H /* end file simdjson/generic/dom_parser_implementation.h for ppc64 */ /* including simdjson/generic/implementation_simdjson_result_base.h for ppc64: #include "simdjson/generic/implementation_simdjson_result_base.h" */ /* begin file simdjson/generic/implementation_simdjson_result_base.h for ppc64 */ #ifndef SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_H */ /* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ /* amalgamation skipped (editor-only): #endif // 
SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { namespace ppc64 { // This is a near copy of include/error.h's implementation_simdjson_result_base, except it doesn't use std::pair // so we can avoid inlining errors // TODO reconcile these! /** * The result of a simdjson operation that could fail. * * Gives the option of reading error codes, or throwing an exception by casting to the desired result. * * This is a base class for implementations that want to add functions to the result type for * chaining. * * Override like: * * struct simdjson_result : public internal::implementation_simdjson_result_base { * simdjson_result() noexcept : internal::implementation_simdjson_result_base() {} * simdjson_result(error_code error) noexcept : internal::implementation_simdjson_result_base(error) {} * simdjson_result(T &&value) noexcept : internal::implementation_simdjson_result_base(std::forward(value)) {} * simdjson_result(T &&value, error_code error) noexcept : internal::implementation_simdjson_result_base(value, error) {} * // Your extra methods here * } * * Then any method returning simdjson_result will be chainable with your methods. */ template struct implementation_simdjson_result_base { /** * Create a new empty result with error = UNINITIALIZED. */ simdjson_inline implementation_simdjson_result_base() noexcept = default; /** * Create a new error result. */ simdjson_inline implementation_simdjson_result_base(error_code error) noexcept; /** * Create a new successful result. */ simdjson_inline implementation_simdjson_result_base(T &&value) noexcept; /** * Create a new result with both things (use if you don't want to branch when creating the result). */ simdjson_inline implementation_simdjson_result_base(T &&value, error_code error) noexcept; /** * Move the value and the error to the provided variables. * * @param value The variable to assign the value to. May not be set if there is an error. * @param error The variable to assign the error to. 
Set to SUCCESS if there is no error. */ simdjson_inline void tie(T &value, error_code &error) && noexcept; /** * Move the value to the provided variable. * * @param value The variable to assign the value to. May not be set if there is an error. */ simdjson_inline error_code get(T &value) && noexcept; /** * The error. */ simdjson_inline error_code error() const noexcept; #if SIMDJSON_EXCEPTIONS /** * Get the result value. * * @throw simdjson_error if there was an error. */ simdjson_inline T& value() & noexcept(false); /** * Take the result value (move it). * * @throw simdjson_error if there was an error. */ simdjson_inline T&& value() && noexcept(false); /** * Take the result value (move it). * * @throw simdjson_error if there was an error. */ simdjson_inline T&& take_value() && noexcept(false); /** * Cast to the value (will throw on error). * * @throw simdjson_error if there was an error. */ simdjson_inline operator T&&() && noexcept(false); #endif // SIMDJSON_EXCEPTIONS /** * Get the result value. This function is safe if and only * the error() method returns a value that evaluates to false. */ simdjson_inline const T& value_unsafe() const& noexcept; /** * Get the result value. This function is safe if and only * the error() method returns a value that evaluates to false. */ simdjson_inline T& value_unsafe() & noexcept; /** * Take the result value (move it). This function is safe if and only * the error() method returns a value that evaluates to false. */ simdjson_inline T&& value_unsafe() && noexcept; protected: /** users should never directly access first and second. **/ T first{}; /** Users should never directly access 'first'. **/ error_code second{UNINITIALIZED}; /** Users should never directly access 'second'. 
**/ }; // struct implementation_simdjson_result_base } // namespace ppc64 } // namespace simdjson #endif // SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_H /* end file simdjson/generic/implementation_simdjson_result_base.h for ppc64 */ /* including simdjson/generic/numberparsing.h for ppc64: #include "simdjson/generic/numberparsing.h" */ /* begin file simdjson/generic/numberparsing.h for ppc64 */ #ifndef SIMDJSON_GENERIC_NUMBERPARSING_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_NUMBERPARSING_H */ /* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/jsoncharutils.h" */ /* amalgamation skipped (editor-only): #include "simdjson/internal/numberparsing_tables.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ #include #include #include namespace simdjson { namespace ppc64 { namespace numberparsing { #ifdef JSON_TEST_NUMBERS #define INVALID_NUMBER(SRC) (found_invalid_number((SRC)), NUMBER_ERROR) #define WRITE_INTEGER(VALUE, SRC, WRITER) (found_integer((VALUE), (SRC)), (WRITER).append_s64((VALUE))) #define WRITE_UNSIGNED(VALUE, SRC, WRITER) (found_unsigned_integer((VALUE), (SRC)), (WRITER).append_u64((VALUE))) #define WRITE_DOUBLE(VALUE, SRC, WRITER) (found_float((VALUE), (SRC)), (WRITER).append_double((VALUE))) #else #define INVALID_NUMBER(SRC) (NUMBER_ERROR) #define WRITE_INTEGER(VALUE, SRC, WRITER) (WRITER).append_s64((VALUE)) #define WRITE_UNSIGNED(VALUE, SRC, WRITER) (WRITER).append_u64((VALUE)) #define WRITE_DOUBLE(VALUE, SRC, WRITER) (WRITER).append_double((VALUE)) #endif namespace { // Convert a mantissa, an exponent and a sign bit into an ieee64 double. // The real_exponent needs to be in [0, 2046] (technically real_exponent = 2047 would be acceptable). // The mantissa should be in [0,1<<53). 
The bit at index (1ULL << 52) while be zeroed. simdjson_inline double to_double(uint64_t mantissa, uint64_t real_exponent, bool negative) { double d; mantissa &= ~(1ULL << 52); mantissa |= real_exponent << 52; mantissa |= ((static_cast(negative)) << 63); std::memcpy(&d, &mantissa, sizeof(d)); return d; } // Attempts to compute i * 10^(power) exactly; and if "negative" is // true, negate the result. // This function will only work in some cases, when it does not work, success is // set to false. This should work *most of the time* (like 99% of the time). // We assume that power is in the [smallest_power, // largest_power] interval: the caller is responsible for this check. simdjson_inline bool compute_float_64(int64_t power, uint64_t i, bool negative, double &d) { // we start with a fast path // It was described in // Clinger WD. How to read floating point numbers accurately. // ACM SIGPLAN Notices. 1990 #ifndef FLT_EVAL_METHOD #error "FLT_EVAL_METHOD should be defined, please include cfloat." #endif #if (FLT_EVAL_METHOD != 1) && (FLT_EVAL_METHOD != 0) // We cannot be certain that x/y is rounded to nearest. if (0 <= power && power <= 22 && i <= 9007199254740991) #else if (-22 <= power && power <= 22 && i <= 9007199254740991) #endif { // convert the integer into a double. This is lossless since // 0 <= i <= 2^53 - 1. d = double(i); // // The general idea is as follows. // If 0 <= s < 2^53 and if 10^0 <= p <= 10^22 then // 1) Both s and p can be represented exactly as 64-bit floating-point // values // (binary64). // 2) Because s and p can be represented exactly as floating-point values, // then s * p // and s / p will produce correctly rounded values. // if (power < 0) { d = d / simdjson::internal::power_of_ten[-power]; } else { d = d * simdjson::internal::power_of_ten[power]; } if (negative) { d = -d; } return true; } // When 22 < power && power < 22 + 16, we could // hope for another, secondary fast path. It was // described by David M. 
Gay in "Correctly rounded // binary-decimal and decimal-binary conversions." (1990) // If you need to compute i * 10^(22 + x) for x < 16, // first compute i * 10^x, if you know that result is exact // (e.g., when i * 10^x < 2^53), // then you can still proceed and do (i * 10^x) * 10^22. // Is this worth your time? // You need 22 < power *and* power < 22 + 16 *and* (i * 10^(x-22) < 2^53) // for this second fast path to work. // If you you have 22 < power *and* power < 22 + 16, and then you // optimistically compute "i * 10^(x-22)", there is still a chance that you // have wasted your time if i * 10^(x-22) >= 2^53. It makes the use cases of // this optimization maybe less common than we would like. Source: // http://www.exploringbinary.com/fast-path-decimal-to-floating-point-conversion/ // also used in RapidJSON: https://rapidjson.org/strtod_8h_source.html // The fast path has now failed, so we are failing back on the slower path. // In the slow path, we need to adjust i so that it is > 1<<63 which is always // possible, except if i == 0, so we handle i == 0 separately. if(i == 0) { d = negative ? -0.0 : 0.0; return true; } // The exponent is 1024 + 63 + power // + floor(log(5**power)/log(2)). // The 1024 comes from the ieee64 standard. // The 63 comes from the fact that we use a 64-bit word. // // Computing floor(log(5**power)/log(2)) could be // slow. Instead we use a fast function. // // For power in (-400,350), we have that // (((152170 + 65536) * power ) >> 16); // is equal to // floor(log(5**power)/log(2)) + power when power >= 0 // and it is equal to // ceil(log(5**-power)/log(2)) + power when power < 0 // // The 65536 is (1<<16) and corresponds to // (65536 * power) >> 16 ---> power // // ((152170 * power ) >> 16) is equal to // floor(log(5**power)/log(2)) // // Note that this is not magic: 152170/(1<<16) is // approximatively equal to log(5)/log(2). // The 1<<16 value is a power of two; we could use a // larger power of 2 if we wanted to. 
// int64_t exponent = (((152170 + 65536) * power) >> 16) + 1024 + 63; // We want the most significant bit of i to be 1. Shift if needed. int lz = leading_zeroes(i); i <<= lz; // We are going to need to do some 64-bit arithmetic to get a precise product. // We use a table lookup approach. // It is safe because // power >= smallest_power // and power <= largest_power // We recover the mantissa of the power, it has a leading 1. It is always // rounded down. // // We want the most significant 64 bits of the product. We know // this will be non-zero because the most significant bit of i is // 1. const uint32_t index = 2 * uint32_t(power - simdjson::internal::smallest_power); // Optimization: It may be that materializing the index as a variable might confuse some compilers and prevent effective complex-addressing loads. (Done for code clarity.) // // The full_multiplication function computes the 128-bit product of two 64-bit words // with a returned value of type value128 with a "low component" corresponding to the // 64-bit least significant bits of the product and with a "high component" corresponding // to the 64-bit most significant bits of the product. simdjson::internal::value128 firstproduct = full_multiplication(i, simdjson::internal::power_of_five_128[index]); // Both i and power_of_five_128[index] have their most significant bit set to 1 which // implies that the either the most or the second most significant bit of the product // is 1. We pack values in this manner for efficiency reasons: it maximizes the use // we make of the product. It also makes it easy to reason about the product: there // is 0 or 1 leading zero in the product. // Unless the least significant 9 bits of the high (64-bit) part of the full // product are all 1s, then we know that the most significant 55 bits are // exact and no further work is needed. 
Having 55 bits is necessary because // we need 53 bits for the mantissa but we have to have one rounding bit and // we can waste a bit if the most significant bit of the product is zero. if((firstproduct.high & 0x1FF) == 0x1FF) { // We want to compute i * 5^q, but only care about the top 55 bits at most. // Consider the scenario where q>=0. Then 5^q may not fit in 64-bits. Doing // the full computation is wasteful. So we do what is called a "truncated // multiplication". // We take the most significant 64-bits, and we put them in // power_of_five_128[index]. Usually, that's good enough to approximate i * 5^q // to the desired approximation using one multiplication. Sometimes it does not suffice. // Then we store the next most significant 64 bits in power_of_five_128[index + 1], and // then we get a better approximation to i * 5^q. // // That's for when q>=0. The logic for q<0 is somewhat similar but it is somewhat // more complicated. // // There is an extra layer of complexity in that we need more than 55 bits of // accuracy in the round-to-even scenario. // // The full_multiplication function computes the 128-bit product of two 64-bit words // with a returned value of type value128 with a "low component" corresponding to the // 64-bit least significant bits of the product and with a "high component" corresponding // to the 64-bit most significant bits of the product. simdjson::internal::value128 secondproduct = full_multiplication(i, simdjson::internal::power_of_five_128[index + 1]); firstproduct.low += secondproduct.high; if(secondproduct.high > firstproduct.low) { firstproduct.high++; } // As it has been proven by Noble Mushtak and Daniel Lemire in "Fast Number Parsing Without // Fallback" (https://arxiv.org/abs/2212.06644), at this point we are sure that the product // is sufficiently accurate, and more computation is not needed. } uint64_t lower = firstproduct.low; uint64_t upper = firstproduct.high; // The final mantissa should be 53 bits with a leading 1. 
// We shift it so that it occupies 54 bits with a leading 1. /////// uint64_t upperbit = upper >> 63; uint64_t mantissa = upper >> (upperbit + 9); lz += int(1 ^ upperbit); // Here we have mantissa < (1<<54). int64_t real_exponent = exponent - lz; if (simdjson_unlikely(real_exponent <= 0)) { // we have a subnormal? // Here have that real_exponent <= 0 so -real_exponent >= 0 if(-real_exponent + 1 >= 64) { // if we have more than 64 bits below the minimum exponent, you have a zero for sure. d = negative ? -0.0 : 0.0; return true; } // next line is safe because -real_exponent + 1 < 0 mantissa >>= -real_exponent + 1; // Thankfully, we can't have both "round-to-even" and subnormals because // "round-to-even" only occurs for powers close to 0. mantissa += (mantissa & 1); // round up mantissa >>= 1; // There is a weird scenario where we don't have a subnormal but just. // Suppose we start with 2.2250738585072013e-308, we end up // with 0x3fffffffffffff x 2^-1023-53 which is technically subnormal // whereas 0x40000000000000 x 2^-1023-53 is normal. Now, we need to round // up 0x3fffffffffffff x 2^-1023-53 and once we do, we are no longer // subnormal, but we can only know this after rounding. // So we only declare a subnormal if we are smaller than the threshold. real_exponent = (mantissa < (uint64_t(1) << 52)) ? 0 : 1; d = to_double(mantissa, real_exponent, negative); return true; } // We have to round to even. The "to even" part // is only a problem when we are right in between two floats // which we guard against. // If we have lots of trailing zeros, we may fall right between two // floating-point values. // // The round-to-even cases take the form of a number 2m+1 which is in (2^53,2^54] // times a power of two. That is, it is right between a number with binary significand // m and another number with binary significand m+1; and it must be the case // that it cannot be represented by a float itself. 
// // We must have that w * 10 ^q == (2m+1) * 2^p for some power of two 2^p. // Recall that 10^q = 5^q * 2^q. // When q >= 0, we must have that (2m+1) is divible by 5^q, so 5^q <= 2^54. We have that // 5^23 <= 2^54 and it is the last power of five to qualify, so q <= 23. // When q<0, we have w >= (2m+1) x 5^{-q}. We must have that w<2^{64} so // (2m+1) x 5^{-q} < 2^{64}. We have that 2m+1>2^{53}. Hence, we must have // 2^{53} x 5^{-q} < 2^{64}. // Hence we have 5^{-q} < 2^{11}$ or q>= -4. // // We require lower <= 1 and not lower == 0 because we could not prove that // that lower == 0 is implied; but we could prove that lower <= 1 is a necessary and sufficient test. if (simdjson_unlikely((lower <= 1) && (power >= -4) && (power <= 23) && ((mantissa & 3) == 1))) { if((mantissa << (upperbit + 64 - 53 - 2)) == upper) { mantissa &= ~1; // flip it so that we do not round up } } mantissa += mantissa & 1; mantissa >>= 1; // Here we have mantissa < (1<<53), unless there was an overflow if (mantissa >= (1ULL << 53)) { ////////// // This will happen when parsing values such as 7.2057594037927933e+16 //////// mantissa = (1ULL << 52); real_exponent++; } mantissa &= ~(1ULL << 52); // we have to check that real_exponent is in range, otherwise we bail out if (simdjson_unlikely(real_exponent > 2046)) { // We have an infinite value!!! We could actually throw an error here if we could. return false; } d = to_double(mantissa, real_exponent, negative); return true; } // We call a fallback floating-point parser that might be slow. Note // it will accept JSON numbers, but the JSON spec. is more restrictive so // before you call parse_float_fallback, you need to have validated the input // string with the JSON grammar. // It will return an error (false) if the parsed number is infinite. // The string parsing itself always succeeds. We know that there is at least // one digit. 
static bool parse_float_fallback(const uint8_t *ptr, double *outDouble) { *outDouble = simdjson::internal::from_chars(reinterpret_cast(ptr)); // We do not accept infinite values. // Detecting finite values in a portable manner is ridiculously hard, ideally // we would want to do: // return !std::isfinite(*outDouble); // but that mysteriously fails under legacy/old libc++ libraries, see // https://github.com/simdjson/simdjson/issues/1286 // // Therefore, fall back to this solution (the extra parens are there // to handle that max may be a macro on windows). return !(*outDouble > (std::numeric_limits::max)() || *outDouble < std::numeric_limits::lowest()); } static bool parse_float_fallback(const uint8_t *ptr, const uint8_t *end_ptr, double *outDouble) { *outDouble = simdjson::internal::from_chars(reinterpret_cast(ptr), reinterpret_cast(end_ptr)); // We do not accept infinite values. // Detecting finite values in a portable manner is ridiculously hard, ideally // we would want to do: // return !std::isfinite(*outDouble); // but that mysteriously fails under legacy/old libc++ libraries, see // https://github.com/simdjson/simdjson/issues/1286 // // Therefore, fall back to this solution (the extra parens are there // to handle that max may be a macro on windows). 
return !(*outDouble > (std::numeric_limits::max)() || *outDouble < std::numeric_limits::lowest()); } // check quickly whether the next 8 chars are made of digits // at a glance, it looks better than Mula's // http://0x80.pl/articles/swar-digits-validate.html simdjson_inline bool is_made_of_eight_digits_fast(const uint8_t *chars) { uint64_t val; // this can read up to 7 bytes beyond the buffer size, but we require // SIMDJSON_PADDING of padding static_assert(7 <= SIMDJSON_PADDING, "SIMDJSON_PADDING must be bigger than 7"); std::memcpy(&val, chars, 8); // a branchy method might be faster: // return (( val & 0xF0F0F0F0F0F0F0F0 ) == 0x3030303030303030) // && (( (val + 0x0606060606060606) & 0xF0F0F0F0F0F0F0F0 ) == // 0x3030303030303030); return (((val & 0xF0F0F0F0F0F0F0F0) | (((val + 0x0606060606060606) & 0xF0F0F0F0F0F0F0F0) >> 4)) == 0x3333333333333333); } template SIMDJSON_NO_SANITIZE_UNDEFINED // We deliberately allow overflow here and check later simdjson_inline bool parse_digit(const uint8_t c, I &i) { const uint8_t digit = static_cast(c - '0'); if (digit > 9) { return false; } // PERF NOTE: multiplication by 10 is cheaper than arbitrary integer multiplication i = 10 * i + digit; // might overflow, we will handle the overflow later return true; } simdjson_inline error_code parse_decimal_after_separator(simdjson_unused const uint8_t *const src, const uint8_t *&p, uint64_t &i, int64_t &exponent) { // we continue with the fiction that we have an integer. If the // floating point number is representable as x * 10^z for some integer // z that fits in 53 bits, then we will be able to convert back the // the integer into a float in a lossless manner. const uint8_t *const first_after_period = p; #ifdef SIMDJSON_SWAR_NUMBER_PARSING #if SIMDJSON_SWAR_NUMBER_PARSING // this helps if we have lots of decimals! // this turns out to be frequent enough. 
if (is_made_of_eight_digits_fast(p)) { i = i * 100000000 + parse_eight_digits_unrolled(p); p += 8; } #endif // SIMDJSON_SWAR_NUMBER_PARSING #endif // #ifdef SIMDJSON_SWAR_NUMBER_PARSING // Unrolling the first digit makes a small difference on some implementations (e.g. westmere) if (parse_digit(*p, i)) { ++p; } while (parse_digit(*p, i)) { p++; } exponent = first_after_period - p; // Decimal without digits (123.) is illegal if (exponent == 0) { return INVALID_NUMBER(src); } return SUCCESS; } simdjson_inline error_code parse_exponent(simdjson_unused const uint8_t *const src, const uint8_t *&p, int64_t &exponent) { // Exp Sign: -123.456e[-]78 bool neg_exp = ('-' == *p); if (neg_exp || '+' == *p) { p++; } // Skip + as well // Exponent: -123.456e-[78] auto start_exp = p; int64_t exp_number = 0; while (parse_digit(*p, exp_number)) { ++p; } // It is possible for parse_digit to overflow. // In particular, it could overflow to INT64_MIN, and we cannot do - INT64_MIN. // Thus we *must* check for possible overflow before we negate exp_number. // Performance notes: it may seem like combining the two "simdjson_unlikely checks" below into // a single simdjson_unlikely path would be faster. The reasoning is sound, but the compiler may // not oblige and may, in fact, generate two distinct paths in any case. It might be // possible to do uint64_t(p - start_exp - 1) >= 18 but it could end up trading off // instructions for a simdjson_likely branch, an unconclusive gain. // If there were no digits, it's an error. if (simdjson_unlikely(p == start_exp)) { return INVALID_NUMBER(src); } // We have a valid positive exponent in exp_number at this point, except that // it may have overflowed. // If there were more than 18 digits, we may have overflowed the integer. We have to do // something!!!! 
if (simdjson_unlikely(p > start_exp+18)) { // Skip leading zeroes: 1e000000000000000000001 is technically valid and doesn't overflow while (*start_exp == '0') { start_exp++; } // 19 digits could overflow int64_t and is kind of absurd anyway. We don't // support exponents smaller than -999,999,999,999,999,999 and bigger // than 999,999,999,999,999,999. // We can truncate. // Note that 999999999999999999 is assuredly too large. The maximal ieee64 value before // infinity is ~1.8e308. The smallest subnormal is ~5e-324. So, actually, we could // truncate at 324. // Note that there is no reason to fail per se at this point in time. // E.g., 0e999999999999999999999 is a fine number. if (p > start_exp+18) { exp_number = 999999999999999999; } } // At this point, we know that exp_number is a sane, positive, signed integer. // It is <= 999,999,999,999,999,999. As long as 'exponent' is in // [-8223372036854775808, 8223372036854775808], we won't overflow. Because 'exponent' // is bounded in magnitude by the size of the JSON input, we are fine in this universe. // To sum it up: the next line should never overflow. exponent += (neg_exp ? -exp_number : exp_number); return SUCCESS; } simdjson_inline size_t significant_digits(const uint8_t * start_digits, size_t digit_count) { // It is possible that the integer had an overflow. // We have to handle the case where we have 0.0000somenumber. const uint8_t *start = start_digits; while ((*start == '0') || (*start == '.')) { ++start; } // we over-decrement by one when there is a '.' 
return digit_count - size_t(start - start_digits); } } // unnamed namespace /** @private */ template error_code slow_float_parsing(simdjson_unused const uint8_t * src, W writer) { double d; if (parse_float_fallback(src, &d)) { writer.append_double(d); return SUCCESS; } return INVALID_NUMBER(src); } /** @private */ template simdjson_inline error_code write_float(const uint8_t *const src, bool negative, uint64_t i, const uint8_t * start_digits, size_t digit_count, int64_t exponent, W &writer) { // If we frequently had to deal with long strings of digits, // we could extend our code by using a 128-bit integer instead // of a 64-bit integer. However, this is uncommon in practice. // // 9999999999999999999 < 2**64 so we can accommodate 19 digits. // If we have a decimal separator, then digit_count - 1 is the number of digits, but we // may not have a decimal separator! if (simdjson_unlikely(digit_count > 19 && significant_digits(start_digits, digit_count) > 19)) { // Ok, chances are good that we had an overflow! // this is almost never going to get called!!! // we start anew, going slowly!!! // This will happen in the following examples: // 10000000000000000000000000000000000000000000e+308 // 3.1415926535897932384626433832795028841971693993751 // // NOTE: This makes a *copy* of the writer and passes it to slow_float_parsing. This happens // because slow_float_parsing is a non-inlined function. If we passed our writer reference to // it, it would force it to be stored in memory, preventing the compiler from picking it apart // and putting into registers. i.e. if we pass it as reference, it gets slow. // This is what forces the skip_double, as well. 
error_code error = slow_float_parsing(src, writer); writer.skip_double(); return error; } // NOTE: it's weird that the simdjson_unlikely() only wraps half the if, but it seems to get slower any other // way we've tried: https://github.com/simdjson/simdjson/pull/990#discussion_r448497331 // To future reader: we'd love if someone found a better way, or at least could explain this result! if (simdjson_unlikely(exponent < simdjson::internal::smallest_power) || (exponent > simdjson::internal::largest_power)) { // // Important: smallest_power is such that it leads to a zero value. // Observe that 18446744073709551615e-343 == 0, i.e. (2**64 - 1) e -343 is zero // so something x 10^-343 goes to zero, but not so with something x 10^-342. static_assert(simdjson::internal::smallest_power <= -342, "smallest_power is not small enough"); // if((exponent < simdjson::internal::smallest_power) || (i == 0)) { // E.g. Parse "-0.0e-999" into the same value as "-0.0". See https://en.wikipedia.org/wiki/Signed_zero WRITE_DOUBLE(negative ? -0.0 : 0.0, src, writer); return SUCCESS; } else { // (exponent > largest_power) and (i != 0) // We have, for sure, an infinite value and simdjson refuses to parse infinite values. return INVALID_NUMBER(src); } } double d; if (!compute_float_64(exponent, i, negative, d)) { // we are almost never going to get here. 
if (!parse_float_fallback(src, &d)) { return INVALID_NUMBER(src); } } WRITE_DOUBLE(d, src, writer); return SUCCESS; } // for performance analysis, it is sometimes useful to skip parsing #ifdef SIMDJSON_SKIPNUMBERPARSING template simdjson_inline error_code parse_number(const uint8_t *const, W &writer) { writer.append_s64(0); // always write zero return SUCCESS; // always succeeds } simdjson_unused simdjson_inline simdjson_result parse_unsigned(const uint8_t * const src) noexcept { return 0; } simdjson_unused simdjson_inline simdjson_result parse_integer(const uint8_t * const src) noexcept { return 0; } simdjson_unused simdjson_inline simdjson_result parse_double(const uint8_t * const src) noexcept { return 0; } simdjson_unused simdjson_inline simdjson_result parse_unsigned_in_string(const uint8_t * const src) noexcept { return 0; } simdjson_unused simdjson_inline simdjson_result parse_integer_in_string(const uint8_t * const src) noexcept { return 0; } simdjson_unused simdjson_inline simdjson_result parse_double_in_string(const uint8_t * const src) noexcept { return 0; } simdjson_unused simdjson_inline bool is_negative(const uint8_t * src) noexcept { return false; } simdjson_unused simdjson_inline simdjson_result is_integer(const uint8_t * src) noexcept { return false; } simdjson_unused simdjson_inline simdjson_result get_number_type(const uint8_t * src) noexcept { return number_type::signed_integer; } #else // parse the number at src // define JSON_TEST_NUMBERS for unit testing // // It is assumed that the number is followed by a structural ({,},],[) character // or a white space character. If that is not the case (e.g., when the JSON // document is made of a single number), then it is necessary to copy the // content and append a space before calling this function. // // Our objective is accurate parsing (ULP of 0) at high speed. 
template simdjson_inline error_code parse_number(const uint8_t *const src, W &writer) { // // Check for minus sign // bool negative = (*src == '-'); const uint8_t *p = src + uint8_t(negative); // // Parse the integer part. // // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare const uint8_t *const start_digits = p; uint64_t i = 0; while (parse_digit(*p, i)) { p++; } // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. // Optimization note: size_t is expected to be unsigned. size_t digit_count = size_t(p - start_digits); if (digit_count == 0 || ('0' == *start_digits && digit_count > 1)) { return INVALID_NUMBER(src); } // // Handle floats if there is a . or e (or both) // int64_t exponent = 0; bool is_float = false; if ('.' == *p) { is_float = true; ++p; SIMDJSON_TRY( parse_decimal_after_separator(src, p, i, exponent) ); digit_count = int(p - start_digits); // used later to guard against overflows } if (('e' == *p) || ('E' == *p)) { is_float = true; ++p; SIMDJSON_TRY( parse_exponent(src, p, exponent) ); } if (is_float) { const bool dirty_end = jsoncharutils::is_not_structural_or_whitespace(*p); SIMDJSON_TRY( write_float(src, negative, i, start_digits, digit_count, exponent, writer) ); if (dirty_end) { return INVALID_NUMBER(src); } return SUCCESS; } // The longest negative 64-bit number is 19 digits. // The longest positive 64-bit number is 20 digits. // We do it this way so we don't trigger this branch unless we must. size_t longest_digit_count = negative ? 
19 : 20; if (digit_count > longest_digit_count) { return INVALID_NUMBER(src); } if (digit_count == longest_digit_count) { if (negative) { // Anything negative above INT64_MAX+1 is invalid if (i > uint64_t(INT64_MAX)+1) { return INVALID_NUMBER(src); } WRITE_INTEGER(~i+1, src, writer); if (jsoncharutils::is_not_structural_or_whitespace(*p)) { return INVALID_NUMBER(src); } return SUCCESS; // Positive overflow check: // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the // biggest uint64_t. // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. // If we got here, it's a 20 digit number starting with the digit "1". // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller // than 1,553,255,926,290,448,384. // - That is smaller than the smallest possible 20-digit number the user could write: // 10,000,000,000,000,000,000. // - Therefore, if the number is positive and lower than that, it's overflow. // - The value we are looking at is less than or equal to INT64_MAX. // } else if (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INVALID_NUMBER(src); } } // Write unsigned if it doesn't fit in a signed integer. if (i > uint64_t(INT64_MAX)) { WRITE_UNSIGNED(i, src, writer); } else { WRITE_INTEGER(negative ? (~i+1) : i, src, writer); } if (jsoncharutils::is_not_structural_or_whitespace(*p)) { return INVALID_NUMBER(src); } return SUCCESS; } // Inlineable functions namespace { // This table can be used to characterize the final character of an integer // string. For JSON structural character and allowable white space characters, // we return SUCCESS. For 'e', '.' and 'E', we return INCORRECT_TYPE. Otherwise // we return NUMBER_ERROR. // Optimization note: we could easily reduce the size of the table by half (to 128) // at the cost of an extra branch. 
// Optimization note: we want the values to use at most 8 bits (not, e.g., 32 bits): static_assert(error_code(uint8_t(NUMBER_ERROR))== NUMBER_ERROR, "bad NUMBER_ERROR cast"); static_assert(error_code(uint8_t(SUCCESS))== SUCCESS, "bad NUMBER_ERROR cast"); static_assert(error_code(uint8_t(INCORRECT_TYPE))== INCORRECT_TYPE, "bad NUMBER_ERROR cast"); const uint8_t integer_string_finisher[256] = { NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, SUCCESS, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, INCORRECT_TYPE, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, INCORRECT_TYPE, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, INCORRECT_TYPE, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, 
NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR}; // Parse any number from 0 to 18,446,744,073,709,551,615 
simdjson_unused simdjson_inline simdjson_result parse_unsigned(const uint8_t * const src) noexcept { const uint8_t *p = src; // // Parse the integer part. // // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare const uint8_t *const start_digits = p; uint64_t i = 0; while (parse_digit(*p, i)) { p++; } // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. // Optimization note: size_t is expected to be unsigned. size_t digit_count = size_t(p - start_digits); // The longest positive 64-bit number is 20 digits. // We do it this way so we don't trigger this branch unless we must. // Optimization note: the compiler can probably merge // ((digit_count == 0) || (digit_count > 20)) // into a single branch since digit_count is unsigned. if ((digit_count == 0) || (digit_count > 20)) { return INCORRECT_TYPE; } // Here digit_count > 0. if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } // We can do the following... // if (!jsoncharutils::is_structural_or_whitespace(*p)) { // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; // } // as a single table lookup: if (integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } if (digit_count == 20) { // Positive overflow check: // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the // biggest uint64_t. // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. // If we got here, it's a 20 digit number starting with the digit "1". // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller // than 1,553,255,926,290,448,384. // - That is smaller than the smallest possible 20-digit number the user could write: // 10,000,000,000,000,000,000. // - Therefore, if the number is positive and lower than that, it's overflow. 
// - The value we are looking at is less than or equal to INT64_MAX. // if (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INCORRECT_TYPE; } } return i; } // Parse any number from 0 to 18,446,744,073,709,551,615 // Never read at src_end or beyond simdjson_unused simdjson_inline simdjson_result parse_unsigned(const uint8_t * const src, const uint8_t * const src_end) noexcept { const uint8_t *p = src; // // Parse the integer part. // // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare const uint8_t *const start_digits = p; uint64_t i = 0; while ((p != src_end) && parse_digit(*p, i)) { p++; } // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. // Optimization note: size_t is expected to be unsigned. size_t digit_count = size_t(p - start_digits); // The longest positive 64-bit number is 20 digits. // We do it this way so we don't trigger this branch unless we must. // Optimization note: the compiler can probably merge // ((digit_count == 0) || (digit_count > 20)) // into a single branch since digit_count is unsigned. if ((digit_count == 0) || (digit_count > 20)) { return INCORRECT_TYPE; } // Here digit_count > 0. if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } // We can do the following... // if (!jsoncharutils::is_structural_or_whitespace(*p)) { // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; // } // as a single table lookup: if ((p != src_end) && integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } if (digit_count == 20) { // Positive overflow check: // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the // biggest uint64_t. // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. // If we got here, it's a 20 digit number starting with the digit "1". 
// - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller // than 1,553,255,926,290,448,384. // - That is smaller than the smallest possible 20-digit number the user could write: // 10,000,000,000,000,000,000. // - Therefore, if the number is positive and lower than that, it's overflow. // - The value we are looking at is less than or equal to INT64_MAX. // if (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INCORRECT_TYPE; } } return i; } // Parse any number from 0 to 18,446,744,073,709,551,615 simdjson_unused simdjson_inline simdjson_result parse_unsigned_in_string(const uint8_t * const src) noexcept { const uint8_t *p = src + 1; // // Parse the integer part. // // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare const uint8_t *const start_digits = p; uint64_t i = 0; while (parse_digit(*p, i)) { p++; } // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. // Optimization note: size_t is expected to be unsigned. size_t digit_count = size_t(p - start_digits); // The longest positive 64-bit number is 20 digits. // We do it this way so we don't trigger this branch unless we must. // Optimization note: the compiler can probably merge // ((digit_count == 0) || (digit_count > 20)) // into a single branch since digit_count is unsigned. if ((digit_count == 0) || (digit_count > 20)) { return INCORRECT_TYPE; } // Here digit_count > 0. if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } // We can do the following... // if (!jsoncharutils::is_structural_or_whitespace(*p)) { // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; // } // as a single table lookup: if (*p != '"') { return NUMBER_ERROR; } if (digit_count == 20) { // Positive overflow check: // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the // biggest uint64_t. 
// - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. // If we got here, it's a 20 digit number starting with the digit "1". // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller // than 1,553,255,926,290,448,384. // - That is smaller than the smallest possible 20-digit number the user could write: // 10,000,000,000,000,000,000. // - Therefore, if the number is positive and lower than that, it's overflow. // - The value we are looking at is less than or equal to INT64_MAX. // // Note: we use src[1] and not src[0] because src[0] is the quote character in this // instance. if (src[1] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INCORRECT_TYPE; } } return i; } // Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 simdjson_unused simdjson_inline simdjson_result parse_integer(const uint8_t *src) noexcept { // // Check for minus sign // bool negative = (*src == '-'); const uint8_t *p = src + uint8_t(negative); // // Parse the integer part. // // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare const uint8_t *const start_digits = p; uint64_t i = 0; while (parse_digit(*p, i)) { p++; } // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. // Optimization note: size_t is expected to be unsigned. size_t digit_count = size_t(p - start_digits); // We go from // -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 // so we can never represent numbers that have more than 19 digits. size_t longest_digit_count = 19; // Optimization note: the compiler can probably merge // ((digit_count == 0) || (digit_count > longest_digit_count)) // into a single branch since digit_count is unsigned. if ((digit_count == 0) || (digit_count > longest_digit_count)) { return INCORRECT_TYPE; } // Here digit_count > 0. 
if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } // We can do the following... // if (!jsoncharutils::is_structural_or_whitespace(*p)) { // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; // } // as a single table lookup: if(integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } // Negative numbers have can go down to - INT64_MAX - 1 whereas positive numbers are limited to INT64_MAX. // Performance note: This check is only needed when digit_count == longest_digit_count but it is // so cheap that we might as well always make it. if(i > uint64_t(INT64_MAX) + uint64_t(negative)) { return INCORRECT_TYPE; } return negative ? (~i+1) : i; } // Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 // Never read at src_end or beyond simdjson_unused simdjson_inline simdjson_result parse_integer(const uint8_t * const src, const uint8_t * const src_end) noexcept { // // Check for minus sign // if(src == src_end) { return NUMBER_ERROR; } bool negative = (*src == '-'); const uint8_t *p = src + uint8_t(negative); // // Parse the integer part. // // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare const uint8_t *const start_digits = p; uint64_t i = 0; while ((p != src_end) && parse_digit(*p, i)) { p++; } // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. // Optimization note: size_t is expected to be unsigned. size_t digit_count = size_t(p - start_digits); // We go from // -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 // so we can never represent numbers that have more than 19 digits. size_t longest_digit_count = 19; // Optimization note: the compiler can probably merge // ((digit_count == 0) || (digit_count > longest_digit_count)) // into a single branch since digit_count is unsigned. 
if ((digit_count == 0) || (digit_count > longest_digit_count)) { return INCORRECT_TYPE; } // Here digit_count > 0. if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } // We can do the following... // if (!jsoncharutils::is_structural_or_whitespace(*p)) { // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; // } // as a single table lookup: if((p != src_end) && integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } // Negative numbers have can go down to - INT64_MAX - 1 whereas positive numbers are limited to INT64_MAX. // Performance note: This check is only needed when digit_count == longest_digit_count but it is // so cheap that we might as well always make it. if(i > uint64_t(INT64_MAX) + uint64_t(negative)) { return INCORRECT_TYPE; } return negative ? (~i+1) : i; } // Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 simdjson_unused simdjson_inline simdjson_result parse_integer_in_string(const uint8_t *src) noexcept { // // Check for minus sign // bool negative = (*(src + 1) == '-'); src += uint8_t(negative) + 1; // // Parse the integer part. // // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare const uint8_t *const start_digits = src; uint64_t i = 0; while (parse_digit(*src, i)) { src++; } // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. // Optimization note: size_t is expected to be unsigned. size_t digit_count = size_t(src - start_digits); // We go from // -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 // so we can never represent numbers that have more than 19 digits. size_t longest_digit_count = 19; // Optimization note: the compiler can probably merge // ((digit_count == 0) || (digit_count > longest_digit_count)) // into a single branch since digit_count is unsigned. 
if ((digit_count == 0) || (digit_count > longest_digit_count)) { return INCORRECT_TYPE; } // Here digit_count > 0. if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } // We can do the following... // if (!jsoncharutils::is_structural_or_whitespace(*src)) { // return (*src == '.' || *src == 'e' || *src == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; // } // as a single table lookup: if(*src != '"') { return NUMBER_ERROR; } // Negative numbers have can go down to - INT64_MAX - 1 whereas positive numbers are limited to INT64_MAX. // Performance note: This check is only needed when digit_count == longest_digit_count but it is // so cheap that we might as well always make it. if(i > uint64_t(INT64_MAX) + uint64_t(negative)) { return INCORRECT_TYPE; } return negative ? (~i+1) : i; } simdjson_unused simdjson_inline simdjson_result parse_double(const uint8_t * src) noexcept { // // Check for minus sign // bool negative = (*src == '-'); src += uint8_t(negative); // // Parse the integer part. // uint64_t i = 0; const uint8_t *p = src; p += parse_digit(*p, i); bool leading_zero = (i == 0); while (parse_digit(*p, i)) { p++; } // no integer digits, or 0123 (zero must be solo) if ( p == src ) { return INCORRECT_TYPE; } if ( (leading_zero && p != src+1)) { return NUMBER_ERROR; } // // Parse the decimal part. // int64_t exponent = 0; bool overflow; if (simdjson_likely(*p == '.')) { p++; const uint8_t *start_decimal_digits = p; if (!parse_digit(*p, i)) { return NUMBER_ERROR; } // no decimal digits p++; while (parse_digit(*p, i)) { p++; } exponent = -(p - start_decimal_digits); // Overflow check. More than 19 digits (minus the decimal) may be overflow. 
overflow = p-src-1 > 19; if (simdjson_unlikely(overflow && leading_zero)) { // Skip leading 0.00000 and see if it still overflows const uint8_t *start_digits = src + 2; while (*start_digits == '0') { start_digits++; } overflow = p-start_digits > 19; } } else { overflow = p-src > 19; } // // Parse the exponent // if (*p == 'e' || *p == 'E') { p++; bool exp_neg = *p == '-'; p += exp_neg || *p == '+'; uint64_t exp = 0; const uint8_t *start_exp_digits = p; while (parse_digit(*p, exp)) { p++; } // no exp digits, or 20+ exp digits if (p-start_exp_digits == 0 || p-start_exp_digits > 19) { return NUMBER_ERROR; } exponent += exp_neg ? 0-exp : exp; } if (jsoncharutils::is_not_structural_or_whitespace(*p)) { return NUMBER_ERROR; } overflow = overflow || exponent < simdjson::internal::smallest_power || exponent > simdjson::internal::largest_power; // // Assemble (or slow-parse) the float // double d; if (simdjson_likely(!overflow)) { if (compute_float_64(exponent, i, negative, d)) { return d; } } if (!parse_float_fallback(src - uint8_t(negative), &d)) { return NUMBER_ERROR; } return d; } simdjson_unused simdjson_inline bool is_negative(const uint8_t * src) noexcept { return (*src == '-'); } simdjson_unused simdjson_inline simdjson_result is_integer(const uint8_t * src) noexcept { bool negative = (*src == '-'); src += uint8_t(negative); const uint8_t *p = src; while(static_cast(*p - '0') <= 9) { p++; } if ( p == src ) { return NUMBER_ERROR; } if (jsoncharutils::is_structural_or_whitespace(*p)) { return true; } return false; } simdjson_unused simdjson_inline simdjson_result get_number_type(const uint8_t * src) noexcept { bool negative = (*src == '-'); src += uint8_t(negative); const uint8_t *p = src; while(static_cast(*p - '0') <= 9) { p++; } if ( p == src ) { return NUMBER_ERROR; } if (jsoncharutils::is_structural_or_whitespace(*p)) { // We have an integer. // If the number is negative and valid, it must be a signed integer. 
if(negative) { return number_type::signed_integer; } // We want values larger or equal to 9223372036854775808 to be unsigned // integers, and the other values to be signed integers. int digit_count = int(p - src); if(digit_count >= 19) { const uint8_t * smaller_big_integer = reinterpret_cast("9223372036854775808"); if((digit_count >= 20) || (memcmp(src, smaller_big_integer, 19) >= 0)) { return number_type::unsigned_integer; } } return number_type::signed_integer; } // Hopefully, we have 'e' or 'E' or '.'. return number_type::floating_point_number; } // Never read at src_end or beyond simdjson_unused simdjson_inline simdjson_result parse_double(const uint8_t * src, const uint8_t * const src_end) noexcept { if(src == src_end) { return NUMBER_ERROR; } // // Check for minus sign // bool negative = (*src == '-'); src += uint8_t(negative); // // Parse the integer part. // uint64_t i = 0; const uint8_t *p = src; if(p == src_end) { return NUMBER_ERROR; } p += parse_digit(*p, i); bool leading_zero = (i == 0); while ((p != src_end) && parse_digit(*p, i)) { p++; } // no integer digits, or 0123 (zero must be solo) if ( p == src ) { return INCORRECT_TYPE; } if ( (leading_zero && p != src+1)) { return NUMBER_ERROR; } // // Parse the decimal part. // int64_t exponent = 0; bool overflow; if (simdjson_likely((p != src_end) && (*p == '.'))) { p++; const uint8_t *start_decimal_digits = p; if ((p == src_end) || !parse_digit(*p, i)) { return NUMBER_ERROR; } // no decimal digits p++; while ((p != src_end) && parse_digit(*p, i)) { p++; } exponent = -(p - start_decimal_digits); // Overflow check. More than 19 digits (minus the decimal) may be overflow. 
overflow = p-src-1 > 19; if (simdjson_unlikely(overflow && leading_zero)) { // Skip leading 0.00000 and see if it still overflows const uint8_t *start_digits = src + 2; while (*start_digits == '0') { start_digits++; } overflow = start_digits-src > 19; } } else { overflow = p-src > 19; } // // Parse the exponent // if ((p != src_end) && (*p == 'e' || *p == 'E')) { p++; if(p == src_end) { return NUMBER_ERROR; } bool exp_neg = *p == '-'; p += exp_neg || *p == '+'; uint64_t exp = 0; const uint8_t *start_exp_digits = p; while ((p != src_end) && parse_digit(*p, exp)) { p++; } // no exp digits, or 20+ exp digits if (p-start_exp_digits == 0 || p-start_exp_digits > 19) { return NUMBER_ERROR; } exponent += exp_neg ? 0-exp : exp; } if ((p != src_end) && jsoncharutils::is_not_structural_or_whitespace(*p)) { return NUMBER_ERROR; } overflow = overflow || exponent < simdjson::internal::smallest_power || exponent > simdjson::internal::largest_power; // // Assemble (or slow-parse) the float // double d; if (simdjson_likely(!overflow)) { if (compute_float_64(exponent, i, negative, d)) { return d; } } if (!parse_float_fallback(src - uint8_t(negative), src_end, &d)) { return NUMBER_ERROR; } return d; } simdjson_unused simdjson_inline simdjson_result parse_double_in_string(const uint8_t * src) noexcept { // // Check for minus sign // bool negative = (*(src + 1) == '-'); src += uint8_t(negative) + 1; // // Parse the integer part. // uint64_t i = 0; const uint8_t *p = src; p += parse_digit(*p, i); bool leading_zero = (i == 0); while (parse_digit(*p, i)) { p++; } // no integer digits, or 0123 (zero must be solo) if ( p == src ) { return INCORRECT_TYPE; } if ( (leading_zero && p != src+1)) { return NUMBER_ERROR; } // // Parse the decimal part. 
// int64_t exponent = 0; bool overflow; if (simdjson_likely(*p == '.')) { p++; const uint8_t *start_decimal_digits = p; if (!parse_digit(*p, i)) { return NUMBER_ERROR; } // no decimal digits p++; while (parse_digit(*p, i)) { p++; } exponent = -(p - start_decimal_digits); // Overflow check. More than 19 digits (minus the decimal) may be overflow. overflow = p-src-1 > 19; if (simdjson_unlikely(overflow && leading_zero)) { // Skip leading 0.00000 and see if it still overflows const uint8_t *start_digits = src + 2; while (*start_digits == '0') { start_digits++; } overflow = p-start_digits > 19; } } else { overflow = p-src > 19; } // // Parse the exponent // if (*p == 'e' || *p == 'E') { p++; bool exp_neg = *p == '-'; p += exp_neg || *p == '+'; uint64_t exp = 0; const uint8_t *start_exp_digits = p; while (parse_digit(*p, exp)) { p++; } // no exp digits, or 20+ exp digits if (p-start_exp_digits == 0 || p-start_exp_digits > 19) { return NUMBER_ERROR; } exponent += exp_neg ? 0-exp : exp; } if (*p != '"') { return NUMBER_ERROR; } overflow = overflow || exponent < simdjson::internal::smallest_power || exponent > simdjson::internal::largest_power; // // Assemble (or slow-parse) the float // double d; if (simdjson_likely(!overflow)) { if (compute_float_64(exponent, i, negative, d)) { return d; } } if (!parse_float_fallback(src - uint8_t(negative), &d)) { return NUMBER_ERROR; } return d; } } // unnamed namespace #endif // SIMDJSON_SKIPNUMBERPARSING } // namespace numberparsing inline std::ostream& operator<<(std::ostream& out, number_type type) noexcept { switch (type) { case number_type::signed_integer: out << "integer in [-9223372036854775808,9223372036854775808)"; break; case number_type::unsigned_integer: out << "unsigned integer in [9223372036854775808,18446744073709551616)"; break; case number_type::floating_point_number: out << "floating-point number (binary64)"; break; default: SIMDJSON_UNREACHABLE(); } return out; } } // namespace ppc64 } // namespace simdjson #endif 
// SIMDJSON_GENERIC_NUMBERPARSING_H /* end file simdjson/generic/numberparsing.h for ppc64 */ /* including simdjson/generic/implementation_simdjson_result_base-inl.h for ppc64: #include "simdjson/generic/implementation_simdjson_result_base-inl.h" */ /* begin file simdjson/generic/implementation_simdjson_result_base-inl.h for ppc64 */ #ifndef SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_INL_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_INL_H */ /* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { namespace ppc64 { // // internal::implementation_simdjson_result_base inline implementation // template simdjson_inline void implementation_simdjson_result_base::tie(T &value, error_code &error) && noexcept { error = this->second; if (!error) { value = std::forward>(*this).first; } } template simdjson_warn_unused simdjson_inline error_code implementation_simdjson_result_base::get(T &value) && noexcept { error_code error; std::forward>(*this).tie(value, error); return error; } template simdjson_inline error_code implementation_simdjson_result_base::error() const noexcept { return this->second; } #if SIMDJSON_EXCEPTIONS template simdjson_inline T& implementation_simdjson_result_base::value() & noexcept(false) { if (error()) { throw simdjson_error(error()); } return this->first; } template simdjson_inline T&& implementation_simdjson_result_base::value() && noexcept(false) { return std::forward>(*this).take_value(); } template simdjson_inline T&& implementation_simdjson_result_base::take_value() && noexcept(false) { if (error()) { throw simdjson_error(error()); } return std::forward(this->first); } template 
simdjson_inline implementation_simdjson_result_base::operator T&&() && noexcept(false) { return std::forward>(*this).take_value(); } #endif // SIMDJSON_EXCEPTIONS template simdjson_inline const T& implementation_simdjson_result_base::value_unsafe() const& noexcept { return this->first; } template simdjson_inline T& implementation_simdjson_result_base::value_unsafe() & noexcept { return this->first; } template simdjson_inline T&& implementation_simdjson_result_base::value_unsafe() && noexcept { return std::forward(this->first); } template simdjson_inline implementation_simdjson_result_base::implementation_simdjson_result_base(T &&value, error_code error) noexcept : first{std::forward(value)}, second{error} {} template simdjson_inline implementation_simdjson_result_base::implementation_simdjson_result_base(error_code error) noexcept : implementation_simdjson_result_base(T{}, error) {} template simdjson_inline implementation_simdjson_result_base::implementation_simdjson_result_base(T &&value) noexcept : implementation_simdjson_result_base(std::forward(value), SUCCESS) {} } // namespace ppc64 } // namespace simdjson #endif // SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_INL_H /* end file simdjson/generic/implementation_simdjson_result_base-inl.h for ppc64 */ /* end file simdjson/generic/amalgamated.h for ppc64 */ /* including simdjson/ppc64/end.h: #include "simdjson/ppc64/end.h" */ /* begin file simdjson/ppc64/end.h */ /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #include "simdjson/ppc64/base.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ #undef SIMDJSON_SKIP_BACKSLASH_SHORT_CIRCUIT /* undefining SIMDJSON_IMPLEMENTATION from "ppc64" */ #undef SIMDJSON_IMPLEMENTATION /* end file simdjson/ppc64/end.h */ #endif // SIMDJSON_PPC64_H /* end file simdjson/ppc64.h */ #elif SIMDJSON_BUILTIN_IMPLEMENTATION_IS(westmere) /* including simdjson/westmere.h: 
#include "simdjson/westmere.h" */ /* begin file simdjson/westmere.h */ #ifndef SIMDJSON_WESTMERE_H #define SIMDJSON_WESTMERE_H /* including simdjson/westmere/begin.h: #include "simdjson/westmere/begin.h" */ /* begin file simdjson/westmere/begin.h */ /* defining SIMDJSON_IMPLEMENTATION to "westmere" */ #define SIMDJSON_IMPLEMENTATION westmere /* including simdjson/westmere/base.h: #include "simdjson/westmere/base.h" */ /* begin file simdjson/westmere/base.h */ #ifndef SIMDJSON_WESTMERE_BASE_H #define SIMDJSON_WESTMERE_BASE_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #include "simdjson/base.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ // The constructor may be executed on any host, so we take care not to use SIMDJSON_TARGET_WESTMERE namespace simdjson { /** * Implementation for Westmere (Intel SSE4.2). */ namespace westmere { class implementation; namespace { namespace simd { template struct simd8; template struct simd8x64; } // namespace simd } // unnamed namespace } // namespace westmere } // namespace simdjson #endif // SIMDJSON_WESTMERE_BASE_H /* end file simdjson/westmere/base.h */ /* including simdjson/westmere/intrinsics.h: #include "simdjson/westmere/intrinsics.h" */ /* begin file simdjson/westmere/intrinsics.h */ #ifndef SIMDJSON_WESTMERE_INTRINSICS_H #define SIMDJSON_WESTMERE_INTRINSICS_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #include "simdjson/westmere/base.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ #if SIMDJSON_VISUAL_STUDIO // under clang within visual studio, this will include #include // visual studio or clang #else #include // elsewhere #endif // SIMDJSON_VISUAL_STUDIO #if SIMDJSON_CLANG_VISUAL_STUDIO /** * You are not supposed, normally, to include these * headers directly. 
Instead you should either include intrin.h * or x86intrin.h. However, when compiling with clang * under Windows (i.e., when _MSC_VER is set), these headers * only get included *if* the corresponding features are detected * from macros: */ #include // for _mm_alignr_epi8 #include // for _mm_clmulepi64_si128 #endif static_assert(sizeof(__m128i) <= simdjson::SIMDJSON_PADDING, "insufficient padding for westmere"); #endif // SIMDJSON_WESTMERE_INTRINSICS_H /* end file simdjson/westmere/intrinsics.h */ #if !SIMDJSON_CAN_ALWAYS_RUN_WESTMERE SIMDJSON_TARGET_REGION("sse4.2,pclmul,popcnt") #endif /* including simdjson/westmere/bitmanipulation.h: #include "simdjson/westmere/bitmanipulation.h" */ /* begin file simdjson/westmere/bitmanipulation.h */ #ifndef SIMDJSON_WESTMERE_BITMANIPULATION_H #define SIMDJSON_WESTMERE_BITMANIPULATION_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #include "simdjson/westmere/base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/westmere/intrinsics.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { namespace westmere { namespace { // We sometimes call trailing_zero on inputs that are zero, // but the algorithms do not end up using the returned value. // Sadly, sanitizers are not smart enough to figure it out. SIMDJSON_NO_SANITIZE_UNDEFINED // This function can be used safely even if not all bytes have been // initialized. // See issue https://github.com/simdjson/simdjson/issues/1965 SIMDJSON_NO_SANITIZE_MEMORY simdjson_inline int trailing_zeroes(uint64_t input_num) { #if SIMDJSON_REGULAR_VISUAL_STUDIO unsigned long ret; // Search the mask data from least significant bit (LSB) // to the most significant bit (MSB) for a set bit (1). 
_BitScanForward64(&ret, input_num); return (int)ret; #else // SIMDJSON_REGULAR_VISUAL_STUDIO return __builtin_ctzll(input_num); #endif // SIMDJSON_REGULAR_VISUAL_STUDIO } /* clears the lowest set bit; for input_num == 0 the unsigned subtraction wraps and the result is 0 */ simdjson_inline uint64_t clear_lowest_bit(uint64_t input_num) { return input_num & (input_num-1); } /* result might be undefined when input_num is zero */ simdjson_inline int leading_zeroes(uint64_t input_num) { #if SIMDJSON_REGULAR_VISUAL_STUDIO unsigned long leading_zero = 0; // Search the mask data from most significant bit (MSB) // to least significant bit (LSB) for a set bit (1). if (_BitScanReverse64(&leading_zero, input_num)) return (int)(63 - leading_zero); else return 64; #else return __builtin_clzll(input_num); #endif// SIMDJSON_REGULAR_VISUAL_STUDIO } #if SIMDJSON_REGULAR_VISUAL_STUDIO simdjson_inline unsigned __int64 count_ones(uint64_t input_num) { // note: we do not support legacy 32-bit Windows in this kernel return __popcnt64(input_num);// Visual Studio wants two underscores } #else simdjson_inline long long int count_ones(uint64_t input_num) { return _popcnt64(input_num); } #endif simdjson_inline bool add_overflow(uint64_t value1, uint64_t value2, uint64_t *result) { #if SIMDJSON_REGULAR_VISUAL_STUDIO return _addcarry_u64(0, value1, value2, reinterpret_cast(result)); #else return __builtin_uaddll_overflow(value1, value2, reinterpret_cast(result)); #endif } } // unnamed namespace } // namespace westmere } // namespace simdjson #endif // SIMDJSON_WESTMERE_BITMANIPULATION_H /* end file simdjson/westmere/bitmanipulation.h */ /* including simdjson/westmere/bitmask.h: #include "simdjson/westmere/bitmask.h" */ /* begin file simdjson/westmere/bitmask.h */ #ifndef SIMDJSON_WESTMERE_BITMASK_H #define SIMDJSON_WESTMERE_BITMASK_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #include "simdjson/westmere/base.h" */ /* amalgamation skipped (editor-only): #include
"simdjson/westmere/intrinsics.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { namespace westmere { namespace { // // Perform a "cumulative bitwise xor," flipping bits each time a 1 is encountered. // // For example, prefix_xor(00100100) == 00011100 // simdjson_inline uint64_t prefix_xor(const uint64_t bitmask) { // There should be no such thing as a processor supporting avx2 // but not clmul. __m128i all_ones = _mm_set1_epi8('\xFF'); __m128i result = _mm_clmulepi64_si128(_mm_set_epi64x(0ULL, bitmask), all_ones, 0); return _mm_cvtsi128_si64(result); } } // unnamed namespace } // namespace westmere } // namespace simdjson #endif // SIMDJSON_WESTMERE_BITMASK_H /* end file simdjson/westmere/bitmask.h */ /* including simdjson/westmere/numberparsing_defs.h: #include "simdjson/westmere/numberparsing_defs.h" */ /* begin file simdjson/westmere/numberparsing_defs.h */ #ifndef SIMDJSON_WESTMERE_NUMBERPARSING_DEFS_H #define SIMDJSON_WESTMERE_NUMBERPARSING_DEFS_H /* including simdjson/westmere/base.h: #include "simdjson/westmere/base.h" */ /* begin file simdjson/westmere/base.h */ #ifndef SIMDJSON_WESTMERE_BASE_H #define SIMDJSON_WESTMERE_BASE_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #include "simdjson/base.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ // The constructor may be executed on any host, so we take care not to use SIMDJSON_TARGET_WESTMERE namespace simdjson { /** * Implementation for Westmere (Intel SSE4.2).
*/ namespace westmere { class implementation; namespace { namespace simd { template struct simd8; template struct simd8x64; } // namespace simd } // unnamed namespace } // namespace westmere } // namespace simdjson #endif // SIMDJSON_WESTMERE_BASE_H /* end file simdjson/westmere/base.h */ /* including simdjson/westmere/intrinsics.h: #include "simdjson/westmere/intrinsics.h" */ /* begin file simdjson/westmere/intrinsics.h */ #ifndef SIMDJSON_WESTMERE_INTRINSICS_H #define SIMDJSON_WESTMERE_INTRINSICS_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #include "simdjson/westmere/base.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ #if SIMDJSON_VISUAL_STUDIO // under clang within visual studio, this will include #include // visual studio or clang #else #include // elsewhere #endif // SIMDJSON_VISUAL_STUDIO #if SIMDJSON_CLANG_VISUAL_STUDIO /** * You are not supposed, normally, to include these * headers directly. Instead you should either include intrin.h * or x86intrin.h. 
However, when compiling with clang * under Windows (i.e., when _MSC_VER is set), these headers * only get included *if* the corresponding features are detected * from macros: */ #include // for _mm_alignr_epi8 #include // for _mm_clmulepi64_si128 #endif static_assert(sizeof(__m128i) <= simdjson::SIMDJSON_PADDING, "insufficient padding for westmere"); #endif // SIMDJSON_WESTMERE_INTRINSICS_H /* end file simdjson/westmere/intrinsics.h */ /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #include "simdjson/internal/numberparsing_tables.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { namespace westmere { namespace numberparsing { /** @private */ static simdjson_inline uint32_t parse_eight_digits_unrolled(const uint8_t *chars) { // this actually computes *16* values so we are being wasteful. const __m128i ascii0 = _mm_set1_epi8('0'); const __m128i mul_1_10 = _mm_setr_epi8(10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1); const __m128i mul_1_100 = _mm_setr_epi16(100, 1, 100, 1, 100, 1, 100, 1); const __m128i mul_1_10000 = _mm_setr_epi16(10000, 1, 10000, 1, 10000, 1, 10000, 1); const __m128i input = _mm_sub_epi8( _mm_loadu_si128(reinterpret_cast(chars)), ascii0); const __m128i t1 = _mm_maddubs_epi16(input, mul_1_10); const __m128i t2 = _mm_madd_epi16(t1, mul_1_100); const __m128i t3 = _mm_packus_epi32(t2, t2); const __m128i t4 = _mm_madd_epi16(t3, mul_1_10000); return _mm_cvtsi128_si32( t4); // only captures the sum of the first 8 digits, drop the rest } /** @private Multiplies value1 by value2 and returns the product split into the low and high 64-bit halves of a value128. */ simdjson_inline internal::value128 full_multiplication(uint64_t value1, uint64_t value2) { internal::value128 answer; #if SIMDJSON_REGULAR_VISUAL_STUDIO || SIMDJSON_IS_32BITS #ifdef _M_ARM64 // ARM64 has native support for 64-bit multiplications, no need to emulate answer.high = __umulh(value1, value2); answer.low = value1 * value2; #else answer.low = _umul128(value1,
value2, &answer.high); // _umul128 not available on ARM64 #endif // _M_ARM64 #else // SIMDJSON_REGULAR_VISUAL_STUDIO || SIMDJSON_IS_32BITS __uint128_t r = (static_cast<__uint128_t>(value1)) * value2; answer.low = uint64_t(r); answer.high = uint64_t(r >> 64); #endif return answer; } } // namespace numberparsing } // namespace westmere } // namespace simdjson #define SIMDJSON_SWAR_NUMBER_PARSING 1 #endif // SIMDJSON_WESTMERE_NUMBERPARSING_DEFS_H /* end file simdjson/westmere/numberparsing_defs.h */ /* including simdjson/westmere/simd.h: #include "simdjson/westmere/simd.h" */ /* begin file simdjson/westmere/simd.h */ #ifndef SIMDJSON_WESTMERE_SIMD_H #define SIMDJSON_WESTMERE_SIMD_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #include "simdjson/westmere/base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/westmere/bitmanipulation.h" */ /* amalgamation skipped (editor-only): #include "simdjson/internal/simdprune_tables.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { namespace westmere { namespace { namespace simd { template struct base { __m128i value; // Zero constructor simdjson_inline base() : value{__m128i()} {} // Conversion from SIMD register simdjson_inline base(const __m128i _value) : value(_value) {} // Conversion to SIMD register simdjson_inline operator const __m128i&() const { return this->value; } simdjson_inline operator __m128i&() { return this->value; } // Bit operations simdjson_inline Child operator|(const Child other) const { return _mm_or_si128(*this, other); } simdjson_inline Child operator&(const Child other) const { return _mm_and_si128(*this, other); } simdjson_inline Child operator^(const Child other) const { return _mm_xor_si128(*this, other); } simdjson_inline Child bit_andnot(const Child other) const { return _mm_andnot_si128(other, *this); } simdjson_inline Child& operator|=(const Child other) 
{ auto this_cast = static_cast(this); *this_cast = *this_cast | other; return *this_cast; } simdjson_inline Child& operator&=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast & other; return *this_cast; } simdjson_inline Child& operator^=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast ^ other; return *this_cast; } }; template> struct base8: base> { typedef uint16_t bitmask_t; typedef uint32_t bitmask2_t; simdjson_inline base8() : base>() {} simdjson_inline base8(const __m128i _value) : base>(_value) {} friend simdjson_inline Mask operator==(const simd8 lhs, const simd8 rhs) { return _mm_cmpeq_epi8(lhs, rhs); } static const int SIZE = sizeof(base>::value); template simdjson_inline simd8 prev(const simd8 prev_chunk) const { return _mm_alignr_epi8(*this, prev_chunk, 16 - N); } }; // SIMD byte mask type (returned by things like eq and gt) template<> struct simd8: base8 { static simdjson_inline simd8 splat(bool _value) { return _mm_set1_epi8(uint8_t(-(!!_value))); } simdjson_inline simd8() : base8() {} simdjson_inline simd8(const __m128i _value) : base8(_value) {} // Splat constructor simdjson_inline simd8(bool _value) : base8(splat(_value)) {} simdjson_inline int to_bitmask() const { return _mm_movemask_epi8(*this); } simdjson_inline bool any() const { return !_mm_testz_si128(*this, *this); } simdjson_inline simd8 operator~() const { return *this ^ true; } }; template struct base8_numeric: base8 { static simdjson_inline simd8 splat(T _value) { return _mm_set1_epi8(_value); } static simdjson_inline simd8 zero() { return _mm_setzero_si128(); } static simdjson_inline simd8 load(const T values[16]) { return _mm_loadu_si128(reinterpret_cast(values)); } // Repeat 16 values as many times as necessary (usually for lookup tables) static simdjson_inline simd8 repeat_16( T v0, T v1, T v2, T v3, T v4, T v5, T v6, T v7, T v8, T v9, T v10, T v11, T v12, T v13, T v14, T v15 ) { return simd8( v0, v1, v2, v3, v4, v5, v6, 
v7, v8, v9, v10,v11,v12,v13,v14,v15 ); } simdjson_inline base8_numeric() : base8() {} simdjson_inline base8_numeric(const __m128i _value) : base8(_value) {} // Store to array simdjson_inline void store(T dst[16]) const { return _mm_storeu_si128(reinterpret_cast<__m128i *>(dst), *this); } // Override to distinguish from bool version simdjson_inline simd8 operator~() const { return *this ^ 0xFFu; } // Addition/subtraction are the same for signed and unsigned simdjson_inline simd8 operator+(const simd8 other) const { return _mm_add_epi8(*this, other); } simdjson_inline simd8 operator-(const simd8 other) const { return _mm_sub_epi8(*this, other); } simdjson_inline simd8& operator+=(const simd8 other) { *this = *this + other; return *static_cast*>(this); } simdjson_inline simd8& operator-=(const simd8 other) { *this = *this - other; return *static_cast*>(this); } // Perform a lookup assuming the value is between 0 and 15 inclusive (undefined behavior for out of range values) template simdjson_inline simd8 lookup_16(simd8 lookup_table) const { return _mm_shuffle_epi8(lookup_table, *this); } // Copies to "output" all bytes corresponding to a 0 in the mask (interpreted as a bitset). // Passing a 0 value for mask would be equivalent to writing out every byte to output. // Only the first 16 - count_ones(mask) bytes of the result are significant but 16 bytes // get written. // Design consideration: it seems like a function with the // signature simd8 compress(uint32_t mask) would be // sensible, but the AVX ISA makes this kind of approach difficult.
template simdjson_inline void compress(uint16_t mask, L * output) const { using internal::thintable_epi8; using internal::BitsSetTable256mul2; using internal::pshufb_combine_table; // this particular implementation was inspired by work done by @animetosho // we do it in two steps, first 8 bytes and then second 8 bytes uint8_t mask1 = uint8_t(mask); // least significant 8 bits uint8_t mask2 = uint8_t(mask >> 8); // most significant 8 bits // next line just loads the 64-bit values thintable_epi8[mask1] and // thintable_epi8[mask2] into a 128-bit register, using only // two instructions on most compilers. __m128i shufmask = _mm_set_epi64x(thintable_epi8[mask2], thintable_epi8[mask1]); // we increment by 0x08 the second half of the mask shufmask = _mm_add_epi8(shufmask, _mm_set_epi32(0x08080808, 0x08080808, 0, 0)); // this is the version "nearly pruned" __m128i pruned = _mm_shuffle_epi8(*this, shufmask); // we still need to put the two halves together. // we compute the popcount of the first half: int pop1 = BitsSetTable256mul2[mask1]; // then load the corresponding mask, what it does is to write // only the first pop1 bytes from the first 8 bytes, and then // it fills in with the bytes from the second 8 bytes + some filling // at the end. 
__m128i compactmask = _mm_loadu_si128(reinterpret_cast(pshufb_combine_table + pop1 * 8)); __m128i answer = _mm_shuffle_epi8(pruned, compactmask); _mm_storeu_si128(reinterpret_cast<__m128i *>(output), answer); } template simdjson_inline simd8 lookup_16( L replace0, L replace1, L replace2, L replace3, L replace4, L replace5, L replace6, L replace7, L replace8, L replace9, L replace10, L replace11, L replace12, L replace13, L replace14, L replace15) const { return lookup_16(simd8::repeat_16( replace0, replace1, replace2, replace3, replace4, replace5, replace6, replace7, replace8, replace9, replace10, replace11, replace12, replace13, replace14, replace15 )); } }; // Signed bytes template<> struct simd8 : base8_numeric { simdjson_inline simd8() : base8_numeric() {} simdjson_inline simd8(const __m128i _value) : base8_numeric(_value) {} // Splat constructor simdjson_inline simd8(int8_t _value) : simd8(splat(_value)) {} // Array constructor simdjson_inline simd8(const int8_t* values) : simd8(load(values)) {} // Member-by-member initialization simdjson_inline simd8( int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15 ) : simd8(_mm_setr_epi8( v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10,v11,v12,v13,v14,v15 )) {} // Repeat 16 values as many times as necessary (usually for lookup tables) simdjson_inline static simd8 repeat_16( int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15 ) { return simd8( v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10,v11,v12,v13,v14,v15 ); } // Order-sensitive comparisons simdjson_inline simd8 max_val(const simd8 other) const { return _mm_max_epi8(*this, other); } simdjson_inline simd8 min_val(const simd8 other) const { return _mm_min_epi8(*this, other); } simdjson_inline simd8 operator>(const simd8 
other) const { return _mm_cmpgt_epi8(*this, other); } simdjson_inline simd8 operator<(const simd8 other) const { return _mm_cmpgt_epi8(other, *this); } }; // Unsigned bytes template<> struct simd8: base8_numeric { simdjson_inline simd8() : base8_numeric() {} simdjson_inline simd8(const __m128i _value) : base8_numeric(_value) {} // Splat constructor simdjson_inline simd8(uint8_t _value) : simd8(splat(_value)) {} // Array constructor simdjson_inline simd8(const uint8_t* values) : simd8(load(values)) {} // Member-by-member initialization simdjson_inline simd8( uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15 ) : simd8(_mm_setr_epi8( v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10,v11,v12,v13,v14,v15 )) {} // Repeat 16 values as many times as necessary (usually for lookup tables) simdjson_inline static simd8 repeat_16( uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15 ) { return simd8( v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10,v11,v12,v13,v14,v15 ); } // Saturated math simdjson_inline simd8 saturating_add(const simd8 other) const { return _mm_adds_epu8(*this, other); } simdjson_inline simd8 saturating_sub(const simd8 other) const { return _mm_subs_epu8(*this, other); } // Order-specific operations simdjson_inline simd8 max_val(const simd8 other) const { return _mm_max_epu8(*this, other); } simdjson_inline simd8 min_val(const simd8 other) const { return _mm_min_epu8(*this, other); } // Same as >, but only guarantees true is nonzero (> guarantees true = -1) simdjson_inline simd8 gt_bits(const simd8 other) const { return this->saturating_sub(other); } // Same as <, but only guarantees true is nonzero (< guarantees true = -1) simdjson_inline simd8 lt_bits(const simd8 other) const {
return other.saturating_sub(*this); } simdjson_inline simd8 operator<=(const simd8 other) const { return other.max_val(*this) == other; } simdjson_inline simd8 operator>=(const simd8 other) const { return other.min_val(*this) == other; } simdjson_inline simd8 operator>(const simd8 other) const { return this->gt_bits(other).any_bits_set(); } /* a < b holds exactly in the lanes where (other - *this) saturates to a nonzero byte, i.e. lt_bits; using gt_bits here would compute a > b instead */ simdjson_inline simd8 operator<(const simd8 other) const { return this->lt_bits(other).any_bits_set(); } // Bit-specific operations simdjson_inline simd8 bits_not_set() const { return *this == uint8_t(0); } simdjson_inline simd8 bits_not_set(simd8 bits) const { return (*this & bits).bits_not_set(); } simdjson_inline simd8 any_bits_set() const { return ~this->bits_not_set(); } simdjson_inline simd8 any_bits_set(simd8 bits) const { return ~this->bits_not_set(bits); } simdjson_inline bool is_ascii() const { return _mm_movemask_epi8(*this) == 0; } simdjson_inline bool bits_not_set_anywhere() const { return _mm_testz_si128(*this, *this); } simdjson_inline bool any_bits_set_anywhere() const { return !bits_not_set_anywhere(); } simdjson_inline bool bits_not_set_anywhere(simd8 bits) const { return _mm_testz_si128(*this, bits); } simdjson_inline bool any_bits_set_anywhere(simd8 bits) const { return !bits_not_set_anywhere(bits); } template simdjson_inline simd8 shr() const { return simd8(_mm_srli_epi16(*this, N)) & uint8_t(0xFFu >> N); } template simdjson_inline simd8 shl() const { return simd8(_mm_slli_epi16(*this, N)) & uint8_t(0xFFu << N); } // Get one of the bits and make a bitmask out of it. // e.g.
value.get_bit<7>() gets the high bit template simdjson_inline int get_bit() const { return _mm_movemask_epi8(_mm_slli_epi16(*this, 7-N)); } }; template struct simd8x64 { static constexpr int NUM_CHUNKS = 64 / sizeof(simd8); static_assert(NUM_CHUNKS == 4, "Westmere kernel should use four registers per 64-byte block."); const simd8 chunks[NUM_CHUNKS]; simd8x64(const simd8x64& o) = delete; // no copy allowed simd8x64& operator=(const simd8& other) = delete; // no assignment allowed simd8x64() = delete; // no default constructor allowed simdjson_inline simd8x64(const simd8 chunk0, const simd8 chunk1, const simd8 chunk2, const simd8 chunk3) : chunks{chunk0, chunk1, chunk2, chunk3} {} simdjson_inline simd8x64(const T ptr[64]) : chunks{simd8::load(ptr), simd8::load(ptr+16), simd8::load(ptr+32), simd8::load(ptr+48)} {} simdjson_inline void store(T ptr[64]) const { this->chunks[0].store(ptr+sizeof(simd8)*0); this->chunks[1].store(ptr+sizeof(simd8)*1); this->chunks[2].store(ptr+sizeof(simd8)*2); this->chunks[3].store(ptr+sizeof(simd8)*3); } simdjson_inline simd8 reduce_or() const { return (this->chunks[0] | this->chunks[1]) | (this->chunks[2] | this->chunks[3]); } simdjson_inline uint64_t compress(uint64_t mask, T * output) const { this->chunks[0].compress(uint16_t(mask), output); this->chunks[1].compress(uint16_t(mask >> 16), output + 16 - count_ones(mask & 0xFFFF)); this->chunks[2].compress(uint16_t(mask >> 32), output + 32 - count_ones(mask & 0xFFFFFFFF)); this->chunks[3].compress(uint16_t(mask >> 48), output + 48 - count_ones(mask & 0xFFFFFFFFFFFF)); return 64 - count_ones(mask); } simdjson_inline uint64_t to_bitmask() const { uint64_t r0 = uint32_t(this->chunks[0].to_bitmask() ); uint64_t r1 = this->chunks[1].to_bitmask() ; uint64_t r2 = this->chunks[2].to_bitmask() ; uint64_t r3 = this->chunks[3].to_bitmask() ; return r0 | (r1 << 16) | (r2 << 32) | (r3 << 48); } simdjson_inline uint64_t eq(const T m) const { const simd8 mask = simd8::splat(m); return simd8x64( 
this->chunks[0] == mask, this->chunks[1] == mask, this->chunks[2] == mask, this->chunks[3] == mask ).to_bitmask(); } simdjson_inline uint64_t eq(const simd8x64 &other) const { return simd8x64( this->chunks[0] == other.chunks[0], this->chunks[1] == other.chunks[1], this->chunks[2] == other.chunks[2], this->chunks[3] == other.chunks[3] ).to_bitmask(); } simdjson_inline uint64_t lteq(const T m) const { const simd8 mask = simd8::splat(m); return simd8x64( this->chunks[0] <= mask, this->chunks[1] <= mask, this->chunks[2] <= mask, this->chunks[3] <= mask ).to_bitmask(); } }; // struct simd8x64 } // namespace simd } // unnamed namespace } // namespace westmere } // namespace simdjson #endif // SIMDJSON_WESTMERE_SIMD_H /* end file simdjson/westmere/simd.h */ /* including simdjson/westmere/stringparsing_defs.h: #include "simdjson/westmere/stringparsing_defs.h" */ /* begin file simdjson/westmere/stringparsing_defs.h */ #ifndef SIMDJSON_WESTMERE_STRINGPARSING_DEFS_H #define SIMDJSON_WESTMERE_STRINGPARSING_DEFS_H /* including simdjson/westmere/bitmanipulation.h: #include "simdjson/westmere/bitmanipulation.h" */ /* begin file simdjson/westmere/bitmanipulation.h */ #ifndef SIMDJSON_WESTMERE_BITMANIPULATION_H #define SIMDJSON_WESTMERE_BITMANIPULATION_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #include "simdjson/westmere/base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/westmere/intrinsics.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { namespace westmere { namespace { // We sometimes call trailing_zeroes on inputs that are zero, // but the algorithms do not end up using the returned value. // Sadly, sanitizers are not smart enough to figure it out. SIMDJSON_NO_SANITIZE_UNDEFINED // This function can be used safely even if not all bytes have been // initialized.
// See issue https://github.com/simdjson/simdjson/issues/1965 SIMDJSON_NO_SANITIZE_MEMORY simdjson_inline int trailing_zeroes(uint64_t input_num) { #if SIMDJSON_REGULAR_VISUAL_STUDIO unsigned long ret; // Search the mask data from least significant bit (LSB) // to the most significant bit (MSB) for a set bit (1). _BitScanForward64(&ret, input_num); return (int)ret; #else // SIMDJSON_REGULAR_VISUAL_STUDIO return __builtin_ctzll(input_num); #endif // SIMDJSON_REGULAR_VISUAL_STUDIO } /* clears the lowest set bit; for input_num == 0 the unsigned subtraction wraps and the result is 0 */ simdjson_inline uint64_t clear_lowest_bit(uint64_t input_num) { return input_num & (input_num-1); } /* result might be undefined when input_num is zero */ simdjson_inline int leading_zeroes(uint64_t input_num) { #if SIMDJSON_REGULAR_VISUAL_STUDIO unsigned long leading_zero = 0; // Search the mask data from most significant bit (MSB) // to least significant bit (LSB) for a set bit (1). if (_BitScanReverse64(&leading_zero, input_num)) return (int)(63 - leading_zero); else return 64; #else return __builtin_clzll(input_num); #endif// SIMDJSON_REGULAR_VISUAL_STUDIO } #if SIMDJSON_REGULAR_VISUAL_STUDIO simdjson_inline unsigned __int64 count_ones(uint64_t input_num) { // note: we do not support legacy 32-bit Windows in this kernel return __popcnt64(input_num);// Visual Studio wants two underscores } #else simdjson_inline long long int count_ones(uint64_t input_num) { return _popcnt64(input_num); } #endif simdjson_inline bool add_overflow(uint64_t value1, uint64_t value2, uint64_t *result) { #if SIMDJSON_REGULAR_VISUAL_STUDIO return _addcarry_u64(0, value1, value2, reinterpret_cast(result)); #else return __builtin_uaddll_overflow(value1, value2, reinterpret_cast(result)); #endif } } // unnamed namespace } // namespace westmere } // namespace simdjson #endif // SIMDJSON_WESTMERE_BITMANIPULATION_H /* end file simdjson/westmere/bitmanipulation.h */ /* including simdjson/westmere/simd.h: #include "simdjson/westmere/simd.h" */ /* begin file
simdjson/westmere/simd.h */ #ifndef SIMDJSON_WESTMERE_SIMD_H #define SIMDJSON_WESTMERE_SIMD_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #include "simdjson/westmere/base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/westmere/bitmanipulation.h" */ /* amalgamation skipped (editor-only): #include "simdjson/internal/simdprune_tables.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { namespace westmere { namespace { namespace simd { template struct base { __m128i value; // Zero constructor simdjson_inline base() : value{__m128i()} {} // Conversion from SIMD register simdjson_inline base(const __m128i _value) : value(_value) {} // Conversion to SIMD register simdjson_inline operator const __m128i&() const { return this->value; } simdjson_inline operator __m128i&() { return this->value; } // Bit operations simdjson_inline Child operator|(const Child other) const { return _mm_or_si128(*this, other); } simdjson_inline Child operator&(const Child other) const { return _mm_and_si128(*this, other); } simdjson_inline Child operator^(const Child other) const { return _mm_xor_si128(*this, other); } simdjson_inline Child bit_andnot(const Child other) const { return _mm_andnot_si128(other, *this); } simdjson_inline Child& operator|=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast | other; return *this_cast; } simdjson_inline Child& operator&=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast & other; return *this_cast; } simdjson_inline Child& operator^=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast ^ other; return *this_cast; } }; template> struct base8: base> { typedef uint16_t bitmask_t; typedef uint32_t bitmask2_t; simdjson_inline base8() : base>() {} simdjson_inline base8(const __m128i _value) : base>(_value) {} friend 
simdjson_inline Mask operator==(const simd8 lhs, const simd8 rhs) { return _mm_cmpeq_epi8(lhs, rhs); } static const int SIZE = sizeof(base>::value); template simdjson_inline simd8 prev(const simd8 prev_chunk) const { return _mm_alignr_epi8(*this, prev_chunk, 16 - N); } }; // SIMD byte mask type (returned by things like eq and gt) template<> struct simd8: base8 { static simdjson_inline simd8 splat(bool _value) { return _mm_set1_epi8(uint8_t(-(!!_value))); } simdjson_inline simd8() : base8() {} simdjson_inline simd8(const __m128i _value) : base8(_value) {} // Splat constructor simdjson_inline simd8(bool _value) : base8(splat(_value)) {} simdjson_inline int to_bitmask() const { return _mm_movemask_epi8(*this); } simdjson_inline bool any() const { return !_mm_testz_si128(*this, *this); } simdjson_inline simd8 operator~() const { return *this ^ true; } }; template struct base8_numeric: base8 { static simdjson_inline simd8 splat(T _value) { return _mm_set1_epi8(_value); } static simdjson_inline simd8 zero() { return _mm_setzero_si128(); } static simdjson_inline simd8 load(const T values[16]) { return _mm_loadu_si128(reinterpret_cast(values)); } // Repeat 16 values as many times as necessary (usually for lookup tables) static simdjson_inline simd8 repeat_16( T v0, T v1, T v2, T v3, T v4, T v5, T v6, T v7, T v8, T v9, T v10, T v11, T v12, T v13, T v14, T v15 ) { return simd8( v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10,v11,v12,v13,v14,v15 ); } simdjson_inline base8_numeric() : base8() {} simdjson_inline base8_numeric(const __m128i _value) : base8(_value) {} // Store to array simdjson_inline void store(T dst[16]) const { return _mm_storeu_si128(reinterpret_cast<__m128i *>(dst), *this); } // Override to distinguish from bool version simdjson_inline simd8 operator~() const { return *this ^ 0xFFu; } // Addition/subtraction are the same for signed and unsigned simdjson_inline simd8 operator+(const simd8 other) const { return _mm_add_epi8(*this, other); } simdjson_inline simd8 
operator-(const simd8 other) const { return _mm_sub_epi8(*this, other); } simdjson_inline simd8& operator+=(const simd8 other) { *this = *this + other; return *static_cast*>(this); } simdjson_inline simd8& operator-=(const simd8 other) { *this = *this - other; return *static_cast*>(this); } // Perform a lookup assuming the value is between 0 and 15 inclusive (undefined behavior for out of range values) template simdjson_inline simd8 lookup_16(simd8 lookup_table) const { return _mm_shuffle_epi8(lookup_table, *this); } // Copies to "output" all bytes corresponding to a 0 in the mask (interpreted as a bitset). // Passing a 0 value for mask would be equivalent to writing out every byte to output. // Only the first 16 - count_ones(mask) bytes of the result are significant but 16 bytes // get written. // Design consideration: it seems like a function with the // signature simd8 compress(uint32_t mask) would be // sensible, but the AVX ISA makes this kind of approach difficult. template simdjson_inline void compress(uint16_t mask, L * output) const { using internal::thintable_epi8; using internal::BitsSetTable256mul2; using internal::pshufb_combine_table; // this particular implementation was inspired by work done by @animetosho // we do it in two steps, first 8 bytes and then second 8 bytes uint8_t mask1 = uint8_t(mask); // least significant 8 bits uint8_t mask2 = uint8_t(mask >> 8); // most significant 8 bits // next line just loads the 64-bit values thintable_epi8[mask1] and // thintable_epi8[mask2] into a 128-bit register, using only // two instructions on most compilers. __m128i shufmask = _mm_set_epi64x(thintable_epi8[mask2], thintable_epi8[mask1]); // we increment by 0x08 the second half of the mask shufmask = _mm_add_epi8(shufmask, _mm_set_epi32(0x08080808, 0x08080808, 0, 0)); // this is the version "nearly pruned" __m128i pruned = _mm_shuffle_epi8(*this, shufmask); // we still need to put the two halves together.
// we compute the popcount of the first half: int pop1 = BitsSetTable256mul2[mask1]; // then load the corresponding mask, what it does is to write // only the first pop1 bytes from the first 8 bytes, and then // it fills in with the bytes from the second 8 bytes + some filling // at the end. __m128i compactmask = _mm_loadu_si128(reinterpret_cast(pshufb_combine_table + pop1 * 8)); __m128i answer = _mm_shuffle_epi8(pruned, compactmask); _mm_storeu_si128(reinterpret_cast<__m128i *>(output), answer); } template simdjson_inline simd8 lookup_16( L replace0, L replace1, L replace2, L replace3, L replace4, L replace5, L replace6, L replace7, L replace8, L replace9, L replace10, L replace11, L replace12, L replace13, L replace14, L replace15) const { return lookup_16(simd8::repeat_16( replace0, replace1, replace2, replace3, replace4, replace5, replace6, replace7, replace8, replace9, replace10, replace11, replace12, replace13, replace14, replace15 )); } }; // Signed bytes template<> struct simd8 : base8_numeric { simdjson_inline simd8() : base8_numeric() {} simdjson_inline simd8(const __m128i _value) : base8_numeric(_value) {} // Splat constructor simdjson_inline simd8(int8_t _value) : simd8(splat(_value)) {} // Array constructor simdjson_inline simd8(const int8_t* values) : simd8(load(values)) {} // Member-by-member initialization simdjson_inline simd8( int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15 ) : simd8(_mm_setr_epi8( v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10,v11,v12,v13,v14,v15 )) {} // Repeat 16 values as many times as necessary (usually for lookup tables) simdjson_inline static simd8 repeat_16( int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15 ) { return simd8( v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, 
v10,v11,v12,v13,v14,v15 ); } // Order-sensitive comparisons simdjson_inline simd8 max_val(const simd8 other) const { return _mm_max_epi8(*this, other); } simdjson_inline simd8 min_val(const simd8 other) const { return _mm_min_epi8(*this, other); } simdjson_inline simd8 operator>(const simd8 other) const { return _mm_cmpgt_epi8(*this, other); } simdjson_inline simd8 operator<(const simd8 other) const { return _mm_cmpgt_epi8(other, *this); } }; // Unsigned bytes template<> struct simd8: base8_numeric { simdjson_inline simd8() : base8_numeric() {} simdjson_inline simd8(const __m128i _value) : base8_numeric(_value) {} // Splat constructor simdjson_inline simd8(uint8_t _value) : simd8(splat(_value)) {} // Array constructor simdjson_inline simd8(const uint8_t* values) : simd8(load(values)) {} // Member-by-member initialization simdjson_inline simd8( uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15 ) : simd8(_mm_setr_epi8( v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10,v11,v12,v13,v14,v15 )) {} // Repeat 16 values as many times as necessary (usually for lookup tables) simdjson_inline static simd8 repeat_16( uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15 ) { return simd8( v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10,v11,v12,v13,v14,v15 ); } // Saturated math simdjson_inline simd8 saturating_add(const simd8 other) const { return _mm_adds_epu8(*this, other); } simdjson_inline simd8 saturating_sub(const simd8 other) const { return _mm_subs_epu8(*this, other); } // Order-specific operations simdjson_inline simd8 max_val(const simd8 other) const { return _mm_max_epu8(*this, other); } simdjson_inline simd8 min_val(const simd8 other) const { return _mm_min_epu8(*this, other); } // Same as >, 
but only guarantees true is nonzero (< guarantees true = -1) simdjson_inline simd8 gt_bits(const simd8 other) const { return this->saturating_sub(other); } // Same as <, but only guarantees true is nonzero (< guarantees true = -1) simdjson_inline simd8 lt_bits(const simd8 other) const { return other.saturating_sub(*this); } simdjson_inline simd8 operator<=(const simd8 other) const { return other.max_val(*this) == other; } simdjson_inline simd8 operator>=(const simd8 other) const { return other.min_val(*this) == other; } simdjson_inline simd8 operator>(const simd8 other) const { return this->gt_bits(other).any_bits_set(); } simdjson_inline simd8 operator<(const simd8 other) const { return this->gt_bits(other).any_bits_set(); } // Bit-specific operations simdjson_inline simd8 bits_not_set() const { return *this == uint8_t(0); } simdjson_inline simd8 bits_not_set(simd8 bits) const { return (*this & bits).bits_not_set(); } simdjson_inline simd8 any_bits_set() const { return ~this->bits_not_set(); } simdjson_inline simd8 any_bits_set(simd8 bits) const { return ~this->bits_not_set(bits); } simdjson_inline bool is_ascii() const { return _mm_movemask_epi8(*this) == 0; } simdjson_inline bool bits_not_set_anywhere() const { return _mm_testz_si128(*this, *this); } simdjson_inline bool any_bits_set_anywhere() const { return !bits_not_set_anywhere(); } simdjson_inline bool bits_not_set_anywhere(simd8 bits) const { return _mm_testz_si128(*this, bits); } simdjson_inline bool any_bits_set_anywhere(simd8 bits) const { return !bits_not_set_anywhere(bits); } template simdjson_inline simd8 shr() const { return simd8(_mm_srli_epi16(*this, N)) & uint8_t(0xFFu >> N); } template simdjson_inline simd8 shl() const { return simd8(_mm_slli_epi16(*this, N)) & uint8_t(0xFFu << N); } // Get one of the bits and make a bitmask out of it. // e.g. 
value.get_bit<7>() gets the high bit template simdjson_inline int get_bit() const { return _mm_movemask_epi8(_mm_slli_epi16(*this, 7-N)); } }; template struct simd8x64 { static constexpr int NUM_CHUNKS = 64 / sizeof(simd8); static_assert(NUM_CHUNKS == 4, "Westmere kernel should use four registers per 64-byte block."); const simd8 chunks[NUM_CHUNKS]; simd8x64(const simd8x64& o) = delete; // no copy allowed simd8x64& operator=(const simd8& other) = delete; // no assignment allowed simd8x64() = delete; // no default constructor allowed simdjson_inline simd8x64(const simd8 chunk0, const simd8 chunk1, const simd8 chunk2, const simd8 chunk3) : chunks{chunk0, chunk1, chunk2, chunk3} {} simdjson_inline simd8x64(const T ptr[64]) : chunks{simd8::load(ptr), simd8::load(ptr+16), simd8::load(ptr+32), simd8::load(ptr+48)} {} simdjson_inline void store(T ptr[64]) const { this->chunks[0].store(ptr+sizeof(simd8)*0); this->chunks[1].store(ptr+sizeof(simd8)*1); this->chunks[2].store(ptr+sizeof(simd8)*2); this->chunks[3].store(ptr+sizeof(simd8)*3); } simdjson_inline simd8 reduce_or() const { return (this->chunks[0] | this->chunks[1]) | (this->chunks[2] | this->chunks[3]); } simdjson_inline uint64_t compress(uint64_t mask, T * output) const { this->chunks[0].compress(uint16_t(mask), output); this->chunks[1].compress(uint16_t(mask >> 16), output + 16 - count_ones(mask & 0xFFFF)); this->chunks[2].compress(uint16_t(mask >> 32), output + 32 - count_ones(mask & 0xFFFFFFFF)); this->chunks[3].compress(uint16_t(mask >> 48), output + 48 - count_ones(mask & 0xFFFFFFFFFFFF)); return 64 - count_ones(mask); } simdjson_inline uint64_t to_bitmask() const { uint64_t r0 = uint32_t(this->chunks[0].to_bitmask() ); uint64_t r1 = this->chunks[1].to_bitmask() ; uint64_t r2 = this->chunks[2].to_bitmask() ; uint64_t r3 = this->chunks[3].to_bitmask() ; return r0 | (r1 << 16) | (r2 << 32) | (r3 << 48); } simdjson_inline uint64_t eq(const T m) const { const simd8 mask = simd8::splat(m); return simd8x64( 
this->chunks[0] == mask, this->chunks[1] == mask, this->chunks[2] == mask, this->chunks[3] == mask ).to_bitmask(); } simdjson_inline uint64_t eq(const simd8x64 &other) const { return simd8x64( this->chunks[0] == other.chunks[0], this->chunks[1] == other.chunks[1], this->chunks[2] == other.chunks[2], this->chunks[3] == other.chunks[3] ).to_bitmask(); } simdjson_inline uint64_t lteq(const T m) const { const simd8 mask = simd8::splat(m); return simd8x64( this->chunks[0] <= mask, this->chunks[1] <= mask, this->chunks[2] <= mask, this->chunks[3] <= mask ).to_bitmask(); } }; // struct simd8x64 } // namespace simd } // unnamed namespace } // namespace westmere } // namespace simdjson #endif // SIMDJSON_WESTMERE_SIMD_INPUT_H /* end file simdjson/westmere/simd.h */ namespace simdjson { namespace westmere { namespace { using namespace simd; // Holds backslashes and quotes locations. struct backslash_and_quote { public: static constexpr uint32_t BYTES_PROCESSED = 32; simdjson_inline static backslash_and_quote copy_and_find(const uint8_t *src, uint8_t *dst); simdjson_inline bool has_quote_first() { return ((bs_bits - 1) & quote_bits) != 0; } simdjson_inline bool has_backslash() { return bs_bits != 0; } simdjson_inline int quote_index() { return trailing_zeroes(quote_bits); } simdjson_inline int backslash_index() { return trailing_zeroes(bs_bits); } uint32_t bs_bits; uint32_t quote_bits; }; // struct backslash_and_quote simdjson_inline backslash_and_quote backslash_and_quote::copy_and_find(const uint8_t *src, uint8_t *dst) { // this can read up to 31 bytes beyond the buffer size, but we require // SIMDJSON_PADDING of padding static_assert(SIMDJSON_PADDING >= (BYTES_PROCESSED - 1), "backslash and quote finder must process fewer than SIMDJSON_PADDING bytes"); simd8 v0(src); simd8 v1(src + 16); v0.store(dst); v1.store(dst + 16); uint64_t bs_and_quote = simd8x64(v0 == '\\', v1 == '\\', v0 == '"', v1 == '"').to_bitmask(); return { uint32_t(bs_and_quote), // bs_bits 
uint32_t(bs_and_quote >> 32) // quote_bits }; } } // unnamed namespace } // namespace westmere } // namespace simdjson #endif // SIMDJSON_WESTMERE_STRINGPARSING_DEFS_H /* end file simdjson/westmere/stringparsing_defs.h */ /* end file simdjson/westmere/begin.h */ /* including simdjson/generic/amalgamated.h for westmere: #include "simdjson/generic/amalgamated.h" */ /* begin file simdjson/generic/amalgamated.h for westmere */ #if defined(SIMDJSON_CONDITIONAL_INCLUDE) && !defined(SIMDJSON_GENERIC_DEPENDENCIES_H) #error simdjson/generic/dependencies.h must be included before simdjson/generic/amalgamated.h! #endif /* including simdjson/generic/base.h for westmere: #include "simdjson/generic/base.h" */ /* begin file simdjson/generic/base.h for westmere */ #ifndef SIMDJSON_GENERIC_BASE_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_BASE_H */ /* amalgamation skipped (editor-only): #include "simdjson/base.h" */ /* amalgamation skipped (editor-only): // If we haven't got an implementation yet, we're in the editor, editing a generic file! Just */ /* amalgamation skipped (editor-only): // use the most advanced one we can so the most possible stuff can be tested. 
*/ /* amalgamation skipped (editor-only): #ifndef SIMDJSON_IMPLEMENTATION */ /* amalgamation skipped (editor-only): #include "simdjson/implementation_detection.h" */ /* amalgamation skipped (editor-only): #if SIMDJSON_IMPLEMENTATION_ICELAKE */ /* amalgamation skipped (editor-only): #include "simdjson/icelake/begin.h" */ /* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_HASWELL */ /* amalgamation skipped (editor-only): #include "simdjson/haswell/begin.h" */ /* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_WESTMERE */ /* amalgamation skipped (editor-only): #include "simdjson/westmere/begin.h" */ /* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_ARM64 */ /* amalgamation skipped (editor-only): #include "simdjson/arm64/begin.h" */ /* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_PPC64 */ /* amalgamation skipped (editor-only): #include "simdjson/ppc64/begin.h" */ /* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_FALLBACK */ /* amalgamation skipped (editor-only): #include "simdjson/fallback/begin.h" */ /* amalgamation skipped (editor-only): #else */ /* amalgamation skipped (editor-only): #error "All possible implementations (including fallback) have been disabled! simdjson will not run." 
*/ /* amalgamation skipped (editor-only): #endif */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_IMPLEMENTATION */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { namespace westmere { struct open_container; class dom_parser_implementation; /** * The type of a JSON number */ enum class number_type { floating_point_number=1, /// a binary64 number signed_integer, /// a signed integer that fits in a 64-bit word using two's complement unsigned_integer /// a positive integer larger or equal to 1<<63 }; } // namespace westmere } // namespace simdjson #endif // SIMDJSON_GENERIC_BASE_H /* end file simdjson/generic/base.h for westmere */ /* including simdjson/generic/jsoncharutils.h for westmere: #include "simdjson/generic/jsoncharutils.h" */ /* begin file simdjson/generic/jsoncharutils.h for westmere */ #ifndef SIMDJSON_GENERIC_JSONCHARUTILS_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_JSONCHARUTILS_H */ /* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/internal/jsoncharutils_tables.h" */ /* amalgamation skipped (editor-only): #include "simdjson/internal/numberparsing_tables.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { namespace westmere { namespace { namespace jsoncharutils { // return non-zero if not a structural or whitespace char // zero otherwise simdjson_inline uint32_t is_not_structural_or_whitespace(uint8_t c) { return internal::structural_or_whitespace_negated[c]; } simdjson_inline uint32_t is_structural_or_whitespace(uint8_t c) { return internal::structural_or_whitespace[c]; } // returns a value with the high 16 bits set if not valid // otherwise returns the conversion of the 4 hex digits at src into the bottom // 16 bits of the 32-bit return register // 
// see // https://lemire.me/blog/2019/04/17/parsing-short-hexadecimal-strings-efficiently/ static inline uint32_t hex_to_u32_nocheck( const uint8_t *src) { // strictly speaking, static inline is a C-ism uint32_t v1 = internal::digit_to_val32[630 + src[0]]; uint32_t v2 = internal::digit_to_val32[420 + src[1]]; uint32_t v3 = internal::digit_to_val32[210 + src[2]]; uint32_t v4 = internal::digit_to_val32[0 + src[3]]; return v1 | v2 | v3 | v4; } // given a code point cp, writes to c // the utf-8 code, outputting the length in // bytes, if the length is zero, the code point // is invalid // // This can possibly be made faster using pdep // and clz and table lookups, but JSON documents // have few escaped code points, and the following // function looks cheap. // // Note: we assume that surrogates are treated separately // simdjson_inline size_t codepoint_to_utf8(uint32_t cp, uint8_t *c) { if (cp <= 0x7F) { c[0] = uint8_t(cp); return 1; // ascii } if (cp <= 0x7FF) { c[0] = uint8_t((cp >> 6) + 192); c[1] = uint8_t((cp & 63) + 128); return 2; // universal plane // Surrogates are treated elsewhere... //} //else if (0xd800 <= cp && cp <= 0xdfff) { // return 0; // surrogates // could put assert here } else if (cp <= 0xFFFF) { c[0] = uint8_t((cp >> 12) + 224); c[1] = uint8_t(((cp >> 6) & 63) + 128); c[2] = uint8_t((cp & 63) + 128); return 3; } else if (cp <= 0x10FFFF) { // if you know you have a valid code point, this // is not needed c[0] = uint8_t((cp >> 18) + 240); c[1] = uint8_t(((cp >> 12) & 63) + 128); c[2] = uint8_t(((cp >> 6) & 63) + 128); c[3] = uint8_t((cp & 63) + 128); return 4; } // will return 0 when the code point was too large. 
return 0; // bad r } #if SIMDJSON_IS_32BITS // _umul128 for x86, arm // this is a slow emulation routine for 32-bit // static simdjson_inline uint64_t __emulu(uint32_t x, uint32_t y) { return x * (uint64_t)y; } static simdjson_inline uint64_t _umul128(uint64_t ab, uint64_t cd, uint64_t *hi) { uint64_t ad = __emulu((uint32_t)(ab >> 32), (uint32_t)cd); uint64_t bd = __emulu((uint32_t)ab, (uint32_t)cd); uint64_t adbc = ad + __emulu((uint32_t)ab, (uint32_t)(cd >> 32)); uint64_t adbc_carry = !!(adbc < ad); uint64_t lo = bd + (adbc << 32); *hi = __emulu((uint32_t)(ab >> 32), (uint32_t)(cd >> 32)) + (adbc >> 32) + (adbc_carry << 32) + !!(lo < bd); return lo; } #endif } // namespace jsoncharutils } // unnamed namespace } // namespace westmere } // namespace simdjson #endif // SIMDJSON_GENERIC_JSONCHARUTILS_H /* end file simdjson/generic/jsoncharutils.h for westmere */ /* including simdjson/generic/atomparsing.h for westmere: #include "simdjson/generic/atomparsing.h" */ /* begin file simdjson/generic/atomparsing.h for westmere */ #ifndef SIMDJSON_GENERIC_ATOMPARSING_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ATOMPARSING_H */ /* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/jsoncharutils.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ #include namespace simdjson { namespace westmere { namespace { /// @private namespace atomparsing { // The string_to_uint32 is exclusively used to map literal strings to 32-bit values. // We use memcpy instead of a pointer cast to avoid undefined behaviors since we cannot // be certain that the character pointer will be properly aligned. // You might think that using memcpy makes this function expensive, but you'd be wrong. 
// All decent optimizing compilers (GCC, clang, Visual Studio) will compile string_to_uint32("false"); // to the compile-time constant 1936482662. simdjson_inline uint32_t string_to_uint32(const char* str) { uint32_t val; std::memcpy(&val, str, sizeof(uint32_t)); return val; } // Again in str4ncmp we use a memcpy to avoid undefined behavior. The memcpy may appear expensive. // Yet all decent optimizing compilers will compile memcpy to a single instruction, just about. simdjson_warn_unused simdjson_inline uint32_t str4ncmp(const uint8_t *src, const char* atom) { uint32_t srcval; // we want to avoid unaligned 32-bit loads (undefined in C/C++) static_assert(sizeof(uint32_t) <= SIMDJSON_PADDING, "SIMDJSON_PADDING must be larger than 4 bytes"); std::memcpy(&srcval, src, sizeof(uint32_t)); return srcval ^ string_to_uint32(atom); } simdjson_warn_unused simdjson_inline bool is_valid_true_atom(const uint8_t *src) { return (str4ncmp(src, "true") | jsoncharutils::is_not_structural_or_whitespace(src[4])) == 0; } simdjson_warn_unused simdjson_inline bool is_valid_true_atom(const uint8_t *src, size_t len) { if (len > 4) { return is_valid_true_atom(src); } else if (len == 4) { return !str4ncmp(src, "true"); } else { return false; } } simdjson_warn_unused simdjson_inline bool is_valid_false_atom(const uint8_t *src) { return (str4ncmp(src+1, "alse") | jsoncharutils::is_not_structural_or_whitespace(src[5])) == 0; } simdjson_warn_unused simdjson_inline bool is_valid_false_atom(const uint8_t *src, size_t len) { if (len > 5) { return is_valid_false_atom(src); } else if (len == 5) { return !str4ncmp(src+1, "alse"); } else { return false; } } simdjson_warn_unused simdjson_inline bool is_valid_null_atom(const uint8_t *src) { return (str4ncmp(src, "null") | jsoncharutils::is_not_structural_or_whitespace(src[4])) == 0; } simdjson_warn_unused simdjson_inline bool is_valid_null_atom(const uint8_t *src, size_t len) { if (len > 4) { return is_valid_null_atom(src); } else if (len == 4) { return 
!str4ncmp(src, "null"); } else { return false; } } } // namespace atomparsing } // unnamed namespace } // namespace westmere } // namespace simdjson #endif // SIMDJSON_GENERIC_ATOMPARSING_H /* end file simdjson/generic/atomparsing.h for westmere */ /* including simdjson/generic/dom_parser_implementation.h for westmere: #include "simdjson/generic/dom_parser_implementation.h" */ /* begin file simdjson/generic/dom_parser_implementation.h for westmere */ #ifndef SIMDJSON_GENERIC_DOM_PARSER_IMPLEMENTATION_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_DOM_PARSER_IMPLEMENTATION_H */ /* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/internal/dom_parser_implementation.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { namespace westmere { // expectation: sizeof(open_container) = 64/8. 
struct open_container { uint32_t tape_index; // where, on the tape, does the scope ([,{) begins uint32_t count; // how many elements in the scope }; // struct open_container static_assert(sizeof(open_container) == 64/8, "Open container must be 64 bits"); class dom_parser_implementation final : public internal::dom_parser_implementation { public: /** Tape location of each open { or [ */ std::unique_ptr open_containers{}; /** Whether each open container is a [ or { */ std::unique_ptr is_array{}; /** Buffer passed to stage 1 */ const uint8_t *buf{}; /** Length passed to stage 1 */ size_t len{0}; /** Document passed to stage 2 */ dom::document *doc{}; inline dom_parser_implementation() noexcept; inline dom_parser_implementation(dom_parser_implementation &&other) noexcept; inline dom_parser_implementation &operator=(dom_parser_implementation &&other) noexcept; dom_parser_implementation(const dom_parser_implementation &) = delete; dom_parser_implementation &operator=(const dom_parser_implementation &) = delete; simdjson_warn_unused error_code parse(const uint8_t *buf, size_t len, dom::document &doc) noexcept final; simdjson_warn_unused error_code stage1(const uint8_t *buf, size_t len, stage1_mode partial) noexcept final; simdjson_warn_unused error_code stage2(dom::document &doc) noexcept final; simdjson_warn_unused error_code stage2_next(dom::document &doc) noexcept final; simdjson_warn_unused uint8_t *parse_string(const uint8_t *src, uint8_t *dst, bool allow_replacement) const noexcept final; simdjson_warn_unused uint8_t *parse_wobbly_string(const uint8_t *src, uint8_t *dst) const noexcept final; inline simdjson_warn_unused error_code set_capacity(size_t capacity) noexcept final; inline simdjson_warn_unused error_code set_max_depth(size_t max_depth) noexcept final; private: simdjson_inline simdjson_warn_unused error_code set_capacity_stage1(size_t capacity); }; } // namespace westmere } // namespace simdjson namespace simdjson { namespace westmere { inline 
dom_parser_implementation::dom_parser_implementation() noexcept = default; inline dom_parser_implementation::dom_parser_implementation(dom_parser_implementation &&other) noexcept = default; inline dom_parser_implementation &dom_parser_implementation::operator=(dom_parser_implementation &&other) noexcept = default; // Leaving these here so they can be inlined if so desired inline simdjson_warn_unused error_code dom_parser_implementation::set_capacity(size_t capacity) noexcept { if(capacity > SIMDJSON_MAXSIZE_BYTES) { return CAPACITY; } // Stage 1 index output size_t max_structures = SIMDJSON_ROUNDUP_N(capacity, 64) + 2 + 7; structural_indexes.reset( new (std::nothrow) uint32_t[max_structures] ); if (!structural_indexes) { _capacity = 0; return MEMALLOC; } structural_indexes[0] = 0; n_structural_indexes = 0; _capacity = capacity; return SUCCESS; } inline simdjson_warn_unused error_code dom_parser_implementation::set_max_depth(size_t max_depth) noexcept { // Stage 2 stacks open_containers.reset(new (std::nothrow) open_container[max_depth]); is_array.reset(new (std::nothrow) bool[max_depth]); if (!is_array || !open_containers) { _max_depth = 0; return MEMALLOC; } _max_depth = max_depth; return SUCCESS; } } // namespace westmere } // namespace simdjson #endif // SIMDJSON_GENERIC_DOM_PARSER_IMPLEMENTATION_H /* end file simdjson/generic/dom_parser_implementation.h for westmere */ /* including simdjson/generic/implementation_simdjson_result_base.h for westmere: #include "simdjson/generic/implementation_simdjson_result_base.h" */ /* begin file simdjson/generic/implementation_simdjson_result_base.h for westmere */ #ifndef SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_H */ /* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ /* amalgamation skipped (editor-only): 
#endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { namespace westmere { // This is a near copy of include/error.h's implementation_simdjson_result_base, except it doesn't use std::pair // so we can avoid inlining errors // TODO reconcile these! /** * The result of a simdjson operation that could fail. * * Gives the option of reading error codes, or throwing an exception by casting to the desired result. * * This is a base class for implementations that want to add functions to the result type for * chaining. * * Override like: * * struct simdjson_result : public internal::implementation_simdjson_result_base { * simdjson_result() noexcept : internal::implementation_simdjson_result_base() {} * simdjson_result(error_code error) noexcept : internal::implementation_simdjson_result_base(error) {} * simdjson_result(T &&value) noexcept : internal::implementation_simdjson_result_base(std::forward(value)) {} * simdjson_result(T &&value, error_code error) noexcept : internal::implementation_simdjson_result_base(value, error) {} * // Your extra methods here * } * * Then any method returning simdjson_result will be chainable with your methods. */ template struct implementation_simdjson_result_base { /** * Create a new empty result with error = UNINITIALIZED. */ simdjson_inline implementation_simdjson_result_base() noexcept = default; /** * Create a new error result. */ simdjson_inline implementation_simdjson_result_base(error_code error) noexcept; /** * Create a new successful result. */ simdjson_inline implementation_simdjson_result_base(T &&value) noexcept; /** * Create a new result with both things (use if you don't want to branch when creating the result). */ simdjson_inline implementation_simdjson_result_base(T &&value, error_code error) noexcept; /** * Move the value and the error to the provided variables. * * @param value The variable to assign the value to. May not be set if there is an error. * @param error The variable to assign the error to. 
Set to SUCCESS if there is no error. */ simdjson_inline void tie(T &value, error_code &error) && noexcept; /** * Move the value to the provided variable. * * @param value The variable to assign the value to. May not be set if there is an error. */ simdjson_inline error_code get(T &value) && noexcept; /** * The error. */ simdjson_inline error_code error() const noexcept; #if SIMDJSON_EXCEPTIONS /** * Get the result value. * * @throw simdjson_error if there was an error. */ simdjson_inline T& value() & noexcept(false); /** * Take the result value (move it). * * @throw simdjson_error if there was an error. */ simdjson_inline T&& value() && noexcept(false); /** * Take the result value (move it). * * @throw simdjson_error if there was an error. */ simdjson_inline T&& take_value() && noexcept(false); /** * Cast to the value (will throw on error). * * @throw simdjson_error if there was an error. */ simdjson_inline operator T&&() && noexcept(false); #endif // SIMDJSON_EXCEPTIONS /** * Get the result value. This function is safe if and only * the error() method returns a value that evaluates to false. */ simdjson_inline const T& value_unsafe() const& noexcept; /** * Get the result value. This function is safe if and only * the error() method returns a value that evaluates to false. */ simdjson_inline T& value_unsafe() & noexcept; /** * Take the result value (move it). This function is safe if and only * the error() method returns a value that evaluates to false. */ simdjson_inline T&& value_unsafe() && noexcept; protected: /** users should never directly access first and second. **/ T first{}; /** Users should never directly access 'first'. **/ error_code second{UNINITIALIZED}; /** Users should never directly access 'second'. 
**/ }; // struct implementation_simdjson_result_base } // namespace westmere } // namespace simdjson #endif // SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_H /* end file simdjson/generic/implementation_simdjson_result_base.h for westmere */ /* including simdjson/generic/numberparsing.h for westmere: #include "simdjson/generic/numberparsing.h" */ /* begin file simdjson/generic/numberparsing.h for westmere */ #ifndef SIMDJSON_GENERIC_NUMBERPARSING_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_NUMBERPARSING_H */ /* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/jsoncharutils.h" */ /* amalgamation skipped (editor-only): #include "simdjson/internal/numberparsing_tables.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ #include #include #include namespace simdjson { namespace westmere { namespace numberparsing { #ifdef JSON_TEST_NUMBERS #define INVALID_NUMBER(SRC) (found_invalid_number((SRC)), NUMBER_ERROR) #define WRITE_INTEGER(VALUE, SRC, WRITER) (found_integer((VALUE), (SRC)), (WRITER).append_s64((VALUE))) #define WRITE_UNSIGNED(VALUE, SRC, WRITER) (found_unsigned_integer((VALUE), (SRC)), (WRITER).append_u64((VALUE))) #define WRITE_DOUBLE(VALUE, SRC, WRITER) (found_float((VALUE), (SRC)), (WRITER).append_double((VALUE))) #else #define INVALID_NUMBER(SRC) (NUMBER_ERROR) #define WRITE_INTEGER(VALUE, SRC, WRITER) (WRITER).append_s64((VALUE)) #define WRITE_UNSIGNED(VALUE, SRC, WRITER) (WRITER).append_u64((VALUE)) #define WRITE_DOUBLE(VALUE, SRC, WRITER) (WRITER).append_double((VALUE)) #endif namespace { // Convert a mantissa, an exponent and a sign bit into an ieee64 double. // The real_exponent needs to be in [0, 2046] (technically real_exponent = 2047 would be acceptable). // The mantissa should be in [0,1<<53). 
The bit at index (1ULL << 52) while be zeroed. simdjson_inline double to_double(uint64_t mantissa, uint64_t real_exponent, bool negative) { double d; mantissa &= ~(1ULL << 52); mantissa |= real_exponent << 52; mantissa |= ((static_cast(negative)) << 63); std::memcpy(&d, &mantissa, sizeof(d)); return d; } // Attempts to compute i * 10^(power) exactly; and if "negative" is // true, negate the result. // This function will only work in some cases, when it does not work, success is // set to false. This should work *most of the time* (like 99% of the time). // We assume that power is in the [smallest_power, // largest_power] interval: the caller is responsible for this check. simdjson_inline bool compute_float_64(int64_t power, uint64_t i, bool negative, double &d) { // we start with a fast path // It was described in // Clinger WD. How to read floating point numbers accurately. // ACM SIGPLAN Notices. 1990 #ifndef FLT_EVAL_METHOD #error "FLT_EVAL_METHOD should be defined, please include cfloat." #endif #if (FLT_EVAL_METHOD != 1) && (FLT_EVAL_METHOD != 0) // We cannot be certain that x/y is rounded to nearest. if (0 <= power && power <= 22 && i <= 9007199254740991) #else if (-22 <= power && power <= 22 && i <= 9007199254740991) #endif { // convert the integer into a double. This is lossless since // 0 <= i <= 2^53 - 1. d = double(i); // // The general idea is as follows. // If 0 <= s < 2^53 and if 10^0 <= p <= 10^22 then // 1) Both s and p can be represented exactly as 64-bit floating-point // values // (binary64). // 2) Because s and p can be represented exactly as floating-point values, // then s * p // and s / p will produce correctly rounded values. // if (power < 0) { d = d / simdjson::internal::power_of_ten[-power]; } else { d = d * simdjson::internal::power_of_ten[power]; } if (negative) { d = -d; } return true; } // When 22 < power && power < 22 + 16, we could // hope for another, secondary fast path. It was // described by David M. 
Gay in "Correctly rounded // binary-decimal and decimal-binary conversions." (1990) // If you need to compute i * 10^(22 + x) for x < 16, // first compute i * 10^x, if you know that result is exact // (e.g., when i * 10^x < 2^53), // then you can still proceed and do (i * 10^x) * 10^22. // Is this worth your time? // You need 22 < power *and* power < 22 + 16 *and* (i * 10^(x-22) < 2^53) // for this second fast path to work. // If you you have 22 < power *and* power < 22 + 16, and then you // optimistically compute "i * 10^(x-22)", there is still a chance that you // have wasted your time if i * 10^(x-22) >= 2^53. It makes the use cases of // this optimization maybe less common than we would like. Source: // http://www.exploringbinary.com/fast-path-decimal-to-floating-point-conversion/ // also used in RapidJSON: https://rapidjson.org/strtod_8h_source.html // The fast path has now failed, so we are failing back on the slower path. // In the slow path, we need to adjust i so that it is > 1<<63 which is always // possible, except if i == 0, so we handle i == 0 separately. if(i == 0) { d = negative ? -0.0 : 0.0; return true; } // The exponent is 1024 + 63 + power // + floor(log(5**power)/log(2)). // The 1024 comes from the ieee64 standard. // The 63 comes from the fact that we use a 64-bit word. // // Computing floor(log(5**power)/log(2)) could be // slow. Instead we use a fast function. // // For power in (-400,350), we have that // (((152170 + 65536) * power ) >> 16); // is equal to // floor(log(5**power)/log(2)) + power when power >= 0 // and it is equal to // ceil(log(5**-power)/log(2)) + power when power < 0 // // The 65536 is (1<<16) and corresponds to // (65536 * power) >> 16 ---> power // // ((152170 * power ) >> 16) is equal to // floor(log(5**power)/log(2)) // // Note that this is not magic: 152170/(1<<16) is // approximatively equal to log(5)/log(2). // The 1<<16 value is a power of two; we could use a // larger power of 2 if we wanted to. 
// int64_t exponent = (((152170 + 65536) * power) >> 16) + 1024 + 63; // We want the most significant bit of i to be 1. Shift if needed. int lz = leading_zeroes(i); i <<= lz; // We are going to need to do some 64-bit arithmetic to get a precise product. // We use a table lookup approach. // It is safe because // power >= smallest_power // and power <= largest_power // We recover the mantissa of the power, it has a leading 1. It is always // rounded down. // // We want the most significant 64 bits of the product. We know // this will be non-zero because the most significant bit of i is // 1. const uint32_t index = 2 * uint32_t(power - simdjson::internal::smallest_power); // Optimization: It may be that materializing the index as a variable might confuse some compilers and prevent effective complex-addressing loads. (Done for code clarity.) // // The full_multiplication function computes the 128-bit product of two 64-bit words // with a returned value of type value128 with a "low component" corresponding to the // 64-bit least significant bits of the product and with a "high component" corresponding // to the 64-bit most significant bits of the product. simdjson::internal::value128 firstproduct = full_multiplication(i, simdjson::internal::power_of_five_128[index]); // Both i and power_of_five_128[index] have their most significant bit set to 1 which // implies that the either the most or the second most significant bit of the product // is 1. We pack values in this manner for efficiency reasons: it maximizes the use // we make of the product. It also makes it easy to reason about the product: there // is 0 or 1 leading zero in the product. // Unless the least significant 9 bits of the high (64-bit) part of the full // product are all 1s, then we know that the most significant 55 bits are // exact and no further work is needed. 
Having 55 bits is necessary because // we need 53 bits for the mantissa but we have to have one rounding bit and // we can waste a bit if the most significant bit of the product is zero. if((firstproduct.high & 0x1FF) == 0x1FF) { // We want to compute i * 5^q, but only care about the top 55 bits at most. // Consider the scenario where q>=0. Then 5^q may not fit in 64-bits. Doing // the full computation is wasteful. So we do what is called a "truncated // multiplication". // We take the most significant 64-bits, and we put them in // power_of_five_128[index]. Usually, that's good enough to approximate i * 5^q // to the desired approximation using one multiplication. Sometimes it does not suffice. // Then we store the next most significant 64 bits in power_of_five_128[index + 1], and // then we get a better approximation to i * 5^q. // // That's for when q>=0. The logic for q<0 is somewhat similar but it is somewhat // more complicated. // // There is an extra layer of complexity in that we need more than 55 bits of // accuracy in the round-to-even scenario. // // The full_multiplication function computes the 128-bit product of two 64-bit words // with a returned value of type value128 with a "low component" corresponding to the // 64-bit least significant bits of the product and with a "high component" corresponding // to the 64-bit most significant bits of the product. simdjson::internal::value128 secondproduct = full_multiplication(i, simdjson::internal::power_of_five_128[index + 1]); firstproduct.low += secondproduct.high; if(secondproduct.high > firstproduct.low) { firstproduct.high++; } // As it has been proven by Noble Mushtak and Daniel Lemire in "Fast Number Parsing Without // Fallback" (https://arxiv.org/abs/2212.06644), at this point we are sure that the product // is sufficiently accurate, and more computation is not needed. } uint64_t lower = firstproduct.low; uint64_t upper = firstproduct.high; // The final mantissa should be 53 bits with a leading 1. 
// We shift it so that it occupies 54 bits with a leading 1. /////// uint64_t upperbit = upper >> 63; uint64_t mantissa = upper >> (upperbit + 9); lz += int(1 ^ upperbit); // Here we have mantissa < (1<<54). int64_t real_exponent = exponent - lz; if (simdjson_unlikely(real_exponent <= 0)) { // we have a subnormal? // Here have that real_exponent <= 0 so -real_exponent >= 0 if(-real_exponent + 1 >= 64) { // if we have more than 64 bits below the minimum exponent, you have a zero for sure. d = negative ? -0.0 : 0.0; return true; } // next line is safe because -real_exponent + 1 < 0 mantissa >>= -real_exponent + 1; // Thankfully, we can't have both "round-to-even" and subnormals because // "round-to-even" only occurs for powers close to 0. mantissa += (mantissa & 1); // round up mantissa >>= 1; // There is a weird scenario where we don't have a subnormal but just. // Suppose we start with 2.2250738585072013e-308, we end up // with 0x3fffffffffffff x 2^-1023-53 which is technically subnormal // whereas 0x40000000000000 x 2^-1023-53 is normal. Now, we need to round // up 0x3fffffffffffff x 2^-1023-53 and once we do, we are no longer // subnormal, but we can only know this after rounding. // So we only declare a subnormal if we are smaller than the threshold. real_exponent = (mantissa < (uint64_t(1) << 52)) ? 0 : 1; d = to_double(mantissa, real_exponent, negative); return true; } // We have to round to even. The "to even" part // is only a problem when we are right in between two floats // which we guard against. // If we have lots of trailing zeros, we may fall right between two // floating-point values. // // The round-to-even cases take the form of a number 2m+1 which is in (2^53,2^54] // times a power of two. That is, it is right between a number with binary significand // m and another number with binary significand m+1; and it must be the case // that it cannot be represented by a float itself. 
// // We must have that w * 10 ^q == (2m+1) * 2^p for some power of two 2^p. // Recall that 10^q = 5^q * 2^q. // When q >= 0, we must have that (2m+1) is divible by 5^q, so 5^q <= 2^54. We have that // 5^23 <= 2^54 and it is the last power of five to qualify, so q <= 23. // When q<0, we have w >= (2m+1) x 5^{-q}. We must have that w<2^{64} so // (2m+1) x 5^{-q} < 2^{64}. We have that 2m+1>2^{53}. Hence, we must have // 2^{53} x 5^{-q} < 2^{64}. // Hence we have 5^{-q} < 2^{11}$ or q>= -4. // // We require lower <= 1 and not lower == 0 because we could not prove that // that lower == 0 is implied; but we could prove that lower <= 1 is a necessary and sufficient test. if (simdjson_unlikely((lower <= 1) && (power >= -4) && (power <= 23) && ((mantissa & 3) == 1))) { if((mantissa << (upperbit + 64 - 53 - 2)) == upper) { mantissa &= ~1; // flip it so that we do not round up } } mantissa += mantissa & 1; mantissa >>= 1; // Here we have mantissa < (1<<53), unless there was an overflow if (mantissa >= (1ULL << 53)) { ////////// // This will happen when parsing values such as 7.2057594037927933e+16 //////// mantissa = (1ULL << 52); real_exponent++; } mantissa &= ~(1ULL << 52); // we have to check that real_exponent is in range, otherwise we bail out if (simdjson_unlikely(real_exponent > 2046)) { // We have an infinite value!!! We could actually throw an error here if we could. return false; } d = to_double(mantissa, real_exponent, negative); return true; } // We call a fallback floating-point parser that might be slow. Note // it will accept JSON numbers, but the JSON spec. is more restrictive so // before you call parse_float_fallback, you need to have validated the input // string with the JSON grammar. // It will return an error (false) if the parsed number is infinite. // The string parsing itself always succeeds. We know that there is at least // one digit. 
static bool parse_float_fallback(const uint8_t *ptr, double *outDouble) { *outDouble = simdjson::internal::from_chars(reinterpret_cast(ptr)); // We do not accept infinite values. // Detecting finite values in a portable manner is ridiculously hard, ideally // we would want to do: // return !std::isfinite(*outDouble); // but that mysteriously fails under legacy/old libc++ libraries, see // https://github.com/simdjson/simdjson/issues/1286 // // Therefore, fall back to this solution (the extra parens are there // to handle that max may be a macro on windows). return !(*outDouble > (std::numeric_limits::max)() || *outDouble < std::numeric_limits::lowest()); } static bool parse_float_fallback(const uint8_t *ptr, const uint8_t *end_ptr, double *outDouble) { *outDouble = simdjson::internal::from_chars(reinterpret_cast(ptr), reinterpret_cast(end_ptr)); // We do not accept infinite values. // Detecting finite values in a portable manner is ridiculously hard, ideally // we would want to do: // return !std::isfinite(*outDouble); // but that mysteriously fails under legacy/old libc++ libraries, see // https://github.com/simdjson/simdjson/issues/1286 // // Therefore, fall back to this solution (the extra parens are there // to handle that max may be a macro on windows). 
return !(*outDouble > (std::numeric_limits::max)() || *outDouble < std::numeric_limits::lowest()); } // check quickly whether the next 8 chars are made of digits // at a glance, it looks better than Mula's // http://0x80.pl/articles/swar-digits-validate.html simdjson_inline bool is_made_of_eight_digits_fast(const uint8_t *chars) { uint64_t val; // this can read up to 7 bytes beyond the buffer size, but we require // SIMDJSON_PADDING of padding static_assert(7 <= SIMDJSON_PADDING, "SIMDJSON_PADDING must be bigger than 7"); std::memcpy(&val, chars, 8); // a branchy method might be faster: // return (( val & 0xF0F0F0F0F0F0F0F0 ) == 0x3030303030303030) // && (( (val + 0x0606060606060606) & 0xF0F0F0F0F0F0F0F0 ) == // 0x3030303030303030); return (((val & 0xF0F0F0F0F0F0F0F0) | (((val + 0x0606060606060606) & 0xF0F0F0F0F0F0F0F0) >> 4)) == 0x3333333333333333); } template SIMDJSON_NO_SANITIZE_UNDEFINED // We deliberately allow overflow here and check later simdjson_inline bool parse_digit(const uint8_t c, I &i) { const uint8_t digit = static_cast(c - '0'); if (digit > 9) { return false; } // PERF NOTE: multiplication by 10 is cheaper than arbitrary integer multiplication i = 10 * i + digit; // might overflow, we will handle the overflow later return true; } simdjson_inline error_code parse_decimal_after_separator(simdjson_unused const uint8_t *const src, const uint8_t *&p, uint64_t &i, int64_t &exponent) { // we continue with the fiction that we have an integer. If the // floating point number is representable as x * 10^z for some integer // z that fits in 53 bits, then we will be able to convert back the // the integer into a float in a lossless manner. const uint8_t *const first_after_period = p; #ifdef SIMDJSON_SWAR_NUMBER_PARSING #if SIMDJSON_SWAR_NUMBER_PARSING // this helps if we have lots of decimals! // this turns out to be frequent enough. 
if (is_made_of_eight_digits_fast(p)) { i = i * 100000000 + parse_eight_digits_unrolled(p); p += 8; } #endif // SIMDJSON_SWAR_NUMBER_PARSING #endif // #ifdef SIMDJSON_SWAR_NUMBER_PARSING // Unrolling the first digit makes a small difference on some implementations (e.g. westmere) if (parse_digit(*p, i)) { ++p; } while (parse_digit(*p, i)) { p++; } exponent = first_after_period - p; // Decimal without digits (123.) is illegal if (exponent == 0) { return INVALID_NUMBER(src); } return SUCCESS; } simdjson_inline error_code parse_exponent(simdjson_unused const uint8_t *const src, const uint8_t *&p, int64_t &exponent) { // Exp Sign: -123.456e[-]78 bool neg_exp = ('-' == *p); if (neg_exp || '+' == *p) { p++; } // Skip + as well // Exponent: -123.456e-[78] auto start_exp = p; int64_t exp_number = 0; while (parse_digit(*p, exp_number)) { ++p; } // It is possible for parse_digit to overflow. // In particular, it could overflow to INT64_MIN, and we cannot do - INT64_MIN. // Thus we *must* check for possible overflow before we negate exp_number. // Performance notes: it may seem like combining the two "simdjson_unlikely checks" below into // a single simdjson_unlikely path would be faster. The reasoning is sound, but the compiler may // not oblige and may, in fact, generate two distinct paths in any case. It might be // possible to do uint64_t(p - start_exp - 1) >= 18 but it could end up trading off // instructions for a simdjson_likely branch, an unconclusive gain. // If there were no digits, it's an error. if (simdjson_unlikely(p == start_exp)) { return INVALID_NUMBER(src); } // We have a valid positive exponent in exp_number at this point, except that // it may have overflowed. // If there were more than 18 digits, we may have overflowed the integer. We have to do // something!!!! 
if (simdjson_unlikely(p > start_exp+18)) { // Skip leading zeroes: 1e000000000000000000001 is technically valid and doesn't overflow while (*start_exp == '0') { start_exp++; } // 19 digits could overflow int64_t and is kind of absurd anyway. We don't // support exponents smaller than -999,999,999,999,999,999 and bigger // than 999,999,999,999,999,999. // We can truncate. // Note that 999999999999999999 is assuredly too large. The maximal ieee64 value before // infinity is ~1.8e308. The smallest subnormal is ~5e-324. So, actually, we could // truncate at 324. // Note that there is no reason to fail per se at this point in time. // E.g., 0e999999999999999999999 is a fine number. if (p > start_exp+18) { exp_number = 999999999999999999; } } // At this point, we know that exp_number is a sane, positive, signed integer. // It is <= 999,999,999,999,999,999. As long as 'exponent' is in // [-8223372036854775808, 8223372036854775808], we won't overflow. Because 'exponent' // is bounded in magnitude by the size of the JSON input, we are fine in this universe. // To sum it up: the next line should never overflow. exponent += (neg_exp ? -exp_number : exp_number); return SUCCESS; } simdjson_inline size_t significant_digits(const uint8_t * start_digits, size_t digit_count) { // It is possible that the integer had an overflow. // We have to handle the case where we have 0.0000somenumber. const uint8_t *start = start_digits; while ((*start == '0') || (*start == '.')) { ++start; } // we over-decrement by one when there is a '.' 
return digit_count - size_t(start - start_digits); } } // unnamed namespace /** @private */ template error_code slow_float_parsing(simdjson_unused const uint8_t * src, W writer) { double d; if (parse_float_fallback(src, &d)) { writer.append_double(d); return SUCCESS; } return INVALID_NUMBER(src); } /** @private */ template simdjson_inline error_code write_float(const uint8_t *const src, bool negative, uint64_t i, const uint8_t * start_digits, size_t digit_count, int64_t exponent, W &writer) { // If we frequently had to deal with long strings of digits, // we could extend our code by using a 128-bit integer instead // of a 64-bit integer. However, this is uncommon in practice. // // 9999999999999999999 < 2**64 so we can accommodate 19 digits. // If we have a decimal separator, then digit_count - 1 is the number of digits, but we // may not have a decimal separator! if (simdjson_unlikely(digit_count > 19 && significant_digits(start_digits, digit_count) > 19)) { // Ok, chances are good that we had an overflow! // this is almost never going to get called!!! // we start anew, going slowly!!! // This will happen in the following examples: // 10000000000000000000000000000000000000000000e+308 // 3.1415926535897932384626433832795028841971693993751 // // NOTE: This makes a *copy* of the writer and passes it to slow_float_parsing. This happens // because slow_float_parsing is a non-inlined function. If we passed our writer reference to // it, it would force it to be stored in memory, preventing the compiler from picking it apart // and putting into registers. i.e. if we pass it as reference, it gets slow. // This is what forces the skip_double, as well. 
error_code error = slow_float_parsing(src, writer); writer.skip_double(); return error; } // NOTE: it's weird that the simdjson_unlikely() only wraps half the if, but it seems to get slower any other // way we've tried: https://github.com/simdjson/simdjson/pull/990#discussion_r448497331 // To future reader: we'd love if someone found a better way, or at least could explain this result! if (simdjson_unlikely(exponent < simdjson::internal::smallest_power) || (exponent > simdjson::internal::largest_power)) { // // Important: smallest_power is such that it leads to a zero value. // Observe that 18446744073709551615e-343 == 0, i.e. (2**64 - 1) e -343 is zero // so something x 10^-343 goes to zero, but not so with something x 10^-342. static_assert(simdjson::internal::smallest_power <= -342, "smallest_power is not small enough"); // if((exponent < simdjson::internal::smallest_power) || (i == 0)) { // E.g. Parse "-0.0e-999" into the same value as "-0.0". See https://en.wikipedia.org/wiki/Signed_zero WRITE_DOUBLE(negative ? -0.0 : 0.0, src, writer); return SUCCESS; } else { // (exponent > largest_power) and (i != 0) // We have, for sure, an infinite value and simdjson refuses to parse infinite values. return INVALID_NUMBER(src); } } double d; if (!compute_float_64(exponent, i, negative, d)) { // we are almost never going to get here. 
if (!parse_float_fallback(src, &d)) { return INVALID_NUMBER(src); } } WRITE_DOUBLE(d, src, writer); return SUCCESS; } // for performance analysis, it is sometimes useful to skip parsing #ifdef SIMDJSON_SKIPNUMBERPARSING template simdjson_inline error_code parse_number(const uint8_t *const, W &writer) { writer.append_s64(0); // always write zero return SUCCESS; // always succeeds } simdjson_unused simdjson_inline simdjson_result parse_unsigned(const uint8_t * const src) noexcept { return 0; } simdjson_unused simdjson_inline simdjson_result parse_integer(const uint8_t * const src) noexcept { return 0; } simdjson_unused simdjson_inline simdjson_result parse_double(const uint8_t * const src) noexcept { return 0; } simdjson_unused simdjson_inline simdjson_result parse_unsigned_in_string(const uint8_t * const src) noexcept { return 0; } simdjson_unused simdjson_inline simdjson_result parse_integer_in_string(const uint8_t * const src) noexcept { return 0; } simdjson_unused simdjson_inline simdjson_result parse_double_in_string(const uint8_t * const src) noexcept { return 0; } simdjson_unused simdjson_inline bool is_negative(const uint8_t * src) noexcept { return false; } simdjson_unused simdjson_inline simdjson_result is_integer(const uint8_t * src) noexcept { return false; } simdjson_unused simdjson_inline simdjson_result get_number_type(const uint8_t * src) noexcept { return number_type::signed_integer; } #else // parse the number at src // define JSON_TEST_NUMBERS for unit testing // // It is assumed that the number is followed by a structural ({,},],[) character // or a white space character. If that is not the case (e.g., when the JSON // document is made of a single number), then it is necessary to copy the // content and append a space before calling this function. // // Our objective is accurate parsing (ULP of 0) at high speed. 
template simdjson_inline error_code parse_number(const uint8_t *const src, W &writer) { // // Check for minus sign // bool negative = (*src == '-'); const uint8_t *p = src + uint8_t(negative); // // Parse the integer part. // // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare const uint8_t *const start_digits = p; uint64_t i = 0; while (parse_digit(*p, i)) { p++; } // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. // Optimization note: size_t is expected to be unsigned. size_t digit_count = size_t(p - start_digits); if (digit_count == 0 || ('0' == *start_digits && digit_count > 1)) { return INVALID_NUMBER(src); } // // Handle floats if there is a . or e (or both) // int64_t exponent = 0; bool is_float = false; if ('.' == *p) { is_float = true; ++p; SIMDJSON_TRY( parse_decimal_after_separator(src, p, i, exponent) ); digit_count = int(p - start_digits); // used later to guard against overflows } if (('e' == *p) || ('E' == *p)) { is_float = true; ++p; SIMDJSON_TRY( parse_exponent(src, p, exponent) ); } if (is_float) { const bool dirty_end = jsoncharutils::is_not_structural_or_whitespace(*p); SIMDJSON_TRY( write_float(src, negative, i, start_digits, digit_count, exponent, writer) ); if (dirty_end) { return INVALID_NUMBER(src); } return SUCCESS; } // The longest negative 64-bit number is 19 digits. // The longest positive 64-bit number is 20 digits. // We do it this way so we don't trigger this branch unless we must. size_t longest_digit_count = negative ? 
19 : 20; if (digit_count > longest_digit_count) { return INVALID_NUMBER(src); } if (digit_count == longest_digit_count) { if (negative) { // Anything negative above INT64_MAX+1 is invalid if (i > uint64_t(INT64_MAX)+1) { return INVALID_NUMBER(src); } WRITE_INTEGER(~i+1, src, writer); if (jsoncharutils::is_not_structural_or_whitespace(*p)) { return INVALID_NUMBER(src); } return SUCCESS; // Positive overflow check: // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the // biggest uint64_t. // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. // If we got here, it's a 20 digit number starting with the digit "1". // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller // than 1,553,255,926,290,448,384. // - That is smaller than the smallest possible 20-digit number the user could write: // 10,000,000,000,000,000,000. // - Therefore, if the number is positive and lower than that, it's overflow. // - The value we are looking at is less than or equal to INT64_MAX. // } else if (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INVALID_NUMBER(src); } } // Write unsigned if it doesn't fit in a signed integer. if (i > uint64_t(INT64_MAX)) { WRITE_UNSIGNED(i, src, writer); } else { WRITE_INTEGER(negative ? (~i+1) : i, src, writer); } if (jsoncharutils::is_not_structural_or_whitespace(*p)) { return INVALID_NUMBER(src); } return SUCCESS; } // Inlineable functions namespace { // This table can be used to characterize the final character of an integer // string. For JSON structural character and allowable white space characters, // we return SUCCESS. For 'e', '.' and 'E', we return INCORRECT_TYPE. Otherwise // we return NUMBER_ERROR. // Optimization note: we could easily reduce the size of the table by half (to 128) // at the cost of an extra branch. 
// Optimization note: we want the values to use at most 8 bits (not, e.g., 32 bits): static_assert(error_code(uint8_t(NUMBER_ERROR))== NUMBER_ERROR, "bad NUMBER_ERROR cast"); static_assert(error_code(uint8_t(SUCCESS))== SUCCESS, "bad NUMBER_ERROR cast"); static_assert(error_code(uint8_t(INCORRECT_TYPE))== INCORRECT_TYPE, "bad NUMBER_ERROR cast"); const uint8_t integer_string_finisher[256] = { NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, SUCCESS, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, INCORRECT_TYPE, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, INCORRECT_TYPE, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, INCORRECT_TYPE, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, 
NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR}; // Parse any number from 0 to 18,446,744,073,709,551,615 
simdjson_unused simdjson_inline simdjson_result<uint64_t> parse_unsigned(const uint8_t * const src) noexcept {
  const uint8_t *p = src;
  //
  // Parse the integer part.
  //
  // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare
  const uint8_t *const start_digits = p;
  uint64_t i = 0;
  while (parse_digit(*p, i)) { p++; }

  // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error.
  // Optimization note: size_t is expected to be unsigned.
  size_t digit_count = size_t(p - start_digits);
  // The longest positive 64-bit number is 20 digits.
  // We do it this way so we don't trigger this branch unless we must.
  // Optimization note: the compiler can probably merge
  // ((digit_count == 0) || (digit_count > 20))
  // into a single  branch since digit_count is unsigned.
  if ((digit_count == 0) || (digit_count > 20)) { return INCORRECT_TYPE; }
  // Here digit_count > 0.
  if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; }
  // We can do the following...
  // if (!jsoncharutils::is_structural_or_whitespace(*p)) {
  //  return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR;
  // }
  // as a single table lookup:
  if (integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); }

  if (digit_count == 20) {
    // Positive overflow check:
    // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the
    //   biggest uint64_t.
    // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX.
    //   If we got here, it's a 20 digit number starting with the digit "1".
    // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller
    //   than 1,553,255,926,290,448,384.
    // - That is smaller than the smallest possible 20-digit number the user could write:
    //   10,000,000,000,000,000,000.
    // - Therefore, if the number is positive and lower than that, it's overflow.
    // - The value we are looking at is less than or equal to INT64_MAX.
    //
    if (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INCORRECT_TYPE; }
  }

  return i;
}


// Parse any number from 0 to 18,446,744,073,709,551,615
// Never read at src_end or beyond
simdjson_unused simdjson_inline simdjson_result<uint64_t> parse_unsigned(const uint8_t * const src, const uint8_t * const src_end) noexcept {
  const uint8_t *p = src;
  //
  // Parse the integer part.
  //
  // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare
  const uint8_t *const start_digits = p;
  uint64_t i = 0;
  while ((p != src_end) && parse_digit(*p, i)) { p++; }

  // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error.
  // Optimization note: size_t is expected to be unsigned.
  size_t digit_count = size_t(p - start_digits);
  // The longest positive 64-bit number is 20 digits.
  // We do it this way so we don't trigger this branch unless we must.
  // Optimization note: the compiler can probably merge
  // ((digit_count == 0) || (digit_count > 20))
  // into a single  branch since digit_count is unsigned.
  if ((digit_count == 0) || (digit_count > 20)) { return INCORRECT_TYPE; }
  // Here digit_count > 0.
  if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; }
  // We can do the following...
  // if (!jsoncharutils::is_structural_or_whitespace(*p)) {
  //  return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR;
  // }
  // as a single table lookup (skipped when the digits run up to src_end,
  // since there is then no terminator byte we are allowed to read):
  if ((p != src_end) && integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); }

  if (digit_count == 20) {
    // Positive overflow check:
    // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the
    //   biggest uint64_t.
    // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX.
    //   If we got here, it's a 20 digit number starting with the digit "1".
    // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller
    //   than 1,553,255,926,290,448,384.
    // - That is smaller than the smallest possible 20-digit number the user could write:
    //   10,000,000,000,000,000,000.
    // - Therefore, if the number is positive and lower than that, it's overflow.
    // - The value we are looking at is less than or equal to INT64_MAX.
    //
    if (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INCORRECT_TYPE; }
  }

  return i;
}

// Parse any number from 0 to 18,446,744,073,709,551,615
// The digits start at src + 1 (src[0] is the opening quote character) and
// must be followed immediately by a closing quote.
simdjson_unused simdjson_inline simdjson_result<uint64_t> parse_unsigned_in_string(const uint8_t * const src) noexcept {
  const uint8_t *p = src + 1;
  //
  // Parse the integer part.
  //
  // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare
  const uint8_t *const start_digits = p;
  uint64_t i = 0;
  while (parse_digit(*p, i)) { p++; }

  // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error.
  // Optimization note: size_t is expected to be unsigned.
  size_t digit_count = size_t(p - start_digits);
  // The longest positive 64-bit number is 20 digits.
  // We do it this way so we don't trigger this branch unless we must.
  // Optimization note: the compiler can probably merge
  // ((digit_count == 0) || (digit_count > 20))
  // into a single  branch since digit_count is unsigned.
  if ((digit_count == 0) || (digit_count > 20)) { return INCORRECT_TYPE; }
  // Here digit_count > 0.
  if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; }
  // Inside a string, the only acceptable terminator is the closing quote:
  // any other character (including '.', 'e' and 'E') yields NUMBER_ERROR.
  if (*p != '"') { return NUMBER_ERROR; }

  if (digit_count == 20) {
    // Positive overflow check:
    // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the
    //   biggest uint64_t.
    // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX.
    //   If we got here, it's a 20 digit number starting with the digit "1".
    // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller
    //   than 1,553,255,926,290,448,384.
    // - That is smaller than the smallest possible 20-digit number the user could write:
    //   10,000,000,000,000,000,000.
    // - Therefore, if the number is positive and lower than that, it's overflow.
    // - The value we are looking at is less than or equal to INT64_MAX.
    //
    // Note: we use src[1] and not src[0] because src[0] is the quote character in this
    // instance.
    if (src[1] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INCORRECT_TYPE; }
  }

  return i;
}

// Parse any number from  -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807
simdjson_unused simdjson_inline simdjson_result<int64_t> parse_integer(const uint8_t *src) noexcept {
  //
  // Check for minus sign
  //
  bool negative = (*src == '-');
  const uint8_t *p = src + uint8_t(negative);

  //
  // Parse the integer part.
  //
  // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare
  const uint8_t *const start_digits = p;
  uint64_t i = 0;
  while (parse_digit(*p, i)) { p++; }

  // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error.
  // Optimization note: size_t is expected to be unsigned.
  size_t digit_count = size_t(p - start_digits);
  // We go from
  // -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807
  // so we can never represent numbers that have more than 19 digits.
  size_t longest_digit_count = 19;
  // Optimization note: the compiler can probably merge
  // ((digit_count == 0) || (digit_count > longest_digit_count))
  // into a single  branch since digit_count is unsigned.
  if ((digit_count == 0) || (digit_count > longest_digit_count)) { return INCORRECT_TYPE; }
  // Here digit_count > 0.
  if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; }
  // We can do the following...
  // if (!jsoncharutils::is_structural_or_whitespace(*p)) {
  //  return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR;
  // }
  // as a single table lookup:
  if(integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); }
  // Negative numbers can go down to - INT64_MAX - 1 whereas positive numbers are limited to INT64_MAX.
  // Performance note: This check is only needed when digit_count == longest_digit_count but it is
  // so cheap that we might as well always make it.
  if(i > uint64_t(INT64_MAX) + uint64_t(negative)) { return INCORRECT_TYPE; }
  // ~i+1 is the two's complement negation, computed on the unsigned value.
  return negative ? (~i+1) : i;
}

// Parse any number from  -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807
// Never read at src_end or beyond
simdjson_unused simdjson_inline simdjson_result<int64_t> parse_integer(const uint8_t * const src, const uint8_t * const src_end) noexcept {
  //
  // Check for minus sign
  //
  if(src == src_end) { return NUMBER_ERROR; }
  bool negative = (*src == '-');
  const uint8_t *p = src + uint8_t(negative);

  //
  // Parse the integer part.
  //
  // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare
  const uint8_t *const start_digits = p;
  uint64_t i = 0;
  while ((p != src_end) && parse_digit(*p, i)) { p++; }

  // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error.
  // Optimization note: size_t is expected to be unsigned.
  size_t digit_count = size_t(p - start_digits);
  // We go from
  // -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807
  // so we can never represent numbers that have more than 19 digits.
  size_t longest_digit_count = 19;
  // Optimization note: the compiler can probably merge
  // ((digit_count == 0) || (digit_count > longest_digit_count))
  // into a single  branch since digit_count is unsigned.
if ((digit_count == 0) || (digit_count > longest_digit_count)) { return INCORRECT_TYPE; } // Here digit_count > 0. if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } // We can do the following... // if (!jsoncharutils::is_structural_or_whitespace(*p)) { // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; // } // as a single table lookup: if((p != src_end) && integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } // Negative numbers have can go down to - INT64_MAX - 1 whereas positive numbers are limited to INT64_MAX. // Performance note: This check is only needed when digit_count == longest_digit_count but it is // so cheap that we might as well always make it. if(i > uint64_t(INT64_MAX) + uint64_t(negative)) { return INCORRECT_TYPE; } return negative ? (~i+1) : i; } // Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 simdjson_unused simdjson_inline simdjson_result parse_integer_in_string(const uint8_t *src) noexcept { // // Check for minus sign // bool negative = (*(src + 1) == '-'); src += uint8_t(negative) + 1; // // Parse the integer part. // // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare const uint8_t *const start_digits = src; uint64_t i = 0; while (parse_digit(*src, i)) { src++; } // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. // Optimization note: size_t is expected to be unsigned. size_t digit_count = size_t(src - start_digits); // We go from // -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 // so we can never represent numbers that have more than 19 digits. size_t longest_digit_count = 19; // Optimization note: the compiler can probably merge // ((digit_count == 0) || (digit_count > longest_digit_count)) // into a single branch since digit_count is unsigned. 
if ((digit_count == 0) || (digit_count > longest_digit_count)) { return INCORRECT_TYPE; } // Here digit_count > 0. if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } // We can do the following... // if (!jsoncharutils::is_structural_or_whitespace(*src)) { // return (*src == '.' || *src == 'e' || *src == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; // } // as a single table lookup: if(*src != '"') { return NUMBER_ERROR; } // Negative numbers have can go down to - INT64_MAX - 1 whereas positive numbers are limited to INT64_MAX. // Performance note: This check is only needed when digit_count == longest_digit_count but it is // so cheap that we might as well always make it. if(i > uint64_t(INT64_MAX) + uint64_t(negative)) { return INCORRECT_TYPE; } return negative ? (~i+1) : i; } simdjson_unused simdjson_inline simdjson_result parse_double(const uint8_t * src) noexcept { // // Check for minus sign // bool negative = (*src == '-'); src += uint8_t(negative); // // Parse the integer part. // uint64_t i = 0; const uint8_t *p = src; p += parse_digit(*p, i); bool leading_zero = (i == 0); while (parse_digit(*p, i)) { p++; } // no integer digits, or 0123 (zero must be solo) if ( p == src ) { return INCORRECT_TYPE; } if ( (leading_zero && p != src+1)) { return NUMBER_ERROR; } // // Parse the decimal part. // int64_t exponent = 0; bool overflow; if (simdjson_likely(*p == '.')) { p++; const uint8_t *start_decimal_digits = p; if (!parse_digit(*p, i)) { return NUMBER_ERROR; } // no decimal digits p++; while (parse_digit(*p, i)) { p++; } exponent = -(p - start_decimal_digits); // Overflow check. More than 19 digits (minus the decimal) may be overflow. 
overflow = p-src-1 > 19; if (simdjson_unlikely(overflow && leading_zero)) { // Skip leading 0.00000 and see if it still overflows const uint8_t *start_digits = src + 2; while (*start_digits == '0') { start_digits++; } overflow = p-start_digits > 19; } } else { overflow = p-src > 19; } // // Parse the exponent // if (*p == 'e' || *p == 'E') { p++; bool exp_neg = *p == '-'; p += exp_neg || *p == '+'; uint64_t exp = 0; const uint8_t *start_exp_digits = p; while (parse_digit(*p, exp)) { p++; } // no exp digits, or 20+ exp digits if (p-start_exp_digits == 0 || p-start_exp_digits > 19) { return NUMBER_ERROR; } exponent += exp_neg ? 0-exp : exp; } if (jsoncharutils::is_not_structural_or_whitespace(*p)) { return NUMBER_ERROR; } overflow = overflow || exponent < simdjson::internal::smallest_power || exponent > simdjson::internal::largest_power; // // Assemble (or slow-parse) the float // double d; if (simdjson_likely(!overflow)) { if (compute_float_64(exponent, i, negative, d)) { return d; } } if (!parse_float_fallback(src - uint8_t(negative), &d)) { return NUMBER_ERROR; } return d; } simdjson_unused simdjson_inline bool is_negative(const uint8_t * src) noexcept { return (*src == '-'); } simdjson_unused simdjson_inline simdjson_result is_integer(const uint8_t * src) noexcept { bool negative = (*src == '-'); src += uint8_t(negative); const uint8_t *p = src; while(static_cast(*p - '0') <= 9) { p++; } if ( p == src ) { return NUMBER_ERROR; } if (jsoncharutils::is_structural_or_whitespace(*p)) { return true; } return false; } simdjson_unused simdjson_inline simdjson_result get_number_type(const uint8_t * src) noexcept { bool negative = (*src == '-'); src += uint8_t(negative); const uint8_t *p = src; while(static_cast(*p - '0') <= 9) { p++; } if ( p == src ) { return NUMBER_ERROR; } if (jsoncharutils::is_structural_or_whitespace(*p)) { // We have an integer. // If the number is negative and valid, it must be a signed integer. 
if(negative) { return number_type::signed_integer; } // We want values larger or equal to 9223372036854775808 to be unsigned // integers, and the other values to be signed integers. int digit_count = int(p - src); if(digit_count >= 19) { const uint8_t * smaller_big_integer = reinterpret_cast("9223372036854775808"); if((digit_count >= 20) || (memcmp(src, smaller_big_integer, 19) >= 0)) { return number_type::unsigned_integer; } } return number_type::signed_integer; } // Hopefully, we have 'e' or 'E' or '.'. return number_type::floating_point_number; } // Never read at src_end or beyond simdjson_unused simdjson_inline simdjson_result parse_double(const uint8_t * src, const uint8_t * const src_end) noexcept { if(src == src_end) { return NUMBER_ERROR; } // // Check for minus sign // bool negative = (*src == '-'); src += uint8_t(negative); // // Parse the integer part. // uint64_t i = 0; const uint8_t *p = src; if(p == src_end) { return NUMBER_ERROR; } p += parse_digit(*p, i); bool leading_zero = (i == 0); while ((p != src_end) && parse_digit(*p, i)) { p++; } // no integer digits, or 0123 (zero must be solo) if ( p == src ) { return INCORRECT_TYPE; } if ( (leading_zero && p != src+1)) { return NUMBER_ERROR; } // // Parse the decimal part. // int64_t exponent = 0; bool overflow; if (simdjson_likely((p != src_end) && (*p == '.'))) { p++; const uint8_t *start_decimal_digits = p; if ((p == src_end) || !parse_digit(*p, i)) { return NUMBER_ERROR; } // no decimal digits p++; while ((p != src_end) && parse_digit(*p, i)) { p++; } exponent = -(p - start_decimal_digits); // Overflow check. More than 19 digits (minus the decimal) may be overflow. 
overflow = p-src-1 > 19; if (simdjson_unlikely(overflow && leading_zero)) { // Skip leading 0.00000 and see if it still overflows const uint8_t *start_digits = src + 2; while (*start_digits == '0') { start_digits++; } overflow = start_digits-src > 19; } } else { overflow = p-src > 19; } // // Parse the exponent // if ((p != src_end) && (*p == 'e' || *p == 'E')) { p++; if(p == src_end) { return NUMBER_ERROR; } bool exp_neg = *p == '-'; p += exp_neg || *p == '+'; uint64_t exp = 0; const uint8_t *start_exp_digits = p; while ((p != src_end) && parse_digit(*p, exp)) { p++; } // no exp digits, or 20+ exp digits if (p-start_exp_digits == 0 || p-start_exp_digits > 19) { return NUMBER_ERROR; } exponent += exp_neg ? 0-exp : exp; } if ((p != src_end) && jsoncharutils::is_not_structural_or_whitespace(*p)) { return NUMBER_ERROR; } overflow = overflow || exponent < simdjson::internal::smallest_power || exponent > simdjson::internal::largest_power; // // Assemble (or slow-parse) the float // double d; if (simdjson_likely(!overflow)) { if (compute_float_64(exponent, i, negative, d)) { return d; } } if (!parse_float_fallback(src - uint8_t(negative), src_end, &d)) { return NUMBER_ERROR; } return d; } simdjson_unused simdjson_inline simdjson_result parse_double_in_string(const uint8_t * src) noexcept { // // Check for minus sign // bool negative = (*(src + 1) == '-'); src += uint8_t(negative) + 1; // // Parse the integer part. // uint64_t i = 0; const uint8_t *p = src; p += parse_digit(*p, i); bool leading_zero = (i == 0); while (parse_digit(*p, i)) { p++; } // no integer digits, or 0123 (zero must be solo) if ( p == src ) { return INCORRECT_TYPE; } if ( (leading_zero && p != src+1)) { return NUMBER_ERROR; } // // Parse the decimal part. 
// int64_t exponent = 0; bool overflow; if (simdjson_likely(*p == '.')) { p++; const uint8_t *start_decimal_digits = p; if (!parse_digit(*p, i)) { return NUMBER_ERROR; } // no decimal digits p++; while (parse_digit(*p, i)) { p++; } exponent = -(p - start_decimal_digits); // Overflow check. More than 19 digits (minus the decimal) may be overflow. overflow = p-src-1 > 19; if (simdjson_unlikely(overflow && leading_zero)) { // Skip leading 0.00000 and see if it still overflows const uint8_t *start_digits = src + 2; while (*start_digits == '0') { start_digits++; } overflow = p-start_digits > 19; } } else { overflow = p-src > 19; } // // Parse the exponent // if (*p == 'e' || *p == 'E') { p++; bool exp_neg = *p == '-'; p += exp_neg || *p == '+'; uint64_t exp = 0; const uint8_t *start_exp_digits = p; while (parse_digit(*p, exp)) { p++; } // no exp digits, or 20+ exp digits if (p-start_exp_digits == 0 || p-start_exp_digits > 19) { return NUMBER_ERROR; } exponent += exp_neg ? 0-exp : exp; } if (*p != '"') { return NUMBER_ERROR; } overflow = overflow || exponent < simdjson::internal::smallest_power || exponent > simdjson::internal::largest_power; // // Assemble (or slow-parse) the float // double d; if (simdjson_likely(!overflow)) { if (compute_float_64(exponent, i, negative, d)) { return d; } } if (!parse_float_fallback(src - uint8_t(negative), &d)) { return NUMBER_ERROR; } return d; } } // unnamed namespace #endif // SIMDJSON_SKIPNUMBERPARSING } // namespace numberparsing inline std::ostream& operator<<(std::ostream& out, number_type type) noexcept { switch (type) { case number_type::signed_integer: out << "integer in [-9223372036854775808,9223372036854775808)"; break; case number_type::unsigned_integer: out << "unsigned integer in [9223372036854775808,18446744073709551616)"; break; case number_type::floating_point_number: out << "floating-point number (binary64)"; break; default: SIMDJSON_UNREACHABLE(); } return out; } } // namespace westmere } // namespace simdjson 
#endif // SIMDJSON_GENERIC_NUMBERPARSING_H /* end file simdjson/generic/numberparsing.h for westmere */ /* including simdjson/generic/implementation_simdjson_result_base-inl.h for westmere: #include "simdjson/generic/implementation_simdjson_result_base-inl.h" */ /* begin file simdjson/generic/implementation_simdjson_result_base-inl.h for westmere */ #ifndef SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_INL_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_INL_H */ /* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { namespace westmere { // // internal::implementation_simdjson_result_base inline implementation // template simdjson_inline void implementation_simdjson_result_base::tie(T &value, error_code &error) && noexcept { error = this->second; if (!error) { value = std::forward>(*this).first; } } template simdjson_warn_unused simdjson_inline error_code implementation_simdjson_result_base::get(T &value) && noexcept { error_code error; std::forward>(*this).tie(value, error); return error; } template simdjson_inline error_code implementation_simdjson_result_base::error() const noexcept { return this->second; } #if SIMDJSON_EXCEPTIONS template simdjson_inline T& implementation_simdjson_result_base::value() & noexcept(false) { if (error()) { throw simdjson_error(error()); } return this->first; } template simdjson_inline T&& implementation_simdjson_result_base::value() && noexcept(false) { return std::forward>(*this).take_value(); } template simdjson_inline T&& implementation_simdjson_result_base::take_value() && noexcept(false) { if (error()) { throw simdjson_error(error()); } return 
std::forward(this->first); } template simdjson_inline implementation_simdjson_result_base::operator T&&() && noexcept(false) { return std::forward>(*this).take_value(); } #endif // SIMDJSON_EXCEPTIONS template simdjson_inline const T& implementation_simdjson_result_base::value_unsafe() const& noexcept { return this->first; } template simdjson_inline T& implementation_simdjson_result_base::value_unsafe() & noexcept { return this->first; } template simdjson_inline T&& implementation_simdjson_result_base::value_unsafe() && noexcept { return std::forward(this->first); } template simdjson_inline implementation_simdjson_result_base::implementation_simdjson_result_base(T &&value, error_code error) noexcept : first{std::forward(value)}, second{error} {} template simdjson_inline implementation_simdjson_result_base::implementation_simdjson_result_base(error_code error) noexcept : implementation_simdjson_result_base(T{}, error) {} template simdjson_inline implementation_simdjson_result_base::implementation_simdjson_result_base(T &&value) noexcept : implementation_simdjson_result_base(std::forward(value), SUCCESS) {} } // namespace westmere } // namespace simdjson #endif // SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_INL_H /* end file simdjson/generic/implementation_simdjson_result_base-inl.h for westmere */ /* end file simdjson/generic/amalgamated.h for westmere */ /* including simdjson/westmere/end.h: #include "simdjson/westmere/end.h" */ /* begin file simdjson/westmere/end.h */ /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #include "simdjson/westmere/base.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ #if !SIMDJSON_CAN_ALWAYS_RUN_WESTMERE SIMDJSON_UNTARGET_REGION #endif /* undefining SIMDJSON_IMPLEMENTATION from "westmere" */ #undef SIMDJSON_IMPLEMENTATION /* end file simdjson/westmere/end.h */ #endif // SIMDJSON_WESTMERE_H /* end file simdjson/westmere.h 
*/ #else #error Unknown SIMDJSON_BUILTIN_IMPLEMENTATION #endif /* undefining SIMDJSON_CONDITIONAL_INCLUDE */ #undef SIMDJSON_CONDITIONAL_INCLUDE #endif // SIMDJSON_BUILTIN_H /* end file simdjson/builtin.h */ /* skipped duplicate #include "simdjson/builtin/base.h" */ /* including simdjson/generic/ondemand/dependencies.h: #include "simdjson/generic/ondemand/dependencies.h" */ /* begin file simdjson/generic/ondemand/dependencies.h */ #ifdef SIMDJSON_CONDITIONAL_INCLUDE #error simdjson/generic/ondemand/dependencies.h must be included before defining SIMDJSON_CONDITIONAL_INCLUDE! #endif #ifndef SIMDJSON_GENERIC_ONDEMAND_DEPENDENCIES_H #define SIMDJSON_GENERIC_ONDEMAND_DEPENDENCIES_H // Internal headers needed for ondemand generics. // All includes not under simdjson/generic/ondemand must be here! // Otherwise, amalgamation will fail. /* skipped duplicate #include "simdjson/dom/base.h" // for MINIMAL_DOCUMENT_CAPACITY */ /* skipped duplicate #include "simdjson/implementation.h" */ /* skipped duplicate #include "simdjson/padded_string.h" */ /* skipped duplicate #include "simdjson/padded_string_view.h" */ /* skipped duplicate #include "simdjson/internal/dom_parser_implementation.h" */ #endif // SIMDJSON_GENERIC_ONDEMAND_DEPENDENCIES_H /* end file simdjson/generic/ondemand/dependencies.h */ /* defining SIMDJSON_CONDITIONAL_INCLUDE */ #define SIMDJSON_CONDITIONAL_INCLUDE #if SIMDJSON_BUILTIN_IMPLEMENTATION_IS(arm64) /* including simdjson/arm64/ondemand.h: #include "simdjson/arm64/ondemand.h" */ /* begin file simdjson/arm64/ondemand.h */ #ifndef SIMDJSON_ARM64_ONDEMAND_H #define SIMDJSON_ARM64_ONDEMAND_H /* including simdjson/arm64/begin.h: #include "simdjson/arm64/begin.h" */ /* begin file simdjson/arm64/begin.h */ /* defining SIMDJSON_IMPLEMENTATION to "arm64" */ #define SIMDJSON_IMPLEMENTATION arm64 /* including simdjson/arm64/base.h: #include "simdjson/arm64/base.h" */ /* begin file simdjson/arm64/base.h */ #ifndef SIMDJSON_ARM64_BASE_H #define SIMDJSON_ARM64_BASE_H /* 
amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #include "simdjson/base.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { /** * Implementation for NEON (ARMv8). */ namespace arm64 { class implementation; namespace { namespace simd { template struct simd8; template struct simd8x64; } // namespace simd } // unnamed namespace } // namespace arm64 } // namespace simdjson #endif // SIMDJSON_ARM64_BASE_H /* end file simdjson/arm64/base.h */ /* including simdjson/arm64/intrinsics.h: #include "simdjson/arm64/intrinsics.h" */ /* begin file simdjson/arm64/intrinsics.h */ #ifndef SIMDJSON_ARM64_INTRINSICS_H #define SIMDJSON_ARM64_INTRINSICS_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #include "simdjson/arm64/base.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ // This should be the correct header whether // you use visual studio or other compilers. #include static_assert(sizeof(uint8x16_t) <= simdjson::SIMDJSON_PADDING, "insufficient padding for arm64"); #endif // SIMDJSON_ARM64_INTRINSICS_H /* end file simdjson/arm64/intrinsics.h */ /* including simdjson/arm64/bitmanipulation.h: #include "simdjson/arm64/bitmanipulation.h" */ /* begin file simdjson/arm64/bitmanipulation.h */ #ifndef SIMDJSON_ARM64_BITMANIPULATION_H #define SIMDJSON_ARM64_BITMANIPULATION_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #include "simdjson/arm64/base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/arm64/intrinsics.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { namespace arm64 { namespace { // We sometimes call trailing_zero on inputs that are zero, // but the algorithms do not end up using the returned value. 
// Sadly, sanitizers are not smart enough to figure it out. SIMDJSON_NO_SANITIZE_UNDEFINED // This function can be used safely even if not all bytes have been // initialized. // See issue https://github.com/simdjson/simdjson/issues/1965 SIMDJSON_NO_SANITIZE_MEMORY simdjson_inline int trailing_zeroes(uint64_t input_num) { #ifdef SIMDJSON_REGULAR_VISUAL_STUDIO unsigned long ret; // Search the mask data from least significant bit (LSB) // to the most significant bit (MSB) for a set bit (1). _BitScanForward64(&ret, input_num); return (int)ret; #else // SIMDJSON_REGULAR_VISUAL_STUDIO return __builtin_ctzll(input_num); #endif // SIMDJSON_REGULAR_VISUAL_STUDIO } /* result might be undefined when input_num is zero */ simdjson_inline uint64_t clear_lowest_bit(uint64_t input_num) { return input_num & (input_num-1); } /* result might be undefined when input_num is zero */ simdjson_inline int leading_zeroes(uint64_t input_num) { #ifdef SIMDJSON_REGULAR_VISUAL_STUDIO unsigned long leading_zero = 0; // Search the mask data from most significant bit (MSB) // to least significant bit (LSB) for a set bit (1). if (_BitScanReverse64(&leading_zero, input_num)) return (int)(63 - leading_zero); else return 64; #else return __builtin_clzll(input_num); #endif// SIMDJSON_REGULAR_VISUAL_STUDIO } /* result might be undefined when input_num is zero */ simdjson_inline int count_ones(uint64_t input_num) { return vaddv_u8(vcnt_u8(vcreate_u8(input_num))); } #if defined(__GNUC__) // catches clang and gcc /** * ARM has a fast 64-bit "bit reversal function" that is handy. However, * it is not generally available as an intrinsic function under Visual * Studio (though this might be changing). Even under clang/gcc, we * apparently need to invoke inline assembly. */ /* * We use SIMDJSON_PREFER_REVERSE_BITS as a hint that algorithms that * work well with bit reversal may use it. 
*/ #define SIMDJSON_PREFER_REVERSE_BITS 1 /* reverse the bits */ simdjson_inline uint64_t reverse_bits(uint64_t input_num) { uint64_t rev_bits; __asm("rbit %0, %1" : "=r"(rev_bits) : "r"(input_num)); return rev_bits; } /** * Flips bit at index 63 - lz. Thus if you have 'leading_zeroes' leading zeroes, * then this will set to zero the leading bit. It is possible for leading_zeroes to be * greating or equal to 63 in which case we trigger undefined behavior, but the output * of such undefined behavior is never used. **/ SIMDJSON_NO_SANITIZE_UNDEFINED simdjson_inline uint64_t zero_leading_bit(uint64_t rev_bits, int leading_zeroes) { return rev_bits ^ (uint64_t(0x8000000000000000) >> leading_zeroes); } #endif simdjson_inline bool add_overflow(uint64_t value1, uint64_t value2, uint64_t *result) { #ifdef SIMDJSON_REGULAR_VISUAL_STUDIO *result = value1 + value2; return *result < value1; #else return __builtin_uaddll_overflow(value1, value2, reinterpret_cast(result)); #endif } } // unnamed namespace } // namespace arm64 } // namespace simdjson #endif // SIMDJSON_ARM64_BITMANIPULATION_H /* end file simdjson/arm64/bitmanipulation.h */ /* including simdjson/arm64/bitmask.h: #include "simdjson/arm64/bitmask.h" */ /* begin file simdjson/arm64/bitmask.h */ #ifndef SIMDJSON_ARM64_BITMASK_H #define SIMDJSON_ARM64_BITMASK_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #include "simdjson/arm64/base.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { namespace arm64 { namespace { // // Perform a "cumulative bitwise xor," flipping bits each time a 1 is encountered. // // For example, prefix_xor(00100100) == 00011100 // simdjson_inline uint64_t prefix_xor(uint64_t bitmask) { ///////////// // We could do this with PMULL, but it is apparently slow. 
// //#ifdef __ARM_FEATURE_CRYPTO // some ARM processors lack this extension //return vmull_p64(-1ULL, bitmask); //#else // Analysis by @sebpop: // When diffing the assembly for src/stage1_find_marks.cpp I see that the eors are all spread out // in between other vector code, so effectively the extra cycles of the sequence do not matter // because the GPR units are idle otherwise and the critical path is on the FP side. // Also the PMULL requires two extra fmovs: GPR->FP (3 cycles in N1, 5 cycles in A72 ) // and FP->GPR (2 cycles on N1 and 5 cycles on A72.) /////////// bitmask ^= bitmask << 1; bitmask ^= bitmask << 2; bitmask ^= bitmask << 4; bitmask ^= bitmask << 8; bitmask ^= bitmask << 16; bitmask ^= bitmask << 32; return bitmask; } } // unnamed namespace } // namespace arm64 } // namespace simdjson #endif /* end file simdjson/arm64/bitmask.h */ /* including simdjson/arm64/numberparsing_defs.h: #include "simdjson/arm64/numberparsing_defs.h" */ /* begin file simdjson/arm64/numberparsing_defs.h */ #ifndef SIMDJSON_ARM64_NUMBERPARSING_DEFS_H #define SIMDJSON_ARM64_NUMBERPARSING_DEFS_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #include "simdjson/arm64/base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/arm64/intrinsics.h" */ /* amalgamation skipped (editor-only): #include "simdjson/internal/numberparsing_tables.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ #include #if _M_ARM64 // __umulh requires intrin.h #include #endif // _M_ARM64 namespace simdjson { namespace arm64 { namespace numberparsing { // we don't have SSE, so let us use a scalar function // credit: https://johnnylee-sde.github.io/Fast-numeric-string-to-int/ /** @private */ static simdjson_inline uint32_t parse_eight_digits_unrolled(const uint8_t *chars) { uint64_t val; std::memcpy(&val, chars, sizeof(uint64_t)); val = (val & 0x0F0F0F0F0F0F0F0F) * 2561 >> 8; val = (val & 
0x00FF00FF00FF00FF) * 6553601 >> 16; return uint32_t((val & 0x0000FFFF0000FFFF) * 42949672960001 >> 32); } simdjson_inline internal::value128 full_multiplication(uint64_t value1, uint64_t value2) { internal::value128 answer; #if SIMDJSON_REGULAR_VISUAL_STUDIO || SIMDJSON_IS_32BITS #ifdef _M_ARM64 // ARM64 has native support for 64-bit multiplications, no need to emultate answer.high = __umulh(value1, value2); answer.low = value1 * value2; #else answer.low = _umul128(value1, value2, &answer.high); // _umul128 not available on ARM64 #endif // _M_ARM64 #else // SIMDJSON_REGULAR_VISUAL_STUDIO || SIMDJSON_IS_32BITS __uint128_t r = (static_cast<__uint128_t>(value1)) * value2; answer.low = uint64_t(r); answer.high = uint64_t(r >> 64); #endif return answer; } } // namespace numberparsing } // namespace arm64 } // namespace simdjson #define SIMDJSON_SWAR_NUMBER_PARSING 1 #endif // SIMDJSON_ARM64_NUMBERPARSING_DEFS_H /* end file simdjson/arm64/numberparsing_defs.h */ /* including simdjson/arm64/simd.h: #include "simdjson/arm64/simd.h" */ /* begin file simdjson/arm64/simd.h */ #ifndef SIMDJSON_ARM64_SIMD_H #define SIMDJSON_ARM64_SIMD_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #include "simdjson/arm64/base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/arm64/bitmanipulation.h" */ /* amalgamation skipped (editor-only): #include "simdjson/internal/simdprune_tables.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { namespace arm64 { namespace { namespace simd { #ifdef SIMDJSON_REGULAR_VISUAL_STUDIO namespace { // Start of private section with Visual Studio workaround #ifndef simdjson_make_uint8x16_t #define simdjson_make_uint8x16_t(x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, \ x13, x14, x15, x16) \ ([=]() { \ uint8_t array[16] = {x1, x2, x3, x4, x5, x6, x7, x8, \ x9, x10, x11, x12, x13, x14, x15, x16}; \ return 
vld1q_u8(array); \ }()) #endif #ifndef simdjson_make_int8x16_t #define simdjson_make_int8x16_t(x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, \ x13, x14, x15, x16) \ ([=]() { \ int8_t array[16] = {x1, x2, x3, x4, x5, x6, x7, x8, \ x9, x10, x11, x12, x13, x14, x15, x16}; \ return vld1q_s8(array); \ }()) #endif #ifndef simdjson_make_uint8x8_t #define simdjson_make_uint8x8_t(x1, x2, x3, x4, x5, x6, x7, x8) \ ([=]() { \ uint8_t array[8] = {x1, x2, x3, x4, x5, x6, x7, x8}; \ return vld1_u8(array); \ }()) #endif #ifndef simdjson_make_int8x8_t #define simdjson_make_int8x8_t(x1, x2, x3, x4, x5, x6, x7, x8) \ ([=]() { \ int8_t array[8] = {x1, x2, x3, x4, x5, x6, x7, x8}; \ return vld1_s8(array); \ }()) #endif #ifndef simdjson_make_uint16x8_t #define simdjson_make_uint16x8_t(x1, x2, x3, x4, x5, x6, x7, x8) \ ([=]() { \ uint16_t array[8] = {x1, x2, x3, x4, x5, x6, x7, x8}; \ return vld1q_u16(array); \ }()) #endif #ifndef simdjson_make_int16x8_t #define simdjson_make_int16x8_t(x1, x2, x3, x4, x5, x6, x7, x8) \ ([=]() { \ int16_t array[8] = {x1, x2, x3, x4, x5, x6, x7, x8}; \ return vld1q_s16(array); \ }()) #endif // End of private section with Visual Studio workaround } // namespace #endif // SIMDJSON_REGULAR_VISUAL_STUDIO template struct simd8; // // Base class of simd8 and simd8, both of which use uint8x16_t internally. 
// template> struct base_u8 { uint8x16_t value; static const int SIZE = sizeof(value); // Conversion from/to SIMD register simdjson_inline base_u8(const uint8x16_t _value) : value(_value) {} simdjson_inline operator const uint8x16_t&() const { return this->value; } simdjson_inline operator uint8x16_t&() { return this->value; } // Bit operations simdjson_inline simd8 operator|(const simd8 other) const { return vorrq_u8(*this, other); } simdjson_inline simd8 operator&(const simd8 other) const { return vandq_u8(*this, other); } simdjson_inline simd8 operator^(const simd8 other) const { return veorq_u8(*this, other); } simdjson_inline simd8 bit_andnot(const simd8 other) const { return vbicq_u8(*this, other); } simdjson_inline simd8 operator~() const { return *this ^ 0xFFu; } simdjson_inline simd8& operator|=(const simd8 other) { auto this_cast = static_cast*>(this); *this_cast = *this_cast | other; return *this_cast; } simdjson_inline simd8& operator&=(const simd8 other) { auto this_cast = static_cast*>(this); *this_cast = *this_cast & other; return *this_cast; } simdjson_inline simd8& operator^=(const simd8 other) { auto this_cast = static_cast*>(this); *this_cast = *this_cast ^ other; return *this_cast; } friend simdjson_inline Mask operator==(const simd8 lhs, const simd8 rhs) { return vceqq_u8(lhs, rhs); } template simdjson_inline simd8 prev(const simd8 prev_chunk) const { return vextq_u8(prev_chunk, *this, 16 - N); } }; // SIMD byte mask type (returned by things like eq and gt) template<> struct simd8: base_u8 { typedef uint16_t bitmask_t; typedef uint32_t bitmask2_t; static simdjson_inline simd8 splat(bool _value) { return vmovq_n_u8(uint8_t(-(!!_value))); } simdjson_inline simd8(const uint8x16_t _value) : base_u8(_value) {} // False constructor simdjson_inline simd8() : simd8(vdupq_n_u8(0)) {} // Splat constructor simdjson_inline simd8(bool _value) : simd8(splat(_value)) {} // We return uint32_t instead of uint16_t because that seems to be more efficient for most 
// purposes (cutting it down to uint16_t costs performance in some compilers). simdjson_inline uint32_t to_bitmask() const { #ifdef SIMDJSON_REGULAR_VISUAL_STUDIO const uint8x16_t bit_mask = simdjson_make_uint8x16_t(0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80, 0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80); #else const uint8x16_t bit_mask = {0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80, 0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80}; #endif auto minput = *this & bit_mask; uint8x16_t tmp = vpaddq_u8(minput, minput); tmp = vpaddq_u8(tmp, tmp); tmp = vpaddq_u8(tmp, tmp); return vgetq_lane_u16(vreinterpretq_u16_u8(tmp), 0); } simdjson_inline bool any() const { return vmaxvq_u8(*this) != 0; } }; // Unsigned bytes template<> struct simd8: base_u8 { static simdjson_inline uint8x16_t splat(uint8_t _value) { return vmovq_n_u8(_value); } static simdjson_inline uint8x16_t zero() { return vdupq_n_u8(0); } static simdjson_inline uint8x16_t load(const uint8_t* values) { return vld1q_u8(values); } simdjson_inline simd8(const uint8x16_t _value) : base_u8(_value) {} // Zero constructor simdjson_inline simd8() : simd8(zero()) {} // Array constructor simdjson_inline simd8(const uint8_t values[16]) : simd8(load(values)) {} // Splat constructor simdjson_inline simd8(uint8_t _value) : simd8(splat(_value)) {} // Member-by-member initialization #ifdef SIMDJSON_REGULAR_VISUAL_STUDIO simdjson_inline simd8( uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15 ) : simd8(simdjson_make_uint8x16_t( v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10,v11,v12,v13,v14,v15 )) {} #else simdjson_inline simd8( uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15 ) : simd8(uint8x16_t{ v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, 
v10,v11,v12,v13,v14,v15 }) {} #endif // Repeat 16 values as many times as necessary (usually for lookup tables) simdjson_inline static simd8 repeat_16( uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15 ) { return simd8( v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10,v11,v12,v13,v14,v15 ); } // Store to array simdjson_inline void store(uint8_t dst[16]) const { return vst1q_u8(dst, *this); } // Saturated math simdjson_inline simd8 saturating_add(const simd8 other) const { return vqaddq_u8(*this, other); } simdjson_inline simd8 saturating_sub(const simd8 other) const { return vqsubq_u8(*this, other); } // Addition/subtraction are the same for signed and unsigned simdjson_inline simd8 operator+(const simd8 other) const { return vaddq_u8(*this, other); } simdjson_inline simd8 operator-(const simd8 other) const { return vsubq_u8(*this, other); } simdjson_inline simd8& operator+=(const simd8 other) { *this = *this + other; return *this; } simdjson_inline simd8& operator-=(const simd8 other) { *this = *this - other; return *this; } // Order-specific operations simdjson_inline uint8_t max_val() const { return vmaxvq_u8(*this); } simdjson_inline uint8_t min_val() const { return vminvq_u8(*this); } simdjson_inline simd8 max_val(const simd8 other) const { return vmaxq_u8(*this, other); } simdjson_inline simd8 min_val(const simd8 other) const { return vminq_u8(*this, other); } simdjson_inline simd8 operator<=(const simd8 other) const { return vcleq_u8(*this, other); } simdjson_inline simd8 operator>=(const simd8 other) const { return vcgeq_u8(*this, other); } simdjson_inline simd8 operator<(const simd8 other) const { return vcltq_u8(*this, other); } simdjson_inline simd8 operator>(const simd8 other) const { return vcgtq_u8(*this, other); } // Same as >, but instead of guaranteeing all 1's == true, false = 0 and true = nonzero. 
For ARM, returns all 1's. simdjson_inline simd8 gt_bits(const simd8 other) const { return simd8(*this > other); } // Same as <, but instead of guaranteeing all 1's == true, false = 0 and true = nonzero. For ARM, returns all 1's. simdjson_inline simd8 lt_bits(const simd8 other) const { return simd8(*this < other); } // Bit-specific operations simdjson_inline simd8 any_bits_set(simd8 bits) const { return vtstq_u8(*this, bits); } simdjson_inline bool any_bits_set_anywhere() const { return this->max_val() != 0; } simdjson_inline bool any_bits_set_anywhere(simd8 bits) const { return (*this & bits).any_bits_set_anywhere(); } template simdjson_inline simd8 shr() const { return vshrq_n_u8(*this, N); } template simdjson_inline simd8 shl() const { return vshlq_n_u8(*this, N); } // Perform a lookup assuming the value is between 0 and 16 (undefined behavior for out of range values) template simdjson_inline simd8 lookup_16(simd8 lookup_table) const { return lookup_table.apply_lookup_16_to(*this); } // Copies to 'output" all bytes corresponding to a 0 in the mask (interpreted as a bitset). // Passing a 0 value for mask would be equivalent to writing out every byte to output. // Only the first 16 - count_ones(mask) bytes of the result are significant but 16 bytes // get written. // Design consideration: it seems like a function with the // signature simd8 compress(uint16_t mask) would be // sensible, but the AVX ISA makes this kind of approach difficult. 
template simdjson_inline void compress(uint16_t mask, L * output) const { using internal::thintable_epi8; using internal::BitsSetTable256mul2; using internal::pshufb_combine_table; // this particular implementation was inspired by work done by @animetosho // we do it in two steps, first 8 bytes and then second 8 bytes uint8_t mask1 = uint8_t(mask); // least significant 8 bits uint8_t mask2 = uint8_t(mask >> 8); // most significant 8 bits // next line just loads the 64-bit values thintable_epi8[mask1] and // thintable_epi8[mask2] into a 128-bit register, using only // two instructions on most compilers. uint64x2_t shufmask64 = {thintable_epi8[mask1], thintable_epi8[mask2]}; uint8x16_t shufmask = vreinterpretq_u8_u64(shufmask64); // we increment by 0x08 the second half of the mask #ifdef SIMDJSON_REGULAR_VISUAL_STUDIO uint8x16_t inc = simdjson_make_uint8x16_t(0, 0, 0, 0, 0, 0, 0, 0, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08); #else uint8x16_t inc = {0, 0, 0, 0, 0, 0, 0, 0, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08}; #endif shufmask = vaddq_u8(shufmask, inc); // this is the version "nearly pruned" uint8x16_t pruned = vqtbl1q_u8(*this, shufmask); // we still need to put the two halves together. // we compute the popcount of the first half: int pop1 = BitsSetTable256mul2[mask1]; // then load the corresponding mask, what it does is to write // only the first pop1 bytes from the first 8 bytes, and then // it fills in with the bytes from the second 8 bytes + some filling // at the end. uint8x16_t compactmask = vld1q_u8(reinterpret_cast(pshufb_combine_table + pop1 * 8)); uint8x16_t answer = vqtbl1q_u8(pruned, compactmask); vst1q_u8(reinterpret_cast(output), answer); } // Copies all bytes corresponding to a 0 in the low half of the mask (interpreted as a // bitset) to output1, then those corresponding to a 0 in the high half to output2. 
template simdjson_inline void compress_halves(uint16_t mask, L *output1, L *output2) const { using internal::thintable_epi8; uint8_t mask1 = uint8_t(mask); // least significant 8 bits uint8_t mask2 = uint8_t(mask >> 8); // most significant 8 bits uint8x8_t compactmask1 = vcreate_u8(thintable_epi8[mask1]); uint8x8_t compactmask2 = vcreate_u8(thintable_epi8[mask2]); // we increment by 0x08 the second half of the mask #ifdef SIMDJSON_REGULAR_VISUAL_STUDIO uint8x8_t inc = simdjson_make_uint8x8_t(0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08); #else uint8x8_t inc = {0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08}; #endif compactmask2 = vadd_u8(compactmask2, inc); // store each result (with the second store possibly overlapping the first) vst1_u8((uint8_t*)output1, vqtbl1_u8(*this, compactmask1)); vst1_u8((uint8_t*)output2, vqtbl1_u8(*this, compactmask2)); } template simdjson_inline simd8 lookup_16( L replace0, L replace1, L replace2, L replace3, L replace4, L replace5, L replace6, L replace7, L replace8, L replace9, L replace10, L replace11, L replace12, L replace13, L replace14, L replace15) const { return lookup_16(simd8::repeat_16( replace0, replace1, replace2, replace3, replace4, replace5, replace6, replace7, replace8, replace9, replace10, replace11, replace12, replace13, replace14, replace15 )); } template simdjson_inline simd8 apply_lookup_16_to(const simd8 original) { return vqtbl1q_u8(*this, simd8(original)); } }; // Signed bytes template<> struct simd8 { int8x16_t value; static simdjson_inline simd8 splat(int8_t _value) { return vmovq_n_s8(_value); } static simdjson_inline simd8 zero() { return vdupq_n_s8(0); } static simdjson_inline simd8 load(const int8_t values[16]) { return vld1q_s8(values); } // Conversion from/to SIMD register simdjson_inline simd8(const int8x16_t _value) : value{_value} {} simdjson_inline operator const int8x16_t&() const { return this->value; } simdjson_inline operator int8x16_t&() { return this->value; } // Zero constructor 
simdjson_inline simd8() : simd8(zero()) {} // Splat constructor simdjson_inline simd8(int8_t _value) : simd8(splat(_value)) {} // Array constructor simdjson_inline simd8(const int8_t* values) : simd8(load(values)) {} // Member-by-member initialization #ifdef SIMDJSON_REGULAR_VISUAL_STUDIO simdjson_inline simd8( int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15 ) : simd8(simdjson_make_int8x16_t( v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10,v11,v12,v13,v14,v15 )) {} #else simdjson_inline simd8( int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15 ) : simd8(int8x16_t{ v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10,v11,v12,v13,v14,v15 }) {} #endif // Repeat 16 values as many times as necessary (usually for lookup tables) simdjson_inline static simd8 repeat_16( int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15 ) { return simd8( v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10,v11,v12,v13,v14,v15 ); } // Store to array simdjson_inline void store(int8_t dst[16]) const { return vst1q_s8(dst, *this); } // Explicit conversion to/from unsigned // // Under Visual Studio/ARM64 uint8x16_t and int8x16_t are apparently the same type. // In theory, we could check this occurrence with std::same_as and std::enabled_if but it is C++14 // and relatively ugly and hard to read. 
#ifndef SIMDJSON_REGULAR_VISUAL_STUDIO simdjson_inline explicit simd8(const uint8x16_t other): simd8(vreinterpretq_s8_u8(other)) {} #endif simdjson_inline explicit operator simd8() const { return vreinterpretq_u8_s8(this->value); } // Math simdjson_inline simd8 operator+(const simd8 other) const { return vaddq_s8(*this, other); } simdjson_inline simd8 operator-(const simd8 other) const { return vsubq_s8(*this, other); } simdjson_inline simd8& operator+=(const simd8 other) { *this = *this + other; return *this; } simdjson_inline simd8& operator-=(const simd8 other) { *this = *this - other; return *this; } // Order-sensitive comparisons simdjson_inline simd8 max_val(const simd8 other) const { return vmaxq_s8(*this, other); } simdjson_inline simd8 min_val(const simd8 other) const { return vminq_s8(*this, other); } simdjson_inline simd8 operator>(const simd8 other) const { return vcgtq_s8(*this, other); } simdjson_inline simd8 operator<(const simd8 other) const { return vcltq_s8(*this, other); } simdjson_inline simd8 operator==(const simd8 other) const { return vceqq_s8(*this, other); } template simdjson_inline simd8 prev(const simd8 prev_chunk) const { return vextq_s8(prev_chunk, *this, 16 - N); } // Perform a lookup assuming no value is larger than 16 template simdjson_inline simd8 lookup_16(simd8 lookup_table) const { return lookup_table.apply_lookup_16_to(*this); } template simdjson_inline simd8 lookup_16( L replace0, L replace1, L replace2, L replace3, L replace4, L replace5, L replace6, L replace7, L replace8, L replace9, L replace10, L replace11, L replace12, L replace13, L replace14, L replace15) const { return lookup_16(simd8::repeat_16( replace0, replace1, replace2, replace3, replace4, replace5, replace6, replace7, replace8, replace9, replace10, replace11, replace12, replace13, replace14, replace15 )); } template simdjson_inline simd8 apply_lookup_16_to(const simd8 original) { return vqtbl1q_s8(*this, simd8(original)); } }; template struct simd8x64 { static 
constexpr int NUM_CHUNKS = 64 / sizeof(simd8); static_assert(NUM_CHUNKS == 4, "ARM kernel should use four registers per 64-byte block."); const simd8 chunks[NUM_CHUNKS]; simd8x64(const simd8x64& o) = delete; // no copy allowed simd8x64& operator=(const simd8& other) = delete; // no assignment allowed simd8x64() = delete; // no default constructor allowed simdjson_inline simd8x64(const simd8 chunk0, const simd8 chunk1, const simd8 chunk2, const simd8 chunk3) : chunks{chunk0, chunk1, chunk2, chunk3} {} simdjson_inline simd8x64(const T ptr[64]) : chunks{simd8::load(ptr), simd8::load(ptr+16), simd8::load(ptr+32), simd8::load(ptr+48)} {} simdjson_inline void store(T ptr[64]) const { this->chunks[0].store(ptr+sizeof(simd8)*0); this->chunks[1].store(ptr+sizeof(simd8)*1); this->chunks[2].store(ptr+sizeof(simd8)*2); this->chunks[3].store(ptr+sizeof(simd8)*3); } simdjson_inline simd8 reduce_or() const { return (this->chunks[0] | this->chunks[1]) | (this->chunks[2] | this->chunks[3]); } simdjson_inline uint64_t compress(uint64_t mask, T * output) const { uint64_t popcounts = vget_lane_u64(vreinterpret_u64_u8(vcnt_u8(vcreate_u8(~mask))), 0); // compute the prefix sum of the popcounts of each byte uint64_t offsets = popcounts * 0x0101010101010101; this->chunks[0].compress_halves(uint16_t(mask), output, &output[popcounts & 0xFF]); this->chunks[1].compress_halves(uint16_t(mask >> 16), &output[(offsets >> 8) & 0xFF], &output[(offsets >> 16) & 0xFF]); this->chunks[2].compress_halves(uint16_t(mask >> 32), &output[(offsets >> 24) & 0xFF], &output[(offsets >> 32) & 0xFF]); this->chunks[3].compress_halves(uint16_t(mask >> 48), &output[(offsets >> 40) & 0xFF], &output[(offsets >> 48) & 0xFF]); return offsets >> 56; } simdjson_inline uint64_t to_bitmask() const { #ifdef SIMDJSON_REGULAR_VISUAL_STUDIO const uint8x16_t bit_mask = simdjson_make_uint8x16_t( 0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80, 0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80 ); #else const uint8x16_t bit_mask = { 0x01, 
0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80, 0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80 }; #endif // Add each of the elements next to each other, successively, to stuff each 8 byte mask into one. uint8x16_t sum0 = vpaddq_u8(this->chunks[0] & bit_mask, this->chunks[1] & bit_mask); uint8x16_t sum1 = vpaddq_u8(this->chunks[2] & bit_mask, this->chunks[3] & bit_mask); sum0 = vpaddq_u8(sum0, sum1); sum0 = vpaddq_u8(sum0, sum0); return vgetq_lane_u64(vreinterpretq_u64_u8(sum0), 0); } simdjson_inline uint64_t eq(const T m) const { const simd8 mask = simd8::splat(m); return simd8x64( this->chunks[0] == mask, this->chunks[1] == mask, this->chunks[2] == mask, this->chunks[3] == mask ).to_bitmask(); } simdjson_inline uint64_t lteq(const T m) const { const simd8 mask = simd8::splat(m); return simd8x64( this->chunks[0] <= mask, this->chunks[1] <= mask, this->chunks[2] <= mask, this->chunks[3] <= mask ).to_bitmask(); } }; // struct simd8x64 } // namespace simd } // unnamed namespace } // namespace arm64 } // namespace simdjson #endif // SIMDJSON_ARM64_SIMD_H /* end file simdjson/arm64/simd.h */ /* including simdjson/arm64/stringparsing_defs.h: #include "simdjson/arm64/stringparsing_defs.h" */ /* begin file simdjson/arm64/stringparsing_defs.h */ #ifndef SIMDJSON_ARM64_STRINGPARSING_DEFS_H #define SIMDJSON_ARM64_STRINGPARSING_DEFS_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #include "simdjson/arm64/base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/arm64/simd.h" */ /* amalgamation skipped (editor-only): #include "simdjson/arm64/bitmanipulation.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { namespace arm64 { namespace { using namespace simd; // Holds backslashes and quotes locations. 
struct backslash_and_quote { public: static constexpr uint32_t BYTES_PROCESSED = 32; simdjson_inline static backslash_and_quote copy_and_find(const uint8_t *src, uint8_t *dst); simdjson_inline bool has_quote_first() { return ((bs_bits - 1) & quote_bits) != 0; } simdjson_inline bool has_backslash() { return bs_bits != 0; } simdjson_inline int quote_index() { return trailing_zeroes(quote_bits); } simdjson_inline int backslash_index() { return trailing_zeroes(bs_bits); } uint32_t bs_bits; uint32_t quote_bits; }; // struct backslash_and_quote simdjson_inline backslash_and_quote backslash_and_quote::copy_and_find(const uint8_t *src, uint8_t *dst) { // this can read up to 31 bytes beyond the buffer size, but we require // SIMDJSON_PADDING of padding static_assert(SIMDJSON_PADDING >= (BYTES_PROCESSED - 1), "backslash and quote finder must process fewer than SIMDJSON_PADDING bytes"); simd8 v0(src); simd8 v1(src + sizeof(v0)); v0.store(dst); v1.store(dst + sizeof(v0)); // Getting a 64-bit bitmask is much cheaper than multiple 16-bit bitmasks on ARM; therefore, we // smash them together into a 64-byte mask and get the bitmask from there. uint64_t bs_and_quote = simd8x64(v0 == '\\', v1 == '\\', v0 == '"', v1 == '"').to_bitmask(); return { uint32_t(bs_and_quote), // bs_bits uint32_t(bs_and_quote >> 32) // quote_bits }; } } // unnamed namespace } // namespace arm64 } // namespace simdjson #endif // SIMDJSON_ARM64_STRINGPARSING_DEFS_H /* end file simdjson/arm64/stringparsing_defs.h */ #define SIMDJSON_SKIP_BACKSLASH_SHORT_CIRCUIT 1 /* end file simdjson/arm64/begin.h */ /* including simdjson/generic/ondemand/amalgamated.h for arm64: #include "simdjson/generic/ondemand/amalgamated.h" */ /* begin file simdjson/generic/ondemand/amalgamated.h for arm64 */ #if defined(SIMDJSON_CONDITIONAL_INCLUDE) && !defined(SIMDJSON_GENERIC_ONDEMAND_DEPENDENCIES_H) #error simdjson/generic/ondemand/dependencies.h must be included before simdjson/generic/ondemand/amalgamated.h! 
#endif

// Stuff other things depend on
/* including simdjson/generic/ondemand/base.h for arm64: #include "simdjson/generic/ondemand/base.h" */
/* begin file simdjson/generic/ondemand/base.h for arm64 */
// NOTE(review): the #define matching this guard only appears inside the
// "amalgamation skipped" comment below, so the macro is not defined at this
// point of the amalgamated file. Presumably intentional (the generic headers
// are expanded once per implementation) -- confirm against the amalgamation script.
#ifndef SIMDJSON_GENERIC_ONDEMAND_BASE_H

/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */
/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_BASE_H */
/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */
/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */

namespace simdjson {
namespace arm64 {
/**
 * A fast, simple, DOM-like interface that parses JSON as you use it.
 *
 * Designed for maximum speed and a lower memory profile.
 */
namespace ondemand {

/** Represents the depth of a JSON value (number of nested arrays/objects). */
using depth_t = int32_t;

/** @copydoc simdjson::arm64::number_type */
using number_type = simdjson::arm64::number_type;

/** @private Position in the JSON buffer indexes */
using token_position = const uint32_t *;

// Forward declarations only: these names are introduced here so that the
// headers that follow can refer to one another before the types are defined.
class array;
class array_iterator;
class document;
class document_reference;
class document_stream;
class field;
class json_iterator;
enum class json_type; // opaque declaration; enumerators are not listed here
struct number;
class object;
class object_iterator;
class parser;
class raw_json_string;
class token_iterator;
class value;
class value_iterator;

} // namespace ondemand
} // namespace arm64
} // namespace simdjson

#endif // SIMDJSON_GENERIC_ONDEMAND_BASE_H
/* end file simdjson/generic/ondemand/base.h for arm64 */
/* including simdjson/generic/ondemand/value_iterator.h for arm64: #include "simdjson/generic/ondemand/value_iterator.h" */
/* begin file simdjson/generic/ondemand/value_iterator.h for arm64 */
#ifndef SIMDJSON_GENERIC_ONDEMAND_VALUE_ITERATOR_H

/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */
/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_VALUE_ITERATOR_H */
/* amalgamation skipped (editor-only): #include
"simdjson/generic/ondemand/base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { namespace arm64 { namespace ondemand { /** * Iterates through a single JSON value at a particular depth. * * Does not keep track of the type of value: provides methods for objects, arrays and scalars and expects * the caller to call the right ones. * * @private This is not intended for external use. */ class value_iterator { protected: /** The underlying JSON iterator */ json_iterator *_json_iter{}; /** The depth of this value */ depth_t _depth{}; /** * The starting token index for this value */ token_position _start_position{}; public: simdjson_inline value_iterator() noexcept = default; /** * Denote that we're starting a document. */ simdjson_inline void start_document() noexcept; /** * Skips a non-iterated or partially-iterated JSON value, whether it is a scalar, array or object. * * Optimized for scalars. */ simdjson_warn_unused simdjson_inline error_code skip_child() noexcept; /** * Tell whether the iterator is at the EOF mark */ simdjson_inline bool at_end() const noexcept; /** * Tell whether the iterator is at the start of the value */ simdjson_inline bool at_start() const noexcept; /** * Tell whether the value is open--if the value has not been used, or the array/object is still open. */ simdjson_inline bool is_open() const noexcept; /** * Tell whether the value is at an object's first field (just after the {). */ simdjson_inline bool at_first_field() const noexcept; /** * Abandon all iteration. */ simdjson_inline void abandon() noexcept; /** * Get the child value as a value_iterator. */ simdjson_inline value_iterator child_value() const noexcept; /** * Get the depth of this value. */ simdjson_inline int32_t depth() const noexcept; /** * Get the JSON type of this value. 
* * @error TAPE_ERROR when the JSON value is a bad token like "}" "," or "alse". */ simdjson_inline simdjson_result type() const noexcept; /** * @addtogroup object Object iteration * * Methods to iterate and find object fields. These methods generally *assume* the value is * actually an object; the caller is responsible for keeping track of that fact. * * @{ */ /** * Start an object iteration. * * @returns Whether the object had any fields (returns false for empty). * @error INCORRECT_TYPE if there is no opening { */ simdjson_warn_unused simdjson_inline simdjson_result start_object() noexcept; /** * Start an object iteration from the root. * * @returns Whether the object had any fields (returns false for empty). * @error INCORRECT_TYPE if there is no opening { * @error TAPE_ERROR if there is no matching } at end of document */ simdjson_warn_unused simdjson_inline simdjson_result start_root_object() noexcept; /** * Checks whether an object could be started from the root. May be called by start_root_object. * * @returns SUCCESS if it is possible to safely start an object from the root (document level). * @error INCORRECT_TYPE if there is no opening { * @error TAPE_ERROR if there is no matching } at end of document */ simdjson_warn_unused simdjson_inline error_code check_root_object() noexcept; /** * Start an object iteration after the user has already checked and moved past the {. * * Does not move the iterator unless the object is empty ({}). * * @returns Whether the object had any fields (returns false for empty). * @error INCOMPLETE_ARRAY_OR_OBJECT If there are no more tokens (implying the *parent* * array or object is incomplete). */ simdjson_warn_unused simdjson_inline simdjson_result started_object() noexcept; /** * Start an object iteration from the root, after the user has already checked and moved past the {. * * Does not move the iterator unless the object is empty ({}). * * @returns Whether the object had any fields (returns false for empty). 
* @error INCOMPLETE_ARRAY_OR_OBJECT If there are no more tokens (implying the *parent* * array or object is incomplete). */ simdjson_warn_unused simdjson_inline simdjson_result started_root_object() noexcept; /** * Moves to the next field in an object. * * Looks for , and }. If } is found, the object is finished and the iterator advances past it. * Otherwise, it advances to the next value. * * @return whether there is another field in the object. * @error TAPE_ERROR If there is a comma missing between fields. * @error TAPE_ERROR If there is a comma, but not enough tokens remaining to have a key, :, and value. */ simdjson_warn_unused simdjson_inline simdjson_result has_next_field() noexcept; /** * Get the current field's key. */ simdjson_warn_unused simdjson_inline simdjson_result field_key() noexcept; /** * Pass the : in the field and move to its value. */ simdjson_warn_unused simdjson_inline error_code field_value() noexcept; /** * Find the next field with the given key. * * Assumes you have called next_field() or otherwise matched the previous value. * * This means the iterator must be sitting at the next key: * * ``` * { "a": 1, "b": 2 } * ^ * ``` * * Key is *raw JSON,* meaning it will be matched against the verbatim JSON without attempting to * unescape it. This works well for typical ASCII and UTF-8 keys (almost all of them), but may * fail to match some keys with escapes (\u, \n, etc.). */ simdjson_warn_unused simdjson_inline error_code find_field(const std::string_view key) noexcept; /** * Find the next field with the given key, *without* unescaping. This assumes object order: it * will not find the field if it was already passed when looking for some *other* field. * * Assumes you have called next_field() or otherwise matched the previous value. * * This means the iterator must be sitting at the next key: * * ``` * { "a": 1, "b": 2 } * ^ * ``` * * Key is *raw JSON,* meaning it will be matched against the verbatim JSON without attempting to * unescape it. 
This works well for typical ASCII and UTF-8 keys (almost all of them), but may * fail to match some keys with escapes (\u, \n, etc.). */ simdjson_warn_unused simdjson_inline simdjson_result find_field_raw(const std::string_view key) noexcept; /** * Find the field with the given key without regard to order, and *without* unescaping. * * This is an unordered object lookup: if the field is not found initially, it will cycle around and scan from the beginning. * * Assumes you have called next_field() or otherwise matched the previous value. * * This means the iterator must be sitting at the next key: * * ``` * { "a": 1, "b": 2 } * ^ * ``` * * Key is *raw JSON,* meaning it will be matched against the verbatim JSON without attempting to * unescape it. This works well for typical ASCII and UTF-8 keys (almost all of them), but may * fail to match some keys with escapes (\u, \n, etc.). */ simdjson_warn_unused simdjson_inline simdjson_result find_field_unordered_raw(const std::string_view key) noexcept; /** @} */ /** * @addtogroup array Array iteration * Methods to iterate over array elements. These methods generally *assume* the value is actually * an array; the caller is responsible for keeping track of that fact. * @{ */ /** * Check for an opening [ and start an array iteration. * * @returns Whether the array had any elements (returns false for empty). * @error INCORRECT_TYPE If there is no [. */ simdjson_warn_unused simdjson_inline simdjson_result start_array() noexcept; /** * Check for an opening [ and start an array iteration while at the root. * * @returns Whether the array had any elements (returns false for empty). * @error INCORRECT_TYPE If there is no [. * @error TAPE_ERROR if there is no matching ] at end of document */ simdjson_warn_unused simdjson_inline simdjson_result start_root_array() noexcept; /** * Checks whether an array could be started from the root. May be called by start_root_array.
* * @returns SUCCESS if it is possible to safely start an array from the root (document level). * @error INCORRECT_TYPE If there is no [. * @error TAPE_ERROR if there is no matching ] at end of document */ simdjson_warn_unused simdjson_inline error_code check_root_array() noexcept; /** * Start an array iteration, after the user has already checked and moved past the [. * * Does not move the iterator unless the array is empty ([]). * * @returns Whether the array had any elements (returns false for empty). * @error INCOMPLETE_ARRAY_OR_OBJECT If there are no more tokens (implying the *parent* * array or object is incomplete). */ simdjson_warn_unused simdjson_inline simdjson_result started_array() noexcept; /** * Start an array iteration from the root, after the user has already checked and moved past the [. * * Does not move the iterator unless the array is empty ([]). * * @returns Whether the array had any elements (returns false for empty). * @error INCOMPLETE_ARRAY_OR_OBJECT If there are no more tokens (implying the *parent* * array or object is incomplete). */ simdjson_warn_unused simdjson_inline simdjson_result started_root_array() noexcept; /** * Moves to the next element in an array. * * Looks for , and ]. If ] is found, the array is finished and the iterator advances past it. * Otherwise, it advances to the next value. * * @return Whether there is another element in the array. * @error TAPE_ERROR If there is a comma missing between elements. */ simdjson_warn_unused simdjson_inline simdjson_result has_next_element() noexcept; /** * Get a child value iterator. 
*/ simdjson_warn_unused simdjson_inline value_iterator child() const noexcept; /** @} */ /** * @defgroup scalar Scalar values * @addtogroup scalar * @{ */ simdjson_warn_unused simdjson_inline simdjson_result get_string(bool allow_replacement) noexcept; template simdjson_warn_unused simdjson_inline error_code get_string(string_type& receiver, bool allow_replacement) noexcept; simdjson_warn_unused simdjson_inline simdjson_result get_wobbly_string() noexcept; simdjson_warn_unused simdjson_inline simdjson_result get_raw_json_string() noexcept; simdjson_warn_unused simdjson_inline simdjson_result get_uint64() noexcept; simdjson_warn_unused simdjson_inline simdjson_result get_uint64_in_string() noexcept; simdjson_warn_unused simdjson_inline simdjson_result get_int64() noexcept; simdjson_warn_unused simdjson_inline simdjson_result get_int64_in_string() noexcept; simdjson_warn_unused simdjson_inline simdjson_result get_double() noexcept; simdjson_warn_unused simdjson_inline simdjson_result get_double_in_string() noexcept; simdjson_warn_unused simdjson_inline simdjson_result get_bool() noexcept; simdjson_warn_unused simdjson_inline simdjson_result is_null() noexcept; simdjson_warn_unused simdjson_inline bool is_negative() noexcept; simdjson_warn_unused simdjson_inline simdjson_result is_integer() noexcept; simdjson_warn_unused simdjson_inline simdjson_result get_number_type() noexcept; simdjson_warn_unused simdjson_inline simdjson_result get_number() noexcept; simdjson_warn_unused simdjson_inline simdjson_result get_root_string(bool check_trailing, bool allow_replacement) noexcept; template simdjson_warn_unused simdjson_inline error_code get_root_string(string_type& receiver, bool check_trailing, bool allow_replacement) noexcept; simdjson_warn_unused simdjson_inline simdjson_result get_root_wobbly_string(bool check_trailing) noexcept; simdjson_warn_unused simdjson_inline simdjson_result get_root_raw_json_string(bool check_trailing) noexcept; simdjson_warn_unused 
simdjson_inline simdjson_result get_root_uint64(bool check_trailing) noexcept; simdjson_warn_unused simdjson_inline simdjson_result get_root_uint64_in_string(bool check_trailing) noexcept; simdjson_warn_unused simdjson_inline simdjson_result get_root_int64(bool check_trailing) noexcept; simdjson_warn_unused simdjson_inline simdjson_result get_root_int64_in_string(bool check_trailing) noexcept; simdjson_warn_unused simdjson_inline simdjson_result get_root_double(bool check_trailing) noexcept; simdjson_warn_unused simdjson_inline simdjson_result get_root_double_in_string(bool check_trailing) noexcept; simdjson_warn_unused simdjson_inline simdjson_result get_root_bool(bool check_trailing) noexcept; simdjson_warn_unused simdjson_inline bool is_root_negative() noexcept; simdjson_warn_unused simdjson_inline simdjson_result is_root_integer(bool check_trailing) noexcept; simdjson_warn_unused simdjson_inline simdjson_result get_root_number_type(bool check_trailing) noexcept; simdjson_warn_unused simdjson_inline simdjson_result get_root_number(bool check_trailing) noexcept; simdjson_warn_unused simdjson_inline simdjson_result is_root_null(bool check_trailing) noexcept; simdjson_inline error_code error() const noexcept; simdjson_inline uint8_t *&string_buf_loc() noexcept; simdjson_inline const json_iterator &json_iter() const noexcept; simdjson_inline json_iterator &json_iter() noexcept; simdjson_inline void assert_is_valid() const noexcept; simdjson_inline bool is_valid() const noexcept; /** @} */ protected: /** * Restarts an array iteration. * @returns Whether the array has any elements (returns false for empty). */ simdjson_inline simdjson_result reset_array() noexcept; /** * Restarts an object iteration. * @returns Whether the object has any fields (returns false for empty). */ simdjson_inline simdjson_result reset_object() noexcept; /** * move_at_start(): moves us so that we are pointing at the beginning of * the container. 
It updates the index so that at_start() is true and it * syncs the depth. The user can then create a new container instance. * * Usage: used with value::count_elements(). **/ simdjson_inline void move_at_start() noexcept; /** * move_at_container_start(): moves us so that we are pointing at the beginning of * the container so that assert_at_container_start() passes. * * Usage: used with reset_array() and reset_object(). **/ simdjson_inline void move_at_container_start() noexcept; /* Useful for debugging and logging purposes. */ inline std::string to_string() const noexcept; simdjson_inline value_iterator(json_iterator *json_iter, depth_t depth, token_position start_index) noexcept; simdjson_inline simdjson_result parse_null(const uint8_t *json) const noexcept; simdjson_inline simdjson_result parse_bool(const uint8_t *json) const noexcept; simdjson_inline const uint8_t *peek_start() const noexcept; simdjson_inline uint32_t peek_start_length() const noexcept; /** * The general idea of the advance_... methods and the peek_* methods * is that you first peek and check that you have desired type. If you do, * and only if you do, then you advance. * * We used to unconditionally advance. But this made reasoning about our * current state difficult. * Suppose you always advance. Look at the 'value' matching the key * "shadowable" in the following example... * * ({"globals":{"a":{"shadowable":[}}}}) * * If the user thinks it is a Boolean and asks for it, then we check the '[', * decide it is not a Boolean, but still move into the next character ('}'). Now * we are left pointing at '}' right after a '['. And we have not yet reported * an error, only that we do not have a Boolean. * * If, instead, you just stand your ground until it is content that you know, then * you will only even move beyond the '[' if the user tells you that you have an * array. 
So you will be at the '}' character inside the array and, hopefully, you * will then catch the error because an array cannot start with '}', but the code * processing Boolean values does not know this. * * So the contract is: first call 'peek_...' and then call 'advance_...' only * if you have determined that it is a type you can handle. * * Unfortunately, it makes the code more verbose, longer and maybe more error prone. */ simdjson_inline void advance_scalar(const char *type) noexcept; simdjson_inline void advance_root_scalar(const char *type) noexcept; simdjson_inline void advance_non_root_scalar(const char *type) noexcept; simdjson_inline const uint8_t *peek_scalar(const char *type) noexcept; simdjson_inline const uint8_t *peek_root_scalar(const char *type) noexcept; simdjson_inline const uint8_t *peek_non_root_scalar(const char *type) noexcept; simdjson_inline error_code start_container(uint8_t start_char, const char *incorrect_type_message, const char *type) noexcept; simdjson_inline error_code end_container() noexcept; /** * Advance to a place expecting a value (increasing depth). * * @return The current token (the one left behind). * @error TAPE_ERROR If the document ended early. */ simdjson_inline simdjson_result advance_to_value() noexcept; simdjson_inline error_code incorrect_type_error(const char *message) const noexcept; simdjson_inline error_code error_unless_more_tokens(uint32_t tokens=1) const noexcept; simdjson_inline bool is_at_start() const noexcept; /** * is_at_iterator_start() returns true on an array or object after it has just been * created, whether the instance is empty or not. * * Usage: used by array::begin() in debug mode (SIMDJSON_DEVELOPMENT_CHECKS) */ simdjson_inline bool is_at_iterator_start() const noexcept; /** * Assuming that we are within an object, this returns true if we * are pointing at a key. * * Usage: the skip_child() method should never be used while we are pointing * at a key inside an object. 
*/ simdjson_inline bool is_at_key() const noexcept; inline void assert_at_start() const noexcept; inline void assert_at_container_start() const noexcept; inline void assert_at_root() const noexcept; inline void assert_at_child() const noexcept; inline void assert_at_next() const noexcept; inline void assert_at_non_root_start() const noexcept; /** Get the starting position of this value */ simdjson_inline token_position start_position() const noexcept; /** @copydoc error_code json_iterator::position() const noexcept; */ simdjson_inline token_position position() const noexcept; /** @copydoc error_code json_iterator::end_position() const noexcept; */ simdjson_inline token_position last_position() const noexcept; /** @copydoc error_code json_iterator::end_position() const noexcept; */ simdjson_inline token_position end_position() const noexcept; /** @copydoc error_code json_iterator::report_error(error_code error, const char *message) noexcept; */ simdjson_inline error_code report_error(error_code error, const char *message) noexcept; friend class document; friend class object; friend class array; friend class value; }; // value_iterator } // namespace ondemand } // namespace arm64 } // namespace simdjson namespace simdjson { /** Specialization of simdjson_result for arm64::ondemand::value_iterator: it can be built from a value_iterator rvalue, from an error_code, or default-constructed. */ template<> struct simdjson_result : public arm64::implementation_simdjson_result_base { public: simdjson_inline simdjson_result(arm64::ondemand::value_iterator &&value) noexcept; ///< @private simdjson_inline simdjson_result(error_code error) noexcept; ///< @private simdjson_inline simdjson_result() noexcept = default; }; } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_VALUE_ITERATOR_H /* end file simdjson/generic/ondemand/value_iterator.h for arm64 */ /* including simdjson/generic/ondemand/value.h for arm64: #include "simdjson/generic/ondemand/value.h" */ /* begin file simdjson/generic/ondemand/value.h for arm64 */ #ifndef SIMDJSON_GENERIC_ONDEMAND_VALUE_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */
/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_VALUE_H */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { namespace arm64 { namespace ondemand { /** * An ephemeral JSON value returned during iteration. It is only valid for as long as you do * not access more data in the JSON document. */ class value { public: /** * Create a new invalid value. * * Exists so you can declare a variable and later assign to it before use. */ simdjson_inline value() noexcept = default; /** * Get this value as the given type. * * Supported types: object, array, raw_json_string, string_view, uint64_t, int64_t, double, bool * * You may use get_double(), get_bool(), get_uint64(), get_int64(), * get_object(), get_array(), get_raw_json_string(), or get_string() instead. * * @returns A value of the given type, parsed from the JSON. * @returns INCORRECT_TYPE If the JSON value is not the given type. */ template simdjson_inline simdjson_result get() noexcept { // Unless the simdjson library provides an inline implementation, calling this method should // immediately fail. static_assert(!sizeof(T), "The get method with given type is not implemented by the simdjson library. " "The supported types are ondemand::object, ondemand::array, raw_json_string, std::string_view, uint64_t, " "int64_t, double, and bool. We recommend you use get_double(), get_bool(), get_uint64(), get_int64(), " " get_object(), get_array(), get_raw_json_string(), or get_string() instead of the get template."); } /** * Get this value as the given type.
* * Supported types: object, array, raw_json_string, string_view, uint64_t, int64_t, double, bool * * @param out This is set to a value of the given type, parsed from the JSON. If there is an error, this may not be initialized. * @returns INCORRECT_TYPE If the JSON value is not of the given type. * @returns SUCCESS If the parse succeeded and the out parameter was set to the value. */ template simdjson_inline error_code get(T &out) noexcept; /** * Cast this JSON value to an array. * * @returns An object that can be used to iterate the array. * @returns INCORRECT_TYPE If the JSON value is not an array. */ simdjson_inline simdjson_result get_array() noexcept; /** * Cast this JSON value to an object. * * @returns An object that can be used to look up or iterate fields. * @returns INCORRECT_TYPE If the JSON value is not an object. */ simdjson_inline simdjson_result get_object() noexcept; /** * Cast this JSON value to an unsigned integer. * * @returns An unsigned 64-bit integer. * @returns INCORRECT_TYPE If the JSON value is not a 64-bit unsigned integer. */ simdjson_inline simdjson_result get_uint64() noexcept; /** * Cast this JSON value (inside string) to an unsigned integer. * * @returns An unsigned 64-bit integer. * @returns INCORRECT_TYPE If the JSON value is not a 64-bit unsigned integer. */ simdjson_inline simdjson_result get_uint64_in_string() noexcept; /** * Cast this JSON value to a signed integer. * * @returns A signed 64-bit integer. * @returns INCORRECT_TYPE If the JSON value is not a 64-bit integer. */ simdjson_inline simdjson_result get_int64() noexcept; /** * Cast this JSON value (inside string) to a signed integer. * * @returns A signed 64-bit integer. * @returns INCORRECT_TYPE If the JSON value is not a 64-bit integer. */ simdjson_inline simdjson_result get_int64_in_string() noexcept; /** * Cast this JSON value to a double. * * @returns A double. * @returns INCORRECT_TYPE If the JSON value is not a valid floating-point number.
*/ simdjson_inline simdjson_result get_double() noexcept; /** * Cast this JSON value (inside string) to a double. * * @returns A double. * @returns INCORRECT_TYPE If the JSON value is not a valid floating-point number. */ simdjson_inline simdjson_result get_double_in_string() noexcept; /** * Cast this JSON value to a string. * * The string is guaranteed to be valid UTF-8. * * Equivalent to get<std::string_view>(). * * Important: a value should be consumed once. Calling get_string() twice on the same value * is an error. * * @returns A UTF-8 string. The string is stored in the parser and will be invalidated the next * time it parses a document or when it is destroyed. * @returns INCORRECT_TYPE if the JSON value is not a string. */ simdjson_inline simdjson_result get_string(bool allow_replacement = false) noexcept; /** * Attempts to fill the provided std::string reference with the parsed value of the current string. * * The string is guaranteed to be valid UTF-8. * * Important: a value should be consumed once. Calling get_string() twice on the same value * is an error. * * Performance: This method may be slower than get_string() or get_string(bool) because it may need to allocate memory. * We recommend you avoid allocating an std::string unless you need to. * * @returns INCORRECT_TYPE if the JSON value is not a string. Otherwise, we return SUCCESS. */ template simdjson_inline error_code get_string(string_type& receiver, bool allow_replacement = false) noexcept; /** * Cast this JSON value to a "wobbly" string. * * The string may not be a valid UTF-8 string. * See https://simonsapin.github.io/wtf-8/ * * Important: a value should be consumed once. Calling get_wobbly_string() twice on the same value * is an error. * * @returns A string that may not be valid UTF-8. The string is stored in the parser and will be invalidated the next * time it parses a document or when it is destroyed. * @returns INCORRECT_TYPE if the JSON value is not a string.
*/ simdjson_inline simdjson_result get_wobbly_string() noexcept; /** * Cast this JSON value to a raw_json_string. * * The string is guaranteed to be valid UTF-8, and may have escapes in it (e.g. \\ or \n). * * @returns A pointer to the raw JSON for the given string. * @returns INCORRECT_TYPE if the JSON value is not a string. */ simdjson_inline simdjson_result get_raw_json_string() noexcept; /** * Cast this JSON value to a bool. * * @returns A bool value. * @returns INCORRECT_TYPE if the JSON value is not true or false. */ simdjson_inline simdjson_result get_bool() noexcept; /** * Checks if this JSON value is null. If and only if the value is * null, then it is consumed (we advance). If we find a token that * begins with 'n' but is not 'null', then an error is returned. * * @returns Whether the value is null. * @returns INCORRECT_TYPE If the JSON value begins with 'n' and is not 'null'. */ simdjson_inline simdjson_result is_null() noexcept; #if SIMDJSON_EXCEPTIONS /** * Cast this JSON value to an array. * * @returns An object that can be used to iterate the array. * @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not an array. */ simdjson_inline operator array() noexcept(false); /** * Cast this JSON value to an object. * * @returns An object that can be used to look up or iterate fields. * @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not an object. */ simdjson_inline operator object() noexcept(false); /** * Cast this JSON value to an unsigned integer. * * @returns An unsigned 64-bit integer. * @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not a 64-bit unsigned integer. */ simdjson_inline operator uint64_t() noexcept(false); /** * Cast this JSON value to a signed integer. * * @returns A signed 64-bit integer. * @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not a 64-bit integer. */ simdjson_inline operator int64_t() noexcept(false); /** * Cast this JSON value to a double. * * @returns A double.
* @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not a valid floating-point number. */ simdjson_inline operator double() noexcept(false); /** * Cast this JSON value to a string. * * The string is guaranteed to be valid UTF-8. * * Equivalent to get<std::string_view>(). * * @returns A UTF-8 string. The string is stored in the parser and will be invalidated the next * time it parses a document or when it is destroyed. * @exception simdjson_error(INCORRECT_TYPE) if the JSON value is not a string. */ simdjson_inline operator std::string_view() noexcept(false); /** * Cast this JSON value to a raw_json_string. * * The string is guaranteed to be valid UTF-8, and may have escapes in it (e.g. \\ or \n). * * @returns A pointer to the raw JSON for the given string. * @exception simdjson_error(INCORRECT_TYPE) if the JSON value is not a string. */ simdjson_inline operator raw_json_string() noexcept(false); /** * Cast this JSON value to a bool. * * @returns A bool value. * @exception simdjson_error(INCORRECT_TYPE) if the JSON value is not true or false. */ simdjson_inline operator bool() noexcept(false); #endif /** * Begin array iteration. * * Part of the std::iterable interface. * * @returns INCORRECT_TYPE If the JSON value is not an array. */ simdjson_inline simdjson_result begin() & noexcept; /** * Sentinel representing the end of the array. * * Part of the std::iterable interface. */ simdjson_inline simdjson_result end() & noexcept; /** * This method scans the array and counts the number of elements. * The count_elements method should always be called before you have begun * iterating through the array: it is expected that you are pointing at * the beginning of the array. * The runtime complexity is linear in the size of the array. After * calling this function, if successful, the array is 'rewound' to its * beginning as if it had never been accessed. If the JSON is malformed (e.g., * there is a missing comma), then an error is returned and it is no longer * safe to continue.
* * Performance hint: You should only call count_elements() as a last * resort as it may require scanning the document twice or more. */ simdjson_inline simdjson_result count_elements() & noexcept; /** * This method scans the object and counts the number of key-value pairs. * The count_fields method should always be called before you have begun * iterating through the object: it is expected that you are pointing at * the beginning of the object. * The runtime complexity is linear in the size of the object. After * calling this function, if successful, the object is 'rewound' to its * beginning as if it had never been accessed. If the JSON is malformed (e.g., * there is a missing comma), then an error is returned and it is no longer * safe to continue. * * To check that an object is empty, it is more performant to use * the is_empty() method on the object instance. * * Performance hint: You should only call count_fields() as a last * resort as it may require scanning the document twice or more. */ simdjson_inline simdjson_result count_fields() & noexcept; /** * Get the value at the given index in the array. This function has linear-time complexity. * This function should only be called once on an array instance since the array iterator is not reset between each call. * * @return The value at the given index, or: * - INDEX_OUT_OF_BOUNDS if the array index is larger than an array length */ simdjson_inline simdjson_result at(size_t index) noexcept; /** * Look up a field by name on an object (order-sensitive).
* * The following code reads z, then y, then x, and thus will not retrieve x or y if fed the * JSON `{ "x": 1, "y": 2, "z": 3 }`: * * ```c++ * simdjson::ondemand::parser parser; * auto obj = parser.parse(R"( { "x": 1, "y": 2, "z": 3 } )"_padded); * double z = obj.find_field("z"); * double y = obj.find_field("y"); * double x = obj.find_field("x"); * ``` * If you have multiple fields with a matching key ({"x": 1, "x": 1}) be mindful * that only one field is returned. * **Raw Keys:** The lookup will be done against the *raw* key, and will not unescape keys. * e.g. `object["a"]` will match `{ "a": 1 }`, but will *not* match `{ "\u0061": 1 }`. * * @param key The key to look up. * @returns The value of the field, or NO_SUCH_FIELD if the field is not in the object. */ simdjson_inline simdjson_result find_field(std::string_view key) noexcept; /** @overload simdjson_inline simdjson_result find_field(std::string_view key) noexcept; */ simdjson_inline simdjson_result find_field(const char *key) noexcept; /** * Look up a field by name on an object, without regard to key order. * * **Performance Notes:** This is a bit less performant than find_field(), though its effect varies * and often appears negligible. It starts out normally, starting out at the last field; but if * the field is not found, it scans from the beginning of the object to see if it missed it. That * missing case has a non-cache-friendly bump and lots of extra scanning, especially if the object * in question is large. The fact that the extra code is there also bumps the executable size. * * It is the default, however, because it would be highly surprising (and hard to debug) if the * default behavior failed to look up a field just because it was in the wrong order--and many * APIs assume this. Therefore, you must be explicit if you want to treat objects as out of order. * * If you have multiple fields with a matching key ({"x": 1, "x": 1}) be mindful * that only one field is returned.
* * Use find_field() if you are sure fields will be in order (or are willing to treat it as if the * field wasn't there when they aren't). * * @param key The key to look up. * @returns The value of the field, or NO_SUCH_FIELD if the field is not in the object. */ simdjson_inline simdjson_result find_field_unordered(std::string_view key) noexcept; /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) noexcept; */ simdjson_inline simdjson_result find_field_unordered(const char *key) noexcept; /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) noexcept; */ simdjson_inline simdjson_result operator[](std::string_view key) noexcept; /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) noexcept; */ simdjson_inline simdjson_result operator[](const char *key) noexcept; /** * Get the type of this JSON value. It does not validate or consume the value. * E.g., you must still call "is_null()" to check that a value is null even if * "type()" returns json_type::null. * * NOTE: If you're only expecting a value to be one type (a typical case), it's generally * better to just call .get_double, .get_string, etc. and check for INCORRECT_TYPE (or just * let it throw an exception). * * @return The type of JSON value (json_type::array, json_type::object, json_type::string, * json_type::number, json_type::boolean, or json_type::null). * @error TAPE_ERROR when the JSON value is a bad token like "}" "," or "alse". */ simdjson_inline simdjson_result type() noexcept; /** * Checks whether the value is a scalar (string, number, null, Boolean). * Returns false when it is an array or object. * * @returns true if the type is string, number, null, Boolean * @error TAPE_ERROR when the JSON value is a bad token like "}" "," or "alse". */ simdjson_inline simdjson_result is_scalar() noexcept; /** * Checks whether the value is a negative number. * * @returns true if the number is negative.
*/ simdjson_inline bool is_negative() noexcept; /** * Checks whether the value is an integer number. Note that * this requires to partially parse the number string. If * the value is determined to be an integer, it may still * not parse properly as an integer in subsequent steps * (e.g., it might overflow). * * Performance note: if you call this function systematically * before parsing a number, you may have fallen for a performance * anti-pattern. * * @returns true if the number is an integer. */ simdjson_inline simdjson_result is_integer() noexcept; /** * Determine the number type (integer or floating-point number) as quickly * as possible. This function does not fully validate the input. It is * useful when you only need to classify the numbers, without parsing them. * * If you are planning to retrieve the value or you need full validation, * consider using the get_number() method instead: it will fully parse * and validate the input, and give you access to the type: * get_number().get_number_type(). * * get_number_type() is number_type::unsigned_integer if we have * an integer greater or equal to 9223372036854775808 * get_number_type() is number_type::signed_integer if we have an * integer that is less than 9223372036854775808 * Otherwise, get_number_type() has value number_type::floating_point_number * * This function requires processing the number string, but it is expected * to be faster than get_number().get_number_type() because it does not * parse the number value. * * @returns the type of the number */ simdjson_inline simdjson_result get_number_type() noexcept; /** * Attempt to parse an ondemand::number. An ondemand::number may * contain an integer value or a floating-point value, the simdjson * library will autodetect the type. Thus it is a dynamically typed * number. Before accessing the value, you must determine the detected * type.
* * number.get_number_type() is number_type::signed_integer if we have * an integer in [-9223372036854775808,9223372036854775808) * You can recover the value by calling number.get_int64() and you * have that number.is_int64() is true. * * number.get_number_type() is number_type::unsigned_integer if we have * an integer in [9223372036854775808,18446744073709551616) * You can recover the value by calling number.get_uint64() and you * have that number.is_uint64() is true. * * Otherwise, number.get_number_type() has value number_type::floating_point_number * and we have a binary64 number. * You can recover the value by calling number.get_double() and you * have that number.is_double() is true. * * You must check the type before accessing the value: it is an error * to call "get_int64()" when number.get_number_type() is not * number_type::signed_integer and when number.is_int64() is false. * * Performance note: this is designed with performance in mind. When * calling 'get_number()', you scan the number string only once, determining * efficiently the type and storing it in an efficient manner. */ simdjson_warn_unused simdjson_inline simdjson_result get_number() noexcept; /** * Get the raw JSON for this token. * * The string_view will always point into the input buffer. * * The string_view will start at the beginning of the token, and include the entire token * *as well as all spaces until the next token (or EOF).* This means, for example, that a * string token always begins with a " and is always terminated by the final ", possibly * followed by a number of spaces. * * The string_view is *not* null-terminated. However, if this is a scalar (string, number, * boolean, or null), the character after the end of the string_view is guaranteed to be * a non-space token. * * Tokens include: * - { * - [ * - "a string (possibly with UTF-8 or backslashed characters like \\\")". * - -1.2e-100 * - true * - false * - null * * See also value::raw_json().
*/ simdjson_inline std::string_view raw_json_token() noexcept; /** * Get a string_view pointing at this value in the JSON document. * If this element is an array or an object, it consumes the array or the object * and returns a string_view instance corresponding to the * array or object as represented in JSON. It points inside the original document. * If this element is a scalar (string, number, Boolean, null), it returns what * raw_json_token() would return. */ simdjson_inline simdjson_result raw_json() noexcept; /** * Returns the current location in the document if in bounds. */ simdjson_inline simdjson_result current_location() noexcept; /** * Returns the current depth in the document if in bounds. * * E.g., * 0 = finished with document * 1 = document root value (could be [ or {, not yet known) * 2 = , or } inside root array/object * 3 = key or value inside root array/object. */ simdjson_inline int32_t current_depth() const noexcept; /** * Get the value associated with the given JSON pointer. We use the RFC 6901 * https://tools.ietf.org/html/rfc6901 standard. * * ondemand::parser parser; * auto json = R"({ "foo": { "a": [ 10, 20, 30 ] }})"_padded; * auto doc = parser.iterate(json); * doc.at_pointer("/foo/a/1") == 20 * * It is allowed for a key to be the empty string: * * ondemand::parser parser; * auto json = R"({ "": { "a": [ 10, 20, 30 ] }})"_padded; * auto doc = parser.iterate(json); * doc.at_pointer("//a/1") == 20 * * Note that at_pointer() called on the document automatically calls the document's rewind * method between each call. It invalidates all previously accessed arrays, objects and values * that have not been consumed. * * Calling at_pointer() on non-document instances (e.g., arrays and objects) is not * standardized (by RFC 6901). We provide some experimental support for JSON pointers * on non-document instances. Yet it is not the case when calling at_pointer on an array * or an object instance: there is no rewind and no invalidation.
* * You may only call at_pointer on an array after it has been created, but before it has * been first accessed. When calling at_pointer on an array, the pointer is advanced to * the location indicated by the JSON pointer (in case of success). It is no longer possible * to call at_pointer on the same array. * * You may call at_pointer more than once on an object, but each time the pointer is advanced * to be within the value matched by the key indicated by the JSON pointer query. Thus any preceding * key (as well as the current key) can no longer be used with following JSON pointer calls. * * Also note that at_pointer() relies on find_field() which implies that we do not unescape keys when matching. * * @return The value associated with the given JSON pointer, or: * - NO_SUCH_FIELD if a field does not exist in an object * - INDEX_OUT_OF_BOUNDS if an array index is larger than an array length * - INCORRECT_TYPE if a non-integer is used to access an array * - INVALID_JSON_POINTER if the JSON pointer is invalid and cannot be parsed */ simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; protected: /** * Create a value. */ simdjson_inline value(const value_iterator &iter) noexcept; /** * Skip this value, allowing iteration to continue. */ simdjson_inline void skip() noexcept; /** * Start a value at the current position. * * (It should already be started; this is just a self-documentation method.) */ static simdjson_inline value start(const value_iterator &iter) noexcept; /** * Resume a value.
*/ static simdjson_inline value resume(const value_iterator &iter) noexcept; /** * Get the object, starting or resuming it as necessary */ simdjson_inline simdjson_result start_or_resume_object() noexcept; // simdjson_inline void log_value(const char *type) const noexcept; // simdjson_inline void log_error(const char *message) const noexcept; value_iterator iter{}; friend class document; friend class array_iterator; friend class field; friend class object; friend struct simdjson_result; friend struct simdjson_result; }; } // namespace ondemand } // namespace arm64 } // namespace simdjson namespace simdjson { template<> struct simdjson_result : public arm64::implementation_simdjson_result_base { public: simdjson_inline simdjson_result(arm64::ondemand::value &&value) noexcept; ///< @private simdjson_inline simdjson_result(error_code error) noexcept; ///< @private simdjson_inline simdjson_result() noexcept = default; simdjson_inline simdjson_result get_array() noexcept; simdjson_inline simdjson_result get_object() noexcept; simdjson_inline simdjson_result get_uint64() noexcept; simdjson_inline simdjson_result get_uint64_in_string() noexcept; simdjson_inline simdjson_result get_int64() noexcept; simdjson_inline simdjson_result get_int64_in_string() noexcept; simdjson_inline simdjson_result get_double() noexcept; simdjson_inline simdjson_result get_double_in_string() noexcept; simdjson_inline simdjson_result get_string(bool allow_replacement = false) noexcept; template simdjson_inline error_code get_string(string_type& receiver, bool allow_replacement = false) noexcept; simdjson_inline simdjson_result get_wobbly_string() noexcept; simdjson_inline simdjson_result get_raw_json_string() noexcept; simdjson_inline simdjson_result get_bool() noexcept; simdjson_inline simdjson_result is_null() noexcept; template simdjson_inline simdjson_result get() noexcept; template simdjson_inline error_code get(T &out) noexcept; #if SIMDJSON_EXCEPTIONS simdjson_inline operator
arm64::ondemand::array() noexcept(false); simdjson_inline operator arm64::ondemand::object() noexcept(false); simdjson_inline operator uint64_t() noexcept(false); simdjson_inline operator int64_t() noexcept(false); simdjson_inline operator double() noexcept(false); simdjson_inline operator std::string_view() noexcept(false); simdjson_inline operator arm64::ondemand::raw_json_string() noexcept(false); simdjson_inline operator bool() noexcept(false); #endif simdjson_inline simdjson_result count_elements() & noexcept; simdjson_inline simdjson_result count_fields() & noexcept; simdjson_inline simdjson_result at(size_t index) noexcept; simdjson_inline simdjson_result begin() & noexcept; simdjson_inline simdjson_result end() & noexcept; /** * Look up a field by name on an object (order-sensitive). * * The following code reads z, then y, then x, and thus will not retrieve x or y if fed the * JSON `{ "x": 1, "y": 2, "z": 3 }`: * * ```c++ * simdjson::ondemand::parser parser; * auto obj = parser.parse(R"( { "x": 1, "y": 2, "z": 3 } )"_padded); * double z = obj.find_field("z"); * double y = obj.find_field("y"); * double x = obj.find_field("x"); * ``` * * **Raw Keys:** The lookup will be done against the *raw* key, and will not unescape keys. * e.g. `object["a"]` will match `{ "a": 1 }`, but will *not* match `{ "\u0061": 1 }`. * * @param key The key to look up. * @returns The value of the field, or NO_SUCH_FIELD if the field is not in the object. */ simdjson_inline simdjson_result find_field(std::string_view key) noexcept; /** @overload simdjson_inline simdjson_result find_field(std::string_view key) noexcept; */ simdjson_inline simdjson_result find_field(const char *key) noexcept; /** * Look up a field by name on an object, without regard to key order. * * **Performance Notes:** This is a bit less performant than find_field(), though its effect varies * and often appears negligible. 
It starts out normally, starting out at the last field; but if * the field is not found, it scans from the beginning of the object to see if it missed it. That * missing case has a non-cache-friendly bump and lots of extra scanning, especially if the object * in question is large. The fact that the extra code is there also bumps the executable size. * * It is the default, however, because it would be highly surprising (and hard to debug) if the * default behavior failed to look up a field just because it was in the wrong order--and many * APIs assume this. Therefore, you must be explicit if you want to treat objects as out of order. * * Use find_field() if you are sure fields will be in order (or are willing to treat it as if the * field wasn't there when they aren't). * * @param key The key to look up. * @returns The value of the field, or NO_SUCH_FIELD if the field is not in the object. */ simdjson_inline simdjson_result find_field_unordered(std::string_view key) noexcept; /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) noexcept; */ simdjson_inline simdjson_result find_field_unordered(const char *key) noexcept; /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) noexcept; */ simdjson_inline simdjson_result operator[](std::string_view key) noexcept; /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) noexcept; */ simdjson_inline simdjson_result operator[](const char *key) noexcept; /** * Get the type of this JSON value. * * NOTE: If you're only expecting a value to be one type (a typical case), it's generally * better to just call .get_double, .get_string, etc. and check for INCORRECT_TYPE (or just * let it throw an exception). 
*/ simdjson_inline simdjson_result type() noexcept; simdjson_inline simdjson_result is_scalar() noexcept; simdjson_inline simdjson_result is_negative() noexcept; simdjson_inline simdjson_result is_integer() noexcept; simdjson_inline simdjson_result get_number_type() noexcept; simdjson_inline simdjson_result get_number() noexcept; /** @copydoc simdjson_inline std::string_view value::raw_json_token() const noexcept */ simdjson_inline simdjson_result raw_json_token() noexcept; simdjson_inline simdjson_result raw_json() noexcept; /** @copydoc simdjson_inline simdjson_result current_location() noexcept */ simdjson_inline simdjson_result current_location() noexcept; /** @copydoc simdjson_inline int32_t current_depth() const noexcept */ simdjson_inline simdjson_result current_depth() const noexcept; simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; }; } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_VALUE_H /* end file simdjson/generic/ondemand/value.h for arm64 */ /* including simdjson/generic/ondemand/logger.h for arm64: #include "simdjson/generic/ondemand/logger.h" */ /* begin file simdjson/generic/ondemand/logger.h for arm64 */ #ifndef SIMDJSON_GENERIC_ONDEMAND_LOGGER_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_LOGGER_H */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { namespace arm64 { namespace ondemand { // Logging should be free unless SIMDJSON_VERBOSE_LOGGING is set. Importantly, it is critical // that the call to the log functions be side-effect free. Thus, for example, you should not // create temporary std::string instances. 
namespace logger { /** Level passed as the log_level argument of log_line: info or error. */ enum class log_level : int32_t { info = 0, error = 1 }; /* LOG_ENABLED is a compile-time flag: true in the SIMDJSON_VERBOSE_LOGGING branch, false otherwise. */ #if SIMDJSON_VERBOSE_LOGGING static constexpr const bool LOG_ENABLED = true; #else static constexpr const bool LOG_ENABLED = false; #endif // We do not want these functions to be 'really inlined' since real inlining is // for performance purposes and if you are using the loggers, you do not care about // performance (or should not). static inline void log_headers() noexcept; // If args are provided, title will be treated as format string template static inline void log_line(const json_iterator &iter, token_position index, depth_t depth, const char *title_prefix, const char *title, std::string_view detail, logger::log_level level, Args&&... args) noexcept; template static inline void log_line(const json_iterator &iter, const char *title_prefix, const char *title, std::string_view detail, int delta, int depth_delta, logger::log_level level, Args&&... args) noexcept; /* Helpers taking a json_iterator. Some take an explicit token_position index and depth_t depth; the others take int delta and int depth_delta with defaults. log_event defaults delta to 0, whereas log_value, log_start_value, log_end_value and log_error default it to -1. */ static inline void log_event(const json_iterator &iter, const char *type, std::string_view detail="", int delta=0, int depth_delta=0) noexcept; static inline void log_value(const json_iterator &iter, token_position index, depth_t depth, const char *type, std::string_view detail="") noexcept; static inline void log_value(const json_iterator &iter, const char *type, std::string_view detail="", int delta=-1, int depth_delta=0) noexcept; static inline void log_start_value(const json_iterator &iter, token_position index, depth_t depth, const char *type, std::string_view detail="") noexcept; static inline void log_start_value(const json_iterator &iter, const char *type, int delta=-1, int depth_delta=0) noexcept; static inline void log_end_value(const json_iterator &iter, const char *type, int delta=-1, int depth_delta=0) noexcept; static inline void log_error(const json_iterator &iter, token_position index, depth_t depth, const char *error, const char *detail="") noexcept; static inline void log_error(const json_iterator &iter, const 
char *error, const char *detail="", int delta=-1, int depth_delta=0) noexcept; /* The same helper names for a value_iterator; only the delta and depth_delta forms are declared. */ static inline void log_event(const value_iterator &iter, const char *type, std::string_view detail="", int delta=0, int depth_delta=0) noexcept; static inline void log_value(const value_iterator &iter, const char *type, std::string_view detail="", int delta=-1, int depth_delta=0) noexcept; static inline void log_start_value(const value_iterator &iter, const char *type, int delta=-1, int depth_delta=0) noexcept; static inline void log_end_value(const value_iterator &iter, const char *type, int delta=-1, int depth_delta=0) noexcept; static inline void log_error(const value_iterator &iter, const char *error, const char *detail="", int delta=-1, int depth_delta=0) noexcept; } // namespace logger } // namespace ondemand } // namespace arm64 } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_LOGGER_H /* end file simdjson/generic/ondemand/logger.h for arm64 */ /* including simdjson/generic/ondemand/token_iterator.h for arm64: #include "simdjson/generic/ondemand/token_iterator.h" */ /* begin file simdjson/generic/ondemand/token_iterator.h for arm64 */ #ifndef SIMDJSON_GENERIC_ONDEMAND_TOKEN_ITERATOR_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_TOKEN_ITERATOR_H */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/logger.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { namespace arm64 { namespace ondemand { /** * Iterates through JSON tokens (`{` `}` `[` `]` `,` `:` `""` `123` `true` `false` `null`) * detected by stage 1. * * @private This is not intended for external use. 
*/ class token_iterator { public: /** * Create a new invalid token_iterator. * * Exists so you can declare a variable and later assign to it before use. */ simdjson_inline token_iterator() noexcept = default; simdjson_inline token_iterator(token_iterator &&other) noexcept = default; simdjson_inline token_iterator &operator=(token_iterator &&other) noexcept = default; simdjson_inline token_iterator(const token_iterator &other) noexcept = default; simdjson_inline token_iterator &operator=(const token_iterator &other) noexcept = default; /** * Advance to the next token (returning the current one). */ simdjson_inline const uint8_t *return_current_and_advance() noexcept; /** * Reports the current offset in bytes from the start of the underlying buffer. */ simdjson_inline uint32_t current_offset() const noexcept; /** * Get the JSON text for a given token (relative). * * This is not null-terminated; it is a view into the JSON. * * @param delta The relative position of the token to retrieve. e.g. 0 = current token, * 1 = next token, -1 = prev token. * * TODO consider a string_view, assuming the length will get stripped out by the optimizer when * it isn't used ... */ simdjson_inline const uint8_t *peek(int32_t delta=0) const noexcept; /** * Get the maximum length of the JSON text for a given token (relative). * * The length will include any whitespace at the end of the token. * * @param delta The relative position of the token to retrieve. e.g. 0 = current token, * 1 = next token, -1 = prev token. */ simdjson_inline uint32_t peek_length(int32_t delta=0) const noexcept; /** * Get the JSON text for a given token. * * This is not null-terminated; it is a view into the JSON. * * @param position The position of the token. * */ simdjson_inline const uint8_t *peek(token_position position) const noexcept; /** * Get the maximum length of the JSON text for a given token. * * The length will include any whitespace at the end of the token. * * @param position The position of the token. 
*/ simdjson_inline uint32_t peek_length(token_position position) const noexcept; /** * Return the current index. */ simdjson_inline token_position position() const noexcept; /** * Reset to a previously saved index. */ simdjson_inline void set_position(token_position target_position) noexcept; // NOTE: we don't support a full C++ iterator interface, because we expect people to make // different calls to advance the iterator based on *their own* state. simdjson_inline bool operator==(const token_iterator &other) const noexcept; simdjson_inline bool operator!=(const token_iterator &other) const noexcept; simdjson_inline bool operator>(const token_iterator &other) const noexcept; simdjson_inline bool operator>=(const token_iterator &other) const noexcept; simdjson_inline bool operator<(const token_iterator &other) const noexcept; simdjson_inline bool operator<=(const token_iterator &other) const noexcept; protected: simdjson_inline token_iterator(const uint8_t *buf, token_position position) noexcept; /** * Get the index of the JSON text for a given token (relative). * * The result is a uint32_t index, not a pointer into the JSON. * * @param delta The relative position of the token to retrieve. e.g. 0 = current token, * 1 = next token, -1 = prev token. */ simdjson_inline uint32_t peek_index(int32_t delta=0) const noexcept; /** * Get the index of the JSON text for a given token. * * The result is a uint32_t index, not a pointer into the JSON. * * @param position The position of the token. * */ simdjson_inline uint32_t peek_index(token_position position) const noexcept; const uint8_t *buf{}; token_position _position{}; friend class json_iterator; friend class value_iterator; friend class object; template friend simdjson_inline void logger::log_line(const json_iterator &iter, const char *title_prefix, const char *title, std::string_view detail, int delta, int depth_delta, logger::log_level level, Args&&... 
args) noexcept; template friend simdjson_inline void logger::log_line(const json_iterator &iter, token_position index, depth_t depth, const char *title_prefix, const char *title, std::string_view detail, logger::log_level level, Args&&... args) noexcept; }; } // namespace ondemand } // namespace arm64 } // namespace simdjson namespace simdjson { template<> struct simdjson_result : public arm64::implementation_simdjson_result_base { public: simdjson_inline simdjson_result(arm64::ondemand::token_iterator &&value) noexcept; ///< @private simdjson_inline simdjson_result(error_code error) noexcept; ///< @private simdjson_inline simdjson_result() noexcept = default; simdjson_inline ~simdjson_result() noexcept = default; ///< @private }; } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_TOKEN_ITERATOR_H /* end file simdjson/generic/ondemand/token_iterator.h for arm64 */ /* including simdjson/generic/ondemand/json_iterator.h for arm64: #include "simdjson/generic/ondemand/json_iterator.h" */ /* begin file simdjson/generic/ondemand/json_iterator.h for arm64 */ #ifndef SIMDJSON_GENERIC_ONDEMAND_JSON_ITERATOR_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_JSON_ITERATOR_H */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/token_iterator.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { namespace arm64 { namespace ondemand { /** * Iterates through JSON tokens, keeping track of depth and string buffer. * * @private This is not intended for external use. */ class json_iterator { protected: token_iterator token{}; ondemand::parser *parser{}; /** * Next free location in the string buffer. 
* * Used by raw_json_string::unescape() to have a place to unescape strings to. */ uint8_t *_string_buf_loc{}; /** * JSON error, if there is one. * * INCORRECT_TYPE and NO_SUCH_FIELD are *not* stored here, ever. * * PERF NOTE: we *hope* this will be elided into control flow, as it is only used (a) in the first * iteration of the loop, or (b) for the final iteration after a missing comma is found in ++. If * this is not elided, we should make sure it's at least not using up a register. Failing that, * we should store it in document so there's only one of them. */ error_code error{SUCCESS}; /** * Depth of the current token in the JSON. * * - 0 = finished with document * - 1 = document root value (could be [ or {, not yet known) * - 2 = , or } inside root array/object * - 3 = key or value inside root array/object. */ depth_t _depth{}; /** * Beginning of the document indexes. * Normally we have _root == parser->implementation->structural_indexes.get() * but this may differ, especially in streaming mode (where we have several * documents). */ token_position _root{}; /** * Normally, a json_iterator operates over a single document, but in * some cases, we may have a stream of documents. This attribute is meant * as meta-data: the json_iterator works the same irrespective of the * value of this attribute. */ bool _streaming{false}; public: simdjson_inline json_iterator() noexcept = default; simdjson_inline json_iterator(json_iterator &&other) noexcept; simdjson_inline json_iterator &operator=(json_iterator &&other) noexcept; simdjson_inline explicit json_iterator(const json_iterator &other) noexcept = default; simdjson_inline json_iterator &operator=(const json_iterator &other) noexcept = default; /** * Skips a JSON value, whether it is a scalar, array or object. 
*/ simdjson_warn_unused simdjson_inline error_code skip_child(depth_t parent_depth) noexcept; /** * Tell whether the iterator is still at the start */ simdjson_inline bool at_root() const noexcept; /** * Tell whether we should be expected to run in streaming * mode (iterating over many documents). It is pure metadata * that does not affect how the iterator works. It is used by * start_root_array() and start_root_object(). */ simdjson_inline bool streaming() const noexcept; /** * Get the root position (a token_position, not an iterator). */ simdjson_inline token_position root_position() const noexcept; /** * Assert that we are at the document depth (== 1) */ simdjson_inline void assert_at_document_depth() const noexcept; /** * Assert that we are at the root of the document */ simdjson_inline void assert_at_root() const noexcept; /** * Tell whether the iterator is at the EOF mark */ simdjson_inline bool at_end() const noexcept; /** * Tell whether the iterator is live (has not been moved). */ simdjson_inline bool is_alive() const noexcept; /** * Abandon this iterator, setting depth to 0 (as if the document is finished). */ simdjson_inline void abandon() noexcept; /** * Advance the current token without modifying depth. */ simdjson_inline const uint8_t *return_current_and_advance() noexcept; /** * Returns true if there is a single token in the index (i.e., it is * a JSON with a scalar value such as a single number). * * @return whether there is a single token */ simdjson_inline bool is_single_token() const noexcept; /** * Assert that there are at least the given number of tokens left. * * Has no effect in release builds. */ simdjson_inline void assert_more_tokens(uint32_t required_tokens=1) const noexcept; /** * Assert that the given position addresses an actual token (is within bounds). * * Has no effect in release builds. */ simdjson_inline void assert_valid_position(token_position position) const noexcept; /** * Get the JSON text for a given token (relative). 
* * This is not null-terminated; it is a view into the JSON. * * @param delta The relative position of the token to retrieve. e.g. 0 = next token, -1 = prev token. * * TODO consider a string_view, assuming the length will get stripped out by the optimizer when * it isn't used ... */ simdjson_inline const uint8_t *peek(int32_t delta=0) const noexcept; /** * Get the maximum length of the JSON text for the current token (or relative). * * The length will include any whitespace at the end of the token. * * @param delta The relative position of the token to retrieve. e.g. 0 = next token, -1 = prev token. */ simdjson_inline uint32_t peek_length(int32_t delta=0) const noexcept; /** * Get a pointer to the current location in the input buffer. * * This is not null-terminated; it is a view into the JSON. * * You may be pointing outside of the input buffer: it is not generally * safe to dereference this pointer. */ simdjson_inline const uint8_t *unsafe_pointer() const noexcept; /** * Get the JSON text for a given token. * * This is not null-terminated; it is a view into the JSON. * * @param position The position of the token to retrieve. * * TODO consider a string_view, assuming the length will get stripped out by the optimizer when * it isn't used ... */ simdjson_inline const uint8_t *peek(token_position position) const noexcept; /** * Get the maximum length of the JSON text for a given token. * * The length will include any whitespace at the end of the token. * * @param position The position of the token to retrieve. */ simdjson_inline uint32_t peek_length(token_position position) const noexcept; /** * Get the JSON text for the last token in the document. * * This is not null-terminated; it is a view into the JSON. * * TODO consider a string_view, assuming the length will get stripped out by the optimizer when * it isn't used ... */ simdjson_inline const uint8_t *peek_last() const noexcept; /** * Ascend one level. 
* * Validates that the depth - 1 == parent_depth. * * @param parent_depth the expected parent depth. */ simdjson_inline void ascend_to(depth_t parent_depth) noexcept; /** * Descend one level. * * Validates that the new depth == child_depth. * * @param child_depth the expected child depth. */ simdjson_inline void descend_to(depth_t child_depth) noexcept; simdjson_inline void descend_to(depth_t child_depth, int32_t delta) noexcept; /** * Get current depth. */ simdjson_inline depth_t depth() const noexcept; /** * Get current (writeable) location in the string buffer. */ simdjson_inline uint8_t *&string_buf_loc() noexcept; /** * Report an unrecoverable error, preventing further iteration. * * @param error The error to report. Must not be SUCCESS, UNINITIALIZED, INCORRECT_TYPE, or NO_SUCH_FIELD. * @param message An error message to report with the error. */ simdjson_inline error_code report_error(error_code error, const char *message) noexcept; /** * Log error, but don't stop iteration. * @param error The error to report. Must be INCORRECT_TYPE, or NO_SUCH_FIELD. * @param message An error message to report with the error. */ simdjson_inline error_code optional_error(error_code error, const char *message) noexcept; /** * Take an input in json containing max_len characters and attempt to copy it over to tmpbuf, a buffer with * N bytes of capacity. It will return false if N is too small (smaller than max_len) or if it is zero. * The buffer (tmpbuf) is padded with space characters. */ simdjson_warn_unused simdjson_inline bool copy_to_buffer(const uint8_t *json, uint32_t max_len, uint8_t *tmpbuf, size_t N) noexcept; simdjson_inline token_position position() const noexcept; /** * Write the raw_json_string to the string buffer and return a string_view. * Each raw_json_string should be unescaped once, or else the string buffer might * overflow. 
*/ simdjson_inline simdjson_result unescape(raw_json_string in, bool allow_replacement) noexcept; simdjson_inline simdjson_result unescape_wobbly(raw_json_string in) noexcept; simdjson_inline void reenter_child(token_position position, depth_t child_depth) noexcept; simdjson_inline error_code consume_character(char c) noexcept; #if SIMDJSON_DEVELOPMENT_CHECKS simdjson_inline token_position start_position(depth_t depth) const noexcept; simdjson_inline void set_start_position(depth_t depth, token_position position) noexcept; #endif /* Useful for debugging and logging purposes. */ inline std::string to_string() const noexcept; /** * Returns the current location in the document if in bounds. */ inline simdjson_result current_location() const noexcept; /** * Updates this json iterator so that it is back at the beginning of the document, * as if it had just been created. */ inline void rewind() noexcept; /** * This checks whether the {,},[,] are balanced so that the document * ends with proper zero depth. This requires scanning the whole document * and it may be expensive. It is expected that it will be rarely called. * It does not attempt to match { with } and [ with ]. */ inline bool balanced() const noexcept; protected: simdjson_inline json_iterator(const uint8_t *buf, ondemand::parser *parser) noexcept; /// The last token before the end simdjson_inline token_position last_position() const noexcept; /// The token *at* the end. This points at gibberish and should only be used for comparison. simdjson_inline token_position end_position() const noexcept; /// The end of the buffer. 
simdjson_inline token_position end() const noexcept; friend class document; friend class document_stream; friend class object; friend class array; friend class value; friend class raw_json_string; friend class parser; friend class value_iterator; template friend simdjson_inline void logger::log_line(const json_iterator &iter, const char *title_prefix, const char *title, std::string_view detail, int delta, int depth_delta, logger::log_level level, Args&&... args) noexcept; template friend simdjson_inline void logger::log_line(const json_iterator &iter, token_position index, depth_t depth, const char *title_prefix, const char *title, std::string_view detail, logger::log_level level, Args&&... args) noexcept; }; // json_iterator } // namespace ondemand } // namespace arm64 } // namespace simdjson namespace simdjson { template<> struct simdjson_result : public arm64::implementation_simdjson_result_base { public: simdjson_inline simdjson_result(arm64::ondemand::json_iterator &&value) noexcept; ///< @private simdjson_inline simdjson_result(error_code error) noexcept; ///< @private simdjson_inline simdjson_result() noexcept = default; }; } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_JSON_ITERATOR_H /* end file simdjson/generic/ondemand/json_iterator.h for arm64 */ /* including simdjson/generic/ondemand/json_type.h for arm64: #include "simdjson/generic/ondemand/json_type.h" */ /* begin file simdjson/generic/ondemand/json_type.h for arm64 */ #ifndef SIMDJSON_GENERIC_ONDEMAND_JSON_TYPE_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_JSON_TYPE_H */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/numberparsing.h" */ /* amalgamation skipped 
(editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { namespace arm64 { namespace ondemand { /** * The type of a JSON value. */ enum class json_type { // Start at 1 to catch uninitialized / default values more easily array=1, ///< A JSON array ( [ 1, 2, 3 ... ] ) object, ///< A JSON object ( { "a": 1, "b": 2, ... } ) number, ///< A JSON number ( 1 or -2.3 or 4.5e6 ...) string, ///< A JSON string ( "a" or "hello world\n" ...) boolean, ///< A JSON boolean (true or false) null ///< A JSON null (null) }; /** * A type representing a JSON number. * The design of the struct is deliberately straight-forward. All * functions return standard values with no error check. */ struct number { /** * return the automatically determined type of * the number: number_type::floating_point_number, * number_type::signed_integer or number_type::unsigned_integer. * * enum class number_type { * floating_point_number=1, /// a binary64 number * signed_integer, /// a signed integer that fits in a 64-bit word using two's complement * unsigned_integer /// a positive integer larger than or equal to 1<<63 * }; */ simdjson_inline ondemand::number_type get_number_type() const noexcept; /** * return true if the automatically determined type of * the number is number_type::unsigned_integer. */ simdjson_inline bool is_uint64() const noexcept; /** * return the value as a uint64_t, only valid if is_uint64() is true. */ simdjson_inline uint64_t get_uint64() const noexcept; simdjson_inline operator uint64_t() const noexcept; /** * return true if the automatically determined type of * the number is number_type::signed_integer. */ simdjson_inline bool is_int64() const noexcept; /** * return the value as an int64_t, only valid if is_int64() is true. */ simdjson_inline int64_t get_int64() const noexcept; simdjson_inline operator int64_t() const noexcept; /** * return true if the automatically determined type of * the number is number_type::floating_point_number. 
*/ simdjson_inline bool is_double() const noexcept; /** * return the value as a double, only valid if is_double() is true. */ simdjson_inline double get_double() const noexcept; simdjson_inline operator double() const noexcept; /** * Convert the number to a double. Though it always succeeds, the conversion * may be lossy if the number cannot be represented exactly. */ simdjson_inline double as_double() const noexcept; protected: /** * The next block of declarations is designed so that we can call the number parsing * functions on a number type. They are protected and should never be used outside * of the core simdjson library. */ friend class value_iterator; template friend error_code numberparsing::slow_float_parsing(simdjson_unused const uint8_t * src, W writer); template friend error_code numberparsing::write_float(const uint8_t *const src, bool negative, uint64_t i, const uint8_t * start_digits, size_t digit_count, int64_t exponent, W &writer); template friend error_code numberparsing::parse_number(const uint8_t *const src, W &writer); /** Store a signed 64-bit value to the number. */ simdjson_inline void append_s64(int64_t value) noexcept; /** Store an unsigned 64-bit value to the number. */ simdjson_inline void append_u64(uint64_t value) noexcept; /** Store a double value to the number. */ simdjson_inline void append_double(double value) noexcept; /** Specifies that the value is a double, but leave it undefined. */ simdjson_inline void skip_double() noexcept; /** * End of friend declarations. */ /** * Our attributes are a union type (size = 64 bits) * followed by a type indicator. */ union { double floating_point_number; int64_t signed_integer; uint64_t unsigned_integer; } payload{0}; number_type type{number_type::signed_integer}; }; /** * Write the JSON type to the output stream * * @param out The output stream. * @param type The json_type. 
*/ inline std::ostream& operator<<(std::ostream& out, json_type type) noexcept; #if SIMDJSON_EXCEPTIONS /** * Send JSON type to an output stream. * * @param out The output stream. * @param type The json_type. * @throw simdjson_error if the result being printed has an error. If there is an error with the * underlying output stream, that error will be propagated (simdjson_error will not be * thrown). */ inline std::ostream& operator<<(std::ostream& out, simdjson_result &type) noexcept(false); #endif } // namespace ondemand } // namespace arm64 } // namespace simdjson namespace simdjson { template<> struct simdjson_result : public arm64::implementation_simdjson_result_base { public: simdjson_inline simdjson_result(arm64::ondemand::json_type &&value) noexcept; ///< @private simdjson_inline simdjson_result(error_code error) noexcept; ///< @private simdjson_inline simdjson_result() noexcept = default; simdjson_inline ~simdjson_result() noexcept = default; ///< @private }; } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_JSON_TYPE_H /* end file simdjson/generic/ondemand/json_type.h for arm64 */ /* including simdjson/generic/ondemand/raw_json_string.h for arm64: #include "simdjson/generic/ondemand/raw_json_string.h" */ /* begin file simdjson/generic/ondemand/raw_json_string.h for arm64 */ #ifndef SIMDJSON_GENERIC_ONDEMAND_RAW_JSON_STRING_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_RAW_JSON_STRING_H */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { namespace arm64 { namespace ondemand { /** * A string escaped per JSON rules, terminated with quote ("). 
They are used to represent * unescaped keys inside JSON documents. * * (In other words, a pointer to the beginning of a string, just after the start quote, inside a * JSON file.) * * This class is deliberately simplistic and has little functionality. You can * compare a raw_json_string instance with an unescaped C string, but * that is nearly all you can do. * * The raw_json_string is unescaped. If you wish to write an unescaped version of it to your own * buffer, you may do so using the parser.unescape(string, buff) method, using an ondemand::parser * instance. Doing so requires you to have a sufficiently large buffer. * * The raw_json_string instances originate typically from field instance which in turn represent * key-value pairs from object instances. From a field instance, you get the raw_json_string * instance by calling key(). You can, if you want a more usable string_view instance, call * the unescaped_key() method on the field instance. You may also create a raw_json_string from * any other string value, with the value.get_raw_json_string() method. Again, you can get * a more usable string_view instance by calling get_string(). * */ class raw_json_string { public: /** * Create a new invalid raw_json_string. * * Exists so you can declare a variable and later assign to it before use. */ simdjson_inline raw_json_string() noexcept = default; /** * Create a new raw_json_string pointed at the given location in the JSON. * * The given location must be just *after* the beginning quote (") in the JSON file. * * It *must* be terminated by a ", and be a valid JSON string. */ simdjson_inline raw_json_string(const uint8_t * _buf) noexcept; /** * Get the raw pointer to the beginning of the string in the JSON (just after the "). * * It is possible for this function to return a null pointer if the instance * has outlived its existence.
*/ simdjson_inline const char * raw() const noexcept; /** * This compares the current instance to the std::string_view target: returns true if * they are byte-by-byte equal (no escaping is done) on target.size() characters, * and if the raw_json_string instance has a quote character at byte index target.size(). * We never read more than length + 1 bytes in the raw_json_string instance. * If length is smaller than target.size(), this will return false. * * The std::string_view instance may contain any characters. However, the caller * is responsible for setting length so that length bytes may be read in the * raw_json_string. * * Performance: the comparison may be done using memcmp which may be efficient * for long strings. */ simdjson_inline bool unsafe_is_equal(size_t length, std::string_view target) const noexcept; /** * This compares the current instance to the std::string_view target: returns true if * they are byte-by-byte equal (no escaping is done). * The std::string_view instance should not contain unescaped quote characters: * the caller is responsible for this check. See is_free_from_unescaped_quote. * * Performance: the comparison is done byte-by-byte which might be inefficient for * long strings. * * If target is a compile-time constant, and your compiler likes you, * you should be able to do the following without performance penalty... * * static_assert(raw_json_string::is_free_from_unescaped_quote(target), ""); * s.unsafe_is_equal(target); */ simdjson_inline bool unsafe_is_equal(std::string_view target) const noexcept; /** * This compares the current instance to the C string target: returns true if * they are byte-by-byte equal (no escaping is done). * The provided C string should not contain an unescaped quote character: * the caller is responsible for this check. See is_free_from_unescaped_quote. * * If target is a compile-time constant, and your compiler likes you, * you should be able to do the following without performance penalty... 
* * static_assert(raw_json_string::is_free_from_unescaped_quote(target), ""); * s.unsafe_is_equal(target); */ simdjson_inline bool unsafe_is_equal(const char* target) const noexcept; /** * This compares the current instance to the std::string_view target: returns true if * they are byte-by-byte equal (no escaping is done). */ simdjson_inline bool is_equal(std::string_view target) const noexcept; /** * This compares the current instance to the C string target: returns true if * they are byte-by-byte equal (no escaping is done). */ simdjson_inline bool is_equal(const char* target) const noexcept; /** * Returns true if target is free from unescaped quote. If target is known at * compile-time, we might expect the computation to happen at compile time with * many compilers (not all!). */ static simdjson_inline bool is_free_from_unescaped_quote(std::string_view target) noexcept; static simdjson_inline bool is_free_from_unescaped_quote(const char* target) noexcept; private: /** * This will set the inner pointer to zero, effectively making * this instance unusable. */ simdjson_inline void consume() noexcept { buf = nullptr; } /** * Checks whether the inner pointer is non-null and thus usable. */ simdjson_inline simdjson_warn_unused bool alive() const noexcept { return buf != nullptr; } /** * Unescape this JSON string, replacing \\ with \, \n with newline, etc. * The result will be valid UTF-8. * * ## IMPORTANT: string_view lifetime * * The string_view is only valid until the next iterate() call on the parser. * * @param iter A json_iterator, which contains a buffer where the string will be written. * @param allow_replacement Whether we allow replacement of invalid surrogate pairs. */ simdjson_inline simdjson_warn_unused simdjson_result unescape(json_iterator &iter, bool allow_replacement) const noexcept; /** * Unescape this JSON string, replacing \\ with \, \n with newline, etc. * The result may not be valid UTF-8. See
https://simonsapin.github.io/wtf-8/ * * ## IMPORTANT: string_view lifetime * * The string_view is only valid until the next iterate() call on the parser. * * @param iter A json_iterator, which contains a buffer where the string will be written. */ simdjson_inline simdjson_warn_unused simdjson_result unescape_wobbly(json_iterator &iter) const noexcept; const uint8_t * buf{}; friend class object; friend class field; friend class parser; friend struct simdjson_result; }; simdjson_unused simdjson_inline std::ostream &operator<<(std::ostream &, const raw_json_string &) noexcept; /** * Comparisons between raw_json_string and std::string_view instances are potentially unsafe: the user is responsible * for providing a string with no unescaped quote. Note that unescaped quotes cannot be present in valid JSON strings. */ simdjson_unused simdjson_inline bool operator==(const raw_json_string &a, std::string_view c) noexcept; simdjson_unused simdjson_inline bool operator==(std::string_view c, const raw_json_string &a) noexcept; simdjson_unused simdjson_inline bool operator!=(const raw_json_string &a, std::string_view c) noexcept; simdjson_unused simdjson_inline bool operator!=(std::string_view c, const raw_json_string &a) noexcept; } // namespace ondemand } // namespace arm64 } // namespace simdjson namespace simdjson { template<> struct simdjson_result : public arm64::implementation_simdjson_result_base { public: simdjson_inline simdjson_result(arm64::ondemand::raw_json_string &&value) noexcept; ///< @private simdjson_inline simdjson_result(error_code error) noexcept; ///< @private simdjson_inline simdjson_result() noexcept = default; simdjson_inline ~simdjson_result() noexcept = default; ///< @private simdjson_inline simdjson_result raw() const noexcept; simdjson_inline simdjson_warn_unused simdjson_result unescape(arm64::ondemand::json_iterator &iter, bool allow_replacement) const noexcept; simdjson_inline simdjson_warn_unused simdjson_result
unescape_wobbly(arm64::ondemand::json_iterator &iter) const noexcept; }; } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_RAW_JSON_STRING_H /* end file simdjson/generic/ondemand/raw_json_string.h for arm64 */ /* including simdjson/generic/ondemand/parser.h for arm64: #include "simdjson/generic/ondemand/parser.h" */ /* begin file simdjson/generic/ondemand/parser.h for arm64 */ #ifndef SIMDJSON_GENERIC_ONDEMAND_PARSER_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_PARSER_H */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ #include namespace simdjson { namespace arm64 { namespace ondemand { /** * The default batch size for document_stream instances for this On Demand kernel. * Note that different On Demand kernel may use a different DEFAULT_BATCH_SIZE value * in the future. */ static constexpr size_t DEFAULT_BATCH_SIZE = 1000000; /** * Some adversary might try to set the batch size to 0 or 1, which might cause problems. * We set a minimum of 32B since anything else is highly likely to be an error. In practice, * most users will want a much larger batch size. * * All non-negative MINIMAL_BATCH_SIZE values should be 'safe' except that, obviously, no JSON * document can ever span 0 or 1 byte and that very large values would create memory allocation issues. */ static constexpr size_t MINIMAL_BATCH_SIZE = 32; /** * A JSON fragment iterator. * * This holds the actual iterator as well as the buffer for writing strings. */ class parser { public: /** * Create a JSON parser. * * The new parser will have zero capacity. 
*/ inline explicit parser(size_t max_capacity = SIMDJSON_MAXSIZE_BYTES) noexcept; inline parser(parser &&other) noexcept = default; simdjson_inline parser(const parser &other) = delete; simdjson_inline parser &operator=(const parser &other) = delete; simdjson_inline parser &operator=(parser &&other) noexcept = default; /** Deallocate the JSON parser. */ inline ~parser() noexcept = default; /** * Start iterating an on-demand JSON document. * * ondemand::parser parser; * document doc = parser.iterate(json); * * It is expected that the content is a valid UTF-8 file, containing a valid JSON document. * Otherwise the iterate method may return an error. In particular, the whole input should be * valid: we do not attempt to tolerate incorrect content either before or after a JSON * document. If there is a UTF-8 BOM, the parser skips it. * * ### IMPORTANT: Validate what you use * * Calling iterate on an invalid JSON document may not immediately trigger an error. The call to * iterate does not parse and validate the whole document. * * ### IMPORTANT: Buffer Lifetime * * Because parsing is done while you iterate, you *must* keep the JSON buffer around at least as * long as the document iteration. * * ### IMPORTANT: Document Lifetime * * Only one iteration at a time can happen per parser, and the parser *must* be kept alive during * iteration to ensure intermediate buffers can be accessed. Any document must be destroyed before * you call iterate() again or destroy the parser. * * ### REQUIRED: Buffer Padding * * The buffer must have at least SIMDJSON_PADDING extra allocated bytes. It does not matter what * those bytes are initialized to, as long as they are allocated. These bytes will be read: if you are * using a sanitizer that verifies that no uninitialized byte is read, then you should initialize the * SIMDJSON_PADDING bytes to avoid runtime warnings. * * @param json The JSON to parse. * @param len The length of the JSON.
* @param capacity The number of bytes allocated in the JSON (must be at least len+SIMDJSON_PADDING). * * @return The document, or an error: * - INSUFFICIENT_PADDING if the input has less than SIMDJSON_PADDING extra bytes. * - MEMALLOC if the parser does not have enough capacity, and memory * allocation fails. * - EMPTY if the document is all whitespace. * - UTF8_ERROR if the document is not valid UTF-8. * - UNESCAPED_CHARS if a string contains control characters that must be escaped. * - UNCLOSED_STRING if there is an unclosed string in the document. */ simdjson_warn_unused simdjson_result iterate(padded_string_view json) & noexcept; /** @overload simdjson_result iterate(padded_string_view json) & noexcept */ simdjson_warn_unused simdjson_result iterate(const char *json, size_t len, size_t capacity) & noexcept; /** @overload simdjson_result iterate(padded_string_view json) & noexcept */ simdjson_warn_unused simdjson_result iterate(const uint8_t *json, size_t len, size_t capacity) & noexcept; /** @overload simdjson_result iterate(padded_string_view json) & noexcept */ simdjson_warn_unused simdjson_result iterate(std::string_view json, size_t capacity) & noexcept; /** @overload simdjson_result iterate(padded_string_view json) & noexcept */ simdjson_warn_unused simdjson_result iterate(const std::string &json) & noexcept; /** @overload simdjson_result iterate(padded_string_view json) & noexcept */ simdjson_warn_unused simdjson_result iterate(std::string &json) & noexcept; /** @overload simdjson_result iterate(padded_string_view json) & noexcept */ simdjson_warn_unused simdjson_result iterate(const simdjson_result &json) & noexcept; /** @overload simdjson_result iterate(padded_string_view json) & noexcept */ simdjson_warn_unused simdjson_result iterate(const simdjson_result &json) & noexcept; /** @overload simdjson_result iterate(padded_string_view json) & noexcept */ simdjson_warn_unused simdjson_result iterate(padded_string &&json) & noexcept =
delete; /** * @private * * Start iterating an on-demand JSON document. * * ondemand::parser parser; * json_iterator doc = parser.iterate_raw(json); * * ### IMPORTANT: Buffer Lifetime * * Because parsing is done while you iterate, you *must* keep the JSON buffer around at least as * long as the document iteration. * * ### IMPORTANT: Document Lifetime * * Only one iteration at a time can happen per parser, and the parser *must* be kept alive during * iteration to ensure intermediate buffers can be accessed. Any document must be destroyed before * you start another iteration or destroy the parser. * * The ondemand::document instance holds the iterator. The document must remain in scope * while you are accessing instances of ondemand::value, ondemand::object, ondemand::array. * * ### REQUIRED: Buffer Padding * * The buffer must have at least SIMDJSON_PADDING extra allocated bytes. It does not matter what * those bytes are initialized to, as long as they are allocated. These bytes will be read: if you are * using a sanitizer that verifies that no uninitialized byte is read, then you should initialize the * SIMDJSON_PADDING bytes to avoid runtime warnings. * * @param json The JSON to parse. * * @return The iterator, or an error: * - INSUFFICIENT_PADDING if the input has less than SIMDJSON_PADDING extra bytes. * - MEMALLOC if the parser does not have enough capacity, and memory * allocation fails. * - EMPTY if the document is all whitespace. * - UTF8_ERROR if the document is not valid UTF-8. * - UNESCAPED_CHARS if a string contains control characters that must be escaped. * - UNCLOSED_STRING if there is an unclosed string in the document. */ simdjson_warn_unused simdjson_result iterate_raw(padded_string_view json) & noexcept; /** * Parse a buffer containing many JSON documents.
* * auto json = R"({ "foo": 1 } { "foo": 2 } { "foo": 3 } )"_padded; * ondemand::parser parser; * ondemand::document_stream docs = parser.iterate_many(json); * for (auto & doc : docs) { * std::cout << doc["foo"] << std::endl; * } * // Prints 1 2 3 * * No copy of the input buffer is made. * * The function is lazy: it may be that no more than one JSON document at a time is parsed. * * The caller is responsible for ensuring that the input string data remains unchanged and is * not deleted during the loop. * * ### Format * * The buffer must contain a series of one or more JSON documents, concatenated into a single * buffer, separated by ASCII whitespace. It effectively parses until it has a fully valid document, * then starts parsing the next document at that point. (It does this with more parallelism and * lookahead than you might think, though.) * * Documents that consist of an object or array may omit the whitespace between them, concatenating * with no separator. Documents that consist of a single primitive (i.e. documents that are not * arrays or objects) MUST be separated with ASCII whitespace. * * The characters inside a JSON document, and between JSON documents, must be valid Unicode (UTF-8). * If there is a UTF-8 BOM, the parser skips it. * * The documents must not exceed batch_size bytes (by default 1MB) or they will fail to parse. * Setting batch_size to excessively large or excessively small values may negatively impact * performance. * * ### REQUIRED: Buffer Padding * * The buffer must have at least SIMDJSON_PADDING extra allocated bytes. It does not matter what * those bytes are initialized to, as long as they are allocated. These bytes will be read: if you are * using a sanitizer that verifies that no uninitialized byte is read, then you should initialize the * SIMDJSON_PADDING bytes to avoid runtime warnings. * * ### Threads * * When compiled with SIMDJSON_THREADS_ENABLED, this method will use a single thread under the * hood to do some lookahead.
* * ### Parser Capacity * * If the parser's current capacity is less than batch_size, it will allocate enough capacity * to handle it (up to max_capacity). * * @param buf The concatenated JSON to parse. * @param len The length of the concatenated JSON. * @param batch_size The batch size to use. MUST be larger than the largest document. The sweet * spot is cache-related: small enough to fit in cache, yet big enough to * parse as many documents as possible in one tight loop. * Defaults to DEFAULT_BATCH_SIZE (1000000 bytes, about 1MB). * @param allow_comma_separated (defaults to false) This allows a mode where the documents are * separated by commas instead of whitespace. It comes with a performance * penalty because the entire document is indexed at once (and the document must be * less than 4 GB), and there is no multithreading. In this mode, the batch_size parameter * is effectively ignored, as it is set to at least the document size. * @return The stream, or an error. An empty input will yield 0 documents rather than an EMPTY error. Errors: * - MEMALLOC if the parser does not have enough capacity and memory allocation fails * - CAPACITY if the parser does not have enough capacity and batch_size > max_capacity. * - other json errors if parsing fails. You should not rely on these errors to always be the same for the * same document: they may vary under runtime dispatch (so they may vary depending on your system and hardware).
*/ inline simdjson_result iterate_many(const uint8_t *buf, size_t len, size_t batch_size = DEFAULT_BATCH_SIZE, bool allow_comma_separated = false) noexcept; /** @overload iterate_many(const uint8_t *buf, size_t len, size_t batch_size) */ inline simdjson_result iterate_many(const char *buf, size_t len, size_t batch_size = DEFAULT_BATCH_SIZE, bool allow_comma_separated = false) noexcept; /** @overload iterate_many(const uint8_t *buf, size_t len, size_t batch_size) */ inline simdjson_result iterate_many(const std::string &s, size_t batch_size = DEFAULT_BATCH_SIZE, bool allow_comma_separated = false) noexcept; inline simdjson_result iterate_many(const std::string &&s, size_t batch_size, bool allow_comma_separated = false) = delete;// unsafe /** @overload iterate_many(const uint8_t *buf, size_t len, size_t batch_size) */ inline simdjson_result iterate_many(const padded_string &s, size_t batch_size = DEFAULT_BATCH_SIZE, bool allow_comma_separated = false) noexcept; inline simdjson_result iterate_many(const padded_string &&s, size_t batch_size, bool allow_comma_separated = false) = delete;// unsafe /** @private We do not want to allow implicit conversion from C string to std::string. */ simdjson_result iterate_many(const char *buf, size_t batch_size = DEFAULT_BATCH_SIZE) noexcept = delete; /** The capacity of this parser (the largest document it can process). */ simdjson_inline size_t capacity() const noexcept; /** The maximum capacity of this parser (the largest document it is allowed to process). */ simdjson_inline size_t max_capacity() const noexcept; simdjson_inline void set_max_capacity(size_t max_capacity) noexcept; /** * The maximum depth of this parser (the most deeply nested objects and arrays it can process). * This parameter is only relevant when the macro SIMDJSON_DEVELOPMENT_CHECKS is set to true. * The document's instance current_depth() method should be used to monitor the parsing * depth and limit it if desired.
*/ simdjson_inline size_t max_depth() const noexcept; /** * Ensure this parser has enough memory to process JSON documents up to `capacity` bytes in length * and `max_depth` depth. * * The max_depth parameter is only relevant when the macro SIMDJSON_DEVELOPMENT_CHECKS is set to true. * The document's instance current_depth() method should be used to monitor the parsing * depth and limit it if desired. * * @param capacity The new capacity. * @param max_depth The new max_depth. Defaults to DEFAULT_MAX_DEPTH. * @return The error, if there is one. */ simdjson_warn_unused error_code allocate(size_t capacity, size_t max_depth=DEFAULT_MAX_DEPTH) noexcept; #ifdef SIMDJSON_THREADS_ENABLED /** * The parser instance can use threads when they are available to speed up some * operations. It is enabled by default. Changing this attribute will change the * behavior of the parser for future operations. */ bool threaded{true}; #endif /** * Unescape this JSON string, replacing \\ with \, \n with newline, etc. to a user-provided buffer. * The result must be valid UTF-8. * The provided pointer is advanced to the end of the string by reference, and a string_view instance * is returned. You can ensure that your buffer is large enough by allocating a block of memory at least * as large as the input JSON plus SIMDJSON_PADDING and then unescape all strings to this one buffer. * * This unescape function is a low-level function. If you want a more user-friendly approach, you should * avoid raw_json_string instances (e.g., by calling unescaped_key() instead of key() or get_string() * instead of get_raw_json_string()). * * ## IMPORTANT: string_view lifetime * * The string_view is only valid as long as the bytes in dst. * * @param in The raw_json_string to unescape. * @param dst A pointer to a buffer at least large enough to write this string as well as * an additional SIMDJSON_PADDING bytes. * @param allow_replacement Whether we allow a replacement if the input string contains unmatched surrogate pairs.
* @return A string_view pointing at the unescaped string in dst * @error STRING_ERROR if escapes are incorrect. */ simdjson_inline simdjson_result unescape(raw_json_string in, uint8_t *&dst, bool allow_replacement = false) const noexcept; /** * Unescape this JSON string, replacing \\ with \, \n with newline, etc. to a user-provided buffer. * The result may not be valid UTF-8. See https://simonsapin.github.io/wtf-8/ * The provided pointer is advanced to the end of the string by reference, and a string_view instance * is returned. You can ensure that your buffer is large enough by allocating a block of memory at least * as large as the input JSON plus SIMDJSON_PADDING and then unescape all strings to this one buffer. * * This unescape function is a low-level function. If you want a more user-friendly approach, you should * avoid raw_json_string instances (e.g., by calling unescaped_key() instead of key() or get_string() * instead of get_raw_json_string()). * * ## IMPORTANT: string_view lifetime * * The string_view is only valid as long as the bytes in dst. * * @param in The raw_json_string to unescape. * @param dst A pointer to a buffer at least large enough to write this string as well as * an additional SIMDJSON_PADDING bytes. * @return A string_view pointing at the unescaped string in dst * @error STRING_ERROR if escapes are incorrect.
*/ simdjson_inline simdjson_result unescape_wobbly(raw_json_string in, uint8_t *&dst) const noexcept; private: /** @private [for benchmarking access] The implementation to use */ std::unique_ptr implementation{}; size_t _capacity{0}; size_t _max_capacity; size_t _max_depth{DEFAULT_MAX_DEPTH}; std::unique_ptr string_buf{}; #if SIMDJSON_DEVELOPMENT_CHECKS std::unique_ptr start_positions{}; #endif friend class json_iterator; friend class document_stream; }; } // namespace ondemand } // namespace arm64 } // namespace simdjson namespace simdjson { template<> struct simdjson_result : public arm64::implementation_simdjson_result_base { public: simdjson_inline simdjson_result(arm64::ondemand::parser &&value) noexcept; ///< @private simdjson_inline simdjson_result(error_code error) noexcept; ///< @private simdjson_inline simdjson_result() noexcept = default; }; } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_PARSER_H /* end file simdjson/generic/ondemand/parser.h for arm64 */ // All other declarations /* including simdjson/generic/ondemand/array.h for arm64: #include "simdjson/generic/ondemand/array.h" */ /* begin file simdjson/generic/ondemand/array.h for arm64 */ #ifndef SIMDJSON_GENERIC_ONDEMAND_ARRAY_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_ARRAY_H */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { namespace arm64 { namespace ondemand { /** * A forward-only JSON array. */ class array { public: /** * Create a new invalid array. * * Exists so you can declare a variable and later assign to it before use.
*/ simdjson_inline array() noexcept = default; /** * Begin array iteration. * * Part of the std::iterable interface. */ simdjson_inline simdjson_result begin() noexcept; /** * Sentinel representing the end of the array. * * Part of the std::iterable interface. */ simdjson_inline simdjson_result end() noexcept; /** * This method scans the array and counts the number of elements. * The count_elements method should always be called before you have begun * iterating through the array: it is expected that you are pointing at * the beginning of the array. * The runtime complexity is linear in the size of the array. After * calling this function, if successful, the array is rewound to its * beginning as if it had never been accessed. If the JSON is malformed (e.g., * there is a missing comma), then an error is returned and it is no longer * safe to continue. * * To check that an array is empty, it is more performant to use * the is_empty() method. */ simdjson_inline simdjson_result count_elements() & noexcept; /** * This method scans the beginning of the array and checks whether the * array is empty. * The runtime complexity is constant time. After * calling this function, if successful, the array is rewound to its * beginning as if it had never been accessed. If the JSON is malformed (e.g., * there is a missing comma), then an error is returned and it is no longer * safe to continue. */ simdjson_inline simdjson_result is_empty() & noexcept; /** * Reset the iterator so that we are pointing back at the * beginning of the array. You should still consume values only once even if you * can iterate through the array more than once. If you unescape a string * within the array more than once, you have unsafe code. Note that rewinding * an array means that you may need to reparse it anew: it is not a free * operation.
* * @returns true if the array contains some elements (not empty) */ inline simdjson_result reset() & noexcept; /** * Get the value associated with the given JSON pointer. We use the RFC 6901 * https://tools.ietf.org/html/rfc6901 standard, interpreting the current node * as the root of its own JSON document. * * ondemand::parser parser; * auto json = R"([ { "foo": { "a": [ 10, 20, 30 ] }} ])"_padded; * auto doc = parser.iterate(json); * doc.at_pointer("/0/foo/a/1") == 20 * * Note that at_pointer() called on the document automatically calls the document's rewind * method between each call. It invalidates all previously accessed arrays, objects and values * that have not been consumed. Yet it is not the case when calling at_pointer on an array * instance: there is no rewind and no invalidation. * * You may only call at_pointer on an array after it has been created, but before it has * been first accessed. When calling at_pointer on an array, the pointer is advanced to * the location indicated by the JSON pointer (in case of success). It is no longer possible * to call at_pointer on the same array. * * Also note that at_pointer() relies on find_field() which implies that we do not unescape keys when matching. * * @return The value associated with the given JSON pointer, or: * - NO_SUCH_FIELD if a field does not exist in an object * - INDEX_OUT_OF_BOUNDS if an array index is larger than an array length * - INCORRECT_TYPE if a non-integer is used to access an array * - INVALID_JSON_POINTER if the JSON pointer is invalid and cannot be parsed */ inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; /** * Consumes the array and returns a string_view instance corresponding to the * array as represented in JSON. It points inside the original document. */ simdjson_inline simdjson_result raw_json() noexcept; /** * Get the value at the given index. This function has linear-time complexity. 
* This function should only be called once on an array instance since the array iterator is not reset between each call. * * @return The value at the given index, or: * - INDEX_OUT_OF_BOUNDS if the array index is larger than an array length */ simdjson_inline simdjson_result at(size_t index) noexcept; protected: /** * Go to the end of the array, no matter where you are right now. */ simdjson_inline error_code consume() noexcept; /** * Begin array iteration. * * @param iter The iterator. Must be where the initial [ is expected. Will be *moved* into the * resulting array. * @error INCORRECT_TYPE if the iterator is not at [. */ static simdjson_inline simdjson_result start(value_iterator &iter) noexcept; /** * Begin array iteration from the root. * * @param iter The iterator. Must be where the initial [ is expected. Will be *moved* into the * resulting array. * @error INCORRECT_TYPE if the iterator is not at [. * @error TAPE_ERROR if there is no closing ] at the end of the document. */ static simdjson_inline simdjson_result start_root(value_iterator &iter) noexcept; /** * Begin array iteration. * * This version of the method should be called after the initial [ has been verified, and is * intended for use by switch statements that check the type of a value. * * @param iter The iterator. Must be after the initial [. Will be *moved* into the resulting array. */ static simdjson_inline simdjson_result started(value_iterator &iter) noexcept; /** * Internal array creation. Call array::start() or array::started() instead of this. * * @param iter The iterator. Must either be at the start of the first element with iter.is_alive() * == true, or past the [] with is_alive() == false if the array is empty. Will be *moved* * into the resulting array. */ simdjson_inline array(const value_iterator &iter) noexcept; /** * Iterator marking current position. * * iter.is_alive() == false indicates iteration is complete.
*/ value_iterator iter{}; friend class value; friend class document; friend struct simdjson_result; friend struct simdjson_result; friend class array_iterator; }; } // namespace ondemand } // namespace arm64 } // namespace simdjson namespace simdjson { template<> struct simdjson_result : public arm64::implementation_simdjson_result_base { public: simdjson_inline simdjson_result(arm64::ondemand::array &&value) noexcept; ///< @private simdjson_inline simdjson_result(error_code error) noexcept; ///< @private simdjson_inline simdjson_result() noexcept = default; simdjson_inline simdjson_result begin() noexcept; simdjson_inline simdjson_result end() noexcept; inline simdjson_result count_elements() & noexcept; inline simdjson_result is_empty() & noexcept; inline simdjson_result reset() & noexcept; simdjson_inline simdjson_result at(size_t index) noexcept; simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; simdjson_inline simdjson_result raw_json() noexcept; }; } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_ARRAY_H /* end file simdjson/generic/ondemand/array.h for arm64 */ /* including simdjson/generic/ondemand/array_iterator.h for arm64: #include "simdjson/generic/ondemand/array_iterator.h" */ /* begin file simdjson/generic/ondemand/array_iterator.h for arm64 */ #ifndef SIMDJSON_GENERIC_ONDEMAND_ARRAY_ITERATOR_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_ARRAY_ITERATOR_H */ /* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { namespace arm64 { namespace ondemand { /** * A forward-only 
JSON array. * * This is an input_iterator, meaning: * - It is forward-only * - * must be called exactly once per element. * - ++ must be called exactly once in between each * (*, ++, *, ++, * ...) */ class array_iterator { public: /** Create a new, invalid array iterator. */ simdjson_inline array_iterator() noexcept = default; // // Iterator interface // /** * Get the current element. * * Part of the std::iterator interface. */ simdjson_inline simdjson_result operator*() noexcept; // MUST ONLY BE CALLED ONCE PER ITERATION. /** * Check if we are at the end of the JSON array. * * Part of the std::iterator interface. * * @return true if there are no more elements in the JSON array. */ simdjson_inline bool operator==(const array_iterator &) const noexcept; /** * Check if there are more elements in the JSON array. * * Part of the std::iterator interface. * * @return true if there are more elements in the JSON array. */ simdjson_inline bool operator!=(const array_iterator &) const noexcept; /** * Move to the next element. * * Part of the std::iterator interface. */ simdjson_inline array_iterator &operator++() noexcept; private: value_iterator iter{}; /** Construct from a value_iterator; private, so only this class and its friends can call it. */ simdjson_inline array_iterator(const value_iterator &iter) noexcept; friend class array; friend class value; friend struct simdjson_result; }; } // namespace ondemand } // namespace arm64 } // namespace simdjson namespace simdjson { template<> struct simdjson_result : public arm64::implementation_simdjson_result_base { public: simdjson_inline simdjson_result(arm64::ondemand::array_iterator &&value) noexcept; ///< @private simdjson_inline simdjson_result(error_code error) noexcept; ///< @private simdjson_inline simdjson_result() noexcept = default; // // Iterator interface // simdjson_inline simdjson_result operator*() noexcept; // MUST ONLY BE CALLED ONCE PER ITERATION. 
simdjson_inline bool operator==(const simdjson_result &) const noexcept; simdjson_inline bool operator!=(const simdjson_result &) const noexcept; simdjson_inline simdjson_result &operator++() noexcept; }; } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_ARRAY_ITERATOR_H /* end file simdjson/generic/ondemand/array_iterator.h for arm64 */ /* including simdjson/generic/ondemand/document.h for arm64: #include "simdjson/generic/ondemand/document.h" */ /* begin file simdjson/generic/ondemand/document.h for arm64 */ #ifndef SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_H */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { namespace arm64 { namespace ondemand { /** * A JSON document. It holds a json_iterator instance. * * Used by tokens to get text, and string buffer location. * * You must keep the document around during iteration. */ class document { public: /** * Create a new invalid document. * * Exists so you can declare a variable and later assign to it before use. */ simdjson_inline document() noexcept = default; simdjson_inline document(const document &other) noexcept = delete; // pass your documents by reference, not by copy simdjson_inline document(document &&other) noexcept = default; simdjson_inline document &operator=(const document &other) noexcept = delete; simdjson_inline document &operator=(document &&other) noexcept = default; /** * Cast this JSON value to an array. * * @returns An object that can be used to iterate the array. * @returns INCORRECT_TYPE If the JSON value is not an array. 
*/ simdjson_inline simdjson_result get_array() & noexcept; /** * Cast this JSON value to an object. * * @returns An object that can be used to look up or iterate fields. * @returns INCORRECT_TYPE If the JSON value is not an object. */ simdjson_inline simdjson_result get_object() & noexcept; /** * Cast this JSON value to an unsigned integer. * * @returns An unsigned 64-bit integer. * @returns INCORRECT_TYPE If the JSON value is not a 64-bit unsigned integer. */ simdjson_inline simdjson_result get_uint64() noexcept; /** * Cast this JSON value (inside string) to an unsigned integer. * * @returns An unsigned 64-bit integer. * @returns INCORRECT_TYPE If the JSON value is not a 64-bit unsigned integer. */ simdjson_inline simdjson_result get_uint64_in_string() noexcept; /** * Cast this JSON value to a signed integer. * * @returns A signed 64-bit integer. * @returns INCORRECT_TYPE If the JSON value is not a 64-bit integer. */ simdjson_inline simdjson_result get_int64() noexcept; /** * Cast this JSON value (inside string) to a signed integer. * * @returns A signed 64-bit integer. * @returns INCORRECT_TYPE If the JSON value is not a 64-bit integer. */ simdjson_inline simdjson_result get_int64_in_string() noexcept; /** * Cast this JSON value to a double. * * @returns A double. * @returns INCORRECT_TYPE If the JSON value is not a valid floating-point number. */ simdjson_inline simdjson_result get_double() noexcept; /** * Cast this JSON value (inside string) to a double. * * @returns A double. * @returns INCORRECT_TYPE If the JSON value is not a valid floating-point number. */ simdjson_inline simdjson_result get_double_in_string() noexcept; /** * Cast this JSON value to a string. * * The string is guaranteed to be valid UTF-8. * * Important: Calling get_string() twice on the same document is an error. * * @param allow_replacement Whether to allow a replacement character for unmatched surrogate pairs. * @returns A UTF-8 string. 
The string is stored in the parser and will be invalidated the next * time it parses a document or when it is destroyed. * @returns INCORRECT_TYPE if the JSON value is not a string. */ simdjson_inline simdjson_result get_string(bool allow_replacement = false) noexcept; /** * Attempts to fill the provided std::string reference with the parsed value of the current string. * * The string is guaranteed to be valid UTF-8. * * Important: a value should be consumed once. Calling get_string() twice on the same value * is an error. * * Performance: This method may be slower than get_string() or get_string(bool) because it may need to allocate memory. * We recommend you avoid allocating a std::string unless you need to. * * @param receiver The string object that receives the parsed value. * @param allow_replacement Whether to allow a replacement character for unmatched surrogate pairs. * @returns INCORRECT_TYPE if the JSON value is not a string. Otherwise, we return SUCCESS. */ template simdjson_inline error_code get_string(string_type& receiver, bool allow_replacement = false) noexcept; /** * Cast this JSON value to a string. * * The string is not guaranteed to be valid UTF-8. See https://simonsapin.github.io/wtf-8/ * * Important: Calling get_wobbly_string() twice on the same document is an error. * * @returns A string that is not guaranteed to be valid UTF-8. The string is stored in the parser and will be invalidated the next * time it parses a document or when it is destroyed. * @returns INCORRECT_TYPE if the JSON value is not a string. */ simdjson_inline simdjson_result get_wobbly_string() noexcept; /** * Cast this JSON value to a raw_json_string. * * The string is guaranteed to be valid UTF-8, and may have escapes in it (e.g. \\ or \n). * * @returns A pointer to the raw JSON for the given string. * @returns INCORRECT_TYPE if the JSON value is not a string. */ simdjson_inline simdjson_result get_raw_json_string() noexcept; /** * Cast this JSON value to a bool. * * @returns A bool value. * @returns INCORRECT_TYPE if the JSON value is not true or false. 
*/ simdjson_inline simdjson_result get_bool() noexcept; /** * Cast this JSON value to a value when the document is an object or an array. * * You must not have begun iterating through the object or array. When * SIMDJSON_DEVELOPMENT_CHECKS is set to 1 (which is the case when building in Debug mode * by default), and you have already begun iterating, * you will get an OUT_OF_ORDER_ITERATION error. If you have begun iterating, you can use * rewind() to reset the document to its initial state before calling this method. * * @returns A value if the document is a JSON array or object. * @returns SCALAR_DOCUMENT_AS_VALUE error if the document is a scalar (see is_scalar() function). */ simdjson_inline simdjson_result get_value() noexcept; /** * Checks if this JSON value is null. If and only if the value is * null, then it is consumed (we advance). If we find a token that * begins with 'n' but is not 'null', then an error is returned. * * @returns Whether the value is null. * @returns INCORRECT_TYPE If the JSON value begins with 'n' and is not 'null'. */ simdjson_inline simdjson_result is_null() noexcept; /** * Get this value as the given type. * * Supported types: object, array, raw_json_string, string_view, uint64_t, int64_t, double, bool * * You may use get_double(), get_bool(), get_uint64(), get_int64(), * get_object(), get_array(), get_raw_json_string(), or get_string() instead. * * @returns A value of the given type, parsed from the JSON. * @returns INCORRECT_TYPE If the JSON value is not the given type. */ template simdjson_inline simdjson_result get() & noexcept { // Unless the simdjson library provides an inline implementation, calling this method should // immediately fail. static_assert(!sizeof(T), "The get method with given type is not implemented by the simdjson library. " "The supported types are ondemand::object, ondemand::array, raw_json_string, std::string_view, uint64_t, " "int64_t, double, and bool. 
We recommend you use get_double(), get_bool(), get_uint64(), get_int64(), " " get_object(), get_array(), get_raw_json_string(), or get_string() instead of the get template."); } /** @overload template simdjson_result get() & noexcept */ template simdjson_inline simdjson_result get() && noexcept { // Unless the simdjson library provides an inline implementation, calling this method should // immediately fail. static_assert(!sizeof(T), "The get method with given type is not implemented by the simdjson library. " "The supported types are ondemand::object, ondemand::array, raw_json_string, std::string_view, uint64_t, " "int64_t, double, and bool. We recommend you use get_double(), get_bool(), get_uint64(), get_int64(), " " get_object(), get_array(), get_raw_json_string(), or get_string() instead of the get template."); } /** * Get this value as the given type. * * Supported types: object, array, raw_json_string, string_view, uint64_t, int64_t, double, bool, value * * Be mindful that the document instance must remain in scope while you are accessing object, array and value instances. * * @param out This is set to a value of the given type, parsed from the JSON. If there is an error, this may not be initialized. * @returns INCORRECT_TYPE If the JSON value is not the given type. * @returns SUCCESS If the parse succeeded and the out parameter was set to the value. */ template simdjson_inline error_code get(T &out) & noexcept; /** @overload template error_code get(T &out) & noexcept */ template simdjson_inline error_code get(T &out) && noexcept; #if SIMDJSON_EXCEPTIONS /** * Cast this JSON value to an array. * * @returns An object that can be used to iterate the array. * @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not an array. */ simdjson_inline operator array() & noexcept(false); /** * Cast this JSON value to an object. * * @returns An object that can be used to look up or iterate fields. 
* @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not an object. */ simdjson_inline operator object() & noexcept(false); /** * Cast this JSON value to an unsigned integer. * * @returns An unsigned 64-bit integer. * @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not a 64-bit unsigned integer. */ simdjson_inline operator uint64_t() noexcept(false); /** * Cast this JSON value to a signed integer. * * @returns A signed 64-bit integer. * @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not a 64-bit integer. */ simdjson_inline operator int64_t() noexcept(false); /** * Cast this JSON value to a double. * * @returns A double. * @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not a valid floating-point number. */ simdjson_inline operator double() noexcept(false); /** * Cast this JSON value to a string. * * The string is guaranteed to be valid UTF-8. * * @returns A UTF-8 string. The string is stored in the parser and will be invalidated the next * time it parses a document or when it is destroyed. * @exception simdjson_error(INCORRECT_TYPE) if the JSON value is not a string. */ simdjson_inline operator std::string_view() noexcept(false); /** * Cast this JSON value to a raw_json_string. * * The string is guaranteed to be valid UTF-8, and may have escapes in it (e.g. \\ or \n). * * @returns A pointer to the raw JSON for the given string. * @exception simdjson_error(INCORRECT_TYPE) if the JSON value is not a string. */ simdjson_inline operator raw_json_string() noexcept(false); /** * Cast this JSON value to a bool. * * @returns A bool value. * @exception simdjson_error(INCORRECT_TYPE) if the JSON value is not true or false. */ simdjson_inline operator bool() noexcept(false); /** * Cast this JSON value to a value when the document is an object or an array. * * You must not have begun iterating through the object or array. 
When * SIMDJSON_DEVELOPMENT_CHECKS is defined, and you have already begun iterating, * you will get an OUT_OF_ORDER_ITERATION error. If you have begun iterating, you can use * rewind() to reset the document to its initial state before calling this method. * * @returns A value if the document is a JSON array or object. * @exception SCALAR_DOCUMENT_AS_VALUE error if the document is a scalar (see is_scalar() function). */ simdjson_inline operator value() noexcept(false); #endif /** * This method scans the array and counts the number of elements. * The count_elements method should always be called before you have begun * iterating through the array: it is expected that you are pointing at * the beginning of the array. * The runtime complexity is linear in the size of the array. After * calling this function, if successful, the array is 'rewound' to its * beginning as if it had never been accessed. If the JSON is malformed (e.g., * there is a missing comma), then an error is returned and it is no longer * safe to continue. */ simdjson_inline simdjson_result count_elements() & noexcept; /** * This method scans the object and counts the number of key-value pairs. * The count_fields method should always be called before you have begun * iterating through the object: it is expected that you are pointing at * the beginning of the object. * The runtime complexity is linear in the size of the object. After * calling this function, if successful, the object is 'rewound' to its * beginning as if it had never been accessed. If the JSON is malformed (e.g., * there is a missing comma), then an error is returned and it is no longer * safe to continue. * * To check that an object is empty, it is more performant to use * the is_empty() method. */ simdjson_inline simdjson_result count_fields() & noexcept; /** * Get the value at the given index in the array. This function has linear-time complexity. 
* This function should only be called once on an array instance since the array iterator is not reset between each call. * * @return The value at the given index, or: * - INDEX_OUT_OF_BOUNDS if the array index is larger than an array length */ simdjson_inline simdjson_result at(size_t index) & noexcept; /** * Begin array iteration. * * Part of the std::iterable interface. */ simdjson_inline simdjson_result begin() & noexcept; /** * Sentinel representing the end of the array. * * Part of the std::iterable interface. */ simdjson_inline simdjson_result end() & noexcept; /** * Look up a field by name on an object (order-sensitive). * * The following code reads z, then y, then x, and thus will not retrieve x or y if fed the * JSON `{ "x": 1, "y": 2, "z": 3 }`: * * ```c++ * simdjson::ondemand::parser parser; * auto obj = parser.parse(R"( { "x": 1, "y": 2, "z": 3 } )"_padded); * double z = obj.find_field("z"); * double y = obj.find_field("y"); * double x = obj.find_field("x"); * ``` * * **Raw Keys:** The lookup will be done against the *raw* key, and will not unescape keys. * e.g. `object["a"]` will match `{ "a": 1 }`, but will *not* match `{ "\u0061": 1 }`. * * * You must consume the fields on an object one at a time. A request for a new key * invalidates previous field values: it makes them unsafe. E.g., the array * given by content["bids"].get_array() should not be accessed after you have called * content["asks"].get_array(). You can detect such mistakes by first compiling and running * the code in Debug mode (or with the macro `SIMDJSON_DEVELOPMENT_CHECKS` set to 1): an * OUT_OF_ORDER_ITERATION error is generated. * * You are expected to access keys only once. You should access the value corresponding to * a key a single time. Doing object["mykey"].to_string() and then again object["mykey"].to_string() * is an error. * * @param key The key to look up. * @returns The value of the field, or NO_SUCH_FIELD if the field is not in the object. 
*/ simdjson_inline simdjson_result find_field(std::string_view key) & noexcept; /** @overload simdjson_inline simdjson_result find_field(std::string_view key) & noexcept; */ simdjson_inline simdjson_result find_field(const char *key) & noexcept; /** * Look up a field by name on an object, without regard to key order. * * **Performance Notes:** This is a bit less performant than find_field(), though its effect varies * and often appears negligible. It starts out normally, starting out at the last field; but if * the field is not found, it scans from the beginning of the object to see if it missed it. That * missing case has a non-cache-friendly bump and lots of extra scanning, especially if the object * in question is large. The fact that the extra code is there also bumps the executable size. * * It is the default, however, because it would be highly surprising (and hard to debug) if the * default behavior failed to look up a field just because it was in the wrong order--and many * APIs assume this. Therefore, you must be explicit if you want to treat objects as out of order. * * Use find_field() if you are sure fields will be in order (or are willing to treat it as if the * field wasn't there when they aren't). * * You must consume the fields on an object one at a time. A request for a new key * invalidates previous field values: it makes them unsafe. E.g., the array * given by content["bids"].get_array() should not be accessed after you have called * content["asks"].get_array(). You can detect such mistakes by first compiling and running * the code in Debug mode (or with the macro `SIMDJSON_DEVELOPMENT_CHECKS` set to 1): an * OUT_OF_ORDER_ITERATION error is generated. * * You are expected to access keys only once. You should access the value corresponding to a key * a single time. Doing object["mykey"].to_string() and then again object["mykey"].to_string() * is an error. * * @param key The key to look up. 
* @returns The value of the field, or NO_SUCH_FIELD if the field is not in the object. */ simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; */ simdjson_inline simdjson_result find_field_unordered(const char *key) & noexcept; /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; */ simdjson_inline simdjson_result operator[](std::string_view key) & noexcept; /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; */ simdjson_inline simdjson_result operator[](const char *key) & noexcept; /** * Get the type of this JSON value. It does not validate or consume the value. * E.g., you must still call "is_null()" to check that a value is null even if * "type()" returns json_type::null. * * NOTE: If you're only expecting a value to be one type (a typical case), it's generally * better to just call .get_double, .get_string, etc. and check for INCORRECT_TYPE (or just * let it throw an exception). * * @error TAPE_ERROR when the JSON value is a bad token like "}" "," or "alse". */ simdjson_inline simdjson_result type() noexcept; /** * Checks whether the document is a scalar (string, number, null, Boolean). * Returns false when it is an array or object. * * @returns true if the type is string, number, null, Boolean * @error TAPE_ERROR when the JSON value is a bad token like "}" "," or "alse". */ simdjson_inline simdjson_result is_scalar() noexcept; /** * Checks whether the document is a negative number. * * @returns true if the number is negative. */ simdjson_inline bool is_negative() noexcept; /** * Checks whether the document is an integer number. Note that * this requires to partially parse the number string. 
If * the value is determined to be an integer, it may still * not parse properly as an integer in subsequent steps * (e.g., it might overflow). * * @returns true if the number is an integer. */ simdjson_inline simdjson_result is_integer() noexcept; /** * Determine the number type (integer or floating-point number) as quickly * as possible. This function does not fully validate the input. It is * useful when you only need to classify the numbers, without parsing them. * * If you are planning to retrieve the value or you need full validation, * consider using the get_number() method instead: it will fully parse * and validate the input, and give you access to the type: * get_number().get_number_type(). * * get_number_type() is number_type::unsigned_integer if we have * an integer greater or equal to 9223372036854775808 * get_number_type() is number_type::signed_integer if we have an * integer that is less than 9223372036854775808 * Otherwise, get_number_type() has value number_type::floating_point_number * * This function requires processing the number string, but it is expected * to be faster than get_number().get_number_type() because it does not * parse the number value. * * @returns the type of the number */ simdjson_inline simdjson_result get_number_type() noexcept; /** * Attempt to parse an ondemand::number. An ondemand::number may * contain an integer value or a floating-point value, the simdjson * library will autodetect the type. Thus it is a dynamically typed * number. Before accessing the value, you must determine the detected * type. * * number.get_number_type() is number_type::signed_integer if we have * an integer in [-9223372036854775808,9223372036854775808) * You can recover the value by calling number.get_int64() and you * have that number.is_int64() is true. 
* * number.get_number_type() is number_type::unsigned_integer if we have * an integer in [9223372036854775808,18446744073709551616) * You can recover the value by calling number.get_uint64() and you * have that number.is_uint64() is true. * * Otherwise, number.get_number_type() has value number_type::floating_point_number * and we have a binary64 number. * You can recover the value by calling number.get_double() and you * have that number.is_double() is true. * * You must check the type before accessing the value: it is an error * to call "get_int64()" when number.get_number_type() is not * number_type::signed_integer and when number.is_int64() is false. */ simdjson_warn_unused simdjson_inline simdjson_result get_number() noexcept; /** * Get the raw JSON for this token. * * The string_view will always point into the input buffer. * * The string_view will start at the beginning of the token, and include the entire token * *as well as all spaces until the next token (or EOF).* This means, for example, that a * string token always begins with a " and is always terminated by the final ", possibly * followed by a number of spaces. * * The string_view is *not* null-terminated. If this is a scalar (string, number, * boolean, or null), the character after the end of the string_view may be the padded buffer. * * Tokens include: * - { * - [ * - "a string (possibly with UTF-8 or backslashed characters like \\\")". * - -1.2e-100 * - true * - false * - null */ simdjson_inline simdjson_result raw_json_token() noexcept; /** * Reset the iterator inside the document instance so we are pointing back at the * beginning of the document, as if it had just been created. It invalidates all * values, objects and arrays that you have created so far (including unescaped strings). */ inline void rewind() noexcept; /** * Returns debugging information. */ inline std::string to_debug_string() noexcept; /** * Some unrecoverable error conditions may render the document instance unusable. 
* The is_alive() method returns true when the document is still usable. */ inline bool is_alive() noexcept; /** * Returns the current location in the document if in bounds. */ inline simdjson_result current_location() const noexcept; /** * Returns true if this document has been fully parsed. * If you have consumed the whole document and at_end() returns * false, then there may be trailing content. */ inline bool at_end() const noexcept; /** * Returns the current depth in the document if in bounds. * * E.g., * 0 = finished with document * 1 = document root value (could be [ or {, not yet known) * 2 = , or } inside root array/object * 3 = key or value inside root array/object. */ simdjson_inline int32_t current_depth() const noexcept; /** * Get the value associated with the given JSON pointer. We use the RFC 6901 * https://tools.ietf.org/html/rfc6901 standard. * * ondemand::parser parser; * auto json = R"({ "foo": { "a": [ 10, 20, 30 ] }})"_padded; * auto doc = parser.iterate(json); * doc.at_pointer("/foo/a/1") == 20 * * It is allowed for a key to be the empty string: * * ondemand::parser parser; * auto json = R"({ "": { "a": [ 10, 20, 30 ] }})"_padded; * auto doc = parser.iterate(json); * doc.at_pointer("//a/1") == 20 * * Note that at_pointer() automatically calls rewind between each call. Thus * all values, objects and arrays that you have created so far (including unescaped strings) * are invalidated. After calling at_pointer, you need to consume the result: string values * should be stored in your own variables, arrays should be decoded and stored in your own array-like * structures and so forth. 
* * Also note that at_pointer() relies on find_field() which implies that we do not unescape keys when matching. * * @param json_pointer The JSON pointer (RFC 6901) to resolve. * @return The value associated with the given JSON pointer, or: * - NO_SUCH_FIELD if a field does not exist in an object * - INDEX_OUT_OF_BOUNDS if an array index is larger than an array length * - INCORRECT_TYPE if a non-integer is used to access an array * - INVALID_JSON_POINTER if the JSON pointer is invalid and cannot be parsed * - SCALAR_DOCUMENT_AS_VALUE if the json_pointer is empty and the document is a scalar (see is_scalar() function). */ simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; /** * Consumes the document and returns a string_view instance corresponding to the * document as represented in JSON. It points inside the original byte array containing * the JSON document. */ simdjson_inline simdjson_result raw_json() noexcept; protected: /** * Consumes the document. */ simdjson_inline error_code consume() noexcept; simdjson_inline document(ondemand::json_iterator &&iter) noexcept; simdjson_inline const uint8_t *text(uint32_t idx) const noexcept; simdjson_inline value_iterator resume_value_iterator() noexcept; simdjson_inline value_iterator get_root_value_iterator() noexcept; simdjson_inline simdjson_result start_or_resume_object() noexcept; static simdjson_inline document start(ondemand::json_iterator &&iter) noexcept; // // Fields // json_iterator iter{}; ///< Current position in the document static constexpr depth_t DOCUMENT_DEPTH = 0; ///< document depth is always 0 friend class array_iterator; friend class value; friend class ondemand::parser; friend class object; friend class array; friend class field; friend class token; friend class document_stream; friend class document_reference; }; /** * A document_reference is a thin wrapper around a document reference instance. 
*/ class document_reference { public: simdjson_inline document_reference() noexcept; /** Construct from a document passed by non-const reference. */ simdjson_inline document_reference(document &d) noexcept; simdjson_inline document_reference(const document_reference &other) noexcept = default; simdjson_inline document_reference& operator=(const document_reference &other) noexcept = default; simdjson_inline void rewind() noexcept; simdjson_inline simdjson_result get_array() & noexcept; simdjson_inline simdjson_result get_object() & noexcept; simdjson_inline simdjson_result get_uint64() noexcept; simdjson_inline simdjson_result get_uint64_in_string() noexcept; simdjson_inline simdjson_result get_int64() noexcept; simdjson_inline simdjson_result get_int64_in_string() noexcept; simdjson_inline simdjson_result get_double() noexcept; simdjson_inline simdjson_result get_double_in_string() noexcept; simdjson_inline simdjson_result get_string(bool allow_replacement = false) noexcept; template simdjson_inline error_code get_string(string_type& receiver, bool allow_replacement = false) noexcept; simdjson_inline simdjson_result get_wobbly_string() noexcept; simdjson_inline simdjson_result get_raw_json_string() noexcept; simdjson_inline simdjson_result get_bool() noexcept; simdjson_inline simdjson_result get_value() noexcept; simdjson_inline simdjson_result is_null() noexcept; simdjson_inline simdjson_result raw_json() noexcept; /** Implicit conversion to a document reference. */ simdjson_inline operator document&() const noexcept; /* The conversion operators below are declared noexcept(false) and exist only when SIMDJSON_EXCEPTIONS is enabled. */ #if SIMDJSON_EXCEPTIONS simdjson_inline operator array() & noexcept(false); simdjson_inline operator object() & noexcept(false); simdjson_inline operator uint64_t() noexcept(false); simdjson_inline operator int64_t() noexcept(false); simdjson_inline operator double() noexcept(false); simdjson_inline operator std::string_view() noexcept(false); simdjson_inline operator raw_json_string() noexcept(false); simdjson_inline operator bool() noexcept(false); simdjson_inline operator value() noexcept(false); #endif simdjson_inline simdjson_result count_elements() & 
noexcept; simdjson_inline simdjson_result count_fields() & noexcept; simdjson_inline simdjson_result at(size_t index) & noexcept; simdjson_inline simdjson_result begin() & noexcept; simdjson_inline simdjson_result end() & noexcept; simdjson_inline simdjson_result find_field(std::string_view key) & noexcept; simdjson_inline simdjson_result find_field(const char *key) & noexcept; simdjson_inline simdjson_result operator[](std::string_view key) & noexcept; simdjson_inline simdjson_result operator[](const char *key) & noexcept; simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; simdjson_inline simdjson_result find_field_unordered(const char *key) & noexcept; simdjson_inline simdjson_result type() noexcept; simdjson_inline simdjson_result is_scalar() noexcept; simdjson_inline simdjson_result current_location() noexcept; simdjson_inline int32_t current_depth() const noexcept; simdjson_inline bool is_negative() noexcept; simdjson_inline simdjson_result is_integer() noexcept; simdjson_inline simdjson_result get_number_type() noexcept; simdjson_inline simdjson_result get_number() noexcept; simdjson_inline simdjson_result raw_json_token() noexcept; simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; private: /** Pointer to a document; the default member initializer sets it to nullptr. */ document *doc{nullptr}; }; } // namespace ondemand } // namespace arm64 } // namespace simdjson namespace simdjson { /** * Specialization of simdjson_result for arm64::ondemand::document: constructible from a document (moved in) or from an error_code. */ template<> struct simdjson_result : public arm64::implementation_simdjson_result_base { public: simdjson_inline simdjson_result(arm64::ondemand::document &&value) noexcept; ///< @private simdjson_inline simdjson_result(error_code error) noexcept; ///< @private simdjson_inline simdjson_result() noexcept = default; simdjson_inline error_code rewind() noexcept; simdjson_inline simdjson_result get_array() & noexcept; simdjson_inline simdjson_result get_object() & noexcept; simdjson_inline simdjson_result get_uint64() noexcept; simdjson_inline simdjson_result get_uint64_in_string() noexcept; 
simdjson_inline simdjson_result get_int64() noexcept; simdjson_inline simdjson_result get_int64_in_string() noexcept; simdjson_inline simdjson_result get_double() noexcept; simdjson_inline simdjson_result get_double_in_string() noexcept; simdjson_inline simdjson_result get_string(bool allow_replacement = false) noexcept; template simdjson_inline error_code get_string(string_type& receiver, bool allow_replacement = false) noexcept; simdjson_inline simdjson_result get_wobbly_string() noexcept; simdjson_inline simdjson_result get_raw_json_string() noexcept; simdjson_inline simdjson_result get_bool() noexcept; simdjson_inline simdjson_result get_value() noexcept; simdjson_inline simdjson_result is_null() noexcept; template simdjson_inline simdjson_result get() & noexcept; template simdjson_inline simdjson_result get() && noexcept; template simdjson_inline error_code get(T &out) & noexcept; template simdjson_inline error_code get(T &out) && noexcept; #if SIMDJSON_EXCEPTIONS simdjson_inline operator arm64::ondemand::array() & noexcept(false); simdjson_inline operator arm64::ondemand::object() & noexcept(false); simdjson_inline operator uint64_t() noexcept(false); simdjson_inline operator int64_t() noexcept(false); simdjson_inline operator double() noexcept(false); simdjson_inline operator std::string_view() noexcept(false); simdjson_inline operator arm64::ondemand::raw_json_string() noexcept(false); simdjson_inline operator bool() noexcept(false); simdjson_inline operator arm64::ondemand::value() noexcept(false); #endif simdjson_inline simdjson_result count_elements() & noexcept; simdjson_inline simdjson_result count_fields() & noexcept; simdjson_inline simdjson_result at(size_t index) & noexcept; simdjson_inline simdjson_result begin() & noexcept; simdjson_inline simdjson_result end() & noexcept; simdjson_inline simdjson_result find_field(std::string_view key) & noexcept; simdjson_inline simdjson_result find_field(const char *key) & noexcept; simdjson_inline 
simdjson_result operator[](std::string_view key) & noexcept; simdjson_inline simdjson_result operator[](const char *key) & noexcept; simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; simdjson_inline simdjson_result find_field_unordered(const char *key) & noexcept; simdjson_inline simdjson_result type() noexcept; simdjson_inline simdjson_result is_scalar() noexcept; simdjson_inline simdjson_result current_location() noexcept; simdjson_inline int32_t current_depth() const noexcept; simdjson_inline bool at_end() const noexcept; simdjson_inline bool is_negative() noexcept; simdjson_inline simdjson_result is_integer() noexcept; simdjson_inline simdjson_result get_number_type() noexcept; simdjson_inline simdjson_result get_number() noexcept; /** @copydoc simdjson_inline std::string_view document::raw_json_token() const noexcept */ simdjson_inline simdjson_result raw_json_token() noexcept; simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; }; } // namespace simdjson namespace simdjson { template<> struct simdjson_result : public arm64::implementation_simdjson_result_base { public: simdjson_inline simdjson_result(arm64::ondemand::document_reference value, error_code error) noexcept; simdjson_inline simdjson_result() noexcept = default; simdjson_inline error_code rewind() noexcept; simdjson_inline simdjson_result get_array() & noexcept; simdjson_inline simdjson_result get_object() & noexcept; simdjson_inline simdjson_result get_uint64() noexcept; simdjson_inline simdjson_result get_uint64_in_string() noexcept; simdjson_inline simdjson_result get_int64() noexcept; simdjson_inline simdjson_result get_int64_in_string() noexcept; simdjson_inline simdjson_result get_double() noexcept; simdjson_inline simdjson_result get_double_in_string() noexcept; simdjson_inline simdjson_result get_string(bool allow_replacement = false) noexcept; template simdjson_inline error_code get_string(string_type& receiver, bool 
allow_replacement = false) noexcept; simdjson_inline simdjson_result get_wobbly_string() noexcept; simdjson_inline simdjson_result get_raw_json_string() noexcept; simdjson_inline simdjson_result get_bool() noexcept; simdjson_inline simdjson_result get_value() noexcept; simdjson_inline simdjson_result is_null() noexcept; #if SIMDJSON_EXCEPTIONS simdjson_inline operator arm64::ondemand::array() & noexcept(false); simdjson_inline operator arm64::ondemand::object() & noexcept(false); simdjson_inline operator uint64_t() noexcept(false); simdjson_inline operator int64_t() noexcept(false); simdjson_inline operator double() noexcept(false); simdjson_inline operator std::string_view() noexcept(false); simdjson_inline operator arm64::ondemand::raw_json_string() noexcept(false); simdjson_inline operator bool() noexcept(false); simdjson_inline operator arm64::ondemand::value() noexcept(false); #endif simdjson_inline simdjson_result count_elements() & noexcept; simdjson_inline simdjson_result count_fields() & noexcept; simdjson_inline simdjson_result at(size_t index) & noexcept; simdjson_inline simdjson_result begin() & noexcept; simdjson_inline simdjson_result end() & noexcept; simdjson_inline simdjson_result find_field(std::string_view key) & noexcept; simdjson_inline simdjson_result find_field(const char *key) & noexcept; simdjson_inline simdjson_result operator[](std::string_view key) & noexcept; simdjson_inline simdjson_result operator[](const char *key) & noexcept; simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; simdjson_inline simdjson_result find_field_unordered(const char *key) & noexcept; simdjson_inline simdjson_result type() noexcept; simdjson_inline simdjson_result is_scalar() noexcept; simdjson_inline simdjson_result current_location() noexcept; simdjson_inline simdjson_result current_depth() const noexcept; simdjson_inline simdjson_result is_negative() noexcept; simdjson_inline simdjson_result is_integer() noexcept; 
simdjson_inline simdjson_result get_number_type() noexcept; simdjson_inline simdjson_result get_number() noexcept; /** @copydoc simdjson_inline std::string_view document_reference::raw_json_token() const noexcept */ simdjson_inline simdjson_result raw_json_token() noexcept; simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; }; } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_H /* end file simdjson/generic/ondemand/document.h for arm64 */ /* including simdjson/generic/ondemand/document_stream.h for arm64: #include "simdjson/generic/ondemand/document_stream.h" */ /* begin file simdjson/generic/ondemand/document_stream.h for arm64 */ #ifndef SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_STREAM_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_STREAM_H */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/document.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/parser.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ #ifdef SIMDJSON_THREADS_ENABLED #include #include #include #endif namespace simdjson { namespace arm64 { namespace ondemand { #ifdef SIMDJSON_THREADS_ENABLED /** @private Custom worker class **/ struct stage1_worker { stage1_worker() noexcept = default; stage1_worker(const stage1_worker&) = delete; stage1_worker(stage1_worker&&) = delete; stage1_worker operator=(const stage1_worker&) = delete; ~stage1_worker(); /** * We only start the thread when it is needed, not at object construction, this may throw. * You should only call this once. **/ void start_thread(); /** * Start a stage 1 job. 
You should first call 'run', then 'finish'. * You must call start_thread once before. */ void run(document_stream * ds, parser * stage1, size_t next_batch_start); /** Wait for the run to finish (blocking). You should first call 'run', then 'finish'. **/ void finish(); private: /** * Normally, we would never stop the thread. But we do in the destructor. * This function is only safe assuming that you are not waiting for results. You * should have called run, then finish, and be done. **/ void stop_thread(); std::thread thread{}; /** These three variables define the work done by the thread. **/ ondemand::parser * stage1_thread_parser{}; size_t _next_batch_start{}; document_stream * owner{}; /** * We have two state variables. This could be streamlined to one variable in the future but * we use two for clarity. */ bool has_work{false}; bool can_work{true}; /** * We lock using a mutex. */ std::mutex locking_mutex{}; std::condition_variable cond_var{}; friend class document_stream; }; #endif // SIMDJSON_THREADS_ENABLED /** * A forward-only stream of documents. * * Produced by parser::iterate_many. * */ class document_stream { public: /** * Construct an uninitialized document_stream. * * ```c++ * document_stream docs; * auto error = parser.iterate_many(json).get(docs); * ``` */ simdjson_inline document_stream() noexcept; /** Move one document_stream to another. */ simdjson_inline document_stream(document_stream &&other) noexcept = default; /** Move one document_stream to another. */ simdjson_inline document_stream &operator=(document_stream &&other) noexcept = default; simdjson_inline ~document_stream() noexcept; /** * Returns the input size in bytes. */ inline size_t size_in_bytes() const noexcept; /** * After iterating through the stream, this method * returns the number of bytes that were not parsed at the end * of the stream. 
If truncated_bytes() differs from zero, * then the input was truncated maybe because incomplete JSON * documents were found at the end of the stream. You * may need to process the bytes in the interval [size_in_bytes()-truncated_bytes(), size_in_bytes()). * * You should only call truncated_bytes() after streaming through all * documents, like so: * * document_stream stream = parser.iterate_many(json,window); * for(auto & doc : stream) { * // do something with doc * } * size_t truncated = stream.truncated_bytes(); * */ inline size_t truncated_bytes() const noexcept; class iterator { public: using value_type = simdjson_result; using reference = value_type; using difference_type = std::ptrdiff_t; using iterator_category = std::input_iterator_tag; /** * Default constructor. */ simdjson_inline iterator() noexcept; /** * Get the current document (or error). */ simdjson_inline simdjson_result operator*() noexcept; /** * Advance to the next document (prefix). */ inline iterator& operator++() noexcept; /** * Check if we're at the end yet. * @param other the end iterator to compare to. */ simdjson_inline bool operator!=(const iterator &other) const noexcept; /** * @private * * Gives the current index in the input document in bytes. * * document_stream stream = parser.iterate_many(json,window); * for(auto i = stream.begin(); i != stream.end(); ++i) { * auto doc = *i; * size_t index = i.current_index(); * } * * This function (current_index()) is experimental and the usage * may change in future versions of simdjson: we find the API somewhat * awkward and we would like to offer something friendlier. */ simdjson_inline size_t current_index() const noexcept; /** * @private * * Gives a view of the current document at the current position.
* * document_stream stream = parser.iterate_many(json,window); * for(auto i = stream.begin(); i != stream.end(); ++i) { * std::string_view v = i.source(); * } * * The returned string_view instance is simply a map to the (unparsed) * source string: it may thus include white-space characters and all manner * of padding. * * This function (source()) is experimental and the usage * may change in future versions of simdjson: we find the API somewhat * awkward and we would like to offer something friendlier. * */ simdjson_inline std::string_view source() const noexcept; /** * Returns error of the stream (if any). */ inline error_code error() const noexcept; private: simdjson_inline iterator(document_stream *s, bool finished) noexcept; /** The document_stream we're iterating through. */ document_stream* stream; /** Whether we're finished or not. */ bool finished; friend class document; friend class document_stream; friend class json_iterator; }; /** * Start iterating the documents in the stream. */ simdjson_inline iterator begin() noexcept; /** * The end of the stream, for iterator comparison purposes. */ simdjson_inline iterator end() noexcept; private: document_stream &operator=(const document_stream &) = delete; // Disallow copying document_stream(const document_stream &other) = delete; // Disallow copying /** * Construct a document_stream. Does not allocate or parse anything until the iterator is * used. * * @param parser is a reference to the parser instance used to generate this document_stream * @param buf is the raw byte buffer we need to process * @param len is the length of the raw byte buffer in bytes * @param batch_size is the size of the windows (must be greater than or equal to the size of the largest JSON document) * @param allow_comma_separated NOTE(review): not documented here; presumably allows the documents in the stream to be separated by commas -- confirm against parser::iterate_many */ simdjson_inline document_stream( ondemand::parser &parser, const uint8_t *buf, size_t len, size_t batch_size, bool allow_comma_separated ) noexcept; /** * Parse the first document in the buffer.
Used by begin(), to handle allocation and * initialization. */ inline void start() noexcept; /** * Parse the next document found in the buffer previously given to document_stream. * * The content should be a valid JSON document encoded as UTF-8. If there is a * UTF-8 BOM, the parser skips it. * * You do NOT need to pre-allocate a parser. This function takes care of * pre-allocating a capacity defined by the batch_size defined when creating the * document_stream object. * * The function returns simdjson::EMPTY if there is no more data to be parsed. * * The function returns simdjson::SUCCESS (as integer = 0) in case of success * and indicates that the buffer has successfully been parsed to the end. * Every document it contained has been parsed without error. * * The function returns an error code from simdjson/simdjson.h in case of failure * such as simdjson::CAPACITY, simdjson::MEMALLOC, simdjson::DEPTH_ERROR and so forth * (the simdjson::error_message function converts these error codes into a string). * * NOTE(review): next() is declared void, so the codes described above are not literally * returned; presumably they are recorded in the stream's error field -- confirm against * the implementation. * * You can also check validity by calling parser.is_valid(). The same parser can * and should be reused for the other documents in the buffer. */ inline void next() noexcept; /** Move the json_iterator of the document to the location of the next document in the stream. */ inline void next_document() noexcept; /** Get the next document index. */ inline size_t next_batch_start() const noexcept; /** Pass the next batch through stage 1 with the given parser. */ inline error_code run_stage1(ondemand::parser &p, size_t batch_start) noexcept; // Fields ondemand::parser *parser; const uint8_t *buf; size_t len; size_t batch_size; bool allow_comma_separated; /** * We are going to use just one document instance. The document owns * the json_iterator. It implies that we only ever pass a reference * to the document to the users. */ document doc{}; /** The error (or lack thereof) from the current document.
*/ error_code error; size_t batch_start{0}; size_t doc_index{}; #ifdef SIMDJSON_THREADS_ENABLED /** Indicates whether we use threads. Note that this needs to be a constant during the execution of the parsing. */ bool use_thread; inline void load_from_stage1_thread() noexcept; /** Start a thread to run stage 1 on the next batch. */ inline void start_stage1_thread() noexcept; /** Wait for the stage 1 thread to finish and capture the results. */ inline void finish_stage1_thread() noexcept; /** The error returned from the stage 1 thread. */ error_code stage1_thread_error{UNINITIALIZED}; /** The thread used to run stage 1 against the next batch in the background. */ std::unique_ptr worker{new(std::nothrow) stage1_worker()}; /** * The parser used to run stage 1 in the background. Will be swapped * with the regular parser when finished. */ ondemand::parser stage1_thread_parser{}; friend struct stage1_worker; #endif // SIMDJSON_THREADS_ENABLED friend class parser; friend class document; friend class json_iterator; friend struct simdjson_result; friend struct internal::simdjson_result_base; }; // document_stream } // namespace ondemand } // namespace arm64 } // namespace simdjson namespace simdjson { template<> struct simdjson_result : public arm64::implementation_simdjson_result_base { public: simdjson_inline simdjson_result(arm64::ondemand::document_stream &&value) noexcept; ///< @private simdjson_inline simdjson_result(error_code error) noexcept; ///< @private simdjson_inline simdjson_result() noexcept = default; }; } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_STREAM_H /* end file simdjson/generic/ondemand/document_stream.h for arm64 */ /* including simdjson/generic/ondemand/field.h for arm64: #include "simdjson/generic/ondemand/field.h" */ /* begin file simdjson/generic/ondemand/field.h for arm64 */ #ifndef SIMDJSON_GENERIC_ONDEMAND_FIELD_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped 
(editor-only): #define SIMDJSON_GENERIC_ONDEMAND_FIELD_H */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { namespace arm64 { namespace ondemand { /** * A JSON field (key/value pair) in an object. * * Returned from object iteration. * * Extends from std::pair so you can use C++ algorithms that rely on pairs. */ class field : public std::pair { public: /** * Create a new invalid field. * * Exists so you can declare a variable and later assign to it before use. */ simdjson_inline field() noexcept; /** * Get the key as a string_view (for higher speed, consider raw_key). * We deliberately use a more cumbersome name (unescaped_key) to force users * to think twice about using it. * * This consumes the key: once you have called unescaped_key(), you cannot * call it again nor can you call key(). */ simdjson_inline simdjson_warn_unused simdjson_result unescaped_key(bool allow_replacement) noexcept; /** * Get the key as a raw_json_string. Can be used for direct comparison with * an unescaped C string: e.g., key() == "test". */ simdjson_inline raw_json_string key() const noexcept; /** * Get the field value. 
*/ simdjson_inline ondemand::value &value() & noexcept; /** * @overload ondemand::value &ondemand::value() & noexcept */ simdjson_inline ondemand::value value() && noexcept; protected: simdjson_inline field(raw_json_string key, ondemand::value &&value) noexcept; static simdjson_inline simdjson_result start(value_iterator &parent_iter) noexcept; static simdjson_inline simdjson_result start(const value_iterator &parent_iter, raw_json_string key) noexcept; friend struct simdjson_result; friend class object_iterator; }; } // namespace ondemand } // namespace arm64 } // namespace simdjson namespace simdjson { template<> struct simdjson_result : public arm64::implementation_simdjson_result_base { public: simdjson_inline simdjson_result(arm64::ondemand::field &&value) noexcept; ///< @private simdjson_inline simdjson_result(error_code error) noexcept; ///< @private simdjson_inline simdjson_result() noexcept = default; simdjson_inline simdjson_result unescaped_key(bool allow_replacement = false) noexcept; simdjson_inline simdjson_result key() noexcept; simdjson_inline simdjson_result value() noexcept; }; } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_FIELD_H /* end file simdjson/generic/ondemand/field.h for arm64 */ /* including simdjson/generic/ondemand/object.h for arm64: #include "simdjson/generic/ondemand/object.h" */ /* begin file simdjson/generic/ondemand/object.h for arm64 */ #ifndef SIMDJSON_GENERIC_ONDEMAND_OBJECT_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_OBJECT_H */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace 
simdjson { namespace arm64 { namespace ondemand { /** * A forward-only JSON object field iterator. */ class object { public: /** * Create a new invalid object. * * Exists so you can declare a variable and later assign to it before use. */ simdjson_inline object() noexcept = default; simdjson_inline simdjson_result begin() noexcept; simdjson_inline simdjson_result end() noexcept; /** * Look up a field by name on an object (order-sensitive). * * The following code reads z, then y, then x, and thus will not retrieve x or y if fed the * JSON `{ "x": 1, "y": 2, "z": 3 }`: * * ```c++ * simdjson::ondemand::parser parser; * auto obj = parser.parse(R"( { "x": 1, "y": 2, "z": 3 } )"_padded); * double z = obj.find_field("z"); * double y = obj.find_field("y"); * double x = obj.find_field("x"); * ``` * If you have multiple fields with a matching key ({"x": 1, "x": 1}) be mindful * that only one field is returned. * * **Raw Keys:** The lookup will be done against the *raw* key, and will not unescape keys. * e.g. `object["a"]` will match `{ "a": 1 }`, but will *not* match `{ "\u0061": 1 }`. * * You must consume the fields on an object one at a time. A request for a new key * invalidates previous field values: it makes them unsafe. The value instance you get * from `content["bids"]` becomes invalid when you call `content["asks"]`. The array * given by content["bids"].get_array() should not be accessed after you have called * content["asks"].get_array(). You can detect such mistakes by first compiling and running * the code in Debug mode (or with the macro `SIMDJSON_DEVELOPMENT_CHECKS` set to 1): an * OUT_OF_ORDER_ITERATION error is generated. * * You are expected to access keys only once. You should access the value corresponding to a * key a single time. Doing object["mykey"].to_string() and then again object["mykey"].to_string() * is an error. * * @param key The key to look up. * @returns The value of the field, or NO_SUCH_FIELD if the field is not in the object. 
*/ simdjson_inline simdjson_result find_field(std::string_view key) & noexcept; /** @overload simdjson_inline simdjson_result find_field(std::string_view key) & noexcept; */ simdjson_inline simdjson_result find_field(std::string_view key) && noexcept; /** * Look up a field by name on an object, without regard to key order. * * **Performance Notes:** This is a bit less performant than find_field(), though its effect varies * and often appears negligible. It starts out normally, starting out at the last field; but if * the field is not found, it scans from the beginning of the object to see if it missed it. That * missing case has a non-cache-friendly bump and lots of extra scanning, especially if the object * in question is large. The fact that the extra code is there also bumps the executable size. * * It is the default, however, because it would be highly surprising (and hard to debug) if the * default behavior failed to look up a field just because it was in the wrong order--and many * APIs assume this. Therefore, you must be explicit if you want to treat objects as out of order. * * Use find_field() if you are sure fields will be in order (or are willing to treat it as if the * field wasn't there when they aren't). * * If you have multiple fields with a matching key ({"x": 1, "x": 1}) be mindful * that only one field is returned. * * You must consume the fields on an object one at a time. A request for a new key * invalidates previous field values: it makes them unsafe. The value instance you get * from `content["bids"]` becomes invalid when you call `content["asks"]`. The array * given by content["bids"].get_array() should not be accessed after you have called * content["asks"].get_array(). You can detect such mistakes by first compiling and running * the code in Debug mode (or with the macro `SIMDJSON_DEVELOPMENT_CHECKS` set to 1): an * OUT_OF_ORDER_ITERATION error is generated. * * You are expected to access keys only once. 
You should access the value corresponding to a key * a single time. Doing object["mykey"].to_string() and then again object["mykey"].to_string() is an error. * * @param key The key to look up. * @returns The value of the field, or NO_SUCH_FIELD if the field is not in the object. */ simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; */ simdjson_inline simdjson_result find_field_unordered(std::string_view key) && noexcept; /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; */ simdjson_inline simdjson_result operator[](std::string_view key) & noexcept; /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; */ simdjson_inline simdjson_result operator[](std::string_view key) && noexcept; /** * Get the value associated with the given JSON pointer. We use the RFC 6901 * https://tools.ietf.org/html/rfc6901 standard, interpreting the current node * as the root of its own JSON document. * * ondemand::parser parser; * auto json = R"({ "foo": { "a": [ 10, 20, 30 ] }})"_padded; * auto doc = parser.iterate(json); * doc.at_pointer("/foo/a/1") == 20 * * It is allowed for a key to be the empty string: * * ondemand::parser parser; * auto json = R"({ "": { "a": [ 10, 20, 30 ] }})"_padded; * auto doc = parser.iterate(json); * doc.at_pointer("//a/1") == 20 * * Note that at_pointer() called on the document automatically calls the document's rewind * method between each call. It invalidates all previously accessed arrays, objects and values * that have not been consumed. Yet it is not the case when calling at_pointer on an object * instance: there is no rewind and no invalidation. * * You may call at_pointer more than once on an object, but each time the pointer is advanced * to be within the value matched by the key indicated by the JSON pointer query. 
Thus any preceding * key (as well as the current key) can no longer be used with following JSON pointer calls. * * Also note that at_pointer() relies on find_field() which implies that we do not unescape keys when matching. * * @return The value associated with the given JSON pointer, or: * - NO_SUCH_FIELD if a field does not exist in an object * - INDEX_OUT_OF_BOUNDS if an array index is larger than an array length * - INCORRECT_TYPE if a non-integer is used to access an array * - INVALID_JSON_POINTER if the JSON pointer is invalid and cannot be parsed */ inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; /** * Reset the iterator so that we are pointing back at the * beginning of the object. You should still consume values only once even if you * can iterate through the object more than once. If you unescape a string within * the object more than once, you have unsafe code. Note that rewinding an object * means that you may need to reparse it anew: it is not a free operation. * * @returns true if the object contains some elements (not empty) */ inline simdjson_result reset() & noexcept; /** * This method scans the beginning of the object and checks whether the * object is empty. * The runtime complexity is constant time. After * calling this function, if successful, the object is rewound to its * beginning as if it had never been accessed. If the JSON is malformed (e.g., * there is a missing comma), then an error is returned and it is no longer * safe to continue. */ inline simdjson_result is_empty() & noexcept; /** * This method scans the object and counts the number of key-value pairs. * The count_fields method should always be called before you have begun * iterating through the object: it is expected that you are pointing at * the beginning of the object. * The runtime complexity is linear in the size of the object.
After * calling this function, if successful, the object is rewound to its * beginning as if it had never been accessed. If the JSON is malformed (e.g., * there is a missing comma), then an error is returned and it is no longer * safe to continue. * * To check that an object is empty, it is more performant to use * the is_empty() method. * * Performance hint: You should only call count_fields() as a last * resort as it may require scanning the document twice or more. */ simdjson_inline simdjson_result count_fields() & noexcept; /** * Consumes the object and returns a string_view instance corresponding to the * object as represented in JSON. It points inside the original byte array containing * the JSON document. */ simdjson_inline simdjson_result raw_json() noexcept; protected: /** * Go to the end of the object, no matter where you are right now. */ simdjson_inline error_code consume() noexcept; static simdjson_inline simdjson_result start(value_iterator &iter) noexcept; static simdjson_inline simdjson_result start_root(value_iterator &iter) noexcept; static simdjson_inline simdjson_result started(value_iterator &iter) noexcept; static simdjson_inline object resume(const value_iterator &iter) noexcept; simdjson_inline object(const value_iterator &iter) noexcept; simdjson_warn_unused simdjson_inline error_code find_field_raw(const std::string_view key) noexcept; value_iterator iter{}; friend class value; friend class document; friend struct simdjson_result; }; } // namespace ondemand } // namespace arm64 } // namespace simdjson namespace simdjson { template<> struct simdjson_result : public arm64::implementation_simdjson_result_base { public: simdjson_inline simdjson_result(arm64::ondemand::object &&value) noexcept; ///< @private simdjson_inline simdjson_result(error_code error) noexcept; ///< @private simdjson_inline simdjson_result() noexcept = default; simdjson_inline simdjson_result begin() noexcept; simdjson_inline simdjson_result end() noexcept;
simdjson_inline simdjson_result find_field(std::string_view key) & noexcept; simdjson_inline simdjson_result find_field(std::string_view key) && noexcept; simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; simdjson_inline simdjson_result find_field_unordered(std::string_view key) && noexcept; simdjson_inline simdjson_result operator[](std::string_view key) & noexcept; simdjson_inline simdjson_result operator[](std::string_view key) && noexcept; simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; inline simdjson_result reset() noexcept; inline simdjson_result is_empty() noexcept; inline simdjson_result count_fields() & noexcept; inline simdjson_result raw_json() noexcept; }; } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_OBJECT_H /* end file simdjson/generic/ondemand/object.h for arm64 */ /* including simdjson/generic/ondemand/object_iterator.h for arm64: #include "simdjson/generic/ondemand/object_iterator.h" */ /* begin file simdjson/generic/ondemand/object_iterator.h for arm64 */ #ifndef SIMDJSON_GENERIC_ONDEMAND_OBJECT_ITERATOR_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_OBJECT_ITERATOR_H */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { namespace arm64 { namespace ondemand { class object_iterator { public: /** * Create a new invalid object_iterator. * * Exists so you can declare a variable and later assign to it before use. 
*/
  simdjson_inline object_iterator() noexcept = default;

  //
  // Iterator interface
  //

  // Reads key and value, yielding them to the user.
  // MUST ONLY BE CALLED ONCE PER ITERATION.
  simdjson_inline simdjson_result operator*() noexcept;
  // Assumes it's being compared with the end. true if depth < iter->depth.
  simdjson_inline bool operator==(const object_iterator &) const noexcept;
  // Assumes it's being compared with the end. true if depth >= iter->depth.
  simdjson_inline bool operator!=(const object_iterator &) const noexcept;
  // Checks for ']' and ','
  // NOTE(review): ']' looks copied from the array iterator; a JSON object is
  // closed by '}' -- confirm against the operator++ definition.
  simdjson_inline object_iterator &operator++() noexcept;

private:
  /**
   * The underlying JSON iterator.
   *
   * PERF NOTE: expected to be elided in favor of the parent document: this is set when the object
   * is first used, and never changes afterwards.
   */
  value_iterator iter{};

  simdjson_inline object_iterator(const value_iterator &iter) noexcept;
  friend struct simdjson_result;
  friend class object;
};

} // namespace ondemand
} // namespace arm64
} // namespace simdjson

namespace simdjson {

/**
 * simdjson_result specialization that can be constructed from an
 * arm64::ondemand::object_iterator (rvalue) or from an error_code, and that
 * exposes the same iterator operators.
 */
template<>
struct simdjson_result : public arm64::implementation_simdjson_result_base {
public:
  simdjson_inline simdjson_result(arm64::ondemand::object_iterator &&value) noexcept; ///< @private
  simdjson_inline simdjson_result(error_code error) noexcept; ///< @private
  simdjson_inline simdjson_result() noexcept = default;

  //
  // Iterator interface
  //

  // Reads key and value, yielding them to the user.
  simdjson_inline simdjson_result operator*() noexcept; // MUST ONLY BE CALLED ONCE PER ITERATION.
  // Assumes it's being compared with the end. true if depth < iter->depth.
  simdjson_inline bool operator==(const simdjson_result &) const noexcept;
  // Assumes it's being compared with the end. true if depth >= iter->depth.
simdjson_inline bool operator!=(const simdjson_result &) const noexcept;
  // Checks for ']' and ','
  // NOTE(review): ']' looks copied from the array iterator; a JSON object is
  // closed by '}' -- confirm against the operator++ definition.
  simdjson_inline simdjson_result &operator++() noexcept;
};

} // namespace simdjson

#endif // SIMDJSON_GENERIC_ONDEMAND_OBJECT_ITERATOR_H
/* end file simdjson/generic/ondemand/object_iterator.h for arm64 */
/* including simdjson/generic/ondemand/serialization.h for arm64: #include "simdjson/generic/ondemand/serialization.h" */
/* begin file simdjson/generic/ondemand/serialization.h for arm64 */
#ifndef SIMDJSON_GENERIC_ONDEMAND_SERIALIZATION_H

/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */
/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_SERIALIZATION_H */
/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */
/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */

namespace simdjson {
/**
 * Create a string-view instance out of a document instance. The string-view instance
 * contains JSON text that is suitable to be parsed as JSON again. It does not
 * validate the content.
 *
 * @param x The document, taken by non-const reference.
 */
inline simdjson_result to_json_string(arm64::ondemand::document& x) noexcept;
/**
 * Create a string-view instance out of a value instance. The string-view instance
 * contains JSON text that is suitable to be parsed as JSON again. The value must
 * not have been accessed previously. It does not
 * validate the content.
 *
 * @param x The value, taken by non-const reference.
 */
inline simdjson_result to_json_string(arm64::ondemand::value& x) noexcept;
/**
 * Create a string-view instance out of an object instance. The string-view instance
 * contains JSON text that is suitable to be parsed as JSON again. It does not
 * validate the content.
 *
 * @param x The object, taken by non-const reference.
 */
inline simdjson_result to_json_string(arm64::ondemand::object& x) noexcept;
/**
 * Create a string-view instance out of an array instance. The string-view instance
 * contains JSON text that is suitable to be parsed as JSON again. It does not
 * validate the content.
*/
inline simdjson_result to_json_string(arm64::ondemand::array& x) noexcept;
// Overloads taking a simdjson_result by value. Unlike the overloads above,
// these are not declared noexcept.
inline simdjson_result to_json_string(simdjson_result x);
inline simdjson_result to_json_string(simdjson_result x);
inline simdjson_result to_json_string(simdjson_result x);
inline simdjson_result to_json_string(simdjson_result x);
} // namespace simdjson

/**
 * We want to support argument-dependent lookup (ADL).
 * Hence we should define operator<< in the namespace
 * where the argument (here value, object, etc.) resides.
 * Credit: @madhur4127
 * See https://github.com/simdjson/simdjson/issues/1768
 */
namespace simdjson { namespace arm64 { namespace ondemand {

/**
 * Print JSON to an output stream.  It does not
 * validate the content.
 *
 * @param out The output stream.
 * @param x The value.
 * @throw if there is an error with the underlying output stream. simdjson itself will not throw.
 */
inline std::ostream& operator<<(std::ostream& out, simdjson::arm64::ondemand::value x);
#if SIMDJSON_EXCEPTIONS
// Overload for a simdjson_result; only declared when SIMDJSON_EXCEPTIONS is enabled.
inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x);
#endif
/**
 * Print JSON to an output stream. It does not
 * validate the content.
 *
 * @param out The output stream.
 * @param value The array.
 * @throw if there is an error with the underlying output stream. simdjson itself will not throw.
 */
inline std::ostream& operator<<(std::ostream& out, simdjson::arm64::ondemand::array value);
#if SIMDJSON_EXCEPTIONS
// Overload for a simdjson_result; only declared when SIMDJSON_EXCEPTIONS is enabled.
inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x);
#endif
/**
 * Print JSON to an output stream. It does not
 * validate the content.
 *
 * @param out The output stream.
 * @param value The document.
 * @throw if there is an error with the underlying output stream. simdjson itself will not throw.
*/
inline std::ostream& operator<<(std::ostream& out, simdjson::arm64::ondemand::document& value);
#if SIMDJSON_EXCEPTIONS
// Takes the result by rvalue reference (unlike the by-value overloads for value/array/object).
inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result&& x);
#endif
// Same as the document overload, for a document_reference.
inline std::ostream& operator<<(std::ostream& out, simdjson::arm64::ondemand::document_reference& value);
#if SIMDJSON_EXCEPTIONS
inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result&& x);
#endif
/**
 * Print JSON to an output stream. It does not
 * validate the content.
 *
 * @param out The output stream.
 * @param value The object.
 * @throw if there is an error with the underlying output stream. simdjson itself will not throw.
 */
inline std::ostream& operator<<(std::ostream& out, simdjson::arm64::ondemand::object value);
#if SIMDJSON_EXCEPTIONS
inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x);
#endif
}}} // namespace simdjson::arm64::ondemand

#endif // SIMDJSON_GENERIC_ONDEMAND_SERIALIZATION_H
/* end file simdjson/generic/ondemand/serialization.h for arm64 */

// Inline definitions
/* including simdjson/generic/ondemand/array-inl.h for arm64: #include "simdjson/generic/ondemand/array-inl.h" */
/* begin file simdjson/generic/ondemand/array-inl.h for arm64 */
#ifndef SIMDJSON_GENERIC_ONDEMAND_ARRAY_INL_H

/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */
/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_ARRAY_INL_H */
/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */
/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array.h" */
/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array_iterator-inl.h" */
/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator.h" */
/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value.h" */
/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator-inl.h" */
/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */

namespace simdjson {
namespace arm64 {
namespace ondemand {

//
// ### Live States
//
// While iterating or looking up values, depth >= iter->depth. at_start may vary. Error is
// always SUCCESS:
//
// - Start: This is the state when the array is first found and the iterator is just past the `[`.
//   In this state, at_start == true.
// - Next: After we hand a scalar value to the user, or an array/object which they then fully
//   iterate over, the iterator is at the `,` before the next value (or `]`). In this state,
//   depth == iter->depth, at_start == false, and error == SUCCESS.
// - Unfinished Business: When we hand an array/object to the user which they do not fully
//   iterate over, we need to finish that iteration by skipping child values until we reach the
//   Next state. In this state, depth > iter->depth, at_start == false, and error == SUCCESS.
//
// ## Error States
//
// In error states, we will yield exactly one more value before stopping. iter->depth == depth
// and at_start is always false. We decrement after yielding the error, moving to the Finished
// state.
//
// - Chained Error: When the array iterator is part of an error chain--for example, in
//   `for (auto tweet : doc["tweets"])`, where the tweet element may be missing or not be an
//   array--we yield that error in the loop, exactly once. In this state, error != SUCCESS and
//   iter->depth == depth, and at_start == false. We decrement depth when we yield the error.
// - Missing Comma Error: When the iterator ++ method discovers there is no comma between elements,
//   we flag that as an error and treat it exactly the same as a Chained Error. In this state,
//   error == TAPE_ERROR, iter->depth == depth, and at_start == false.
//
// ## Terminal State
//
// The terminal state has iter->depth < depth. at_start is always false.
// // - Finished: When we have reached a `]` or have reported an error, we are finished. We signal this // by decrementing depth. In this state, iter->depth < depth, at_start == false, and // error == SUCCESS. // simdjson_inline array::array(const value_iterator &_iter) noexcept : iter{_iter} { } simdjson_inline simdjson_result array::start(value_iterator &iter) noexcept { // We don't need to know if the array is empty to start iteration, but we do want to know if there // is an error--thus `simdjson_unused`. simdjson_unused bool has_value; SIMDJSON_TRY( iter.start_array().get(has_value) ); return array(iter); } simdjson_inline simdjson_result array::start_root(value_iterator &iter) noexcept { simdjson_unused bool has_value; SIMDJSON_TRY( iter.start_root_array().get(has_value) ); return array(iter); } simdjson_inline simdjson_result array::started(value_iterator &iter) noexcept { bool has_value; SIMDJSON_TRY(iter.started_array().get(has_value)); return array(iter); } simdjson_inline simdjson_result array::begin() noexcept { #if SIMDJSON_DEVELOPMENT_CHECKS if (!iter.is_at_iterator_start()) { return OUT_OF_ORDER_ITERATION; } #endif return array_iterator(iter); } simdjson_inline simdjson_result array::end() noexcept { return array_iterator(iter); } simdjson_inline error_code array::consume() noexcept { auto error = iter.json_iter().skip_child(iter.depth()-1); if(error) { iter.abandon(); } return error; } simdjson_inline simdjson_result array::raw_json() noexcept { const uint8_t * starting_point{iter.peek_start()}; auto error = consume(); if(error) { return error; } // After 'consume()', we could be left pointing just beyond the document, but that // is ok because we are not going to dereference the final pointer position, we just // use it to compute the length in bytes. 
const uint8_t * final_point{iter._json_iter->unsafe_pointer()}; return std::string_view(reinterpret_cast(starting_point), size_t(final_point - starting_point)); } SIMDJSON_PUSH_DISABLE_WARNINGS SIMDJSON_DISABLE_STRICT_OVERFLOW_WARNING simdjson_inline simdjson_result array::count_elements() & noexcept { size_t count{0}; // Important: we do not consume any of the values. for(simdjson_unused auto v : *this) { count++; } // The above loop will always succeed, but we want to report errors. if(iter.error()) { return iter.error(); } // We need to move back at the start because we expect users to iterate through // the array after counting the number of elements. iter.reset_array(); return count; } SIMDJSON_POP_DISABLE_WARNINGS simdjson_inline simdjson_result array::is_empty() & noexcept { bool is_not_empty; auto error = iter.reset_array().get(is_not_empty); if(error) { return error; } return !is_not_empty; } inline simdjson_result array::reset() & noexcept { return iter.reset_array(); } inline simdjson_result array::at_pointer(std::string_view json_pointer) noexcept { if (json_pointer[0] != '/') { return INVALID_JSON_POINTER; } json_pointer = json_pointer.substr(1); // - means "the append position" or "the element after the end of the array" // We don't support this, because we're returning a real element, not a position. if (json_pointer == "-") { return INDEX_OUT_OF_BOUNDS; } // Read the array index size_t array_index = 0; size_t i; for (i = 0; i < json_pointer.length() && json_pointer[i] != '/'; i++) { uint8_t digit = uint8_t(json_pointer[i] - '0'); // Check for non-digit in array index. 
// If it's there, we're trying to get a field in an object
    if (digit > 9) { return INCORRECT_TYPE; }
    // NOTE(review): no overflow check here; size_t arithmetic wraps for very
    // long digit runs, so such an index is not rejected as out of range.
    array_index = array_index*10 + digit;
  }

  // 0 followed by other digits is invalid
  if (i > 1 && json_pointer[0] == '0') { return INVALID_JSON_POINTER; } // "JSON pointer array index has other characters after 0"

  // Empty string is invalid; so is a "/" with no digits before it
  if (i == 0) { return INVALID_JSON_POINTER; } // "Empty string in JSON pointer array index"
  // Get the child
  auto child = at(array_index);
  // If there is an error, it ends here
  if(child.error()) {
    return child;
  }

  // If there is a /, we're not done yet, call recursively.
  if (i < json_pointer.length()) {
    child = child.at_pointer(json_pointer.substr(i));
  }
  return child;
}

// Walks the array from the front and returns the element whose zero-based
// position equals `index`; INDEX_OUT_OF_BOUNDS if the loop ends first.
simdjson_inline simdjson_result array::at(size_t index) noexcept {
  size_t i = 0;
  for (auto value : *this) {
    if (i == index) { return value; }
    i++;
  }
  return INDEX_OUT_OF_BOUNDS;
}

} // namespace ondemand
} // namespace arm64
} // namespace simdjson

namespace simdjson {

// Success constructor: forwards the array into the base class.
simdjson_inline simdjson_result::simdjson_result(
  arm64::ondemand::array &&value
) noexcept
  : implementation_simdjson_result_base(
      std::forward(value)
    )
{
}
// Error constructor: stores only the error code.
simdjson_inline simdjson_result::simdjson_result(
  error_code error
) noexcept
  : implementation_simdjson_result_base(error)
{
}

// The members below all follow one pattern: return the stored error if there
// is one, otherwise forward the call to the wrapped array (`first`).
simdjson_inline simdjson_result simdjson_result::begin() noexcept {
  if (error()) { return error(); }
  return first.begin();
}
simdjson_inline simdjson_result simdjson_result::end() noexcept {
  if (error()) { return error(); }
  return first.end();
}
simdjson_inline simdjson_result simdjson_result::count_elements() & noexcept {
  if (error()) { return error(); }
  return first.count_elements();
}
simdjson_inline simdjson_result simdjson_result::is_empty() & noexcept {
  if (error()) { return error(); }
  return first.is_empty();
}
simdjson_inline simdjson_result simdjson_result::at(size_t index) noexcept {
  if (error()) { return error(); }
  return first.at(index);
}
simdjson_inline simdjson_result
simdjson_result::at_pointer(std::string_view json_pointer) noexcept {
  // Stored error first, otherwise forward to the wrapped array.
  if (error()) { return error(); }
  return first.at_pointer(json_pointer);
}
simdjson_inline simdjson_result simdjson_result::raw_json() noexcept {
  if (error()) { return error(); }
  return first.raw_json();
}
} // namespace simdjson

#endif // SIMDJSON_GENERIC_ONDEMAND_ARRAY_INL_H
/* end file simdjson/generic/ondemand/array-inl.h for arm64 */
/* including simdjson/generic/ondemand/array_iterator-inl.h for arm64: #include "simdjson/generic/ondemand/array_iterator-inl.h" */
/* begin file simdjson/generic/ondemand/array_iterator-inl.h for arm64 */
#ifndef SIMDJSON_GENERIC_ONDEMAND_ARRAY_ITERATOR_INL_H

/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */
/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_ARRAY_ITERATOR_INL_H */
/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */
/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array_iterator.h" */
/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value-inl.h" */
/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator-inl.h" */
/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */

namespace simdjson {
namespace arm64 {
namespace ondemand {

// Builds an array_iterator around a copy of the given value_iterator.
simdjson_inline array_iterator::array_iterator(const value_iterator &_iter) noexcept
  : iter{_iter}
{}

// Yields the current element as a value built from iter.child(). If the
// iterator carries an error, it is abandoned and that error is returned.
simdjson_inline simdjson_result array_iterator::operator*() noexcept {
  if (iter.error()) { iter.abandon(); return iter.error(); }
  return value(iter.child());
}
// Defined as the negation of operator!=.
simdjson_inline bool array_iterator::operator==(const array_iterator &other) const noexcept {
  return !(*this != other);
}
// The argument is ignored: the result is iter.is_open().
simdjson_inline bool array_iterator::operator!=(const array_iterator &) const noexcept {
  return iter.is_open();
}
// Every path returns *this; `error` is assigned in the checks but never
// returned to the caller.
simdjson_inline array_iterator &array_iterator::operator++() noexcept {
  error_code error;
  // PERF NOTE this is a safety rail ...
// users should exit loops as soon as they receive an error, so we'll never get here.
  // However, it does not seem to make a perf difference, so we add it out of an abundance of caution.
  if (( error = iter.error() )) { return *this; }
  // Order matters: skip the current child first, then check for a next element.
  if (( error = iter.skip_child() )) { return *this; }
  if (( error = iter.has_next_element().error() )) { return *this; }
  return *this;
}

} // namespace ondemand
} // namespace arm64
} // namespace simdjson

namespace simdjson {

// Success constructor: forwards the iterator into the base class and asserts
// that the wrapped value_iterator is valid.
simdjson_inline simdjson_result::simdjson_result(
  arm64::ondemand::array_iterator &&value
) noexcept
  : arm64::implementation_simdjson_result_base(std::forward(value))
{
  first.iter.assert_is_valid();
}
// Error constructor: pairs a default-constructed iterator with the error.
simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept
  : arm64::implementation_simdjson_result_base({}, error)
{
}

// Returns the stored error if any, otherwise dereferences the wrapped iterator.
simdjson_inline simdjson_result simdjson_result::operator*() noexcept {
  if (error()) { return error(); }
  return *first;
}
// When the wrapped iterator is not valid, equality is `!error()`; otherwise
// the wrapped iterators are compared.
simdjson_inline bool simdjson_result::operator==(const simdjson_result &other) const noexcept {
  if (!first.iter.is_valid()) { return !error(); }
  return first == other.first;
}
// When the wrapped iterator is not valid, inequality is `error()` converted to
// bool; otherwise the wrapped iterators are compared.
simdjson_inline bool simdjson_result::operator!=(const simdjson_result &other) const noexcept {
  if (!first.iter.is_valid()) { return error(); }
  return first != other.first;
}
simdjson_inline simdjson_result &simdjson_result::operator++() noexcept {
  // Clear the error if there is one, so we don't yield it twice
  if (error()) { second = SUCCESS; return *this; }
  ++(first);
  return *this;
}

} // namespace simdjson

#endif // SIMDJSON_GENERIC_ONDEMAND_ARRAY_ITERATOR_INL_H
/* end file simdjson/generic/ondemand/array_iterator-inl.h for arm64 */
/* including simdjson/generic/ondemand/document-inl.h for arm64: #include "simdjson/generic/ondemand/document-inl.h" */
/* begin file simdjson/generic/ondemand/document-inl.h for arm64 */
#ifndef SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_INL_H

/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */
/* amalgamation skipped (editor-only): #define
SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_INL_H */
/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */
/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array-inl.h" */
/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array_iterator.h" */
/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/document.h" */
/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator-inl.h" */
/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_type.h" */
/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/object-inl.h" */
/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string.h" */
/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value.h" */
/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator-inl.h" */
/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */

namespace simdjson {
namespace arm64 {
namespace ondemand {

// Takes the json_iterator from the caller and logs the start of the document.
simdjson_inline document::document(ondemand::json_iterator &&_iter) noexcept
  : iter{std::forward(_iter)}
{
  logger::log_start_value(iter, "document");
}

// Factory: builds a document from the given json_iterator.
simdjson_inline document document::start(json_iterator &&iter) noexcept {
  return document(std::forward(iter));
}

// The members below are thin forwarders to the underlying json_iterator.
inline void document::rewind() noexcept {
  iter.rewind();
}

inline std::string document::to_debug_string() noexcept {
  return iter.to_string();
}

inline simdjson_result document::current_location() const noexcept {
  return iter.current_location();
}

inline int32_t document::current_depth() const noexcept {
  return iter.depth();
}

inline bool document::at_end() const noexcept {
  return iter.at_end();
}


inline bool document::is_alive() noexcept {
  return iter.is_alive();
}
// Builds a value_iterator over this document's json_iterator, at depth 1 and
// positioned at iter.root_position().
simdjson_inline value_iterator document::resume_value_iterator() noexcept {
  return value_iterator(&iter, 1, iter.root_position());
}
simdjson_inline
value_iterator document::get_root_value_iterator() noexcept {
  // Currently identical to resume_value_iterator().
  return resume_value_iterator();
}
// At the root, starts the object via get_object(); otherwise resumes an object
// from a fresh root value_iterator.
simdjson_inline simdjson_result document::start_or_resume_object() noexcept {
  if (iter.at_root()) {
    return get_object();
  } else {
    return object::resume(resume_value_iterator());
  }
}
// Returns the root array or object as a value. Scalar documents are rejected
// with SCALAR_DOCUMENT_AS_VALUE (see the default case).
simdjson_inline simdjson_result document::get_value() noexcept {
  // Make sure we start any arrays or objects before returning, so that start_root_()
  // gets called.

  // It is the convention throughout the code that the macro `SIMDJSON_DEVELOPMENT_CHECKS` determines whether
  // we check for OUT_OF_ORDER_ITERATION. Proper on::demand code should never trigger this error.
#if SIMDJSON_DEVELOPMENT_CHECKS
  if (!iter.at_root()) { return OUT_OF_ORDER_ITERATION; }
#endif
  // assert_at_root() serves two purposes: in Debug mode, whether or not
  // SIMDJSON_DEVELOPMENT_CHECKS is set or not, it checks that we are at the root of
  // the document (this will typically be redundant). In release mode, it generates
  // SIMDJSON_ASSUME statements to allow the compiler to make assumptions.
  iter.assert_at_root();
  // Dispatch on the first byte of the document.
  switch (*iter.peek()) {
    case '[': {
      // The following lines check that the document ends with ].
      auto value_iterator = get_root_value_iterator();
      auto error = value_iterator.check_root_array();
      if(error) { return error; }
      // A second, fresh root iterator is used for the returned value.
      return value(get_root_value_iterator());
    }
    case '{': {
      // The following lines would check that the document ends with }.
      auto value_iterator = get_root_value_iterator();
      auto error = value_iterator.check_root_object();
      if(error) { return error; }
      // A second, fresh root iterator is used for the returned value.
      return value(get_root_value_iterator());
    }
    default:
      // Unfortunately, scalar documents are a special case in simdjson and they cannot
      // be safely converted to value instances.
return SCALAR_DOCUMENT_AS_VALUE;
  }
}
// Starts the root array from a fresh root value_iterator.
simdjson_inline simdjson_result document::get_array() & noexcept {
  auto value = get_root_value_iterator();
  return array::start_root(value);
}
// Starts the root object from a fresh root value_iterator.
simdjson_inline simdjson_result document::get_object() & noexcept {
  auto value = get_root_value_iterator();
  return object::start_root(value);
}

/**
 * We decided that calling 'get_double()' on the JSON document '1.233 blabla' should
 * give an error, so we check for trailing content. We want to disallow trailing
 * content.
 * Thus, in several implementations below, we pass a 'true' parameter value to
 * a get_root_value_iterator() method: this indicates that we disallow trailing content.
 */
simdjson_inline simdjson_result document::get_uint64() noexcept {
  return get_root_value_iterator().get_root_uint64(true);
}
simdjson_inline simdjson_result document::get_uint64_in_string() noexcept {
  return get_root_value_iterator().get_root_uint64_in_string(true);
}
simdjson_inline simdjson_result document::get_int64() noexcept {
  return get_root_value_iterator().get_root_int64(true);
}
simdjson_inline simdjson_result document::get_int64_in_string() noexcept {
  return get_root_value_iterator().get_root_int64_in_string(true);
}
simdjson_inline simdjson_result document::get_double() noexcept {
  return get_root_value_iterator().get_root_double(true);
}
simdjson_inline simdjson_result document::get_double_in_string() noexcept {
  return get_root_value_iterator().get_root_double_in_string(true);
}
// allow_replacement is passed through to get_root_string unchanged.
simdjson_inline simdjson_result document::get_string(bool allow_replacement) noexcept {
  return get_root_value_iterator().get_root_string(true, allow_replacement);
}
// Overload that writes into a caller-supplied receiver and returns only the error code.
template
simdjson_inline error_code document::get_string(string_type& receiver, bool allow_replacement) noexcept {
  return get_root_value_iterator().get_root_string(receiver, true, allow_replacement);
}
simdjson_inline simdjson_result document::get_wobbly_string() noexcept {
  return get_root_value_iterator().get_root_wobbly_string(true);
}
simdjson_inline
simdjson_result document::get_raw_json_string() noexcept {
  return get_root_value_iterator().get_root_raw_json_string(true);
}
simdjson_inline simdjson_result document::get_bool() noexcept {
  return get_root_value_iterator().get_root_bool(true);
}
simdjson_inline simdjson_result document::is_null() noexcept {
  return get_root_value_iterator().is_root_null(true);
}

// Explicit specializations of get() for lvalue documents. Each one forwards to
// the named getter it returns from; string access passes allow_replacement = false.
template<> simdjson_inline simdjson_result document::get() & noexcept { return get_array(); }
template<> simdjson_inline simdjson_result document::get() & noexcept { return get_object(); }
template<> simdjson_inline simdjson_result document::get() & noexcept { return get_raw_json_string(); }
template<> simdjson_inline simdjson_result document::get() & noexcept { return get_string(false); }
template<> simdjson_inline simdjson_result document::get() & noexcept { return get_double(); }
template<> simdjson_inline simdjson_result document::get() & noexcept { return get_uint64(); }
template<> simdjson_inline simdjson_result document::get() & noexcept { return get_int64(); }
template<> simdjson_inline simdjson_result document::get() & noexcept { return get_bool(); }
template<> simdjson_inline simdjson_result document::get() & noexcept { return get_value(); }

// Explicit specializations of get() for rvalue documents. There are seven of
// these against nine lvalue ones; the two lvalue-only ones forward to
// get_array() and get_object().
template<> simdjson_inline simdjson_result document::get() && noexcept { return get_raw_json_string(); }
template<> simdjson_inline simdjson_result document::get() && noexcept { return get_string(false); }
template<> simdjson_inline simdjson_result document::get() && noexcept { return std::forward(*this).get_double(); }
template<> simdjson_inline simdjson_result document::get() && noexcept { return std::forward(*this).get_uint64(); }
template<> simdjson_inline simdjson_result document::get() && noexcept { return std::forward(*this).get_int64(); }
template<> simdjson_inline simdjson_result document::get() && noexcept { return std::forward(*this).get_bool(); }
template<> simdjson_inline simdjson_result document::get() && noexcept { return get_value(); }

template
simdjson_inline error_code document::get(T &out) & noexcept {
  // Out-parameter form: chains get() with the result's get(out).
  return get().get(out);
}
template simdjson_inline error_code document::get(T &out) && noexcept {
  return std::forward(*this).get().get(out);
}

#if SIMDJSON_EXCEPTIONS
// Conversion operators, declared noexcept(false). Each returns the result of
// the matching getter; the array and object conversions are lvalue-only (&).
simdjson_inline document::operator array() & noexcept(false) { return get_array(); }
simdjson_inline document::operator object() & noexcept(false) { return get_object(); }
simdjson_inline document::operator uint64_t() noexcept(false) { return get_uint64(); }
simdjson_inline document::operator int64_t() noexcept(false) { return get_int64(); }
simdjson_inline document::operator double() noexcept(false) { return get_double(); }
simdjson_inline document::operator std::string_view() noexcept(false) { return get_string(false); }
simdjson_inline document::operator raw_json_string() noexcept(false) { return get_raw_json_string(); }
simdjson_inline document::operator bool() noexcept(false) { return get_bool(); }
simdjson_inline document::operator value() noexcept(false) { return get_value(); }
#endif
// Counts the elements of the root array and, on success, rewinds the document.
simdjson_inline simdjson_result document::count_elements() & noexcept {
  auto a = get_array();
  simdjson_result answer = a.count_elements();
  /* If there was an array, we are now left pointing at its first element. */
  if(answer.error() == SUCCESS) { rewind(); }
  return answer;
}
// Counts the fields of the root object and, on success, rewinds the document.
simdjson_inline simdjson_result document::count_fields() & noexcept {
  auto a = get_object();
  simdjson_result answer = a.count_fields();
  /* If there was an object, we are now left pointing at its first element.
*/
  if(answer.error() == SUCCESS) { rewind(); }
  return answer;
}
// Looks up element `index` of the root array.
simdjson_inline simdjson_result document::at(size_t index) & noexcept {
  auto a = get_array();
  return a.at(index);
}
// Begins iteration over the root array.
simdjson_inline simdjson_result document::begin() & noexcept {
  return get_array().begin();
}
// The end iterator is a default-constructed result.
simdjson_inline simdjson_result document::end() & noexcept {
  return {};
}

// Field lookups: each one goes through start_or_resume_object() and forwards
// the key to the same-named member of the resulting object.
simdjson_inline simdjson_result document::find_field(std::string_view key) & noexcept {
  return start_or_resume_object().find_field(key);
}
simdjson_inline simdjson_result document::find_field(const char *key) & noexcept {
  return start_or_resume_object().find_field(key);
}
simdjson_inline simdjson_result document::find_field_unordered(std::string_view key) & noexcept {
  return start_or_resume_object().find_field_unordered(key);
}
simdjson_inline simdjson_result document::find_field_unordered(const char *key) & noexcept {
  return start_or_resume_object().find_field_unordered(key);
}
simdjson_inline simdjson_result document::operator[](std::string_view key) & noexcept {
  return start_or_resume_object()[key];
}
simdjson_inline simdjson_result document::operator[](const char *key) & noexcept {
  return start_or_resume_object()[key];
}

// Skips the whole document (skip_child(0)). On failure the iterator is
// abandoned and the error is returned.
simdjson_inline error_code document::consume() noexcept {
  auto error = iter.skip_child(0);
  if(error) { iter.abandon(); }
  return error;
}

// Records where the root value starts, consumes the document, and returns the
// bytes between the start and the position reached after consume().
simdjson_inline simdjson_result document::raw_json() noexcept {
  auto _iter = get_root_value_iterator();
  const uint8_t * starting_point{_iter.peek_start()};
  auto error = consume();
  if(error) { return error; }
  // After 'consume()', we could be left pointing just beyond the document, but that
  // is ok because we are not going to dereference the final pointer position, we just
  // use it to compute the length in bytes.
const uint8_t * final_point{iter.unsafe_pointer()};
  return std::string_view(reinterpret_cast(starting_point), size_t(final_point - starting_point));
}

simdjson_inline simdjson_result document::type() noexcept {
  return get_root_value_iterator().type();
}

// A document is scalar when its type is neither array nor object; an error
// from type() is passed through.
simdjson_inline simdjson_result document::is_scalar() noexcept {
  json_type this_type;
  auto error = type().get(this_type);
  if(error) { return error; }
  return ! ((this_type == json_type::array) || (this_type == json_type::object));
}

simdjson_inline bool document::is_negative() noexcept {
  return get_root_value_iterator().is_root_negative();
}

simdjson_inline simdjson_result document::is_integer() noexcept {
  return get_root_value_iterator().is_root_integer(true);
}

simdjson_inline simdjson_result document::get_number_type() noexcept {
  return get_root_value_iterator().get_root_number_type(true);
}

simdjson_inline simdjson_result document::get_number() noexcept {
  return get_root_value_iterator().get_root_number(true);
}


// Returns a view that starts at peek_start() and spans peek_start_length() bytes.
simdjson_inline simdjson_result document::raw_json_token() noexcept {
  auto _iter = get_root_value_iterator();
  return std::string_view(reinterpret_cast(_iter.peek_start()), _iter.peek_start_length());
}

// Resolves a JSON pointer from the document root. An empty pointer returns the
// root via get_value(); otherwise the call is dispatched on the root type, and
// a root that is neither array nor object yields INVALID_JSON_POINTER.
simdjson_inline simdjson_result document::at_pointer(std::string_view json_pointer) noexcept {
  rewind(); // Rewind the document each time at_pointer is called
  if (json_pointer.empty()) {
    return this->get_value();
  }
  json_type t;
  SIMDJSON_TRY(type().get(t));
  switch (t)
  {
    case json_type::array:
      return (*this).get_array().at_pointer(json_pointer);
    case json_type::object:
      return (*this).get_object().at_pointer(json_pointer);
    default:
      return INVALID_JSON_POINTER;
  }
}

} // namespace ondemand
} // namespace arm64
} // namespace simdjson

namespace simdjson {

// Success constructor: forwards the document into the base class.
simdjson_inline simdjson_result::simdjson_result(
  arm64::ondemand::document &&value
) noexcept :
    implementation_simdjson_result_base(
      std::forward(value)
    )
{
}
// Error constructor: stores only the error code.
simdjson_inline simdjson_result::simdjson_result(
  error_code error
) noexcept :
implementation_simdjson_result_base(
      error
    )
{
}

// Every member below shares one contract: when this result carries no error
// the call is forwarded to the wrapped document (`first`); otherwise the
// stored error is returned and the document is left untouched.
simdjson_inline simdjson_result simdjson_result::count_elements() & noexcept {
  if (!error()) { return first.count_elements(); }
  return error();
}
simdjson_inline simdjson_result simdjson_result::count_fields() & noexcept {
  if (!error()) { return first.count_fields(); }
  return error();
}
simdjson_inline simdjson_result simdjson_result::at(size_t index) & noexcept {
  if (!error()) { return first.at(index); }
  return error();
}
// rewind() on the document returns nothing, so success is reported explicitly.
simdjson_inline error_code simdjson_result::rewind() noexcept {
  if (!error()) {
    first.rewind();
    return SUCCESS;
  }
  return error();
}
simdjson_inline simdjson_result simdjson_result::begin() & noexcept {
  if (!error()) { return first.begin(); }
  return error();
}
// The end iterator does not depend on the error state.
simdjson_inline simdjson_result simdjson_result::end() & noexcept {
  return {};
}
simdjson_inline simdjson_result simdjson_result::find_field_unordered(std::string_view key) & noexcept {
  if (!error()) { return first.find_field_unordered(key); }
  return error();
}
simdjson_inline simdjson_result simdjson_result::find_field_unordered(const char *key) & noexcept {
  if (!error()) { return first.find_field_unordered(key); }
  return error();
}
simdjson_inline simdjson_result simdjson_result::operator[](std::string_view key) & noexcept {
  if (!error()) { return first[key]; }
  return error();
}
simdjson_inline simdjson_result simdjson_result::operator[](const char *key) & noexcept {
  if (!error()) { return first[key]; }
  return error();
}
simdjson_inline simdjson_result simdjson_result::find_field(std::string_view key) & noexcept {
  if (!error()) { return first.find_field(key); }
  return error();
}
simdjson_inline simdjson_result simdjson_result::find_field(const char *key) & noexcept {
  if (!error()) { return first.find_field(key); }
  return error();
}
simdjson_inline simdjson_result simdjson_result::get_array() & noexcept {
  if (!error()) { return first.get_array(); }
  return error();
}
simdjson_inline simdjson_result simdjson_result::get_object() & noexcept {
  if (error()) {
return error(); } return first.get_object(); } simdjson_inline simdjson_result simdjson_result::get_uint64() noexcept { if (error()) { return error(); } return first.get_uint64(); } simdjson_inline simdjson_result simdjson_result::get_uint64_in_string() noexcept { if (error()) { return error(); } return first.get_uint64_in_string(); } simdjson_inline simdjson_result simdjson_result::get_int64() noexcept { if (error()) { return error(); } return first.get_int64(); } simdjson_inline simdjson_result simdjson_result::get_int64_in_string() noexcept { if (error()) { return error(); } return first.get_int64_in_string(); } simdjson_inline simdjson_result simdjson_result::get_double() noexcept { if (error()) { return error(); } return first.get_double(); } simdjson_inline simdjson_result simdjson_result::get_double_in_string() noexcept { if (error()) { return error(); } return first.get_double_in_string(); } simdjson_inline simdjson_result simdjson_result::get_string(bool allow_replacement) noexcept { if (error()) { return error(); } return first.get_string(allow_replacement); } template simdjson_inline error_code simdjson_result::get_string(string_type& receiver, bool allow_replacement) noexcept { if (error()) { return error(); } return first.get_string(receiver, allow_replacement); } simdjson_inline simdjson_result simdjson_result::get_wobbly_string() noexcept { if (error()) { return error(); } return first.get_wobbly_string(); } simdjson_inline simdjson_result simdjson_result::get_raw_json_string() noexcept { if (error()) { return error(); } return first.get_raw_json_string(); } simdjson_inline simdjson_result simdjson_result::get_bool() noexcept { if (error()) { return error(); } return first.get_bool(); } simdjson_inline simdjson_result simdjson_result::get_value() noexcept { if (error()) { return error(); } return first.get_value(); } simdjson_inline simdjson_result simdjson_result::is_null() noexcept { if (error()) { return error(); } return first.is_null(); } 
// -----------------------------------------------------------------------------
// Generic get() overloads.
//
// Four shapes: returning a result or filling `out`, each for lvalue (`&`) and
// rvalue (`&&`) objects. All return the stored error first when there is one.
// The rvalue overloads pass `first` on through std::forward.
// -----------------------------------------------------------------------------
template
simdjson_inline simdjson_result simdjson_result::get() & noexcept {
  if (error()) { return error(); }
  return first.get();
}
template
simdjson_inline simdjson_result simdjson_result::get() && noexcept {
  if (error()) { return error(); }
  return std::forward(first).get();
}
template
simdjson_inline error_code simdjson_result::get(T &out) & noexcept {
  if (error()) { return error(); }
  return first.get(out);
}
template
simdjson_inline error_code simdjson_result::get(T &out) && noexcept {
  if (error()) { return error(); }
  return std::forward(first).get(out);
}

// Explicit specializations. The lvalue-qualified forms are deleted; the
// rvalue-qualified forms hand `first` itself over via std::forward.
// NOTE(review): the specialized type is not visible for the first pair;
// presumably arm64::ondemand::document, matching the out-parameter pair below
// -- confirm.
template<> simdjson_inline simdjson_result simdjson_result::get() & noexcept = delete;
template<> simdjson_inline simdjson_result simdjson_result::get() && noexcept {
  if (error()) { return error(); }
  return std::forward(first);
}
template<> simdjson_inline error_code simdjson_result::get(arm64::ondemand::document &out) & noexcept = delete;
template<> simdjson_inline error_code simdjson_result::get(arm64::ondemand::document &out) && noexcept {
  if (error()) { return error(); }
  out = std::forward(first);
  return SUCCESS;
}

// --- Type and number queries -------------------------------------------------

simdjson_inline simdjson_result simdjson_result::type() noexcept {
  if (error()) { return error(); }
  return first.type();
}
simdjson_inline simdjson_result simdjson_result::is_scalar() noexcept {
  if (error()) { return error(); }
  return first.is_scalar();
}
// NOTE(review): the return type here is plain bool, so `return error()`
// converts the error value to bool instead of propagating it -- presumably any
// failure reads as `true`; confirm this is intended.
simdjson_inline bool simdjson_result::is_negative() noexcept {
  if (error()) { return error(); }
  return first.is_negative();
}
simdjson_inline simdjson_result simdjson_result::is_integer() noexcept {
  if (error()) { return error(); }
  return first.is_integer();
}
simdjson_inline simdjson_result simdjson_result::get_number_type() noexcept {
  if (error()) { return error(); }
  return first.get_number_type();
}
simdjson_inline simdjson_result simdjson_result::get_number() noexcept {
  if (error()) { return error(); }
  return first.get_number();
}

// --- Throwing conversions (only when exceptions are enabled) -----------------
// Each operator throws simdjson_error(error()) when an error is stored and
// otherwise returns `first`, relying on its conversion to the target type.
#if SIMDJSON_EXCEPTIONS
simdjson_inline simdjson_result::operator arm64::ondemand::array() & noexcept(false) {
  if (error()) { throw simdjson_error(error()); }
  return first;
}
simdjson_inline simdjson_result::operator arm64::ondemand::object() & noexcept(false) {
  if (error()) { throw simdjson_error(error()); }
  return first;
}
simdjson_inline simdjson_result::operator uint64_t() noexcept(false) {
  if (error()) { throw simdjson_error(error()); }
  return first;
}
simdjson_inline simdjson_result::operator int64_t() noexcept(false) {
  if (error()) { throw simdjson_error(error()); }
  return first;
}
simdjson_inline simdjson_result::operator double() noexcept(false) {
  if (error()) { throw simdjson_error(error()); }
  return first;
}
simdjson_inline simdjson_result::operator std::string_view() noexcept(false) {
  if (error()) { throw simdjson_error(error()); }
  return first;
}
simdjson_inline simdjson_result::operator arm64::ondemand::raw_json_string() noexcept(false) {
  if (error()) { throw simdjson_error(error()); }
  return first;
}
simdjson_inline simdjson_result::operator bool() noexcept(false) {
  if (error()) { throw simdjson_error(error()); }
  return first;
}
simdjson_inline simdjson_result::operator arm64::ondemand::value() noexcept(false) {
  if (error()) { throw simdjson_error(error()); }
  return first;
}
#endif

// --- Position and navigation -------------------------------------------------

simdjson_inline simdjson_result simdjson_result::current_location() noexcept {
  if (error()) { return error(); }
  return first.current_location();
}
// NOTE(review): bool return type -- `return error()` converts the error value
// to bool, so a stored error presumably reads as "at end"; confirm.
simdjson_inline bool simdjson_result::at_end() const noexcept {
  if (error()) { return error(); }
  return first.at_end();
}
// NOTE(review): int32_t return type -- on error the error value itself is
// converted and returned as if it were a depth; callers cannot tell it apart
// from a real depth. Confirm this is intended.
simdjson_inline int32_t simdjson_result::current_depth() const noexcept {
  if (error()) { return error(); }
  return first.current_depth();
}
simdjson_inline simdjson_result simdjson_result::raw_json_token() noexcept {
  if (error()) { return error(); }
  return first.raw_json_token();
}
simdjson_inline simdjson_result simdjson_result::at_pointer(std::string_view json_pointer) noexcept {
  if (error()) { return error(); }
  return first.at_pointer(json_pointer);
}

} // namespace simdjson

namespace simdjson {
namespace arm64 {
namespace ondemand { simdjson_inline document_reference::document_reference() noexcept : doc{nullptr} {} simdjson_inline document_reference::document_reference(document &d) noexcept : doc(&d) {} simdjson_inline void document_reference::rewind() noexcept { doc->rewind(); } simdjson_inline simdjson_result document_reference::get_array() & noexcept { return doc->get_array(); } simdjson_inline simdjson_result document_reference::get_object() & noexcept { return doc->get_object(); } /** * The document_reference instances are used primarily/solely for streams of JSON * documents. * We decided that calling 'get_double()' on the JSON document '1.233 blabla' should * give an error, so we check for trailing content. * * However, for streams of JSON documents, we want to be able to start from * "321" "321" "321" * and parse it successfully as a stream of JSON documents, calling get_uint64_in_string() * successfully each time. * * To achieve this result, we pass a 'false' to a get_root_value_iterator() method: * this indicates that we allow trailing content. 
*/ simdjson_inline simdjson_result document_reference::get_uint64() noexcept { return doc->get_root_value_iterator().get_root_uint64(false); } simdjson_inline simdjson_result document_reference::get_uint64_in_string() noexcept { return doc->get_root_value_iterator().get_root_uint64_in_string(false); } simdjson_inline simdjson_result document_reference::get_int64() noexcept { return doc->get_root_value_iterator().get_root_int64(false); } simdjson_inline simdjson_result document_reference::get_int64_in_string() noexcept { return doc->get_root_value_iterator().get_root_int64_in_string(false); } simdjson_inline simdjson_result document_reference::get_double() noexcept { return doc->get_root_value_iterator().get_root_double(false); } simdjson_inline simdjson_result document_reference::get_double_in_string() noexcept { return doc->get_root_value_iterator().get_root_double(false); } simdjson_inline simdjson_result document_reference::get_string(bool allow_replacement) noexcept { return doc->get_root_value_iterator().get_root_string(false, allow_replacement); } template simdjson_inline error_code document_reference::get_string(string_type& receiver, bool allow_replacement) noexcept { return doc->get_root_value_iterator().get_root_string(receiver, false, allow_replacement); } simdjson_inline simdjson_result document_reference::get_wobbly_string() noexcept { return doc->get_root_value_iterator().get_root_wobbly_string(false); } simdjson_inline simdjson_result document_reference::get_raw_json_string() noexcept { return doc->get_root_value_iterator().get_root_raw_json_string(false); } simdjson_inline simdjson_result document_reference::get_bool() noexcept { return doc->get_root_value_iterator().get_root_bool(false); } simdjson_inline simdjson_result document_reference::get_value() noexcept { return doc->get_value(); } simdjson_inline simdjson_result document_reference::is_null() noexcept { return doc->get_root_value_iterator().is_root_null(false); } #if SIMDJSON_EXCEPTIONS 
simdjson_inline document_reference::operator array() & noexcept(false) { return array(*doc); } simdjson_inline document_reference::operator object() & noexcept(false) { return object(*doc); } simdjson_inline document_reference::operator uint64_t() noexcept(false) { return get_uint64(); } simdjson_inline document_reference::operator int64_t() noexcept(false) { return get_int64(); } simdjson_inline document_reference::operator double() noexcept(false) { return get_double(); } simdjson_inline document_reference::operator std::string_view() noexcept(false) { return std::string_view(*doc); } simdjson_inline document_reference::operator raw_json_string() noexcept(false) { return raw_json_string(*doc); } simdjson_inline document_reference::operator bool() noexcept(false) { return get_bool(); } simdjson_inline document_reference::operator value() noexcept(false) { return value(*doc); } #endif simdjson_inline simdjson_result document_reference::count_elements() & noexcept { return doc->count_elements(); } simdjson_inline simdjson_result document_reference::count_fields() & noexcept { return doc->count_fields(); } simdjson_inline simdjson_result document_reference::at(size_t index) & noexcept { return doc->at(index); } simdjson_inline simdjson_result document_reference::begin() & noexcept { return doc->begin(); } simdjson_inline simdjson_result document_reference::end() & noexcept { return doc->end(); } simdjson_inline simdjson_result document_reference::find_field(std::string_view key) & noexcept { return doc->find_field(key); } simdjson_inline simdjson_result document_reference::find_field(const char *key) & noexcept { return doc->find_field(key); } simdjson_inline simdjson_result document_reference::operator[](std::string_view key) & noexcept { return (*doc)[key]; } simdjson_inline simdjson_result document_reference::operator[](const char *key) & noexcept { return (*doc)[key]; } simdjson_inline simdjson_result document_reference::find_field_unordered(std::string_view key) 
& noexcept { return doc->find_field_unordered(key); } simdjson_inline simdjson_result document_reference::find_field_unordered(const char *key) & noexcept { return doc->find_field_unordered(key); } simdjson_inline simdjson_result document_reference::type() noexcept { return doc->type(); } simdjson_inline simdjson_result document_reference::is_scalar() noexcept { return doc->is_scalar(); } simdjson_inline simdjson_result document_reference::current_location() noexcept { return doc->current_location(); } simdjson_inline int32_t document_reference::current_depth() const noexcept { return doc->current_depth(); } simdjson_inline bool document_reference::is_negative() noexcept { return doc->is_negative(); } simdjson_inline simdjson_result document_reference::is_integer() noexcept { return doc->get_root_value_iterator().is_root_integer(false); } simdjson_inline simdjson_result document_reference::get_number_type() noexcept { return doc->get_root_value_iterator().get_root_number_type(false); } simdjson_inline simdjson_result document_reference::get_number() noexcept { return doc->get_root_value_iterator().get_root_number(false); } simdjson_inline simdjson_result document_reference::raw_json_token() noexcept { return doc->raw_json_token(); } simdjson_inline simdjson_result document_reference::at_pointer(std::string_view json_pointer) noexcept { return doc->at_pointer(json_pointer); } simdjson_inline simdjson_result document_reference::raw_json() noexcept { return doc->raw_json();} simdjson_inline document_reference::operator document&() const noexcept { return *doc; } } // namespace ondemand } // namespace arm64 } // namespace simdjson namespace simdjson { simdjson_inline simdjson_result::simdjson_result(arm64::ondemand::document_reference value, error_code error) noexcept : implementation_simdjson_result_base(std::forward(value), error) {} simdjson_inline simdjson_result simdjson_result::count_elements() & noexcept { if (error()) { return error(); } return 
first.count_elements(); } simdjson_inline simdjson_result simdjson_result::count_fields() & noexcept { if (error()) { return error(); } return first.count_fields(); } simdjson_inline simdjson_result simdjson_result::at(size_t index) & noexcept { if (error()) { return error(); } return first.at(index); } simdjson_inline error_code simdjson_result::rewind() noexcept { if (error()) { return error(); } first.rewind(); return SUCCESS; } simdjson_inline simdjson_result simdjson_result::begin() & noexcept { if (error()) { return error(); } return first.begin(); } simdjson_inline simdjson_result simdjson_result::end() & noexcept { return {}; } simdjson_inline simdjson_result simdjson_result::find_field_unordered(std::string_view key) & noexcept { if (error()) { return error(); } return first.find_field_unordered(key); } simdjson_inline simdjson_result simdjson_result::find_field_unordered(const char *key) & noexcept { if (error()) { return error(); } return first.find_field_unordered(key); } simdjson_inline simdjson_result simdjson_result::operator[](std::string_view key) & noexcept { if (error()) { return error(); } return first[key]; } simdjson_inline simdjson_result simdjson_result::operator[](const char *key) & noexcept { if (error()) { return error(); } return first[key]; } simdjson_inline simdjson_result simdjson_result::find_field(std::string_view key) & noexcept { if (error()) { return error(); } return first.find_field(key); } simdjson_inline simdjson_result simdjson_result::find_field(const char *key) & noexcept { if (error()) { return error(); } return first.find_field(key); } simdjson_inline simdjson_result simdjson_result::get_array() & noexcept { if (error()) { return error(); } return first.get_array(); } simdjson_inline simdjson_result simdjson_result::get_object() & noexcept { if (error()) { return error(); } return first.get_object(); } simdjson_inline simdjson_result simdjson_result::get_uint64() noexcept { if (error()) { return error(); } return 
first.get_uint64(); } simdjson_inline simdjson_result simdjson_result::get_uint64_in_string() noexcept { if (error()) { return error(); } return first.get_uint64_in_string(); } simdjson_inline simdjson_result simdjson_result::get_int64() noexcept { if (error()) { return error(); } return first.get_int64(); } simdjson_inline simdjson_result simdjson_result::get_int64_in_string() noexcept { if (error()) { return error(); } return first.get_int64_in_string(); } simdjson_inline simdjson_result simdjson_result::get_double() noexcept { if (error()) { return error(); } return first.get_double(); } simdjson_inline simdjson_result simdjson_result::get_double_in_string() noexcept { if (error()) { return error(); } return first.get_double_in_string(); } simdjson_inline simdjson_result simdjson_result::get_string(bool allow_replacement) noexcept { if (error()) { return error(); } return first.get_string(allow_replacement); } template simdjson_inline error_code simdjson_result::get_string(string_type& receiver, bool allow_replacement) noexcept { if (error()) { return error(); } return first.get_string(receiver, allow_replacement); } simdjson_inline simdjson_result simdjson_result::get_wobbly_string() noexcept { if (error()) { return error(); } return first.get_wobbly_string(); } simdjson_inline simdjson_result simdjson_result::get_raw_json_string() noexcept { if (error()) { return error(); } return first.get_raw_json_string(); } simdjson_inline simdjson_result simdjson_result::get_bool() noexcept { if (error()) { return error(); } return first.get_bool(); } simdjson_inline simdjson_result simdjson_result::get_value() noexcept { if (error()) { return error(); } return first.get_value(); } simdjson_inline simdjson_result simdjson_result::is_null() noexcept { if (error()) { return error(); } return first.is_null(); } simdjson_inline simdjson_result simdjson_result::type() noexcept { if (error()) { return error(); } return first.type(); } simdjson_inline simdjson_result 
simdjson_result::is_scalar() noexcept { if (error()) { return error(); } return first.is_scalar(); } simdjson_inline simdjson_result simdjson_result::is_negative() noexcept { if (error()) { return error(); } return first.is_negative(); } simdjson_inline simdjson_result simdjson_result::is_integer() noexcept { if (error()) { return error(); } return first.is_integer(); } simdjson_inline simdjson_result simdjson_result::get_number_type() noexcept { if (error()) { return error(); } return first.get_number_type(); } simdjson_inline simdjson_result simdjson_result::get_number() noexcept { if (error()) { return error(); } return first.get_number(); } #if SIMDJSON_EXCEPTIONS simdjson_inline simdjson_result::operator arm64::ondemand::array() & noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } simdjson_inline simdjson_result::operator arm64::ondemand::object() & noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } simdjson_inline simdjson_result::operator uint64_t() noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } simdjson_inline simdjson_result::operator int64_t() noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } simdjson_inline simdjson_result::operator double() noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } simdjson_inline simdjson_result::operator std::string_view() noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } simdjson_inline simdjson_result::operator arm64::ondemand::raw_json_string() noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } simdjson_inline simdjson_result::operator bool() noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } simdjson_inline simdjson_result::operator arm64::ondemand::value() noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } #endif simdjson_inline 
simdjson_result simdjson_result::current_location() noexcept { if (error()) { return error(); } return first.current_location(); } simdjson_inline simdjson_result simdjson_result::raw_json_token() noexcept { if (error()) { return error(); } return first.raw_json_token(); } simdjson_inline simdjson_result simdjson_result::at_pointer(std::string_view json_pointer) noexcept { if (error()) { return error(); } return first.at_pointer(json_pointer); } } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_INL_H /* end file simdjson/generic/ondemand/document-inl.h for arm64 */ /* including simdjson/generic/ondemand/document_stream-inl.h for arm64: #include "simdjson/generic/ondemand/document_stream-inl.h" */ /* begin file simdjson/generic/ondemand/document_stream-inl.h for arm64 */ #ifndef SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_STREAM_INL_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_STREAM_INL_H */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/document_stream.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/document-inl.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base-inl.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ #include #include namespace simdjson { namespace arm64 { namespace ondemand { #ifdef SIMDJSON_THREADS_ENABLED inline void stage1_worker::finish() { // After calling "run" someone would call finish() to wait // for the end of the processing. // This function will wait until either the thread has done // the processing or, else, the destructor has been called. 
std::unique_lock lock(locking_mutex); cond_var.wait(lock, [this]{return has_work == false;}); } inline stage1_worker::~stage1_worker() { // The thread may never outlive the stage1_worker instance // and will always be stopped/joined before the stage1_worker // instance is gone. stop_thread(); } inline void stage1_worker::start_thread() { std::unique_lock lock(locking_mutex); if(thread.joinable()) { return; // This should never happen but we never want to create more than one thread. } thread = std::thread([this]{ while(true) { std::unique_lock thread_lock(locking_mutex); // We wait for either "run" or "stop_thread" to be called. cond_var.wait(thread_lock, [this]{return has_work || !can_work;}); // If, for some reason, the stop_thread() method was called (i.e., the // destructor of stage1_worker is called, then we want to immediately destroy // the thread (and not do any more processing). if(!can_work) { break; } this->owner->stage1_thread_error = this->owner->run_stage1(*this->stage1_thread_parser, this->_next_batch_start); this->has_work = false; // The condition variable call should be moved after thread_lock.unlock() for performance // reasons but thread sanitizers may report it as a data race if we do. // See https://stackoverflow.com/questions/35775501/c-should-condition-variable-be-notified-under-lock cond_var.notify_one(); // will notify "finish" thread_lock.unlock(); } } ); } inline void stage1_worker::stop_thread() { std::unique_lock lock(locking_mutex); // We have to make sure that all locks can be released. 
can_work = false; has_work = false; cond_var.notify_all(); lock.unlock(); if(thread.joinable()) { thread.join(); } } inline void stage1_worker::run(document_stream * ds, parser * stage1, size_t next_batch_start) { std::unique_lock lock(locking_mutex); owner = ds; _next_batch_start = next_batch_start; stage1_thread_parser = stage1; has_work = true; // The condition variable call should be moved after thread_lock.unlock() for performance // reasons but thread sanitizers may report it as a data race if we do. // See https://stackoverflow.com/questions/35775501/c-should-condition-variable-be-notified-under-lock cond_var.notify_one(); // will notify the thread lock that we have work lock.unlock(); } #endif // SIMDJSON_THREADS_ENABLED simdjson_inline document_stream::document_stream( ondemand::parser &_parser, const uint8_t *_buf, size_t _len, size_t _batch_size, bool _allow_comma_separated ) noexcept : parser{&_parser}, buf{_buf}, len{_len}, batch_size{_batch_size <= MINIMAL_BATCH_SIZE ? MINIMAL_BATCH_SIZE : _batch_size}, allow_comma_separated{_allow_comma_separated}, error{SUCCESS} #ifdef SIMDJSON_THREADS_ENABLED , use_thread(_parser.threaded) // we need to make a copy because _parser.threaded can change #endif { #ifdef SIMDJSON_THREADS_ENABLED if(worker.get() == nullptr) { error = MEMALLOC; } #endif } simdjson_inline document_stream::document_stream() noexcept : parser{nullptr}, buf{nullptr}, len{0}, batch_size{0}, allow_comma_separated{false}, error{UNINITIALIZED} #ifdef SIMDJSON_THREADS_ENABLED , use_thread(false) #endif { } simdjson_inline document_stream::~document_stream() noexcept { #ifdef SIMDJSON_THREADS_ENABLED worker.reset(); #endif } inline size_t document_stream::size_in_bytes() const noexcept { return len; } inline size_t document_stream::truncated_bytes() const noexcept { if(error == CAPACITY) { return len - batch_start; } return parser->implementation->structural_indexes[parser->implementation->n_structural_indexes] - 
parser->implementation->structural_indexes[parser->implementation->n_structural_indexes + 1]; } simdjson_inline document_stream::iterator::iterator() noexcept : stream{nullptr}, finished{true} { } simdjson_inline document_stream::iterator::iterator(document_stream* _stream, bool is_end) noexcept : stream{_stream}, finished{is_end} { } simdjson_inline simdjson_result document_stream::iterator::operator*() noexcept { //if(stream->error) { return stream->error; } return simdjson_result(stream->doc, stream->error); } simdjson_inline document_stream::iterator& document_stream::iterator::operator++() noexcept { // If there is an error, then we want the iterator // to be finished, no matter what. (E.g., we do not // keep generating documents with errors, or go beyond // a document with errors.) // // Users do not have to call "operator*()" when they use operator++, // so we need to end the stream in the operator++ function. // // Note that setting finished = true is essential otherwise // we would enter an infinite loop. if (stream->error) { finished = true; } // Note that stream->error() is guarded against error conditions // (it will immediately return if stream->error casts to false). // In effect, this next function does nothing when (stream->error) // is true (hence the risk of an infinite loop). stream->next(); // If that was the last document, we're finished. // It is the only type of error we do not want to appear // in operator*. if (stream->error == EMPTY) { finished = true; } // If we had any other kind of error (not EMPTY) then we want // to pass it along to the operator* and we cannot mark the result // as "finished" just yet. return *this; } simdjson_inline bool document_stream::iterator::operator!=(const document_stream::iterator &other) const noexcept { return finished != other.finished; } simdjson_inline document_stream::iterator document_stream::begin() noexcept { start(); // If there are no documents, we're finished. 
return iterator(this, error == EMPTY); } simdjson_inline document_stream::iterator document_stream::end() noexcept { return iterator(this, true); } inline void document_stream::start() noexcept { if (error) { return; } error = parser->allocate(batch_size); if (error) { return; } // Always run the first stage 1 parse immediately batch_start = 0; error = run_stage1(*parser, batch_start); while(error == EMPTY) { // In exceptional cases, we may start with an empty block batch_start = next_batch_start(); if (batch_start >= len) { return; } error = run_stage1(*parser, batch_start); } if (error) { return; } doc_index = batch_start; doc = document(json_iterator(&buf[batch_start], parser)); doc.iter._streaming = true; #ifdef SIMDJSON_THREADS_ENABLED if (use_thread && next_batch_start() < len) { // Kick off the first thread on next batch if needed error = stage1_thread_parser.allocate(batch_size); if (error) { return; } worker->start_thread(); start_stage1_thread(); if (error) { return; } } #endif // SIMDJSON_THREADS_ENABLED } inline void document_stream::next() noexcept { // We always enter at once once in an error condition. if (error) { return; } next_document(); if (error) { return; } auto cur_struct_index = doc.iter._root - parser->implementation->structural_indexes.get(); doc_index = batch_start + parser->implementation->structural_indexes[cur_struct_index]; // Check if at end of structural indexes (i.e. 
at end of batch) if(cur_struct_index >= static_cast(parser->implementation->n_structural_indexes)) { error = EMPTY; // Load another batch (if available) while (error == EMPTY) { batch_start = next_batch_start(); if (batch_start >= len) { break; } #ifdef SIMDJSON_THREADS_ENABLED if(use_thread) { load_from_stage1_thread(); } else { error = run_stage1(*parser, batch_start); } #else error = run_stage1(*parser, batch_start); #endif /** * Whenever we move to another window, we need to update all pointers to make * it appear as if the input buffer started at the beginning of the window. * * Take this input: * * {"z":5} {"1":1,"2":2,"4":4} [7, 10, 9] [15, 11, 12, 13] [154, 110, 112, 1311] * * Say you process the following window... * * '{"z":5} {"1":1,"2":2,"4":4} [7, 10, 9]' * * When you do so, the json_iterator has a pointer at the beginning of the memory region * (pointing at the beginning of '{"z"...'. * * When you move to the window that starts at... * * '[7, 10, 9] [15, 11, 12, 13] ... * * then it is not sufficient to just run stage 1. You also need to re-anchor the * json_iterator so that it believes we are starting at '[7, 10, 9]...'. * * Under the DOM front-end, this gets done automatically because the parser owns * the pointer the data, and when you call stage1 and then stage2 on the same * parser, then stage2 will run on the pointer acquired by stage1. * * That is, stage1 calls "this->buf = _buf" so the parser remembers the buffer that * we used. But json_iterator has no callback when stage1 is called on the parser. * In fact, I think that the parser is unaware of json_iterator. * * * So we need to re-anchor the json_iterator after each call to stage 1 so that * all of the pointers are in sync. */ doc.iter = json_iterator(&buf[batch_start], parser); doc.iter._streaming = true; /** * End of resync. */ if (error) { continue; } // If the error was EMPTY, we may want to load another batch. 
doc_index = batch_start; } } } inline void document_stream::next_document() noexcept { // Go to next place where depth=0 (document depth) error = doc.iter.skip_child(0); if (error) { return; } // Always set depth=1 at the start of document doc.iter._depth = 1; // consume comma if comma separated is allowed if (allow_comma_separated) { doc.iter.consume_character(','); } // Resets the string buffer at the beginning, thus invalidating the strings. doc.iter._string_buf_loc = parser->string_buf.get(); doc.iter._root = doc.iter.position(); } inline size_t document_stream::next_batch_start() const noexcept { return batch_start + parser->implementation->structural_indexes[parser->implementation->n_structural_indexes]; } inline error_code document_stream::run_stage1(ondemand::parser &p, size_t _batch_start) noexcept { // This code only updates the structural index in the parser, it does not update any json_iterator // instance. size_t remaining = len - _batch_start; if (remaining <= batch_size) { return p.implementation->stage1(&buf[_batch_start], remaining, stage1_mode::streaming_final); } else { return p.implementation->stage1(&buf[_batch_start], batch_size, stage1_mode::streaming_partial); } } simdjson_inline size_t document_stream::iterator::current_index() const noexcept { return stream->doc_index; } simdjson_inline std::string_view document_stream::iterator::source() const noexcept { auto depth = stream->doc.iter.depth(); auto cur_struct_index = stream->doc.iter._root - stream->parser->implementation->structural_indexes.get(); // If at root, process the first token to determine if scalar value if (stream->doc.iter.at_root()) { switch (stream->buf[stream->batch_start + stream->parser->implementation->structural_indexes[cur_struct_index]]) { case '{': case '[': // Depth=1 already at start of document break; case '}': case ']': depth--; break; default: // Scalar value document // TODO: Remove any trailing whitespaces // This returns a string spanning from start of value 
to the beginning of the next document (excluded) return std::string_view(reinterpret_cast(stream->buf) + current_index(), stream->parser->implementation->structural_indexes[++cur_struct_index] - current_index() - 1); } cur_struct_index++; } while (cur_struct_index <= static_cast(stream->parser->implementation->n_structural_indexes)) { switch (stream->buf[stream->batch_start + stream->parser->implementation->structural_indexes[cur_struct_index]]) { case '{': case '[': depth++; break; case '}': case ']': depth--; break; } if (depth == 0) { break; } cur_struct_index++; } return std::string_view(reinterpret_cast(stream->buf) + current_index(), stream->parser->implementation->structural_indexes[cur_struct_index] - current_index() + stream->batch_start + 1);; } inline error_code document_stream::iterator::error() const noexcept { return stream->error; } #ifdef SIMDJSON_THREADS_ENABLED inline void document_stream::load_from_stage1_thread() noexcept { worker->finish(); // Swap to the parser that was loaded up in the thread. Make sure the parser has // enough memory to swap to, as well. std::swap(stage1_thread_parser,*parser); error = stage1_thread_error; if (error) { return; } // If there's anything left, start the stage 1 thread! if (next_batch_start() < len) { start_stage1_thread(); } } inline void document_stream::start_stage1_thread() noexcept { // we call the thread on a lambda that will update // this->stage1_thread_error // there is only one thread that may write to this value // TODO this is NOT exception-safe. 
this->stage1_thread_error = UNINITIALIZED; // In case something goes wrong, make sure it's an error size_t _next_batch_start = this->next_batch_start(); worker->run(this, & this->stage1_thread_parser, _next_batch_start); } #endif // SIMDJSON_THREADS_ENABLED } // namespace ondemand } // namespace arm64 } // namespace simdjson namespace simdjson { simdjson_inline simdjson_result::simdjson_result( error_code error ) noexcept : implementation_simdjson_result_base(error) { } simdjson_inline simdjson_result::simdjson_result( arm64::ondemand::document_stream &&value ) noexcept : implementation_simdjson_result_base( std::forward(value) ) { } } #endif // SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_STREAM_INL_H /* end file simdjson/generic/ondemand/document_stream-inl.h for arm64 */ /* including simdjson/generic/ondemand/field-inl.h for arm64: #include "simdjson/generic/ondemand/field-inl.h" */ /* begin file simdjson/generic/ondemand/field-inl.h for arm64 */ #ifndef SIMDJSON_GENERIC_ONDEMAND_FIELD_INL_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_FIELD_INL_H */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/field.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value-inl.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator-inl.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { namespace arm64 { namespace ondemand { // clang 6 doesn't think the default constructor can be noexcept, so we make it explicit simdjson_inline field::field() noexcept : std::pair() {} simdjson_inline field::field(raw_json_string key, ondemand::value &&value) noexcept : std::pair(key, std::forward(value)) { } simdjson_inline simdjson_result 
field::start(value_iterator &parent_iter) noexcept {
  // Read the key, step to the value, then delegate to the overload that
  // takes the key explicitly. Both steps are wrapped in SIMDJSON_TRY.
  raw_json_string key;
  SIMDJSON_TRY( parent_iter.field_key().get(key) );
  SIMDJSON_TRY( parent_iter.field_value() );
  return field::start(parent_iter, key);
}

// Builds a field from an already-read key and the child of `parent_iter`.
simdjson_inline simdjson_result field::start(const value_iterator &parent_iter, raw_json_string key) noexcept {
    return field(key, parent_iter.child());
}

// Unescapes the key through the value's json_iterator, then calls
// first.consume() on the raw key before returning the unescaped result.
simdjson_inline simdjson_warn_unused simdjson_result field::unescaped_key(bool allow_replacement) noexcept {
  SIMDJSON_ASSUME(first.buf != nullptr); // We would like to call .alive() but Visual Studio won't let us.
  simdjson_result answer = first.unescape(second.iter.json_iter(), allow_replacement);
  first.consume();
  return answer;
}

// Returns the raw (still escaped) key stored in `first`.
simdjson_inline raw_json_string field::key() const noexcept {
  SIMDJSON_ASSUME(first.buf != nullptr); // We would like to call .alive() but Visual Studio won't let us.
  return first;
}

// Lvalue access: returns a reference to the stored value (`second`).
simdjson_inline value &field::value() & noexcept {
  return second;
}

// Rvalue access: returns the stored value by value.
simdjson_inline value field::value() && noexcept {
  return std::forward(*this).second;
}

} // namespace ondemand
} // namespace arm64
} // namespace simdjson

namespace simdjson {

// NOTE(review): template argument lists look stripped in the result
// wrappers below (`simdjson_result::...`, `std::forward(value)`) — confirm
// against the upstream header.

// Value result: forwards the field to the result base class.
simdjson_inline simdjson_result::simdjson_result(
  arm64::ondemand::field &&value
) noexcept :
    implementation_simdjson_result_base(
      std::forward(value)
    )
{
}
// Error-only result: forwards `error` to the result base class.
simdjson_inline simdjson_result::simdjson_result(
  error_code error
) noexcept :
    implementation_simdjson_result_base(error)
{
}

// Each accessor below returns the stored error if there is one, and
// otherwise forwards to the same-named member of the wrapped field.
simdjson_inline simdjson_result simdjson_result::key() noexcept {
  if (error()) { return error(); }
  return first.key();
}
simdjson_inline simdjson_result simdjson_result::unescaped_key(bool allow_replacement) noexcept {
  if (error()) { return error(); }
  return first.unescaped_key(allow_replacement);
}
simdjson_inline simdjson_result simdjson_result::value() noexcept {
  if (error()) { return error(); }
  return std::move(first.value());
}

} // namespace simdjson

#endif // SIMDJSON_GENERIC_ONDEMAND_FIELD_INL_H
/* end file simdjson/generic/ondemand/field-inl.h for arm64 */
/* including
simdjson/generic/ondemand/json_iterator-inl.h for arm64: #include "simdjson/generic/ondemand/json_iterator-inl.h" */
/* begin file simdjson/generic/ondemand/json_iterator-inl.h for arm64 */
#ifndef SIMDJSON_GENERIC_ONDEMAND_JSON_ITERATOR_INL_H

/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */
/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_JSON_ITERATOR_INL_H */
/* amalgamation skipped (editor-only): #include "simdjson/internal/dom_parser_implementation.h" */
/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */
/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator.h" */
/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/parser.h" */
/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string.h" */
/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/logger-inl.h" */
/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/parser-inl.h" */
/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/token_iterator-inl.h" */
/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */

namespace simdjson {
namespace arm64 {
namespace ondemand {

// Move constructor: copies every member from `other`, then nulls
// other.parser so the moved-from iterator no longer reports itself alive.
// NOTE(review): `std::forward(other.token)` appears to have lost its
// template argument — confirm against the upstream header.
simdjson_inline json_iterator::json_iterator(json_iterator &&other) noexcept
  : token(std::forward(other.token)),
    parser{other.parser},
    _string_buf_loc{other._string_buf_loc},
    error{other.error},
    _depth{other._depth},
    _root{other._root},
    _streaming{other._streaming}
{
  other.parser = nullptr;
}

// Move assignment: member-wise copy from `other`, then nulls other.parser.
// NOTE(review): there is no self-assignment check, so `x = std::move(x)`
// would leave x.parser null — confirm that callers never do this.
simdjson_inline json_iterator &json_iterator::operator=(json_iterator &&other) noexcept {
  token = other.token;
  parser = other.parser;
  _string_buf_loc = other._string_buf_loc;
  error = other.error;
  _depth = other._depth;
  _root = other._root;
  _streaming = other._streaming;
  other.parser = nullptr;
  return *this;
}

simdjson_inline json_iterator::json_iterator(const uint8_t *buf, ondemand::parser
*_parser) noexcept
  : token(buf, &_parser->implementation->structural_indexes[0]),
    parser{_parser},
    _string_buf_loc{parser->string_buf.get()},
    _depth{1},
    _root{parser->implementation->structural_indexes.get()},
    _streaming{false}
{
  // Starts at depth 1 on the first structural index, in non-streaming mode.
  logger::log_headers();
#if SIMDJSON_CHECK_EOF
  assert_more_tokens();
#endif
}

// Resets the iterator to the root token: position, string buffer cursor and
// depth are restored to their initial values.
inline void json_iterator::rewind() noexcept {
  token.set_position( root_position() );
  logger::log_headers(); // We start again
  _string_buf_loc = parser->string_buf.get();
  _depth = 1;
}

// Returns true when the tokens from root_position() through peek_last()
// contain as many closing brackets/braces as opening ones. Works on a copy
// of the token iterator, so this iterator's position is left untouched.
inline bool json_iterator::balanced() const noexcept {
  token_iterator ti(token);
  int32_t count{0};
  ti.set_position( root_position() );
  while(ti.peek() <= peek_last()) {
    switch (*ti.return_current_and_advance())
    {
    case '[': case '{':
      count++;
      break;
    case ']': case '}':
      count--;
      break;
    default:
      break;
    }
  }
  return count == 0;
}


// GCC 7 warns when the first line of this function is inlined away into oblivion due to the caller
// relating depth and parent_depth, which is a desired effect. The warning does not show up if the
// skip_child() function is not marked inline).
SIMDJSON_PUSH_DISABLE_WARNINGS
SIMDJSON_DISABLE_STRICT_OVERFLOW_WARNING
// Advances past tokens until depth() is at or below parent_depth. Returns
// SUCCESS once that happens, or an error reported through report_error().
simdjson_warn_unused simdjson_inline error_code json_iterator::skip_child(depth_t parent_depth) noexcept {
  // Nothing to skip if we are already at (or above) the requested depth.
  if (depth() <= parent_depth) { return SUCCESS; }
  switch (*return_current_and_advance()) {
    // TODO consider whether matching braces is a requirement: if non-matching braces indicates
    // *missing* braces, then future lookups are not in the object/arrays they think they are,
    // violating the rule "validate enough structure that the user can be confident they are
    // looking at the right values."
// PERF TODO we can eliminate the switch here with a lookup of how much to add to depth // For the first open array/object in a value, we've already incremented depth, so keep it the same // We never stop at colon, but if we did, it wouldn't affect depth case '[': case '{': case ':': logger::log_start_value(*this, "skip"); break; // If there is a comma, we have just finished a value in an array/object, and need to get back in case ',': logger::log_value(*this, "skip"); break; // ] or } means we just finished a value and need to jump out of the array/object case ']': case '}': logger::log_end_value(*this, "skip"); _depth--; if (depth() <= parent_depth) { return SUCCESS; } #if SIMDJSON_CHECK_EOF // If there are no more tokens, the parent is incomplete. if (at_end()) { return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "Missing [ or { at start"); } #endif // SIMDJSON_CHECK_EOF break; case '"': if(*peek() == ':') { // We are at a key!!! // This might happen if you just started an object and you skip it immediately. // Performance note: it would be nice to get rid of this check as it is somewhat // expensive. // https://github.com/simdjson/simdjson/issues/1742 logger::log_value(*this, "key"); return_current_and_advance(); // eat up the ':' break; // important!!! } simdjson_fallthrough; // Anything else must be a scalar value default: // For the first scalar, we will have incremented depth already, so we decrement it here. 
logger::log_value(*this, "skip");
      _depth--;
      if (depth() <= parent_depth) { return SUCCESS; }
      break;
  }

  // Now that we've considered the first value, we only increment/decrement for arrays/objects
  while (position() < end_position()) {
    switch (*return_current_and_advance()) {
      case '[': case '{':
        logger::log_start_value(*this, "skip");
        _depth++;
        break;
      // TODO consider whether matching braces is a requirement: if non-matching braces indicates
      // *missing* braces, then future lookups are not in the object/arrays they think they are,
      // violating the rule "validate enough structure that the user can be confident they are
      // looking at the right values."
      // PERF TODO we can eliminate the switch here with a lookup of how much to add to depth
      case ']': case '}':
        logger::log_end_value(*this, "skip");
        _depth--;
        if (depth() <= parent_depth) { return SUCCESS; }
        break;
      default:
        logger::log_value(*this, "skip", "");
        break;
    }
  }

  // Ran out of tokens before getting back to parent_depth.
  return report_error(TAPE_ERROR, "not enough close braces");
}

SIMDJSON_POP_DISABLE_WARNINGS

// True when the current position is the root token.
simdjson_inline bool json_iterator::at_root() const noexcept {
  return position() == root_position();
}

// True when the parser recorded exactly one structural index.
simdjson_inline bool json_iterator::is_single_token() const noexcept {
  return parser->implementation->n_structural_indexes == 1;
}

// Returns the _streaming flag.
simdjson_inline bool json_iterator::streaming() const noexcept {
  return _streaming;
}

// Returns the stored root token position.
simdjson_inline token_position json_iterator::root_position() const noexcept {
  return _root;
}

// Asserts (via SIMDJSON_ASSUME) that the iterator is at depth 1.
simdjson_inline void json_iterator::assert_at_document_depth() const noexcept {
  SIMDJSON_ASSUME( _depth == 1 );
}

// Asserts (via SIMDJSON_ASSUME) that the iterator is at depth 1 and, except
// under clang for Visual Studio, positioned on the root token.
simdjson_inline void json_iterator::assert_at_root() const noexcept {
  SIMDJSON_ASSUME( _depth == 1 );
#ifndef SIMDJSON_CLANG_VISUAL_STUDIO
  // Under Visual Studio, the next SIMDJSON_ASSUME fails with: the argument
  // has side effects that will be discarded.
SIMDJSON_ASSUME( token.position() == _root ); #endif } simdjson_inline void json_iterator::assert_more_tokens(uint32_t required_tokens) const noexcept { assert_valid_position(token._position + required_tokens - 1); } simdjson_inline void json_iterator::assert_valid_position(token_position position) const noexcept { #ifndef SIMDJSON_CLANG_VISUAL_STUDIO SIMDJSON_ASSUME( position >= &parser->implementation->structural_indexes[0] ); SIMDJSON_ASSUME( position < &parser->implementation->structural_indexes[parser->implementation->n_structural_indexes] ); #endif } simdjson_inline bool json_iterator::at_end() const noexcept { return position() == end_position(); } simdjson_inline token_position json_iterator::end_position() const noexcept { uint32_t n_structural_indexes{parser->implementation->n_structural_indexes}; return &parser->implementation->structural_indexes[n_structural_indexes]; } inline std::string json_iterator::to_string() const noexcept { if( !is_alive() ) { return "dead json_iterator instance"; } const char * current_structural = reinterpret_cast(token.peek()); return std::string("json_iterator [ depth : ") + std::to_string(_depth) + std::string(", structural : '") + std::string(current_structural,1) + std::string("', offset : ") + std::to_string(token.current_offset()) + std::string("', error : ") + error_message(error) + std::string(" ]"); } inline simdjson_result json_iterator::current_location() const noexcept { if (!is_alive()) { // Unrecoverable error if (!at_root()) { return reinterpret_cast(token.peek(-1)); } else { return reinterpret_cast(token.peek()); } } if (at_end()) { return OUT_OF_BOUNDS; } return reinterpret_cast(token.peek()); } simdjson_inline bool json_iterator::is_alive() const noexcept { return parser; } simdjson_inline void json_iterator::abandon() noexcept { parser = nullptr; _depth = 0; } simdjson_inline const uint8_t *json_iterator::return_current_and_advance() noexcept { #if SIMDJSON_CHECK_EOF assert_more_tokens(); #endif // 
// SIMDJSON_CHECK_EOF
  return token.return_current_and_advance();
}

// Returns the current token pointer with no bounds assertion at all.
simdjson_inline const uint8_t *json_iterator::unsafe_pointer() const noexcept {
  // deliberately done without safety guard:
  return token.peek();
}

// Returns the token `delta` positions from the current one; when
// SIMDJSON_CHECK_EOF is set, asserts that delta+1 tokens are available.
simdjson_inline const uint8_t *json_iterator::peek(int32_t delta) const noexcept {
#if SIMDJSON_CHECK_EOF
  assert_more_tokens(delta+1);
#endif // SIMDJSON_CHECK_EOF
  return token.peek(delta);
}

// Same as peek(delta), but forwards to token.peek_length(delta).
simdjson_inline uint32_t json_iterator::peek_length(int32_t delta) const noexcept {
#if SIMDJSON_CHECK_EOF
  assert_more_tokens(delta+1);
#endif // #if SIMDJSON_CHECK_EOF
  return token.peek_length(delta);
}

// Returns the token at an absolute position. No position assertion is
// performed here (see the comment below).
simdjson_inline const uint8_t *json_iterator::peek(token_position position) const noexcept {
  // todo: currently we require end-of-string buffering, but the following
  // assert_valid_position should be turned on if/when we lift that condition.
  // assert_valid_position(position);
  // This is almost surely related to SIMDJSON_CHECK_EOF but given that SIMDJSON_CHECK_EOF
  // is ON by default, we have no choice but to disable it for real with a comment.
  return token.peek(position);
}

// Forwards to token.peek_length(position); when SIMDJSON_CHECK_EOF is set,
// first asserts that `position` is valid.
simdjson_inline uint32_t json_iterator::peek_length(token_position position) const noexcept {
#if SIMDJSON_CHECK_EOF
  assert_valid_position(position);
#endif // SIMDJSON_CHECK_EOF
  return token.peek_length(position);
}

// Returns the position of the last structural index.
simdjson_inline token_position json_iterator::last_position() const noexcept {
  // The following line fails under some compilers...
  // SIMDJSON_ASSUME(parser->implementation->n_structural_indexes > 0);
  // since it has side-effects.
uint32_t n_structural_indexes{parser->implementation->n_structural_indexes};
  // Copy the count into a local first so the assumption has no side effects.
  SIMDJSON_ASSUME(n_structural_indexes > 0);
  return &parser->implementation->structural_indexes[n_structural_indexes - 1];
}

// Returns the token at last_position().
simdjson_inline const uint8_t *json_iterator::peek_last() const noexcept {
  return token.peek(last_position());
}

// Moves up exactly one level: the current depth must be parent_depth + 1.
simdjson_inline void json_iterator::ascend_to(depth_t parent_depth) noexcept {
  SIMDJSON_ASSUME(parent_depth >= 0 && parent_depth < INT32_MAX - 1);
  SIMDJSON_ASSUME(_depth == parent_depth + 1);
  _depth = parent_depth;
}

// Moves down exactly one level: the current depth must be child_depth - 1.
simdjson_inline void json_iterator::descend_to(depth_t child_depth) noexcept {
  SIMDJSON_ASSUME(child_depth >= 1 && child_depth < INT32_MAX);
  SIMDJSON_ASSUME(_depth == child_depth - 1);
  _depth = child_depth;
}

// Returns the current depth.
simdjson_inline depth_t json_iterator::depth() const noexcept {
  return _depth;
}

// Returns a mutable reference to the string buffer cursor.
simdjson_inline uint8_t *&json_iterator::string_buf_loc() noexcept {
  return _string_buf_loc;
}

// Logs `message`, stores `_error` on the iterator and returns it. Must not be
// called with SUCCESS, UNINITIALIZED, INCORRECT_TYPE or NO_SUCH_FIELD.
simdjson_inline error_code json_iterator::report_error(error_code _error, const char *message) noexcept {
  SIMDJSON_ASSUME(_error != SUCCESS && _error != UNINITIALIZED && _error != INCORRECT_TYPE && _error != NO_SUCH_FIELD);
  logger::log_error(*this, message);
  error = _error;
  return error;
}

// Returns the current token position.
simdjson_inline token_position json_iterator::position() const noexcept {
  return token.position();
}

// Forwards to parser->unescape(), passing the iterator's string buffer cursor.
simdjson_inline simdjson_result json_iterator::unescape(raw_json_string in, bool allow_replacement) noexcept {
  return parser->unescape(in, _string_buf_loc, allow_replacement);
}

// Forwards to parser->unescape_wobbly(), passing the iterator's string buffer cursor.
simdjson_inline simdjson_result json_iterator::unescape_wobbly(raw_json_string in) noexcept {
  return parser->unescape_wobbly(in, _string_buf_loc);
}

// Jumps to `position` and sets the depth to child_depth; the current depth
// must be child_depth - 1.
simdjson_inline void json_iterator::reenter_child(token_position position, depth_t child_depth) noexcept {
  SIMDJSON_ASSUME(child_depth >= 1 && child_depth < INT32_MAX);
  SIMDJSON_ASSUME(_depth == child_depth - 1);
#if SIMDJSON_DEVELOPMENT_CHECKS
#ifndef SIMDJSON_CLANG_VISUAL_STUDIO
  SIMDJSON_ASSUME(size_t(child_depth) < parser->max_depth());
SIMDJSON_ASSUME(position >= parser->start_positions[child_depth]); #endif #endif token.set_position(position); _depth = child_depth; } simdjson_inline error_code json_iterator::consume_character(char c) noexcept { if (*peek() == c) { return_current_and_advance(); return SUCCESS; } return TAPE_ERROR; } #if SIMDJSON_DEVELOPMENT_CHECKS simdjson_inline token_position json_iterator::start_position(depth_t depth) const noexcept { SIMDJSON_ASSUME(size_t(depth) < parser->max_depth()); return size_t(depth) < parser->max_depth() ? parser->start_positions[depth] : 0; } simdjson_inline void json_iterator::set_start_position(depth_t depth, token_position position) noexcept { SIMDJSON_ASSUME(size_t(depth) < parser->max_depth()); if(size_t(depth) < parser->max_depth()) { parser->start_positions[depth] = position; } } #endif simdjson_inline error_code json_iterator::optional_error(error_code _error, const char *message) noexcept { SIMDJSON_ASSUME(_error == INCORRECT_TYPE || _error == NO_SUCH_FIELD); logger::log_error(*this, message); return _error; } simdjson_warn_unused simdjson_inline bool json_iterator::copy_to_buffer(const uint8_t *json, uint32_t max_len, uint8_t *tmpbuf, size_t N) noexcept { // This function is not expected to be called in performance-sensitive settings. // Let us guard against silly cases: if((N < max_len) || (N == 0)) { return false; } // Copy to the buffer. std::memcpy(tmpbuf, json, max_len); if(N > max_len) { // We pad whatever remains with ' '. 
std::memset(tmpbuf + max_len, ' ', N - max_len);
  }
  return true;
}

} // namespace ondemand
} // namespace arm64
} // namespace simdjson

namespace simdjson {

// NOTE(review): template argument lists look stripped in the two
// constructors below — confirm against the upstream header.

// Value result: forwards the json_iterator to the result base class.
simdjson_inline simdjson_result::simdjson_result(arm64::ondemand::json_iterator &&value) noexcept
    : implementation_simdjson_result_base(std::forward(value)) {}
// Error-only result: forwards `error` to the result base class.
simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept
    : implementation_simdjson_result_base(error) {}

} // namespace simdjson

#endif // SIMDJSON_GENERIC_ONDEMAND_JSON_ITERATOR_INL_H
/* end file simdjson/generic/ondemand/json_iterator-inl.h for arm64 */
/* including simdjson/generic/ondemand/json_type-inl.h for arm64: #include "simdjson/generic/ondemand/json_type-inl.h" */
/* begin file simdjson/generic/ondemand/json_type-inl.h for arm64 */
#ifndef SIMDJSON_GENERIC_ONDEMAND_JSON_TYPE_INL_H

/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */
/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_JSON_TYPE_INL_H */
/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */
/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_type.h" */
/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base-inl.h" */
/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */

namespace simdjson {
namespace arm64 {
namespace ondemand {

// Writes the lowercase name of `type` ("array", "object", "number",
// "string", "boolean" or "null") to `out`. Any other enumerator value hits
// SIMDJSON_UNREACHABLE().
inline std::ostream& operator<<(std::ostream& out, json_type type) noexcept {
    switch (type) {
        case json_type::array: out << "array"; break;
        case json_type::object: out << "object"; break;
        case json_type::number: out << "number"; break;
        case json_type::string: out << "string"; break;
        case json_type::boolean: out << "boolean"; break;
        case json_type::null: out << "null"; break;
        default: SIMDJSON_UNREACHABLE();
    }
    return out;
}

#if SIMDJSON_EXCEPTIONS
// Streams the wrapped value obtained from type.value(); declared
// noexcept(false).
inline std::ostream& operator<<(std::ostream& out, simdjson_result &type) noexcept(false) {
    return out <<
type.value();
}
#endif



// Returns the tag recording which payload member is active.
simdjson_inline number_type number::get_number_type() const noexcept {
  return type;
}

// True when the stored number is tagged as an unsigned integer.
simdjson_inline bool number::is_uint64() const noexcept {
  return get_number_type() == number_type::unsigned_integer;
}

// Returns the unsigned payload; the stored type tag is not checked.
simdjson_inline uint64_t number::get_uint64() const noexcept {
  return payload.unsigned_integer;
}

simdjson_inline number::operator uint64_t() const noexcept {
  return get_uint64();
}

// True when the stored number is tagged as a signed integer.
simdjson_inline bool number::is_int64() const noexcept {
  return get_number_type() == number_type::signed_integer;
}

// Returns the signed payload; the stored type tag is not checked.
simdjson_inline int64_t number::get_int64() const noexcept {
  return payload.signed_integer;
}

simdjson_inline number::operator int64_t() const noexcept {
  return get_int64();
}

// True when the stored number is tagged as a floating-point number.
simdjson_inline bool number::is_double() const noexcept {
    return get_number_type() == number_type::floating_point_number;
}

// Returns the floating-point payload; the stored type tag is not checked.
simdjson_inline double number::get_double() const noexcept {
  return payload.floating_point_number;
}

simdjson_inline number::operator double() const noexcept {
  return get_double();
}

// Returns the stored number as a double whatever its tag: the
// floating-point payload as is, otherwise the signed or unsigned payload
// converted to double.
simdjson_inline double number::as_double() const noexcept {
  if(is_double()) {
    return payload.floating_point_number;
  }
  if(is_int64()) {
    return double(payload.signed_integer);
  }
  return double(payload.unsigned_integer);
}

// Stores a signed integer and tags the number accordingly.
simdjson_inline void number::append_s64(int64_t value) noexcept {
  payload.signed_integer = value;
  type = number_type::signed_integer;
}

// Stores an unsigned integer and tags the number accordingly.
simdjson_inline void number::append_u64(uint64_t value) noexcept {
  payload.unsigned_integer = value;
  type = number_type::unsigned_integer;
}

// Stores a double and tags the number accordingly.
simdjson_inline void number::append_double(double value) noexcept {
  payload.floating_point_number = value;
  type = number_type::floating_point_number;
}

// Tags the number as floating point without writing a payload.
simdjson_inline void number::skip_double() noexcept {
  type = number_type::floating_point_number;
}

} // namespace ondemand
} // namespace arm64
} // namespace simdjson

namespace simdjson {

// Value result: forwards the json_type to the result base class.
// NOTE(review): template argument lists look stripped here — confirm
// against the upstream header.
simdjson_inline simdjson_result::simdjson_result(arm64::ondemand::json_type &&value) noexcept
    : implementation_simdjson_result_base(std::forward(value))
{}
// Error-only result: forwards `error` to the result base class.
simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept
    : implementation_simdjson_result_base(error) {}

} // namespace simdjson

#endif // SIMDJSON_GENERIC_ONDEMAND_JSON_TYPE_INL_H
/* end file simdjson/generic/ondemand/json_type-inl.h for arm64 */
/* including simdjson/generic/ondemand/logger-inl.h for arm64: #include "simdjson/generic/ondemand/logger-inl.h" */
/* begin file simdjson/generic/ondemand/logger-inl.h for arm64 */
#ifndef SIMDJSON_GENERIC_ONDEMAND_LOGGER_INL_H

/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */
/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_LOGGER_INL_H */
/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */
/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/logger.h" */
/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator.h" */
/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator.h" */
/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */

// NOTE(review): the header names of the two includes below appear to have
// been stripped. std::unique_ptr and strlen are used further down, which
// suggests <memory> and <cstring> — confirm against the upstream header.
#include
#include

namespace simdjson {
namespace arm64 {
namespace ondemand {
namespace logger {

// Source of '-' characters for the table rules printed by log_headers().
static constexpr const char * DASHES = "----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------";
// Column widths of the log table.
static constexpr const int LOG_EVENT_LEN = 20;
static constexpr const int LOG_BUFFER_LEN = 30;
static constexpr const int LOG_SMALL_BUFFER_LEN = 10;
static int log_depth = 0; // Not threadsafe. Log only.

// Helper to turn unprintable or newline characters into spaces
// NOTE(review): the comparison is on plain `char`, whose signedness is
// implementation-defined, so bytes >= 0x80 may or may not be replaced.
static inline char printable_char(char c) {
  if (c >= 0x20) {
    return c;
  } else {
    return ' ';
  }
}

template
static inline std::string string_format(const std::string& format, const Args&...
args) { SIMDJSON_PUSH_DISABLE_ALL_WARNINGS int size_s = std::snprintf(nullptr, 0, format.c_str(), args...) + 1; auto size = static_cast(size_s); if (size <= 0) return std::string(); std::unique_ptr buf(new char[size]); std::snprintf(buf.get(), size, format.c_str(), args...); SIMDJSON_POP_DISABLE_WARNINGS return std::string(buf.get(), buf.get() + size - 1); } static inline log_level get_log_level_from_env() { SIMDJSON_PUSH_DISABLE_WARNINGS SIMDJSON_DISABLE_DEPRECATED_WARNING // Disable CRT_SECURE warning on MSVC: manually verified this is safe char *lvl = getenv("SIMDJSON_LOG_LEVEL"); SIMDJSON_POP_DISABLE_WARNINGS if (lvl && simdjson_strcasecmp(lvl, "ERROR") == 0) { return log_level::error; } return log_level::info; } static inline log_level log_threshold() { static log_level threshold = get_log_level_from_env(); return threshold; } static inline bool should_log(log_level level) { return level >= log_threshold(); } inline void log_event(const json_iterator &iter, const char *type, std::string_view detail, int delta, int depth_delta) noexcept { log_line(iter, "", type, detail, delta, depth_delta, log_level::info); } inline void log_value(const json_iterator &iter, token_position index, depth_t depth, const char *type, std::string_view detail) noexcept { log_line(iter, index, depth, "", type, detail, log_level::info); } inline void log_value(const json_iterator &iter, const char *type, std::string_view detail, int delta, int depth_delta) noexcept { log_line(iter, "", type, detail, delta, depth_delta, log_level::info); } inline void log_start_value(const json_iterator &iter, token_position index, depth_t depth, const char *type, std::string_view detail) noexcept { log_line(iter, index, depth, "+", type, detail, log_level::info); if (LOG_ENABLED) { log_depth++; } } inline void log_start_value(const json_iterator &iter, const char *type, int delta, int depth_delta) noexcept { log_line(iter, "+", type, "", delta, depth_delta, log_level::info); if (LOG_ENABLED) { 
log_depth++; }
}

// Decreases log_depth (when LOG_ENABLED is set) before logging the line
// with the "-" prefix.
inline void log_end_value(const json_iterator &iter, const char *type, int delta, int depth_delta) noexcept {
  if (LOG_ENABLED) { log_depth--; }
  log_line(iter, "-", type, "", delta, depth_delta, log_level::info);
}

// Logs an error line at a position relative to the iterator.
inline void log_error(const json_iterator &iter, const char *error, const char *detail, int delta, int depth_delta) noexcept {
  log_line(iter, "ERROR: ", error, detail, delta, depth_delta, log_level::error);
}
// Logs an error line at an explicit token position and depth.
inline void log_error(const json_iterator &iter, token_position index, depth_t depth, const char *error, const char *detail) noexcept {
  log_line(iter, index, depth, "ERROR: ", error, detail, log_level::error);
}

// value_iterator overloads: each forwards to the json_iterator overload
// using iter.json_iter().
inline void log_event(const value_iterator &iter, const char *type, std::string_view detail, int delta, int depth_delta) noexcept {
  log_event(iter.json_iter(), type, detail, delta, depth_delta);
}

inline void log_value(const value_iterator &iter, const char *type, std::string_view detail, int delta, int depth_delta) noexcept {
  log_value(iter.json_iter(), type, detail, delta, depth_delta);
}

inline void log_start_value(const value_iterator &iter, const char *type, int delta, int depth_delta) noexcept {
  log_start_value(iter.json_iter(), type, delta, depth_delta);
}

inline void log_end_value(const value_iterator &iter, const char *type, int delta, int depth_delta) noexcept {
  log_end_value(iter.json_iter(), type, delta, depth_delta);
}

inline void log_error(const value_iterator &iter, const char *error, const char *detail, int delta, int depth_delta) noexcept {
  log_error(iter.json_iter(), error, detail, delta, depth_delta);
}

// Prints the log table header (and, the first time only, an explanatory
// hint) to stdout, and resets log_depth to 0. Does nothing unless
// LOG_ENABLED is set and info-level logging passes the threshold.
inline void log_headers() noexcept {
  if (LOG_ENABLED) {
    if (simdjson_unlikely(should_log(log_level::info))) {
      // Technically a static variable is not thread-safe, but if you are using threads and logging... well...
      static bool displayed_hint{false};
      log_depth = 0;
      printf("\n");
      if (!displayed_hint) {
        // We only print this helpful header once.
printf("# Logging provides the depth and position of the iterator user-visible steps:\n");
        printf("# +array says 'this is where we were when we discovered the start array'\n");
        printf(
            "# -array says 'this is where we were when we ended the array'\n");
        printf("# skip says 'this is a structural or value I am skipping'\n");
        printf("# +/-skip says 'this is a start/end array or object I am skipping'\n");
        printf("#\n");
        printf("# The indentation of the terms (array, string,...) indicates the depth,\n");
        printf("# in addition to the depth being displayed.\n");
        printf("#\n");
        printf("# Every token in the document has a single depth determined by the tokens before it,\n");
        printf("# and is not affected by what the token actually is.\n");
        printf("#\n");
        printf("# Not all structural elements are presented as tokens in the logs.\n");
        printf("#\n");
        printf("# We never give control to the user within an empty array or an empty object.\n");
        printf("#\n");
        printf("# Inside an array, having a depth greater than the array's depth means that\n");
        printf("# we are pointing inside a value.\n");
        printf("# Having a depth equal to the array means that we are pointing right before a value.\n");
        printf("# Having a depth smaller than the array means that we have moved beyond the array.\n");
        displayed_hint = true;
      }
      // Column titles.
      printf("\n");
      printf("| %-*s ", LOG_EVENT_LEN, "Event");
      printf("| %-*s ", LOG_BUFFER_LEN, "Buffer");
      printf("| %-*s ", LOG_SMALL_BUFFER_LEN, "Next");
      // printf("| %-*s ", 5, "Next#");
      printf("| %-*s ", 5, "Depth");
      printf("| Detail ");
      printf("|\n");

      // Rule under the titles: each column takes its width + 2 dashes.
      printf("|%.*s", LOG_EVENT_LEN + 2, DASHES);
      printf("|%.*s", LOG_BUFFER_LEN + 2, DASHES);
      printf("|%.*s", LOG_SMALL_BUFFER_LEN + 2, DASHES);
      // printf("|%.*s", 5+2, DASHES);
      printf("|%.*s", 5 + 2, DASHES);
      printf("|--------");
      printf("|\n");
      fflush(stdout);
    }
  }
}

// Relative-position overload of log_line (its body resolves delta and
// depth_delta against the iterator and forwards to the absolute overload).
// NOTE(review): the template parameter list after `template` appears to have
// been stripped — confirm against the upstream header.
template
inline void log_line(const json_iterator &iter, const char *title_prefix, const char *title, std::string_view detail, int delta, int depth_delta, log_level level,
Args&&... args) noexcept {
  // Resolve the relative position/depth against the iterator and delegate.
  log_line(iter, iter.position()+delta, depth_t(iter.depth()+depth_delta), title_prefix, title, detail, level, std::forward(args)...);
}

// Prints one row of the log table to stdout: the formatted title (indented
// by two spaces per depth level), LOG_BUFFER_LEN characters of input at
// `index`, LOG_SMALL_BUFFER_LEN characters at the following token, the depth
// and the detail text. Does nothing unless LOG_ENABLED is set and `level`
// passes the threshold.
// NOTE(review): the template parameter list and the std::forward template
// arguments appear to have been stripped — confirm against the upstream header.
template
inline void log_line(const json_iterator &iter, token_position index, depth_t depth, const char *title_prefix, const char *title, std::string_view detail, log_level level, Args&&... args) noexcept {
  if (LOG_ENABLED) {
    if (simdjson_unlikely(should_log(level))) {
      const int indent = depth * 2;
      const auto buf = iter.token.buf;
      // `title` is used as a printf-style format for the extra arguments.
      auto msg = string_format(title, std::forward(args)...);
      printf("| %*s%s%-*s ", indent, "", title_prefix,
             LOG_EVENT_LEN - indent - int(strlen(title_prefix)), msg.c_str());
      {
        // Print the current structural.
        printf("| ");
        // Before we begin, the index might point right before the document.
        // This could be unsafe, see https://github.com/simdjson/simdjson/discussions/1938
        if (index < iter._root) {
          printf("%*s", LOG_BUFFER_LEN, "");
        } else {
          auto current_structural = &buf[*index];
          for (int i = 0; i < LOG_BUFFER_LEN; i++) {
            printf("%c", printable_char(current_structural[i]));
          }
        }
        printf(" ");
      }
      {
        // Print the next structural.
printf("| ");
        auto next_structural = &buf[*(index + 1)];
        for (int i = 0; i < LOG_SMALL_BUFFER_LEN; i++) {
          printf("%c", printable_char(next_structural[i]));
        }
        printf(" ");
      }
      // printf("| %5u ", *(index+1));
      printf("| %5i ", depth);
      // The detail is a string_view, so print it with an explicit length.
      printf("| %6.*s ", int(detail.size()), detail.data());
      printf("|\n");
      fflush(stdout);
    }
  }
}

} // namespace logger
} // namespace ondemand
} // namespace arm64
} // namespace simdjson

#endif // SIMDJSON_GENERIC_ONDEMAND_LOGGER_INL_H
/* end file simdjson/generic/ondemand/logger-inl.h for arm64 */
/* including simdjson/generic/ondemand/object-inl.h for arm64: #include "simdjson/generic/ondemand/object-inl.h" */
/* begin file simdjson/generic/ondemand/object-inl.h for arm64 */
#ifndef SIMDJSON_GENERIC_ONDEMAND_OBJECT_INL_H

/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */
/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_OBJECT_INL_H */
/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */
/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/field.h" */
/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/object.h" */
/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/object_iterator.h" */
/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string.h" */
/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator.h" */
/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value-inl.h" */
/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */

namespace simdjson {
namespace arm64 {
namespace ondemand {

// Looks `key` up with iter.find_field_unordered_raw(). Returns the matching
// value, NO_SUCH_FIELD (after logging) when the key is absent, or whatever
// error the lookup itself produced.
simdjson_inline simdjson_result object::find_field_unordered(const std::string_view key) & noexcept {
  bool has_value;
  SIMDJSON_TRY( iter.find_field_unordered_raw(key).get(has_value) );
  if (!has_value) {
    logger::log_line(iter.json_iter(), "ERROR: ", "Cannot find key %.*s", "", -1, 0,
logger::log_level::error, static_cast(key.size()), key.data());
    return NO_SUCH_FIELD;
  }
  return value(iter.child());
}
// Rvalue overload; same logic as the lvalue overload.
simdjson_inline simdjson_result object::find_field_unordered(const std::string_view key) && noexcept {
  bool has_value;
  SIMDJSON_TRY( iter.find_field_unordered_raw(key).get(has_value) );
  if (!has_value) {
    logger::log_line(iter.json_iter(), "ERROR: ", "Cannot find key %.*s", "", -1, 0, logger::log_level::error, static_cast(key.size()), key.data());
    return NO_SUCH_FIELD;
  }
  return value(iter.child());
}
// operator[] is find_field_unordered under another name.
simdjson_inline simdjson_result object::operator[](const std::string_view key) & noexcept {
  return find_field_unordered(key);
}
simdjson_inline simdjson_result object::operator[](const std::string_view key) && noexcept {
  return std::forward(*this).find_field_unordered(key);
}
// Looks `key` up with iter.find_field_raw(). Returns the matching value,
// NO_SUCH_FIELD (after logging) when the key is absent, or whatever error
// the lookup itself produced.
simdjson_inline simdjson_result object::find_field(const std::string_view key) & noexcept {
  bool has_value;
  SIMDJSON_TRY( iter.find_field_raw(key).get(has_value) );
  if (!has_value) {
    logger::log_line(iter.json_iter(), "ERROR: ", "Cannot find key %.*s", "", -1, 0, logger::log_level::error, static_cast(key.size()), key.data());
    return NO_SUCH_FIELD;
  }
  return value(iter.child());
}
// Rvalue overload; same logic as the lvalue overload.
simdjson_inline simdjson_result object::find_field(const std::string_view key) && noexcept {
  bool has_value;
  SIMDJSON_TRY( iter.find_field_raw(key).get(has_value) );
  if (!has_value) {
    logger::log_line(iter.json_iter(), "ERROR: ", "Cannot find key %.*s", "", -1, 0, logger::log_level::error, static_cast(key.size()), key.data());
    return NO_SUCH_FIELD;
  }
  return value(iter.child());
}

// Starts an object at `iter` via iter.start_object() and wraps the iterator.
simdjson_inline simdjson_result object::start(value_iterator &iter) noexcept {
  SIMDJSON_TRY( iter.start_object().error() );
  return object(iter);
}
// Same as start(), but uses iter.start_root_object().
simdjson_inline simdjson_result object::start_root(value_iterator &iter) noexcept {
  SIMDJSON_TRY( iter.start_root_object().error() );
  return object(iter);
}
// Skips the rest of the object. If the iterator sits on a key, the key and
// its ':' are consumed first. On any error the iterator is abandoned and the
// error returned.
simdjson_inline error_code object::consume() noexcept {
  if(iter.is_at_key()) {
    /**
     * whenever you are pointing at a key, calling
skip_child() is * unsafe because you will hit a string and you will assume that * it is string value, and this mistake will lead you to make bad * depth computation. */ /** * We want to 'consume' the key. We could really * just do _json_iter->return_current_and_advance(); at this * point, but, for clarity, we will use the high-level API to * eat the key. We assume that the compiler optimizes away * most of the work. */ simdjson_unused raw_json_string actual_key; auto error = iter.field_key().get(actual_key); if (error) { iter.abandon(); return error; }; // Let us move to the value while we are at it. if ((error = iter.field_value())) { iter.abandon(); return error; } } auto error_skip = iter.json_iter().skip_child(iter.depth()-1); if(error_skip) { iter.abandon(); } return error_skip; } simdjson_inline simdjson_result object::raw_json() noexcept { const uint8_t * starting_point{iter.peek_start()}; auto error = consume(); if(error) { return error; } const uint8_t * final_point{iter._json_iter->peek()}; return std::string_view(reinterpret_cast(starting_point), size_t(final_point - starting_point)); } simdjson_inline simdjson_result object::started(value_iterator &iter) noexcept { SIMDJSON_TRY( iter.started_object().error() ); return object(iter); } simdjson_inline object object::resume(const value_iterator &iter) noexcept { return iter; } simdjson_inline object::object(const value_iterator &_iter) noexcept : iter{_iter} { } simdjson_inline simdjson_result object::begin() noexcept { #if SIMDJSON_DEVELOPMENT_CHECKS if (!iter.is_at_iterator_start()) { return OUT_OF_ORDER_ITERATION; } #endif return object_iterator(iter); } simdjson_inline simdjson_result object::end() noexcept { return object_iterator(iter); } inline simdjson_result object::at_pointer(std::string_view json_pointer) noexcept { if (json_pointer[0] != '/') { return INVALID_JSON_POINTER; } json_pointer = json_pointer.substr(1); size_t slash = json_pointer.find('/'); std::string_view key = 
json_pointer.substr(0, slash); // Grab the child with the given key simdjson_result child; // If there is an escape character in the key, unescape it and then get the child. size_t escape = key.find('~'); if (escape != std::string_view::npos) { // Unescape the key std::string unescaped(key); do { switch (unescaped[escape+1]) { case '0': unescaped.replace(escape, 2, "~"); break; case '1': unescaped.replace(escape, 2, "/"); break; default: return INVALID_JSON_POINTER; // "Unexpected ~ escape character in JSON pointer"); } escape = unescaped.find('~', escape+1); } while (escape != std::string::npos); child = find_field(unescaped); // Take note find_field does not unescape keys when matching } else { child = find_field(key); } if(child.error()) { return child; // we do not continue if there was an error } // If there is a /, we have to recurse and look up more of the path if (slash != std::string_view::npos) { child = child.at_pointer(json_pointer.substr(slash)); } return child; } simdjson_inline simdjson_result object::count_fields() & noexcept { size_t count{0}; // Important: we do not consume any of the values. for(simdjson_unused auto v : *this) { count++; } // The above loop will always succeed, but we want to report errors. if(iter.error()) { return iter.error(); } // We need to move back at the start because we expect users to iterate through // the object after counting the number of elements. 
/**
 * Report whether this object has no fields.
 *
 * Calls iter.reset_object() and negates the flag it produces; that flag is
 * treated here as "the object is not empty". Any error from reset_object()
 * is returned instead.
 */
simdjson_inline simdjson_result<bool> object::is_empty() & noexcept {
  bool has_fields;
  SIMDJSON_TRY( iter.reset_object().get(has_fields) );
  return !has_fields;
}
simdjson_result::at_pointer(std::string_view json_pointer) noexcept { if (error()) { return error(); } return first.at_pointer(json_pointer); } inline simdjson_result simdjson_result::reset() noexcept { if (error()) { return error(); } return first.reset(); } inline simdjson_result simdjson_result::is_empty() noexcept { if (error()) { return error(); } return first.is_empty(); } simdjson_inline simdjson_result simdjson_result::count_fields() & noexcept { if (error()) { return error(); } return first.count_fields(); } simdjson_inline simdjson_result simdjson_result::raw_json() noexcept { if (error()) { return error(); } return first.raw_json(); } } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_OBJECT_INL_H /* end file simdjson/generic/ondemand/object-inl.h for arm64 */ /* including simdjson/generic/ondemand/object_iterator-inl.h for arm64: #include "simdjson/generic/ondemand/object_iterator-inl.h" */ /* begin file simdjson/generic/ondemand/object_iterator-inl.h for arm64 */ #ifndef SIMDJSON_GENERIC_ONDEMAND_OBJECT_ITERATOR_INL_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_OBJECT_ITERATOR_INL_H */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/object_iterator.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/field-inl.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator-inl.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { namespace arm64 { namespace ondemand { // // object_iterator // simdjson_inline object_iterator::object_iterator(const value_iterator &_iter) noexcept : iter{_iter} {} simdjson_inline simdjson_result object_iterator::operator*() noexcept { error_code error = iter.error(); if (error) { 
iter.abandon(); return error; } auto result = field::start(iter); // TODO this is a safety rail ... users should exit loops as soon as they receive an error. // Nonetheless, let's see if performance is OK with this if statement--the compiler may give it to us for free. if (result.error()) { iter.abandon(); } return result; } simdjson_inline bool object_iterator::operator==(const object_iterator &other) const noexcept { return !(*this != other); } simdjson_inline bool object_iterator::operator!=(const object_iterator &) const noexcept { return iter.is_open(); } SIMDJSON_PUSH_DISABLE_WARNINGS SIMDJSON_DISABLE_STRICT_OVERFLOW_WARNING simdjson_inline object_iterator &object_iterator::operator++() noexcept { // TODO this is a safety rail ... users should exit loops as soon as they receive an error. // Nonetheless, let's see if performance is OK with this if statement--the compiler may give it to us for free. if (!iter.is_open()) { return *this; } // Iterator will be released if there is an error simdjson_unused error_code error; if ((error = iter.skip_child() )) { return *this; } simdjson_unused bool has_value; if ((error = iter.has_next_field().get(has_value) )) { return *this; }; return *this; } SIMDJSON_POP_DISABLE_WARNINGS // // ### Live States // // While iterating or looking up values, depth >= iter.depth. at_start may vary. Error is // always SUCCESS: // // - Start: This is the state when the object is first found and the iterator is just past the {. // In this state, at_start == true. // - Next: After we hand a scalar value to the user, or an array/object which they then fully // iterate over, the iterator is at the , or } before the next value. In this state, // depth == iter.depth, at_start == false, and error == SUCCESS. // - Unfinished Business: When we hand an array/object to the user which they do not fully // iterate over, we need to finish that iteration by skipping child values until we reach the // Next state. 
In this state, depth > iter.depth, at_start == false, and error == SUCCESS. // // ## Error States // // In error states, we will yield exactly one more value before stopping. iter.depth == depth // and at_start is always false. We decrement after yielding the error, moving to the Finished // state. // // - Chained Error: When the object iterator is part of an error chain--for example, in // `for (auto tweet : doc["tweets"])`, where the tweet field may be missing or not be an // object--we yield that error in the loop, exactly once. In this state, error != SUCCESS and // iter.depth == depth, and at_start == false. We decrement depth when we yield the error. // - Missing Comma Error: When the iterator ++ method discovers there is no comma between fields, // we flag that as an error and treat it exactly the same as a Chained Error. In this state, // error == TAPE_ERROR, iter.depth == depth, and at_start == false. // // Errors that occur while reading a field to give to the user (such as when the key is not a // string or the field is missing a colon) are yielded immediately. Depth is then decremented, // moving to the Finished state without transitioning through an Error state at all. // // ## Terminal State // // The terminal state has iter.depth < depth. at_start is always false. // // - Finished: When we have reached a }, we are finished. We signal this by decrementing depth. // In this state, iter.depth < depth, at_start == false, and error == SUCCESS. 
// } // namespace ondemand } // namespace arm64 } // namespace simdjson namespace simdjson { simdjson_inline simdjson_result::simdjson_result( arm64::ondemand::object_iterator &&value ) noexcept : implementation_simdjson_result_base(std::forward(value)) { first.iter.assert_is_valid(); } simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept : implementation_simdjson_result_base({}, error) { } simdjson_inline simdjson_result simdjson_result::operator*() noexcept { if (error()) { return error(); } return *first; } // If we're iterating and there is an error, return the error once. simdjson_inline bool simdjson_result::operator==(const simdjson_result &other) const noexcept { if (!first.iter.is_valid()) { return !error(); } return first == other.first; } // If we're iterating and there is an error, return the error once. simdjson_inline bool simdjson_result::operator!=(const simdjson_result &other) const noexcept { if (!first.iter.is_valid()) { return error(); } return first != other.first; } // Checks for ']' and ',' simdjson_inline simdjson_result &simdjson_result::operator++() noexcept { // Clear the error if there is one, so we don't yield it twice if (error()) { second = SUCCESS; return *this; } ++first; return *this; } } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_OBJECT_ITERATOR_INL_H /* end file simdjson/generic/ondemand/object_iterator-inl.h for arm64 */ /* including simdjson/generic/ondemand/parser-inl.h for arm64: #include "simdjson/generic/ondemand/parser-inl.h" */ /* begin file simdjson/generic/ondemand/parser-inl.h for arm64 */ #ifndef SIMDJSON_GENERIC_ONDEMAND_PARSER_INL_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_PARSER_INL_H */ /* amalgamation skipped (editor-only): #include "simdjson/padded_string.h" */ /* amalgamation skipped (editor-only): #include "simdjson/padded_string_view.h" */ /* amalgamation skipped 
(editor-only): #include "simdjson/implementation.h" */ /* amalgamation skipped (editor-only): #include "simdjson/internal/dom_parser_implementation.h" */ /* amalgamation skipped (editor-only): #include "simdjson/dom/base.h" // for MINIMAL_DOCUMENT_CAPACITY */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/document_stream.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/parser.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { namespace arm64 { namespace ondemand { simdjson_inline parser::parser(size_t max_capacity) noexcept : _max_capacity{max_capacity} { } simdjson_warn_unused simdjson_inline error_code parser::allocate(size_t new_capacity, size_t new_max_depth) noexcept { if (new_capacity > max_capacity()) { return CAPACITY; } if (string_buf && new_capacity == capacity() && new_max_depth == max_depth()) { return SUCCESS; } // string_capacity copied from document::allocate _capacity = 0; size_t string_capacity = SIMDJSON_ROUNDUP_N(5 * new_capacity / 3 + SIMDJSON_PADDING, 64); string_buf.reset(new (std::nothrow) uint8_t[string_capacity]); #if SIMDJSON_DEVELOPMENT_CHECKS start_positions.reset(new (std::nothrow) token_position[new_max_depth]); #endif if (implementation) { SIMDJSON_TRY( implementation->set_capacity(new_capacity) ); SIMDJSON_TRY( implementation->set_max_depth(new_max_depth) ); } else { SIMDJSON_TRY( simdjson::get_active_implementation()->create_dom_parser_implementation(new_capacity, new_max_depth, implementation) ); } _capacity = new_capacity; _max_depth = new_max_depth; return SUCCESS; } simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(padded_string_view json) & noexcept { if (json.padding() < SIMDJSON_PADDING) { return 
INSUFFICIENT_PADDING; } json.remove_utf8_bom(); // Allocate if needed if (capacity() < json.length() || !string_buf) { SIMDJSON_TRY( allocate(json.length(), max_depth()) ); } // Run stage 1. SIMDJSON_TRY( implementation->stage1(reinterpret_cast(json.data()), json.length(), stage1_mode::regular) ); return document::start({ reinterpret_cast(json.data()), this }); } simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(const char *json, size_t len, size_t allocated) & noexcept { return iterate(padded_string_view(json, len, allocated)); } simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(const uint8_t *json, size_t len, size_t allocated) & noexcept { return iterate(padded_string_view(json, len, allocated)); } simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(std::string_view json, size_t allocated) & noexcept { return iterate(padded_string_view(json, allocated)); } simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(std::string &json) & noexcept { if(json.capacity() - json.size() < SIMDJSON_PADDING) { json.reserve(json.size() + SIMDJSON_PADDING); } return iterate(padded_string_view(json)); } simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(const std::string &json) & noexcept { return iterate(padded_string_view(json)); } simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(const simdjson_result &result) & noexcept { // We don't presently have a way to temporarily get a const T& from a simdjson_result without throwing an exception SIMDJSON_TRY( result.error() ); padded_string_view json = result.value_unsafe(); return iterate(json); } simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(const simdjson_result &result) & noexcept { // We don't presently have a way to temporarily get a const T& from a simdjson_result without throwing an exception SIMDJSON_TRY( result.error() ); const padded_string &json = result.value_unsafe(); return iterate(json); } 
/**
 * Set the largest document size this parser will accept; allocate() returns
 * CAPACITY for requests above it.
 *
 * dom::MINIMAL_DOCUMENT_CAPACITY acts as a lower bound: a smaller request is
 * raised to that minimum, a larger one is stored as given. (The comparison was
 * previously inverted, which clamped every large request down to the minimum.)
 *
 * @param max_capacity requested maximum capacity.
 */
simdjson_inline void parser::set_max_capacity(size_t max_capacity) noexcept {
  if(max_capacity > dom::MINIMAL_DOCUMENT_CAPACITY) {
    _max_capacity = max_capacity;
  } else {
    _max_capacity = dom::MINIMAL_DOCUMENT_CAPACITY;
  }
}
"simdjson/generic/implementation_simdjson_result_base-inl.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { namespace arm64 { namespace ondemand { simdjson_inline raw_json_string::raw_json_string(const uint8_t * _buf) noexcept : buf{_buf} {} simdjson_inline const char * raw_json_string::raw() const noexcept { return reinterpret_cast(buf); } simdjson_inline bool raw_json_string::is_free_from_unescaped_quote(std::string_view target) noexcept { size_t pos{0}; // if the content has no escape character, just scan through it quickly! for(;pos < target.size() && target[pos] != '\\';pos++) {} // slow path may begin. bool escaping{false}; for(;pos < target.size();pos++) { if((target[pos] == '"') && !escaping) { return false; } else if(target[pos] == '\\') { escaping = !escaping; } else { escaping = false; } } return true; } simdjson_inline bool raw_json_string::is_free_from_unescaped_quote(const char* target) noexcept { size_t pos{0}; // if the content has no escape character, just scan through it quickly! for(;target[pos] && target[pos] != '\\';pos++) {} // slow path may begin. bool escaping{false}; for(;target[pos];pos++) { if((target[pos] == '"') && !escaping) { return false; } else if(target[pos] == '\\') { escaping = !escaping; } else { escaping = false; } } return true; } simdjson_inline bool raw_json_string::unsafe_is_equal(size_t length, std::string_view target) const noexcept { // If we are going to call memcmp, then we must know something about the length of the raw_json_string. return (length >= target.size()) && (raw()[target.size()] == '"') && !memcmp(raw(), target.data(), target.size()); } simdjson_inline bool raw_json_string::unsafe_is_equal(std::string_view target) const noexcept { // Assumptions: does not contain unescaped quote characters, and // the raw content is quote terminated within a valid JSON string. 
if(target.size() <= SIMDJSON_PADDING) { return (raw()[target.size()] == '"') && !memcmp(raw(), target.data(), target.size()); } const char * r{raw()}; size_t pos{0}; for(;pos < target.size();pos++) { if(r[pos] != target[pos]) { return false; } } if(r[pos] != '"') { return false; } return true; } simdjson_inline bool raw_json_string::is_equal(std::string_view target) const noexcept { const char * r{raw()}; size_t pos{0}; bool escaping{false}; for(;pos < target.size();pos++) { if(r[pos] != target[pos]) { return false; } // if target is a compile-time constant and it is free from // quotes, then the next part could get optimized away through // inlining. if((target[pos] == '"') && !escaping) { // We have reached the end of the raw_json_string but // the target is not done. return false; } else if(target[pos] == '\\') { escaping = !escaping; } else { escaping = false; } } if(r[pos] != '"') { return false; } return true; } simdjson_inline bool raw_json_string::unsafe_is_equal(const char * target) const noexcept { // Assumptions: 'target' does not contain unescaped quote characters, is null terminated and // the raw content is quote terminated within a valid JSON string. const char * r{raw()}; size_t pos{0}; for(;target[pos];pos++) { if(r[pos] != target[pos]) { return false; } } if(r[pos] != '"') { return false; } return true; } simdjson_inline bool raw_json_string::is_equal(const char* target) const noexcept { // Assumptions: does not contain unescaped quote characters, and // the raw content is quote terminated within a valid JSON string. const char * r{raw()}; size_t pos{0}; bool escaping{false}; for(;target[pos];pos++) { if(r[pos] != target[pos]) { return false; } // if target is a compile-time constant and it is free from // quotes, then the next part could get optimized away through // inlining. if((target[pos] == '"') && !escaping) { // We have reached the end of the raw_json_string but // the target is not done. 
return false; } else if(target[pos] == '\\') { escaping = !escaping; } else { escaping = false; } } if(r[pos] != '"') { return false; } return true; } simdjson_unused simdjson_inline bool operator==(const raw_json_string &a, std::string_view c) noexcept { return a.unsafe_is_equal(c); } simdjson_unused simdjson_inline bool operator==(std::string_view c, const raw_json_string &a) noexcept { return a == c; } simdjson_unused simdjson_inline bool operator!=(const raw_json_string &a, std::string_view c) noexcept { return !(a == c); } simdjson_unused simdjson_inline bool operator!=(std::string_view c, const raw_json_string &a) noexcept { return !(a == c); } simdjson_inline simdjson_warn_unused simdjson_result raw_json_string::unescape(json_iterator &iter, bool allow_replacement) const noexcept { return iter.unescape(*this, allow_replacement); } simdjson_inline simdjson_warn_unused simdjson_result raw_json_string::unescape_wobbly(json_iterator &iter) const noexcept { return iter.unescape_wobbly(*this); } simdjson_unused simdjson_inline std::ostream &operator<<(std::ostream &out, const raw_json_string &str) noexcept { bool in_escape = false; const char *s = str.raw(); while (true) { switch (*s) { case '\\': in_escape = !in_escape; break; case '"': if (in_escape) { in_escape = false; } else { return out; } break; default: if (in_escape) { in_escape = false; } } out << *s; s++; } } } // namespace ondemand } // namespace arm64 } // namespace simdjson namespace simdjson { simdjson_inline simdjson_result::simdjson_result(arm64::ondemand::raw_json_string &&value) noexcept : implementation_simdjson_result_base(std::forward(value)) {} simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept : implementation_simdjson_result_base(error) {} simdjson_inline simdjson_result simdjson_result::raw() const noexcept { if (error()) { return error(); } return first.raw(); } simdjson_inline simdjson_warn_unused simdjson_result 
/**
 * Strip leading and trailing spaces, tabs, line feeds and carriage returns.
 *
 * @param str the text to trim.
 * @return a view into `str` without the surrounding white space, or a
 *         default-constructed string_view when `str` is empty or consists
 *         only of white space.
 */
inline std::string_view trim(const std::string_view str) noexcept {
  // We can almost surely do better by rolling our own find_first_not_of function.
  constexpr const char *blanks = " \t\n\r";
  const size_t head = str.find_first_not_of(blanks);
  if (head == std::string_view::npos) {
    // Nothing but white space (or nothing at all): there is nothing to keep.
    return {};
  }
  const size_t tail = str.find_last_not_of(blanks);
  return str.substr(head, tail + 1 - head);
}
to_json_string(simdjson_result x) { if (x.error()) { return x.error(); } return to_json_string(x.value_unsafe()); } inline simdjson_result to_json_string(simdjson_result x) { if (x.error()) { return x.error(); } return to_json_string(x.value_unsafe()); } inline simdjson_result to_json_string(simdjson_result x) { if (x.error()) { return x.error(); } return to_json_string(x.value_unsafe()); } inline simdjson_result to_json_string(simdjson_result x) { if (x.error()) { return x.error(); } return to_json_string(x.value_unsafe()); } } // namespace simdjson namespace simdjson { namespace arm64 { namespace ondemand { #if SIMDJSON_EXCEPTIONS inline std::ostream& operator<<(std::ostream& out, simdjson::arm64::ondemand::value x) { std::string_view v; auto error = simdjson::to_json_string(x).get(v); if(error == simdjson::SUCCESS) { return (out << v); } else { throw simdjson::simdjson_error(error); } } inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x) { if (x.error()) { throw simdjson::simdjson_error(x.error()); } return (out << x.value()); } #else inline std::ostream& operator<<(std::ostream& out, simdjson::arm64::ondemand::value x) { std::string_view v; auto error = simdjson::to_json_string(x).get(v); if(error == simdjson::SUCCESS) { return (out << v); } else { return (out << error); } } #endif #if SIMDJSON_EXCEPTIONS inline std::ostream& operator<<(std::ostream& out, simdjson::arm64::ondemand::array value) { std::string_view v; auto error = simdjson::to_json_string(value).get(v); if(error == simdjson::SUCCESS) { return (out << v); } else { throw simdjson::simdjson_error(error); } } inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x) { if (x.error()) { throw simdjson::simdjson_error(x.error()); } return (out << x.value()); } #else inline std::ostream& operator<<(std::ostream& out, simdjson::arm64::ondemand::array value) { std::string_view v; auto error = simdjson::to_json_string(value).get(v); if(error == 
simdjson::SUCCESS) { return (out << v); } else { return (out << error); } } #endif #if SIMDJSON_EXCEPTIONS inline std::ostream& operator<<(std::ostream& out, simdjson::arm64::ondemand::document& value) { std::string_view v; auto error = simdjson::to_json_string(value).get(v); if(error == simdjson::SUCCESS) { return (out << v); } else { throw simdjson::simdjson_error(error); } } inline std::ostream& operator<<(std::ostream& out, simdjson::arm64::ondemand::document_reference& value) { std::string_view v; auto error = simdjson::to_json_string(value).get(v); if(error == simdjson::SUCCESS) { return (out << v); } else { throw simdjson::simdjson_error(error); } } inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result&& x) { if (x.error()) { throw simdjson::simdjson_error(x.error()); } return (out << x.value()); } inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result&& x) { if (x.error()) { throw simdjson::simdjson_error(x.error()); } return (out << x.value()); } #else inline std::ostream& operator<<(std::ostream& out, simdjson::arm64::ondemand::document& value) { std::string_view v; auto error = simdjson::to_json_string(value).get(v); if(error == simdjson::SUCCESS) { return (out << v); } else { return (out << error); } } #endif #if SIMDJSON_EXCEPTIONS inline std::ostream& operator<<(std::ostream& out, simdjson::arm64::ondemand::object value) { std::string_view v; auto error = simdjson::to_json_string(value).get(v); if(error == simdjson::SUCCESS) { return (out << v); } else { throw simdjson::simdjson_error(error); } } inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x) { if (x.error()) { throw simdjson::simdjson_error(x.error()); } return (out << x.value()); } #else inline std::ostream& operator<<(std::ostream& out, simdjson::arm64::ondemand::object value) { std::string_view v; auto error = simdjson::to_json_string(value).get(v); if(error == simdjson::SUCCESS) { return (out << v); } else { return (out 
<< error); } } #endif }}} // namespace simdjson::arm64::ondemand #endif // SIMDJSON_GENERIC_ONDEMAND_SERIALIZATION_INL_H /* end file simdjson/generic/ondemand/serialization-inl.h for arm64 */ /* including simdjson/generic/ondemand/token_iterator-inl.h for arm64: #include "simdjson/generic/ondemand/token_iterator-inl.h" */ /* begin file simdjson/generic/ondemand/token_iterator-inl.h for arm64 */ #ifndef SIMDJSON_GENERIC_ONDEMAND_TOKEN_ITERATOR_INL_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_TOKEN_ITERATOR_INL_H */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/token_iterator.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base-inl.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { namespace arm64 { namespace ondemand { simdjson_inline token_iterator::token_iterator( const uint8_t *_buf, token_position position ) noexcept : buf{_buf}, _position{position} { } simdjson_inline uint32_t token_iterator::current_offset() const noexcept { return *(_position); } simdjson_inline const uint8_t *token_iterator::return_current_and_advance() noexcept { return &buf[*(_position++)]; } simdjson_inline const uint8_t *token_iterator::peek(token_position position) const noexcept { return &buf[*position]; } simdjson_inline uint32_t token_iterator::peek_index(token_position position) const noexcept { return *position; } simdjson_inline uint32_t token_iterator::peek_length(token_position position) const noexcept { return *(position+1) - *position; } simdjson_inline const uint8_t *token_iterator::peek(int32_t delta) const noexcept { return &buf[*(_position+delta)]; } simdjson_inline uint32_t token_iterator::peek_index(int32_t delta) const noexcept { return 
*(_position+delta); } simdjson_inline uint32_t token_iterator::peek_length(int32_t delta) const noexcept { return *(_position+delta+1) - *(_position+delta); } simdjson_inline token_position token_iterator::position() const noexcept { return _position; } simdjson_inline void token_iterator::set_position(token_position target_position) noexcept { _position = target_position; } simdjson_inline bool token_iterator::operator==(const token_iterator &other) const noexcept { return _position == other._position; } simdjson_inline bool token_iterator::operator!=(const token_iterator &other) const noexcept { return _position != other._position; } simdjson_inline bool token_iterator::operator>(const token_iterator &other) const noexcept { return _position > other._position; } simdjson_inline bool token_iterator::operator>=(const token_iterator &other) const noexcept { return _position >= other._position; } simdjson_inline bool token_iterator::operator<(const token_iterator &other) const noexcept { return _position < other._position; } simdjson_inline bool token_iterator::operator<=(const token_iterator &other) const noexcept { return _position <= other._position; } } // namespace ondemand } // namespace arm64 } // namespace simdjson namespace simdjson { simdjson_inline simdjson_result::simdjson_result(arm64::ondemand::token_iterator &&value) noexcept : implementation_simdjson_result_base(std::forward(value)) {} simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept : implementation_simdjson_result_base(error) {} } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_TOKEN_ITERATOR_INL_H /* end file simdjson/generic/ondemand/token_iterator-inl.h for arm64 */ /* including simdjson/generic/ondemand/value-inl.h for arm64: #include "simdjson/generic/ondemand/value-inl.h" */ /* begin file simdjson/generic/ondemand/value-inl.h for arm64 */ #ifndef SIMDJSON_GENERIC_ONDEMAND_VALUE_INL_H /* amalgamation skipped (editor-only): #ifndef 
SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_VALUE_INL_H */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array_iterator.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_type.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/object.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { namespace arm64 { namespace ondemand { simdjson_inline value::value(const value_iterator &_iter) noexcept : iter{_iter} { } simdjson_inline value value::start(const value_iterator &iter) noexcept { return iter; } simdjson_inline value value::resume(const value_iterator &iter) noexcept { return iter; } simdjson_inline simdjson_result value::get_array() noexcept { return array::start(iter); } simdjson_inline simdjson_result value::get_object() noexcept { return object::start(iter); } simdjson_inline simdjson_result value::start_or_resume_object() noexcept { if (iter.at_start()) { return get_object(); } else { return object::resume(iter); } } simdjson_inline simdjson_result value::get_raw_json_string() noexcept { return iter.get_raw_json_string(); } simdjson_inline simdjson_result value::get_string(bool allow_replacement) noexcept { return iter.get_string(allow_replacement); } template simdjson_inline error_code value::get_string(string_type& receiver, bool allow_replacement) noexcept { return iter.get_string(receiver, 
allow_replacement); } simdjson_inline simdjson_result value::get_wobbly_string() noexcept { return iter.get_wobbly_string(); } simdjson_inline simdjson_result value::get_double() noexcept { return iter.get_double(); } simdjson_inline simdjson_result value::get_double_in_string() noexcept { return iter.get_double_in_string(); } simdjson_inline simdjson_result value::get_uint64() noexcept { return iter.get_uint64(); } simdjson_inline simdjson_result value::get_uint64_in_string() noexcept { return iter.get_uint64_in_string(); } simdjson_inline simdjson_result value::get_int64() noexcept { return iter.get_int64(); } simdjson_inline simdjson_result value::get_int64_in_string() noexcept { return iter.get_int64_in_string(); } simdjson_inline simdjson_result value::get_bool() noexcept { return iter.get_bool(); } simdjson_inline simdjson_result value::is_null() noexcept { return iter.is_null(); } template<> simdjson_inline simdjson_result value::get() noexcept { return get_array(); } template<> simdjson_inline simdjson_result value::get() noexcept { return get_object(); } template<> simdjson_inline simdjson_result value::get() noexcept { return get_raw_json_string(); } template<> simdjson_inline simdjson_result value::get() noexcept { return get_string(false); } template<> simdjson_inline simdjson_result value::get() noexcept { return get_number(); } template<> simdjson_inline simdjson_result value::get() noexcept { return get_double(); } template<> simdjson_inline simdjson_result value::get() noexcept { return get_uint64(); } template<> simdjson_inline simdjson_result value::get() noexcept { return get_int64(); } template<> simdjson_inline simdjson_result value::get() noexcept { return get_bool(); } template simdjson_inline error_code value::get(T &out) noexcept { return get().get(out); } #if SIMDJSON_EXCEPTIONS simdjson_inline value::operator array() noexcept(false) { return get_array(); } simdjson_inline value::operator object() noexcept(false) { return get_object(); } 
simdjson_inline value::operator uint64_t() noexcept(false) { return get_uint64(); } simdjson_inline value::operator int64_t() noexcept(false) { return get_int64(); } simdjson_inline value::operator double() noexcept(false) { return get_double(); } simdjson_inline value::operator std::string_view() noexcept(false) { return get_string(false); } simdjson_inline value::operator raw_json_string() noexcept(false) { return get_raw_json_string(); } simdjson_inline value::operator bool() noexcept(false) { return get_bool(); } #endif simdjson_inline simdjson_result value::begin() & noexcept { return get_array().begin(); } simdjson_inline simdjson_result value::end() & noexcept { return {}; } simdjson_inline simdjson_result value::count_elements() & noexcept { simdjson_result answer; auto a = get_array(); answer = a.count_elements(); // count_elements leaves you pointing inside the array, at the first element. // We need to move back so that the user can create a new array (which requires that // we point at '['). 
iter.move_at_start(); return answer; } simdjson_inline simdjson_result value::count_fields() & noexcept { simdjson_result answer; auto a = get_object(); answer = a.count_fields(); iter.move_at_start(); return answer; } simdjson_inline simdjson_result value::at(size_t index) noexcept { auto a = get_array(); return a.at(index); } simdjson_inline simdjson_result value::find_field(std::string_view key) noexcept { return start_or_resume_object().find_field(key); } simdjson_inline simdjson_result value::find_field(const char *key) noexcept { return start_or_resume_object().find_field(key); } simdjson_inline simdjson_result value::find_field_unordered(std::string_view key) noexcept { return start_or_resume_object().find_field_unordered(key); } simdjson_inline simdjson_result value::find_field_unordered(const char *key) noexcept { return start_or_resume_object().find_field_unordered(key); } simdjson_inline simdjson_result value::operator[](std::string_view key) noexcept { return start_or_resume_object()[key]; } simdjson_inline simdjson_result value::operator[](const char *key) noexcept { return start_or_resume_object()[key]; } simdjson_inline simdjson_result value::type() noexcept { return iter.type(); } simdjson_inline simdjson_result value::is_scalar() noexcept { json_type this_type; auto error = type().get(this_type); if(error) { return error; } return ! 
((this_type == json_type::array) || (this_type == json_type::object)); } simdjson_inline bool value::is_negative() noexcept { return iter.is_negative(); } simdjson_inline simdjson_result value::is_integer() noexcept { return iter.is_integer(); } simdjson_warn_unused simdjson_inline simdjson_result value::get_number_type() noexcept { return iter.get_number_type(); } simdjson_warn_unused simdjson_inline simdjson_result value::get_number() noexcept { return iter.get_number(); } simdjson_inline std::string_view value::raw_json_token() noexcept { return std::string_view(reinterpret_cast(iter.peek_start()), iter.peek_start_length()); } simdjson_inline simdjson_result value::raw_json() noexcept { json_type t; SIMDJSON_TRY(type().get(t)); switch (t) { case json_type::array: { ondemand::array array; SIMDJSON_TRY(get_array().get(array)); return array.raw_json(); } case json_type::object: { ondemand::object object; SIMDJSON_TRY(get_object().get(object)); return object.raw_json(); } default: return raw_json_token(); } } simdjson_inline simdjson_result value::current_location() noexcept { return iter.json_iter().current_location(); } simdjson_inline int32_t value::current_depth() const noexcept{ return iter.json_iter().depth(); } simdjson_inline simdjson_result value::at_pointer(std::string_view json_pointer) noexcept { json_type t; SIMDJSON_TRY(type().get(t)); switch (t) { case json_type::array: return (*this).get_array().at_pointer(json_pointer); case json_type::object: return (*this).get_object().at_pointer(json_pointer); default: return INVALID_JSON_POINTER; } } } // namespace ondemand } // namespace arm64 } // namespace simdjson namespace simdjson { simdjson_inline simdjson_result::simdjson_result( arm64::ondemand::value &&value ) noexcept : implementation_simdjson_result_base( std::forward(value) ) { } simdjson_inline simdjson_result::simdjson_result( error_code error ) noexcept : implementation_simdjson_result_base(error) { } simdjson_inline simdjson_result 
simdjson_result::count_elements() & noexcept { if (error()) { return error(); } return first.count_elements(); } simdjson_inline simdjson_result simdjson_result::count_fields() & noexcept { if (error()) { return error(); } return first.count_fields(); } simdjson_inline simdjson_result simdjson_result::at(size_t index) noexcept { if (error()) { return error(); } return first.at(index); } simdjson_inline simdjson_result simdjson_result::begin() & noexcept { if (error()) { return error(); } return first.begin(); } simdjson_inline simdjson_result simdjson_result::end() & noexcept { if (error()) { return error(); } return {}; } simdjson_inline simdjson_result simdjson_result::find_field(std::string_view key) noexcept { if (error()) { return error(); } return first.find_field(key); } simdjson_inline simdjson_result simdjson_result::find_field(const char *key) noexcept { if (error()) { return error(); } return first.find_field(key); } simdjson_inline simdjson_result simdjson_result::find_field_unordered(std::string_view key) noexcept { if (error()) { return error(); } return first.find_field_unordered(key); } simdjson_inline simdjson_result simdjson_result::find_field_unordered(const char *key) noexcept { if (error()) { return error(); } return first.find_field_unordered(key); } simdjson_inline simdjson_result simdjson_result::operator[](std::string_view key) noexcept { if (error()) { return error(); } return first[key]; } simdjson_inline simdjson_result simdjson_result::operator[](const char *key) noexcept { if (error()) { return error(); } return first[key]; } simdjson_inline simdjson_result simdjson_result::get_array() noexcept { if (error()) { return error(); } return first.get_array(); } simdjson_inline simdjson_result simdjson_result::get_object() noexcept { if (error()) { return error(); } return first.get_object(); } simdjson_inline simdjson_result simdjson_result::get_uint64() noexcept { if (error()) { return error(); } return first.get_uint64(); } 
simdjson_inline simdjson_result simdjson_result::get_uint64_in_string() noexcept { if (error()) { return error(); } return first.get_uint64_in_string(); } simdjson_inline simdjson_result simdjson_result::get_int64() noexcept { if (error()) { return error(); } return first.get_int64(); } simdjson_inline simdjson_result simdjson_result::get_int64_in_string() noexcept { if (error()) { return error(); } return first.get_int64_in_string(); } simdjson_inline simdjson_result simdjson_result::get_double() noexcept { if (error()) { return error(); } return first.get_double(); } simdjson_inline simdjson_result simdjson_result::get_double_in_string() noexcept { if (error()) { return error(); } return first.get_double_in_string(); } simdjson_inline simdjson_result simdjson_result::get_string(bool allow_replacement) noexcept { if (error()) { return error(); } return first.get_string(allow_replacement); } template simdjson_inline error_code simdjson_result::get_string(string_type& receiver, bool allow_replacement) noexcept { if (error()) { return error(); } return first.get_string(receiver, allow_replacement); } simdjson_inline simdjson_result simdjson_result::get_wobbly_string() noexcept { if (error()) { return error(); } return first.get_wobbly_string(); } simdjson_inline simdjson_result simdjson_result::get_raw_json_string() noexcept { if (error()) { return error(); } return first.get_raw_json_string(); } simdjson_inline simdjson_result simdjson_result::get_bool() noexcept { if (error()) { return error(); } return first.get_bool(); } simdjson_inline simdjson_result simdjson_result::is_null() noexcept { if (error()) { return error(); } return first.is_null(); } template simdjson_inline simdjson_result simdjson_result::get() noexcept { if (error()) { return error(); } return first.get(); } template simdjson_inline error_code simdjson_result::get(T &out) noexcept { if (error()) { return error(); } return first.get(out); } template<> simdjson_inline simdjson_result 
simdjson_result::get() noexcept { if (error()) { return error(); } return std::move(first); } template<> simdjson_inline error_code simdjson_result::get(arm64::ondemand::value &out) noexcept { if (error()) { return error(); } out = first; return SUCCESS; } simdjson_inline simdjson_result simdjson_result::type() noexcept { if (error()) { return error(); } return first.type(); } simdjson_inline simdjson_result simdjson_result::is_scalar() noexcept { if (error()) { return error(); } return first.is_scalar(); } simdjson_inline simdjson_result simdjson_result::is_negative() noexcept { if (error()) { return error(); } return first.is_negative(); } simdjson_inline simdjson_result simdjson_result::is_integer() noexcept { if (error()) { return error(); } return first.is_integer(); } simdjson_inline simdjson_result simdjson_result::get_number_type() noexcept { if (error()) { return error(); } return first.get_number_type(); } simdjson_inline simdjson_result simdjson_result::get_number() noexcept { if (error()) { return error(); } return first.get_number(); } #if SIMDJSON_EXCEPTIONS simdjson_inline simdjson_result::operator arm64::ondemand::array() noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } simdjson_inline simdjson_result::operator arm64::ondemand::object() noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } simdjson_inline simdjson_result::operator uint64_t() noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } simdjson_inline simdjson_result::operator int64_t() noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } simdjson_inline simdjson_result::operator double() noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } simdjson_inline simdjson_result::operator std::string_view() noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } simdjson_inline simdjson_result::operator 
arm64::ondemand::raw_json_string() noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } simdjson_inline simdjson_result::operator bool() noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } #endif simdjson_inline simdjson_result simdjson_result::raw_json_token() noexcept { if (error()) { return error(); } return first.raw_json_token(); } simdjson_inline simdjson_result simdjson_result::raw_json() noexcept { if (error()) { return error(); } return first.raw_json(); } simdjson_inline simdjson_result simdjson_result::current_location() noexcept { if (error()) { return error(); } return first.current_location(); } simdjson_inline simdjson_result simdjson_result::current_depth() const noexcept { if (error()) { return error(); } return first.current_depth(); } simdjson_inline simdjson_result simdjson_result::at_pointer(std::string_view json_pointer) noexcept { if (error()) { return error(); } return first.at_pointer(json_pointer); } } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_VALUE_INL_H /* end file simdjson/generic/ondemand/value-inl.h for arm64 */ /* including simdjson/generic/ondemand/value_iterator-inl.h for arm64: #include "simdjson/generic/ondemand/value_iterator-inl.h" */ /* begin file simdjson/generic/ondemand/value_iterator-inl.h for arm64 */ #ifndef SIMDJSON_GENERIC_ONDEMAND_VALUE_ITERATOR_INL_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_VALUE_ITERATOR_INL_H */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/atomparsing.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/numberparsing.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator.h" */ /* amalgamation skipped (editor-only): #include 
"simdjson/generic/ondemand/json_type-inl.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string-inl.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { namespace arm64 { namespace ondemand { simdjson_inline value_iterator::value_iterator( json_iterator *json_iter, depth_t depth, token_position start_position ) noexcept : _json_iter{json_iter}, _depth{depth}, _start_position{start_position} { } simdjson_warn_unused simdjson_inline simdjson_result value_iterator::start_object() noexcept { SIMDJSON_TRY( start_container('{', "Not an object", "object") ); return started_object(); } simdjson_warn_unused simdjson_inline simdjson_result value_iterator::start_root_object() noexcept { SIMDJSON_TRY( start_container('{', "Not an object", "object") ); return started_root_object(); } simdjson_warn_unused simdjson_inline simdjson_result value_iterator::started_object() noexcept { assert_at_container_start(); #if SIMDJSON_DEVELOPMENT_CHECKS _json_iter->set_start_position(_depth, start_position()); #endif if (*_json_iter->peek() == '}') { logger::log_value(*_json_iter, "empty object"); _json_iter->return_current_and_advance(); end_container(); return false; } return true; } simdjson_warn_unused simdjson_inline error_code value_iterator::check_root_object() noexcept { // When in streaming mode, we cannot expect peek_last() to be the last structural element of the // current document. It only works in the normal mode where we have indexed a single document. // Note that adding a check for 'streaming' is not expensive since we only have at most // one root element. if ( ! _json_iter->streaming() ) { // The following lines do not fully protect against garbage content within the // object: e.g., `{"a":2} foo }`. 
Users concerned with garbage content should // call `at_end()` on the document instance at the end of the processing to // ensure that the processing has finished at the end. // if (*_json_iter->peek_last() != '}') { _json_iter->abandon(); return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "missing } at end"); } // If the last character is } *and* the first gibberish character is also '}' // then on-demand could accidentally go over. So we need additional checks. // https://github.com/simdjson/simdjson/issues/1834 // Checking that the document is balanced requires a full scan which is potentially // expensive, but it only happens in edge cases where the first padding character is // a closing bracket. if ((*_json_iter->peek(_json_iter->end_position()) == '}') && (!_json_iter->balanced())) { _json_iter->abandon(); // The exact error would require more work. It will typically be an unclosed object. return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "the document is unbalanced"); } } return SUCCESS; } simdjson_warn_unused simdjson_inline simdjson_result value_iterator::started_root_object() noexcept { auto error = check_root_object(); if(error) { return error; } return started_object(); } simdjson_warn_unused simdjson_inline error_code value_iterator::end_container() noexcept { #if SIMDJSON_CHECK_EOF if (depth() > 1 && at_end()) { return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "missing parent ] or }"); } // if (depth() <= 1 && !at_end()) { return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "missing [ or { at start"); } #endif // SIMDJSON_CHECK_EOF _json_iter->ascend_to(depth()-1); return SUCCESS; } simdjson_warn_unused simdjson_inline simdjson_result value_iterator::has_next_field() noexcept { assert_at_next(); // It's illegal to call this unless there are more tokens: anything that ends in } or ] is // obligated to verify there are more tokens if they are not the top level. 
switch (*_json_iter->return_current_and_advance()) { case '}': logger::log_end_value(*_json_iter, "object"); SIMDJSON_TRY( end_container() ); return false; case ',': return true; default: return report_error(TAPE_ERROR, "Missing comma between object fields"); } } simdjson_warn_unused simdjson_inline simdjson_result value_iterator::find_field_raw(const std::string_view key) noexcept { error_code error; bool has_value; // // Initially, the object can be in one of a few different places: // // 1. The start of the object, at the first field: // // ``` // { "a": [ 1, 2 ], "b": [ 3, 4 ] } // ^ (depth 2, index 1) // ``` if (at_first_field()) { has_value = true; // // 2. When a previous search did not yield a value or the object is empty: // // ``` // { "a": [ 1, 2 ], "b": [ 3, 4 ] } // ^ (depth 0) // { } // ^ (depth 0, index 2) // ``` // } else if (!is_open()) { #if SIMDJSON_DEVELOPMENT_CHECKS // If we're past the end of the object, we're being iterated out of order. // Note: this isn't perfect detection. It's possible the user is inside some other object; if so, // this object iterator will blithely scan that object for fields. if (_json_iter->depth() < depth() - 1) { return OUT_OF_ORDER_ITERATION; } #endif return false; // 3. When a previous search found a field or an iterator yielded a value: // // ``` // // When a field was not fully consumed (or not even touched at all) // { "a": [ 1, 2 ], "b": [ 3, 4 ] } // ^ (depth 2) // // When a field was fully consumed // { "a": [ 1, 2 ], "b": [ 3, 4 ] } // ^ (depth 1) // // When the last field was fully consumed // { "a": [ 1, 2 ], "b": [ 3, 4 ] } // ^ (depth 1) // ``` // } else { if ((error = skip_child() )) { abandon(); return error; } if ((error = has_next_field().get(has_value) )) { abandon(); return error; } #if SIMDJSON_DEVELOPMENT_CHECKS if (_json_iter->start_position(_depth) != start_position()) { return OUT_OF_ORDER_ITERATION; } #endif } while (has_value) { // Get the key and colon, stopping at the value. 
raw_json_string actual_key; // size_t max_key_length = _json_iter->peek_length() - 2; // -2 for the two quotes // Note: _json_iter->peek_length() - 2 might overflow if _json_iter->peek_length() < 2. // field_key() advances the pointer and checks that '"' is found (corresponding to a key). // The depth is left unchanged by field_key(). if ((error = field_key().get(actual_key) )) { abandon(); return error; }; // field_value() will advance and check that we find a ':' separating the // key and the value. It will also increment the depth by one. if ((error = field_value() )) { abandon(); return error; } // If it matches, stop and return // We could do it this way if we wanted to allow arbitrary // key content (including escaped quotes). //if (actual_key.unsafe_is_equal(max_key_length, key)) { // Instead we do the following which may trigger buffer overruns if the // user provides an adversarial key (containing a well placed unescaped quote // character and being longer than the number of bytes remaining in the JSON // input). if (actual_key.unsafe_is_equal(key)) { logger::log_event(*this, "match", key, -2); // If we return here, then we return while pointing at the ':' that we just checked. return true; } // No match: skip the value and see if , or } is next logger::log_event(*this, "no match", key, -2); // The call to skip_child is meant to skip over the value corresponding to the key. // After skip_child(), we are right before the next comma (',') or the final brace ('}'). SIMDJSON_TRY( skip_child() ); // Skip the value entirely // The has_next_field() advances the pointer and check that either ',' or '}' is found. // It returns true if ',' is found, false otherwise. If anything other than ',' or '}' is found, // then we are in error and we abort. if ((error = has_next_field().get(has_value) )) { abandon(); return error; } } // If the loop ended, we're out of fields to look at. 
return false; } SIMDJSON_PUSH_DISABLE_WARNINGS SIMDJSON_DISABLE_STRICT_OVERFLOW_WARNING simdjson_warn_unused simdjson_inline simdjson_result value_iterator::find_field_unordered_raw(const std::string_view key) noexcept { /** * When find_field_unordered_raw is called, we can either be pointing at the * first key, pointing outside (at the closing brace) or if a key was matched * we can be either pointing right afterthe ':' right before the value (that we need skip), * or we may have consumed the value and we might be at a comma or at the * final brace (ready for a call to has_next_field()). */ error_code error; bool has_value; // First, we scan from that point to the end. // If we don't find a match, we may loop back around, and scan from the beginning to that point. token_position search_start = _json_iter->position(); // We want to know whether we need to go back to the beginning. bool at_first = at_first_field(); /////////////// // Initially, the object can be in one of a few different places: // // 1. At the first key: // // ``` // { "a": [ 1, 2 ], "b": [ 3, 4 ] } // ^ (depth 2, index 1) // ``` // if (at_first) { has_value = true; // 2. When a previous search did not yield a value or the object is empty: // // ``` // { "a": [ 1, 2 ], "b": [ 3, 4 ] } // ^ (depth 0) // { } // ^ (depth 0, index 2) // ``` // } else if (!is_open()) { #if SIMDJSON_DEVELOPMENT_CHECKS // If we're past the end of the object, we're being iterated out of order. // Note: this isn't perfect detection. It's possible the user is inside some other object; if so, // this object iterator will blithely scan that object for fields. if (_json_iter->depth() < depth() - 1) { return OUT_OF_ORDER_ITERATION; } #endif SIMDJSON_TRY(reset_object().get(has_value)); at_first = true; // 3. 
When a previous search found a field or an iterator yielded a value: // // ``` // // When a field was not fully consumed (or not even touched at all) // { "a": [ 1, 2 ], "b": [ 3, 4 ] } // ^ (depth 2) // // When a field was fully consumed // { "a": [ 1, 2 ], "b": [ 3, 4 ] } // ^ (depth 1) // // When the last field was fully consumed // { "a": [ 1, 2 ], "b": [ 3, 4 ] } // ^ (depth 1) // ``` // } else { // If someone queried a key but they not did access the value, then we are left pointing // at the ':' and we need to move forward through the value... If the value was // processed then skip_child() does not move the iterator (but may adjust the depth). if ((error = skip_child() )) { abandon(); return error; } search_start = _json_iter->position(); if ((error = has_next_field().get(has_value) )) { abandon(); return error; } #if SIMDJSON_DEVELOPMENT_CHECKS if (_json_iter->start_position(_depth) != start_position()) { return OUT_OF_ORDER_ITERATION; } #endif } // After initial processing, we will be in one of two states: // // ``` // // At the beginning of a field // { "a": [ 1, 2 ], "b": [ 3, 4 ] } // ^ (depth 1) // { "a": [ 1, 2 ], "b": [ 3, 4 ] } // ^ (depth 1) // // At the end of the object // { "a": [ 1, 2 ], "b": [ 3, 4 ] } // ^ (depth 0) // ``` // // Next, we find a match starting from the current position. while (has_value) { SIMDJSON_ASSUME( _json_iter->_depth == _depth ); // We must be at the start of a field // Get the key and colon, stopping at the value. raw_json_string actual_key; // size_t max_key_length = _json_iter->peek_length() - 2; // -2 for the two quotes // Note: _json_iter->peek_length() - 2 might overflow if _json_iter->peek_length() < 2. // field_key() advances the pointer and checks that '"' is found (corresponding to a key). // The depth is left unchanged by field_key(). if ((error = field_key().get(actual_key) )) { abandon(); return error; }; // field_value() will advance and check that we find a ':' separating the // key and the value. 
It will also increment the depth by one. if ((error = field_value() )) { abandon(); return error; } // If it matches, stop and return // We could do it this way if we wanted to allow arbitrary // key content (including escaped quotes). // if (actual_key.unsafe_is_equal(max_key_length, key)) { // Instead we do the following which may trigger buffer overruns if the // user provides an adversarial key (containing a well placed unescaped quote // character and being longer than the number of bytes remaining in the JSON // input). if (actual_key.unsafe_is_equal(key)) { logger::log_event(*this, "match", key, -2); // If we return here, then we return while pointing at the ':' that we just checked. return true; } // No match: skip the value and see if , or } is next logger::log_event(*this, "no match", key, -2); // The call to skip_child is meant to skip over the value corresponding to the key. // After skip_child(), we are right before the next comma (',') or the final brace ('}'). SIMDJSON_TRY( skip_child() ); // The has_next_field() advances the pointer and check that either ',' or '}' is found. // It returns true if ',' is found, false otherwise. If anything other than ',' or '}' is found, // then we are in error and we abort. if ((error = has_next_field().get(has_value) )) { abandon(); return error; } } // Performance note: it maybe wasteful to rewind to the beginning when there might be // no other query following. Indeed, it would require reskipping the whole object. // Instead, you can just stay where you are. If there is a new query, there is always time // to rewind. if(at_first) { return false; } // If we reach the end without finding a match, search the rest of the fields starting at the // beginning of the object. // (We have already run through the object before, so we've already validated its structure. We // don't check errors in this bit.) 
SIMDJSON_TRY(reset_object().get(has_value)); while (true) { SIMDJSON_ASSUME(has_value); // we should reach search_start before ever reaching the end of the object SIMDJSON_ASSUME( _json_iter->_depth == _depth ); // We must be at the start of a field // Get the key and colon, stopping at the value. raw_json_string actual_key; // size_t max_key_length = _json_iter->peek_length() - 2; // -2 for the two quotes // Note: _json_iter->peek_length() - 2 might overflow if _json_iter->peek_length() < 2. // field_key() advances the pointer and checks that '"' is found (corresponding to a key). // The depth is left unchanged by field_key(). error = field_key().get(actual_key); SIMDJSON_ASSUME(!error); // field_value() will advance and check that we find a ':' separating the // key and the value. It will also increment the depth by one. error = field_value(); SIMDJSON_ASSUME(!error); // If it matches, stop and return // We could do it this way if we wanted to allow arbitrary // key content (including escaped quotes). // if (actual_key.unsafe_is_equal(max_key_length, key)) { // Instead we do the following which may trigger buffer overruns if the // user provides an adversarial key (containing a well placed unescaped quote // character and being longer than the number of bytes remaining in the JSON // input). if (actual_key.unsafe_is_equal(key)) { logger::log_event(*this, "match", key, -2); // If we return here, then we return while pointing at the ':' that we just checked. return true; } // No match: skip the value and see if , or } is next logger::log_event(*this, "no match", key, -2); // The call to skip_child is meant to skip over the value corresponding to the key. // After skip_child(), we are right before the next comma (',') or the final brace ('}'). SIMDJSON_TRY( skip_child() ); // If we reached the end of the key-value pair we started from, then we know // that the key is not there so we return false. We are either right before // the next comma or the final brace. 
if(_json_iter->position() == search_start) { return false; } // The has_next_field() advances the pointer and check that either ',' or '}' is found. // It returns true if ',' is found, false otherwise. If anything other than ',' or '}' is found, // then we are in error and we abort. error = has_next_field().get(has_value); SIMDJSON_ASSUME(!error); // If we make the mistake of exiting here, then we could be left pointing at a key // in the middle of an object. That's not an allowable state. } // If the loop ended, we're out of fields to look at. The program should // never reach this point. return false; } SIMDJSON_POP_DISABLE_WARNINGS simdjson_warn_unused simdjson_inline simdjson_result value_iterator::field_key() noexcept { assert_at_next(); const uint8_t *key = _json_iter->return_current_and_advance(); if (*(key++) != '"') { return report_error(TAPE_ERROR, "Object key is not a string"); } return raw_json_string(key); } simdjson_warn_unused simdjson_inline error_code value_iterator::field_value() noexcept { assert_at_next(); if (*_json_iter->return_current_and_advance() != ':') { return report_error(TAPE_ERROR, "Missing colon in object field"); } _json_iter->descend_to(depth()+1); return SUCCESS; } simdjson_warn_unused simdjson_inline simdjson_result value_iterator::start_array() noexcept { SIMDJSON_TRY( start_container('[', "Not an array", "array") ); return started_array(); } simdjson_warn_unused simdjson_inline simdjson_result value_iterator::start_root_array() noexcept { SIMDJSON_TRY( start_container('[', "Not an array", "array") ); return started_root_array(); } inline std::string value_iterator::to_string() const noexcept { auto answer = std::string("value_iterator [ depth : ") + std::to_string(_depth) + std::string(", "); if(_json_iter != nullptr) { answer += _json_iter->to_string(); } answer += std::string(" ]"); return answer; } simdjson_warn_unused simdjson_inline simdjson_result value_iterator::started_array() noexcept { assert_at_container_start(); if 
(*_json_iter->peek() == ']') { logger::log_value(*_json_iter, "empty array"); _json_iter->return_current_and_advance(); SIMDJSON_TRY( end_container() ); return false; } _json_iter->descend_to(depth()+1); #if SIMDJSON_DEVELOPMENT_CHECKS _json_iter->set_start_position(_depth, start_position()); #endif return true; } simdjson_warn_unused simdjson_inline error_code value_iterator::check_root_array() noexcept { // When in streaming mode, we cannot expect peek_last() to be the last structural element of the // current document. It only works in the normal mode where we have indexed a single document. // Note that adding a check for 'streaming' is not expensive since we only have at most // one root element. if ( ! _json_iter->streaming() ) { // The following lines do not fully protect against garbage content within the // array: e.g., `[1, 2] foo]`. Users concerned with garbage content should // also call `at_end()` on the document instance at the end of the processing to // ensure that the processing has finished at the end. // if (*_json_iter->peek_last() != ']') { _json_iter->abandon(); return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "missing ] at end"); } // If the last character is ] *and* the first gibberish character is also ']' // then on-demand could accidentally go over. So we need additional checks. // https://github.com/simdjson/simdjson/issues/1834 // Checking that the document is balanced requires a full scan which is potentially // expensive, but it only happens in edge cases where the first padding character is // a closing bracket. if ((*_json_iter->peek(_json_iter->end_position()) == ']') && (!_json_iter->balanced())) { _json_iter->abandon(); // The exact error would require more work. It will typically be an unclosed array. 
return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "the document is unbalanced");
    }
  }
  return SUCCESS;
}

// Root-array variant of started_array(): runs check_root_array() first and
// returns its error, if any, before entering the array.
simdjson_warn_unused simdjson_inline simdjson_result<bool> value_iterator::started_root_array() noexcept {
  auto error = check_root_array();
  if (error) { return error; }
  return started_array();
}

// Consumes the token following an array element. Returns true on ',' (and descends to
// depth()+1 for the next element), false on ']' (and ends the container); any other
// token is reported as TAPE_ERROR.
simdjson_warn_unused simdjson_inline simdjson_result<bool> value_iterator::has_next_element() noexcept {
  assert_at_next();

  logger::log_event(*this, "has_next_element");
  switch (*_json_iter->return_current_and_advance()) {
    case ']':
      logger::log_end_value(*_json_iter, "array");
      SIMDJSON_TRY( end_container() );
      return false;
    case ',':
      _json_iter->descend_to(depth()+1);
      return true;
    default:
      return report_error(TAPE_ERROR, "Missing comma between array elements");
  }
}

// Parses the literal `true` or `false` at `json`. The literal must be followed by a
// structural or whitespace character; otherwise INCORRECT_TYPE is returned.
simdjson_warn_unused simdjson_inline simdjson_result<bool> value_iterator::parse_bool(const uint8_t *json) const noexcept {
  auto not_true = atomparsing::str4ncmp(json, "true");
  auto not_false = atomparsing::str4ncmp(json, "fals") | (json[4] ^ 'e');
  // The character to check is at index 4 after "true" and at index 5 after "false".
  bool error = (not_true && not_false) || jsoncharutils::is_not_structural_or_whitespace(json[not_true ?
5 : 4]); if (error) { return incorrect_type_error("Not a boolean"); } return simdjson_result(!not_true); } simdjson_warn_unused simdjson_inline simdjson_result value_iterator::parse_null(const uint8_t *json) const noexcept { bool is_null_string = !atomparsing::str4ncmp(json, "null") && jsoncharutils::is_structural_or_whitespace(json[4]); // if we start with 'n', we must be a null if(!is_null_string && json[0]=='n') { return incorrect_type_error("Not a null but starts with n"); } return is_null_string; } simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_string(bool allow_replacement) noexcept { return get_raw_json_string().unescape(json_iter(), allow_replacement); } template simdjson_warn_unused simdjson_inline error_code value_iterator::get_string(string_type& receiver, bool allow_replacement) noexcept { std::string_view content; auto err = get_string(allow_replacement).get(content); if (err) { return err; } receiver = content; return SUCCESS; } simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_wobbly_string() noexcept { return get_raw_json_string().unescape_wobbly(json_iter()); } simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_raw_json_string() noexcept { auto json = peek_scalar("string"); if (*json != '"') { return incorrect_type_error("Not a string"); } advance_scalar("string"); return raw_json_string(json+1); } simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_uint64() noexcept { auto result = numberparsing::parse_unsigned(peek_non_root_scalar("uint64")); if(result.error() == SUCCESS) { advance_non_root_scalar("uint64"); } return result; } simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_uint64_in_string() noexcept { auto result = numberparsing::parse_unsigned_in_string(peek_non_root_scalar("uint64")); if(result.error() == SUCCESS) { advance_non_root_scalar("uint64"); } return result; } simdjson_warn_unused simdjson_inline simdjson_result 
value_iterator::get_int64() noexcept { auto result = numberparsing::parse_integer(peek_non_root_scalar("int64")); if(result.error() == SUCCESS) { advance_non_root_scalar("int64"); } return result; } simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_int64_in_string() noexcept { auto result = numberparsing::parse_integer_in_string(peek_non_root_scalar("int64")); if(result.error() == SUCCESS) { advance_non_root_scalar("int64"); } return result; } simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_double() noexcept { auto result = numberparsing::parse_double(peek_non_root_scalar("double")); if(result.error() == SUCCESS) { advance_non_root_scalar("double"); } return result; } simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_double_in_string() noexcept { auto result = numberparsing::parse_double_in_string(peek_non_root_scalar("double")); if(result.error() == SUCCESS) { advance_non_root_scalar("double"); } return result; } simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_bool() noexcept { auto result = parse_bool(peek_non_root_scalar("bool")); if(result.error() == SUCCESS) { advance_non_root_scalar("bool"); } return result; } simdjson_inline simdjson_result value_iterator::is_null() noexcept { bool is_null_value; SIMDJSON_TRY(parse_null(peek_non_root_scalar("null")).get(is_null_value)); if(is_null_value) { advance_non_root_scalar("null"); } return is_null_value; } simdjson_inline bool value_iterator::is_negative() noexcept { return numberparsing::is_negative(peek_non_root_scalar("numbersign")); } simdjson_inline bool value_iterator::is_root_negative() noexcept { return numberparsing::is_negative(peek_root_scalar("numbersign")); } simdjson_inline simdjson_result value_iterator::is_integer() noexcept { return numberparsing::is_integer(peek_non_root_scalar("integer")); } simdjson_inline simdjson_result value_iterator::get_number_type() noexcept { return 
numberparsing::get_number_type(peek_non_root_scalar("integer"));
}

// Parses the current (non-root) value with numberparsing::parse_number().
// This function only peeks at the value; it does not advance the cursor.
simdjson_inline simdjson_result<number> value_iterator::get_number() noexcept {
  number num;
  error_code error =  numberparsing::parse_number(peek_non_root_scalar("number"), num);
  if(error) { return error; }
  return num;
}

// Reports whether the root value is an integer. The token is first copied into a small
// local buffer; a token that does not fit in the buffer yields false. With `check_trailing`,
// a successful parse followed by more tokens yields TRAILING_CONTENT.
simdjson_inline simdjson_result<bool> value_iterator::is_root_integer(bool check_trailing) noexcept {
  auto max_len = peek_start_length();
  auto json = peek_root_scalar("is_root_integer");
  uint8_t tmpbuf[20+1+1]{}; // <20 digits> is the longest possible unsigned integer
  tmpbuf[20+1] = '\0'; // make sure that buffer is always null terminated.
  if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 20+1)) {
    return false; // if there are more than 20 characters, it cannot be represented as an integer.
  }
  auto answer = numberparsing::is_integer(tmpbuf);
  // If the parsing was a success, we must still check that it is
  // a single scalar. Note that we parse first because of cases like '[]' where
  // getting TRAILING_CONTENT is wrong.
  if(check_trailing && (answer.error() == SUCCESS) && (!_json_iter->is_single_token())) { return TRAILING_CONTENT; }
  return answer;
}

// Result of numberparsing::get_number_type() on the root value, parsed from a local copy.
// A token that does not fit in the buffer is a NUMBER_ERROR. With `check_trailing`, a
// successful parse followed by more tokens yields TRAILING_CONTENT.
simdjson_inline simdjson_result<number_type> value_iterator::get_root_number_type(bool check_trailing) noexcept {
  auto max_len = peek_start_length();
  auto json = peek_root_scalar("number");
  // Per https://www.exploringbinary.com/maximum-number-of-decimal-digits-in-binary-floating-point-numbers/,
  // 1074 is the maximum number of significant fractional digits. Add 8 more digits for the biggest
  // number: -0.<fraction>e-308.
  uint8_t tmpbuf[1074+8+1+1];
  tmpbuf[1074+8+1] = '\0'; // make sure that buffer is always null terminated.
if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 1074+8+1)) { logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 1082 characters"); return NUMBER_ERROR; } auto answer = numberparsing::get_number_type(tmpbuf); if (check_trailing && (answer.error() == SUCCESS) && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } return answer; } simdjson_inline simdjson_result value_iterator::get_root_number(bool check_trailing) noexcept { auto max_len = peek_start_length(); auto json = peek_root_scalar("number"); // Per https://www.exploringbinary.com/maximum-number-of-decimal-digits-in-binary-floating-point-numbers/, // 1074 is the maximum number of significant fractional digits. Add 8 more digits for the biggest // number: -0.e-308. uint8_t tmpbuf[1074+8+1+1]; tmpbuf[1074+8+1] = '\0'; // make sure that buffer is always null terminated. if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 1074+8+1)) { logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 1082 characters"); return NUMBER_ERROR; } number num; error_code error = numberparsing::parse_number(tmpbuf, num); if(error) { return error; } if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } advance_root_scalar("number"); return num; } simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_string(bool check_trailing, bool allow_replacement) noexcept { return get_root_raw_json_string(check_trailing).unescape(json_iter(), allow_replacement); } template simdjson_warn_unused simdjson_inline error_code value_iterator::get_root_string(string_type& receiver, bool check_trailing, bool allow_replacement) noexcept { std::string_view content; auto err = get_root_string(check_trailing, allow_replacement).get(content); if (err) { return err; } receiver = content; return SUCCESS; } simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_wobbly_string(bool check_trailing) noexcept { return 
get_root_raw_json_string(check_trailing).unescape_wobbly(json_iter());
}

// Returns the root value as a raw (still escaped) JSON string pointing just past the opening
// quote. INCORRECT_TYPE when the value does not start with '"'; with `check_trailing`,
// TRAILING_CONTENT when the document holds more than a single token.
simdjson_warn_unused simdjson_inline simdjson_result<raw_json_string> value_iterator::get_root_raw_json_string(bool check_trailing) noexcept {
  auto json = peek_scalar("string");
  if (*json != '"') { return incorrect_type_error("Not a string"); }
  if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; }
  advance_scalar("string");
  return raw_json_string(json+1);
}

// Parses the root value as an unsigned 64-bit integer from a local copy of the token.
// A token that does not fit in the buffer is a NUMBER_ERROR. With `check_trailing`, a
// successful parse followed by more tokens yields TRAILING_CONTENT. Advances on success.
simdjson_warn_unused simdjson_inline simdjson_result<uint64_t> value_iterator::get_root_uint64(bool check_trailing) noexcept {
  auto max_len = peek_start_length();
  auto json = peek_root_scalar("uint64");
  uint8_t tmpbuf[20+1+1]{}; // <20 digits> is the longest possible unsigned integer
  tmpbuf[20+1] = '\0'; // make sure that buffer is always null terminated.
  if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 20+1)) {
    logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 20 characters");
    return NUMBER_ERROR;
  }
  auto result = numberparsing::parse_unsigned(tmpbuf);
  if(result.error() == SUCCESS) {
    if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; }
    advance_root_scalar("uint64");
  }
  return result;
}

// Same as get_root_uint64(), using numberparsing::parse_unsigned_in_string().
simdjson_warn_unused simdjson_inline simdjson_result<uint64_t> value_iterator::get_root_uint64_in_string(bool check_trailing) noexcept {
  auto max_len = peek_start_length();
  auto json = peek_root_scalar("uint64");
  uint8_t tmpbuf[20+1+1]{}; // <20 digits> is the longest possible unsigned integer
  tmpbuf[20+1] = '\0'; // make sure that buffer is always null terminated.
if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 20+1)) { logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 20 characters"); return NUMBER_ERROR; } auto result = numberparsing::parse_unsigned_in_string(tmpbuf); if(result.error() == SUCCESS) { if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } advance_root_scalar("uint64"); } return result; } simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_int64(bool check_trailing) noexcept { auto max_len = peek_start_length(); auto json = peek_root_scalar("int64"); uint8_t tmpbuf[20+1+1]; // -<19 digits> is the longest possible integer tmpbuf[20+1] = '\0'; // make sure that buffer is always null terminated. if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 20+1)) { logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 20 characters"); return NUMBER_ERROR; } auto result = numberparsing::parse_integer(tmpbuf); if(result.error() == SUCCESS) { if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } advance_root_scalar("int64"); } return result; } simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_int64_in_string(bool check_trailing) noexcept { auto max_len = peek_start_length(); auto json = peek_root_scalar("int64"); uint8_t tmpbuf[20+1+1]; // -<19 digits> is the longest possible integer tmpbuf[20+1] = '\0'; // make sure that buffer is always null terminated. 
if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 20+1)) { logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 20 characters"); return NUMBER_ERROR; } auto result = numberparsing::parse_integer_in_string(tmpbuf); if(result.error() == SUCCESS) { if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } advance_root_scalar("int64"); } return result; } simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_double(bool check_trailing) noexcept { auto max_len = peek_start_length(); auto json = peek_root_scalar("double"); // Per https://www.exploringbinary.com/maximum-number-of-decimal-digits-in-binary-floating-point-numbers/, // 1074 is the maximum number of significant fractional digits. Add 8 more digits for the biggest // number: -0.e-308. uint8_t tmpbuf[1074+8+1+1]; // +1 for null termination. tmpbuf[1074+8+1] = '\0'; // make sure that buffer is always null terminated. if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 1074+8+1)) { logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 1082 characters"); return NUMBER_ERROR; } auto result = numberparsing::parse_double(tmpbuf); if(result.error() == SUCCESS) { if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } advance_root_scalar("double"); } return result; } simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_double_in_string(bool check_trailing) noexcept { auto max_len = peek_start_length(); auto json = peek_root_scalar("double"); // Per https://www.exploringbinary.com/maximum-number-of-decimal-digits-in-binary-floating-point-numbers/, // 1074 is the maximum number of significant fractional digits. Add 8 more digits for the biggest // number: -0.e-308. uint8_t tmpbuf[1074+8+1+1]; // +1 for null termination. tmpbuf[1074+8+1] = '\0'; // make sure that buffer is always null terminated. 
if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 1074+8+1)) { logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 1082 characters"); return NUMBER_ERROR; } auto result = numberparsing::parse_double_in_string(tmpbuf); if(result.error() == SUCCESS) { if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } advance_root_scalar("double"); } return result; } simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_bool(bool check_trailing) noexcept { auto max_len = peek_start_length(); auto json = peek_root_scalar("bool"); uint8_t tmpbuf[5+1+1]; // +1 for null termination tmpbuf[5+1] = '\0'; // make sure that buffer is always null terminated. if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 5+1)) { return incorrect_type_error("Not a boolean"); } auto result = parse_bool(tmpbuf); if(result.error() == SUCCESS) { if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } advance_root_scalar("bool"); } return result; } simdjson_inline simdjson_result value_iterator::is_root_null(bool check_trailing) noexcept { auto max_len = peek_start_length(); auto json = peek_root_scalar("null"); bool result = (max_len >= 4 && !atomparsing::str4ncmp(json, "null") && (max_len == 4 || jsoncharutils::is_structural_or_whitespace(json[4]))); if(result) { // we have something that looks like a null. 
if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } advance_root_scalar("null"); } return result; } simdjson_warn_unused simdjson_inline error_code value_iterator::skip_child() noexcept { SIMDJSON_ASSUME( _json_iter->token._position > _start_position ); SIMDJSON_ASSUME( _json_iter->_depth >= _depth ); return _json_iter->skip_child(depth()); } simdjson_inline value_iterator value_iterator::child() const noexcept { assert_at_child(); return { _json_iter, depth()+1, _json_iter->token.position() }; } // GCC 7 warns when the first line of this function is inlined away into oblivion due to the caller // relating depth and iterator depth, which is a desired effect. It does not happen if is_open is // marked non-inline. SIMDJSON_PUSH_DISABLE_WARNINGS SIMDJSON_DISABLE_STRICT_OVERFLOW_WARNING simdjson_inline bool value_iterator::is_open() const noexcept { return _json_iter->depth() >= depth(); } SIMDJSON_POP_DISABLE_WARNINGS simdjson_inline bool value_iterator::at_end() const noexcept { return _json_iter->at_end(); } simdjson_inline bool value_iterator::at_start() const noexcept { return _json_iter->token.position() == start_position(); } simdjson_inline bool value_iterator::at_first_field() const noexcept { SIMDJSON_ASSUME( _json_iter->token._position > _start_position ); return _json_iter->token.position() == start_position() + 1; } simdjson_inline void value_iterator::abandon() noexcept { _json_iter->abandon(); } simdjson_warn_unused simdjson_inline depth_t value_iterator::depth() const noexcept { return _depth; } simdjson_warn_unused simdjson_inline error_code value_iterator::error() const noexcept { return _json_iter->error; } simdjson_warn_unused simdjson_inline uint8_t *&value_iterator::string_buf_loc() noexcept { return _json_iter->string_buf_loc(); } simdjson_warn_unused simdjson_inline const json_iterator &value_iterator::json_iter() const noexcept { return *_json_iter; } simdjson_warn_unused simdjson_inline json_iterator 
&value_iterator::json_iter() noexcept { return *_json_iter; } simdjson_inline const uint8_t *value_iterator::peek_start() const noexcept { return _json_iter->peek(start_position()); } simdjson_inline uint32_t value_iterator::peek_start_length() const noexcept { return _json_iter->peek_length(start_position()); } simdjson_inline const uint8_t *value_iterator::peek_scalar(const char *type) noexcept { logger::log_value(*_json_iter, start_position(), depth(), type); // If we're not at the position anymore, we don't want to advance the cursor. if (!is_at_start()) { return peek_start(); } // Get the JSON and advance the cursor, decreasing depth to signify that we have retrieved the value. assert_at_start(); return _json_iter->peek(); } simdjson_inline void value_iterator::advance_scalar(const char *type) noexcept { logger::log_value(*_json_iter, start_position(), depth(), type); // If we're not at the position anymore, we don't want to advance the cursor. if (!is_at_start()) { return; } // Get the JSON and advance the cursor, decreasing depth to signify that we have retrieved the value. assert_at_start(); _json_iter->return_current_and_advance(); _json_iter->ascend_to(depth()-1); } simdjson_inline error_code value_iterator::start_container(uint8_t start_char, const char *incorrect_type_message, const char *type) noexcept { logger::log_start_value(*_json_iter, start_position(), depth(), type); // If we're not at the position anymore, we don't want to advance the cursor. const uint8_t *json; if (!is_at_start()) { #if SIMDJSON_DEVELOPMENT_CHECKS if (!is_at_iterator_start()) { return OUT_OF_ORDER_ITERATION; } #endif json = peek_start(); if (*json != start_char) { return incorrect_type_error(incorrect_type_message); } } else { assert_at_start(); /** * We should be prudent. Let us peek. If it is not the right type, we * return an error. Only once we have determined that we have the right * type are we allowed to advance! 
*/ json = _json_iter->peek(); if (*json != start_char) { return incorrect_type_error(incorrect_type_message); } _json_iter->return_current_and_advance(); } return SUCCESS; } simdjson_inline const uint8_t *value_iterator::peek_root_scalar(const char *type) noexcept { logger::log_value(*_json_iter, start_position(), depth(), type); if (!is_at_start()) { return peek_start(); } assert_at_root(); return _json_iter->peek(); } simdjson_inline const uint8_t *value_iterator::peek_non_root_scalar(const char *type) noexcept { logger::log_value(*_json_iter, start_position(), depth(), type); if (!is_at_start()) { return peek_start(); } assert_at_non_root_start(); return _json_iter->peek(); } simdjson_inline void value_iterator::advance_root_scalar(const char *type) noexcept { logger::log_value(*_json_iter, start_position(), depth(), type); if (!is_at_start()) { return; } assert_at_root(); _json_iter->return_current_and_advance(); _json_iter->ascend_to(depth()-1); } simdjson_inline void value_iterator::advance_non_root_scalar(const char *type) noexcept { logger::log_value(*_json_iter, start_position(), depth(), type); if (!is_at_start()) { return; } assert_at_non_root_start(); _json_iter->return_current_and_advance(); _json_iter->ascend_to(depth()-1); } simdjson_inline error_code value_iterator::incorrect_type_error(const char *message) const noexcept { logger::log_error(*_json_iter, start_position(), depth(), message); return INCORRECT_TYPE; } simdjson_inline bool value_iterator::is_at_start() const noexcept { return position() == start_position(); } simdjson_inline bool value_iterator::is_at_key() const noexcept { // Keys are at the same depth as the object. // Note here that we could be safer and check that we are within an object, // but we do not. 
return _depth == _json_iter->_depth && *_json_iter->peek() == '"'; } simdjson_inline bool value_iterator::is_at_iterator_start() const noexcept { // We can legitimately be either at the first value ([1]), or after the array if it's empty ([]). auto delta = position() - start_position(); return delta == 1 || delta == 2; } inline void value_iterator::assert_at_start() const noexcept { SIMDJSON_ASSUME( _json_iter->token._position == _start_position ); SIMDJSON_ASSUME( _json_iter->_depth == _depth ); SIMDJSON_ASSUME( _depth > 0 ); } inline void value_iterator::assert_at_container_start() const noexcept { SIMDJSON_ASSUME( _json_iter->token._position == _start_position + 1 ); SIMDJSON_ASSUME( _json_iter->_depth == _depth ); SIMDJSON_ASSUME( _depth > 0 ); } inline void value_iterator::assert_at_next() const noexcept { SIMDJSON_ASSUME( _json_iter->token._position > _start_position ); SIMDJSON_ASSUME( _json_iter->_depth == _depth ); SIMDJSON_ASSUME( _depth > 0 ); } simdjson_inline void value_iterator::move_at_start() noexcept { _json_iter->_depth = _depth; _json_iter->token.set_position(_start_position); } simdjson_inline void value_iterator::move_at_container_start() noexcept { _json_iter->_depth = _depth; _json_iter->token.set_position(_start_position + 1); } simdjson_inline simdjson_result value_iterator::reset_array() noexcept { if(error()) { return error(); } move_at_container_start(); return started_array(); } simdjson_inline simdjson_result value_iterator::reset_object() noexcept { if(error()) { return error(); } move_at_container_start(); return started_object(); } inline void value_iterator::assert_at_child() const noexcept { SIMDJSON_ASSUME( _json_iter->token._position > _start_position ); SIMDJSON_ASSUME( _json_iter->_depth == _depth + 1 ); SIMDJSON_ASSUME( _depth > 0 ); } inline void value_iterator::assert_at_root() const noexcept { assert_at_start(); SIMDJSON_ASSUME( _depth == 1 ); } inline void value_iterator::assert_at_non_root_start() const noexcept { 
assert_at_start(); SIMDJSON_ASSUME( _depth > 1 ); } inline void value_iterator::assert_is_valid() const noexcept { SIMDJSON_ASSUME( _json_iter != nullptr ); } simdjson_inline bool value_iterator::is_valid() const noexcept { return _json_iter != nullptr; } simdjson_inline simdjson_result value_iterator::type() const noexcept { switch (*peek_start()) { case '{': return json_type::object; case '[': return json_type::array; case '"': return json_type::string; case 'n': return json_type::null; case 't': case 'f': return json_type::boolean; case '-': case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': return json_type::number; default: return TAPE_ERROR; } } simdjson_inline token_position value_iterator::start_position() const noexcept { return _start_position; } simdjson_inline token_position value_iterator::position() const noexcept { return _json_iter->position(); } simdjson_inline token_position value_iterator::end_position() const noexcept { return _json_iter->end_position(); } simdjson_inline token_position value_iterator::last_position() const noexcept { return _json_iter->last_position(); } simdjson_inline error_code value_iterator::report_error(error_code error, const char *message) noexcept { return _json_iter->report_error(error, message); } } // namespace ondemand } // namespace arm64 } // namespace simdjson namespace simdjson { simdjson_inline simdjson_result::simdjson_result(arm64::ondemand::value_iterator &&value) noexcept : implementation_simdjson_result_base(std::forward(value)) {} simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept : implementation_simdjson_result_base(error) {} } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_VALUE_ITERATOR_INL_H /* end file simdjson/generic/ondemand/value_iterator-inl.h for arm64 */ /* end file simdjson/generic/ondemand/amalgamated.h for arm64 */ /* including simdjson/arm64/end.h: #include "simdjson/arm64/end.h" */ /* begin file 
simdjson/arm64/end.h */ /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #include "simdjson/arm64/base.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ #undef SIMDJSON_SKIP_BACKSLASH_SHORT_CIRCUIT /* undefining SIMDJSON_IMPLEMENTATION from "arm64" */ #undef SIMDJSON_IMPLEMENTATION /* end file simdjson/arm64/end.h */ #endif // SIMDJSON_ARM64_ONDEMAND_H /* end file simdjson/arm64/ondemand.h */ #elif SIMDJSON_BUILTIN_IMPLEMENTATION_IS(fallback) /* including simdjson/fallback/ondemand.h: #include "simdjson/fallback/ondemand.h" */ /* begin file simdjson/fallback/ondemand.h */ #ifndef SIMDJSON_FALLBACK_ONDEMAND_H #define SIMDJSON_FALLBACK_ONDEMAND_H /* including simdjson/fallback/begin.h: #include "simdjson/fallback/begin.h" */ /* begin file simdjson/fallback/begin.h */ /* defining SIMDJSON_IMPLEMENTATION to "fallback" */ #define SIMDJSON_IMPLEMENTATION fallback /* including simdjson/fallback/base.h: #include "simdjson/fallback/base.h" */ /* begin file simdjson/fallback/base.h */ #ifndef SIMDJSON_FALLBACK_BASE_H #define SIMDJSON_FALLBACK_BASE_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #include "simdjson/base.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { /** * Fallback implementation (runs on any machine). 
*/ namespace fallback { class implementation; } // namespace fallback } // namespace simdjson #endif // SIMDJSON_FALLBACK_BASE_H /* end file simdjson/fallback/base.h */ /* including simdjson/fallback/bitmanipulation.h: #include "simdjson/fallback/bitmanipulation.h" */ /* begin file simdjson/fallback/bitmanipulation.h */ #ifndef SIMDJSON_FALLBACK_BITMANIPULATION_H #define SIMDJSON_FALLBACK_BITMANIPULATION_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #include "simdjson/fallback/base.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { namespace fallback { namespace { #if defined(_MSC_VER) && !defined(_M_ARM64) && !defined(_M_X64) static inline unsigned char _BitScanForward64(unsigned long* ret, uint64_t x) { unsigned long x0 = (unsigned long)x, top, bottom; _BitScanForward(&top, (unsigned long)(x >> 32)); _BitScanForward(&bottom, x0); *ret = x0 ? bottom : 32 + top; return x != 0; } static unsigned char _BitScanReverse64(unsigned long* ret, uint64_t x) { unsigned long x1 = (unsigned long)(x >> 32), top, bottom; _BitScanReverse(&top, x1); _BitScanReverse(&bottom, (unsigned long)x); *ret = x1 ? top + 32 : bottom; return x != 0; } #endif /* result might be undefined when input_num is zero */ simdjson_inline int leading_zeroes(uint64_t input_num) { #ifdef _MSC_VER unsigned long leading_zero = 0; // Search the mask data from most significant bit (MSB) // to least significant bit (LSB) for a set bit (1). 
if (_BitScanReverse64(&leading_zero, input_num)) return (int)(63 - leading_zero); else return 64; #else return __builtin_clzll(input_num); #endif// _MSC_VER } } // unnamed namespace } // namespace fallback } // namespace simdjson #endif // SIMDJSON_FALLBACK_BITMANIPULATION_H /* end file simdjson/fallback/bitmanipulation.h */ /* including simdjson/fallback/stringparsing_defs.h: #include "simdjson/fallback/stringparsing_defs.h" */ /* begin file simdjson/fallback/stringparsing_defs.h */ #ifndef SIMDJSON_FALLBACK_STRINGPARSING_DEFS_H #define SIMDJSON_FALLBACK_STRINGPARSING_DEFS_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #include "simdjson/fallback/base.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { namespace fallback { namespace { // Holds backslashes and quotes locations. struct backslash_and_quote { public: static constexpr uint32_t BYTES_PROCESSED = 1; simdjson_inline static backslash_and_quote copy_and_find(const uint8_t *src, uint8_t *dst); simdjson_inline bool has_quote_first() { return c == '"'; } simdjson_inline bool has_backslash() { return c == '\\'; } simdjson_inline int quote_index() { return c == '"' ? 0 : 1; } simdjson_inline int backslash_index() { return c == '\\' ? 
0 : 1; } uint8_t c; }; // struct backslash_and_quote simdjson_inline backslash_and_quote backslash_and_quote::copy_and_find(const uint8_t *src, uint8_t *dst) { // store to dest unconditionally - we can overwrite the bits we don't like later dst[0] = src[0]; return { src[0] }; } } // unnamed namespace } // namespace fallback } // namespace simdjson #endif // SIMDJSON_FALLBACK_STRINGPARSING_DEFS_H /* end file simdjson/fallback/stringparsing_defs.h */ /* including simdjson/fallback/numberparsing_defs.h: #include "simdjson/fallback/numberparsing_defs.h" */ /* begin file simdjson/fallback/numberparsing_defs.h */ #ifndef SIMDJSON_FALLBACK_NUMBERPARSING_DEFS_H #define SIMDJSON_FALLBACK_NUMBERPARSING_DEFS_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #include "simdjson/fallback/base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/internal/numberparsing_tables.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ #include #ifdef JSON_TEST_NUMBERS // for unit testing void found_invalid_number(const uint8_t *buf); void found_integer(int64_t result, const uint8_t *buf); void found_unsigned_integer(uint64_t result, const uint8_t *buf); void found_float(double result, const uint8_t *buf); #endif namespace simdjson { namespace fallback { namespace numberparsing { // credit: https://johnnylee-sde.github.io/Fast-numeric-string-to-int/ /** @private */ static simdjson_inline uint32_t parse_eight_digits_unrolled(const char *chars) { uint64_t val; memcpy(&val, chars, sizeof(uint64_t)); val = (val & 0x0F0F0F0F0F0F0F0F) * 2561 >> 8; val = (val & 0x00FF00FF00FF00FF) * 6553601 >> 16; return uint32_t((val & 0x0000FFFF0000FFFF) * 42949672960001 >> 32); } /** @private */ static simdjson_inline uint32_t parse_eight_digits_unrolled(const uint8_t *chars) { return parse_eight_digits_unrolled(reinterpret_cast(chars)); } #if SIMDJSON_IS_32BITS // _umul128 for x86, arm // this 
is a slow emulation routine for 32-bit // static simdjson_inline uint64_t __emulu(uint32_t x, uint32_t y) { return x * (uint64_t)y; } static simdjson_inline uint64_t _umul128(uint64_t ab, uint64_t cd, uint64_t *hi) { uint64_t ad = __emulu((uint32_t)(ab >> 32), (uint32_t)cd); uint64_t bd = __emulu((uint32_t)ab, (uint32_t)cd); uint64_t adbc = ad + __emulu((uint32_t)ab, (uint32_t)(cd >> 32)); uint64_t adbc_carry = !!(adbc < ad); uint64_t lo = bd + (adbc << 32); *hi = __emulu((uint32_t)(ab >> 32), (uint32_t)(cd >> 32)) + (adbc >> 32) + (adbc_carry << 32) + !!(lo < bd); return lo; } #endif /** @private */ simdjson_inline internal::value128 full_multiplication(uint64_t value1, uint64_t value2) { internal::value128 answer; #if SIMDJSON_REGULAR_VISUAL_STUDIO || SIMDJSON_IS_32BITS #ifdef _M_ARM64 // ARM64 has native support for 64-bit multiplications, no need to emultate answer.high = __umulh(value1, value2); answer.low = value1 * value2; #else answer.low = _umul128(value1, value2, &answer.high); // _umul128 not available on ARM64 #endif // _M_ARM64 #else // SIMDJSON_REGULAR_VISUAL_STUDIO || SIMDJSON_IS_32BITS __uint128_t r = (static_cast<__uint128_t>(value1)) * value2; answer.low = uint64_t(r); answer.high = uint64_t(r >> 64); #endif return answer; } } // namespace numberparsing } // namespace fallback } // namespace simdjson #define SIMDJSON_SWAR_NUMBER_PARSING 1 #endif // SIMDJSON_FALLBACK_NUMBERPARSING_DEFS_H /* end file simdjson/fallback/numberparsing_defs.h */ /* end file simdjson/fallback/begin.h */ /* including simdjson/generic/ondemand/amalgamated.h for fallback: #include "simdjson/generic/ondemand/amalgamated.h" */ /* begin file simdjson/generic/ondemand/amalgamated.h for fallback */ #if defined(SIMDJSON_CONDITIONAL_INCLUDE) && !defined(SIMDJSON_GENERIC_ONDEMAND_DEPENDENCIES_H) #error simdjson/generic/ondemand/dependencies.h must be included before simdjson/generic/ondemand/amalgamated.h! 
#endif

// Stuff other things depend on
/* including simdjson/generic/ondemand/base.h for fallback: #include "simdjson/generic/ondemand/base.h" */
/* begin file simdjson/generic/ondemand/base.h for fallback */
#ifndef SIMDJSON_GENERIC_ONDEMAND_BASE_H

/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */
/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_BASE_H */
/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */
/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */

namespace simdjson {
namespace fallback {
/**
 * A fast, simple, DOM-like interface that parses JSON as you use it.
 *
 * Designed for maximum speed and a lower memory profile.
 */
namespace ondemand {

/** Represents the depth of a JSON value (number of nested arrays/objects). */
using depth_t = int32_t;

/** @copydoc simdjson::fallback::number_type */
using number_type = simdjson::fallback::number_type;

/** @private Position in the JSON buffer indexes */
using token_position = const uint32_t *;

// Forward declarations of the On Demand types. Only the names are introduced
// here, so that declarations further down can mention these types before
// their full definitions appear.
class array;
class array_iterator;
class document;
class document_reference;
class document_stream;
class field;
class json_iterator;
enum class json_type;
struct number;
class object;
class object_iterator;
class parser;
class raw_json_string;
class token_iterator;
class value;
class value_iterator;

} // namespace ondemand
} // namespace fallback
} // namespace simdjson

#endif // SIMDJSON_GENERIC_ONDEMAND_BASE_H
/* end file simdjson/generic/ondemand/base.h for fallback */
/* including simdjson/generic/ondemand/value_iterator.h for fallback: #include "simdjson/generic/ondemand/value_iterator.h" */
/* begin file simdjson/generic/ondemand/value_iterator.h for fallback */
#ifndef SIMDJSON_GENERIC_ONDEMAND_VALUE_ITERATOR_H

/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */
/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_VALUE_ITERATOR_H */
/* amalgamation skipped
(editor-only): #include "simdjson/generic/ondemand/base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { namespace fallback { namespace ondemand { /** * Iterates through a single JSON value at a particular depth. * * Does not keep track of the type of value: provides methods for objects, arrays and scalars and expects * the caller to call the right ones. * * @private This is not intended for external use. */ class value_iterator { protected: /** The underlying JSON iterator */ json_iterator *_json_iter{}; /** The depth of this value */ depth_t _depth{}; /** * The starting token index for this value */ token_position _start_position{}; public: simdjson_inline value_iterator() noexcept = default; /** * Denote that we're starting a document. */ simdjson_inline void start_document() noexcept; /** * Skips a non-iterated or partially-iterated JSON value, whether it is a scalar, array or object. * * Optimized for scalars. */ simdjson_warn_unused simdjson_inline error_code skip_child() noexcept; /** * Tell whether the iterator is at the EOF mark */ simdjson_inline bool at_end() const noexcept; /** * Tell whether the iterator is at the start of the value */ simdjson_inline bool at_start() const noexcept; /** * Tell whether the value is open--if the value has not been used, or the array/object is still open. */ simdjson_inline bool is_open() const noexcept; /** * Tell whether the value is at an object's first field (just after the {). */ simdjson_inline bool at_first_field() const noexcept; /** * Abandon all iteration. */ simdjson_inline void abandon() noexcept; /** * Get the child value as a value_iterator. */ simdjson_inline value_iterator child_value() const noexcept; /** * Get the depth of this value. */ simdjson_inline int32_t depth() const noexcept; /** * Get the JSON type of this value. 
* * @error TAPE_ERROR when the JSON value is a bad token like "}" "," or "alse". */ simdjson_inline simdjson_result type() const noexcept; /** * @addtogroup object Object iteration * * Methods to iterate and find object fields. These methods generally *assume* the value is * actually an object; the caller is responsible for keeping track of that fact. * * @{ */ /** * Start an object iteration. * * @returns Whether the object had any fields (returns false for empty). * @error INCORRECT_TYPE if there is no opening { */ simdjson_warn_unused simdjson_inline simdjson_result start_object() noexcept; /** * Start an object iteration from the root. * * @returns Whether the object had any fields (returns false for empty). * @error INCORRECT_TYPE if there is no opening { * @error TAPE_ERROR if there is no matching } at end of document */ simdjson_warn_unused simdjson_inline simdjson_result start_root_object() noexcept; /** * Checks whether an object could be started from the root. May be called by start_root_object. * * @returns SUCCESS if it is possible to safely start an object from the root (document level). * @error INCORRECT_TYPE if there is no opening { * @error TAPE_ERROR if there is no matching } at end of document */ simdjson_warn_unused simdjson_inline error_code check_root_object() noexcept; /** * Start an object iteration after the user has already checked and moved past the {. * * Does not move the iterator unless the object is empty ({}). * * @returns Whether the object had any fields (returns false for empty). * @error INCOMPLETE_ARRAY_OR_OBJECT If there are no more tokens (implying the *parent* * array or object is incomplete). */ simdjson_warn_unused simdjson_inline simdjson_result started_object() noexcept; /** * Start an object iteration from the root, after the user has already checked and moved past the {. * * Does not move the iterator unless the object is empty ({}). * * @returns Whether the object had any fields (returns false for empty). 
* @error INCOMPLETE_ARRAY_OR_OBJECT If there are no more tokens (implying the *parent* * array or object is incomplete). */ simdjson_warn_unused simdjson_inline simdjson_result started_root_object() noexcept; /** * Moves to the next field in an object. * * Looks for , and }. If } is found, the object is finished and the iterator advances past it. * Otherwise, it advances to the next value. * * @return whether there is another field in the object. * @error TAPE_ERROR If there is a comma missing between fields. * @error TAPE_ERROR If there is a comma, but not enough tokens remaining to have a key, :, and value. */ simdjson_warn_unused simdjson_inline simdjson_result has_next_field() noexcept; /** * Get the current field's key. */ simdjson_warn_unused simdjson_inline simdjson_result field_key() noexcept; /** * Pass the : in the field and move to its value. */ simdjson_warn_unused simdjson_inline error_code field_value() noexcept; /** * Find the next field with the given key. * * Assumes you have called next_field() or otherwise matched the previous value. * * This means the iterator must be sitting at the next key: * * ``` * { "a": 1, "b": 2 } * ^ * ``` * * Key is *raw JSON,* meaning it will be matched against the verbatim JSON without attempting to * unescape it. This works well for typical ASCII and UTF-8 keys (almost all of them), but may * fail to match some keys with escapes (\u, \n, etc.). */ simdjson_warn_unused simdjson_inline error_code find_field(const std::string_view key) noexcept; /** * Find the next field with the given key, *without* unescaping. This assumes object order: it * will not find the field if it was already passed when looking for some *other* field. * * Assumes you have called next_field() or otherwise matched the previous value. * * This means the iterator must be sitting at the next key: * * ``` * { "a": 1, "b": 2 } * ^ * ``` * * Key is *raw JSON,* meaning it will be matched against the verbatim JSON without attempting to * unescape it. 
This works well for typical ASCII and UTF-8 keys (almost all of them), but may * fail to match some keys with escapes (\u, \n, etc.). */ simdjson_warn_unused simdjson_inline simdjson_result find_field_raw(const std::string_view key) noexcept; /** * Find the field with the given key without regard to order, and *without* unescaping. * * This is an unordered object lookup: if the field is not found initially, it will cycle around and scan from the beginning. * * Assumes you have called next_field() or otherwise matched the previous value. * * This means the iterator must be sitting at the next key: * * ``` * { "a": 1, "b": 2 } * ^ * ``` * * Key is *raw JSON,* meaning it will be matched against the verbatim JSON without attempting to * unescape it. This works well for typical ASCII and UTF-8 keys (almost all of them), but may * fail to match some keys with escapes (\u, \n, etc.). */ simdjson_warn_unused simdjson_inline simdjson_result find_field_unordered_raw(const std::string_view key) noexcept; /** @} */ /** * @addtogroup array Array iteration * Methods to iterate over array elements. These methods generally *assume* the value is actually * an object; the caller is responsible for keeping track of that fact. * @{ */ /** * Check for an opening [ and start an array iteration. * * @returns Whether the array had any elements (returns false for empty). * @error INCORRECT_TYPE If there is no [. */ simdjson_warn_unused simdjson_inline simdjson_result start_array() noexcept; /** * Check for an opening [ and start an array iteration while at the root. * * @returns Whether the array had any elements (returns false for empty). * @error INCORRECT_TYPE If there is no [. * @error TAPE_ERROR if there is no matching ] at end of document */ simdjson_warn_unused simdjson_inline simdjson_result start_root_array() noexcept; /** * Checks whether an array could be started from the root. May be called by start_root_array. 
* * @returns SUCCESS if it is possible to safely start an array from the root (document level). * @error INCORRECT_TYPE If there is no [. * @error TAPE_ERROR if there is no matching ] at end of document */ simdjson_warn_unused simdjson_inline error_code check_root_array() noexcept; /** * Start an array iteration, after the user has already checked and moved past the [. * * Does not move the iterator unless the array is empty ([]). * * @returns Whether the array had any elements (returns false for empty). * @error INCOMPLETE_ARRAY_OR_OBJECT If there are no more tokens (implying the *parent* * array or object is incomplete). */ simdjson_warn_unused simdjson_inline simdjson_result started_array() noexcept; /** * Start an array iteration from the root, after the user has already checked and moved past the [. * * Does not move the iterator unless the array is empty ([]). * * @returns Whether the array had any elements (returns false for empty). * @error INCOMPLETE_ARRAY_OR_OBJECT If there are no more tokens (implying the *parent* * array or object is incomplete). */ simdjson_warn_unused simdjson_inline simdjson_result started_root_array() noexcept; /** * Moves to the next element in an array. * * Looks for , and ]. If ] is found, the array is finished and the iterator advances past it. * Otherwise, it advances to the next value. * * @return Whether there is another element in the array. * @error TAPE_ERROR If there is a comma missing between elements. */ simdjson_warn_unused simdjson_inline simdjson_result has_next_element() noexcept; /** * Get a child value iterator. 
*/ simdjson_warn_unused simdjson_inline value_iterator child() const noexcept; /** @} */ /** * @defgroup scalar Scalar values * @addtogroup scalar * @{ */ simdjson_warn_unused simdjson_inline simdjson_result get_string(bool allow_replacement) noexcept; template simdjson_warn_unused simdjson_inline error_code get_string(string_type& receiver, bool allow_replacement) noexcept; simdjson_warn_unused simdjson_inline simdjson_result get_wobbly_string() noexcept; simdjson_warn_unused simdjson_inline simdjson_result get_raw_json_string() noexcept; simdjson_warn_unused simdjson_inline simdjson_result get_uint64() noexcept; simdjson_warn_unused simdjson_inline simdjson_result get_uint64_in_string() noexcept; simdjson_warn_unused simdjson_inline simdjson_result get_int64() noexcept; simdjson_warn_unused simdjson_inline simdjson_result get_int64_in_string() noexcept; simdjson_warn_unused simdjson_inline simdjson_result get_double() noexcept; simdjson_warn_unused simdjson_inline simdjson_result get_double_in_string() noexcept; simdjson_warn_unused simdjson_inline simdjson_result get_bool() noexcept; simdjson_warn_unused simdjson_inline simdjson_result is_null() noexcept; simdjson_warn_unused simdjson_inline bool is_negative() noexcept; simdjson_warn_unused simdjson_inline simdjson_result is_integer() noexcept; simdjson_warn_unused simdjson_inline simdjson_result get_number_type() noexcept; simdjson_warn_unused simdjson_inline simdjson_result get_number() noexcept; simdjson_warn_unused simdjson_inline simdjson_result get_root_string(bool check_trailing, bool allow_replacement) noexcept; template simdjson_warn_unused simdjson_inline error_code get_root_string(string_type& receiver, bool check_trailing, bool allow_replacement) noexcept; simdjson_warn_unused simdjson_inline simdjson_result get_root_wobbly_string(bool check_trailing) noexcept; simdjson_warn_unused simdjson_inline simdjson_result get_root_raw_json_string(bool check_trailing) noexcept; simdjson_warn_unused 
simdjson_inline simdjson_result get_root_uint64(bool check_trailing) noexcept; simdjson_warn_unused simdjson_inline simdjson_result get_root_uint64_in_string(bool check_trailing) noexcept; simdjson_warn_unused simdjson_inline simdjson_result get_root_int64(bool check_trailing) noexcept; simdjson_warn_unused simdjson_inline simdjson_result get_root_int64_in_string(bool check_trailing) noexcept; simdjson_warn_unused simdjson_inline simdjson_result get_root_double(bool check_trailing) noexcept; simdjson_warn_unused simdjson_inline simdjson_result get_root_double_in_string(bool check_trailing) noexcept; simdjson_warn_unused simdjson_inline simdjson_result get_root_bool(bool check_trailing) noexcept; simdjson_warn_unused simdjson_inline bool is_root_negative() noexcept; simdjson_warn_unused simdjson_inline simdjson_result is_root_integer(bool check_trailing) noexcept; simdjson_warn_unused simdjson_inline simdjson_result get_root_number_type(bool check_trailing) noexcept; simdjson_warn_unused simdjson_inline simdjson_result get_root_number(bool check_trailing) noexcept; simdjson_warn_unused simdjson_inline simdjson_result is_root_null(bool check_trailing) noexcept; simdjson_inline error_code error() const noexcept; simdjson_inline uint8_t *&string_buf_loc() noexcept; simdjson_inline const json_iterator &json_iter() const noexcept; simdjson_inline json_iterator &json_iter() noexcept; simdjson_inline void assert_is_valid() const noexcept; simdjson_inline bool is_valid() const noexcept; /** @} */ protected: /** * Restarts an array iteration. * @returns Whether the array has any elements (returns false for empty). */ simdjson_inline simdjson_result reset_array() noexcept; /** * Restarts an object iteration. * @returns Whether the object has any fields (returns false for empty). */ simdjson_inline simdjson_result reset_object() noexcept; /** * move_at_start(): moves us so that we are pointing at the beginning of * the container. 
It updates the index so that at_start() is true and it * syncs the depth. The user can then create a new container instance. * * Usage: used with value::count_elements(). **/ simdjson_inline void move_at_start() noexcept; /** * move_at_container_start(): moves us so that we are pointing at the beginning of * the container so that assert_at_container_start() passes. * * Usage: used with reset_array() and reset_object(). **/ simdjson_inline void move_at_container_start() noexcept; /* Useful for debugging and logging purposes. */ inline std::string to_string() const noexcept; simdjson_inline value_iterator(json_iterator *json_iter, depth_t depth, token_position start_index) noexcept; simdjson_inline simdjson_result parse_null(const uint8_t *json) const noexcept; simdjson_inline simdjson_result parse_bool(const uint8_t *json) const noexcept; simdjson_inline const uint8_t *peek_start() const noexcept; simdjson_inline uint32_t peek_start_length() const noexcept; /** * The general idea of the advance_... methods and the peek_* methods * is that you first peek and check that you have desired type. If you do, * and only if you do, then you advance. * * We used to unconditionally advance. But this made reasoning about our * current state difficult. * Suppose you always advance. Look at the 'value' matching the key * "shadowable" in the following example... * * ({"globals":{"a":{"shadowable":[}}}}) * * If the user thinks it is a Boolean and asks for it, then we check the '[', * decide it is not a Boolean, but still move into the next character ('}'). Now * we are left pointing at '}' right after a '['. And we have not yet reported * an error, only that we do not have a Boolean. * * If, instead, you just stand your ground until it is content that you know, then * you will only even move beyond the '[' if the user tells you that you have an * array. 
So you will be at the '}' character inside the array and, hopefully, you * will then catch the error because an array cannot start with '}', but the code * processing Boolean values does not know this. * * So the contract is: first call 'peek_...' and then call 'advance_...' only * if you have determined that it is a type you can handle. * * Unfortunately, it makes the code more verbose, longer and maybe more error prone. */ simdjson_inline void advance_scalar(const char *type) noexcept; simdjson_inline void advance_root_scalar(const char *type) noexcept; simdjson_inline void advance_non_root_scalar(const char *type) noexcept; simdjson_inline const uint8_t *peek_scalar(const char *type) noexcept; simdjson_inline const uint8_t *peek_root_scalar(const char *type) noexcept; simdjson_inline const uint8_t *peek_non_root_scalar(const char *type) noexcept; simdjson_inline error_code start_container(uint8_t start_char, const char *incorrect_type_message, const char *type) noexcept; simdjson_inline error_code end_container() noexcept; /** * Advance to a place expecting a value (increasing depth). * * @return The current token (the one left behind). * @error TAPE_ERROR If the document ended early. */ simdjson_inline simdjson_result advance_to_value() noexcept; simdjson_inline error_code incorrect_type_error(const char *message) const noexcept; simdjson_inline error_code error_unless_more_tokens(uint32_t tokens=1) const noexcept; simdjson_inline bool is_at_start() const noexcept; /** * is_at_iterator_start() returns true on an array or object after it has just been * created, whether the instance is empty or not. * * Usage: used by array::begin() in debug mode (SIMDJSON_DEVELOPMENT_CHECKS) */ simdjson_inline bool is_at_iterator_start() const noexcept; /** * Assuming that we are within an object, this returns true if we * are pointing at a key. * * Usage: the skip_child() method should never be used while we are pointing * at a key inside an object. 
*/ simdjson_inline bool is_at_key() const noexcept; inline void assert_at_start() const noexcept; inline void assert_at_container_start() const noexcept; inline void assert_at_root() const noexcept; inline void assert_at_child() const noexcept; inline void assert_at_next() const noexcept; inline void assert_at_non_root_start() const noexcept; /** Get the starting position of this value */ simdjson_inline token_position start_position() const noexcept; /** @copydoc error_code json_iterator::position() const noexcept; */ simdjson_inline token_position position() const noexcept; /** @copydoc error_code json_iterator::end_position() const noexcept; */ simdjson_inline token_position last_position() const noexcept; /** @copydoc error_code json_iterator::end_position() const noexcept; */ simdjson_inline token_position end_position() const noexcept; /** @copydoc error_code json_iterator::report_error(error_code error, const char *message) noexcept; */ simdjson_inline error_code report_error(error_code error, const char *message) noexcept; friend class document; friend class object; friend class array; friend class value; }; // value_iterator } // namespace ondemand } // namespace fallback } // namespace simdjson namespace simdjson { template<> struct simdjson_result : public fallback::implementation_simdjson_result_base { public: simdjson_inline simdjson_result(fallback::ondemand::value_iterator &&value) noexcept; ///< @private simdjson_inline simdjson_result(error_code error) noexcept; ///< @private simdjson_inline simdjson_result() noexcept = default; }; } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_VALUE_ITERATOR_H /* end file simdjson/generic/ondemand/value_iterator.h for fallback */ /* including simdjson/generic/ondemand/value.h for fallback: #include "simdjson/generic/ondemand/value.h" */ /* begin file simdjson/generic/ondemand/value.h for fallback */ #ifndef SIMDJSON_GENERIC_ONDEMAND_VALUE_H /* amalgamation skipped (editor-only): #ifndef 
SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_VALUE_H */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { namespace fallback { namespace ondemand { /* NOTE(review): template parameter/argument lists (e.g. after `template` and `simdjson_result`) appear to be missing in this copy; `T` is used in get() below without a visible declaration. Confirm against the upstream header. */ /** * An ephemeral JSON value returned during iteration. It is only valid for as long as you do * not access more data in the JSON document. */ class value { public: /** * Create a new invalid value. * * Exists so you can declare a variable and later assign to it before use. */ simdjson_inline value() noexcept = default; /** * Get this value as the given type. * * Supported types: object, array, raw_json_string, string_view, uint64_t, int64_t, double, bool * * You may use get_double(), get_bool(), get_uint64(), get_int64(), * get_object(), get_array(), get_raw_json_string(), or get_string() instead. * * @returns A value of the given type, parsed from the JSON. * @returns INCORRECT_TYPE If the JSON value is not the given type. */ template simdjson_inline simdjson_result get() noexcept { // Unless the simdjson library provides an inline implementation, calling this method should // immediately fail. static_assert(!sizeof(T), "The get method with given type is not implemented by the simdjson library. " "The supported types are ondemand::object, ondemand::array, raw_json_string, std::string_view, uint64_t, " "int64_t, double, and bool. We recommend you use get_double(), get_bool(), get_uint64(), get_int64(), " " get_object(), get_array(), get_raw_json_string(), or get_string() instead of the get template."); } /** * Get this value as the given type.
* * Supported types: object, array, raw_json_string, string_view, uint64_t, int64_t, double, bool * * @param out This is set to a value of the given type, parsed from the JSON. If there is an error, this may not be initialized. * @returns INCORRECT_TYPE If the JSON value is not an object. * @returns SUCCESS If the parse succeeded and the out parameter was set to the value. */ template simdjson_inline error_code get(T &out) noexcept; /** * Cast this JSON value to an array. * * @returns An object that can be used to iterate the array. * @returns INCORRECT_TYPE If the JSON value is not an array. */ simdjson_inline simdjson_result get_array() noexcept; /** * Cast this JSON value to an object. * * @returns An object that can be used to look up or iterate fields. * @returns INCORRECT_TYPE If the JSON value is not an object. */ simdjson_inline simdjson_result get_object() noexcept; /** * Cast this JSON value to an unsigned integer. * * @returns An unsigned 64-bit integer. * @returns INCORRECT_TYPE If the JSON value is not a 64-bit unsigned integer. */ simdjson_inline simdjson_result get_uint64() noexcept; /** * Cast this JSON value (inside string) to an unsigned integer. * * @returns An unsigned 64-bit integer. * @returns INCORRECT_TYPE If the JSON value is not a 64-bit unsigned integer. */ simdjson_inline simdjson_result get_uint64_in_string() noexcept; /** * Cast this JSON value to a signed integer. * * @returns A signed 64-bit integer. * @returns INCORRECT_TYPE If the JSON value is not a 64-bit integer. */ simdjson_inline simdjson_result get_int64() noexcept; /** * Cast this JSON value (inside string) to a signed integer. * * @returns A signed 64-bit integer. * @returns INCORRECT_TYPE If the JSON value is not a 64-bit integer. */ simdjson_inline simdjson_result get_int64_in_string() noexcept; /** * Cast this JSON value to a double. * * @returns A double. * @returns INCORRECT_TYPE If the JSON value is not a valid floating-point number.
*/ simdjson_inline simdjson_result get_double() noexcept; /** * Cast this JSON value (inside string) to a double * * @returns A double. * @returns INCORRECT_TYPE If the JSON value is not a valid floating-point number. */ simdjson_inline simdjson_result get_double_in_string() noexcept; /** * Cast this JSON value to a string. * * The string is guaranteed to be valid UTF-8. * * Equivalent to get(). * * Important: a value should be consumed once. Calling get_string() twice on the same value * is an error. * * @returns An UTF-8 string. The string is stored in the parser and will be invalidated the next * time it parses a document or when it is destroyed. * @returns INCORRECT_TYPE if the JSON value is not a string. */ simdjson_inline simdjson_result get_string(bool allow_replacement = false) noexcept; /** * Attempts to fill the provided std::string reference with the parsed value of the current string. * * The string is guaranteed to be valid UTF-8. * * Important: a value should be consumed once. Calling get_string() twice on the same value * is an error. * * Performance: This method may be slower than get_string() or get_string(bool) because it may need to allocate memory. * We recommend you avoid allocating an std::string unless you need to. * * @returns INCORRECT_TYPE if the JSON value is not a string. Otherwise, we return SUCCESS. */ template simdjson_inline error_code get_string(string_type& receiver, bool allow_replacement = false) noexcept; /** * Cast this JSON value to a "wobbly" string. * * The string may not be a valid UTF-8 string. * See https://simonsapin.github.io/wtf-8/ * * Important: a value should be consumed once. Calling get_wobbly_string() twice on the same value * is an error. * * @returns An UTF-8 string. The string is stored in the parser and will be invalidated the next * time it parses a document or when it is destroyed. * @returns INCORRECT_TYPE if the JSON value is not a string.
*/ simdjson_inline simdjson_result get_wobbly_string() noexcept; /** * Cast this JSON value to a raw_json_string. * * The string is guaranteed to be valid UTF-8, and may have escapes in it (e.g. \\ or \n). * * @returns A pointer to the raw JSON for the given string. * @returns INCORRECT_TYPE if the JSON value is not a string. */ simdjson_inline simdjson_result get_raw_json_string() noexcept; /** * Cast this JSON value to a bool. * * @returns A bool value. * @returns INCORRECT_TYPE if the JSON value is not true or false. */ simdjson_inline simdjson_result get_bool() noexcept; /** * Checks if this JSON value is null. If and only if the value is * null, then it is consumed (we advance). If we find a token that * begins with 'n' but is not 'null', then an error is returned. * * @returns Whether the value is null. * @returns INCORRECT_TYPE If the JSON value begins with 'n' and is not 'null'. */ simdjson_inline simdjson_result is_null() noexcept; #if SIMDJSON_EXCEPTIONS /** * Cast this JSON value to an array. * * @returns An object that can be used to iterate the array. * @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not an array. */ simdjson_inline operator array() noexcept(false); /** * Cast this JSON value to an object. * * @returns An object that can be used to look up or iterate fields. * @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not an object. */ simdjson_inline operator object() noexcept(false); /** * Cast this JSON value to an unsigned integer. * * @returns An unsigned 64-bit integer. * @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not a 64-bit unsigned integer. */ simdjson_inline operator uint64_t() noexcept(false); /** * Cast this JSON value to a signed integer. * * @returns A signed 64-bit integer. * @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not a 64-bit integer. */ simdjson_inline operator int64_t() noexcept(false); /** * Cast this JSON value to a double. * * @returns A double.
* @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not a valid floating-point number. */ simdjson_inline operator double() noexcept(false); /** * Cast this JSON value to a string. * * The string is guaranteed to be valid UTF-8. * * Equivalent to get(). * * @returns An UTF-8 string. The string is stored in the parser and will be invalidated the next * time it parses a document or when it is destroyed. * @exception simdjson_error(INCORRECT_TYPE) if the JSON value is not a string. */ simdjson_inline operator std::string_view() noexcept(false); /** * Cast this JSON value to a raw_json_string. * * The string is guaranteed to be valid UTF-8, and may have escapes in it (e.g. \\ or \n). * * @returns A pointer to the raw JSON for the given string. * @exception simdjson_error(INCORRECT_TYPE) if the JSON value is not a string. */ simdjson_inline operator raw_json_string() noexcept(false); /** * Cast this JSON value to a bool. * * @returns A bool value. * @exception simdjson_error(INCORRECT_TYPE) if the JSON value is not true or false. */ simdjson_inline operator bool() noexcept(false); #endif /** * Begin array iteration. * * Part of the std::iterable interface. * * @returns INCORRECT_TYPE If the JSON value is not an array. */ simdjson_inline simdjson_result begin() & noexcept; /** * Sentinel representing the end of the array. * * Part of the std::iterable interface. */ simdjson_inline simdjson_result end() & noexcept; /** * This method scans the array and counts the number of elements. * The count_elements method should always be called before you have begun * iterating through the array: it is expected that you are pointing at * the beginning of the array. * The runtime complexity is linear in the size of the array. After * calling this function, if successful, the array is 'rewinded' at its * beginning as if it had never been accessed. If the JSON is malformed (e.g., * there is a missing comma), then an error is returned and it is no longer * safe to continue.
* * Performance hint: You should only call count_elements() as a last * resort as it may require scanning the document twice or more. */ simdjson_inline simdjson_result count_elements() & noexcept; /** * This method scans the object and counts the number of key-value pairs. * The count_fields method should always be called before you have begun * iterating through the object: it is expected that you are pointing at * the beginning of the object. * The runtime complexity is linear in the size of the object. After * calling this function, if successful, the object is 'rewinded' at its * beginning as if it had never been accessed. If the JSON is malformed (e.g., * there is a missing comma), then an error is returned and it is no longer * safe to continue. * * To check that an object is empty, it is more performant to use * the is_empty() method on the object instance. * * Performance hint: You should only call count_fields() as a last * resort as it may require scanning the document twice or more. */ simdjson_inline simdjson_result count_fields() & noexcept; /** * Get the value at the given index in the array. This function has linear-time complexity. * This function should only be called once on an array instance since the array iterator is not reset between each call. * * @return The value at the given index, or: * - INDEX_OUT_OF_BOUNDS if the array index is larger than an array length */ simdjson_inline simdjson_result at(size_t index) noexcept; /** * Look up a field by name on an object (order-sensitive).
* * The following code reads z, then y, then x, and thus will not retrieve x or y if fed the * JSON `{ "x": 1, "y": 2, "z": 3 }`: * * ```c++ * simdjson::ondemand::parser parser; * auto obj = parser.parse(R"( { "x": 1, "y": 2, "z": 3 } )"_padded); * double z = obj.find_field("z"); * double y = obj.find_field("y"); * double x = obj.find_field("x"); * ``` * If you have multiple fields with a matching key ({"x": 1, "x": 1}) be mindful * that only one field is returned. * **Raw Keys:** The lookup will be done against the *raw* key, and will not unescape keys. * e.g. `object["a"]` will match `{ "a": 1 }`, but will *not* match `{ "\u0061": 1 }`. * * @param key The key to look up. * @returns The value of the field, or NO_SUCH_FIELD if the field is not in the object. */ simdjson_inline simdjson_result find_field(std::string_view key) noexcept; /** @overload simdjson_inline simdjson_result find_field(std::string_view key) noexcept; */ simdjson_inline simdjson_result find_field(const char *key) noexcept; /** * Look up a field by name on an object, without regard to key order. * * **Performance Notes:** This is a bit less performant than find_field(), though its effect varies * and often appears negligible. It starts out normally, starting out at the last field; but if * the field is not found, it scans from the beginning of the object to see if it missed it. That * missing case has a non-cache-friendly bump and lots of extra scanning, especially if the object * in question is large. The fact that the extra code is there also bumps the executable size. * * It is the default, however, because it would be highly surprising (and hard to debug) if the * default behavior failed to look up a field just because it was in the wrong order--and many * APIs assume this. Therefore, you must be explicit if you want to treat objects as out of order. * * If you have multiple fields with a matching key ({"x": 1, "x": 1}) be mindful * that only one field is returned.
* * Use find_field() if you are sure fields will be in order (or are willing to treat it as if the * field wasn't there when they aren't). * * @param key The key to look up. * @returns The value of the field, or NO_SUCH_FIELD if the field is not in the object. */ simdjson_inline simdjson_result find_field_unordered(std::string_view key) noexcept; /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) noexcept; */ simdjson_inline simdjson_result find_field_unordered(const char *key) noexcept; /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) noexcept; */ simdjson_inline simdjson_result operator[](std::string_view key) noexcept; /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) noexcept; */ simdjson_inline simdjson_result operator[](const char *key) noexcept; /** * Get the type of this JSON value. It does not validate or consume the value. * E.g., you must still call "is_null()" to check that a value is null even if * "type()" returns json_type::null. * * NOTE: If you're only expecting a value to be one type (a typical case), it's generally * better to just call .get_double, .get_string, etc. and check for INCORRECT_TYPE (or just * let it throw an exception). * * @return The type of JSON value (json_type::array, json_type::object, json_type::string, * json_type::number, json_type::boolean, or json_type::null). * @error TAPE_ERROR when the JSON value is a bad token like "}" "," or "alse". */ simdjson_inline simdjson_result type() noexcept; /** * Checks whether the value is a scalar (string, number, null, Boolean). * Returns false when it is an array or object. * * @returns true if the type is string, number, null, Boolean * @error TAPE_ERROR when the JSON value is a bad token like "}" "," or "alse". */ simdjson_inline simdjson_result is_scalar() noexcept; /** * Checks whether the value is a negative number. * * @returns true if the number is negative.
*/ simdjson_inline bool is_negative() noexcept; /** * Checks whether the value is an integer number. Note that * this requires to partially parse the number string. If * the value is determined to be an integer, it may still * not parse properly as an integer in subsequent steps * (e.g., it might overflow). * * Performance note: if you call this function systematically * before parsing a number, you may have fallen for a performance * anti-pattern. * * @returns true if the number is an integer. */ simdjson_inline simdjson_result is_integer() noexcept; /** * Determine the number type (integer or floating-point number) as quickly * as possible. This function does not fully validate the input. It is * useful when you only need to classify the numbers, without parsing them. * * If you are planning to retrieve the value or you need full validation, * consider using the get_number() method instead: it will fully parse * and validate the input, and give you access to the type: * get_number().get_number_type(). * * get_number_type() is number_type::unsigned_integer if we have * an integer greater or equal to 9223372036854775808 * get_number_type() is number_type::signed_integer if we have an * integer that is less than 9223372036854775808 * Otherwise, get_number_type() has value number_type::floating_point_number * * This function requires processing the number string, but it is expected * to be faster than get_number().get_number_type() because it does not * parse the number value. * * @returns the type of the number */ simdjson_inline simdjson_result get_number_type() noexcept; /** * Attempt to parse an ondemand::number. An ondemand::number may * contain an integer value or a floating-point value, the simdjson * library will autodetect the type. Thus it is a dynamically typed * number. Before accessing the value, you must determine the detected * type.
* * number.get_number_type() is number_type::signed_integer if we have * an integer in [-9223372036854775808,9223372036854775808) * You can recover the value by calling number.get_int64() and you * have that number.is_int64() is true. * * number.get_number_type() is number_type::unsigned_integer if we have * an integer in [9223372036854775808,18446744073709551616) * You can recover the value by calling number.get_uint64() and you * have that number.is_uint64() is true. * * Otherwise, number.get_number_type() has value number_type::floating_point_number * and we have a binary64 number. * You can recover the value by calling number.get_double() and you * have that number.is_double() is true. * * You must check the type before accessing the value: it is an error * to call "get_int64()" when number.get_number_type() is not * number_type::signed_integer and when number.is_int64() is false. * * Performance note: this is designed with performance in mind. When * calling 'get_number()', you scan the number string only once, determining * efficiently the type and storing it in an efficient manner. */ simdjson_warn_unused simdjson_inline simdjson_result get_number() noexcept; /** * Get the raw JSON for this token. * * The string_view will always point into the input buffer. * * The string_view will start at the beginning of the token, and include the entire token * *as well as all spaces until the next token (or EOF).* This means, for example, that a * string token always begins with a " and is always terminated by the final ", possibly * followed by a number of spaces. * * The string_view is *not* null-terminated. However, if this is a scalar (string, number, * boolean, or null), the character after the end of the string_view is guaranteed to be * a non-space token. * * Tokens include: * - { * - [ * - "a string (possibly with UTF-8 or backslashed characters like \\\")". * - -1.2e-100 * - true * - false * - null * * See also value::raw_json().
*/ simdjson_inline std::string_view raw_json_token() noexcept; /** * Get a string_view pointing at this value in the JSON document. * If this element is an array or an object, it consumes the array or the object * and returns a string_view instance corresponding to the * array as represented in JSON. It points inside the original document. * If this element is a scalar (string, number, Boolean, null), it returns what * raw_json_token() would return. */ simdjson_inline simdjson_result raw_json() noexcept; /** * Returns the current location in the document if in bounds. */ simdjson_inline simdjson_result current_location() noexcept; /** * Returns the current depth in the document if in bounds. * * E.g., * 0 = finished with document * 1 = document root value (could be [ or {, not yet known) * 2 = , or } inside root array/object * 3 = key or value inside root array/object. */ simdjson_inline int32_t current_depth() const noexcept; /** * Get the value associated with the given JSON pointer. We use the RFC 6901 * https://tools.ietf.org/html/rfc6901 standard. * * ondemand::parser parser; * auto json = R"({ "foo": { "a": [ 10, 20, 30 ] }})"_padded; * auto doc = parser.iterate(json); * doc.at_pointer("/foo/a/1") == 20 * * It is allowed for a key to be the empty string: * * ondemand::parser parser; * auto json = R"({ "": { "a": [ 10, 20, 30 ] }})"_padded; * auto doc = parser.iterate(json); * doc.at_pointer("//a/1") == 20 * * Note that at_pointer() called on the document automatically calls the document's rewind * method between each call. It invalidates all previously accessed arrays, objects and values * that have not been consumed. * * Calling at_pointer() on non-document instances (e.g., arrays and objects) is not * standardized (by RFC 6901). We provide some experimental support for JSON pointers * on non-document instances. Yet it is not the case when calling at_pointer on an array * or an object instance: there is no rewind and no invalidation.
* * You may only call at_pointer on an array after it has been created, but before it has * been first accessed. When calling at_pointer on an array, the pointer is advanced to * the location indicated by the JSON pointer (in case of success). It is no longer possible * to call at_pointer on the same array. * * You may call at_pointer more than once on an object, but each time the pointer is advanced * to be within the value matched by the key indicated by the JSON pointer query. Thus any preceding * key (as well as the current key) can no longer be used with following JSON pointer calls. * * Also note that at_pointer() relies on find_field() which implies that we do not unescape keys when matching * * @return The value associated with the given JSON pointer, or: * - NO_SUCH_FIELD if a field does not exist in an object * - INDEX_OUT_OF_BOUNDS if an array index is larger than an array length * - INCORRECT_TYPE if a non-integer is used to access an array * - INVALID_JSON_POINTER if the JSON pointer is invalid and cannot be parsed */ simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; protected: /** * Create a value. */ simdjson_inline value(const value_iterator &iter) noexcept; /** * Skip this value, allowing iteration to continue. */ simdjson_inline void skip() noexcept; /** * Start a value at the current position. * * (It should already be started; this is just a self-documentation method.) */ static simdjson_inline value start(const value_iterator &iter) noexcept; /** * Resume a value.
*/ static simdjson_inline value resume(const value_iterator &iter) noexcept; /** * Get the object, starting or resuming it as necessary */ simdjson_inline simdjson_result start_or_resume_object() noexcept; // simdjson_inline void log_value(const char *type) const noexcept; // simdjson_inline void log_error(const char *message) const noexcept; value_iterator iter{}; friend class document; friend class array_iterator; friend class field; friend class object; friend struct simdjson_result; friend struct simdjson_result; }; } // namespace ondemand } // namespace fallback } // namespace simdjson namespace simdjson { template<> struct simdjson_result : public fallback::implementation_simdjson_result_base { public: simdjson_inline simdjson_result(fallback::ondemand::value &&value) noexcept; ///< @private simdjson_inline simdjson_result(error_code error) noexcept; ///< @private simdjson_inline simdjson_result() noexcept = default; simdjson_inline simdjson_result get_array() noexcept; simdjson_inline simdjson_result get_object() noexcept; simdjson_inline simdjson_result get_uint64() noexcept; simdjson_inline simdjson_result get_uint64_in_string() noexcept; simdjson_inline simdjson_result get_int64() noexcept; simdjson_inline simdjson_result get_int64_in_string() noexcept; simdjson_inline simdjson_result get_double() noexcept; simdjson_inline simdjson_result get_double_in_string() noexcept; simdjson_inline simdjson_result get_string(bool allow_replacement = false) noexcept; template simdjson_inline error_code get_string(string_type& receiver, bool allow_replacement = false) noexcept; simdjson_inline simdjson_result get_wobbly_string() noexcept; simdjson_inline simdjson_result get_raw_json_string() noexcept; simdjson_inline simdjson_result get_bool() noexcept; simdjson_inline simdjson_result is_null() noexcept; template simdjson_inline simdjson_result get() noexcept; template simdjson_inline error_code get(T &out) noexcept; #if SIMDJSON_EXCEPTIONS simdjson_inline operator
fallback::ondemand::array() noexcept(false); simdjson_inline operator fallback::ondemand::object() noexcept(false); simdjson_inline operator uint64_t() noexcept(false); simdjson_inline operator int64_t() noexcept(false); simdjson_inline operator double() noexcept(false); simdjson_inline operator std::string_view() noexcept(false); simdjson_inline operator fallback::ondemand::raw_json_string() noexcept(false); simdjson_inline operator bool() noexcept(false); #endif simdjson_inline simdjson_result count_elements() & noexcept; simdjson_inline simdjson_result count_fields() & noexcept; simdjson_inline simdjson_result at(size_t index) noexcept; simdjson_inline simdjson_result begin() & noexcept; simdjson_inline simdjson_result end() & noexcept; /** * Look up a field by name on an object (order-sensitive). * * The following code reads z, then y, then x, and thus will not retrieve x or y if fed the * JSON `{ "x": 1, "y": 2, "z": 3 }`: * * ```c++ * simdjson::ondemand::parser parser; * auto obj = parser.parse(R"( { "x": 1, "y": 2, "z": 3 } )"_padded); * double z = obj.find_field("z"); * double y = obj.find_field("y"); * double x = obj.find_field("x"); * ``` * * **Raw Keys:** The lookup will be done against the *raw* key, and will not unescape keys. * e.g. `object["a"]` will match `{ "a": 1 }`, but will *not* match `{ "\u0061": 1 }`. * * @param key The key to look up. * @returns The value of the field, or NO_SUCH_FIELD if the field is not in the object. */ simdjson_inline simdjson_result find_field(std::string_view key) noexcept; /** @overload simdjson_inline simdjson_result find_field(std::string_view key) noexcept; */ simdjson_inline simdjson_result find_field(const char *key) noexcept; /** * Look up a field by name on an object, without regard to key order. * * **Performance Notes:** This is a bit less performant than find_field(), though its effect varies * and often appears negligible.
It starts out normally, starting out at the last field; but if * the field is not found, it scans from the beginning of the object to see if it missed it. That * missing case has a non-cache-friendly bump and lots of extra scanning, especially if the object * in question is large. The fact that the extra code is there also bumps the executable size. * * It is the default, however, because it would be highly surprising (and hard to debug) if the * default behavior failed to look up a field just because it was in the wrong order--and many * APIs assume this. Therefore, you must be explicit if you want to treat objects as out of order. * * Use find_field() if you are sure fields will be in order (or are willing to treat it as if the * field wasn't there when they aren't). * * @param key The key to look up. * @returns The value of the field, or NO_SUCH_FIELD if the field is not in the object. */ simdjson_inline simdjson_result find_field_unordered(std::string_view key) noexcept; /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) noexcept; */ simdjson_inline simdjson_result find_field_unordered(const char *key) noexcept; /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) noexcept; */ simdjson_inline simdjson_result operator[](std::string_view key) noexcept; /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) noexcept; */ simdjson_inline simdjson_result operator[](const char *key) noexcept; /** * Get the type of this JSON value. * * NOTE: If you're only expecting a value to be one type (a typical case), it's generally * better to just call .get_double, .get_string, etc. and check for INCORRECT_TYPE (or just * let it throw an exception).
*/ simdjson_inline simdjson_result type() noexcept; simdjson_inline simdjson_result is_scalar() noexcept; simdjson_inline simdjson_result is_negative() noexcept; simdjson_inline simdjson_result is_integer() noexcept; simdjson_inline simdjson_result get_number_type() noexcept; simdjson_inline simdjson_result get_number() noexcept; /** @copydoc simdjson_inline std::string_view value::raw_json_token() noexcept */ simdjson_inline simdjson_result raw_json_token() noexcept; simdjson_inline simdjson_result raw_json() noexcept; /** @copydoc simdjson_inline simdjson_result current_location() noexcept */ simdjson_inline simdjson_result current_location() noexcept; /** @copydoc simdjson_inline int32_t current_depth() const noexcept */ simdjson_inline simdjson_result current_depth() const noexcept; simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; }; } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_VALUE_H /* end file simdjson/generic/ondemand/value.h for fallback */ /* including simdjson/generic/ondemand/logger.h for fallback: #include "simdjson/generic/ondemand/logger.h" */ /* begin file simdjson/generic/ondemand/logger.h for fallback */ #ifndef SIMDJSON_GENERIC_ONDEMAND_LOGGER_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_LOGGER_H */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { namespace fallback { namespace ondemand { // Logging should be free unless SIMDJSON_VERBOSE_LOGGING is set. Importantly, it is critical // that the call to the log functions be side-effect free. Thus, for example, you should not // create temporary std::string instances.
namespace logger { /** Severity of a log line (info = 0, error = 1); passed as the `level` argument of log_line(). */ enum class log_level : int32_t { info = 0, error = 1 }; /** LOG_ENABLED is true only when the preprocessor sees SIMDJSON_VERBOSE_LOGGING as nonzero, and false otherwise. */ #if SIMDJSON_VERBOSE_LOGGING static constexpr const bool LOG_ENABLED = true; #else static constexpr const bool LOG_ENABLED = false; #endif // We do not want these functions to be 'really inlined' since real inlining is // for performance purposes and if you are using the loggers, you do not care about // performance (or should not). static inline void log_headers() noexcept; // If args are provided, title will be treated as format string /* NOTE(review): the template parameter list declaring Args appears to be missing in this copy; confirm against the upstream header. */ template static inline void log_line(const json_iterator &iter, token_position index, depth_t depth, const char *title_prefix, const char *title, std::string_view detail, logger::log_level level, Args&&... args) noexcept; /* Overload taking delta/depth_delta instead of an explicit index/depth; presumably offsets relative to iter's current position (TODO confirm). */ template static inline void log_line(const json_iterator &iter, const char *title_prefix, const char *title, std::string_view detail, int delta, int depth_delta, logger::log_level level, Args&&... args) noexcept; static inline void log_event(const json_iterator &iter, const char *type, std::string_view detail="", int delta=0, int depth_delta=0) noexcept; static inline void log_value(const json_iterator &iter, token_position index, depth_t depth, const char *type, std::string_view detail="") noexcept; static inline void log_value(const json_iterator &iter, const char *type, std::string_view detail="", int delta=-1, int depth_delta=0) noexcept; static inline void log_start_value(const json_iterator &iter, token_position index, depth_t depth, const char *type, std::string_view detail="") noexcept; static inline void log_start_value(const json_iterator &iter, const char *type, int delta=-1, int depth_delta=0) noexcept; static inline void log_end_value(const json_iterator &iter, const char *type, int delta=-1, int depth_delta=0) noexcept; static inline void log_error(const json_iterator &iter, token_position index, depth_t depth, const char *error, const char *detail="") noexcept; static inline void log_error(const json_iterator &iter, const
char *error, const char *detail="", int delta=-1, int depth_delta=0) noexcept; /* The overloads below take a value_iterator instead of a json_iterator. */ static inline void log_event(const value_iterator &iter, const char *type, std::string_view detail="", int delta=0, int depth_delta=0) noexcept; static inline void log_value(const value_iterator &iter, const char *type, std::string_view detail="", int delta=-1, int depth_delta=0) noexcept; static inline void log_start_value(const value_iterator &iter, const char *type, int delta=-1, int depth_delta=0) noexcept; static inline void log_end_value(const value_iterator &iter, const char *type, int delta=-1, int depth_delta=0) noexcept; static inline void log_error(const value_iterator &iter, const char *error, const char *detail="", int delta=-1, int depth_delta=0) noexcept; } // namespace logger } // namespace ondemand } // namespace fallback } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_LOGGER_H /* end file simdjson/generic/ondemand/logger.h for fallback */ /* including simdjson/generic/ondemand/token_iterator.h for fallback: #include "simdjson/generic/ondemand/token_iterator.h" */ /* begin file simdjson/generic/ondemand/token_iterator.h for fallback */ #ifndef SIMDJSON_GENERIC_ONDEMAND_TOKEN_ITERATOR_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_TOKEN_ITERATOR_H */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/logger.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { namespace fallback { namespace ondemand { /** * Iterates through JSON tokens (`{` `}` `[` `]` `,` `:` `""` `123` `true` `false` `null`) * detected by stage 1. * * @private This is not intended for external use.
*/ class token_iterator { public: /** * Create a new invalid token_iterator. * * Exists so you can declare a variable and later assign to it before use. */ simdjson_inline token_iterator() noexcept = default; simdjson_inline token_iterator(token_iterator &&other) noexcept = default; simdjson_inline token_iterator &operator=(token_iterator &&other) noexcept = default; simdjson_inline token_iterator(const token_iterator &other) noexcept = default; simdjson_inline token_iterator &operator=(const token_iterator &other) noexcept = default; /** * Advance to the next token (returning the current one). */ simdjson_inline const uint8_t *return_current_and_advance() noexcept; /** * Reports the current offset in bytes from the start of the underlying buffer. */ simdjson_inline uint32_t current_offset() const noexcept; /** * Get the JSON text for a given token (relative). * * This is not null-terminated; it is a view into the JSON. * * @param delta The relative position of the token to retrieve. e.g. 0 = current token, * 1 = next token, -1 = prev token. * * TODO consider a string_view, assuming the length will get stripped out by the optimizer when * it isn't used ... */ simdjson_inline const uint8_t *peek(int32_t delta=0) const noexcept; /** * Get the maximum length of the JSON text for a given token. * * The length will include any whitespace at the end of the token. * * @param delta The relative position of the token to retrieve. e.g. 0 = current token, * 1 = next token, -1 = prev token. */ simdjson_inline uint32_t peek_length(int32_t delta=0) const noexcept; /** * Get the JSON text for a given token. * * This is not null-terminated; it is a view into the JSON. * * @param position The position of the token. * */ simdjson_inline const uint8_t *peek(token_position position) const noexcept; /** * Get the maximum length of the JSON text for a given token. * * The length will include any whitespace at the end of the token. * * @param position The position of the token. 
*/ simdjson_inline uint32_t peek_length(token_position position) const noexcept; /** * Return the current index. */ simdjson_inline token_position position() const noexcept; /** * Reset to a previously saved index. */ simdjson_inline void set_position(token_position target_position) noexcept; // NOTE: we don't support a full C++ iterator interface, because we expect people to make // different calls to advance the iterator based on *their own* state. simdjson_inline bool operator==(const token_iterator &other) const noexcept; simdjson_inline bool operator!=(const token_iterator &other) const noexcept; simdjson_inline bool operator>(const token_iterator &other) const noexcept; simdjson_inline bool operator>=(const token_iterator &other) const noexcept; simdjson_inline bool operator<(const token_iterator &other) const noexcept; simdjson_inline bool operator<=(const token_iterator &other) const noexcept; protected: simdjson_inline token_iterator(const uint8_t *buf, token_position position) noexcept; /** * Get the index of the JSON text for a given token (relative). * * This is not null-terminated; it is a view into the JSON. * * @param delta The relative position of the token to retrieve. e.g. 0 = current token, * 1 = next token, -1 = prev token. */ simdjson_inline uint32_t peek_index(int32_t delta=0) const noexcept; /** * Get the index of the JSON text for a given token. * * This is not null-terminated; it is a view into the JSON. * * @param position The position of the token. * */ simdjson_inline uint32_t peek_index(token_position position) const noexcept; const uint8_t *buf{}; token_position _position{}; friend class json_iterator; friend class value_iterator; friend class object; template friend simdjson_inline void logger::log_line(const json_iterator &iter, const char *title_prefix, const char *title, std::string_view detail, int delta, int depth_delta, logger::log_level level, Args&&... 
args) noexcept; template friend simdjson_inline void logger::log_line(const json_iterator &iter, token_position index, depth_t depth, const char *title_prefix, const char *title, std::string_view detail, logger::log_level level, Args&&... args) noexcept; }; } // namespace ondemand } // namespace fallback } // namespace simdjson namespace simdjson { template<> struct simdjson_result : public fallback::implementation_simdjson_result_base { public: simdjson_inline simdjson_result(fallback::ondemand::token_iterator &&value) noexcept; ///< @private simdjson_inline simdjson_result(error_code error) noexcept; ///< @private simdjson_inline simdjson_result() noexcept = default; simdjson_inline ~simdjson_result() noexcept = default; ///< @private }; } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_TOKEN_ITERATOR_H /* end file simdjson/generic/ondemand/token_iterator.h for fallback */ /* including simdjson/generic/ondemand/json_iterator.h for fallback: #include "simdjson/generic/ondemand/json_iterator.h" */ /* begin file simdjson/generic/ondemand/json_iterator.h for fallback */ #ifndef SIMDJSON_GENERIC_ONDEMAND_JSON_ITERATOR_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_JSON_ITERATOR_H */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/token_iterator.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { namespace fallback { namespace ondemand { /** * Iterates through JSON tokens, keeping track of depth and string buffer. * * @private This is not intended for external use. 
*/ class json_iterator { protected: token_iterator token{}; ondemand::parser *parser{}; /** * Next free location in the string buffer. * * Used by raw_json_string::unescape() to have a place to unescape strings to. */ uint8_t *_string_buf_loc{}; /** * JSON error, if there is one. * * INCORRECT_TYPE and NO_SUCH_FIELD are *not* stored here, ever. * * PERF NOTE: we *hope* this will be elided into control flow, as it is only used (a) in the first * iteration of the loop, or (b) for the final iteration after a missing comma is found in ++. If * this is not elided, we should make sure it's at least not using up a register. Failing that, * we should store it in document so there's only one of them. */ error_code error{SUCCESS}; /** * Depth of the current token in the JSON. * * - 0 = finished with document * - 1 = document root value (could be [ or {, not yet known) * - 2 = , or } inside root array/object * - 3 = key or value inside root array/object. */ depth_t _depth{}; /** * Beginning of the document indexes. * Normally we have root == parser->implementation->structural_indexes.get() * but this may differ, especially in streaming mode (where we have several * documents); */ token_position _root{}; /** * Normally, a json_iterator operates over a single document, but in * some cases, we may have a stream of documents. This attribute is meant * as meta-data: the json_iterator works the same irrespective of the * value of this attribute. */ bool _streaming{false}; public: simdjson_inline json_iterator() noexcept = default; simdjson_inline json_iterator(json_iterator &&other) noexcept; simdjson_inline json_iterator &operator=(json_iterator &&other) noexcept; simdjson_inline explicit json_iterator(const json_iterator &other) noexcept = default; simdjson_inline json_iterator &operator=(const json_iterator &other) noexcept = default; /** * Skips a JSON value, whether it is a scalar, array or object. 
*/ simdjson_warn_unused simdjson_inline error_code skip_child(depth_t parent_depth) noexcept; /** * Tell whether the iterator is still at the start */ simdjson_inline bool at_root() const noexcept; /** * Tell whether we should be expected to run in streaming * mode (iterating over many documents). It is pure metadata * that does not affect how the iterator works. It is used by * start_root_array() and start_root_object(). */ simdjson_inline bool streaming() const noexcept; /** * Get the root value iterator */ simdjson_inline token_position root_position() const noexcept; /** * Assert that we are at the document depth (== 1) */ simdjson_inline void assert_at_document_depth() const noexcept; /** * Assert that we are at the root of the document */ simdjson_inline void assert_at_root() const noexcept; /** * Tell whether the iterator is at the EOF mark */ simdjson_inline bool at_end() const noexcept; /** * Tell whether the iterator is live (has not been moved). */ simdjson_inline bool is_alive() const noexcept; /** * Abandon this iterator, setting depth to 0 (as if the document is finished). */ simdjson_inline void abandon() noexcept; /** * Advance the current token without modifying depth. */ simdjson_inline const uint8_t *return_current_and_advance() noexcept; /** * Returns true if there is a single token in the index (i.e., it is * a JSON with a scalar value such as a single number). * * @return whether there is a single token */ simdjson_inline bool is_single_token() const noexcept; /** * Assert that there are at least the given number of tokens left. * * Has no effect in release builds. */ simdjson_inline void assert_more_tokens(uint32_t required_tokens=1) const noexcept; /** * Assert that the given position addresses an actual token (is within bounds). * * Has no effect in release builds. */ simdjson_inline void assert_valid_position(token_position position) const noexcept; /** * Get the JSON text for a given token (relative). 
* * This is not null-terminated; it is a view into the JSON. * * @param delta The relative position of the token to retrieve. e.g. 0 = next token, -1 = prev token. * * TODO consider a string_view, assuming the length will get stripped out by the optimizer when * it isn't used ... */ simdjson_inline const uint8_t *peek(int32_t delta=0) const noexcept; /** * Get the maximum length of the JSON text for the current token (or relative). * * The length will include any whitespace at the end of the token. * * @param delta The relative position of the token to retrieve. e.g. 0 = next token, -1 = prev token. */ simdjson_inline uint32_t peek_length(int32_t delta=0) const noexcept; /** * Get a pointer to the current location in the input buffer. * * This is not null-terminated; it is a view into the JSON. * * You may be pointing outside of the input buffer: it is not generally * safe to dereference this pointer. */ simdjson_inline const uint8_t *unsafe_pointer() const noexcept; /** * Get the JSON text for a given token. * * This is not null-terminated; it is a view into the JSON. * * @param position The position of the token to retrieve. * * TODO consider a string_view, assuming the length will get stripped out by the optimizer when * it isn't used ... */ simdjson_inline const uint8_t *peek(token_position position) const noexcept; /** * Get the maximum length of the JSON text for the current token (or relative). * * The length will include any whitespace at the end of the token. * * @param position The position of the token to retrieve. */ simdjson_inline uint32_t peek_length(token_position position) const noexcept; /** * Get the JSON text for the last token in the document. * * This is not null-terminated; it is a view into the JSON. * * TODO consider a string_view, assuming the length will get stripped out by the optimizer when * it isn't used ... */ simdjson_inline const uint8_t *peek_last() const noexcept; /** * Ascend one level. 
* * Validates that the depth - 1 == parent_depth. * * @param parent_depth the expected parent depth. */ simdjson_inline void ascend_to(depth_t parent_depth) noexcept; /** * Descend one level. * * Validates that the new depth == child_depth. * * @param child_depth the expected child depth. */ simdjson_inline void descend_to(depth_t child_depth) noexcept; simdjson_inline void descend_to(depth_t child_depth, int32_t delta) noexcept; /** * Get current depth. */ simdjson_inline depth_t depth() const noexcept; /** * Get current (writeable) location in the string buffer. */ simdjson_inline uint8_t *&string_buf_loc() noexcept; /** * Report an unrecoverable error, preventing further iteration. * * @param error The error to report. Must not be SUCCESS, UNINITIALIZED, INCORRECT_TYPE, or NO_SUCH_FIELD. * @param message An error message to report with the error. */ simdjson_inline error_code report_error(error_code error, const char *message) noexcept; /** * Log error, but don't stop iteration. * @param error The error to report. Must be INCORRECT_TYPE, or NO_SUCH_FIELD. * @param message An error message to report with the error. */ simdjson_inline error_code optional_error(error_code error, const char *message) noexcept; /** * Take an input in json containing max_len characters and attempt to copy it over to tmpbuf, a buffer with * N bytes of capacity. It will return false if N is too small (smaller than max_len) of if it is zero. * The buffer (tmpbuf) is padded with space characters. */ simdjson_warn_unused simdjson_inline bool copy_to_buffer(const uint8_t *json, uint32_t max_len, uint8_t *tmpbuf, size_t N) noexcept; simdjson_inline token_position position() const noexcept; /** * Write the raw_json_string to the string buffer and return a string_view. * Each raw_json_string should be unescaped once, or else the string buffer might * overflow. 
*/ simdjson_inline simdjson_result unescape(raw_json_string in, bool allow_replacement) noexcept; simdjson_inline simdjson_result unescape_wobbly(raw_json_string in) noexcept; simdjson_inline void reenter_child(token_position position, depth_t child_depth) noexcept; simdjson_inline error_code consume_character(char c) noexcept; #if SIMDJSON_DEVELOPMENT_CHECKS simdjson_inline token_position start_position(depth_t depth) const noexcept; simdjson_inline void set_start_position(depth_t depth, token_position position) noexcept; #endif /* Useful for debugging and logging purposes. */ inline std::string to_string() const noexcept; /** * Returns the current location in the document if in bounds. */ inline simdjson_result current_location() const noexcept; /** * Updates this json iterator so that it is back at the beginning of the document, * as if it had just been created. */ inline void rewind() noexcept; /** * This checks whether the {,},[,] are balanced so that the document * ends with proper zero depth. This requires scanning the whole document * and it may be expensive. It is expected that it will be rarely called. * It does not attempt to match { with } and [ with ]. */ inline bool balanced() const noexcept; protected: simdjson_inline json_iterator(const uint8_t *buf, ondemand::parser *parser) noexcept; /// The last token before the end simdjson_inline token_position last_position() const noexcept; /// The token *at* the end. This points at gibberish and should only be used for comparison. simdjson_inline token_position end_position() const noexcept; /// The end of the buffer. 
simdjson_inline token_position end() const noexcept; friend class document; friend class document_stream; friend class object; friend class array; friend class value; friend class raw_json_string; friend class parser; friend class value_iterator; template friend simdjson_inline void logger::log_line(const json_iterator &iter, const char *title_prefix, const char *title, std::string_view detail, int delta, int depth_delta, logger::log_level level, Args&&... args) noexcept; template friend simdjson_inline void logger::log_line(const json_iterator &iter, token_position index, depth_t depth, const char *title_prefix, const char *title, std::string_view detail, logger::log_level level, Args&&... args) noexcept; }; // json_iterator } // namespace ondemand } // namespace fallback } // namespace simdjson namespace simdjson { template<> struct simdjson_result : public fallback::implementation_simdjson_result_base { public: simdjson_inline simdjson_result(fallback::ondemand::json_iterator &&value) noexcept; ///< @private simdjson_inline simdjson_result(error_code error) noexcept; ///< @private simdjson_inline simdjson_result() noexcept = default; }; } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_JSON_ITERATOR_H /* end file simdjson/generic/ondemand/json_iterator.h for fallback */ /* including simdjson/generic/ondemand/json_type.h for fallback: #include "simdjson/generic/ondemand/json_type.h" */ /* begin file simdjson/generic/ondemand/json_type.h for fallback */ #ifndef SIMDJSON_GENERIC_ONDEMAND_JSON_TYPE_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_JSON_TYPE_H */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/numberparsing.h" */ /* amalgamation 
skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { namespace fallback { namespace ondemand { /** * The type of a JSON value. */ enum class json_type { // Start at 1 to catch uninitialized / default values more easily array=1, ///< A JSON array ( [ 1, 2, 3 ... ] ) object, ///< A JSON object ( { "a": 1, "b" 2, ... } ) number, ///< A JSON number ( 1 or -2.3 or 4.5e6 ...) string, ///< A JSON string ( "a" or "hello world\n" ...) boolean, ///< A JSON boolean (true or false) null ///< A JSON null (null) }; /** * A type representing a JSON number. * The design of the struct is deliberately straight-forward. All * functions return standard values with no error check. */ struct number { /** * return the automatically determined type of * the number: number_type::floating_point_number, * number_type::signed_integer or number_type::unsigned_integer. * * enum class number_type { * floating_point_number=1, /// a binary64 number * signed_integer, /// a signed integer that fits in a 64-bit word using two's complement * unsigned_integer /// a positive integer larger or equal to 1<<63 * }; */ simdjson_inline ondemand::number_type get_number_type() const noexcept; /** * return true if the automatically determined type of * the number is number_type::unsigned_integer. */ simdjson_inline bool is_uint64() const noexcept; /** * return the value as a uint64_t, only valid if is_uint64() is true. */ simdjson_inline uint64_t get_uint64() const noexcept; simdjson_inline operator uint64_t() const noexcept; /** * return true if the automatically determined type of * the number is number_type::signed_integer. */ simdjson_inline bool is_int64() const noexcept; /** * return the value as a int64_t, only valid if is_int64() is true. */ simdjson_inline int64_t get_int64() const noexcept; simdjson_inline operator int64_t() const noexcept; /** * return true if the automatically determined type of * the number is number_type::floating_point_number. 
*/ simdjson_inline bool is_double() const noexcept; /** * return the value as a double, only valid if is_double() is true. */ simdjson_inline double get_double() const noexcept; simdjson_inline operator double() const noexcept; /** * Convert the number to a double. Though it always succeed, the conversion * may be lossy if the number cannot be represented exactly. */ simdjson_inline double as_double() const noexcept; protected: /** * The next block of declaration is designed so that we can call the number parsing * functions on a number type. They are protected and should never be used outside * of the core simdjson library. */ friend class value_iterator; template friend error_code numberparsing::slow_float_parsing(simdjson_unused const uint8_t * src, W writer); template friend error_code numberparsing::write_float(const uint8_t *const src, bool negative, uint64_t i, const uint8_t * start_digits, size_t digit_count, int64_t exponent, W &writer); template friend error_code numberparsing::parse_number(const uint8_t *const src, W &writer); /** Store a signed 64-bit value to the number. */ simdjson_inline void append_s64(int64_t value) noexcept; /** Store an unsigned 64-bit value to the number. */ simdjson_inline void append_u64(uint64_t value) noexcept; /** Store a double value to the number. */ simdjson_inline void append_double(double value) noexcept; /** Specifies that the value is a double, but leave it undefined. */ simdjson_inline void skip_double() noexcept; /** * End of friend declarations. */ /** * Our attributes are a union type (size = 64 bits) * followed by a type indicator. */ union { double floating_point_number; int64_t signed_integer; uint64_t unsigned_integer; } payload{0}; number_type type{number_type::signed_integer}; }; /** * Write the JSON type to the output stream * * @param out The output stream. * @param type The json_type. 
*/ inline std::ostream& operator<<(std::ostream& out, json_type type) noexcept; #if SIMDJSON_EXCEPTIONS /** * Send JSON type to an output stream. * * @param out The output stream. * @param type The json_type. * @throw simdjson_error if the result being printed has an error. If there is an error with the * underlying output stream, that error will be propagated (simdjson_error will not be * thrown). */ inline std::ostream& operator<<(std::ostream& out, simdjson_result &type) noexcept(false); #endif } // namespace ondemand } // namespace fallback } // namespace simdjson namespace simdjson { template<> struct simdjson_result : public fallback::implementation_simdjson_result_base { public: simdjson_inline simdjson_result(fallback::ondemand::json_type &&value) noexcept; ///< @private simdjson_inline simdjson_result(error_code error) noexcept; ///< @private simdjson_inline simdjson_result() noexcept = default; simdjson_inline ~simdjson_result() noexcept = default; ///< @private }; } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_JSON_TYPE_H /* end file simdjson/generic/ondemand/json_type.h for fallback */ /* including simdjson/generic/ondemand/raw_json_string.h for fallback: #include "simdjson/generic/ondemand/raw_json_string.h" */ /* begin file simdjson/generic/ondemand/raw_json_string.h for fallback */ #ifndef SIMDJSON_GENERIC_ONDEMAND_RAW_JSON_STRING_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_RAW_JSON_STRING_H */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { namespace fallback { namespace ondemand { /** * A string escaped per JSON rules, terminated with quote ("). 
They are used to represent * unescaped keys inside JSON documents. * * (In other words, a pointer to the beginning of a string, just after the start quote, inside a * JSON file.) * * This class is deliberately simplistic and has little functionality. You can * compare a raw_json_string instance with an unescaped C string, but * that is nearly all you can do. * * The raw_json_string is unescaped. If you wish to write an unescaped version of it to your own * buffer, you may do so using the parser.unescape(string, buff) method, using an ondemand::parser * instance. Doing so requires you to have a sufficiently large buffer. * * The raw_json_string instances originate typically from field instance which in turn represent * key-value pairs from object instances. From a field instance, you get the raw_json_string * instance by calling key(). You can, if you want a more usable string_view instance, call * the unescaped_key() method on the field instance. You may also create a raw_json_string from * any other string value, with the value.get_raw_json_string() method. Again, you can get * a more usable string_view instance by calling get_string(). * */ class raw_json_string { public: /** * Create a new invalid raw_json_string. * * Exists so you can declare a variable and later assign to it before use. */ simdjson_inline raw_json_string() noexcept = default; /** * Create a new invalid raw_json_string pointed at the given location in the JSON. * * The given location must be just *after* the beginning quote (") in the JSON file. * * It *must* be terminated by a ", and be a valid JSON string. */ simdjson_inline raw_json_string(const uint8_t * _buf) noexcept; /** * Get the raw pointer to the beginning of the string in the JSON (just after the "). * * It is possible for this function to return a null pointer if the instance * has outlived its existence. 
*/ simdjson_inline const char * raw() const noexcept; /** * This compares the current instance to the std::string_view target: returns true if * they are byte-by-byte equal (no escaping is done) on target.size() characters, * and if the raw_json_string instance has a quote character at byte index target.size(). * We never read more than length + 1 bytes in the raw_json_string instance. * If length is smaller than target.size(), this will return false. * * The std::string_view instance may contain any characters. However, the caller * is responsible for setting length so that length bytes may be read in the * raw_json_string. * * Performance: the comparison may be done using memcmp which may be efficient * for long strings. */ simdjson_inline bool unsafe_is_equal(size_t length, std::string_view target) const noexcept; /** * This compares the current instance to the std::string_view target: returns true if * they are byte-by-byte equal (no escaping is done). * The std::string_view instance should not contain unescaped quote characters: * the caller is responsible for this check. See is_free_from_unescaped_quote. * * Performance: the comparison is done byte-by-byte which might be inefficient for * long strings. * * If target is a compile-time constant, and your compiler likes you, * you should be able to do the following without performance penalty... * * static_assert(raw_json_string::is_free_from_unescaped_quote(target), ""); * s.unsafe_is_equal(target); */ simdjson_inline bool unsafe_is_equal(std::string_view target) const noexcept; /** * This compares the current instance to the C string target: returns true if * they are byte-by-byte equal (no escaping is done). * The provided C string should not contain an unescaped quote character: * the caller is responsible for this check. See is_free_from_unescaped_quote. * * If target is a compile-time constant, and your compiler likes you, * you should be able to do the following without performance penalty... 
* * static_assert(raw_json_string::is_free_from_unescaped_quote(target), ""); * s.unsafe_is_equal(target); */ simdjson_inline bool unsafe_is_equal(const char* target) const noexcept; /** * This compares the current instance to the std::string_view target: returns true if * they are byte-by-byte equal (no escaping is done). */ simdjson_inline bool is_equal(std::string_view target) const noexcept; /** * This compares the current instance to the C string target: returns true if * they are byte-by-byte equal (no escaping is done). */ simdjson_inline bool is_equal(const char* target) const noexcept; /** * Returns true if target is free from unescaped quote. If target is known at * compile-time, we might expect the computation to happen at compile time with * many compilers (not all!). */ static simdjson_inline bool is_free_from_unescaped_quote(std::string_view target) noexcept; static simdjson_inline bool is_free_from_unescaped_quote(const char* target) noexcept; private: /** * This will set the inner pointer to zero, effectively making * this instance unusable. */ simdjson_inline void consume() noexcept { buf = nullptr; } /** * Checks whether the inner pointer is non-null and thus usable. */ simdjson_inline simdjson_warn_unused bool alive() const noexcept { return buf != nullptr; } /** * Unescape this JSON string, replacing \\ with \, \n with newline, etc. * The result will be a valid UTF-8. * * ## IMPORTANT: string_view lifetime * * The string_view is only valid until the next parse() call on the parser. * * @param iter A json_iterator, which contains a buffer where the string will be written. * @param allow_replacement Whether we allow replacement of invalid surrogate pairs. */ simdjson_inline simdjson_warn_unused simdjson_result unescape(json_iterator &iter, bool allow_replacement) const noexcept; /** * Unescape this JSON string, replacing \\ with \, \n with newline, etc. * The result may not be a valid UTF-8. 
https://simonsapin.github.io/wtf-8/ * * ## IMPORTANT: string_view lifetime * * The string_view is only valid until the next parse() call on the parser. * * @param iter A json_iterator, which contains a buffer where the string will be written. */ simdjson_inline simdjson_warn_unused simdjson_result unescape_wobbly(json_iterator &iter) const noexcept; const uint8_t * buf{}; friend class object; friend class field; friend class parser; friend struct simdjson_result; }; simdjson_unused simdjson_inline std::ostream &operator<<(std::ostream &, const raw_json_string &) noexcept; /** * Comparisons between raw_json_string and std::string_view instances are potentially unsafe: the user is responsible * for providing a string with no unescaped quote. Note that unescaped quotes cannot be present in valid JSON strings. */ simdjson_unused simdjson_inline bool operator==(const raw_json_string &a, std::string_view c) noexcept; simdjson_unused simdjson_inline bool operator==(std::string_view c, const raw_json_string &a) noexcept; simdjson_unused simdjson_inline bool operator!=(const raw_json_string &a, std::string_view c) noexcept; simdjson_unused simdjson_inline bool operator!=(std::string_view c, const raw_json_string &a) noexcept; } // namespace ondemand } // namespace fallback } // namespace simdjson namespace simdjson { template<> struct simdjson_result : public fallback::implementation_simdjson_result_base { public: simdjson_inline simdjson_result(fallback::ondemand::raw_json_string &&value) noexcept; ///< @private simdjson_inline simdjson_result(error_code error) noexcept; ///< @private simdjson_inline simdjson_result() noexcept = default; simdjson_inline ~simdjson_result() noexcept = default; ///< @private simdjson_inline simdjson_result raw() const noexcept; simdjson_inline simdjson_warn_unused simdjson_result unescape(fallback::ondemand::json_iterator &iter, bool allow_replacement) const noexcept; simdjson_inline simdjson_warn_unused simdjson_result 
unescape_wobbly(fallback::ondemand::json_iterator &iter) const noexcept; }; } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_RAW_JSON_STRING_H /* end file simdjson/generic/ondemand/raw_json_string.h for fallback */ /* including simdjson/generic/ondemand/parser.h for fallback: #include "simdjson/generic/ondemand/parser.h" */ /* begin file simdjson/generic/ondemand/parser.h for fallback */ #ifndef SIMDJSON_GENERIC_ONDEMAND_PARSER_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_PARSER_H */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ #include namespace simdjson { namespace fallback { namespace ondemand { /** * The default batch size (1,000,000 bytes) for document_stream instances for this On Demand kernel. * Note that different On Demand kernels may use a different DEFAULT_BATCH_SIZE value * in the future. */ static constexpr size_t DEFAULT_BATCH_SIZE = 1000000; /** * Some adversary might try to set the batch size to 0 or 1, which might cause problems. * We set a minimum of 32B since anything else is highly likely to be an error. In practice, * most users will want a much larger batch size. * * All non-negative MINIMAL_BATCH_SIZE values should be 'safe' except that, obviously, no JSON * document can ever span 0 or 1 byte and that very large values would create memory allocation issues. */ static constexpr size_t MINIMAL_BATCH_SIZE = 32; /** * A JSON fragment iterator. * * This holds the actual iterator as well as the buffer for writing strings. */ class parser { public: /** * Create a JSON parser. * * The new parser will have zero capacity.
*/ inline explicit parser(size_t max_capacity = SIMDJSON_MAXSIZE_BYTES) noexcept; inline parser(parser &&other) noexcept = default; simdjson_inline parser(const parser &other) = delete; simdjson_inline parser &operator=(const parser &other) = delete; simdjson_inline parser &operator=(parser &&other) noexcept = default; /** Deallocate the JSON parser. */ inline ~parser() noexcept = default; /** * Start iterating an on-demand JSON document. * * ondemand::parser parser; * document doc = parser.iterate(json); * * It is expected that the content is a valid UTF-8 file, containing a valid JSON document. * Otherwise the iterate method may return an error. In particular, the whole input should be * valid: we do not attempt to tolerate incorrect content either before or after a JSON * document. If there is a UTF-8 BOM, the parser skips it. * * ### IMPORTANT: Validate what you use * * Calling iterate on an invalid JSON document may not immediately trigger an error. The call to * iterate does not parse and validate the whole document. * * ### IMPORTANT: Buffer Lifetime * * Because parsing is done while you iterate, you *must* keep the JSON buffer around at least as * long as the document iteration. * * ### IMPORTANT: Document Lifetime * * Only one iteration at a time can happen per parser, and the parser *must* be kept alive during * iteration to ensure intermediate buffers can be accessed. Any document must be destroyed before * you call iterate() again or destroy the parser. * * ### REQUIRED: Buffer Padding * * The buffer must have at least SIMDJSON_PADDING extra allocated bytes. It does not matter what * those bytes are initialized to, as long as they are allocated. These bytes will be read: if you * are using a sanitizer that verifies that no uninitialized byte is read, then you should initialize the * SIMDJSON_PADDING bytes to avoid runtime warnings. * * @param json The JSON to parse. * @param len The length of the JSON.
* @param capacity The number of bytes allocated in the JSON (must be at least len+SIMDJSON_PADDING). * * @return The document, or an error: * - INSUFFICIENT_PADDING if the input has less than SIMDJSON_PADDING extra bytes. * - MEMALLOC if the parser does not have enough capacity, and memory * allocation fails. * - EMPTY if the document is all whitespace. * - UTF8_ERROR if the document is not valid UTF-8. * - UNESCAPED_CHARS if a string contains control characters that must be escaped * - UNCLOSED_STRING if there is an unclosed string in the document. */ simdjson_warn_unused simdjson_result iterate(padded_string_view json) & noexcept; /** @overload simdjson_result iterate(padded_string_view json) & noexcept */ simdjson_warn_unused simdjson_result iterate(const char *json, size_t len, size_t capacity) & noexcept; /** @overload simdjson_result iterate(padded_string_view json) & noexcept */ simdjson_warn_unused simdjson_result iterate(const uint8_t *json, size_t len, size_t capacity) & noexcept; /** @overload simdjson_result iterate(padded_string_view json) & noexcept */ simdjson_warn_unused simdjson_result iterate(std::string_view json, size_t capacity) & noexcept; /** @overload simdjson_result iterate(padded_string_view json) & noexcept */ simdjson_warn_unused simdjson_result iterate(const std::string &json) & noexcept; /** @overload simdjson_result iterate(padded_string_view json) & noexcept */ simdjson_warn_unused simdjson_result iterate(std::string &json) & noexcept; /** @overload simdjson_result iterate(padded_string_view json) & noexcept */ simdjson_warn_unused simdjson_result iterate(const simdjson_result &json) & noexcept; /** @overload simdjson_result iterate(padded_string_view json) & noexcept */ simdjson_warn_unused simdjson_result iterate(const simdjson_result &json) & noexcept; /** @overload simdjson_result iterate(padded_string_view json) & noexcept */ simdjson_warn_unused simdjson_result iterate(padded_string &&json) & noexcept =
delete; /** * @private * * Start iterating an on-demand JSON document. * * ondemand::parser parser; * json_iterator doc = parser.iterate_raw(json); * * ### IMPORTANT: Buffer Lifetime * * Because parsing is done while you iterate, you *must* keep the JSON buffer around at least as * long as the document iteration. * * ### IMPORTANT: Document Lifetime * * Only one iteration at a time can happen per parser, and the parser *must* be kept alive during * iteration to ensure intermediate buffers can be accessed. Any document must be destroyed before * you call iterate() again or destroy the parser. * * The ondemand::document instance holds the iterator. The document must remain in scope * while you are accessing instances of ondemand::value, ondemand::object, ondemand::array. * * ### REQUIRED: Buffer Padding * * The buffer must have at least SIMDJSON_PADDING extra allocated bytes. It does not matter what * those bytes are initialized to, as long as they are allocated. These bytes will be read: if you * are using a sanitizer that verifies that no uninitialized byte is read, then you should initialize the * SIMDJSON_PADDING bytes to avoid runtime warnings. * * @param json The JSON to parse. * * @return The iterator, or an error: * - INSUFFICIENT_PADDING if the input has less than SIMDJSON_PADDING extra bytes. * - MEMALLOC if the parser does not have enough capacity, and memory * allocation fails. * - EMPTY if the document is all whitespace. * - UTF8_ERROR if the document is not valid UTF-8. * - UNESCAPED_CHARS if a string contains control characters that must be escaped * - UNCLOSED_STRING if there is an unclosed string in the document. */ simdjson_warn_unused simdjson_result iterate_raw(padded_string_view json) & noexcept; /** * Parse a buffer containing many JSON documents.
* * auto json = R"({ "foo": 1 } { "foo": 2 } { "foo": 3 } )"_padded; * ondemand::parser parser; * ondemand::document_stream docs = parser.iterate_many(json); * for (auto & doc : docs) { * std::cout << doc["foo"] << std::endl; * } * // Prints 1 2 3 * * No copy of the input buffer is made. * * The function is lazy: it may be that no more than one JSON document at a time is parsed. * * The caller is responsible for ensuring that the input string data remains unchanged and is * not deleted during the loop. * * ### Format * * The buffer must contain a series of one or more JSON documents, concatenated into a single * buffer, separated by ASCII whitespace. It effectively parses until it has a fully valid document, * then starts parsing the next document at that point. (It does this with more parallelism and * lookahead than you might think, though.) * * Documents that consist of an object or array may omit the whitespace between them, concatenating * with no separator. Documents that consist of a single primitive (i.e. documents that are not * arrays or objects) MUST be separated with ASCII whitespace. * * The characters inside a JSON document, and between JSON documents, must be valid Unicode (UTF-8). * If there is a UTF-8 BOM, the parser skips it. * * The documents must not exceed batch_size bytes (by default 1MB) or they will fail to parse. * Setting batch_size to excessively large or excessively small values may negatively impact * performance. * * ### REQUIRED: Buffer Padding * * The buffer must have at least SIMDJSON_PADDING extra allocated bytes. It does not matter what * those bytes are initialized to, as long as they are allocated. These bytes will be read: if you * are using a sanitizer that verifies that no uninitialized byte is read, then you should initialize the * SIMDJSON_PADDING bytes to avoid runtime warnings. * * ### Threads * * When compiled with SIMDJSON_THREADS_ENABLED, this method will use a single thread under the * hood to do some lookahead.
* * ### Parser Capacity * * If the parser's current capacity is less than batch_size, it will allocate enough capacity * to handle it (up to max_capacity). * * @param buf The concatenated JSON to parse. * @param len The length of the concatenated JSON. * @param batch_size The batch size to use. MUST be larger than the largest document. The sweet * spot is cache-related: small enough to fit in cache, yet big enough to * parse as many documents as possible in one tight loop. * Defaults to DEFAULT_BATCH_SIZE (1,000,000 bytes, i.e. 1MB), which has been a reasonable sweet spot in our tests. * @param allow_comma_separated (defaults to false) This allows a mode where the documents are * separated by commas instead of whitespace. It comes with a performance * penalty because the entire document is indexed at once (and the document must be * less than 4 GB), and there is no multithreading. In this mode, the batch_size parameter * is effectively ignored, as it is set to at least the document size. * @return The stream, or an error. An empty input will yield 0 documents rather than an EMPTY error. Errors: * - MEMALLOC if the parser does not have enough capacity and memory allocation fails * - CAPACITY if the parser does not have enough capacity and batch_size > max_capacity. * - other json errors if parsing fails. You should not rely on these errors to always be the same for the * same document: they may vary under runtime dispatch (so they may vary depending on your system and hardware).
*/ inline simdjson_result iterate_many(const uint8_t *buf, size_t len, size_t batch_size = DEFAULT_BATCH_SIZE, bool allow_comma_separated = false) noexcept; /** @overload iterate_many(const uint8_t *buf, size_t len, size_t batch_size, bool allow_comma_separated) */ inline simdjson_result iterate_many(const char *buf, size_t len, size_t batch_size = DEFAULT_BATCH_SIZE, bool allow_comma_separated = false) noexcept; /** @overload iterate_many(const uint8_t *buf, size_t len, size_t batch_size, bool allow_comma_separated) */ inline simdjson_result iterate_many(const std::string &s, size_t batch_size = DEFAULT_BATCH_SIZE, bool allow_comma_separated = false) noexcept; inline simdjson_result iterate_many(const std::string &&s, size_t batch_size, bool allow_comma_separated = false) = delete;// unsafe /** @overload iterate_many(const uint8_t *buf, size_t len, size_t batch_size, bool allow_comma_separated) */ inline simdjson_result iterate_many(const padded_string &s, size_t batch_size = DEFAULT_BATCH_SIZE, bool allow_comma_separated = false) noexcept; inline simdjson_result iterate_many(const padded_string &&s, size_t batch_size, bool allow_comma_separated = false) = delete;// unsafe /** @private We do not want to allow implicit conversion from C string to std::string. */ simdjson_result iterate_many(const char *buf, size_t batch_size = DEFAULT_BATCH_SIZE) noexcept = delete; /** The capacity of this parser (the largest document it can process). */ simdjson_inline size_t capacity() const noexcept; /** The maximum capacity of this parser (the largest document it is allowed to process). */ simdjson_inline size_t max_capacity() const noexcept; /** Set the maximum capacity of this parser (the largest document it is allowed to process). */ simdjson_inline void set_max_capacity(size_t max_capacity) noexcept; /** * The maximum depth of this parser (the most deeply nested objects and arrays it can process). * This parameter is only relevant when the macro SIMDJSON_DEVELOPMENT_CHECKS is set to true. * The document's instance current_depth() method should be used to monitor the parsing * depth and limit it if desired.
*/ simdjson_inline size_t max_depth() const noexcept; /** * Ensure this parser has enough memory to process JSON documents up to `capacity` bytes in length * and `max_depth` depth. * * The max_depth parameter is only relevant when the macro SIMDJSON_DEVELOPMENT_CHECKS is set to true. * The document's instance current_depth() method should be used to monitor the parsing * depth and limit it if desired. * * @param capacity The new capacity. * @param max_depth The new max_depth. Defaults to DEFAULT_MAX_DEPTH. * @return The error, if there is one. */ simdjson_warn_unused error_code allocate(size_t capacity, size_t max_depth=DEFAULT_MAX_DEPTH) noexcept; #ifdef SIMDJSON_THREADS_ENABLED /** * The parser instance can use threads when they are available to speed up some * operations. It is enabled by default. Changing this attribute will change the * behavior of the parser for future operations. */ bool threaded{true}; #endif /** * Unescape this JSON string, replacing \\ with \, \n with newline, etc. to a user-provided buffer. * The result must be valid UTF-8. * The provided pointer is advanced to the end of the string by reference, and a string_view instance * is returned. You can ensure that your buffer is large enough by allocating a block of memory at least * as large as the input JSON plus SIMDJSON_PADDING and then unescape all strings to this one buffer. * * This unescape function is a low-level function. If you want a more user-friendly approach, you should * avoid raw_json_string instances (e.g., by calling unescaped_key() instead of key() or get_string() * instead of get_raw_json_string()). * * ## IMPORTANT: string_view lifetime * * The string_view is only valid as long as the bytes in dst. * * @param in The raw_json_string to unescape. * @param dst A pointer to a buffer at least large enough to write this string as well as * an additional SIMDJSON_PADDING bytes. * @param allow_replacement Whether we allow a replacement if the input string contains unmatched surrogate pairs.
* @return A string_view pointing at the unescaped string in dst * @error STRING_ERROR if escapes are incorrect. */ simdjson_inline simdjson_result unescape(raw_json_string in, uint8_t *&dst, bool allow_replacement = false) const noexcept; /** * Unescape this JSON string, replacing \\ with \, \n with newline, etc. to a user-provided buffer. * The result may not be valid UTF-8. See https://simonsapin.github.io/wtf-8/ * The provided pointer is advanced to the end of the string by reference, and a string_view instance * is returned. You can ensure that your buffer is large enough by allocating a block of memory at least * as large as the input JSON plus SIMDJSON_PADDING and then unescape all strings to this one buffer. * * This unescape function is a low-level function. If you want a more user-friendly approach, you should * avoid raw_json_string instances (e.g., by calling unescaped_key() instead of key() or get_string() * instead of get_raw_json_string()). * * ## IMPORTANT: string_view lifetime * * The string_view is only valid as long as the bytes in dst. * * @param in The raw_json_string to unescape. * @param dst A pointer to a buffer at least large enough to write this string as well as * an additional SIMDJSON_PADDING bytes. * @return A string_view pointing at the unescaped string in dst * @error STRING_ERROR if escapes are incorrect.
*/ simdjson_inline simdjson_result unescape_wobbly(raw_json_string in, uint8_t *&dst) const noexcept; private: /** @private [for benchmarking access] The implementation to use */ std::unique_ptr implementation{}; /** Initialized to zero: a newly created parser has zero capacity. */ size_t _capacity{0}; /** No in-class initializer; presumably set by the constructor from its max_capacity argument (TODO confirm). */ size_t _max_capacity; size_t _max_depth{DEFAULT_MAX_DEPTH}; std::unique_ptr string_buf{}; #if SIMDJSON_DEVELOPMENT_CHECKS std::unique_ptr start_positions{}; #endif friend class json_iterator; friend class document_stream; }; } // namespace ondemand } // namespace fallback } // namespace simdjson namespace simdjson { template<> struct simdjson_result : public fallback::implementation_simdjson_result_base { public: simdjson_inline simdjson_result(fallback::ondemand::parser &&value) noexcept; ///< @private simdjson_inline simdjson_result(error_code error) noexcept; ///< @private simdjson_inline simdjson_result() noexcept = default; }; } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_PARSER_H /* end file simdjson/generic/ondemand/parser.h for fallback */ // All other declarations /* including simdjson/generic/ondemand/array.h for fallback: #include "simdjson/generic/ondemand/array.h" */ /* begin file simdjson/generic/ondemand/array.h for fallback */ #ifndef SIMDJSON_GENERIC_ONDEMAND_ARRAY_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_ARRAY_H */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { namespace fallback { namespace ondemand { /** * A forward-only JSON array. */ class array { public: /** * Create a new invalid array.
* * Exists so you can declare a variable and later assign to it before use. */ simdjson_inline array() noexcept = default; /** * Begin array iteration. * * Part of the std::iterable interface. */ simdjson_inline simdjson_result begin() noexcept; /** * Sentinel representing the end of the array. * * Part of the std::iterable interface. */ simdjson_inline simdjson_result end() noexcept; /** * This method scans the array and counts the number of elements. * The count_elements method should always be called before you have begun * iterating through the array: it is expected that you are pointing at * the beginning of the array. * The runtime complexity is linear in the size of the array. After * calling this function, if successful, the array is rewound to its * beginning as if it had never been accessed. If the JSON is malformed (e.g., * there is a missing comma), then an error is returned and it is no longer * safe to continue. * * To check that an array is empty, it is more performant to use * the is_empty() method. */ simdjson_inline simdjson_result count_elements() & noexcept; /** * This method scans the beginning of the array and checks whether the * array is empty. * The runtime complexity is constant time. After * calling this function, if successful, the array is rewound to its * beginning as if it had never been accessed. If the JSON is malformed (e.g., * there is a missing comma), then an error is returned and it is no longer * safe to continue. */ simdjson_inline simdjson_result is_empty() & noexcept; /** * Reset the iterator so that we are pointing back at the * beginning of the array. You should still consume values only once even if you * can iterate through the array more than once. If you unescape a string * within the array more than once, you have unsafe code. Note that rewinding * an array means that you may need to reparse it anew: it is not a free * operation.
* * @returns true if the array contains some elements (not empty) */ inline simdjson_result reset() & noexcept; /** * Get the value associated with the given JSON pointer. We use the RFC 6901 * https://tools.ietf.org/html/rfc6901 standard, interpreting the current node * as the root of its own JSON document. * * ondemand::parser parser; * auto json = R"([ { "foo": { "a": [ 10, 20, 30 ] }} ])"_padded; * auto doc = parser.iterate(json); * doc.at_pointer("/0/foo/a/1") == 20 * * Note that at_pointer() called on the document automatically calls the document's rewind * method between each call. It invalidates all previously accessed arrays, objects and values * that have not been consumed. Yet it is not the case when calling at_pointer on an array * instance: there is no rewind and no invalidation. * * You may only call at_pointer on an array after it has been created, but before it has * been first accessed. When calling at_pointer on an array, the pointer is advanced to * the location indicated by the JSON pointer (in case of success). It is no longer possible * to call at_pointer on the same array. * * Also note that at_pointer() relies on find_field() which implies that we do not unescape keys when matching. * * @param json_pointer The RFC 6901 JSON pointer, interpreted with this array as the root. * @return The value associated with the given JSON pointer, or: * - NO_SUCH_FIELD if a field does not exist in an object * - INDEX_OUT_OF_BOUNDS if an array index is larger than an array length * - INCORRECT_TYPE if a non-integer is used to access an array * - INVALID_JSON_POINTER if the JSON pointer is invalid and cannot be parsed */ inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; /** * Consumes the array and returns a string_view instance corresponding to the * array as represented in JSON. It points inside the original document. */ simdjson_inline simdjson_result raw_json() noexcept; /** * Get the value at the given index. This function has linear-time complexity.
* This function should only be called once on an array instance since the array iterator is not reset between each call. * * @param index The index of the element to retrieve. * @return The value at the given index, or: * - INDEX_OUT_OF_BOUNDS if the array index is larger than the array length */ simdjson_inline simdjson_result at(size_t index) noexcept; protected: /** * Go to the end of the array, no matter where you are right now. */ simdjson_inline error_code consume() noexcept; /** * Begin array iteration. * * @param iter The iterator. Must be where the initial [ is expected. Will be *moved* into the * resulting array. * @error INCORRECT_TYPE if the iterator is not at [. */ static simdjson_inline simdjson_result start(value_iterator &iter) noexcept; /** * Begin array iteration from the root. * * @param iter The iterator. Must be where the initial [ is expected. Will be *moved* into the * resulting array. * @error INCORRECT_TYPE if the iterator is not at [. * @error TAPE_ERROR if there is no closing ] at the end of the document. */ static simdjson_inline simdjson_result start_root(value_iterator &iter) noexcept; /** * Begin array iteration. * * This version of the method should be called after the initial [ has been verified, and is * intended for use by switch statements that check the type of a value. * * @param iter The iterator. Must be after the initial [. Will be *moved* into the resulting array. */ static simdjson_inline simdjson_result started(value_iterator &iter) noexcept; /** * Internal array creation. Call array::start() or array::started() instead of this. * * @param iter The iterator. Must either be at the start of the first element with iter.is_alive() * == true, or past the [] with is_alive() == false if the array is empty. Will be *moved* * into the resulting array. */ simdjson_inline array(const value_iterator &iter) noexcept; /** * Iterator marking current position. * * iter.is_alive() == false indicates iteration is complete.
*/ value_iterator iter{}; friend class value; friend class document; friend struct simdjson_result; friend struct simdjson_result; friend class array_iterator; }; } // namespace ondemand } // namespace fallback } // namespace simdjson namespace simdjson { template<> struct simdjson_result : public fallback::implementation_simdjson_result_base { public: simdjson_inline simdjson_result(fallback::ondemand::array &&value) noexcept; ///< @private simdjson_inline simdjson_result(error_code error) noexcept; ///< @private simdjson_inline simdjson_result() noexcept = default; simdjson_inline simdjson_result begin() noexcept; simdjson_inline simdjson_result end() noexcept; inline simdjson_result count_elements() & noexcept; inline simdjson_result is_empty() & noexcept; inline simdjson_result reset() & noexcept; simdjson_inline simdjson_result at(size_t index) noexcept; simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; simdjson_inline simdjson_result raw_json() noexcept; }; } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_ARRAY_H /* end file simdjson/generic/ondemand/array.h for fallback */ /* including simdjson/generic/ondemand/array_iterator.h for fallback: #include "simdjson/generic/ondemand/array_iterator.h" */ /* begin file simdjson/generic/ondemand/array_iterator.h for fallback */ #ifndef SIMDJSON_GENERIC_ONDEMAND_ARRAY_ITERATOR_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_ARRAY_ITERATOR_H */ /* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { namespace fallback { namespace ondemand {
/** * A forward-only iterator over the elements of a JSON array. * * This is an input_iterator, meaning: * - It is forward-only * - * must be called exactly once per element. * - ++ must be called exactly once in between each * (*, ++, *, ++, * ...) */ class array_iterator { public: /** Create a new, invalid array iterator. */ simdjson_inline array_iterator() noexcept = default; // // Iterator interface // /** * Get the current element. * * Part of the std::iterator interface. */ simdjson_inline simdjson_result operator*() noexcept; // MUST ONLY BE CALLED ONCE PER ITERATION. /** * Check if we are at the end of the JSON. * * Part of the std::iterator interface. * * @return true if there are no more elements in the JSON array. */ simdjson_inline bool operator==(const array_iterator &) const noexcept; /** * Check if there are more elements in the JSON array. * * Part of the std::iterator interface. * * @return true if there are more elements in the JSON array. */ simdjson_inline bool operator!=(const array_iterator &) const noexcept; /** * Move to the next element. * * Part of the std::iterator interface. */ simdjson_inline array_iterator &operator++() noexcept; private: value_iterator iter{}; simdjson_inline array_iterator(const value_iterator &iter) noexcept; friend class array; friend class value; friend struct simdjson_result; }; } // namespace ondemand } // namespace fallback } // namespace simdjson namespace simdjson { template<> struct simdjson_result : public fallback::implementation_simdjson_result_base { public: simdjson_inline simdjson_result(fallback::ondemand::array_iterator &&value) noexcept; ///< @private simdjson_inline simdjson_result(error_code error) noexcept; ///< @private simdjson_inline simdjson_result() noexcept = default; // // Iterator interface // simdjson_inline simdjson_result operator*() noexcept; // MUST ONLY BE CALLED ONCE PER ITERATION.
simdjson_inline bool operator==(const simdjson_result &) const noexcept; simdjson_inline bool operator!=(const simdjson_result &) const noexcept; simdjson_inline simdjson_result &operator++() noexcept; }; } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_ARRAY_ITERATOR_H /* end file simdjson/generic/ondemand/array_iterator.h for fallback */ /* including simdjson/generic/ondemand/document.h for fallback: #include "simdjson/generic/ondemand/document.h" */ /* begin file simdjson/generic/ondemand/document.h for fallback */ #ifndef SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_H */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { namespace fallback { namespace ondemand { /** * A JSON document. It holds a json_iterator instance. * * Used by tokens to get text, and string buffer location. * * You must keep the document around during iteration. */ class document { public: /** * Create a new invalid document. * * Exists so you can declare a variable and later assign to it before use. */ simdjson_inline document() noexcept = default; simdjson_inline document(const document &other) noexcept = delete; // pass your documents by reference, not by copy simdjson_inline document(document &&other) noexcept = default; simdjson_inline document &operator=(const document &other) noexcept = delete; simdjson_inline document &operator=(document &&other) noexcept = default; /** * Cast this JSON value to an array. * * @returns An object that can be used to iterate the array. * @returns INCORRECT_TYPE If the JSON value is not an array.
*/ simdjson_inline simdjson_result get_array() & noexcept; /** * Cast this JSON value to an object. * * @returns An object that can be used to look up or iterate fields. * @returns INCORRECT_TYPE If the JSON value is not an object. */ simdjson_inline simdjson_result get_object() & noexcept; /** * Cast this JSON value to an unsigned integer. * * @returns A signed 64-bit integer. * @returns INCORRECT_TYPE If the JSON value is not a 64-bit unsigned integer. */ simdjson_inline simdjson_result get_uint64() noexcept; /** * Cast this JSON value (inside string) to an unsigned integer. * * @returns A signed 64-bit integer. * @returns INCORRECT_TYPE If the JSON value is not a 64-bit unsigned integer. */ simdjson_inline simdjson_result get_uint64_in_string() noexcept; /** * Cast this JSON value to a signed integer. * * @returns A signed 64-bit integer. * @returns INCORRECT_TYPE If the JSON value is not a 64-bit integer. */ simdjson_inline simdjson_result get_int64() noexcept; /** * Cast this JSON value (inside string) to a signed integer. * * @returns A signed 64-bit integer. * @returns INCORRECT_TYPE If the JSON value is not a 64-bit integer. */ simdjson_inline simdjson_result get_int64_in_string() noexcept; /** * Cast this JSON value to a double. * * @returns A double. * @returns INCORRECT_TYPE If the JSON value is not a valid floating-point number. */ simdjson_inline simdjson_result get_double() noexcept; /** * Cast this JSON value (inside string) to a double. * * @returns A double. * @returns INCORRECT_TYPE If the JSON value is not a valid floating-point number. */ simdjson_inline simdjson_result get_double_in_string() noexcept; /** * Cast this JSON value to a string. * * The string is guaranteed to be valid UTF-8. * * Important: Calling get_string() twice on the same document is an error. * * @param Whether to allow a replacement character for unmatched surrogate pairs. * @returns An UTF-8 string. 
The string is stored in the parser and will be invalidated the next * time it parses a document or when it is destroyed. * @returns INCORRECT_TYPE if the JSON value is not a string. */ simdjson_inline simdjson_result get_string(bool allow_replacement = false) noexcept; /** * Attempts to fill the provided std::string reference with the parsed value of the current string. * * The string is guaranteed to be valid UTF-8. * * Important: a value should be consumed once. Calling get_string() twice on the same value * is an error. * * Performance: This method may be slower than get_string() or get_string(bool) because it may need to allocate memory. * We recommend you avoid allocating an std::string unless you need to. * * @returns INCORRECT_TYPE if the JSON value is not a string. Otherwise, we return SUCCESS. */ template simdjson_inline error_code get_string(string_type& receiver, bool allow_replacement = false) noexcept; /** * Cast this JSON value to a string. * * The string is not guaranteed to be valid UTF-8. See https://simonsapin.github.io/wtf-8/ * * Important: Calling get_wobbly_string() twice on the same document is an error. * * @returns An UTF-8 string. The string is stored in the parser and will be invalidated the next * time it parses a document or when it is destroyed. * @returns INCORRECT_TYPE if the JSON value is not a string. */ simdjson_inline simdjson_result get_wobbly_string() noexcept; /** * Cast this JSON value to a raw_json_string. * * The string is guaranteed to be valid UTF-8, and may have escapes in it (e.g. \\ or \n). * * @returns A pointer to the raw JSON for the given string. * @returns INCORRECT_TYPE if the JSON value is not a string. */ simdjson_inline simdjson_result get_raw_json_string() noexcept; /** * Cast this JSON value to a bool. * * @returns A bool value. * @returns INCORRECT_TYPE if the JSON value is not true or false. 
*/ simdjson_inline simdjson_result get_bool() noexcept; /** * Cast this JSON value to a value when the document is an object or an array. * * You must not have begun iterating through the object or array. When * SIMDJSON_DEVELOPMENT_CHECKS is set to 1 (which is the case when building in Debug mode * by default), and you have already begun iterating, * you will get an OUT_OF_ORDER_ITERATION error. If you have begun iterating, you can use * rewind() to reset the document to its initial state before calling this method. * * @returns A value if the document is a JSON array or object. * @returns SCALAR_DOCUMENT_AS_VALUE error if the document is a scalar (see is_scalar() function). */ simdjson_inline simdjson_result get_value() noexcept; /** * Checks if this JSON value is null. If and only if the value is * null, then it is consumed (we advance). If we find a token that * begins with 'n' but is not 'null', then an error is returned. * * @returns Whether the value is null. * @returns INCORRECT_TYPE If the JSON value begins with 'n' and is not 'null'. */ simdjson_inline simdjson_result is_null() noexcept; /** * Get this value as the given type. * * Supported types: object, array, raw_json_string, string_view, uint64_t, int64_t, double, bool * * You may use get_double(), get_bool(), get_uint64(), get_int64(), * get_object(), get_array(), get_raw_json_string(), or get_string() instead. * * @returns A value of the given type, parsed from the JSON. * @returns INCORRECT_TYPE If the JSON value is not the given type. */ template simdjson_inline simdjson_result get() & noexcept { // Unless the simdjson library provides an inline implementation, calling this method should // immediately fail. static_assert(!sizeof(T), "The get method with given type is not implemented by the simdjson library. " "The supported types are ondemand::object, ondemand::array, raw_json_string, std::string_view, uint64_t, " "int64_t, double, and bool.
We recommend you use get_double(), get_bool(), get_uint64(), get_int64(), " " get_object(), get_array(), get_raw_json_string(), or get_string() instead of the get template."); } /** @overload template simdjson_result get() & noexcept */ template simdjson_inline simdjson_result get() && noexcept { // Unless the simdjson library provides an inline implementation, calling this method should // immediately fail. static_assert(!sizeof(T), "The get method with given type is not implemented by the simdjson library. " "The supported types are ondemand::object, ondemand::array, raw_json_string, std::string_view, uint64_t, " "int64_t, double, and bool. We recommend you use get_double(), get_bool(), get_uint64(), get_int64(), " " get_object(), get_array(), get_raw_json_string(), or get_string() instead of the get template."); } /** * Get this value as the given type. * * Supported types: object, array, raw_json_string, string_view, uint64_t, int64_t, double, bool, value * * Be mindful that the document instance must remain in scope while you are accessing object, array and value instances. * * @param out This is set to a value of the given type, parsed from the JSON. If there is an error, this may not be initialized. * @returns INCORRECT_TYPE If the JSON value is not an object. * @returns SUCCESS If the parse succeeded and the out parameter was set to the value. */ template simdjson_inline error_code get(T &out) & noexcept; /** @overload template error_code get(T &out) & noexcept */ template simdjson_inline error_code get(T &out) && noexcept; #if SIMDJSON_EXCEPTIONS /** * Cast this JSON value to an array. * * @returns An object that can be used to iterate the array. * @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not an array. */ simdjson_inline operator array() & noexcept(false); /** * Cast this JSON value to an object. * * @returns An object that can be used to look up or iterate fields. 
* @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not an object. */ simdjson_inline operator object() & noexcept(false); /** * Cast this JSON value to an unsigned integer. * * @returns An unsigned 64-bit integer. * @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not a 64-bit unsigned integer. */ simdjson_inline operator uint64_t() noexcept(false); /** * Cast this JSON value to a signed integer. * * @returns A signed 64-bit integer. * @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not a 64-bit integer. */ simdjson_inline operator int64_t() noexcept(false); /** * Cast this JSON value to a double. * * @returns A double. * @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not a valid floating-point number. */ simdjson_inline operator double() noexcept(false); /** * Cast this JSON value to a string. * * The string is guaranteed to be valid UTF-8. * * @returns A UTF-8 string. The string is stored in the parser and will be invalidated the next * time it parses a document or when it is destroyed. * @exception simdjson_error(INCORRECT_TYPE) if the JSON value is not a string. */ simdjson_inline operator std::string_view() noexcept(false); /** * Cast this JSON value to a raw_json_string. * * The string is guaranteed to be valid UTF-8, and may have escapes in it (e.g. \\ or \n). * * @returns A pointer to the raw JSON for the given string. * @exception simdjson_error(INCORRECT_TYPE) if the JSON value is not a string. */ simdjson_inline operator raw_json_string() noexcept(false); /** * Cast this JSON value to a bool. * * @returns A bool value. * @exception simdjson_error(INCORRECT_TYPE) if the JSON value is not true or false. */ simdjson_inline operator bool() noexcept(false); /** * Cast this JSON value to a value when the document is an object or an array. * * You must not have begun iterating through the object or array.
When * SIMDJSON_DEVELOPMENT_CHECKS is defined, and you have already begun iterating, * you will get an OUT_OF_ORDER_ITERATION error. If you have begun iterating, you can use * rewind() to reset the document to its initial state before calling this method. * * @returns A value if the document is a JSON array or object. * @exception SCALAR_DOCUMENT_AS_VALUE error if the document is a scalar (see is_scalar() function). */ simdjson_inline operator value() noexcept(false); #endif /** * This method scans the array and counts the number of elements. * The count_elements method should always be called before you have begun * iterating through the array: it is expected that you are pointing at * the beginning of the array. * The runtime complexity is linear in the size of the array. After * calling this function, if successful, the array is rewound to its * beginning as if it had never been accessed. If the JSON is malformed (e.g., * there is a missing comma), then an error is returned and it is no longer * safe to continue. */ simdjson_inline simdjson_result count_elements() & noexcept; /** * This method scans the object and counts the number of key-value pairs. * The count_fields method should always be called before you have begun * iterating through the object: it is expected that you are pointing at * the beginning of the object. * The runtime complexity is linear in the size of the object. After * calling this function, if successful, the object is rewound to its * beginning as if it had never been accessed. If the JSON is malformed (e.g., * there is a missing comma), then an error is returned and it is no longer * safe to continue. * * To check that an object is empty, it is more performant to use * the is_empty() method. */ simdjson_inline simdjson_result count_fields() & noexcept; /** * Get the value at the given index in the array. This function has linear-time complexity.
* This function should only be called once on an array instance since the array iterator is not reset between each call. * * @return The value at the given index, or: * - INDEX_OUT_OF_BOUNDS if the array index is larger than an array length */ simdjson_inline simdjson_result at(size_t index) & noexcept; /** * Begin array iteration. * * Part of the std::iterable interface. */ simdjson_inline simdjson_result begin() & noexcept; /** * Sentinel representing the end of the array. * * Part of the std::iterable interface. */ simdjson_inline simdjson_result end() & noexcept; /** * Look up a field by name on an object (order-sensitive). * * The following code reads z, then y, then x, and thus will not retrieve x or y if fed the * JSON `{ "x": 1, "y": 2, "z": 3 }`: * * ```c++ * simdjson::ondemand::parser parser; * auto obj = parser.parse(R"( { "x": 1, "y": 2, "z": 3 } )"_padded); * double z = obj.find_field("z"); * double y = obj.find_field("y"); * double x = obj.find_field("x"); * ``` * * **Raw Keys:** The lookup will be done against the *raw* key, and will not unescape keys. * e.g. `object["a"]` will match `{ "a": 1 }`, but will *not* match `{ "\u0061": 1 }`. * * * You must consume the fields on an object one at a time. A request for a new key * invalidates previous field values: it makes them unsafe. E.g., the array * given by content["bids"].get_array() should not be accessed after you have called * content["asks"].get_array(). You can detect such mistakes by first compiling and running * the code in Debug mode (or with the macro `SIMDJSON_DEVELOPMENT_CHECKS` set to 1): an * OUT_OF_ORDER_ITERATION error is generated. * * You are expected to access keys only once. You should access the value corresponding to * a key a single time. Doing object["mykey"].to_string()and then again object["mykey"].to_string() * is an error. * * @param key The key to look up. * @returns The value of the field, or NO_SUCH_FIELD if the field is not in the object. 
*/ simdjson_inline simdjson_result find_field(std::string_view key) & noexcept; /** @overload simdjson_inline simdjson_result find_field(std::string_view key) & noexcept; */ simdjson_inline simdjson_result find_field(const char *key) & noexcept; /** * Look up a field by name on an object, without regard to key order. * * **Performance Notes:** This is a bit less performant than find_field(), though its effect varies * and often appears negligible. It starts out normally, starting out at the last field; but if * the field is not found, it scans from the beginning of the object to see if it missed it. That * missing case has a non-cache-friendly bump and lots of extra scanning, especially if the object * in question is large. The fact that the extra code is there also bumps the executable size. * * It is the default, however, because it would be highly surprising (and hard to debug) if the * default behavior failed to look up a field just because it was in the wrong order--and many * APIs assume this. Therefore, you must be explicit if you want to treat objects as out of order. * * Use find_field() if you are sure fields will be in order (or are willing to treat it as if the * field wasn't there when they aren't). * * You must consume the fields on an object one at a time. A request for a new key * invalidates previous field values: it makes them unsafe. E.g., the array * given by content["bids"].get_array() should not be accessed after you have called * content["asks"].get_array(). You can detect such mistakes by first compiling and running * the code in Debug mode (or with the macro `SIMDJSON_DEVELOPMENT_CHECKS` set to 1): an * OUT_OF_ORDER_ITERATION error is generated. * * You are expected to access keys only once. You should access the value corresponding to a key * a single time. Doing object["mykey"].to_string() and then again object["mykey"].to_string() * is an error. * * @param key The key to look up. 
* @returns The value of the field, or NO_SUCH_FIELD if the field is not in the object. */ simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; */ simdjson_inline simdjson_result find_field_unordered(const char *key) & noexcept; /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; */ simdjson_inline simdjson_result operator[](std::string_view key) & noexcept; /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; */ simdjson_inline simdjson_result operator[](const char *key) & noexcept; /** * Get the type of this JSON value. It does not validate or consume the value. * E.g., you must still call "is_null()" to check that a value is null even if * "type()" returns json_type::null. * * NOTE: If you're only expecting a value to be one type (a typical case), it's generally * better to just call .get_double, .get_string, etc. and check for INCORRECT_TYPE (or just * let it throw an exception). * * @error TAPE_ERROR when the JSON value is a bad token like "}" "," or "alse". */ simdjson_inline simdjson_result type() noexcept; /** * Checks whether the document is a scalar (string, number, null, Boolean). * Returns false when it is an array or object. * * @returns true if the type is string, number, null, or Boolean * @error TAPE_ERROR when the JSON value is a bad token like "}" "," or "alse". */ simdjson_inline simdjson_result is_scalar() noexcept; /** * Checks whether the document is a negative number. * * @returns true if the number is negative. */ simdjson_inline bool is_negative() noexcept; /** * Checks whether the document is an integer number. Note that * this requires partially parsing the number string.
If * the value is determined to be an integer, it may still * not parse properly as an integer in subsequent steps * (e.g., it might overflow). * * @returns true if the number is an integer. */ simdjson_inline simdjson_result is_integer() noexcept; /** * Determine the number type (integer or floating-point number) as quickly * as possible. This function does not fully validate the input. It is * useful when you only need to classify the numbers, without parsing them. * * If you are planning to retrieve the value or you need full validation, * consider using the get_number() method instead: it will fully parse * and validate the input, and give you access to the type: * get_number().get_number_type(). * * get_number_type() is number_type::unsigned_integer if we have * an integer greater or equal to 9223372036854775808 * get_number_type() is number_type::signed_integer if we have an * integer that is less than 9223372036854775808 * Otherwise, get_number_type() has value number_type::floating_point_number * * This function requires processing the number string, but it is expected * to be faster than get_number().get_number_type() because it does not * parse the number value. * * @returns the type of the number */ simdjson_inline simdjson_result get_number_type() noexcept; /** * Attempt to parse an ondemand::number. An ondemand::number may * contain an integer value or a floating-point value, the simdjson * library will autodetect the type. Thus it is a dynamically typed * number. Before accessing the value, you must determine the detected * type. * * number.get_number_type() is number_type::signed_integer if we have * an integer in [-9223372036854775808,9223372036854775808) * You can recover the value by calling number.get_int64() and you * have that number.is_int64() is true.
* * number.get_number_type() is number_type::unsigned_integer if we have * an integer in [9223372036854775808,18446744073709551616) * You can recover the value by calling number.get_uint64() and you * have that number.is_uint64() is true. * * Otherwise, number.get_number_type() has value number_type::floating_point_number * and we have a binary64 number. * You can recover the value by calling number.get_double() and you * have that number.is_double() is true. * * You must check the type before accessing the value: it is an error * to call "get_int64()" when number.get_number_type() is not * number_type::signed_integer and when number.is_int64() is false. */ simdjson_warn_unused simdjson_inline simdjson_result get_number() noexcept; /** * Get the raw JSON for this token. * * The string_view will always point into the input buffer. * * The string_view will start at the beginning of the token, and include the entire token * *as well as all spaces until the next token (or EOF).* This means, for example, that a * string token always begins with a " and is always terminated by the final ", possibly * followed by a number of spaces. * * The string_view is *not* null-terminated. If this is a scalar (string, number, * boolean, or null), the character after the end of the string_view may be the padded buffer. * * Tokens include: * - { * - [ * - "a string (possibly with UTF-8 or backslashed characters like \\\")". * - -1.2e-100 * - true * - false * - null */ simdjson_inline simdjson_result raw_json_token() noexcept; /** * Reset the iterator inside the document instance so we are pointing back at the * beginning of the document, as if it had just been created. It invalidates all * values, objects and arrays that you have created so far (including unescaped strings). */ inline void rewind() noexcept; /** * Returns debugging information. */ inline std::string to_debug_string() noexcept; /** * Some unrecoverable error conditions may render the document instance unusable. 
* The is_alive() method returns true when the document is still suitable. */ inline bool is_alive() noexcept; /** * Returns the current location in the document if in bounds. */ inline simdjson_result current_location() const noexcept; /** * Returns true if this document has been fully parsed. * If you have consumed the whole document and at_end() returns * false, then there may be trailing content. */ inline bool at_end() const noexcept; /** * Returns the current depth in the document if in bounds. * * E.g., * 0 = finished with document * 1 = document root value (could be [ or {, not yet known) * 2 = , or } inside root array/object * 3 = key or value inside root array/object. */ simdjson_inline int32_t current_depth() const noexcept; /** * Get the value associated with the given JSON pointer. We use the RFC 6901 * https://tools.ietf.org/html/rfc6901 standard. * * ondemand::parser parser; * auto json = R"({ "foo": { "a": [ 10, 20, 30 ] }})"_padded; * auto doc = parser.iterate(json); * doc.at_pointer("/foo/a/1") == 20 * * It is allowed for a key to be the empty string: * * ondemand::parser parser; * auto json = R"({ "": { "a": [ 10, 20, 30 ] }})"_padded; * auto doc = parser.iterate(json); * doc.at_pointer("//a/1") == 20 * * Note that at_pointer() automatically calls rewind between each call. Thus * all values, objects and arrays that you have created so far (including unescaped strings) * are invalidated. After calling at_pointer, you need to consume the result: string values * should be stored in your own variables, arrays should be decoded and stored in your own array-like * structures and so forth.
* * Also note that at_pointer() relies on find_field() which implies that we do not unescape keys when matching. * * @return The value associated with the given JSON pointer, or: * - NO_SUCH_FIELD if a field does not exist in an object * - INDEX_OUT_OF_BOUNDS if an array index is larger than the array length * - INCORRECT_TYPE if a non-integer is used to access an array * - INVALID_JSON_POINTER if the JSON pointer is invalid and cannot be parsed * - SCALAR_DOCUMENT_AS_VALUE if the json_pointer is empty and the document is a scalar (see is_scalar() function). */ simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; /** * Consumes the document and returns a string_view instance corresponding to the * document as represented in JSON. It points inside the original byte array containing * the JSON document. */ simdjson_inline simdjson_result raw_json() noexcept; protected: /** * Consumes the document. */ simdjson_inline error_code consume() noexcept; simdjson_inline document(ondemand::json_iterator &&iter) noexcept; simdjson_inline const uint8_t *text(uint32_t idx) const noexcept; simdjson_inline value_iterator resume_value_iterator() noexcept; simdjson_inline value_iterator get_root_value_iterator() noexcept; simdjson_inline simdjson_result start_or_resume_object() noexcept; static simdjson_inline document start(ondemand::json_iterator &&iter) noexcept; // // Fields // json_iterator iter{}; ///< Current position in the document static constexpr depth_t DOCUMENT_DEPTH = 0; ///< document depth is always 0 friend class array_iterator; friend class value; friend class ondemand::parser; friend class object; friend class array; friend class field; friend class token; friend class document_stream; friend class document_reference; }; /** * A document_reference is a thin wrapper around a document reference instance.
*/ class document_reference { public: /** Default constructor. NOTE(review): presumably leaves the reference unbound, since doc is initialized to nullptr in-class -- confirm in the definition. */ simdjson_inline document_reference() noexcept; /** Construct from the document d. NOTE(review): presumably stores its address in doc -- confirm in the definition. */ simdjson_inline document_reference(document &d) noexcept; simdjson_inline document_reference(const document_reference &other) noexcept = default; simdjson_inline document_reference& operator=(const document_reference &other) noexcept = default; /** Apart from operator document&, the members from here on share their names with members declared on document. NOTE(review): presumably each one forwards to the wrapped document -- confirm in the definitions. */ simdjson_inline void rewind() noexcept; simdjson_inline simdjson_result get_array() & noexcept; simdjson_inline simdjson_result get_object() & noexcept; simdjson_inline simdjson_result get_uint64() noexcept; simdjson_inline simdjson_result get_uint64_in_string() noexcept; simdjson_inline simdjson_result get_int64() noexcept; simdjson_inline simdjson_result get_int64_in_string() noexcept; simdjson_inline simdjson_result get_double() noexcept; simdjson_inline simdjson_result get_double_in_string() noexcept; simdjson_inline simdjson_result get_string(bool allow_replacement = false) noexcept; template simdjson_inline error_code get_string(string_type& receiver, bool allow_replacement = false) noexcept; simdjson_inline simdjson_result get_wobbly_string() noexcept; simdjson_inline simdjson_result get_raw_json_string() noexcept; simdjson_inline simdjson_result get_bool() noexcept; simdjson_inline simdjson_result get_value() noexcept; simdjson_inline simdjson_result is_null() noexcept; simdjson_inline simdjson_result raw_json() noexcept; /** Implicit conversion to a document reference (presumably the wrapped document). */ simdjson_inline operator document&() const noexcept; #if SIMDJSON_EXCEPTIONS simdjson_inline operator array() & noexcept(false); simdjson_inline operator object() & noexcept(false); simdjson_inline operator uint64_t() noexcept(false); simdjson_inline operator int64_t() noexcept(false); simdjson_inline operator double() noexcept(false); simdjson_inline operator std::string_view() noexcept(false); simdjson_inline operator raw_json_string() noexcept(false); simdjson_inline operator bool() noexcept(false); simdjson_inline operator value() noexcept(false); #endif simdjson_inline simdjson_result count_elements() &
noexcept; simdjson_inline simdjson_result count_fields() & noexcept; simdjson_inline simdjson_result at(size_t index) & noexcept; simdjson_inline simdjson_result begin() & noexcept; simdjson_inline simdjson_result end() & noexcept; simdjson_inline simdjson_result find_field(std::string_view key) & noexcept; simdjson_inline simdjson_result find_field(const char *key) & noexcept; simdjson_inline simdjson_result operator[](std::string_view key) & noexcept; simdjson_inline simdjson_result operator[](const char *key) & noexcept; simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; simdjson_inline simdjson_result find_field_unordered(const char *key) & noexcept; simdjson_inline simdjson_result type() noexcept; simdjson_inline simdjson_result is_scalar() noexcept; /* NOTE(review): current_location() is non-const here, whereas document declares current_location() const. */ simdjson_inline simdjson_result current_location() noexcept; simdjson_inline int32_t current_depth() const noexcept; simdjson_inline bool is_negative() noexcept; simdjson_inline simdjson_result is_integer() noexcept; simdjson_inline simdjson_result get_number_type() noexcept; simdjson_inline simdjson_result get_number() noexcept; simdjson_inline simdjson_result raw_json_token() noexcept; simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; private: /** Pointer to the wrapped document; nullptr by default. */ document *doc{nullptr}; }; } // namespace ondemand } // namespace fallback } // namespace simdjson namespace simdjson { template<> struct simdjson_result : public fallback::implementation_simdjson_result_base { public: simdjson_inline simdjson_result(fallback::ondemand::document &&value) noexcept; ///< @private simdjson_inline simdjson_result(error_code error) noexcept; ///< @private simdjson_inline simdjson_result() noexcept = default; simdjson_inline error_code rewind() noexcept; simdjson_inline simdjson_result get_array() & noexcept; simdjson_inline simdjson_result get_object() & noexcept; simdjson_inline simdjson_result get_uint64() noexcept; simdjson_inline simdjson_result get_uint64_in_string()
noexcept; simdjson_inline simdjson_result get_int64() noexcept; simdjson_inline simdjson_result get_int64_in_string() noexcept; simdjson_inline simdjson_result get_double() noexcept; simdjson_inline simdjson_result get_double_in_string() noexcept; simdjson_inline simdjson_result get_string(bool allow_replacement = false) noexcept; template simdjson_inline error_code get_string(string_type& receiver, bool allow_replacement = false) noexcept; simdjson_inline simdjson_result get_wobbly_string() noexcept; simdjson_inline simdjson_result get_raw_json_string() noexcept; simdjson_inline simdjson_result get_bool() noexcept; simdjson_inline simdjson_result get_value() noexcept; simdjson_inline simdjson_result is_null() noexcept; /** Typed getters: the lvalue (&) and rvalue (&&) overloads are declared separately, both for the value-returning form and for the out-parameter form. */ template simdjson_inline simdjson_result get() & noexcept; template simdjson_inline simdjson_result get() && noexcept; template simdjson_inline error_code get(T &out) & noexcept; template simdjson_inline error_code get(T &out) && noexcept; #if SIMDJSON_EXCEPTIONS simdjson_inline operator fallback::ondemand::array() & noexcept(false); simdjson_inline operator fallback::ondemand::object() & noexcept(false); simdjson_inline operator uint64_t() noexcept(false); simdjson_inline operator int64_t() noexcept(false); simdjson_inline operator double() noexcept(false); simdjson_inline operator std::string_view() noexcept(false); simdjson_inline operator fallback::ondemand::raw_json_string() noexcept(false); simdjson_inline operator bool() noexcept(false); simdjson_inline operator fallback::ondemand::value() noexcept(false); #endif simdjson_inline simdjson_result count_elements() & noexcept; simdjson_inline simdjson_result count_fields() & noexcept; simdjson_inline simdjson_result at(size_t index) & noexcept; simdjson_inline simdjson_result begin() & noexcept; simdjson_inline simdjson_result end() & noexcept; simdjson_inline simdjson_result find_field(std::string_view key) & noexcept; simdjson_inline simdjson_result find_field(const char *key) & noexcept;
simdjson_inline simdjson_result operator[](std::string_view key) & noexcept; simdjson_inline simdjson_result operator[](const char *key) & noexcept; simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; simdjson_inline simdjson_result find_field_unordered(const char *key) & noexcept; simdjson_inline simdjson_result type() noexcept; simdjson_inline simdjson_result is_scalar() noexcept; /* NOTE(review): current_location() is non-const here, whereas document declares current_location() const. */ simdjson_inline simdjson_result current_location() noexcept; simdjson_inline int32_t current_depth() const noexcept; simdjson_inline bool at_end() const noexcept; simdjson_inline bool is_negative() noexcept; simdjson_inline simdjson_result is_integer() noexcept; simdjson_inline simdjson_result get_number_type() noexcept; simdjson_inline simdjson_result get_number() noexcept; /* NOTE(review): the signature quoted in the copydoc below is const and returns std::string_view, while the declaration that follows it is non-const and returns a simdjson_result -- confirm that the copydoc target resolves. */ /** @copydoc simdjson_inline std::string_view document::raw_json_token() const noexcept */ simdjson_inline simdjson_result raw_json_token() noexcept; simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; }; } // namespace simdjson namespace simdjson { template<> struct simdjson_result : public fallback::implementation_simdjson_result_base { public: simdjson_inline simdjson_result(fallback::ondemand::document_reference value, error_code error) noexcept; simdjson_inline simdjson_result() noexcept = default; simdjson_inline error_code rewind() noexcept; simdjson_inline simdjson_result get_array() & noexcept; simdjson_inline simdjson_result get_object() & noexcept; simdjson_inline simdjson_result get_uint64() noexcept; simdjson_inline simdjson_result get_uint64_in_string() noexcept; simdjson_inline simdjson_result get_int64() noexcept; simdjson_inline simdjson_result get_int64_in_string() noexcept; simdjson_inline simdjson_result get_double() noexcept; simdjson_inline simdjson_result get_double_in_string() noexcept; simdjson_inline simdjson_result get_string(bool allow_replacement = false) noexcept; template simdjson_inline error_code
get_string(string_type& receiver, bool allow_replacement = false) noexcept; simdjson_inline simdjson_result get_wobbly_string() noexcept; simdjson_inline simdjson_result get_raw_json_string() noexcept; simdjson_inline simdjson_result get_bool() noexcept; simdjson_inline simdjson_result get_value() noexcept; simdjson_inline simdjson_result is_null() noexcept; #if SIMDJSON_EXCEPTIONS simdjson_inline operator fallback::ondemand::array() & noexcept(false); simdjson_inline operator fallback::ondemand::object() & noexcept(false); simdjson_inline operator uint64_t() noexcept(false); simdjson_inline operator int64_t() noexcept(false); simdjson_inline operator double() noexcept(false); simdjson_inline operator std::string_view() noexcept(false); simdjson_inline operator fallback::ondemand::raw_json_string() noexcept(false); simdjson_inline operator bool() noexcept(false); simdjson_inline operator fallback::ondemand::value() noexcept(false); #endif simdjson_inline simdjson_result count_elements() & noexcept; simdjson_inline simdjson_result count_fields() & noexcept; simdjson_inline simdjson_result at(size_t index) & noexcept; simdjson_inline simdjson_result begin() & noexcept; simdjson_inline simdjson_result end() & noexcept; simdjson_inline simdjson_result find_field(std::string_view key) & noexcept; simdjson_inline simdjson_result find_field(const char *key) & noexcept; simdjson_inline simdjson_result operator[](std::string_view key) & noexcept; simdjson_inline simdjson_result operator[](const char *key) & noexcept; simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; simdjson_inline simdjson_result find_field_unordered(const char *key) & noexcept; simdjson_inline simdjson_result type() noexcept; simdjson_inline simdjson_result is_scalar() noexcept; simdjson_inline simdjson_result current_location() noexcept; /* NOTE(review): current_depth() and is_negative() return a simdjson_result in this specialization, whereas the document_reference class declares them returning int32_t and bool directly. */ simdjson_inline simdjson_result current_depth() const noexcept; simdjson_inline simdjson_result is_negative() noexcept;
simdjson_inline simdjson_result is_integer() noexcept; simdjson_inline simdjson_result get_number_type() noexcept; simdjson_inline simdjson_result get_number() noexcept; /* NOTE(review): the signature quoted in the copydoc below is const and returns std::string_view, while the declaration that follows it is non-const and returns a simdjson_result -- confirm that the copydoc target resolves. */ /** @copydoc simdjson_inline std::string_view document_reference::raw_json_token() const noexcept */ simdjson_inline simdjson_result raw_json_token() noexcept; simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; }; } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_H /* end file simdjson/generic/ondemand/document.h for fallback */ /* including simdjson/generic/ondemand/document_stream.h for fallback: #include "simdjson/generic/ondemand/document_stream.h" */ /* begin file simdjson/generic/ondemand/document_stream.h for fallback */ #ifndef SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_STREAM_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_STREAM_H */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/document.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/parser.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ #ifdef SIMDJSON_THREADS_ENABLED #include #include #include #endif namespace simdjson { namespace fallback { namespace ondemand { #ifdef SIMDJSON_THREADS_ENABLED /** @private Custom worker class **/ struct stage1_worker { stage1_worker() noexcept = default; stage1_worker(const stage1_worker&) = delete; stage1_worker(stage1_worker&&) = delete; /* NOTE(review): the deleted copy assignment is declared returning by value rather than by reference; since it is deleted this does not affect callers. */ stage1_worker operator=(const stage1_worker&) = delete; ~stage1_worker(); /** * We only start the thread when it is needed, not at object construction, this may throw. * You should only call this once.
**/ void start_thread(); /** * Start a stage 1 job. You should first call 'run', then 'finish'. * You must call start_thread once before. */ void run(document_stream * ds, parser * stage1, size_t next_batch_start); /** Wait for the run to finish (blocking). You should first call 'run', then 'finish'. **/ void finish(); private: /** * Normally, we would never stop the thread. But we do in the destructor. * This function is only safe assuming that you are not waiting for results. You * should have called run, then finish, and be done. **/ void stop_thread(); std::thread thread{}; /** These three variables define the work done by the thread. **/ ondemand::parser * stage1_thread_parser{}; size_t _next_batch_start{}; document_stream * owner{}; /** * We have two state variables. This could be streamlined to one variable in the future but * we use two for clarity. */ bool has_work{false}; bool can_work{true}; /** * We lock using a mutex. */ std::mutex locking_mutex{}; std::condition_variable cond_var{}; friend class document_stream; }; #endif // SIMDJSON_THREADS_ENABLED /** * A forward-only stream of documents. * * Produced by parser::iterate_many. * */ class document_stream { public: /** * Construct an uninitialized document_stream. * * ```c++ * document_stream docs; * auto error = parser.iterate_many(json).get(docs); * ``` */ simdjson_inline document_stream() noexcept; /** Move one document_stream to another. */ simdjson_inline document_stream(document_stream &&other) noexcept = default; /** Move one document_stream to another. */ simdjson_inline document_stream &operator=(document_stream &&other) noexcept = default; simdjson_inline ~document_stream() noexcept; /** * Returns the input size in bytes. */ inline size_t size_in_bytes() const noexcept; /** * After iterating through the stream, this method * returns the number of bytes that were not parsed at the end * of the stream. 
If truncated_bytes() differs from zero, * then the input was truncated maybe because incomplete JSON * documents were found at the end of the stream. You * may need to process the bytes in the interval [size_in_bytes()-truncated_bytes(), size_in_bytes()). * * You should only call truncated_bytes() after streaming through all * documents, like so: * * document_stream stream = parser.iterate_many(json,window); * for(auto & doc : stream) { * // do something with doc * } * size_t truncated = stream.truncated_bytes(); * */ inline size_t truncated_bytes() const noexcept; class iterator { public: using value_type = simdjson_result; using reference = value_type; using difference_type = std::ptrdiff_t; using iterator_category = std::input_iterator_tag; /** * Default constructor. */ simdjson_inline iterator() noexcept; /** * Get the current document (or error). */ simdjson_inline simdjson_result operator*() noexcept; /** * Advance to the next document (prefix). */ inline iterator& operator++() noexcept; /** * Check if we're at the end yet. * @param other the end iterator to compare to. */ simdjson_inline bool operator!=(const iterator &other) const noexcept; /** * @private * * Gives the current index in the input document in bytes. * * document_stream stream = parser.iterate_many(json,window); * for(auto i = stream.begin(); i != stream.end(); ++i) { * auto doc = *i; * size_t index = i.current_index(); * } * * This function (current_index()) is experimental and the usage * may change in future versions of simdjson: we find the API somewhat * awkward and we would like to offer something friendlier. */ simdjson_inline size_t current_index() const noexcept; /** * @private * * Gives a view of the current document at the current position.
* * document_stream stream = parser.iterate_many(json,window); * for(auto i = stream.begin(); i != stream.end(); ++i) { * std::string_view v = i.source(); * } * * The returned string_view instance is simply a map to the (unparsed) * source string: it may thus include white-space characters and all manner * of padding. * * This function (source()) is experimental and the usage * may change in future versions of simdjson: we find the API somewhat * awkward and we would like to offer something friendlier. * */ simdjson_inline std::string_view source() const noexcept; /** * Returns the error of the stream (if any). */ inline error_code error() const noexcept; private: simdjson_inline iterator(document_stream *s, bool finished) noexcept; /** The document_stream we're iterating through. */ document_stream* stream; /** Whether we're finished or not. */ bool finished; friend class document; friend class document_stream; friend class json_iterator; }; /** * Start iterating the documents in the stream. */ simdjson_inline iterator begin() noexcept; /** * The end of the stream, for iterator comparison purposes. */ simdjson_inline iterator end() noexcept; private: document_stream &operator=(const document_stream &) = delete; // Disallow copying document_stream(const document_stream &other) = delete; // Disallow copying /** * Construct a document_stream. Does not allocate or parse anything until the iterator is * used. * * @param parser is a reference to the parser instance used to generate this document_stream * @param buf is the raw byte buffer we need to process * @param len is the length of the raw byte buffer in bytes * @param batch_size is the size of the windows (must be greater than or equal to the size of the largest JSON document) */ simdjson_inline document_stream( ondemand::parser &parser, const uint8_t *buf, size_t len, size_t batch_size, bool allow_comma_separated ) noexcept; /** * Parse the first document in the buffer.
Used by begin(), to handle allocation and * initialization. */ inline void start() noexcept; /** * Parse the next document found in the buffer previously given to document_stream. * * The content should be a valid JSON document encoded as UTF-8. If there is a * UTF-8 BOM, the parser skips it. * * You do NOT need to pre-allocate a parser. This function takes care of * pre-allocating a capacity defined by the batch_size defined when creating the * document_stream object. * * The function returns simdjson::EMPTY if there is no more data to be parsed. * * The function returns simdjson::SUCCESS (as integer = 0) in case of success * and indicates that the buffer has successfully been parsed to the end. * Every document it contained has been parsed without error. * * The function returns an error code from simdjson/simdjson.h in case of failure * such as simdjson::CAPACITY, simdjson::MEMALLOC, simdjson::DEPTH_ERROR and so forth; * the simdjson::error_message function converts these error codes into a string). * * You can also check validity by calling parser.is_valid(). The same parser can * and should be reused for the other documents in the buffer. */ inline void next() noexcept; /** Move the json_iterator of the document to the location of the next document in the stream. */ inline void next_document() noexcept; /** Get the next document index. */ inline size_t next_batch_start() const noexcept; /** Pass the next batch through stage 1 with the given parser. */ inline error_code run_stage1(ondemand::parser &p, size_t batch_start) noexcept; // Fields ondemand::parser *parser; const uint8_t *buf; size_t len; size_t batch_size; bool allow_comma_separated; /** * We are going to use just one document instance. The document owns * the json_iterator. It implies that we only ever pass a reference * to the document to the users. */ document doc{}; /** The error (or lack thereof) from the current document. 
*/ error_code error; size_t batch_start{0}; size_t doc_index{}; #ifdef SIMDJSON_THREADS_ENABLED /** Indicates whether we use threads. Note that this needs to be a constant during the execution of the parsing. */ bool use_thread; inline void load_from_stage1_thread() noexcept; /** Start a thread to run stage 1 on the next batch. */ inline void start_stage1_thread() noexcept; /** Wait for the stage 1 thread to finish and capture the results. */ inline void finish_stage1_thread() noexcept; /** The error returned from the stage 1 thread. */ error_code stage1_thread_error{UNINITIALIZED}; /** The thread used to run stage 1 against the next batch in the background. */ std::unique_ptr worker{new(std::nothrow) stage1_worker()}; /** * The parser used to run stage 1 in the background. Will be swapped * with the regular parser when finished. */ ondemand::parser stage1_thread_parser{}; friend struct stage1_worker; #endif // SIMDJSON_THREADS_ENABLED friend class parser; friend class document; friend class json_iterator; friend struct simdjson_result; friend struct internal::simdjson_result_base; }; // document_stream } // namespace ondemand } // namespace fallback } // namespace simdjson namespace simdjson { template<> struct simdjson_result : public fallback::implementation_simdjson_result_base { public: simdjson_inline simdjson_result(fallback::ondemand::document_stream &&value) noexcept; ///< @private simdjson_inline simdjson_result(error_code error) noexcept; ///< @private simdjson_inline simdjson_result() noexcept = default; }; } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_STREAM_H /* end file simdjson/generic/ondemand/document_stream.h for fallback */ /* including simdjson/generic/ondemand/field.h for fallback: #include "simdjson/generic/ondemand/field.h" */ /* begin file simdjson/generic/ondemand/field.h for fallback */ #ifndef SIMDJSON_GENERIC_ONDEMAND_FIELD_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* 
amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_FIELD_H */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { namespace fallback { namespace ondemand { /** * A JSON field (key/value pair) in an object. * * Returned from object iteration. * * Extends from std::pair so you can use C++ algorithms that rely on pairs. */ class field : public std::pair { public: /** * Create a new invalid field. * * Exists so you can declare a variable and later assign to it before use. */ simdjson_inline field() noexcept; /** * Get the key as a string_view (for higher speed, consider key(), which returns a raw_json_string). * We deliberately use a more cumbersome name (unescaped_key) to force users * to think twice about using it. * * This consumes the key: once you have called unescaped_key(), you cannot * call it again nor can you call key(). */ simdjson_inline simdjson_warn_unused simdjson_result unescaped_key(bool allow_replacement) noexcept; /** * Get the key as a raw_json_string. Can be used for direct comparison with * an unescaped C string: e.g., key() == "test". */ simdjson_inline raw_json_string key() const noexcept; /** * Get the field value.
*/ simdjson_inline ondemand::value &value() & noexcept; /** * @overload ondemand::value &ondemand::value() & noexcept */ simdjson_inline ondemand::value value() && noexcept; protected: simdjson_inline field(raw_json_string key, ondemand::value &&value) noexcept; static simdjson_inline simdjson_result start(value_iterator &parent_iter) noexcept; static simdjson_inline simdjson_result start(const value_iterator &parent_iter, raw_json_string key) noexcept; friend struct simdjson_result; friend class object_iterator; }; } // namespace ondemand } // namespace fallback } // namespace simdjson namespace simdjson { template<> struct simdjson_result : public fallback::implementation_simdjson_result_base { public: simdjson_inline simdjson_result(fallback::ondemand::field &&value) noexcept; ///< @private simdjson_inline simdjson_result(error_code error) noexcept; ///< @private simdjson_inline simdjson_result() noexcept = default; simdjson_inline simdjson_result unescaped_key(bool allow_replacement = false) noexcept; simdjson_inline simdjson_result key() noexcept; simdjson_inline simdjson_result value() noexcept; }; } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_FIELD_H /* end file simdjson/generic/ondemand/field.h for fallback */ /* including simdjson/generic/ondemand/object.h for fallback: #include "simdjson/generic/ondemand/object.h" */ /* begin file simdjson/generic/ondemand/object.h for fallback */ #ifndef SIMDJSON_GENERIC_ONDEMAND_OBJECT_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_OBJECT_H */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator.h" */ /* amalgamation skipped (editor-only): #endif // 
SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { namespace fallback { namespace ondemand { /** * A forward-only JSON object field iterator. */ class object { public: /** * Create a new invalid object. * * Exists so you can declare a variable and later assign to it before use. */ simdjson_inline object() noexcept = default; simdjson_inline simdjson_result begin() noexcept; simdjson_inline simdjson_result end() noexcept; /** * Look up a field by name on an object (order-sensitive). * * The following code reads z, then y, then x, and thus will not retrieve x or y if fed the * JSON `{ "x": 1, "y": 2, "z": 3 }`: * * ```c++ * simdjson::ondemand::parser parser; * auto obj = parser.iterate(R"( { "x": 1, "y": 2, "z": 3 } )"_padded); * double z = obj.find_field("z"); * double y = obj.find_field("y"); * double x = obj.find_field("x"); * ``` * If you have multiple fields with a matching key ({"x": 1, "x": 1}) be mindful * that only one field is returned. * * **Raw Keys:** The lookup will be done against the *raw* key, and will not unescape keys. * e.g. `object["a"]` will match `{ "a": 1 }`, but will *not* match `{ "\u0061": 1 }`. * * You must consume the fields on an object one at a time. A request for a new key * invalidates previous field values: it makes them unsafe. The value instance you get * from `content["bids"]` becomes invalid when you call `content["asks"]`. The array * given by content["bids"].get_array() should not be accessed after you have called * content["asks"].get_array(). You can detect such mistakes by first compiling and running * the code in Debug mode (or with the macro `SIMDJSON_DEVELOPMENT_CHECKS` set to 1): an * OUT_OF_ORDER_ITERATION error is generated. * * You are expected to access keys only once. You should access the value corresponding to a * key a single time. Doing object["mykey"].to_string() and then again object["mykey"].to_string() * is an error. * * @param key The key to look up.
* @returns The value of the field, or NO_SUCH_FIELD if the field is not in the object. */ simdjson_inline simdjson_result find_field(std::string_view key) & noexcept; /** @overload simdjson_inline simdjson_result find_field(std::string_view key) & noexcept; */ simdjson_inline simdjson_result find_field(std::string_view key) && noexcept; /** * Look up a field by name on an object, without regard to key order. * * **Performance Notes:** This is a bit less performant than find_field(), though its effect varies * and often appears negligible. It starts out normally, starting out at the last field; but if * the field is not found, it scans from the beginning of the object to see if it missed it. That * missing case has a non-cache-friendly bump and lots of extra scanning, especially if the object * in question is large. The fact that the extra code is there also bumps the executable size. * * It is the default, however, because it would be highly surprising (and hard to debug) if the * default behavior failed to look up a field just because it was in the wrong order--and many * APIs assume this. Therefore, you must be explicit if you want to treat objects as out of order. * * Use find_field() if you are sure fields will be in order (or are willing to treat it as if the * field wasn't there when they aren't). * * If you have multiple fields with a matching key ({"x": 1, "x": 1}) be mindful * that only one field is returned. * * You must consume the fields on an object one at a time. A request for a new key * invalidates previous field values: it makes them unsafe. The value instance you get * from `content["bids"]` becomes invalid when you call `content["asks"]`. The array * given by content["bids"].get_array() should not be accessed after you have called * content["asks"].get_array(). 
You can detect such mistakes by first compiling and running * the code in Debug mode (or with the macro `SIMDJSON_DEVELOPMENT_CHECKS` set to 1): an * OUT_OF_ORDER_ITERATION error is generated. * * You are expected to access keys only once. You should access the value corresponding to a key * a single time. Doing object["mykey"].to_string() and then again object["mykey"].to_string() is an error. * * @param key The key to look up. * @returns The value of the field, or NO_SUCH_FIELD if the field is not in the object. */ simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; */ simdjson_inline simdjson_result find_field_unordered(std::string_view key) && noexcept; /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; */ simdjson_inline simdjson_result operator[](std::string_view key) & noexcept; /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; */ simdjson_inline simdjson_result operator[](std::string_view key) && noexcept; /** * Get the value associated with the given JSON pointer. We use the RFC 6901 * https://tools.ietf.org/html/rfc6901 standard, interpreting the current node * as the root of its own JSON document. * * ondemand::parser parser; * auto json = R"({ "foo": { "a": [ 10, 20, 30 ] }})"_padded; * auto doc = parser.iterate(json); * doc.at_pointer("/foo/a/1") == 20 * * It is allowed for a key to be the empty string: * * ondemand::parser parser; * auto json = R"({ "": { "a": [ 10, 20, 30 ] }})"_padded; * auto doc = parser.iterate(json); * doc.at_pointer("//a/1") == 20 * * Note that at_pointer() called on the document automatically calls the document's rewind * method between each call. It invalidates all previously accessed arrays, objects and values * that have not been consumed. 
Yet it is not the case when calling at_pointer on an object * instance: there is no rewind and no invalidation. * * You may call at_pointer more than once on an object, but each time the pointer is advanced * to be within the value matched by the key indicated by the JSON pointer query. Thus any preceding * key (as well as the current key) can no longer be used with following JSON pointer calls. * * Also note that at_pointer() relies on find_field() which implies that we do not unescape keys when matching. * * @return The value associated with the given JSON pointer, or: * - NO_SUCH_FIELD if a field does not exist in an object * - INDEX_OUT_OF_BOUNDS if an array index is larger than an array length * - INCORRECT_TYPE if a non-integer is used to access an array * - INVALID_JSON_POINTER if the JSON pointer is invalid and cannot be parsed */ inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; /** * Reset the iterator so that we are pointing back at the * beginning of the object. You should still consume values only once even if you * can iterate through the object more than once. If you unescape a string within * the object more than once, you have unsafe code. Note that rewinding an object * means that you may need to reparse it anew: it is not a free operation. * * @returns true if the object contains some elements (not empty) */ inline simdjson_result reset() & noexcept; /** * This method scans the beginning of the object and checks whether the * object is empty. * The runtime complexity is constant time. After * calling this function, if successful, the object is 'rewinded' at its * beginning as if it had never been accessed. If the JSON is malformed (e.g., * there is a missing comma), then an error is returned and it is no longer * safe to continue. */ inline simdjson_result is_empty() & noexcept; /** * This method scans the object and counts the number of key-value pairs. 
* The count_fields method should always be called before you have begun * iterating through the object: it is expected that you are pointing at * the beginning of the object. * The runtime complexity is linear in the size of the object. After * calling this function, if successful, the object is 'rewinded' at its * beginning as if it had never been accessed. If the JSON is malformed (e.g., * there is a missing comma), then an error is returned and it is no longer * safe to continue. * * To check that an object is empty, it is more performant to use * the is_empty() method. * * Performance hint: You should only call count_fields() as a last * resort as it may require scanning the document twice or more. */ simdjson_inline simdjson_result count_fields() & noexcept; /** * Consumes the object and returns a string_view instance corresponding to the * object as represented in JSON. It points inside the original byte array containing * the JSON document. */ simdjson_inline simdjson_result raw_json() noexcept; protected: /** * Go to the end of the object, no matter where you are right now. 
*/ simdjson_inline error_code consume() noexcept; static simdjson_inline simdjson_result start(value_iterator &iter) noexcept; static simdjson_inline simdjson_result start_root(value_iterator &iter) noexcept; static simdjson_inline simdjson_result started(value_iterator &iter) noexcept; static simdjson_inline object resume(const value_iterator &iter) noexcept; simdjson_inline object(const value_iterator &iter) noexcept; simdjson_warn_unused simdjson_inline error_code find_field_raw(const std::string_view key) noexcept; value_iterator iter{}; friend class value; friend class document; friend struct simdjson_result; }; } // namespace ondemand } // namespace fallback } // namespace simdjson namespace simdjson { template<> struct simdjson_result : public fallback::implementation_simdjson_result_base { public: simdjson_inline simdjson_result(fallback::ondemand::object &&value) noexcept; ///< @private simdjson_inline simdjson_result(error_code error) noexcept; ///< @private simdjson_inline simdjson_result() noexcept = default; simdjson_inline simdjson_result begin() noexcept; simdjson_inline simdjson_result end() noexcept; simdjson_inline simdjson_result find_field(std::string_view key) & noexcept; simdjson_inline simdjson_result find_field(std::string_view key) && noexcept; simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; simdjson_inline simdjson_result find_field_unordered(std::string_view key) && noexcept; simdjson_inline simdjson_result operator[](std::string_view key) & noexcept; simdjson_inline simdjson_result operator[](std::string_view key) && noexcept; simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; inline simdjson_result reset() noexcept; inline simdjson_result is_empty() noexcept; inline simdjson_result count_fields() & noexcept; inline simdjson_result raw_json() noexcept; }; } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_OBJECT_H /* end file 
simdjson/generic/ondemand/object.h for fallback */ /* including simdjson/generic/ondemand/object_iterator.h for fallback: #include "simdjson/generic/ondemand/object_iterator.h" */ /* begin file simdjson/generic/ondemand/object_iterator.h for fallback */ #ifndef SIMDJSON_GENERIC_ONDEMAND_OBJECT_ITERATOR_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_OBJECT_ITERATOR_H */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { namespace fallback { namespace ondemand { class object_iterator { public: /** * Create a new invalid object_iterator. * * Exists so you can declare a variable and later assign to it before use. */ simdjson_inline object_iterator() noexcept = default; // // Iterator interface // // Reads key and value, yielding them to the user. // MUST ONLY BE CALLED ONCE PER ITERATION. simdjson_inline simdjson_result operator*() noexcept; // Assumes it's being compared with the end. true if depth < iter->depth. simdjson_inline bool operator==(const object_iterator &) const noexcept; // Assumes it's being compared with the end. true if depth >= iter->depth. simdjson_inline bool operator!=(const object_iterator &) const noexcept; // Checks for '}' and ',' simdjson_inline object_iterator &operator++() noexcept; private: /** * The underlying JSON iterator. * * PERF NOTE: expected to be elided in favor of the parent document: this is set when the object * is first used, and never changes afterwards.
*/ value_iterator iter{}; simdjson_inline object_iterator(const value_iterator &iter) noexcept; friend struct simdjson_result; friend class object; }; } // namespace ondemand } // namespace fallback } // namespace simdjson namespace simdjson { template<> struct simdjson_result : public fallback::implementation_simdjson_result_base { public: simdjson_inline simdjson_result(fallback::ondemand::object_iterator &&value) noexcept; ///< @private simdjson_inline simdjson_result(error_code error) noexcept; ///< @private simdjson_inline simdjson_result() noexcept = default; // // Iterator interface // // Reads key and value, yielding them to the user. simdjson_inline simdjson_result operator*() noexcept; // MUST ONLY BE CALLED ONCE PER ITERATION. // Assumes it's being compared with the end. true if depth < iter->depth. simdjson_inline bool operator==(const simdjson_result &) const noexcept; // Assumes it's being compared with the end. true if depth >= iter->depth. simdjson_inline bool operator!=(const simdjson_result &) const noexcept; // Checks for '}' and ',' simdjson_inline simdjson_result &operator++() noexcept; }; } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_OBJECT_ITERATOR_H /* end file simdjson/generic/ondemand/object_iterator.h for fallback */ /* including simdjson/generic/ondemand/serialization.h for fallback: #include "simdjson/generic/ondemand/serialization.h" */ /* begin file simdjson/generic/ondemand/serialization.h for fallback */ #ifndef SIMDJSON_GENERIC_ONDEMAND_SERIALIZATION_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_SERIALIZATION_H */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { /** * Create a string-view instance out of a document instance.
The string-view instance * contains JSON text that is suitable to be parsed as JSON again. It does not * validate the content. */ inline simdjson_result to_json_string(fallback::ondemand::document& x) noexcept; /** * Create a string-view instance out of a value instance. The string-view instance * contains JSON text that is suitable to be parsed as JSON again. The value must * not have been accessed previously. It does not * validate the content. */ inline simdjson_result to_json_string(fallback::ondemand::value& x) noexcept; /** * Create a string-view instance out of an object instance. The string-view instance * contains JSON text that is suitable to be parsed as JSON again. It does not * validate the content. */ inline simdjson_result to_json_string(fallback::ondemand::object& x) noexcept; /** * Create a string-view instance out of an array instance. The string-view instance * contains JSON text that is suitable to be parsed as JSON again. It does not * validate the content. */ inline simdjson_result to_json_string(fallback::ondemand::array& x) noexcept; inline simdjson_result to_json_string(simdjson_result x); inline simdjson_result to_json_string(simdjson_result x); inline simdjson_result to_json_string(simdjson_result x); inline simdjson_result to_json_string(simdjson_result x); } // namespace simdjson /** * We want to support argument-dependent lookup (ADL). * Hence we should define operator<< in the namespace * where the argument (here value, object, etc.) resides. * Credit: @madhur4127 * See https://github.com/simdjson/simdjson/issues/1768 */ namespace simdjson { namespace fallback { namespace ondemand { /** * Print JSON to an output stream. It does not * validate the content. * * @param out The output stream. * @param value The element. * @throw if there is an error with the underlying output stream. simdjson itself will not throw. 
*/ inline std::ostream& operator<<(std::ostream& out, simdjson::fallback::ondemand::value x); #if SIMDJSON_EXCEPTIONS inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x); #endif /** * Print JSON to an output stream. It does not * validate the content. * * @param out The output stream. * @param value The array. * @throw if there is an error with the underlying output stream. simdjson itself will not throw. */ inline std::ostream& operator<<(std::ostream& out, simdjson::fallback::ondemand::array value); #if SIMDJSON_EXCEPTIONS inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x); #endif /** * Print JSON to an output stream. It does not * validate the content. * * @param out The output stream. * @param value The document. * @throw if there is an error with the underlying output stream. simdjson itself will not throw. */ inline std::ostream& operator<<(std::ostream& out, simdjson::fallback::ondemand::document& value); #if SIMDJSON_EXCEPTIONS inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result&& x); #endif inline std::ostream& operator<<(std::ostream& out, simdjson::fallback::ondemand::document_reference& value); #if SIMDJSON_EXCEPTIONS inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result&& x); #endif /** * Print JSON to an output stream. It does not * validate the content. * * @param out The output stream. * @param value The object. * @throw if there is an error with the underlying output stream. simdjson itself will not throw.
*/ inline std::ostream& operator<<(std::ostream& out, simdjson::fallback::ondemand::object value); #if SIMDJSON_EXCEPTIONS inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x); #endif }}} // namespace simdjson::fallback::ondemand #endif // SIMDJSON_GENERIC_ONDEMAND_SERIALIZATION_H /* end file simdjson/generic/ondemand/serialization.h for fallback */ // Inline definitions /* including simdjson/generic/ondemand/array-inl.h for fallback: #include "simdjson/generic/ondemand/array-inl.h" */ /* begin file simdjson/generic/ondemand/array-inl.h for fallback */ #ifndef SIMDJSON_GENERIC_ONDEMAND_ARRAY_INL_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_ARRAY_INL_H */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array_iterator-inl.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator-inl.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { namespace fallback { namespace ondemand { // // ### Live States // // While iterating or looking up values, depth >= iter->depth. at_start may vary. Error is // always SUCCESS: // // - Start: This is the state when the array is first found and the iterator is just past the `[`. // In this state, at_start == true. // - Next: After we hand a scalar value to the user, or an array/object which they then fully // iterate over, the iterator is at the `,` before the next value (or `]`).
In this state, // depth == iter->depth, at_start == false, and error == SUCCESS. // - Unfinished Business: When we hand an array/object to the user which they do not fully // iterate over, we need to finish that iteration by skipping child values until we reach the // Next state. In this state, depth > iter->depth, at_start == false, and error == SUCCESS. // // ## Error States // // In error states, we will yield exactly one more value before stopping. iter->depth == depth // and at_start is always false. We decrement depth after yielding the error, moving to the Finished // state. // // - Chained Error: When the array iterator is part of an error chain--for example, in // `for (auto tweet : doc["tweets"])`, where the tweet element may be missing or not be an // array--we yield that error in the loop, exactly once. In this state, error != SUCCESS and // iter->depth == depth, and at_start == false. We decrement depth when we yield the error. // - Missing Comma Error: When the iterator ++ method discovers there is no comma between elements, // we flag that as an error and treat it exactly the same as a Chained Error. In this state, // error == TAPE_ERROR, iter->depth == depth, and at_start == false. // // ## Terminal State // // The terminal state has iter->depth < depth. at_start is always false. // // - Finished: When we have reached a `]` or have reported an error, we are finished. We signal this // by decrementing depth. In this state, iter->depth < depth, at_start == false, and // error == SUCCESS. // /* Stores a copy of the given value_iterator; the constructor body is empty. */ simdjson_inline array::array(const value_iterator &_iter) noexcept : iter{_iter} { } simdjson_inline simdjson_result array::start(value_iterator &iter) noexcept { // We don't need to know if the array is empty to start iteration, but we do want to know if there // is an error--thus `simdjson_unused`.
simdjson_unused bool has_value; SIMDJSON_TRY( iter.start_array().get(has_value) ); return array(iter); } /* Root-level variant: begins the array through iter.start_root_array() and wraps the iterator in an array. */ simdjson_inline simdjson_result array::start_root(value_iterator &iter) noexcept { simdjson_unused bool has_value; SIMDJSON_TRY( iter.start_root_array().get(has_value) ); return array(iter); } /* Variant that goes through iter.started_array() instead of start_array(). */ simdjson_inline simdjson_result array::started(value_iterator &iter) noexcept { bool has_value; SIMDJSON_TRY(iter.started_array().get(has_value)); return array(iter); } /* With SIMDJSON_DEVELOPMENT_CHECKS enabled, returns OUT_OF_ORDER_ITERATION unless the iterator is at its start. */ simdjson_inline simdjson_result array::begin() noexcept { #if SIMDJSON_DEVELOPMENT_CHECKS if (!iter.is_at_iterator_start()) { return OUT_OF_ORDER_ITERATION; } #endif return array_iterator(iter); } /* Built from the same value_iterator as begin(). */ simdjson_inline simdjson_result array::end() noexcept { return array_iterator(iter); } /* Calls skip_child(iter.depth()-1) on the underlying json_iterator; on error the iterator is abandoned and the error is returned. */ simdjson_inline error_code array::consume() noexcept { auto error = iter.json_iter().skip_child(iter.depth()-1); if(error) { iter.abandon(); } return error; } /* Returns the bytes between iter.peek_start() and the position reached after consume(); a consume() error is returned instead. */ simdjson_inline simdjson_result array::raw_json() noexcept { const uint8_t * starting_point{iter.peek_start()}; auto error = consume(); if(error) { return error; } // After 'consume()', we could be left pointing just beyond the document, but that // is ok because we are not going to dereference the final pointer position, we just // use it to compute the length in bytes. const uint8_t * final_point{iter._json_iter->unsafe_pointer()}; return std::string_view(reinterpret_cast(starting_point), size_t(final_point - starting_point)); } SIMDJSON_PUSH_DISABLE_WARNINGS SIMDJSON_DISABLE_STRICT_OVERFLOW_WARNING /* Counts the elements by iterating over *this, then resets the array so it can be iterated again. */ simdjson_inline simdjson_result array::count_elements() & noexcept { size_t count{0}; // Important: we do not consume any of the values. for(simdjson_unused auto v : *this) { count++; } // The above loop will always succeed, but we want to report errors. if(iter.error()) { return iter.error(); } // We need to move back at the start because we expect users to iterate through // the array after counting the number of elements.
iter.reset_array(); return count; }
SIMDJSON_POP_DISABLE_WARNINGS

/**
 * Reports whether the array has no elements.
 *
 * Resets the iterator to the start of the array (reset_array()) and negates the
 * flag it reports. An error from reset_array() is returned unchanged.
 */
simdjson_inline simdjson_result array::is_empty() & noexcept {
  bool is_not_empty;
  auto error = iter.reset_array().get(is_not_empty);
  if(error) { return error; }
  return !is_not_empty;
}

/** Resets the array; returns whatever iter.reset_array() returns. */
inline simdjson_result array::reset() & noexcept {
  return iter.reset_array();
}

/**
 * Resolves a JSON pointer relative to this array.
 *
 * The pointer must start with '/', followed by a decimal array index and,
 * optionally, a further "/..." tail that is resolved on the selected child.
 *
 * @param json_pointer The pointer text, e.g. "/3" or "/3/name".
 * @return The selected value, or:
 *   - INVALID_JSON_POINTER if the pointer is empty, does not start with '/',
 *     has no digits, or has a leading 0 followed by more digits;
 *   - INCORRECT_TYPE if the index segment contains a non-digit;
 *   - INDEX_OUT_OF_BOUNDS for "-", for an index that does not fit in size_t,
 *     or when at() reports that the index is past the end of the array;
 *   - any error produced by at() or by the recursive lookup.
 */
inline simdjson_result array::at_pointer(std::string_view json_pointer) noexcept {
  // Test emptiness first: indexing an empty std::string_view is undefined behavior.
  if (json_pointer.empty() || json_pointer[0] != '/') { return INVALID_JSON_POINTER; }
  json_pointer = json_pointer.substr(1);
  // - means "the append position" or "the element after the end of the array"
  // We don't support this, because we're returning a real element, not a position.
  if (json_pointer == "-") { return INDEX_OUT_OF_BOUNDS; }

  // Read the array index
  size_t array_index = 0;
  // Set when the digits describe a number larger than size_t can hold. It is reported only
  // after the syntax checks below, so malformed pointers keep their original error codes.
  bool index_overflow = false;
  size_t i;
  for (i = 0; i < json_pointer.length() && json_pointer[i] != '/'; i++) {
    uint8_t digit = uint8_t(json_pointer[i] - '0');
    // Check for non-digit in array index. If it's there, we're trying to get a field in an object
    if (digit > 9) { return INCORRECT_TYPE; }
    // Without this guard the multiplication wraps around and a huge index would silently
    // select a small, existing element.
    if (array_index > (~size_t(0) - size_t(digit)) / 10) {
      index_overflow = true;
    } else {
      array_index = array_index*10 + digit;
    }
  }

  // 0 followed by other digits is invalid
  if (i > 1 && json_pointer[0] == '0') { return INVALID_JSON_POINTER; } // "JSON pointer array index has other characters after 0"

  // Empty string is invalid; so is a "/" with no digits before it
  if (i == 0) { return INVALID_JSON_POINTER; } // "Empty string in JSON pointer array index"

  // An index that cannot be represented cannot name an element of the array.
  if (index_overflow) { return INDEX_OUT_OF_BOUNDS; }

  // Get the child
  auto child = at(array_index);
  // If there is an error, it ends here
  if(child.error()) {
    return child;
  }

  // If there is a /, we're not done yet, call recursively.
if (i < json_pointer.length()) { child = child.at_pointer(json_pointer.substr(i)); } return child; } /* Linear scan: iterates over *this and returns the element whose zero-based position equals index, or INDEX_OUT_OF_BOUNDS when the loop ends first. */ simdjson_inline simdjson_result array::at(size_t index) noexcept { size_t i = 0; for (auto value : *this) { if (i == index) { return value; } i++; } return INDEX_OUT_OF_BOUNDS; } } // namespace ondemand } // namespace fallback } // namespace simdjson namespace simdjson { /* Result wrappers for array. The member functions below return the stored error if there is one, and otherwise forward to the same-named method on the wrapped array (first). */ simdjson_inline simdjson_result::simdjson_result( fallback::ondemand::array &&value ) noexcept : implementation_simdjson_result_base( std::forward(value) ) { } simdjson_inline simdjson_result::simdjson_result( error_code error ) noexcept : implementation_simdjson_result_base(error) { } simdjson_inline simdjson_result simdjson_result::begin() noexcept { if (error()) { return error(); } return first.begin(); } simdjson_inline simdjson_result simdjson_result::end() noexcept { if (error()) { return error(); } return first.end(); } simdjson_inline simdjson_result simdjson_result::count_elements() & noexcept { if (error()) { return error(); } return first.count_elements(); } simdjson_inline simdjson_result simdjson_result::is_empty() & noexcept { if (error()) { return error(); } return first.is_empty(); } simdjson_inline simdjson_result simdjson_result::at(size_t index) noexcept { if (error()) { return error(); } return first.at(index); } simdjson_inline simdjson_result simdjson_result::at_pointer(std::string_view json_pointer) noexcept { if (error()) { return error(); } return first.at_pointer(json_pointer); } simdjson_inline simdjson_result simdjson_result::raw_json() noexcept { if (error()) { return error(); } return first.raw_json(); } } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_ARRAY_INL_H /* end file simdjson/generic/ondemand/array-inl.h for fallback */ /* including simdjson/generic/ondemand/array_iterator-inl.h for fallback: #include "simdjson/generic/ondemand/array_iterator-inl.h" */ /* begin file simdjson/generic/ondemand/array_iterator-inl.h for fallback */ #ifndef
SIMDJSON_GENERIC_ONDEMAND_ARRAY_ITERATOR_INL_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_ARRAY_ITERATOR_INL_H */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array_iterator.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value-inl.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator-inl.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { namespace fallback { namespace ondemand { simdjson_inline array_iterator::array_iterator(const value_iterator &_iter) noexcept : iter{_iter} {} /* If the iterator carries an error, abandons it and yields that error; otherwise yields a value built from iter.child(). */ simdjson_inline simdjson_result array_iterator::operator*() noexcept { if (iter.error()) { iter.abandon(); return iter.error(); } return value(iter.child()); } /* Defined as the negation of operator!=. */ simdjson_inline bool array_iterator::operator==(const array_iterator &other) const noexcept { return !(*this != other); } /* The argument is ignored: the result only reports whether this iterator is still open (iter.is_open()). */ simdjson_inline bool array_iterator::operator!=(const array_iterator &) const noexcept { return iter.is_open(); } simdjson_inline array_iterator &array_iterator::operator++() noexcept { error_code error; // PERF NOTE this is a safety rail ... users should exit loops as soon as they receive an error, so we'll never get here. // However, it does not seem to make a perf difference, so we add it out of an abundance of caution.
if (( error = iter.error() )) { return *this; } if (( error = iter.skip_child() )) { return *this; } if (( error = iter.has_next_element().error() )) { return *this; } return *this; } } // namespace ondemand } // namespace fallback } // namespace simdjson namespace simdjson { /* Result wrapper around array_iterator; the value constructor asserts that the wrapped iterator is valid. */ simdjson_inline simdjson_result::simdjson_result( fallback::ondemand::array_iterator &&value ) noexcept : fallback::implementation_simdjson_result_base(std::forward(value)) { first.iter.assert_is_valid(); } simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept : fallback::implementation_simdjson_result_base({}, error) { } simdjson_inline simdjson_result simdjson_result::operator*() noexcept { if (error()) { return error(); } return *first; } /* When first.iter is not valid, equality is decided from the error state alone (equal exactly when there is no error). */ simdjson_inline bool simdjson_result::operator==(const simdjson_result &other) const noexcept { if (!first.iter.is_valid()) { return !error(); } return first == other.first; } /* When first.iter is not valid, the result is "not equal" exactly when an error is stored. */ simdjson_inline bool simdjson_result::operator!=(const simdjson_result &other) const noexcept { if (!first.iter.is_valid()) { return error(); } return first != other.first; } simdjson_inline simdjson_result &simdjson_result::operator++() noexcept { // Clear the error if there is one, so we don't yield it twice if (error()) { second = SUCCESS; return *this; } ++(first); return *this; } } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_ARRAY_ITERATOR_INL_H /* end file simdjson/generic/ondemand/array_iterator-inl.h for fallback */ /* including simdjson/generic/ondemand/document-inl.h for fallback: #include "simdjson/generic/ondemand/document-inl.h" */ /* begin file simdjson/generic/ondemand/document-inl.h for fallback */ #ifndef SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_INL_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_INL_H */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ /* amalgamation skipped
(editor-only): #include "simdjson/generic/ondemand/array-inl.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array_iterator.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/document.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator-inl.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_type.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/object-inl.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator-inl.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { namespace fallback { namespace ondemand { /* Stores the forwarded json_iterator and logs the start of a "document" value. */ simdjson_inline document::document(ondemand::json_iterator &&_iter) noexcept : iter{std::forward(_iter)} { logger::log_start_value(iter, "document"); } simdjson_inline document document::start(json_iterator &&iter) noexcept { return document(std::forward(iter)); } /* The accessors below forward directly to the same-named (or, for to_debug_string, to_string) method of the underlying json_iterator. */ inline void document::rewind() noexcept { iter.rewind(); } inline std::string document::to_debug_string() noexcept { return iter.to_string(); } inline simdjson_result document::current_location() const noexcept { return iter.current_location(); } inline int32_t document::current_depth() const noexcept { return iter.depth(); } inline bool document::at_end() const noexcept { return iter.at_end(); } inline bool document::is_alive() noexcept { return iter.is_alive(); } /* Builds a value_iterator from the document's json_iterator, the constant 1 and iter.root_position(). */ simdjson_inline value_iterator document::resume_value_iterator() noexcept { return value_iterator(&iter, 1, iter.root_position()); } /* Currently identical to resume_value_iterator(). */ simdjson_inline value_iterator document::get_root_value_iterator() noexcept { return resume_value_iterator(); } simdjson_inline simdjson_result
document::start_or_resume_object() noexcept { if (iter.at_root()) { return get_object(); } else { return object::resume(resume_value_iterator()); } } /* Returns the root as a value when the document starts with '[' or '{' (after check_root_array()/check_root_object() succeeds); any other first byte yields SCALAR_DOCUMENT_AS_VALUE. */ simdjson_inline simdjson_result document::get_value() noexcept { // Make sure we start any arrays or objects before returning, so that start_root_() // gets called. // It is the convention throughout the code that the macro `SIMDJSON_DEVELOPMENT_CHECKS` determines whether // we check for OUT_OF_ORDER_ITERATION. Proper on::demand code should never trigger this error. #if SIMDJSON_DEVELOPMENT_CHECKS if (!iter.at_root()) { return OUT_OF_ORDER_ITERATION; } #endif // assert_at_root() serves two purposes: in Debug mode, whether or not // SIMDJSON_DEVELOPMENT_CHECKS is set, it checks that we are at the root of // the document (this will typically be redundant). In release mode, it generates // SIMDJSON_ASSUME statements to allow the compiler to make assumptions. iter.assert_at_root(); switch (*iter.peek()) { case '[': { // The following lines check that the document ends with ]. auto value_iterator = get_root_value_iterator(); auto error = value_iterator.check_root_array(); if(error) { return error; } return value(get_root_value_iterator()); } case '{': { // The following lines check that the document ends with }. auto value_iterator = get_root_value_iterator(); auto error = value_iterator.check_root_object(); if(error) { return error; } return value(get_root_value_iterator()); } default: // Unfortunately, scalar documents are a special case in simdjson and they cannot // be safely converted to value instances.
return SCALAR_DOCUMENT_AS_VALUE; } } /* Starts the root array through array::start_root() on a fresh root value iterator. */ simdjson_inline simdjson_result document::get_array() & noexcept { auto value = get_root_value_iterator(); return array::start_root(value); } /* Starts the root object through object::start_root() on a fresh root value iterator. */ simdjson_inline simdjson_result document::get_object() & noexcept { auto value = get_root_value_iterator(); return object::start_root(value); } /** * We decided that calling 'get_double()' on the JSON document '1.233 blabla' should * give an error, so we check for trailing content. We want to disallow trailing * content. * Thus, in several implementations below, we pass a 'true' parameter value to * the method called on the value returned by get_root_value_iterator(): this * indicates that we disallow trailing content. */ simdjson_inline simdjson_result document::get_uint64() noexcept { return get_root_value_iterator().get_root_uint64(true); } simdjson_inline simdjson_result document::get_uint64_in_string() noexcept { return get_root_value_iterator().get_root_uint64_in_string(true); } simdjson_inline simdjson_result document::get_int64() noexcept { return get_root_value_iterator().get_root_int64(true); } simdjson_inline simdjson_result document::get_int64_in_string() noexcept { return get_root_value_iterator().get_root_int64_in_string(true); } simdjson_inline simdjson_result document::get_double() noexcept { return get_root_value_iterator().get_root_double(true); } simdjson_inline simdjson_result document::get_double_in_string() noexcept { return get_root_value_iterator().get_root_double_in_string(true); } simdjson_inline simdjson_result document::get_string(bool allow_replacement) noexcept { return get_root_value_iterator().get_root_string(true, allow_replacement); } template simdjson_inline error_code document::get_string(string_type& receiver, bool allow_replacement) noexcept { return get_root_value_iterator().get_root_string(receiver, true, allow_replacement); } simdjson_inline simdjson_result document::get_wobbly_string() noexcept { return get_root_value_iterator().get_root_wobbly_string(true); } simdjson_inline
simdjson_result document::get_raw_json_string() noexcept { return get_root_value_iterator().get_root_raw_json_string(true); } simdjson_inline simdjson_result document::get_bool() noexcept { return get_root_value_iterator().get_root_bool(true); } simdjson_inline simdjson_result document::is_null() noexcept { return get_root_value_iterator().is_root_null(true); } /* Explicit specializations of get() for lvalue documents: each forwards to one typed getter. The one that calls get_string() passes allow_replacement == false. */ template<> simdjson_inline simdjson_result document::get() & noexcept { return get_array(); } template<> simdjson_inline simdjson_result document::get() & noexcept { return get_object(); } template<> simdjson_inline simdjson_result document::get() & noexcept { return get_raw_json_string(); } template<> simdjson_inline simdjson_result document::get() & noexcept { return get_string(false); } template<> simdjson_inline simdjson_result document::get() & noexcept { return get_double(); } template<> simdjson_inline simdjson_result document::get() & noexcept { return get_uint64(); } template<> simdjson_inline simdjson_result document::get() & noexcept { return get_int64(); } template<> simdjson_inline simdjson_result document::get() & noexcept { return get_bool(); } template<> simdjson_inline simdjson_result document::get() & noexcept { return get_value(); } /* Explicit specializations of get() for rvalue documents. No specialization forwarding to get_array() or get_object() appears in this group. */ template<> simdjson_inline simdjson_result document::get() && noexcept { return get_raw_json_string(); } template<> simdjson_inline simdjson_result document::get() && noexcept { return get_string(false); } template<> simdjson_inline simdjson_result document::get() && noexcept { return std::forward(*this).get_double(); } template<> simdjson_inline simdjson_result document::get() && noexcept { return std::forward(*this).get_uint64(); } template<> simdjson_inline simdjson_result document::get() && noexcept { return std::forward(*this).get_int64(); } template<> simdjson_inline simdjson_result document::get() && noexcept { return std::forward(*this).get_bool(); } template<> simdjson_inline simdjson_result document::get() && noexcept { return get_value(); } template
simdjson_inline error_code document::get(T &out) & noexcept { return get().get(out); } template simdjson_inline error_code document::get(T &out) && noexcept { return std::forward(*this).get().get(out); } #if SIMDJSON_EXCEPTIONS /* Conversion operators, compiled only when SIMDJSON_EXCEPTIONS is set. Each is declared noexcept(false) and returns the result of the matching getter. */ simdjson_inline document::operator array() & noexcept(false) { return get_array(); } simdjson_inline document::operator object() & noexcept(false) { return get_object(); } simdjson_inline document::operator uint64_t() noexcept(false) { return get_uint64(); } simdjson_inline document::operator int64_t() noexcept(false) { return get_int64(); } simdjson_inline document::operator double() noexcept(false) { return get_double(); } simdjson_inline document::operator std::string_view() noexcept(false) { return get_string(false); } simdjson_inline document::operator raw_json_string() noexcept(false) { return get_raw_json_string(); } simdjson_inline document::operator bool() noexcept(false) { return get_bool(); } simdjson_inline document::operator value() noexcept(false) { return get_value(); } #endif /* Counts the elements of the root array via get_array().count_elements(); the document is rewound only when the count succeeded. */ simdjson_inline simdjson_result document::count_elements() & noexcept { auto a = get_array(); simdjson_result answer = a.count_elements(); /* If there was an array, we are now left pointing at its first element. */ if(answer.error() == SUCCESS) { rewind(); } return answer; } /* Counts the fields of the root object via get_object().count_fields(); the document is rewound only when the count succeeded. */ simdjson_inline simdjson_result document::count_fields() & noexcept { auto a = get_object(); simdjson_result answer = a.count_fields(); /* If there was an object, we are now left pointing at its first element.
*/ if(answer.error() == SUCCESS) { rewind(); } return answer; } /* Forwards to at(index) on the result of get_array(). */ simdjson_inline simdjson_result document::at(size_t index) & noexcept { auto a = get_array(); return a.at(index); } simdjson_inline simdjson_result document::begin() & noexcept { return get_array().begin(); } /* Returns a default-constructed result ({}). */ simdjson_inline simdjson_result document::end() & noexcept { return {}; } /* The field lookups and operator[] below all go through start_or_resume_object() and forward to the same-named operation on its result. */ simdjson_inline simdjson_result document::find_field(std::string_view key) & noexcept { return start_or_resume_object().find_field(key); } simdjson_inline simdjson_result document::find_field(const char *key) & noexcept { return start_or_resume_object().find_field(key); } simdjson_inline simdjson_result document::find_field_unordered(std::string_view key) & noexcept { return start_or_resume_object().find_field_unordered(key); } simdjson_inline simdjson_result document::find_field_unordered(const char *key) & noexcept { return start_or_resume_object().find_field_unordered(key); } simdjson_inline simdjson_result document::operator[](std::string_view key) & noexcept { return start_or_resume_object()[key]; } simdjson_inline simdjson_result document::operator[](const char *key) & noexcept { return start_or_resume_object()[key]; } /* Calls iter.skip_child(0); on error the iterator is abandoned and the error is returned. */ simdjson_inline error_code document::consume() noexcept { auto error = iter.skip_child(0); if(error) { iter.abandon(); } return error; } /* Returns the bytes between the root value's peek_start() and the position reached after consume(); a consume() error is returned instead. */ simdjson_inline simdjson_result document::raw_json() noexcept { auto _iter = get_root_value_iterator(); const uint8_t * starting_point{_iter.peek_start()}; auto error = consume(); if(error) { return error; } // After 'consume()', we could be left pointing just beyond the document, but that // is ok because we are not going to dereference the final pointer position, we just // use it to compute the length in bytes.
const uint8_t * final_point{iter.unsafe_pointer()}; return std::string_view(reinterpret_cast(starting_point), size_t(final_point - starting_point)); } simdjson_inline simdjson_result document::type() noexcept { return get_root_value_iterator().type(); } /* A document is scalar when its type is neither json_type::array nor json_type::object; an error from type() is returned unchanged. */ simdjson_inline simdjson_result document::is_scalar() noexcept { json_type this_type; auto error = type().get(this_type); if(error) { return error; } return ! ((this_type == json_type::array) || (this_type == json_type::object)); } simdjson_inline bool document::is_negative() noexcept { return get_root_value_iterator().is_root_negative(); } simdjson_inline simdjson_result document::is_integer() noexcept { return get_root_value_iterator().is_root_integer(true); } simdjson_inline simdjson_result document::get_number_type() noexcept { return get_root_value_iterator().get_root_number_type(true); } simdjson_inline simdjson_result document::get_number() noexcept { return get_root_value_iterator().get_root_number(true); } /* Returns a view of peek_start_length() bytes starting at the root value's peek_start(). */ simdjson_inline simdjson_result document::raw_json_token() noexcept { auto _iter = get_root_value_iterator(); return std::string_view(reinterpret_cast(_iter.peek_start()), _iter.peek_start_length()); } /* Rewinds the document, then: an empty pointer returns get_value(); otherwise the lookup is delegated to the root array or object, and any other root type yields INVALID_JSON_POINTER. */ simdjson_inline simdjson_result document::at_pointer(std::string_view json_pointer) noexcept { rewind(); // Rewind the document each time at_pointer is called if (json_pointer.empty()) { return this->get_value(); } json_type t; SIMDJSON_TRY(type().get(t)); switch (t) { case json_type::array: return (*this).get_array().at_pointer(json_pointer); case json_type::object: return (*this).get_object().at_pointer(json_pointer); default: return INVALID_JSON_POINTER; } } } // namespace ondemand } // namespace fallback } // namespace simdjson namespace simdjson { simdjson_inline simdjson_result::simdjson_result( fallback::ondemand::document &&value ) noexcept : implementation_simdjson_result_base( std::forward(value) ) { } simdjson_inline simdjson_result::simdjson_result( error_code error ) noexcept :
implementation_simdjson_result_base( error ) { } /* Most wrappers below share one pattern: return the stored error if there is one, otherwise forward to the same-named method on the wrapped document (first). Exceptions visible here: rewind() returns SUCCESS after forwarding, and end() returns {} without checking the error. */ simdjson_inline simdjson_result simdjson_result::count_elements() & noexcept { if (error()) { return error(); } return first.count_elements(); } simdjson_inline simdjson_result simdjson_result::count_fields() & noexcept { if (error()) { return error(); } return first.count_fields(); } simdjson_inline simdjson_result simdjson_result::at(size_t index) & noexcept { if (error()) { return error(); } return first.at(index); } simdjson_inline error_code simdjson_result::rewind() noexcept { if (error()) { return error(); } first.rewind(); return SUCCESS; } simdjson_inline simdjson_result simdjson_result::begin() & noexcept { if (error()) { return error(); } return first.begin(); } simdjson_inline simdjson_result simdjson_result::end() & noexcept { return {}; } simdjson_inline simdjson_result simdjson_result::find_field_unordered(std::string_view key) & noexcept { if (error()) { return error(); } return first.find_field_unordered(key); } simdjson_inline simdjson_result simdjson_result::find_field_unordered(const char *key) & noexcept { if (error()) { return error(); } return first.find_field_unordered(key); } simdjson_inline simdjson_result simdjson_result::operator[](std::string_view key) & noexcept { if (error()) { return error(); } return first[key]; } simdjson_inline simdjson_result simdjson_result::operator[](const char *key) & noexcept { if (error()) { return error(); } return first[key]; } simdjson_inline simdjson_result simdjson_result::find_field(std::string_view key) & noexcept { if (error()) { return error(); } return first.find_field(key); } simdjson_inline simdjson_result simdjson_result::find_field(const char *key) & noexcept { if (error()) { return error(); } return first.find_field(key); } simdjson_inline simdjson_result simdjson_result::get_array() & noexcept { if (error()) { return error(); } return first.get_array(); } simdjson_inline simdjson_result simdjson_result::get_object() & noexcept { if (error()) {
return error(); } return first.get_object(); } /* Typed getters on the result wrapper: each returns the stored error if there is one, otherwise forwards to the same-named getter on the wrapped document (first). */ simdjson_inline simdjson_result simdjson_result::get_uint64() noexcept { if (error()) { return error(); } return first.get_uint64(); } simdjson_inline simdjson_result simdjson_result::get_uint64_in_string() noexcept { if (error()) { return error(); } return first.get_uint64_in_string(); } simdjson_inline simdjson_result simdjson_result::get_int64() noexcept { if (error()) { return error(); } return first.get_int64(); } simdjson_inline simdjson_result simdjson_result::get_int64_in_string() noexcept { if (error()) { return error(); } return first.get_int64_in_string(); } simdjson_inline simdjson_result simdjson_result::get_double() noexcept { if (error()) { return error(); } return first.get_double(); } simdjson_inline simdjson_result simdjson_result::get_double_in_string() noexcept { if (error()) { return error(); } return first.get_double_in_string(); } simdjson_inline simdjson_result simdjson_result::get_string(bool allow_replacement) noexcept { if (error()) { return error(); } return first.get_string(allow_replacement); } template simdjson_inline error_code simdjson_result::get_string(string_type& receiver, bool allow_replacement) noexcept { if (error()) { return error(); } return first.get_string(receiver, allow_replacement); } simdjson_inline simdjson_result simdjson_result::get_wobbly_string() noexcept { if (error()) { return error(); } return first.get_wobbly_string(); } simdjson_inline simdjson_result simdjson_result::get_raw_json_string() noexcept { if (error()) { return error(); } return first.get_raw_json_string(); } simdjson_inline simdjson_result simdjson_result::get_bool() noexcept { if (error()) { return error(); } return first.get_bool(); } simdjson_inline simdjson_result simdjson_result::get_value() noexcept { if (error()) { return error(); } return first.get_value(); } simdjson_inline simdjson_result simdjson_result::is_null() noexcept { if (error()) { return error(); } return first.is_null(); }
/*
 * Generic get() overloads (lvalue- and rvalue-qualified), their explicit
 * specializations for fallback::ondemand::document, and the type/number queries.
 *  - The `&` specializations for document are deleted; the `&&` ones hand out `first`
 *    through std::forward (the out-parameter form assigns it and returns SUCCESS).
 *  - NOTE(review): is_negative() is declared to return bool yet executes
 *    `return error();` on failure, so the error value is converted to bool on that
 *    path - confirm this is intended.
 */
template simdjson_inline simdjson_result simdjson_result::get() & noexcept { if (error()) { return error(); } return first.get(); } template simdjson_inline simdjson_result simdjson_result::get() && noexcept { if (error()) { return error(); } return std::forward(first).get(); } template simdjson_inline error_code simdjson_result::get(T &out) & noexcept { if (error()) { return error(); } return first.get(out); } template simdjson_inline error_code simdjson_result::get(T &out) && noexcept { if (error()) { return error(); } return std::forward(first).get(out); } template<> simdjson_inline simdjson_result simdjson_result::get() & noexcept = delete; template<> simdjson_inline simdjson_result simdjson_result::get() && noexcept { if (error()) { return error(); } return std::forward(first); } template<> simdjson_inline error_code simdjson_result::get(fallback::ondemand::document &out) & noexcept = delete; template<> simdjson_inline error_code simdjson_result::get(fallback::ondemand::document &out) && noexcept { if (error()) { return error(); } out = std::forward(first); return SUCCESS; } simdjson_inline simdjson_result simdjson_result::type() noexcept { if (error()) { return error(); } return first.type(); } simdjson_inline simdjson_result simdjson_result::is_scalar() noexcept { if (error()) { return error(); } return first.is_scalar(); } simdjson_inline bool simdjson_result::is_negative() noexcept { if (error()) { return error(); } return first.is_negative(); } simdjson_inline simdjson_result simdjson_result::is_integer() noexcept { if (error()) { return error(); } return first.is_integer(); } simdjson_inline simdjson_result simdjson_result::get_number_type() noexcept { if (error()) { return error(); } return first.get_number_type(); } simdjson_inline simdjson_result simdjson_result::get_number() noexcept { if (error()) { return error(); } return first.get_number(); } #if SIMDJSON_EXCEPTIONS simdjson_inline simdjson_result::operator fallback::ondemand::array() & 
/*
 * Throwing conversion operators (compiled only under SIMDJSON_EXCEPTIONS): each one
 * throws simdjson_error(error()) when an error is stored and otherwise returns `first`.
 * After the #endif, current_location(), raw_json_token() and at_pointer() use the usual
 * error-first forwarding.
 * NOTE(review): at_end() (bool) and current_depth() (int32_t) also `return error();`
 * on failure, so the error value is converted to the declared return type - confirm.
 */
noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } simdjson_inline simdjson_result::operator fallback::ondemand::object() & noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } simdjson_inline simdjson_result::operator uint64_t() noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } simdjson_inline simdjson_result::operator int64_t() noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } simdjson_inline simdjson_result::operator double() noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } simdjson_inline simdjson_result::operator std::string_view() noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } simdjson_inline simdjson_result::operator fallback::ondemand::raw_json_string() noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } simdjson_inline simdjson_result::operator bool() noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } simdjson_inline simdjson_result::operator fallback::ondemand::value() noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } #endif simdjson_inline simdjson_result simdjson_result::current_location() noexcept { if (error()) { return error(); } return first.current_location(); } simdjson_inline bool simdjson_result::at_end() const noexcept { if (error()) { return error(); } return first.at_end(); } simdjson_inline int32_t simdjson_result::current_depth() const noexcept { if (error()) { return error(); } return first.current_depth(); } simdjson_inline simdjson_result simdjson_result::raw_json_token() noexcept { if (error()) { return error(); } return first.raw_json_token(); } simdjson_inline simdjson_result simdjson_result::at_pointer(std::string_view json_pointer) noexcept { if (error()) { return error(); } return first.at_pointer(json_pointer); } } // namespace simdjson namespace 
simdjson { namespace fallback { namespace ondemand { simdjson_inline document_reference::document_reference() noexcept : doc{nullptr} {} simdjson_inline document_reference::document_reference(document &d) noexcept : doc(&d) {} simdjson_inline void document_reference::rewind() noexcept { doc->rewind(); } simdjson_inline simdjson_result document_reference::get_array() & noexcept { return doc->get_array(); } simdjson_inline simdjson_result document_reference::get_object() & noexcept { return doc->get_object(); } /** * The document_reference instances are used primarily/solely for streams of JSON * documents. * We decided that calling 'get_double()' on the JSON document '1.233 blabla' should * give an error, so we check for trailing content. * * However, for streams of JSON documents, we want to be able to start from * "321" "321" "321" * and parse it successfully as a stream of JSON documents, calling get_uint64_in_string() * successfully each time. * * To achieve this result, we pass a 'false' to a get_root_value_iterator() method: * this indicates that we allow trailing content. 
*/ simdjson_inline simdjson_result document_reference::get_uint64() noexcept { return doc->get_root_value_iterator().get_root_uint64(false); } simdjson_inline simdjson_result document_reference::get_uint64_in_string() noexcept { return doc->get_root_value_iterator().get_root_uint64_in_string(false); } simdjson_inline simdjson_result document_reference::get_int64() noexcept { return doc->get_root_value_iterator().get_root_int64(false); } simdjson_inline simdjson_result document_reference::get_int64_in_string() noexcept { return doc->get_root_value_iterator().get_root_int64_in_string(false); } simdjson_inline simdjson_result document_reference::get_double() noexcept { return doc->get_root_value_iterator().get_root_double(false); } simdjson_inline simdjson_result document_reference::get_double_in_string() noexcept { return doc->get_root_value_iterator().get_root_double(false); } simdjson_inline simdjson_result document_reference::get_string(bool allow_replacement) noexcept { return doc->get_root_value_iterator().get_root_string(false, allow_replacement); } template simdjson_inline error_code document_reference::get_string(string_type& receiver, bool allow_replacement) noexcept { return doc->get_root_value_iterator().get_root_string(receiver, false, allow_replacement); } simdjson_inline simdjson_result document_reference::get_wobbly_string() noexcept { return doc->get_root_value_iterator().get_root_wobbly_string(false); } simdjson_inline simdjson_result document_reference::get_raw_json_string() noexcept { return doc->get_root_value_iterator().get_root_raw_json_string(false); } simdjson_inline simdjson_result document_reference::get_bool() noexcept { return doc->get_root_value_iterator().get_root_bool(false); } simdjson_inline simdjson_result document_reference::get_value() noexcept { return doc->get_value(); } simdjson_inline simdjson_result document_reference::is_null() noexcept { return doc->get_root_value_iterator().is_root_null(false); } #if SIMDJSON_EXCEPTIONS 
/*
 * document_reference conversion operators (inside the SIMDJSON_EXCEPTIONS section that
 * is opened on the previous line), followed by forwarders that call straight through
 * to *doc. The uint64_t/int64_t/double/bool conversions go through this class's own
 * get_*() methods; the array/object/std::string_view/raw_json_string/value conversions
 * are built from *doc.
 */
simdjson_inline document_reference::operator array() & noexcept(false) { return array(*doc); } simdjson_inline document_reference::operator object() & noexcept(false) { return object(*doc); } simdjson_inline document_reference::operator uint64_t() noexcept(false) { return get_uint64(); } simdjson_inline document_reference::operator int64_t() noexcept(false) { return get_int64(); } simdjson_inline document_reference::operator double() noexcept(false) { return get_double(); } simdjson_inline document_reference::operator std::string_view() noexcept(false) { return std::string_view(*doc); } simdjson_inline document_reference::operator raw_json_string() noexcept(false) { return raw_json_string(*doc); } simdjson_inline document_reference::operator bool() noexcept(false) { return get_bool(); } simdjson_inline document_reference::operator value() noexcept(false) { return value(*doc); } #endif simdjson_inline simdjson_result document_reference::count_elements() & noexcept { return doc->count_elements(); } simdjson_inline simdjson_result document_reference::count_fields() & noexcept { return doc->count_fields(); } simdjson_inline simdjson_result document_reference::at(size_t index) & noexcept { return doc->at(index); } simdjson_inline simdjson_result document_reference::begin() & noexcept { return doc->begin(); } simdjson_inline simdjson_result document_reference::end() & noexcept { return doc->end(); } simdjson_inline simdjson_result document_reference::find_field(std::string_view key) & noexcept { return doc->find_field(key); } simdjson_inline simdjson_result document_reference::find_field(const char *key) & noexcept { return doc->find_field(key); } simdjson_inline simdjson_result document_reference::operator[](std::string_view key) & noexcept { return (*doc)[key]; } simdjson_inline simdjson_result document_reference::operator[](const char *key) & noexcept { return (*doc)[key]; } simdjson_inline simdjson_result document_reference::find_field_unordered(std::string_view key) 
/*
 * Remaining document_reference forwarders. is_integer(), get_number_type() and
 * get_number() go through doc->get_root_value_iterator() and pass `false`; the others
 * call the same-named method on *doc. `operator document&()` returns *doc. The
 * namespaces are then closed and the result wrapper for document_reference begins
 * (its constructor, then the start of count_elements()).
 */
& noexcept { return doc->find_field_unordered(key); } simdjson_inline simdjson_result document_reference::find_field_unordered(const char *key) & noexcept { return doc->find_field_unordered(key); } simdjson_inline simdjson_result document_reference::type() noexcept { return doc->type(); } simdjson_inline simdjson_result document_reference::is_scalar() noexcept { return doc->is_scalar(); } simdjson_inline simdjson_result document_reference::current_location() noexcept { return doc->current_location(); } simdjson_inline int32_t document_reference::current_depth() const noexcept { return doc->current_depth(); } simdjson_inline bool document_reference::is_negative() noexcept { return doc->is_negative(); } simdjson_inline simdjson_result document_reference::is_integer() noexcept { return doc->get_root_value_iterator().is_root_integer(false); } simdjson_inline simdjson_result document_reference::get_number_type() noexcept { return doc->get_root_value_iterator().get_root_number_type(false); } simdjson_inline simdjson_result document_reference::get_number() noexcept { return doc->get_root_value_iterator().get_root_number(false); } simdjson_inline simdjson_result document_reference::raw_json_token() noexcept { return doc->raw_json_token(); } simdjson_inline simdjson_result document_reference::at_pointer(std::string_view json_pointer) noexcept { return doc->at_pointer(json_pointer); } simdjson_inline simdjson_result document_reference::raw_json() noexcept { return doc->raw_json();} simdjson_inline document_reference::operator document&() const noexcept { return *doc; } } // namespace ondemand } // namespace fallback } // namespace simdjson namespace simdjson { simdjson_inline simdjson_result::simdjson_result(fallback::ondemand::document_reference value, error_code error) noexcept : implementation_simdjson_result_base(std::forward(value), error) {} simdjson_inline simdjson_result simdjson_result::count_elements() & noexcept { if (error()) { return error(); } return 
/*
 * Result-wrapper forwarders for document_reference: every method returns the stored
 * error when there is one and otherwise delegates to `first`. rewind() returns SUCCESS
 * after first.rewind(); end() returns {} without consulting error().
 */
first.count_elements(); } simdjson_inline simdjson_result simdjson_result::count_fields() & noexcept { if (error()) { return error(); } return first.count_fields(); } simdjson_inline simdjson_result simdjson_result::at(size_t index) & noexcept { if (error()) { return error(); } return first.at(index); } simdjson_inline error_code simdjson_result::rewind() noexcept { if (error()) { return error(); } first.rewind(); return SUCCESS; } simdjson_inline simdjson_result simdjson_result::begin() & noexcept { if (error()) { return error(); } return first.begin(); } simdjson_inline simdjson_result simdjson_result::end() & noexcept { return {}; } simdjson_inline simdjson_result simdjson_result::find_field_unordered(std::string_view key) & noexcept { if (error()) { return error(); } return first.find_field_unordered(key); } simdjson_inline simdjson_result simdjson_result::find_field_unordered(const char *key) & noexcept { if (error()) { return error(); } return first.find_field_unordered(key); } simdjson_inline simdjson_result simdjson_result::operator[](std::string_view key) & noexcept { if (error()) { return error(); } return first[key]; } simdjson_inline simdjson_result simdjson_result::operator[](const char *key) & noexcept { if (error()) { return error(); } return first[key]; } simdjson_inline simdjson_result simdjson_result::find_field(std::string_view key) & noexcept { if (error()) { return error(); } return first.find_field(key); } simdjson_inline simdjson_result simdjson_result::find_field(const char *key) & noexcept { if (error()) { return error(); } return first.find_field(key); } simdjson_inline simdjson_result simdjson_result::get_array() & noexcept { if (error()) { return error(); } return first.get_array(); } simdjson_inline simdjson_result simdjson_result::get_object() & noexcept { if (error()) { return error(); } return first.get_object(); } simdjson_inline simdjson_result simdjson_result::get_uint64() noexcept { if (error()) { return error(); } return 
/*
 * Typed getters of the same wrapper (get_uint64 ... type): error-first forwarding to
 * `first`. The template get_string(receiver, allow_replacement) returns a bare
 * error_code.
 */
first.get_uint64(); } simdjson_inline simdjson_result simdjson_result::get_uint64_in_string() noexcept { if (error()) { return error(); } return first.get_uint64_in_string(); } simdjson_inline simdjson_result simdjson_result::get_int64() noexcept { if (error()) { return error(); } return first.get_int64(); } simdjson_inline simdjson_result simdjson_result::get_int64_in_string() noexcept { if (error()) { return error(); } return first.get_int64_in_string(); } simdjson_inline simdjson_result simdjson_result::get_double() noexcept { if (error()) { return error(); } return first.get_double(); } simdjson_inline simdjson_result simdjson_result::get_double_in_string() noexcept { if (error()) { return error(); } return first.get_double_in_string(); } simdjson_inline simdjson_result simdjson_result::get_string(bool allow_replacement) noexcept { if (error()) { return error(); } return first.get_string(allow_replacement); } template simdjson_inline error_code simdjson_result::get_string(string_type& receiver, bool allow_replacement) noexcept { if (error()) { return error(); } return first.get_string(receiver, allow_replacement); } simdjson_inline simdjson_result simdjson_result::get_wobbly_string() noexcept { if (error()) { return error(); } return first.get_wobbly_string(); } simdjson_inline simdjson_result simdjson_result::get_raw_json_string() noexcept { if (error()) { return error(); } return first.get_raw_json_string(); } simdjson_inline simdjson_result simdjson_result::get_bool() noexcept { if (error()) { return error(); } return first.get_bool(); } simdjson_inline simdjson_result simdjson_result::get_value() noexcept { if (error()) { return error(); } return first.get_value(); } simdjson_inline simdjson_result simdjson_result::is_null() noexcept { if (error()) { return error(); } return first.is_null(); } simdjson_inline simdjson_result simdjson_result::type() noexcept { if (error()) { return error(); } return first.type(); } simdjson_inline simdjson_result 
/*
 * Type/number queries, then the throwing conversion operators (SIMDJSON_EXCEPTIONS
 * only): throw simdjson_error(error()) on a stored error, otherwise return `first`.
 */
simdjson_result::is_scalar() noexcept { if (error()) { return error(); } return first.is_scalar(); } simdjson_inline simdjson_result simdjson_result::is_negative() noexcept { if (error()) { return error(); } return first.is_negative(); } simdjson_inline simdjson_result simdjson_result::is_integer() noexcept { if (error()) { return error(); } return first.is_integer(); } simdjson_inline simdjson_result simdjson_result::get_number_type() noexcept { if (error()) { return error(); } return first.get_number_type(); } simdjson_inline simdjson_result simdjson_result::get_number() noexcept { if (error()) { return error(); } return first.get_number(); } #if SIMDJSON_EXCEPTIONS simdjson_inline simdjson_result::operator fallback::ondemand::array() & noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } simdjson_inline simdjson_result::operator fallback::ondemand::object() & noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } simdjson_inline simdjson_result::operator uint64_t() noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } simdjson_inline simdjson_result::operator int64_t() noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } simdjson_inline simdjson_result::operator double() noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } simdjson_inline simdjson_result::operator std::string_view() noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } simdjson_inline simdjson_result::operator fallback::ondemand::raw_json_string() noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } simdjson_inline simdjson_result::operator bool() noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } simdjson_inline simdjson_result::operator fallback::ondemand::value() noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } #endif simdjson_inline 
/*
 * Tail of document-inl.h (current_location, raw_json_token and at_pointer forwarders),
 * then the start of document_stream-inl.h. When SIMDJSON_THREADS_ENABLED is defined,
 * stage1_worker coordinates a background thread through locking_mutex, cond_var and the
 * has_work / can_work flags; finish() blocks until has_work is false.
 */
simdjson_result simdjson_result::current_location() noexcept { if (error()) { return error(); } return first.current_location(); } simdjson_inline simdjson_result simdjson_result::raw_json_token() noexcept { if (error()) { return error(); } return first.raw_json_token(); } simdjson_inline simdjson_result simdjson_result::at_pointer(std::string_view json_pointer) noexcept { if (error()) { return error(); } return first.at_pointer(json_pointer); } } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_INL_H /* end file simdjson/generic/ondemand/document-inl.h for fallback */ /* including simdjson/generic/ondemand/document_stream-inl.h for fallback: #include "simdjson/generic/ondemand/document_stream-inl.h" */ /* begin file simdjson/generic/ondemand/document_stream-inl.h for fallback */ #ifndef SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_STREAM_INL_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_STREAM_INL_H */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/document_stream.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/document-inl.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base-inl.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ #include #include namespace simdjson { namespace fallback { namespace ondemand { #ifdef SIMDJSON_THREADS_ENABLED inline void stage1_worker::finish() { // After calling "run" someone would call finish() to wait // for the end of the processing. // This function will wait until either the thread has done // the processing or, else, the destructor has been called. 
/*
 * The destructor calls stop_thread(). start_thread() creates the worker thread once
 * (it returns early if `thread` is already joinable). The thread loop waits for
 * has_work or !can_work, leaves the loop when can_work is false, and otherwise stores
 * the result of owner->run_stage1(...) in owner->stage1_thread_error, clears has_work
 * and notifies the waiter in finish().
 */
std::unique_lock lock(locking_mutex); cond_var.wait(lock, [this]{return has_work == false;}); } inline stage1_worker::~stage1_worker() { // The thread may never outlive the stage1_worker instance // and will always be stopped/joined before the stage1_worker // instance is gone. stop_thread(); } inline void stage1_worker::start_thread() { std::unique_lock lock(locking_mutex); if(thread.joinable()) { return; // This should never happen but we never want to create more than one thread. } thread = std::thread([this]{ while(true) { std::unique_lock thread_lock(locking_mutex); // We wait for either "run" or "stop_thread" to be called. cond_var.wait(thread_lock, [this]{return has_work || !can_work;}); // If, for some reason, the stop_thread() method was called (i.e., the // destructor of stage1_worker is called, then we want to immediately destroy // the thread (and not do any more processing). if(!can_work) { break; } this->owner->stage1_thread_error = this->owner->run_stage1(*this->stage1_thread_parser, this->_next_batch_start); this->has_work = false; // The condition variable call should be moved after thread_lock.unlock() for performance // reasons but thread sanitizers may report it as a data race if we do. // See https://stackoverflow.com/questions/35775501/c-should-condition-variable-be-notified-under-lock cond_var.notify_one(); // will notify "finish" thread_lock.unlock(); } } ); } inline void stage1_worker::stop_thread() { std::unique_lock lock(locking_mutex); // We have to make sure that all locks can be released. 
/*
 * stop_thread() (continued) clears both flags under the lock, wakes all waiters,
 * unlocks and joins the thread if it is joinable. run() publishes the job (owner,
 * parser, batch start), sets has_work and notifies the worker.
 * document_stream construction raises batch_size to at least MINIMAL_BATCH_SIZE and,
 * with threads enabled, sets error = MEMALLOC when `worker` holds no object; the
 * default constructor starts in the UNINITIALIZED error state. The destructor resets
 * `worker` when threads are enabled.
 * truncated_bytes(): len - batch_start when error == CAPACITY, otherwise a difference
 * of two structural_indexes entries (the expression finishes on the next line).
 */
can_work = false; has_work = false; cond_var.notify_all(); lock.unlock(); if(thread.joinable()) { thread.join(); } } inline void stage1_worker::run(document_stream * ds, parser * stage1, size_t next_batch_start) { std::unique_lock lock(locking_mutex); owner = ds; _next_batch_start = next_batch_start; stage1_thread_parser = stage1; has_work = true; // The condition variable call should be moved after thread_lock.unlock() for performance // reasons but thread sanitizers may report it as a data race if we do. // See https://stackoverflow.com/questions/35775501/c-should-condition-variable-be-notified-under-lock cond_var.notify_one(); // will notify the thread lock that we have work lock.unlock(); } #endif // SIMDJSON_THREADS_ENABLED simdjson_inline document_stream::document_stream( ondemand::parser &_parser, const uint8_t *_buf, size_t _len, size_t _batch_size, bool _allow_comma_separated ) noexcept : parser{&_parser}, buf{_buf}, len{_len}, batch_size{_batch_size <= MINIMAL_BATCH_SIZE ? MINIMAL_BATCH_SIZE : _batch_size}, allow_comma_separated{_allow_comma_separated}, error{SUCCESS} #ifdef SIMDJSON_THREADS_ENABLED , use_thread(_parser.threaded) // we need to make a copy because _parser.threaded can change #endif { #ifdef SIMDJSON_THREADS_ENABLED if(worker.get() == nullptr) { error = MEMALLOC; } #endif } simdjson_inline document_stream::document_stream() noexcept : parser{nullptr}, buf{nullptr}, len{0}, batch_size{0}, allow_comma_separated{false}, error{UNINITIALIZED} #ifdef SIMDJSON_THREADS_ENABLED , use_thread(false) #endif { } simdjson_inline document_stream::~document_stream() noexcept { #ifdef SIMDJSON_THREADS_ENABLED worker.reset(); #endif } inline size_t document_stream::size_in_bytes() const noexcept { return len; } inline size_t document_stream::truncated_bytes() const noexcept { if(error == CAPACITY) { return len - batch_start; } return parser->implementation->structural_indexes[parser->implementation->n_structural_indexes] - 
/*
 * Stream iterator. operator*() wraps stream->doc together with stream->error.
 * operator++() marks the iterator finished when an error is already set, advances with
 * stream->next(), and also finishes when the stream reports EMPTY. operator!= compares
 * only the `finished` flags of the two iterators.
 */
parser->implementation->structural_indexes[parser->implementation->n_structural_indexes + 1]; } simdjson_inline document_stream::iterator::iterator() noexcept : stream{nullptr}, finished{true} { } simdjson_inline document_stream::iterator::iterator(document_stream* _stream, bool is_end) noexcept : stream{_stream}, finished{is_end} { } simdjson_inline simdjson_result document_stream::iterator::operator*() noexcept { //if(stream->error) { return stream->error; } return simdjson_result(stream->doc, stream->error); } simdjson_inline document_stream::iterator& document_stream::iterator::operator++() noexcept { // If there is an error, then we want the iterator // to be finished, no matter what. (E.g., we do not // keep generating documents with errors, or go beyond // a document with errors.) // // Users do not have to call "operator*()" when they use operator++, // so we need to end the stream in the operator++ function. // // Note that setting finished = true is essential otherwise // we would enter an infinite loop. if (stream->error) { finished = true; } // Note that stream->next() is guarded against error conditions // (it will immediately return if stream->error casts to true). // In effect, this next function does nothing when (stream->error) // is true (hence the risk of an infinite loop). stream->next(); // If that was the last document, we're finished. // It is the only type of error we do not want to appear // in operator*. if (stream->error == EMPTY) { finished = true; } // If we had any other kind of error (not EMPTY) then we want // to pass it along to the operator* and we cannot mark the result // as "finished" just yet. return *this; } simdjson_inline bool document_stream::iterator::operator!=(const document_stream::iterator &other) const noexcept { return finished != other.finished; } simdjson_inline document_stream::iterator document_stream::begin() noexcept { start(); // If there are no documents, we're finished. 
/*
 * begin() (continued) yields an already-finished iterator when start() left
 * error == EMPTY; end() always yields a finished iterator.
 * start(): allocate the parser for batch_size, run stage 1 on the first batch (moving
 * forward while a batch reports EMPTY and input remains), build `doc` on that batch with
 * _streaming set, and - when use_thread is set and more input remains - allocate
 * stage1_thread_parser and launch the worker on the next batch.
 * next(): advance to the following document, recompute doc_index, and load further
 * batches once the structural indexes of the current batch are exhausted.
 */
return iterator(this, error == EMPTY); } simdjson_inline document_stream::iterator document_stream::end() noexcept { return iterator(this, true); } inline void document_stream::start() noexcept { if (error) { return; } error = parser->allocate(batch_size); if (error) { return; } // Always run the first stage 1 parse immediately batch_start = 0; error = run_stage1(*parser, batch_start); while(error == EMPTY) { // In exceptional cases, we may start with an empty block batch_start = next_batch_start(); if (batch_start >= len) { return; } error = run_stage1(*parser, batch_start); } if (error) { return; } doc_index = batch_start; doc = document(json_iterator(&buf[batch_start], parser)); doc.iter._streaming = true; #ifdef SIMDJSON_THREADS_ENABLED if (use_thread && next_batch_start() < len) { // Kick off the first thread on next batch if needed error = stage1_thread_parser.allocate(batch_size); if (error) { return; } worker->start_thread(); start_stage1_thread(); if (error) { return; } } #endif // SIMDJSON_THREADS_ENABLED } inline void document_stream::next() noexcept { // We always exit at once, once in an error condition. if (error) { return; } next_document(); if (error) { return; } auto cur_struct_index = doc.iter._root - parser->implementation->structural_indexes.get(); doc_index = batch_start + parser->implementation->structural_indexes[cur_struct_index]; // Check if at end of structural indexes (i.e. 
at end of batch) if(cur_struct_index >= static_cast(parser->implementation->n_structural_indexes)) { error = EMPTY; // Load another batch (if available) while (error == EMPTY) { batch_start = next_batch_start(); if (batch_start >= len) { break; } #ifdef SIMDJSON_THREADS_ENABLED if(use_thread) { load_from_stage1_thread(); } else { error = run_stage1(*parser, batch_start); } #else error = run_stage1(*parser, batch_start); #endif /** * Whenever we move to another window, we need to update all pointers to make * it appear as if the input buffer started at the beginning of the window. * * Take this input: * * {"z":5} {"1":1,"2":2,"4":4} [7, 10, 9] [15, 11, 12, 13] [154, 110, 112, 1311] * * Say you process the following window... * * '{"z":5} {"1":1,"2":2,"4":4} [7, 10, 9]' * * When you do so, the json_iterator has a pointer at the beginning of the memory region * (pointing at the beginning of '{"z"...'. * * When you move to the window that starts at... * * '[7, 10, 9] [15, 11, 12, 13] ... * * then it is not sufficient to just run stage 1. You also need to re-anchor the * json_iterator so that it believes we are starting at '[7, 10, 9]...'. * * Under the DOM front-end, this gets done automatically because the parser owns * the pointer to the data, and when you call stage1 and then stage2 on the same * parser, then stage2 will run on the pointer acquired by stage1. * * That is, stage1 calls "this->buf = _buf" so the parser remembers the buffer that * we used. But json_iterator has no callback when stage1 is called on the parser. * In fact, I think that the parser is unaware of json_iterator. * * * So we need to re-anchor the json_iterator after each call to stage 1 so that * all of the pointers are in sync. */ doc.iter = json_iterator(&buf[batch_start], parser); doc.iter._streaming = true; /** * End of resync. */ if (error) { continue; } // If the error was EMPTY, we may want to load another batch. 
/*
 * This line closes next() and then defines:
 *  - next_document(): skip to depth 0, reset the depth to 1, optionally consume a ','
 *    separator, rewind the string buffer (invalidating earlier strings) and record the
 *    new root position.
 *  - next_batch_start(): batch_start plus structural_indexes[n_structural_indexes].
 *  - run_stage1(): runs stage 1 on at most batch_size bytes; streaming_final is used
 *    when the remaining input fits in one batch, streaming_partial otherwise.
 *  - iterator::current_index(): returns stream->doc_index.
 *  - iterator::source(): returns a view of the current document's text.
 *    NOTE(review): the scalar branch computes its length without adding
 *    stream->batch_start, unlike the final return statement - confirm it is correct for
 *    documents that start beyond the first batch. The final return also ends with a
 *    doubled semicolon (an empty statement).
 *  - load_from_stage1_thread(): waits for the worker, swaps stage1_thread_parser with
 *    *parser, adopts stage1_thread_error and schedules the next batch if input remains.
 */
doc_index = batch_start; } } } inline void document_stream::next_document() noexcept { // Go to next place where depth=0 (document depth) error = doc.iter.skip_child(0); if (error) { return; } // Always set depth=1 at the start of document doc.iter._depth = 1; // consume comma if comma separated is allowed if (allow_comma_separated) { doc.iter.consume_character(','); } // Resets the string buffer at the beginning, thus invalidating the strings. doc.iter._string_buf_loc = parser->string_buf.get(); doc.iter._root = doc.iter.position(); } inline size_t document_stream::next_batch_start() const noexcept { return batch_start + parser->implementation->structural_indexes[parser->implementation->n_structural_indexes]; } inline error_code document_stream::run_stage1(ondemand::parser &p, size_t _batch_start) noexcept { // This code only updates the structural index in the parser, it does not update any json_iterator // instance. size_t remaining = len - _batch_start; if (remaining <= batch_size) { return p.implementation->stage1(&buf[_batch_start], remaining, stage1_mode::streaming_final); } else { return p.implementation->stage1(&buf[_batch_start], batch_size, stage1_mode::streaming_partial); } } simdjson_inline size_t document_stream::iterator::current_index() const noexcept { return stream->doc_index; } simdjson_inline std::string_view document_stream::iterator::source() const noexcept { auto depth = stream->doc.iter.depth(); auto cur_struct_index = stream->doc.iter._root - stream->parser->implementation->structural_indexes.get(); // If at root, process the first token to determine if scalar value if (stream->doc.iter.at_root()) { switch (stream->buf[stream->batch_start + stream->parser->implementation->structural_indexes[cur_struct_index]]) { case '{': case '[': // Depth=1 already at start of document break; case '}': case ']': depth--; break; default: // Scalar value document // TODO: Remove any trailing whitespaces // This returns a string spanning from start of value 
to the beginning of the next document (excluded) return std::string_view(reinterpret_cast(stream->buf) + current_index(), stream->parser->implementation->structural_indexes[++cur_struct_index] - current_index() - 1); } cur_struct_index++; } while (cur_struct_index <= static_cast(stream->parser->implementation->n_structural_indexes)) { switch (stream->buf[stream->batch_start + stream->parser->implementation->structural_indexes[cur_struct_index]]) { case '{': case '[': depth++; break; case '}': case ']': depth--; break; } if (depth == 0) { break; } cur_struct_index++; } return std::string_view(reinterpret_cast(stream->buf) + current_index(), stream->parser->implementation->structural_indexes[cur_struct_index] - current_index() + stream->batch_start + 1);; } inline error_code document_stream::iterator::error() const noexcept { return stream->error; } #ifdef SIMDJSON_THREADS_ENABLED inline void document_stream::load_from_stage1_thread() noexcept { worker->finish(); // Swap to the parser that was loaded up in the thread. Make sure the parser has // enough memory to swap to, as well. std::swap(stage1_thread_parser,*parser); error = stage1_thread_error; if (error) { return; } // If there's anything left, start the stage 1 thread! if (next_batch_start() < len) { start_stage1_thread(); } } inline void document_stream::start_stage1_thread() noexcept { // we call the thread on a lambda that will update // this->stage1_thread_error // there is only one thread that may write to this value // TODO this is NOT exception-safe. 
/*
 * start_stage1_thread() (continued): pre-sets stage1_thread_error to UNINITIALIZED and
 * hands the next batch to the worker via worker->run(). Then come the result-wrapper
 * constructors for document_stream and the beginning of field-inl.h (the field
 * constructors and the start of field::start).
 */
this->stage1_thread_error = UNINITIALIZED; // In case something goes wrong, make sure it's an error size_t _next_batch_start = this->next_batch_start(); worker->run(this, & this->stage1_thread_parser, _next_batch_start); } #endif // SIMDJSON_THREADS_ENABLED } // namespace ondemand } // namespace fallback } // namespace simdjson namespace simdjson { simdjson_inline simdjson_result::simdjson_result( error_code error ) noexcept : implementation_simdjson_result_base(error) { } simdjson_inline simdjson_result::simdjson_result( fallback::ondemand::document_stream &&value ) noexcept : implementation_simdjson_result_base( std::forward(value) ) { } } #endif // SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_STREAM_INL_H /* end file simdjson/generic/ondemand/document_stream-inl.h for fallback */ /* including simdjson/generic/ondemand/field-inl.h for fallback: #include "simdjson/generic/ondemand/field-inl.h" */ /* begin file simdjson/generic/ondemand/field-inl.h for fallback */ #ifndef SIMDJSON_GENERIC_ONDEMAND_FIELD_INL_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_FIELD_INL_H */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/field.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value-inl.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator-inl.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { namespace fallback { namespace ondemand { // clang 6 doesn't think the default constructor can be noexcept, so we make it explicit simdjson_inline field::field() noexcept : std::pair() {} simdjson_inline field::field(raw_json_string key, ondemand::value &&value) noexcept : std::pair(key, std::forward(value)) { } simdjson_inline simdjson_result 
field::start(value_iterator &parent_iter) noexcept { raw_json_string key; SIMDJSON_TRY( parent_iter.field_key().get(key) ); SIMDJSON_TRY( parent_iter.field_value() ); return field::start(parent_iter, key); } simdjson_inline simdjson_result field::start(const value_iterator &parent_iter, raw_json_string key) noexcept { return field(key, parent_iter.child()); } simdjson_inline simdjson_warn_unused simdjson_result field::unescaped_key(bool allow_replacement) noexcept { SIMDJSON_ASSUME(first.buf != nullptr); // We would like to call .alive() but Visual Studio won't let us. simdjson_result answer = first.unescape(second.iter.json_iter(), allow_replacement); first.consume(); return answer; } simdjson_inline raw_json_string field::key() const noexcept { SIMDJSON_ASSUME(first.buf != nullptr); // We would like to call .alive() by Visual Studio won't let us. return first; } simdjson_inline value &field::value() & noexcept { return second; } simdjson_inline value field::value() && noexcept { return std::forward(*this).second; } } // namespace ondemand } // namespace fallback } // namespace simdjson namespace simdjson { simdjson_inline simdjson_result::simdjson_result( fallback::ondemand::field &&value ) noexcept : implementation_simdjson_result_base( std::forward(value) ) { } simdjson_inline simdjson_result::simdjson_result( error_code error ) noexcept : implementation_simdjson_result_base(error) { } simdjson_inline simdjson_result simdjson_result::key() noexcept { if (error()) { return error(); } return first.key(); } simdjson_inline simdjson_result simdjson_result::unescaped_key(bool allow_replacement) noexcept { if (error()) { return error(); } return first.unescaped_key(allow_replacement); } simdjson_inline simdjson_result simdjson_result::value() noexcept { if (error()) { return error(); } return std::move(first.value()); } } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_FIELD_INL_H /* end file simdjson/generic/ondemand/field-inl.h for fallback */ /* 
including simdjson/generic/ondemand/json_iterator-inl.h for fallback: #include "simdjson/generic/ondemand/json_iterator-inl.h" */ /* begin file simdjson/generic/ondemand/json_iterator-inl.h for fallback */ #ifndef SIMDJSON_GENERIC_ONDEMAND_JSON_ITERATOR_INL_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_JSON_ITERATOR_INL_H */ /* amalgamation skipped (editor-only): #include "simdjson/internal/dom_parser_implementation.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/parser.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/logger-inl.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/parser-inl.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/token_iterator-inl.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { namespace fallback { namespace ondemand { simdjson_inline json_iterator::json_iterator(json_iterator &&other) noexcept : token(std::forward(other.token)), parser{other.parser}, _string_buf_loc{other._string_buf_loc}, error{other.error}, _depth{other._depth}, _root{other._root}, _streaming{other._streaming} { other.parser = nullptr; } simdjson_inline json_iterator &json_iterator::operator=(json_iterator &&other) noexcept { token = other.token; parser = other.parser; _string_buf_loc = other._string_buf_loc; error = other.error; _depth = other._depth; _root = other._root; _streaming = other._streaming; other.parser = nullptr; return *this; } simdjson_inline json_iterator::json_iterator(const uint8_t *buf, 
ondemand::parser *_parser) noexcept : token(buf, &_parser->implementation->structural_indexes[0]), parser{_parser}, _string_buf_loc{parser->string_buf.get()}, _depth{1}, _root{parser->implementation->structural_indexes.get()}, _streaming{false} { logger::log_headers(); #if SIMDJSON_CHECK_EOF assert_more_tokens(); #endif } inline void json_iterator::rewind() noexcept { token.set_position( root_position() ); logger::log_headers(); // We start again _string_buf_loc = parser->string_buf.get(); _depth = 1; } inline bool json_iterator::balanced() const noexcept { token_iterator ti(token); int32_t count{0}; ti.set_position( root_position() ); while(ti.peek() <= peek_last()) { switch (*ti.return_current_and_advance()) { case '[': case '{': count++; break; case ']': case '}': count--; break; default: break; } } return count == 0; } // GCC 7 warns when the first line of this function is inlined away into oblivion due to the caller // relating depth and parent_depth, which is a desired effect. The warning does not show up if the // skip_child() function is not marked inline). SIMDJSON_PUSH_DISABLE_WARNINGS SIMDJSON_DISABLE_STRICT_OVERFLOW_WARNING simdjson_warn_unused simdjson_inline error_code json_iterator::skip_child(depth_t parent_depth) noexcept { if (depth() <= parent_depth) { return SUCCESS; } switch (*return_current_and_advance()) { // TODO consider whether matching braces is a requirement: if non-matching braces indicates // *missing* braces, then future lookups are not in the object/arrays they think they are, // violating the rule "validate enough structure that the user can be confident they are // looking at the right values." 
// PERF TODO we can eliminate the switch here with a lookup of how much to add to depth // For the first open array/object in a value, we've already incremented depth, so keep it the same // We never stop at colon, but if we did, it wouldn't affect depth case '[': case '{': case ':': logger::log_start_value(*this, "skip"); break; // If there is a comma, we have just finished a value in an array/object, and need to get back in case ',': logger::log_value(*this, "skip"); break; // ] or } means we just finished a value and need to jump out of the array/object case ']': case '}': logger::log_end_value(*this, "skip"); _depth--; if (depth() <= parent_depth) { return SUCCESS; } #if SIMDJSON_CHECK_EOF // If there are no more tokens, the parent is incomplete. if (at_end()) { return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "Missing [ or { at start"); } #endif // SIMDJSON_CHECK_EOF break; case '"': if(*peek() == ':') { // We are at a key!!! // This might happen if you just started an object and you skip it immediately. // Performance note: it would be nice to get rid of this check as it is somewhat // expensive. // https://github.com/simdjson/simdjson/issues/1742 logger::log_value(*this, "key"); return_current_and_advance(); // eat up the ':' break; // important!!! } simdjson_fallthrough; // Anything else must be a scalar value default: // For the first scalar, we will have incremented depth already, so we decrement it here. 
logger::log_value(*this, "skip"); _depth--; if (depth() <= parent_depth) { return SUCCESS; } break; } // Now that we've considered the first value, we only increment/decrement for arrays/objects while (position() < end_position()) { switch (*return_current_and_advance()) { case '[': case '{': logger::log_start_value(*this, "skip"); _depth++; break; // TODO consider whether matching braces is a requirement: if non-matching braces indicates // *missing* braces, then future lookups are not in the object/arrays they think they are, // violating the rule "validate enough structure that the user can be confident they are // looking at the right values." // PERF TODO we can eliminate the switch here with a lookup of how much to add to depth case ']': case '}': logger::log_end_value(*this, "skip"); _depth--; if (depth() <= parent_depth) { return SUCCESS; } break; default: logger::log_value(*this, "skip", ""); break; } } return report_error(TAPE_ERROR, "not enough close braces"); } SIMDJSON_POP_DISABLE_WARNINGS simdjson_inline bool json_iterator::at_root() const noexcept { return position() == root_position(); } simdjson_inline bool json_iterator::is_single_token() const noexcept { return parser->implementation->n_structural_indexes == 1; } simdjson_inline bool json_iterator::streaming() const noexcept { return _streaming; } simdjson_inline token_position json_iterator::root_position() const noexcept { return _root; } simdjson_inline void json_iterator::assert_at_document_depth() const noexcept { SIMDJSON_ASSUME( _depth == 1 ); } simdjson_inline void json_iterator::assert_at_root() const noexcept { SIMDJSON_ASSUME( _depth == 1 ); #ifndef SIMDJSON_CLANG_VISUAL_STUDIO // Under Visual Studio, the next SIMDJSON_ASSUME fails with: the argument // has side effects that will be discarded. 
SIMDJSON_ASSUME( token.position() == _root ); #endif } simdjson_inline void json_iterator::assert_more_tokens(uint32_t required_tokens) const noexcept { assert_valid_position(token._position + required_tokens - 1); } simdjson_inline void json_iterator::assert_valid_position(token_position position) const noexcept { #ifndef SIMDJSON_CLANG_VISUAL_STUDIO SIMDJSON_ASSUME( position >= &parser->implementation->structural_indexes[0] ); SIMDJSON_ASSUME( position < &parser->implementation->structural_indexes[parser->implementation->n_structural_indexes] ); #endif } simdjson_inline bool json_iterator::at_end() const noexcept { return position() == end_position(); } simdjson_inline token_position json_iterator::end_position() const noexcept { uint32_t n_structural_indexes{parser->implementation->n_structural_indexes}; return &parser->implementation->structural_indexes[n_structural_indexes]; } inline std::string json_iterator::to_string() const noexcept { if( !is_alive() ) { return "dead json_iterator instance"; } const char * current_structural = reinterpret_cast(token.peek()); return std::string("json_iterator [ depth : ") + std::to_string(_depth) + std::string(", structural : '") + std::string(current_structural,1) + std::string("', offset : ") + std::to_string(token.current_offset()) + std::string("', error : ") + error_message(error) + std::string(" ]"); } inline simdjson_result json_iterator::current_location() const noexcept { if (!is_alive()) { // Unrecoverable error if (!at_root()) { return reinterpret_cast(token.peek(-1)); } else { return reinterpret_cast(token.peek()); } } if (at_end()) { return OUT_OF_BOUNDS; } return reinterpret_cast(token.peek()); } simdjson_inline bool json_iterator::is_alive() const noexcept { return parser; } simdjson_inline void json_iterator::abandon() noexcept { parser = nullptr; _depth = 0; } simdjson_inline const uint8_t *json_iterator::return_current_and_advance() noexcept { #if SIMDJSON_CHECK_EOF assert_more_tokens(); #endif // 
SIMDJSON_CHECK_EOF return token.return_current_and_advance(); } simdjson_inline const uint8_t *json_iterator::unsafe_pointer() const noexcept { // deliberately done without safety guard: return token.peek(); } simdjson_inline const uint8_t *json_iterator::peek(int32_t delta) const noexcept { #if SIMDJSON_CHECK_EOF assert_more_tokens(delta+1); #endif // SIMDJSON_CHECK_EOF return token.peek(delta); } simdjson_inline uint32_t json_iterator::peek_length(int32_t delta) const noexcept { #if SIMDJSON_CHECK_EOF assert_more_tokens(delta+1); #endif // #if SIMDJSON_CHECK_EOF return token.peek_length(delta); } simdjson_inline const uint8_t *json_iterator::peek(token_position position) const noexcept { // todo: currently we require end-of-string buffering, but the following // assert_valid_position should be turned on if/when we lift that condition. // assert_valid_position(position); // This is almost surely related to SIMDJSON_CHECK_EOF but given that SIMDJSON_CHECK_EOF // is ON by default, we have no choice but to disable it for real with a comment. return token.peek(position); } simdjson_inline uint32_t json_iterator::peek_length(token_position position) const noexcept { #if SIMDJSON_CHECK_EOF assert_valid_position(position); #endif // SIMDJSON_CHECK_EOF return token.peek_length(position); } simdjson_inline token_position json_iterator::last_position() const noexcept { // The following line fails under some compilers... // SIMDJSON_ASSUME(parser->implementation->n_structural_indexes > 0); // since it has side-effects. 
uint32_t n_structural_indexes{parser->implementation->n_structural_indexes}; SIMDJSON_ASSUME(n_structural_indexes > 0); return &parser->implementation->structural_indexes[n_structural_indexes - 1]; } simdjson_inline const uint8_t *json_iterator::peek_last() const noexcept { return token.peek(last_position()); } simdjson_inline void json_iterator::ascend_to(depth_t parent_depth) noexcept { SIMDJSON_ASSUME(parent_depth >= 0 && parent_depth < INT32_MAX - 1); SIMDJSON_ASSUME(_depth == parent_depth + 1); _depth = parent_depth; } simdjson_inline void json_iterator::descend_to(depth_t child_depth) noexcept { SIMDJSON_ASSUME(child_depth >= 1 && child_depth < INT32_MAX); SIMDJSON_ASSUME(_depth == child_depth - 1); _depth = child_depth; } simdjson_inline depth_t json_iterator::depth() const noexcept { return _depth; } simdjson_inline uint8_t *&json_iterator::string_buf_loc() noexcept { return _string_buf_loc; } simdjson_inline error_code json_iterator::report_error(error_code _error, const char *message) noexcept { SIMDJSON_ASSUME(_error != SUCCESS && _error != UNINITIALIZED && _error != INCORRECT_TYPE && _error != NO_SUCH_FIELD); logger::log_error(*this, message); error = _error; return error; } simdjson_inline token_position json_iterator::position() const noexcept { return token.position(); } simdjson_inline simdjson_result json_iterator::unescape(raw_json_string in, bool allow_replacement) noexcept { return parser->unescape(in, _string_buf_loc, allow_replacement); } simdjson_inline simdjson_result json_iterator::unescape_wobbly(raw_json_string in) noexcept { return parser->unescape_wobbly(in, _string_buf_loc); } simdjson_inline void json_iterator::reenter_child(token_position position, depth_t child_depth) noexcept { SIMDJSON_ASSUME(child_depth >= 1 && child_depth < INT32_MAX); SIMDJSON_ASSUME(_depth == child_depth - 1); #if SIMDJSON_DEVELOPMENT_CHECKS #ifndef SIMDJSON_CLANG_VISUAL_STUDIO SIMDJSON_ASSUME(size_t(child_depth) < parser->max_depth()); 
SIMDJSON_ASSUME(position >= parser->start_positions[child_depth]); #endif #endif token.set_position(position); _depth = child_depth; } simdjson_inline error_code json_iterator::consume_character(char c) noexcept { if (*peek() == c) { return_current_and_advance(); return SUCCESS; } return TAPE_ERROR; } #if SIMDJSON_DEVELOPMENT_CHECKS simdjson_inline token_position json_iterator::start_position(depth_t depth) const noexcept { SIMDJSON_ASSUME(size_t(depth) < parser->max_depth()); return size_t(depth) < parser->max_depth() ? parser->start_positions[depth] : 0; } simdjson_inline void json_iterator::set_start_position(depth_t depth, token_position position) noexcept { SIMDJSON_ASSUME(size_t(depth) < parser->max_depth()); if(size_t(depth) < parser->max_depth()) { parser->start_positions[depth] = position; } } #endif simdjson_inline error_code json_iterator::optional_error(error_code _error, const char *message) noexcept { SIMDJSON_ASSUME(_error == INCORRECT_TYPE || _error == NO_SUCH_FIELD); logger::log_error(*this, message); return _error; } simdjson_warn_unused simdjson_inline bool json_iterator::copy_to_buffer(const uint8_t *json, uint32_t max_len, uint8_t *tmpbuf, size_t N) noexcept { // This function is not expected to be called in performance-sensitive settings. // Let us guard against silly cases: if((N < max_len) || (N == 0)) { return false; } // Copy to the buffer. std::memcpy(tmpbuf, json, max_len); if(N > max_len) { // We pad whatever remains with ' '. 
std::memset(tmpbuf + max_len, ' ', N - max_len); } return true; } } // namespace ondemand } // namespace fallback } // namespace simdjson namespace simdjson { simdjson_inline simdjson_result::simdjson_result(fallback::ondemand::json_iterator &&value) noexcept : implementation_simdjson_result_base(std::forward(value)) {} simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept : implementation_simdjson_result_base(error) {} } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_JSON_ITERATOR_INL_H /* end file simdjson/generic/ondemand/json_iterator-inl.h for fallback */ /* including simdjson/generic/ondemand/json_type-inl.h for fallback: #include "simdjson/generic/ondemand/json_type-inl.h" */ /* begin file simdjson/generic/ondemand/json_type-inl.h for fallback */ #ifndef SIMDJSON_GENERIC_ONDEMAND_JSON_TYPE_INL_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_JSON_TYPE_INL_H */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_type.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base-inl.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { namespace fallback { namespace ondemand { inline std::ostream& operator<<(std::ostream& out, json_type type) noexcept { switch (type) { case json_type::array: out << "array"; break; case json_type::object: out << "object"; break; case json_type::number: out << "number"; break; case json_type::string: out << "string"; break; case json_type::boolean: out << "boolean"; break; case json_type::null: out << "null"; break; default: SIMDJSON_UNREACHABLE(); } return out; } #if SIMDJSON_EXCEPTIONS inline std::ostream& operator<<(std::ostream& out, simdjson_result &type) noexcept(false) { 
return out << type.value(); } #endif simdjson_inline number_type number::get_number_type() const noexcept { return type; } simdjson_inline bool number::is_uint64() const noexcept { return get_number_type() == number_type::unsigned_integer; } simdjson_inline uint64_t number::get_uint64() const noexcept { return payload.unsigned_integer; } simdjson_inline number::operator uint64_t() const noexcept { return get_uint64(); } simdjson_inline bool number::is_int64() const noexcept { return get_number_type() == number_type::signed_integer; } simdjson_inline int64_t number::get_int64() const noexcept { return payload.signed_integer; } simdjson_inline number::operator int64_t() const noexcept { return get_int64(); } simdjson_inline bool number::is_double() const noexcept { return get_number_type() == number_type::floating_point_number; } simdjson_inline double number::get_double() const noexcept { return payload.floating_point_number; } simdjson_inline number::operator double() const noexcept { return get_double(); } simdjson_inline double number::as_double() const noexcept { if(is_double()) { return payload.floating_point_number; } if(is_int64()) { return double(payload.signed_integer); } return double(payload.unsigned_integer); } simdjson_inline void number::append_s64(int64_t value) noexcept { payload.signed_integer = value; type = number_type::signed_integer; } simdjson_inline void number::append_u64(uint64_t value) noexcept { payload.unsigned_integer = value; type = number_type::unsigned_integer; } simdjson_inline void number::append_double(double value) noexcept { payload.floating_point_number = value; type = number_type::floating_point_number; } simdjson_inline void number::skip_double() noexcept { type = number_type::floating_point_number; } } // namespace ondemand } // namespace fallback } // namespace simdjson namespace simdjson { simdjson_inline simdjson_result::simdjson_result(fallback::ondemand::json_type &&value) noexcept : 
implementation_simdjson_result_base(std::forward(value)) {} simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept : implementation_simdjson_result_base(error) {} } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_JSON_TYPE_INL_H /* end file simdjson/generic/ondemand/json_type-inl.h for fallback */ /* including simdjson/generic/ondemand/logger-inl.h for fallback: #include "simdjson/generic/ondemand/logger-inl.h" */ /* begin file simdjson/generic/ondemand/logger-inl.h for fallback */ #ifndef SIMDJSON_GENERIC_ONDEMAND_LOGGER_INL_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_LOGGER_INL_H */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/logger.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ #include #include namespace simdjson { namespace fallback { namespace ondemand { namespace logger { static constexpr const char * DASHES = "----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------"; static constexpr const int LOG_EVENT_LEN = 20; static constexpr const int LOG_BUFFER_LEN = 30; static constexpr const int LOG_SMALL_BUFFER_LEN = 10; static int log_depth = 0; // Not threadsafe. Log only. 
// Helper to turn unprintable or newline characters into spaces static inline char printable_char(char c) { if (c >= 0x20) { return c; } else { return ' '; } } template static inline std::string string_format(const std::string& format, const Args&... args) { SIMDJSON_PUSH_DISABLE_ALL_WARNINGS int size_s = std::snprintf(nullptr, 0, format.c_str(), args...) + 1; auto size = static_cast(size_s); if (size <= 0) return std::string(); std::unique_ptr buf(new char[size]); std::snprintf(buf.get(), size, format.c_str(), args...); SIMDJSON_POP_DISABLE_WARNINGS return std::string(buf.get(), buf.get() + size - 1); } static inline log_level get_log_level_from_env() { SIMDJSON_PUSH_DISABLE_WARNINGS SIMDJSON_DISABLE_DEPRECATED_WARNING // Disable CRT_SECURE warning on MSVC: manually verified this is safe char *lvl = getenv("SIMDJSON_LOG_LEVEL"); SIMDJSON_POP_DISABLE_WARNINGS if (lvl && simdjson_strcasecmp(lvl, "ERROR") == 0) { return log_level::error; } return log_level::info; } static inline log_level log_threshold() { static log_level threshold = get_log_level_from_env(); return threshold; } static inline bool should_log(log_level level) { return level >= log_threshold(); } inline void log_event(const json_iterator &iter, const char *type, std::string_view detail, int delta, int depth_delta) noexcept { log_line(iter, "", type, detail, delta, depth_delta, log_level::info); } inline void log_value(const json_iterator &iter, token_position index, depth_t depth, const char *type, std::string_view detail) noexcept { log_line(iter, index, depth, "", type, detail, log_level::info); } inline void log_value(const json_iterator &iter, const char *type, std::string_view detail, int delta, int depth_delta) noexcept { log_line(iter, "", type, detail, delta, depth_delta, log_level::info); } inline void log_start_value(const json_iterator &iter, token_position index, depth_t depth, const char *type, std::string_view detail) noexcept { log_line(iter, index, depth, "+", type, detail, 
log_level::info); if (LOG_ENABLED) { log_depth++; } } inline void log_start_value(const json_iterator &iter, const char *type, int delta, int depth_delta) noexcept { log_line(iter, "+", type, "", delta, depth_delta, log_level::info); if (LOG_ENABLED) { log_depth++; } } inline void log_end_value(const json_iterator &iter, const char *type, int delta, int depth_delta) noexcept { if (LOG_ENABLED) { log_depth--; } log_line(iter, "-", type, "", delta, depth_delta, log_level::info); } inline void log_error(const json_iterator &iter, const char *error, const char *detail, int delta, int depth_delta) noexcept { log_line(iter, "ERROR: ", error, detail, delta, depth_delta, log_level::error); } inline void log_error(const json_iterator &iter, token_position index, depth_t depth, const char *error, const char *detail) noexcept { log_line(iter, index, depth, "ERROR: ", error, detail, log_level::error); } inline void log_event(const value_iterator &iter, const char *type, std::string_view detail, int delta, int depth_delta) noexcept { log_event(iter.json_iter(), type, detail, delta, depth_delta); } inline void log_value(const value_iterator &iter, const char *type, std::string_view detail, int delta, int depth_delta) noexcept { log_value(iter.json_iter(), type, detail, delta, depth_delta); } inline void log_start_value(const value_iterator &iter, const char *type, int delta, int depth_delta) noexcept { log_start_value(iter.json_iter(), type, delta, depth_delta); } inline void log_end_value(const value_iterator &iter, const char *type, int delta, int depth_delta) noexcept { log_end_value(iter.json_iter(), type, delta, depth_delta); } inline void log_error(const value_iterator &iter, const char *error, const char *detail, int delta, int depth_delta) noexcept { log_error(iter.json_iter(), error, detail, delta, depth_delta); } inline void log_headers() noexcept { if (LOG_ENABLED) { if (simdjson_unlikely(should_log(log_level::info))) { // Technically a static variable is not 
thread-safe, but if you are using threads and logging... well... static bool displayed_hint{false}; log_depth = 0; printf("\n"); if (!displayed_hint) { // We only print this helpful header once. printf("# Logging provides the depth and position of the iterator user-visible steps:\n"); printf("# +array says 'this is where we were when we discovered the start array'\n"); printf( "# -array says 'this is where we were when we ended the array'\n"); printf("# skip says 'this is a structural or value I am skipping'\n"); printf("# +/-skip says 'this is a start/end array or object I am skipping'\n"); printf("#\n"); printf("# The indentation of the terms (array, string,...) indicates the depth,\n"); printf("# in addition to the depth being displayed.\n"); printf("#\n"); printf("# Every token in the document has a single depth determined by the tokens before it,\n"); printf("# and is not affected by what the token actually is.\n"); printf("#\n"); printf("# Not all structural elements are presented as tokens in the logs.\n"); printf("#\n"); printf("# We never give control to the user within an empty array or an empty object.\n"); printf("#\n"); printf("# Inside an array, having a depth greater than the array's depth means that\n"); printf("# we are pointing inside a value.\n"); printf("# Having a depth equal to the array means that we are pointing right before a value.\n"); printf("# Having a depth smaller than the array means that we have moved beyond the array.\n"); displayed_hint = true; } printf("\n"); printf("| %-*s ", LOG_EVENT_LEN, "Event"); printf("| %-*s ", LOG_BUFFER_LEN, "Buffer"); printf("| %-*s ", LOG_SMALL_BUFFER_LEN, "Next"); // printf("| %-*s ", 5, "Next#"); printf("| %-*s ", 5, "Depth"); printf("| Detail "); printf("|\n"); printf("|%.*s", LOG_EVENT_LEN + 2, DASHES); printf("|%.*s", LOG_BUFFER_LEN + 2, DASHES); printf("|%.*s", LOG_SMALL_BUFFER_LEN + 2, DASHES); // printf("|%.*s", 5+2, DASHES); printf("|%.*s", 5 + 2, DASHES); printf("|--------"); printf("|\n"); 
fflush(stdout); } } } template inline void log_line(const json_iterator &iter, const char *title_prefix, const char *title, std::string_view detail, int delta, int depth_delta, log_level level, Args&&... args) noexcept { log_line(iter, iter.position()+delta, depth_t(iter.depth()+depth_delta), title_prefix, title, detail, level, std::forward(args)...); } template inline void log_line(const json_iterator &iter, token_position index, depth_t depth, const char *title_prefix, const char *title, std::string_view detail, log_level level, Args&&... args) noexcept { if (LOG_ENABLED) { if (simdjson_unlikely(should_log(level))) { const int indent = depth * 2; const auto buf = iter.token.buf; auto msg = string_format(title, std::forward(args)...); printf("| %*s%s%-*s ", indent, "", title_prefix, LOG_EVENT_LEN - indent - int(strlen(title_prefix)), msg.c_str()); { // Print the current structural. printf("| "); // Before we begin, the index might point right before the document. // This could be unsafe, see https://github.com/simdjson/simdjson/discussions/1938 if (index < iter._root) { printf("%*s", LOG_BUFFER_LEN, ""); } else { auto current_structural = &buf[*index]; for (int i = 0; i < LOG_BUFFER_LEN; i++) { printf("%c", printable_char(current_structural[i])); } } printf(" "); } { // Print the next structural. 
printf("| "); auto next_structural = &buf[*(index + 1)]; for (int i = 0; i < LOG_SMALL_BUFFER_LEN; i++) { printf("%c", printable_char(next_structural[i])); } printf(" "); } // printf("| %5u ", *(index+1)); printf("| %5i ", depth); printf("| %6.*s ", int(detail.size()), detail.data()); printf("|\n"); fflush(stdout); } } } } // namespace logger } // namespace ondemand } // namespace fallback } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_LOGGER_INL_H /* end file simdjson/generic/ondemand/logger-inl.h for fallback */ /* including simdjson/generic/ondemand/object-inl.h for fallback: #include "simdjson/generic/ondemand/object-inl.h" */ /* begin file simdjson/generic/ondemand/object-inl.h for fallback */ #ifndef SIMDJSON_GENERIC_ONDEMAND_OBJECT_INL_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_OBJECT_INL_H */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/field.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/object.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/object_iterator.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value-inl.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { namespace fallback { namespace ondemand { simdjson_inline simdjson_result object::find_field_unordered(const std::string_view key) & noexcept { bool has_value; SIMDJSON_TRY( iter.find_field_unordered_raw(key).get(has_value) ); if (!has_value) { logger::log_line(iter.json_iter(), "ERROR: ", "Cannot find key %.*s", "", 
-1, 0, logger::log_level::error, static_cast(key.size()), key.data()); return NO_SUCH_FIELD; } return value(iter.child()); } simdjson_inline simdjson_result object::find_field_unordered(const std::string_view key) && noexcept { bool has_value; SIMDJSON_TRY( iter.find_field_unordered_raw(key).get(has_value) ); if (!has_value) { logger::log_line(iter.json_iter(), "ERROR: ", "Cannot find key %.*s", "", -1, 0, logger::log_level::error, static_cast(key.size()), key.data()); return NO_SUCH_FIELD; } return value(iter.child()); } simdjson_inline simdjson_result object::operator[](const std::string_view key) & noexcept { return find_field_unordered(key); } simdjson_inline simdjson_result object::operator[](const std::string_view key) && noexcept { return std::forward(*this).find_field_unordered(key); } simdjson_inline simdjson_result object::find_field(const std::string_view key) & noexcept { bool has_value; SIMDJSON_TRY( iter.find_field_raw(key).get(has_value) ); if (!has_value) { logger::log_line(iter.json_iter(), "ERROR: ", "Cannot find key %.*s", "", -1, 0, logger::log_level::error, static_cast(key.size()), key.data()); return NO_SUCH_FIELD; } return value(iter.child()); } simdjson_inline simdjson_result object::find_field(const std::string_view key) && noexcept { bool has_value; SIMDJSON_TRY( iter.find_field_raw(key).get(has_value) ); if (!has_value) { logger::log_line(iter.json_iter(), "ERROR: ", "Cannot find key %.*s", "", -1, 0, logger::log_level::error, static_cast(key.size()), key.data()); return NO_SUCH_FIELD; } return value(iter.child()); } simdjson_inline simdjson_result object::start(value_iterator &iter) noexcept { SIMDJSON_TRY( iter.start_object().error() ); return object(iter); } simdjson_inline simdjson_result object::start_root(value_iterator &iter) noexcept { SIMDJSON_TRY( iter.start_root_object().error() ); return object(iter); } simdjson_inline error_code object::consume() noexcept { if(iter.is_at_key()) { /** * whenever you are pointing at a key, 
calling skip_child() is * unsafe because you will hit a string and you will assume that * it is string value, and this mistake will lead you to make bad * depth computation. */ /** * We want to 'consume' the key. We could really * just do _json_iter->return_current_and_advance(); at this * point, but, for clarity, we will use the high-level API to * eat the key. We assume that the compiler optimizes away * most of the work. */ simdjson_unused raw_json_string actual_key; auto error = iter.field_key().get(actual_key); if (error) { iter.abandon(); return error; }; // Let us move to the value while we are at it. if ((error = iter.field_value())) { iter.abandon(); return error; } } auto error_skip = iter.json_iter().skip_child(iter.depth()-1); if(error_skip) { iter.abandon(); } return error_skip; } simdjson_inline simdjson_result object::raw_json() noexcept { const uint8_t * starting_point{iter.peek_start()}; auto error = consume(); if(error) { return error; } const uint8_t * final_point{iter._json_iter->peek()}; return std::string_view(reinterpret_cast(starting_point), size_t(final_point - starting_point)); } simdjson_inline simdjson_result object::started(value_iterator &iter) noexcept { SIMDJSON_TRY( iter.started_object().error() ); return object(iter); } simdjson_inline object object::resume(const value_iterator &iter) noexcept { return iter; } simdjson_inline object::object(const value_iterator &_iter) noexcept : iter{_iter} { } simdjson_inline simdjson_result object::begin() noexcept { #if SIMDJSON_DEVELOPMENT_CHECKS if (!iter.is_at_iterator_start()) { return OUT_OF_ORDER_ITERATION; } #endif return object_iterator(iter); } simdjson_inline simdjson_result object::end() noexcept { return object_iterator(iter); } inline simdjson_result object::at_pointer(std::string_view json_pointer) noexcept { if (json_pointer[0] != '/') { return INVALID_JSON_POINTER; } json_pointer = json_pointer.substr(1); size_t slash = json_pointer.find('/'); std::string_view key = 
json_pointer.substr(0, slash); // Grab the child with the given key simdjson_result child; // If there is an escape character in the key, unescape it and then get the child. size_t escape = key.find('~'); if (escape != std::string_view::npos) { // Unescape the key std::string unescaped(key); do { switch (unescaped[escape+1]) { case '0': unescaped.replace(escape, 2, "~"); break; case '1': unescaped.replace(escape, 2, "/"); break; default: return INVALID_JSON_POINTER; // "Unexpected ~ escape character in JSON pointer"); } escape = unescaped.find('~', escape+1); } while (escape != std::string::npos); child = find_field(unescaped); // Take note find_field does not unescape keys when matching } else { child = find_field(key); } if(child.error()) { return child; // we do not continue if there was an error } // If there is a /, we have to recurse and look up more of the path if (slash != std::string_view::npos) { child = child.at_pointer(json_pointer.substr(slash)); } return child; } simdjson_inline simdjson_result object::count_fields() & noexcept { size_t count{0}; // Important: we do not consume any of the values. for(simdjson_unused auto v : *this) { count++; } // The above loop will always succeed, but we want to report errors. if(iter.error()) { return iter.error(); } // We need to move back at the start because we expect users to iterate through // the object after counting the number of elements. 
iter.reset_object();
  return count;
}

// Resets the object and reports whether it is empty. iter.reset_object() yields
// a flag that is true when the object is NOT empty, hence the negation.
simdjson_inline simdjson_result object::is_empty() & noexcept {
  bool is_not_empty;
  auto error = iter.reset_object().get(is_not_empty);
  if(error) { return error; }
  return !is_not_empty;
}

// Forwards to iter.reset_object().
simdjson_inline simdjson_result object::reset() & noexcept {
  return iter.reset_object();
}

} // namespace ondemand
} // namespace fallback
} // namespace simdjson

namespace simdjson {

// Result wrappers for fallback::ondemand::object. Every forwarding method below
// follows the same pattern: if this result already holds an error, return that
// error; otherwise call the same-named method on the wrapped object (`first`).

simdjson_inline simdjson_result::simdjson_result(fallback::ondemand::object &&value) noexcept
    : implementation_simdjson_result_base(std::forward(value)) {}
simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept
    : implementation_simdjson_result_base(error) {}

simdjson_inline simdjson_result simdjson_result::begin() noexcept {
  if (error()) { return error(); }
  return first.begin();
}
simdjson_inline simdjson_result simdjson_result::end() noexcept {
  if (error()) { return error(); }
  return first.end();
}
simdjson_inline simdjson_result simdjson_result::find_field_unordered(std::string_view key) & noexcept {
  if (error()) { return error(); }
  return first.find_field_unordered(key);
}
simdjson_inline simdjson_result simdjson_result::find_field_unordered(std::string_view key) && noexcept {
  if (error()) { return error(); }
  return std::forward(first).find_field_unordered(key);
}
simdjson_inline simdjson_result simdjson_result::operator[](std::string_view key) & noexcept {
  if (error()) { return error(); }
  return first[key];
}
simdjson_inline simdjson_result simdjson_result::operator[](std::string_view key) && noexcept {
  if (error()) { return error(); }
  return std::forward(first)[key];
}
simdjson_inline simdjson_result simdjson_result::find_field(std::string_view key) & noexcept {
  if (error()) { return error(); }
  return first.find_field(key);
}
simdjson_inline simdjson_result simdjson_result::find_field(std::string_view key) && noexcept {
  if (error()) { return error(); }
  return std::forward(first).find_field(key);
}

simdjson_inline simdjson_result simdjson_result::at_pointer(std::string_view json_pointer) noexcept {
  if (error()) { return error(); }
  return first.at_pointer(json_pointer);
}

inline simdjson_result simdjson_result::reset() noexcept {
  if (error()) { return error(); }
  return first.reset();
}

inline simdjson_result simdjson_result::is_empty() noexcept {
  if (error()) { return error(); }
  return first.is_empty();
}

simdjson_inline simdjson_result simdjson_result::count_fields() & noexcept {
  if (error()) { return error(); }
  return first.count_fields();
}

simdjson_inline simdjson_result simdjson_result::raw_json() noexcept {
  if (error()) { return error(); }
  return first.raw_json();
}
} // namespace simdjson

#endif // SIMDJSON_GENERIC_ONDEMAND_OBJECT_INL_H
/* end file simdjson/generic/ondemand/object-inl.h for fallback */
/* including simdjson/generic/ondemand/object_iterator-inl.h for fallback: #include "simdjson/generic/ondemand/object_iterator-inl.h" */
/* begin file simdjson/generic/ondemand/object_iterator-inl.h for fallback */
#ifndef SIMDJSON_GENERIC_ONDEMAND_OBJECT_ITERATOR_INL_H

/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */
/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_OBJECT_ITERATOR_INL_H */
/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */
/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/object_iterator.h" */
/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/field-inl.h" */
/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator-inl.h" */
/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */

namespace simdjson {
namespace fallback {
namespace ondemand {

//
// object_iterator
//

simdjson_inline object_iterator::object_iterator(const value_iterator &_iter) noexcept
  : iter{_iter}
{}

// Yields the current field. A pending iterator error is returned (after
// abandoning the iterator) before any attempt to read the field.
simdjson_inline simdjson_result object_iterator::operator*() noexcept {
  error_code error = iter.error();
  if
(error) { iter.abandon(); return error; }
  auto result = field::start(iter);
  // TODO this is a safety rail ... users should exit loops as soon as they receive an error.
  // Nonetheless, let's see if performance is OK with this if statement--the compiler may give it to us for free.
  if (result.error()) { iter.abandon(); }
  return result;
}
// Equality is defined as the negation of operator!=.
simdjson_inline bool object_iterator::operator==(const object_iterator &other) const noexcept {
  return !(*this != other);
}
// The other iterator is ignored: the comparison only reports whether this
// iterator is still open, i.e. whether iteration should continue.
simdjson_inline bool object_iterator::operator!=(const object_iterator &) const noexcept {
  return iter.is_open();
}

SIMDJSON_PUSH_DISABLE_WARNINGS
SIMDJSON_DISABLE_STRICT_OVERFLOW_WARNING
// Advances to the next field: skips whatever remains of the current child, then
// asks the iterator whether another field follows. Errors are not returned from
// here; every path returns *this.
simdjson_inline object_iterator &object_iterator::operator++() noexcept {
  // TODO this is a safety rail ... users should exit loops as soon as they receive an error.
  // Nonetheless, let's see if performance is OK with this if statement--the compiler may give it to us for free.
  if (!iter.is_open()) { return *this; } // Iterator will be released if there is an error

  simdjson_unused error_code error;
  if ((error = iter.skip_child() )) { return *this; }

  simdjson_unused bool has_value;
  if ((error = iter.has_next_field().get(has_value) )) { return *this; };
  return *this;
}
SIMDJSON_POP_DISABLE_WARNINGS

//
// ### Live States
//
// While iterating or looking up values, depth >= iter.depth. at_start may vary. Error is
// always SUCCESS:
//
// - Start: This is the state when the object is first found and the iterator is just past the {.
//   In this state, at_start == true.
// - Next: After we hand a scalar value to the user, or an array/object which they then fully
//   iterate over, the iterator is at the , or } before the next value. In this state,
//   depth == iter.depth, at_start == false, and error == SUCCESS.
// - Unfinished Business: When we hand an array/object to the user which they do not fully
//   iterate over, we need to finish that iteration by skipping child values until we reach the
//   Next state. In this state, depth > iter.depth, at_start == false, and error == SUCCESS.
//
// ## Error States
//
// In error states, we will yield exactly one more value before stopping. iter.depth == depth
// and at_start is always false. We decrement after yielding the error, moving to the Finished
// state.
//
// - Chained Error: When the object iterator is part of an error chain--for example, in
//   `for (auto tweet : doc["tweets"])`, where the tweet field may be missing or not be an
//   object--we yield that error in the loop, exactly once. In this state, error != SUCCESS and
//   iter.depth == depth, and at_start == false. We decrement depth when we yield the error.
// - Missing Comma Error: When the iterator ++ method discovers there is no comma between fields,
//   we flag that as an error and treat it exactly the same as a Chained Error. In this state,
//   error == TAPE_ERROR, iter.depth == depth, and at_start == false.
//
// Errors that occur while reading a field to give to the user (such as when the key is not a
// string or the field is missing a colon) are yielded immediately. Depth is then decremented,
// moving to the Finished state without transitioning through an Error state at all.
//
// ## Terminal State
//
// The terminal state has iter.depth < depth. at_start is always false.
//
// - Finished: When we have reached a }, we are finished. We signal this by decrementing depth.
//   In this state, iter.depth < depth, at_start == false, and error == SUCCESS.
//

} // namespace ondemand
} // namespace fallback
} // namespace simdjson

namespace simdjson {

// Result wrapper around an object_iterator. The value constructor asserts that
// the wrapped iterator is valid; the error constructor stores a
// default-constructed iterator alongside the error.
simdjson_inline simdjson_result::simdjson_result(
  fallback::ondemand::object_iterator &&value
) noexcept
  : implementation_simdjson_result_base(std::forward(value))
{
  first.iter.assert_is_valid();
}
simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept
  : implementation_simdjson_result_base({}, error)
{
}

// Returns the stored error if there is one, otherwise dereferences the iterator.
simdjson_inline simdjson_result simdjson_result::operator*() noexcept {
  if (error()) { return error(); }
  return *first;
}
// If we're iterating and there is an error, return the error once.
simdjson_inline bool simdjson_result::operator==(const simdjson_result &other) const noexcept {
  if (!first.iter.is_valid()) { return !error(); }
  return first == other.first;
}
// If we're iterating and there is an error, return the error once.
simdjson_inline bool simdjson_result::operator!=(const simdjson_result &other) const noexcept {
  if (!first.iter.is_valid()) { return error(); }
  return first != other.first;
}
// Checks for '}' and ','
simdjson_inline simdjson_result &simdjson_result::operator++() noexcept {
  // Clear the error if there is one, so we don't yield it twice
  if (error()) { second = SUCCESS; return *this; }
  ++first;
  return *this;
}

} // namespace simdjson

#endif // SIMDJSON_GENERIC_ONDEMAND_OBJECT_ITERATOR_INL_H
/* end file simdjson/generic/ondemand/object_iterator-inl.h for fallback */
/* including simdjson/generic/ondemand/parser-inl.h for fallback: #include "simdjson/generic/ondemand/parser-inl.h" */
/* begin file simdjson/generic/ondemand/parser-inl.h for fallback */
#ifndef SIMDJSON_GENERIC_ONDEMAND_PARSER_INL_H

/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */
/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_PARSER_INL_H */
/* amalgamation skipped (editor-only): #include "simdjson/padded_string.h" */
/* amalgamation skipped (editor-only): #include "simdjson/padded_string_view.h" */
/*
amalgamation skipped (editor-only): #include "simdjson/implementation.h" */
/* amalgamation skipped (editor-only): #include "simdjson/internal/dom_parser_implementation.h" */
/* amalgamation skipped (editor-only): #include "simdjson/dom/base.h" // for MINIMAL_DOCUMENT_CAPACITY */
/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */
/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/document_stream.h" */
/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/parser.h" */
/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string.h" */
/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */

namespace simdjson {
namespace fallback {
namespace ondemand {

// Records the maximum capacity; nothing is allocated here.
simdjson_inline parser::parser(size_t max_capacity) noexcept
  : _max_capacity{max_capacity} {
}

// (Re)allocates the parser's buffers for documents of up to `new_capacity` bytes
// and nesting of up to `new_max_depth`.
//
// Returns CAPACITY if `new_capacity` exceeds max_capacity(), MEMALLOC if a
// buffer cannot be allocated, any error reported by the underlying
// implementation, and SUCCESS otherwise. It is a no-op returning SUCCESS when
// the string buffer exists and both settings are unchanged. On any failure
// capacity() is left at 0.
simdjson_warn_unused simdjson_inline error_code parser::allocate(size_t new_capacity, size_t new_max_depth) noexcept {
  if (new_capacity > max_capacity()) { return CAPACITY; }
  if (string_buf && new_capacity == capacity() && new_max_depth == max_depth()) { return SUCCESS; }

  // string_capacity copied from document::allocate
  _capacity = 0;
  size_t string_capacity = SIMDJSON_ROUNDUP_N(5 * new_capacity / 3 + SIMDJSON_PADDING, 64);
  string_buf.reset(new (std::nothrow) uint8_t[string_capacity]);
  // The nothrow form of new reports failure with a null pointer. Without this
  // check we would return SUCCESS and later write through a null string buffer.
  if (!string_buf) { return MEMALLOC; }
#if SIMDJSON_DEVELOPMENT_CHECKS
  start_positions.reset(new (std::nothrow) token_position[new_max_depth]);
  if (!start_positions) { return MEMALLOC; }
#endif
  if (implementation) {
    SIMDJSON_TRY( implementation->set_capacity(new_capacity) );
    SIMDJSON_TRY( implementation->set_max_depth(new_max_depth) );
  } else {
    SIMDJSON_TRY( simdjson::get_active_implementation()->create_dom_parser_implementation(new_capacity, new_max_depth, implementation) );
  }
  _capacity = new_capacity;
  _max_depth = new_max_depth;
  return SUCCESS;
}

// Starts iterating over `json`. Returns INSUFFICIENT_PADDING when the view has
// fewer than SIMDJSON_PADDING bytes of padding. A UTF-8 byte-order mark is
// removed from the view, the buffers are grown if needed, stage 1 is run and a
// document is started on the input.
simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(padded_string_view json) & noexcept {
  if (json.padding() < SIMDJSON_PADDING) { return INSUFFICIENT_PADDING; }

  json.remove_utf8_bom();

  // Allocate if needed
  if (capacity() < json.length() || !string_buf) {
    SIMDJSON_TRY( allocate(json.length(), max_depth()) );
  }

  // Run stage 1.
  SIMDJSON_TRY( implementation->stage1(reinterpret_cast(json.data()), json.length(), stage1_mode::regular) );
  return document::start({ reinterpret_cast(json.data()), this });
}

// Convenience overloads: each wraps its arguments in a padded_string_view and
// forwards to iterate(padded_string_view).
simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(const char *json, size_t len, size_t allocated) & noexcept {
  return iterate(padded_string_view(json, len, allocated));
}

simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(const uint8_t *json, size_t len, size_t allocated) & noexcept {
  return iterate(padded_string_view(json, len, allocated));
}

simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(std::string_view json, size_t allocated) & noexcept {
  return iterate(padded_string_view(json, allocated));
}

// Mutable std::string overload: reserves extra capacity first when fewer than
// SIMDJSON_PADDING spare bytes are available.
// NOTE(review): reserve() can throw inside this noexcept function, which would
// terminate the program on allocation failure -- confirm this is acceptable.
simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(std::string &json) & noexcept {
  if(json.capacity() - json.size() < SIMDJSON_PADDING) {
    json.reserve(json.size() + SIMDJSON_PADDING);
  }
  return iterate(padded_string_view(json));
}

simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(const std::string &json) & noexcept {
  return iterate(padded_string_view(json));
}

// Result overloads: check the incoming result's error, then iterate over its value.
simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(const simdjson_result &result) & noexcept {
  // We don't presently have a way to temporarily get a const T& from a simdjson_result without throwing an exception
  SIMDJSON_TRY( result.error() );
  padded_string_view json = result.value_unsafe();
  return iterate(json);
}

simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(const simdjson_result &result) & noexcept {
  // We don't presently have a way to temporarily get a const T& from a simdjson_result without throwing an exception
  SIMDJSON_TRY( result.error() );
  const padded_string &json = result.value_unsafe();
  return
iterate(json); } simdjson_warn_unused simdjson_inline simdjson_result parser::iterate_raw(padded_string_view json) & noexcept { if (json.padding() < SIMDJSON_PADDING) { return INSUFFICIENT_PADDING; } json.remove_utf8_bom(); // Allocate if needed if (capacity() < json.length()) { SIMDJSON_TRY( allocate(json.length(), max_depth()) ); } // Run stage 1. SIMDJSON_TRY( implementation->stage1(reinterpret_cast(json.data()), json.length(), stage1_mode::regular) ); return json_iterator(reinterpret_cast(json.data()), this); } inline simdjson_result parser::iterate_many(const uint8_t *buf, size_t len, size_t batch_size, bool allow_comma_separated) noexcept { if(batch_size < MINIMAL_BATCH_SIZE) { batch_size = MINIMAL_BATCH_SIZE; } if((len >= 3) && (std::memcmp(buf, "\xEF\xBB\xBF", 3) == 0)) { buf += 3; len -= 3; } if(allow_comma_separated && batch_size < len) { batch_size = len; } return document_stream(*this, buf, len, batch_size, allow_comma_separated); } inline simdjson_result parser::iterate_many(const char *buf, size_t len, size_t batch_size, bool allow_comma_separated) noexcept { return iterate_many(reinterpret_cast(buf), len, batch_size, allow_comma_separated); } inline simdjson_result parser::iterate_many(const std::string &s, size_t batch_size, bool allow_comma_separated) noexcept { return iterate_many(s.data(), s.length(), batch_size, allow_comma_separated); } inline simdjson_result parser::iterate_many(const padded_string &s, size_t batch_size, bool allow_comma_separated) noexcept { return iterate_many(s.data(), s.length(), batch_size, allow_comma_separated); } simdjson_inline size_t parser::capacity() const noexcept { return _capacity; } simdjson_inline size_t parser::max_capacity() const noexcept { return _max_capacity; } simdjson_inline size_t parser::max_depth() const noexcept { return _max_depth; } simdjson_inline void parser::set_max_capacity(size_t max_capacity) noexcept { if(max_capacity < dom::MINIMAL_DOCUMENT_CAPACITY) { _max_capacity = max_capacity; } else 
{ _max_capacity = dom::MINIMAL_DOCUMENT_CAPACITY; } } simdjson_inline simdjson_warn_unused simdjson_result parser::unescape(raw_json_string in, uint8_t *&dst, bool allow_replacement) const noexcept { uint8_t *end = implementation->parse_string(in.buf, dst, allow_replacement); if (!end) { return STRING_ERROR; } std::string_view result(reinterpret_cast(dst), end-dst); dst = end; return result; } simdjson_inline simdjson_warn_unused simdjson_result parser::unescape_wobbly(raw_json_string in, uint8_t *&dst) const noexcept { uint8_t *end = implementation->parse_wobbly_string(in.buf, dst); if (!end) { return STRING_ERROR; } std::string_view result(reinterpret_cast(dst), end-dst); dst = end; return result; } } // namespace ondemand } // namespace fallback } // namespace simdjson namespace simdjson { simdjson_inline simdjson_result::simdjson_result(fallback::ondemand::parser &&value) noexcept : implementation_simdjson_result_base(std::forward(value)) {} simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept : implementation_simdjson_result_base(error) {} } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_PARSER_INL_H /* end file simdjson/generic/ondemand/parser-inl.h for fallback */ /* including simdjson/generic/ondemand/raw_json_string-inl.h for fallback: #include "simdjson/generic/ondemand/raw_json_string-inl.h" */ /* begin file simdjson/generic/ondemand/raw_json_string-inl.h for fallback */ #ifndef SIMDJSON_GENERIC_ONDEMAND_RAW_JSON_STRING_INL_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_RAW_JSON_STRING_INL_H */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator-inl.h" */ /* amalgamation skipped (editor-only): 
#include "simdjson/generic/implementation_simdjson_result_base-inl.h" */
/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */

namespace simdjson {

namespace fallback {
namespace ondemand {

simdjson_inline raw_json_string::raw_json_string(const uint8_t * _buf) noexcept : buf{_buf} {}

// Returns the underlying buffer reinterpreted as a character pointer.
simdjson_inline const char * raw_json_string::raw() const noexcept { return reinterpret_cast(buf); }


// Returns true when `target` contains no double quote that is not preceded by
// an escaping backslash.
simdjson_inline bool raw_json_string::is_free_from_unescaped_quote(std::string_view target) noexcept {
  size_t pos{0};
  // Fast path: before the first backslash nothing can be escaped, so any quote
  // found here is necessarily unescaped. (This loop used to skip ahead without
  // looking at quotes, so a target such as a"b was wrongly reported as free.)
  for(;pos < target.size() && target[pos] != '\\';pos++) {
    if(target[pos] == '"') { return false; }
  }
  // slow path may begin.
  bool escaping{false};
  for(;pos < target.size();pos++) {
    if((target[pos] == '"') && !escaping) {
      return false;
    } else if(target[pos] == '\\') {
      escaping = !escaping;
    } else {
      escaping = false;
    }
  }
  return true;
}

// Null-terminated variant of the check above.
simdjson_inline bool raw_json_string::is_free_from_unescaped_quote(const char* target) noexcept {
  size_t pos{0};
  // Fast path: before the first backslash any quote is necessarily unescaped.
  for(;target[pos] && target[pos] != '\\';pos++) {
    if(target[pos] == '"') { return false; }
  }
  // slow path may begin.
  bool escaping{false};
  for(;target[pos];pos++) {
    if((target[pos] == '"') && !escaping) {
      return false;
    } else if(target[pos] == '\\') {
      escaping = !escaping;
    } else {
      escaping = false;
    }
  }
  return true;
}


// Compares against `target` when the caller knows that at least `length` bytes
// of the raw string are readable: true when `target` fits within that length,
// the raw content has a closing quote right after target.size() bytes, and
// those bytes match.
simdjson_inline bool raw_json_string::unsafe_is_equal(size_t length, std::string_view target) const noexcept {
  // If we are going to call memcmp, then we must know something about the length of the raw_json_string.
  return (length >= target.size()) && (raw()[target.size()] == '"') && !memcmp(raw(), target.data(), target.size());
}

simdjson_inline bool raw_json_string::unsafe_is_equal(std::string_view target) const noexcept {
  // Assumptions: does not contain unescaped quote characters, and
  // the raw content is quote terminated within a valid JSON string.
if(target.size() <= SIMDJSON_PADDING) {
    // Short targets: a single memcmp plus a check for the closing quote.
    return (raw()[target.size()] == '"') && !memcmp(raw(), target.data(), target.size());
  }
  // Longer targets: compare byte by byte so that we stop at the first mismatch.
  const char * r{raw()};
  size_t pos{0};
  for(;pos < target.size();pos++) {
    if(r[pos] != target[pos]) { return false; }
  }
  if(r[pos] != '"') { return false; }
  return true;
}

// Byte-by-byte comparison against `target` that also returns false as soon as
// `target` itself contains an unescaped quote. After all of `target` matched,
// the raw content must continue with a closing quote.
simdjson_inline bool raw_json_string::is_equal(std::string_view target) const noexcept {
  const char * r{raw()};
  size_t pos{0};
  bool escaping{false};
  for(;pos < target.size();pos++) {
    if(r[pos] != target[pos]) { return false; }
    // if target is a compile-time constant and it is free from
    // quotes, then the next part could get optimized away through
    // inlining.
    if((target[pos] == '"') && !escaping) {
      // We have reached the end of the raw_json_string but
      // the target is not done.
      return false;
    } else if(target[pos] == '\\') {
      escaping = !escaping;
    } else {
      escaping = false;
    }
  }
  if(r[pos] != '"') { return false; }
  return true;
}


simdjson_inline bool raw_json_string::unsafe_is_equal(const char * target) const noexcept {
  // Assumptions: 'target' does not contain unescaped quote characters, is null terminated and
  // the raw content is quote terminated within a valid JSON string.
  const char * r{raw()};
  size_t pos{0};
  for(;target[pos];pos++) {
    if(r[pos] != target[pos]) { return false; }
  }
  if(r[pos] != '"') { return false; }
  return true;
}

simdjson_inline bool raw_json_string::is_equal(const char* target) const noexcept {
  // Assumptions: does not contain unescaped quote characters, and
  // the raw content is quote terminated within a valid JSON string.
  const char * r{raw()};
  size_t pos{0};
  bool escaping{false};
  for(;target[pos];pos++) {
    if(r[pos] != target[pos]) { return false; }
    // if target is a compile-time constant and it is free from
    // quotes, then the next part could get optimized away through
    // inlining.
    if((target[pos] == '"') && !escaping) {
      // We have reached the end of the raw_json_string but
      // the target is not done.
      return false;
    } else if(target[pos] == '\\') {
      escaping = !escaping;
    } else {
      escaping = false;
    }
  }
  if(r[pos] != '"') { return false; }
  return true;
}

// Comparison operators between a raw_json_string and a string_view. They all
// reduce to unsafe_is_equal(), so its assumptions apply here as well.
simdjson_unused simdjson_inline bool operator==(const raw_json_string &a, std::string_view c) noexcept {
  return a.unsafe_is_equal(c);
}

simdjson_unused simdjson_inline bool operator==(std::string_view c, const raw_json_string &a) noexcept {
  return a == c;
}

simdjson_unused simdjson_inline bool operator!=(const raw_json_string &a, std::string_view c) noexcept {
  return !(a == c);
}

simdjson_unused simdjson_inline bool operator!=(std::string_view c, const raw_json_string &a) noexcept {
  return !(a == c);
}


// Forward to the json_iterator, which performs the actual unescaping.
simdjson_inline simdjson_warn_unused simdjson_result raw_json_string::unescape(json_iterator &iter, bool allow_replacement) const noexcept {
  return iter.unescape(*this, allow_replacement);
}

simdjson_inline simdjson_warn_unused simdjson_result raw_json_string::unescape_wobbly(json_iterator &iter) const noexcept {
  return iter.unescape_wobbly(*this);
}

// Writes the raw (still escaped) characters to `out`, stopping at the first
// quote that is not preceded by an escaping backslash. The loop has no other
// exit, so it relies on the raw content being quote terminated.
simdjson_unused simdjson_inline std::ostream &operator<<(std::ostream &out, const raw_json_string &str) noexcept {
  bool in_escape = false;
  const char *s = str.raw();
  while (true) {
    switch (*s) {
      case '\\': in_escape = !in_escape; break;
      case '"': if (in_escape) { in_escape = false; } else { return out; } break;
      default: if (in_escape) { in_escape = false; }
    }
    out << *s;
    s++;
  }
}

} // namespace ondemand
} // namespace fallback
} // namespace simdjson

namespace simdjson {

// Result wrappers for raw_json_string: return the stored error if there is one,
// otherwise forward to the wrapped value (`first`).
simdjson_inline simdjson_result::simdjson_result(fallback::ondemand::raw_json_string &&value) noexcept
    : implementation_simdjson_result_base(std::forward(value)) {}
simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept
    : implementation_simdjson_result_base(error) {}

simdjson_inline simdjson_result simdjson_result::raw() const noexcept {
  if (error()) { return error(); }
  return first.raw();
}
simdjson_inline simdjson_warn_unused simdjson_result
simdjson_result::unescape(fallback::ondemand::json_iterator &iter, bool allow_replacement) const noexcept {
  if (error()) { return error(); }
  return first.unescape(iter, allow_replacement);
}
simdjson_inline simdjson_warn_unused simdjson_result simdjson_result::unescape_wobbly(fallback::ondemand::json_iterator &iter) const noexcept {
  if (error()) { return error(); }
  return first.unescape_wobbly(iter);
}
} // namespace simdjson

#endif // SIMDJSON_GENERIC_ONDEMAND_RAW_JSON_STRING_INL_H
/* end file simdjson/generic/ondemand/raw_json_string-inl.h for fallback */
/* including simdjson/generic/ondemand/serialization-inl.h for fallback: #include "simdjson/generic/ondemand/serialization-inl.h" */
/* begin file simdjson/generic/ondemand/serialization-inl.h for fallback */
#ifndef SIMDJSON_GENERIC_ONDEMAND_SERIALIZATION_INL_H

/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */
/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_SERIALIZATION_INL_H */
/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */
/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array.h" */
/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/document-inl.h" */
/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_type.h" */
/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/object.h" */
/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/serialization.h" */
/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value.h" */
/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */

namespace simdjson {

// Returns `str` without leading and trailing spaces, tabs, line feeds and
// carriage returns. An input made only of those characters (or an empty input)
// yields an empty string_view.
inline std::string_view trim(const std::string_view str) noexcept {
  // We can almost surely do better by rolling our own find_first_not_of function.
  size_t first = str.find_first_not_of(" \t\n\r");
  // If we have the empty string (just white space), then no trimming is possible, and
  // we return the empty string_view.
  if (std::string_view::npos == first) { return std::string_view(); }
  // A non-whitespace character exists, so `last` is valid and last >= first.
  size_t last = str.find_last_not_of(" \t\n\r");
  return str.substr(first, (last - first + 1));
}


// to_json_string overloads: obtain the raw JSON text of `x` and return it
// trimmed, or return the error encountered while obtaining it.
inline simdjson_result to_json_string(fallback::ondemand::document& x) noexcept {
  std::string_view v;
  auto error = x.raw_json().get(v);
  if(error) {return error; }
  return trim(v);
}

inline simdjson_result to_json_string(fallback::ondemand::document_reference& x) noexcept {
  std::string_view v;
  auto error = x.raw_json().get(v);
  if(error) {return error; }
  return trim(v);
}

// For a generic value the work depends on its type: arrays and objects are
// materialized and handed to their own overloads, anything else is returned as
// its trimmed raw token.
inline simdjson_result to_json_string(fallback::ondemand::value& x) noexcept {
  /**
   * If we somehow receive a value that has already been consumed,
   * then the following code could be in trouble. E.g., we create
   * an array as needed, but if an array was already created, then
   * it could be bad.
   */
  using namespace fallback::ondemand;
  fallback::ondemand::json_type t;
  auto error = x.type().get(t);
  if(error != SUCCESS) { return error; }
  switch (t)
  {
    case json_type::array:
    {
      fallback::ondemand::array array;
      error = x.get_array().get(array);
      if(error) { return error; }
      return to_json_string(array);
    }
    case json_type::object:
    {
      fallback::ondemand::object object;
      error = x.get_object().get(object);
      if(error) { return error; }
      return to_json_string(object);
    }
    default:
      return trim(x.raw_json_token());
  }
}

inline simdjson_result to_json_string(fallback::ondemand::object& x) noexcept {
  std::string_view v;
  auto error = x.raw_json().get(v);
  if(error) {return error; }
  return trim(v);
}

inline simdjson_result to_json_string(fallback::ondemand::array& x) noexcept {
  std::string_view v;
  auto error = x.raw_json().get(v);
  if(error) {return error; }
  return trim(v);
}

// Result-taking overloads: return the incoming error if there is one, otherwise
// serialize the wrapped value.
inline simdjson_result to_json_string(simdjson_result x) {
  if (x.error()) { return x.error(); }
  return
to_json_string(x.value_unsafe());
}

inline simdjson_result to_json_string(simdjson_result x) {
  if (x.error()) { return x.error(); }
  return to_json_string(x.value_unsafe());
}

inline simdjson_result to_json_string(simdjson_result x) {
  if (x.error()) { return x.error(); }
  return to_json_string(x.value_unsafe());
}

inline simdjson_result to_json_string(simdjson_result x) {
  if (x.error()) { return x.error(); }
  return to_json_string(x.value_unsafe());
}

inline simdjson_result to_json_string(simdjson_result x) {
  if (x.error()) { return x.error(); }
  return to_json_string(x.value_unsafe());
}
} // namespace simdjson

namespace simdjson { namespace fallback { namespace ondemand {

// Stream insertion operators. Each serializes its argument with
// simdjson::to_json_string() and writes the resulting text to `out`. When
// SIMDJSON_EXCEPTIONS is enabled a failure throws simdjson_error; otherwise the
// error code itself is written to the stream.

#if SIMDJSON_EXCEPTIONS
inline std::ostream& operator<<(std::ostream& out, simdjson::fallback::ondemand::value x) {
  std::string_view v;
  auto error = simdjson::to_json_string(x).get(v);
  if(error == simdjson::SUCCESS) {
    return (out << v);
  } else {
    throw simdjson::simdjson_error(error);
  }
}
inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x) {
  if (x.error()) { throw simdjson::simdjson_error(x.error()); }
  return (out << x.value());
}
#else
inline std::ostream& operator<<(std::ostream& out, simdjson::fallback::ondemand::value x) {
  std::string_view v;
  auto error = simdjson::to_json_string(x).get(v);
  if(error == simdjson::SUCCESS) {
    return (out << v);
  } else {
    return (out << error);
  }
}
#endif

#if SIMDJSON_EXCEPTIONS
inline std::ostream& operator<<(std::ostream& out, simdjson::fallback::ondemand::array value) {
  std::string_view v;
  auto error = simdjson::to_json_string(value).get(v);
  if(error == simdjson::SUCCESS) {
    return (out << v);
  } else {
    throw simdjson::simdjson_error(error);
  }
}
inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x) {
  if (x.error()) { throw simdjson::simdjson_error(x.error()); }
  return (out << x.value());
}
#else
inline std::ostream& operator<<(std::ostream& out, simdjson::fallback::ondemand::array value) {
  std::string_view v;
  auto error = simdjson::to_json_string(value).get(v);
  if(error == simdjson::SUCCESS) {
    return (out << v);
  } else {
    return (out << error);
  }
}
#endif

// NOTE(review): the exception-enabled branch below has a document_reference
// overload, but the #else branch only covers document -- confirm whether that
// asymmetry is intended.
#if SIMDJSON_EXCEPTIONS
inline std::ostream& operator<<(std::ostream& out, simdjson::fallback::ondemand::document& value)  {
  std::string_view v;
  auto error = simdjson::to_json_string(value).get(v);
  if(error == simdjson::SUCCESS) {
    return (out << v);
  } else {
    throw simdjson::simdjson_error(error);
  }
}
inline std::ostream& operator<<(std::ostream& out, simdjson::fallback::ondemand::document_reference& value)  {
  std::string_view v;
  auto error = simdjson::to_json_string(value).get(v);
  if(error == simdjson::SUCCESS) {
    return (out << v);
  } else {
    throw simdjson::simdjson_error(error);
  }
}
inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result&& x) {
  if (x.error()) { throw simdjson::simdjson_error(x.error()); }
  return (out << x.value());
}
inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result&& x) {
  if (x.error()) { throw simdjson::simdjson_error(x.error()); }
  return (out << x.value());
}
#else
inline std::ostream& operator<<(std::ostream& out, simdjson::fallback::ondemand::document& value)  {
  std::string_view v;
  auto error = simdjson::to_json_string(value).get(v);
  if(error == simdjson::SUCCESS) {
    return (out << v);
  } else {
    return (out << error);
  }
}
#endif

#if SIMDJSON_EXCEPTIONS
inline std::ostream& operator<<(std::ostream& out, simdjson::fallback::ondemand::object value) {
  std::string_view v;
  auto error = simdjson::to_json_string(value).get(v);
  if(error == simdjson::SUCCESS) {
    return (out << v);
  } else {
    throw simdjson::simdjson_error(error);
  }
}
inline std::ostream& operator<<(std::ostream& out,  simdjson::simdjson_result x) {
  if (x.error()) { throw  simdjson::simdjson_error(x.error()); }
  return (out << x.value());
}
#else
inline std::ostream& operator<<(std::ostream& out, simdjson::fallback::ondemand::object value) {
  std::string_view v;
  auto error = simdjson::to_json_string(value).get(v);
  if(error == simdjson::SUCCESS) {
    return (out << v);
  } else {
    return (out << error);
  }
}
#endif
}}} // namespace simdjson::fallback::ondemand

#endif // SIMDJSON_GENERIC_ONDEMAND_SERIALIZATION_INL_H
/* end file simdjson/generic/ondemand/serialization-inl.h for fallback */
/* including simdjson/generic/ondemand/token_iterator-inl.h for fallback: #include "simdjson/generic/ondemand/token_iterator-inl.h" */
/* begin file simdjson/generic/ondemand/token_iterator-inl.h for fallback */
#ifndef SIMDJSON_GENERIC_ONDEMAND_TOKEN_ITERATOR_INL_H

/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */
/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_TOKEN_ITERATOR_INL_H */
/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */
/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/token_iterator.h" */
/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base-inl.h" */
/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */

namespace simdjson {
namespace fallback {
namespace ondemand {

// Stores the document buffer and the starting token position.
simdjson_inline token_iterator::token_iterator(
  const uint8_t *_buf,
  token_position position
) noexcept : buf{_buf}, _position{position}
{
}

// Value stored at the current token position (used below as an index into buf).
simdjson_inline uint32_t token_iterator::current_offset() const noexcept {
  return *(_position);
}


// Returns a pointer into buf for the current token and moves on to the next one.
simdjson_inline const uint8_t *token_iterator::return_current_and_advance() noexcept {
  return &buf[*(_position++)];
}

// Pointer into buf for the token at `position`; the iterator is not moved.
simdjson_inline const uint8_t *token_iterator::peek(token_position position) const noexcept {
  return &buf[*position];
}
// Value stored at `position`.
simdjson_inline uint32_t token_iterator::peek_index(token_position position) const noexcept {
  return *position;
}
// Difference between the value stored at the next position and the one at
// `position`; this reads position+1, so a following entry must exist.
simdjson_inline uint32_t token_iterator::peek_length(token_position position) const noexcept {
  return *(position+1) - *position;
}

simdjson_inline const uint8_t *token_iterator::peek(int32_t delta) const noexcept {
return &buf[*(_position+delta)]; } simdjson_inline uint32_t token_iterator::peek_index(int32_t delta) const noexcept { return *(_position+delta); } simdjson_inline uint32_t token_iterator::peek_length(int32_t delta) const noexcept { return *(_position+delta+1) - *(_position+delta); } simdjson_inline token_position token_iterator::position() const noexcept { return _position; } simdjson_inline void token_iterator::set_position(token_position target_position) noexcept { _position = target_position; } simdjson_inline bool token_iterator::operator==(const token_iterator &other) const noexcept { return _position == other._position; } simdjson_inline bool token_iterator::operator!=(const token_iterator &other) const noexcept { return _position != other._position; } simdjson_inline bool token_iterator::operator>(const token_iterator &other) const noexcept { return _position > other._position; } simdjson_inline bool token_iterator::operator>=(const token_iterator &other) const noexcept { return _position >= other._position; } simdjson_inline bool token_iterator::operator<(const token_iterator &other) const noexcept { return _position < other._position; } simdjson_inline bool token_iterator::operator<=(const token_iterator &other) const noexcept { return _position <= other._position; } } // namespace ondemand } // namespace fallback } // namespace simdjson namespace simdjson { simdjson_inline simdjson_result::simdjson_result(fallback::ondemand::token_iterator &&value) noexcept : implementation_simdjson_result_base(std::forward(value)) {} simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept : implementation_simdjson_result_base(error) {} } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_TOKEN_ITERATOR_INL_H /* end file simdjson/generic/ondemand/token_iterator-inl.h for fallback */ /* including simdjson/generic/ondemand/value-inl.h for fallback: #include "simdjson/generic/ondemand/value-inl.h" */ /* begin file 
simdjson/generic/ondemand/value-inl.h for fallback */ #ifndef SIMDJSON_GENERIC_ONDEMAND_VALUE_INL_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_VALUE_INL_H */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array_iterator.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_type.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/object.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { namespace fallback { namespace ondemand { simdjson_inline value::value(const value_iterator &_iter) noexcept : iter{_iter} { } simdjson_inline value value::start(const value_iterator &iter) noexcept { return iter; } simdjson_inline value value::resume(const value_iterator &iter) noexcept { return iter; } simdjson_inline simdjson_result value::get_array() noexcept { return array::start(iter); } simdjson_inline simdjson_result value::get_object() noexcept { return object::start(iter); } simdjson_inline simdjson_result value::start_or_resume_object() noexcept { if (iter.at_start()) { return get_object(); } else { return object::resume(iter); } } simdjson_inline simdjson_result value::get_raw_json_string() noexcept { return iter.get_raw_json_string(); } simdjson_inline simdjson_result value::get_string(bool allow_replacement) noexcept { return iter.get_string(allow_replacement); } template 
simdjson_inline error_code value::get_string(string_type& receiver, bool allow_replacement) noexcept { return iter.get_string(receiver, allow_replacement); } simdjson_inline simdjson_result value::get_wobbly_string() noexcept { return iter.get_wobbly_string(); } simdjson_inline simdjson_result value::get_double() noexcept { return iter.get_double(); } simdjson_inline simdjson_result value::get_double_in_string() noexcept { return iter.get_double_in_string(); } simdjson_inline simdjson_result value::get_uint64() noexcept { return iter.get_uint64(); } simdjson_inline simdjson_result value::get_uint64_in_string() noexcept { return iter.get_uint64_in_string(); } simdjson_inline simdjson_result value::get_int64() noexcept { return iter.get_int64(); } simdjson_inline simdjson_result value::get_int64_in_string() noexcept { return iter.get_int64_in_string(); } simdjson_inline simdjson_result value::get_bool() noexcept { return iter.get_bool(); } simdjson_inline simdjson_result value::is_null() noexcept { return iter.is_null(); } template<> simdjson_inline simdjson_result value::get() noexcept { return get_array(); } template<> simdjson_inline simdjson_result value::get() noexcept { return get_object(); } template<> simdjson_inline simdjson_result value::get() noexcept { return get_raw_json_string(); } template<> simdjson_inline simdjson_result value::get() noexcept { return get_string(false); } template<> simdjson_inline simdjson_result value::get() noexcept { return get_number(); } template<> simdjson_inline simdjson_result value::get() noexcept { return get_double(); } template<> simdjson_inline simdjson_result value::get() noexcept { return get_uint64(); } template<> simdjson_inline simdjson_result value::get() noexcept { return get_int64(); } template<> simdjson_inline simdjson_result value::get() noexcept { return get_bool(); } template simdjson_inline error_code value::get(T &out) noexcept { return get().get(out); } #if SIMDJSON_EXCEPTIONS simdjson_inline 
value::operator array() noexcept(false) { return get_array(); } simdjson_inline value::operator object() noexcept(false) { return get_object(); } simdjson_inline value::operator uint64_t() noexcept(false) { return get_uint64(); } simdjson_inline value::operator int64_t() noexcept(false) { return get_int64(); } simdjson_inline value::operator double() noexcept(false) { return get_double(); } simdjson_inline value::operator std::string_view() noexcept(false) { return get_string(false); } simdjson_inline value::operator raw_json_string() noexcept(false) { return get_raw_json_string(); } simdjson_inline value::operator bool() noexcept(false) { return get_bool(); } #endif simdjson_inline simdjson_result value::begin() & noexcept { return get_array().begin(); } simdjson_inline simdjson_result value::end() & noexcept { return {}; } simdjson_inline simdjson_result value::count_elements() & noexcept { simdjson_result answer; auto a = get_array(); answer = a.count_elements(); // count_elements leaves you pointing inside the array, at the first element. // We need to move back so that the user can create a new array (which requires that // we point at '['). 
iter.move_at_start(); return answer; } simdjson_inline simdjson_result value::count_fields() & noexcept { simdjson_result answer; auto a = get_object(); answer = a.count_fields(); iter.move_at_start(); return answer; } simdjson_inline simdjson_result value::at(size_t index) noexcept { auto a = get_array(); return a.at(index); } simdjson_inline simdjson_result value::find_field(std::string_view key) noexcept { return start_or_resume_object().find_field(key); } simdjson_inline simdjson_result value::find_field(const char *key) noexcept { return start_or_resume_object().find_field(key); } simdjson_inline simdjson_result value::find_field_unordered(std::string_view key) noexcept { return start_or_resume_object().find_field_unordered(key); } simdjson_inline simdjson_result value::find_field_unordered(const char *key) noexcept { return start_or_resume_object().find_field_unordered(key); } simdjson_inline simdjson_result value::operator[](std::string_view key) noexcept { return start_or_resume_object()[key]; } simdjson_inline simdjson_result value::operator[](const char *key) noexcept { return start_or_resume_object()[key]; } simdjson_inline simdjson_result value::type() noexcept { return iter.type(); } simdjson_inline simdjson_result value::is_scalar() noexcept { json_type this_type; auto error = type().get(this_type); if(error) { return error; } return ! 
((this_type == json_type::array) || (this_type == json_type::object)); } simdjson_inline bool value::is_negative() noexcept { return iter.is_negative(); } simdjson_inline simdjson_result value::is_integer() noexcept { return iter.is_integer(); } simdjson_warn_unused simdjson_inline simdjson_result value::get_number_type() noexcept { return iter.get_number_type(); } simdjson_warn_unused simdjson_inline simdjson_result value::get_number() noexcept { return iter.get_number(); } simdjson_inline std::string_view value::raw_json_token() noexcept { return std::string_view(reinterpret_cast(iter.peek_start()), iter.peek_start_length()); } simdjson_inline simdjson_result value::raw_json() noexcept { json_type t; SIMDJSON_TRY(type().get(t)); switch (t) { case json_type::array: { ondemand::array array; SIMDJSON_TRY(get_array().get(array)); return array.raw_json(); } case json_type::object: { ondemand::object object; SIMDJSON_TRY(get_object().get(object)); return object.raw_json(); } default: return raw_json_token(); } } simdjson_inline simdjson_result value::current_location() noexcept { return iter.json_iter().current_location(); } simdjson_inline int32_t value::current_depth() const noexcept{ return iter.json_iter().depth(); } simdjson_inline simdjson_result value::at_pointer(std::string_view json_pointer) noexcept { json_type t; SIMDJSON_TRY(type().get(t)); switch (t) { case json_type::array: return (*this).get_array().at_pointer(json_pointer); case json_type::object: return (*this).get_object().at_pointer(json_pointer); default: return INVALID_JSON_POINTER; } } } // namespace ondemand } // namespace fallback } // namespace simdjson namespace simdjson { simdjson_inline simdjson_result::simdjson_result( fallback::ondemand::value &&value ) noexcept : implementation_simdjson_result_base( std::forward(value) ) { } simdjson_inline simdjson_result::simdjson_result( error_code error ) noexcept : implementation_simdjson_result_base(error) { } simdjson_inline simdjson_result 
simdjson_result::count_elements() & noexcept { if (error()) { return error(); } return first.count_elements(); } simdjson_inline simdjson_result simdjson_result::count_fields() & noexcept { if (error()) { return error(); } return first.count_fields(); } simdjson_inline simdjson_result simdjson_result::at(size_t index) noexcept { if (error()) { return error(); } return first.at(index); } simdjson_inline simdjson_result simdjson_result::begin() & noexcept { if (error()) { return error(); } return first.begin(); } simdjson_inline simdjson_result simdjson_result::end() & noexcept { if (error()) { return error(); } return {}; } simdjson_inline simdjson_result simdjson_result::find_field(std::string_view key) noexcept { if (error()) { return error(); } return first.find_field(key); } simdjson_inline simdjson_result simdjson_result::find_field(const char *key) noexcept { if (error()) { return error(); } return first.find_field(key); } simdjson_inline simdjson_result simdjson_result::find_field_unordered(std::string_view key) noexcept { if (error()) { return error(); } return first.find_field_unordered(key); } simdjson_inline simdjson_result simdjson_result::find_field_unordered(const char *key) noexcept { if (error()) { return error(); } return first.find_field_unordered(key); } simdjson_inline simdjson_result simdjson_result::operator[](std::string_view key) noexcept { if (error()) { return error(); } return first[key]; } simdjson_inline simdjson_result simdjson_result::operator[](const char *key) noexcept { if (error()) { return error(); } return first[key]; } simdjson_inline simdjson_result simdjson_result::get_array() noexcept { if (error()) { return error(); } return first.get_array(); } simdjson_inline simdjson_result simdjson_result::get_object() noexcept { if (error()) { return error(); } return first.get_object(); } simdjson_inline simdjson_result simdjson_result::get_uint64() noexcept { if (error()) { return error(); } return first.get_uint64(); } 
simdjson_inline simdjson_result simdjson_result::get_uint64_in_string() noexcept { if (error()) { return error(); } return first.get_uint64_in_string(); } simdjson_inline simdjson_result simdjson_result::get_int64() noexcept { if (error()) { return error(); } return first.get_int64(); } simdjson_inline simdjson_result simdjson_result::get_int64_in_string() noexcept { if (error()) { return error(); } return first.get_int64_in_string(); } simdjson_inline simdjson_result simdjson_result::get_double() noexcept { if (error()) { return error(); } return first.get_double(); } simdjson_inline simdjson_result simdjson_result::get_double_in_string() noexcept { if (error()) { return error(); } return first.get_double_in_string(); } simdjson_inline simdjson_result simdjson_result::get_string(bool allow_replacement) noexcept { if (error()) { return error(); } return first.get_string(allow_replacement); } template simdjson_inline error_code simdjson_result::get_string(string_type& receiver, bool allow_replacement) noexcept { if (error()) { return error(); } return first.get_string(receiver, allow_replacement); } simdjson_inline simdjson_result simdjson_result::get_wobbly_string() noexcept { if (error()) { return error(); } return first.get_wobbly_string(); } simdjson_inline simdjson_result simdjson_result::get_raw_json_string() noexcept { if (error()) { return error(); } return first.get_raw_json_string(); } simdjson_inline simdjson_result simdjson_result::get_bool() noexcept { if (error()) { return error(); } return first.get_bool(); } simdjson_inline simdjson_result simdjson_result::is_null() noexcept { if (error()) { return error(); } return first.is_null(); } template simdjson_inline simdjson_result simdjson_result::get() noexcept { if (error()) { return error(); } return first.get(); } template simdjson_inline error_code simdjson_result::get(T &out) noexcept { if (error()) { return error(); } return first.get(out); } template<> simdjson_inline simdjson_result 
simdjson_result::get() noexcept { if (error()) { return error(); } return std::move(first); } template<> simdjson_inline error_code simdjson_result::get(fallback::ondemand::value &out) noexcept { if (error()) { return error(); } out = first; return SUCCESS; } simdjson_inline simdjson_result simdjson_result::type() noexcept { if (error()) { return error(); } return first.type(); } simdjson_inline simdjson_result simdjson_result::is_scalar() noexcept { if (error()) { return error(); } return first.is_scalar(); } simdjson_inline simdjson_result simdjson_result::is_negative() noexcept { if (error()) { return error(); } return first.is_negative(); } simdjson_inline simdjson_result simdjson_result::is_integer() noexcept { if (error()) { return error(); } return first.is_integer(); } simdjson_inline simdjson_result simdjson_result::get_number_type() noexcept { if (error()) { return error(); } return first.get_number_type(); } simdjson_inline simdjson_result simdjson_result::get_number() noexcept { if (error()) { return error(); } return first.get_number(); } #if SIMDJSON_EXCEPTIONS simdjson_inline simdjson_result::operator fallback::ondemand::array() noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } simdjson_inline simdjson_result::operator fallback::ondemand::object() noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } simdjson_inline simdjson_result::operator uint64_t() noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } simdjson_inline simdjson_result::operator int64_t() noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } simdjson_inline simdjson_result::operator double() noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } simdjson_inline simdjson_result::operator std::string_view() noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } simdjson_inline simdjson_result::operator 
fallback::ondemand::raw_json_string() noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } simdjson_inline simdjson_result::operator bool() noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } #endif simdjson_inline simdjson_result simdjson_result::raw_json_token() noexcept { if (error()) { return error(); } return first.raw_json_token(); } simdjson_inline simdjson_result simdjson_result::raw_json() noexcept { if (error()) { return error(); } return first.raw_json(); } simdjson_inline simdjson_result simdjson_result::current_location() noexcept { if (error()) { return error(); } return first.current_location(); } simdjson_inline simdjson_result simdjson_result::current_depth() const noexcept { if (error()) { return error(); } return first.current_depth(); } simdjson_inline simdjson_result simdjson_result::at_pointer(std::string_view json_pointer) noexcept { if (error()) { return error(); } return first.at_pointer(json_pointer); } } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_VALUE_INL_H /* end file simdjson/generic/ondemand/value-inl.h for fallback */ /* including simdjson/generic/ondemand/value_iterator-inl.h for fallback: #include "simdjson/generic/ondemand/value_iterator-inl.h" */ /* begin file simdjson/generic/ondemand/value_iterator-inl.h for fallback */ #ifndef SIMDJSON_GENERIC_ONDEMAND_VALUE_ITERATOR_INL_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_VALUE_ITERATOR_INL_H */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/atomparsing.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/numberparsing.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator.h" */ /* amalgamation skipped (editor-only): #include 
"simdjson/generic/ondemand/json_type-inl.h" */
/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string-inl.h" */
/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator.h" */
/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */

namespace simdjson {
namespace fallback {
namespace ondemand {

/**
 * Stores the owning json_iterator pointer, the depth of this value and the
 * token position at which the value starts. Nothing is read or advanced here.
 */
simdjson_inline value_iterator::value_iterator(
  json_iterator *json_iter,
  depth_t depth,
  token_position start_position
) noexcept : _json_iter{json_iter}, _depth{depth}, _start_position{start_position}
{
}

/**
 * Enters an object: start_container() is asked to accept '{' (propagating its
 * error otherwise), then started_object() reports whether the object has
 * any field.
 */
simdjson_warn_unused simdjson_inline simdjson_result value_iterator::start_object() noexcept {
  SIMDJSON_TRY( start_container('{', "Not an object", "object") );
  return started_object();
}

/**
 * Same as start_object(), but finishes with started_root_object(), which
 * runs check_root_object() first.
 */
simdjson_warn_unused simdjson_inline simdjson_result value_iterator::start_root_object() noexcept {
  SIMDJSON_TRY( start_container('{', "Not an object", "object") );
  return started_root_object();
}

/**
 * Called once the opening brace has been accepted.
 *
 * @return false when the next token is '}' (empty object; the brace is
 *         consumed and the container is ended), true otherwise.
 */
simdjson_warn_unused simdjson_inline simdjson_result value_iterator::started_object() noexcept {
  assert_at_container_start();
#if SIMDJSON_DEVELOPMENT_CHECKS
  // Record where this object started; find_field_raw() compares against it.
  _json_iter->set_start_position(_depth, start_position());
#endif
  if (*_json_iter->peek() == '}') {
    logger::log_value(*_json_iter, "empty object");
    _json_iter->return_current_and_advance();
    // NOTE(review): the error_code returned by end_container() is discarded
    // here, although has_next_field() wraps the same call in SIMDJSON_TRY.
    // Confirm this is intentional.
    end_container();
    return false;
  }
  return true;
}

/**
 * Sanity checks for a root-level object; skipped entirely in streaming mode.
 *
 * @return SUCCESS, or the result of report_error(INCOMPLETE_ARRAY_OR_OBJECT, ...)
 *         after abandoning the iterator when the last token is not '}' or
 *         the document is found to be unbalanced.
 */
simdjson_warn_unused simdjson_inline error_code value_iterator::check_root_object() noexcept {
  // When in streaming mode, we cannot expect peek_last() to be the last structural element of the
  // current document. It only works in the normal mode where we have indexed a single document.
  // Note that adding a check for 'streaming' is not expensive since we only have at most
  // one root element.
  if ( ! _json_iter->streaming() ) {
    // The following lines do not fully protect against garbage content within the
    // object: e.g., `{"a":2} foo }`. Users concerned with garbage content should
    // call `at_end()` on the document instance at the end of the processing to
    // ensure that the processing has finished at the end.
    //
    if (*_json_iter->peek_last() != '}') {
      _json_iter->abandon();
      return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "missing } at end");
    }
    // If the last character is } *and* the first gibberish character is also '}'
    // then on-demand could accidentally go over. So we need additional checks.
    // https://github.com/simdjson/simdjson/issues/1834
    // Checking that the document is balanced requires a full scan which is potentially
    // expensive, but it only happens in edge cases where the first padding character is
    // a closing bracket.
    if ((*_json_iter->peek(_json_iter->end_position()) == '}') && (!_json_iter->balanced())) {
      _json_iter->abandon();
      // The exact error would require more work. It will typically be an unclosed object.
      return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "the document is unbalanced");
    }
  }
  return SUCCESS;
}

/**
 * Root-level variant of started_object(): returns the error from
 * check_root_object() if there is one, otherwise defers to started_object().
 */
simdjson_warn_unused simdjson_inline simdjson_result value_iterator::started_root_object() noexcept {
  auto error = check_root_object();
  if(error) { return error; }
  return started_object();
}

/**
 * Leaves the current container by ascending the json_iterator to depth()-1.
 * When SIMDJSON_CHECK_EOF is set, being at the end of input while
 * depth() > 1 is reported as INCOMPLETE_ARRAY_OR_OBJECT instead.
 */
simdjson_warn_unused simdjson_inline error_code value_iterator::end_container() noexcept {
#if SIMDJSON_CHECK_EOF
    if (depth() > 1 && at_end()) { return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "missing parent ] or }"); }
    // if (depth() <= 1 && !at_end()) { return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "missing [ or { at start"); }
#endif // SIMDJSON_CHECK_EOF
    _json_iter->ascend_to(depth()-1);
    return SUCCESS;
}

/**
 * Consumes the token that follows a field's value.
 *
 * @return true on ',', false on '}' (after ending the container); any other
 *         token is reported as TAPE_ERROR.
 */
simdjson_warn_unused simdjson_inline simdjson_result value_iterator::has_next_field() noexcept {
  assert_at_next();

  // It's illegal to call this unless there are more tokens: anything that ends in } or ] is
  // obligated to verify there are more tokens if they are not the top level.
  switch (*_json_iter->return_current_and_advance()) {
    case '}':
      logger::log_end_value(*_json_iter, "object");
      SIMDJSON_TRY( end_container() );
      return false;
    case ',':
      return true;
    default:
      return report_error(TAPE_ERROR, "Missing comma between object fields");
  }
}

/**
 * Forward-only search for `key`, starting from wherever the iterator
 * currently is inside the object; fields already passed are not revisited.
 *
 * @param key field name, compared with raw_json_string::unsafe_is_equal()
 *            (see the caveat in the loop body).
 * @return true when a matching field was found, false when the object was
 *         exhausted, or an error code. Errors assigned to `error` abandon
 *         the iterator first; the SIMDJSON_TRY(skip_child()) in the loop
 *         returns without calling abandon().
 */
simdjson_warn_unused simdjson_inline simdjson_result value_iterator::find_field_raw(const std::string_view key) noexcept {
  error_code error;
  bool has_value;
  //
  // Initially, the object can be in one of a few different places:
  //
  // 1. The start of the object, at the first field:
  //
  //    ```
  //    { "a": [ 1, 2 ], "b": [ 3, 4 ] }
  //    ^ (depth 2, index 1)
  //    ```
  if (at_first_field()) {
    has_value = true;

  //
  // 2. When a previous search did not yield a value or the object is empty:
  //
  //    ```
  //    { "a": [ 1, 2 ], "b": [ 3, 4 ] }
  //    ^ (depth 0)
  //    { }
  //    ^ (depth 0, index 2)
  //    ```
  //
  } else if (!is_open()) {
#if SIMDJSON_DEVELOPMENT_CHECKS
    // If we're past the end of the object, we're being iterated out of order.
    // Note: this isn't perfect detection. It's possible the user is inside some other object; if so,
    // this object iterator will blithely scan that object for fields.
    if (_json_iter->depth() < depth() - 1) { return OUT_OF_ORDER_ITERATION; }
#endif
    return false;

  // 3. When a previous search found a field or an iterator yielded a value:
  //
  //    ```
  //    // When a field was not fully consumed (or not even touched at all)
  //    { "a": [ 1, 2 ], "b": [ 3, 4 ] }
  //    ^ (depth 2)
  //    // When a field was fully consumed
  //    { "a": [ 1, 2 ], "b": [ 3, 4 ] }
  //    ^ (depth 1)
  //    // When the last field was fully consumed
  //    { "a": [ 1, 2 ], "b": [ 3, 4 ] }
  //    ^ (depth 1)
  //    ```
  //
  } else {
    if ((error = skip_child() )) { abandon(); return error; }
    if ((error = has_next_field().get(has_value) )) { abandon(); return error; }
#if SIMDJSON_DEVELOPMENT_CHECKS
    if (_json_iter->start_position(_depth) != start_position()) { return OUT_OF_ORDER_ITERATION; }
#endif
  }
  while (has_value) {
    // Get the key and colon, stopping at the value.
    raw_json_string actual_key;
    // size_t max_key_length = _json_iter->peek_length() - 2; // -2 for the two quotes
    // Note: _json_iter->peek_length() - 2 might overflow if _json_iter->peek_length() < 2.
    // field_key() advances the pointer and checks that '"' is found (corresponding to a key).
    // The depth is left unchanged by field_key().
    if ((error = field_key().get(actual_key) )) { abandon(); return error; };
    // field_value() will advance and check that we find a ':' separating the
    // key and the value. It will also increment the depth by one.
    if ((error = field_value() )) { abandon(); return error; }
    // If it matches, stop and return
    // We could do it this way if we wanted to allow arbitrary
    // key content (including escaped quotes).
    //if (actual_key.unsafe_is_equal(max_key_length, key)) {
    // Instead we do the following which may trigger buffer overruns if the
    // user provides an adversarial key (containing a well placed unescaped quote
    // character and being longer than the number of bytes remaining in the JSON
    // input).
    if (actual_key.unsafe_is_equal(key)) {
      logger::log_event(*this, "match", key, -2);
      // If we return here, then we return while pointing at the ':' that we just checked.
      return true;
    }

    // No match: skip the value and see if , or } is next
    logger::log_event(*this, "no match", key, -2);
    // The call to skip_child is meant to skip over the value corresponding to the key.
    // After skip_child(), we are right before the next comma (',') or the final brace ('}').
    SIMDJSON_TRY( skip_child() ); // Skip the value entirely
    // The has_next_field() advances the pointer and checks that either ',' or '}' is found.
    // It returns true if ',' is found, false otherwise. If anything other than ',' or '}' is found,
    // then we are in error and we abort.
    if ((error = has_next_field().get(has_value) )) { abandon(); return error; }
  }

  // If the loop ended, we're out of fields to look at.
return false;
}

// NOTE(review): in this copy, text between angle brackets appears to have been stripped
// (e.g. `simdjson_result` return types and `template` headers carry no argument lists).
// The code tokens below are reproduced exactly as found -- TODO confirm against the
// upstream amalgamation before compiling.

SIMDJSON_PUSH_DISABLE_WARNINGS
SIMDJSON_DISABLE_STRICT_OVERFLOW_WARNING
// Looks for a field named `key` in the current object without relying on field order.
// The scan starts from the current position; if the end of the object is reached without
// a match and the scan did not start at the first field, it restarts from the beginning
// of the object and stops once it is back at the position where the search began.
// Returns true when a key matched (the iterator is then left just past the ':'),
// false when no field matched, or the error reported by one of the helper calls.
simdjson_warn_unused simdjson_inline simdjson_result value_iterator::find_field_unordered_raw(const std::string_view key) noexcept {
  /**
   * When find_field_unordered_raw is called, we can either be pointing at the
   * first key, pointing outside (at the closing brace) or if a key was matched
   * we can be either pointing right after the ':' right before the value (that we need to skip),
   * or we may have consumed the value and we might be at a comma or at the
   * final brace (ready for a call to has_next_field()).
   */
  error_code error;
  bool has_value;

  // First, we scan from that point to the end.
  // If we don't find a match, we may loop back around, and scan from the beginning to that point.
  token_position search_start = _json_iter->position();

  // We want to know whether we need to go back to the beginning.
  bool at_first = at_first_field();
  ///////////////
  // Initially, the object can be in one of a few different places:
  //
  // 1. At the first key:
  //
  //    ```
  //    { "a": [ 1, 2 ], "b": [ 3, 4 ] }
  //      ^ (depth 2, index 1)
  //    ```
  //
  if (at_first) {
    has_value = true;

  // 2. When a previous search did not yield a value or the object is empty:
  //
  //    ```
  //    { "a": [ 1, 2 ], "b": [ 3, 4 ] }
  //                                     ^ (depth 0)
  //    { }
  //        ^ (depth 0, index 2)
  //    ```
  //
  } else if (!is_open()) {
#if SIMDJSON_DEVELOPMENT_CHECKS
    // If we're past the end of the object, we're being iterated out of order.
    // Note: this isn't perfect detection. It's possible the user is inside some other object; if so,
    // this object iterator will blithely scan that object for fields.
    if (_json_iter->depth() < depth() - 1) { return OUT_OF_ORDER_ITERATION; }
#endif
    SIMDJSON_TRY(reset_object().get(has_value));
    at_first = true;

  // 3. When a previous search found a field or an iterator yielded a value:
  //
  //    ```
  //    // When a field was not fully consumed (or not even touched at all)
  //    { "a": [ 1, 2 ], "b": [ 3, 4 ] }
  //           ^ (depth 2)
  //    // When a field was fully consumed
  //    { "a": [ 1, 2 ], "b": [ 3, 4 ] }
  //                   ^ (depth 1)
  //    // When the last field was fully consumed
  //    { "a": [ 1, 2 ], "b": [ 3, 4 ] }
  //                                   ^ (depth 1)
  //    ```
  //
  } else {
    // If someone queried a key but they did not access the value, then we are left pointing
    // at the ':' and we need to move forward through the value... If the value was
    // processed then skip_child() does not move the iterator (but may adjust the depth).
    if ((error = skip_child() )) { abandon(); return error; }
    // The wrap-around scan below must stop here, so remember the position after the skip.
    search_start = _json_iter->position();
    if ((error = has_next_field().get(has_value) )) { abandon(); return error; }
#if SIMDJSON_DEVELOPMENT_CHECKS
    if (_json_iter->start_position(_depth) != start_position()) { return OUT_OF_ORDER_ITERATION; }
#endif
  }

  // After initial processing, we will be in one of two states:
  //
  // ```
  // // At the beginning of a field
  // { "a": [ 1, 2 ], "b": [ 3, 4 ] }
  //   ^ (depth 1)
  // { "a": [ 1, 2 ], "b": [ 3, 4 ] }
  //                  ^ (depth 1)
  // // At the end of the object
  // { "a": [ 1, 2 ], "b": [ 3, 4 ] }
  //                                  ^ (depth 0)
  // ```
  //
  // Next, we find a match starting from the current position.
  while (has_value) {
    SIMDJSON_ASSUME( _json_iter->_depth == _depth ); // We must be at the start of a field
    // Get the key and colon, stopping at the value.
    raw_json_string actual_key;
    // size_t max_key_length = _json_iter->peek_length() - 2; // -2 for the two quotes
    // Note: _json_iter->peek_length() - 2 might overflow if _json_iter->peek_length() < 2.
    // field_key() advances the pointer and checks that '"' is found (corresponding to a key).
    // The depth is left unchanged by field_key().
    if ((error = field_key().get(actual_key) )) { abandon(); return error; };
    // field_value() will advance and check that we find a ':' separating the
    // key and the value. It will also increment the depth by one.
    if ((error = field_value() )) { abandon(); return error; }
    // If it matches, stop and return
    // We could do it this way if we wanted to allow arbitrary
    // key content (including escaped quotes).
    // if (actual_key.unsafe_is_equal(max_key_length, key)) {
    // Instead we do the following which may trigger buffer overruns if the
    // user provides an adversarial key (containing a well placed unescaped quote
    // character and being longer than the number of bytes remaining in the JSON
    // input).
    if (actual_key.unsafe_is_equal(key)) {
      logger::log_event(*this, "match", key, -2);
      // If we return here, then we return while pointing at the ':' that we just checked.
      return true;
    }

    // No match: skip the value and see if , or } is next
    logger::log_event(*this, "no match", key, -2);
    // The call to skip_child is meant to skip over the value corresponding to the key.
    // After skip_child(), we are right before the next comma (',') or the final brace ('}').
    SIMDJSON_TRY( skip_child() );
    // The has_next_field() advances the pointer and checks that either ',' or '}' is found.
    // It returns true if ',' is found, false otherwise. If anything other than ',' or '}' is found,
    // then we are in error and we abort.
    if ((error = has_next_field().get(has_value) )) { abandon(); return error; }
  }

  // Performance note: it may be wasteful to rewind to the beginning when there might be
  // no other query following. Indeed, it would require reskipping the whole object.
  // Instead, you can just stay where you are. If there is a new query, there is always time
  // to rewind.
  if(at_first) { return false; }

  // If we reach the end without finding a match, search the rest of the fields starting at the
  // beginning of the object.
  // (We have already run through the object before, so we've already validated its structure. We
  // don't check errors in this bit.)
  SIMDJSON_TRY(reset_object().get(has_value));
  while (true) {
    SIMDJSON_ASSUME(has_value); // we should reach search_start before ever reaching the end of the object
    SIMDJSON_ASSUME( _json_iter->_depth == _depth ); // We must be at the start of a field

    // Get the key and colon, stopping at the value.
    raw_json_string actual_key;
    // size_t max_key_length = _json_iter->peek_length() - 2; // -2 for the two quotes
    // Note: _json_iter->peek_length() - 2 might overflow if _json_iter->peek_length() < 2.
    // field_key() advances the pointer and checks that '"' is found (corresponding to a key).
    // The depth is left unchanged by field_key().
    error = field_key().get(actual_key); SIMDJSON_ASSUME(!error);
    // field_value() will advance and check that we find a ':' separating the
    // key and the value. It will also increment the depth by one.
    error = field_value(); SIMDJSON_ASSUME(!error);

    // If it matches, stop and return
    // We could do it this way if we wanted to allow arbitrary
    // key content (including escaped quotes).
    // if (actual_key.unsafe_is_equal(max_key_length, key)) {
    // Instead we do the following which may trigger buffer overruns if the
    // user provides an adversarial key (containing a well placed unescaped quote
    // character and being longer than the number of bytes remaining in the JSON
    // input).
    if (actual_key.unsafe_is_equal(key)) {
      logger::log_event(*this, "match", key, -2);
      // If we return here, then we return while pointing at the ':' that we just checked.
      return true;
    }

    // No match: skip the value and see if , or } is next
    logger::log_event(*this, "no match", key, -2);
    // The call to skip_child is meant to skip over the value corresponding to the key.
    // After skip_child(), we are right before the next comma (',') or the final brace ('}').
    SIMDJSON_TRY( skip_child() );
    // If we reached the end of the key-value pair we started from, then we know
    // that the key is not there so we return false. We are either right before
    // the next comma or the final brace.
    if(_json_iter->position() == search_start) { return false; }
    // The has_next_field() advances the pointer and checks that either ',' or '}' is found.
    // It returns true if ',' is found, false otherwise. If anything other than ',' or '}' is found,
    // then we are in error and we abort.
    error = has_next_field().get(has_value); SIMDJSON_ASSUME(!error);
    // If we make the mistake of exiting here, then we could be left pointing at a key
    // in the middle of an object. That's not an allowable state.
  }
  // If the loop ended, we're out of fields to look at. The program should
  // never reach this point.
  return false;
}
SIMDJSON_POP_DISABLE_WARNINGS

// Consumes the current token, which must start with '"', and returns a raw_json_string
// pointing just past that opening quote; otherwise reports TAPE_ERROR.
simdjson_warn_unused simdjson_inline simdjson_result value_iterator::field_key() noexcept {
  assert_at_next();

  const uint8_t *key = _json_iter->return_current_and_advance();
  if (*(key++) != '"') { return report_error(TAPE_ERROR, "Object key is not a string"); }
  return raw_json_string(key);
}

// Consumes the current token, which must be ':', then descends to depth()+1 so the
// iterator sits on the field's value; otherwise reports TAPE_ERROR.
simdjson_warn_unused simdjson_inline error_code value_iterator::field_value() noexcept {
  assert_at_next();

  if (*_json_iter->return_current_and_advance() != ':') { return report_error(TAPE_ERROR, "Missing colon in object field"); }
  _json_iter->descend_to(depth()+1);
  return SUCCESS;
}

// Checks for the opening '[' through start_container(), then delegates to started_array().
simdjson_warn_unused simdjson_inline simdjson_result value_iterator::start_array() noexcept {
  SIMDJSON_TRY( start_container('[', "Not an array", "array") );
  return started_array();
}

// Same as start_array(), but delegates to started_root_array(), which adds the root-level checks.
simdjson_warn_unused simdjson_inline simdjson_result value_iterator::start_root_array() noexcept {
  SIMDJSON_TRY( start_container('[', "Not an array", "array") );
  return started_root_array();
}

// Builds a human-readable description: the depth and, when _json_iter is non-null, its own to_string().
inline std::string value_iterator::to_string() const noexcept {
  auto answer = std::string("value_iterator [ depth : ") + std::to_string(_depth) + std::string(", ");
  if(_json_iter != nullptr) { answer += _json_iter->to_string(); }
  answer += std::string(" ]");
  return answer;
}

// Called with the iterator just past '['. If the next token is ']', consumes it, ends the
// container and returns false (empty array); otherwise descends one level and returns true.
simdjson_warn_unused simdjson_inline simdjson_result value_iterator::started_array() noexcept {
  assert_at_container_start();
  if (*_json_iter->peek() == ']') {
    logger::log_value(*_json_iter, "empty array");
    _json_iter->return_current_and_advance();
    SIMDJSON_TRY( end_container() );
    return false;
  }
  _json_iter->descend_to(depth()+1);
#if SIMDJSON_DEVELOPMENT_CHECKS
  _json_iter->set_start_position(_depth, start_position());
#endif
  return true;
}

// Root-array sanity checks, performed only when not streaming: the last token must be ']',
// and if the byte at end_position() is also ']' the document must be balanced. On failure
// the iterator is abandoned and INCOMPLETE_ARRAY_OR_OBJECT is reported.
simdjson_warn_unused simdjson_inline error_code value_iterator::check_root_array() noexcept {
  // When in streaming mode, we cannot expect peek_last() to be the last structural element of the
  // current document. It only works in the normal mode where we have indexed a single document.
  // Note that adding a check for 'streaming' is not expensive since we only have at most
  // one root element.
  if ( ! _json_iter->streaming() ) {
    // The following lines do not fully protect against garbage content within the
    // array: e.g., `[1, 2] foo]`. Users concerned with garbage content should
    // also call `at_end()` on the document instance at the end of the processing to
    // ensure that the processing has finished at the end.
    //
    if (*_json_iter->peek_last() != ']') {
      _json_iter->abandon();
      return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "missing ] at end");
    }
    // If the last character is ] *and* the first gibberish character is also ']'
    // then on-demand could accidentally go over. So we need additional checks.
    // https://github.com/simdjson/simdjson/issues/1834
    // Checking that the document is balanced requires a full scan which is potentially
    // expensive, but it only happens in edge cases where the first padding character is
    // a closing bracket.
    if ((*_json_iter->peek(_json_iter->end_position()) == ']') && (!_json_iter->balanced())) {
      _json_iter->abandon();
      // The exact error would require more work. It will typically be an unclosed array.
      return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "the document is unbalanced");
    }
  }
  return SUCCESS;
}

// Runs check_root_array() and, if it succeeds, continues with started_array().
simdjson_warn_unused simdjson_inline simdjson_result value_iterator::started_root_array() noexcept {
  auto error = check_root_array();
  if (error) { return error; }
  return started_array();
}

// Consumes the token following an array element: ']' ends the container (returns false),
// ',' descends to the next element (returns true), anything else reports TAPE_ERROR.
simdjson_warn_unused simdjson_inline simdjson_result value_iterator::has_next_element() noexcept {
  assert_at_next();

  logger::log_event(*this, "has_next_element");
  switch (*_json_iter->return_current_and_advance()) {
    case ']':
      logger::log_end_value(*_json_iter, "array");
      SIMDJSON_TRY( end_container() );
      return false;
    case ',':
      _json_iter->descend_to(depth()+1);
      return true;
    default:
      return report_error(TAPE_ERROR, "Missing comma between array elements");
  }
}

// Parses `true` / `false` at `json`. The character after the literal (index 4 for "true",
// index 5 for "false") must be structural or whitespace; otherwise an incorrect-type error
// is returned. The result is true exactly when the "true" comparison matched.
simdjson_warn_unused simdjson_inline simdjson_result value_iterator::parse_bool(const uint8_t *json) const noexcept {
  auto not_true = atomparsing::str4ncmp(json, "true");
  // "false" is five bytes: compare the first four, then fold in the fifth.
  auto not_false = atomparsing::str4ncmp(json, "fals") | (json[4] ^ 'e');
  bool error = (not_true && not_false) || jsoncharutils::is_not_structural_or_whitespace(json[not_true ? 5 : 4]);
  if (error) { return incorrect_type_error("Not a boolean"); }
  return simdjson_result(!not_true);
}

// Returns whether `json` is `null` followed by a structural or whitespace character.
// A token that starts with 'n' but is not such a null yields an incorrect-type error.
simdjson_warn_unused simdjson_inline simdjson_result value_iterator::parse_null(const uint8_t *json) const noexcept {
  bool is_null_string = !atomparsing::str4ncmp(json, "null") && jsoncharutils::is_structural_or_whitespace(json[4]);
  // if we start with 'n', we must be a null
  if(!is_null_string && json[0]=='n') { return incorrect_type_error("Not a null but starts with n"); }
  return is_null_string;
}

// Gets the raw JSON string at this value and unescapes it through the json_iterator.
simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_string(bool allow_replacement) noexcept {
  return get_raw_json_string().unescape(json_iter(), allow_replacement);
}

// Reads the string as a std::string_view and assigns it to `receiver`; returns the error, if any.
template simdjson_warn_unused simdjson_inline error_code value_iterator::get_string(string_type& receiver, bool allow_replacement) noexcept {
  std::string_view content;
  auto err = get_string(allow_replacement).get(content);
  if (err) { return err; }
  receiver = content;
  return SUCCESS;
}

// Gets the raw JSON string at this value and unescapes it with unescape_wobbly().
simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_wobbly_string() noexcept {
  return get_raw_json_string().unescape_wobbly(json_iter());
}

// Peeks at the scalar; it must start with '"' (otherwise incorrect-type error, cursor untouched).
// On success the scalar is consumed and a raw_json_string past the opening quote is returned.
simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_raw_json_string() noexcept {
  auto json = peek_scalar("string");
  if (*json != '"') { return incorrect_type_error("Not a string"); }
  advance_scalar("string");
  return raw_json_string(json+1);
}

// The non-root scalar getters below share one shape: parse the peeked token with the
// matching numberparsing / parse_* routine and advance past it only when parsing succeeded.

// Parses with numberparsing::parse_unsigned.
simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_uint64() noexcept {
  auto result = numberparsing::parse_unsigned(peek_non_root_scalar("uint64"));
  if(result.error() == SUCCESS) { advance_non_root_scalar("uint64"); }
  return result;
}

// Parses with numberparsing::parse_unsigned_in_string.
simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_uint64_in_string() noexcept {
  auto result = numberparsing::parse_unsigned_in_string(peek_non_root_scalar("uint64"));
  if(result.error() == SUCCESS) { advance_non_root_scalar("uint64"); }
  return result;
}

// Parses with numberparsing::parse_integer.
simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_int64() noexcept {
  auto result = numberparsing::parse_integer(peek_non_root_scalar("int64"));
  if(result.error() == SUCCESS) { advance_non_root_scalar("int64"); }
  return result;
}

// Parses with numberparsing::parse_integer_in_string.
simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_int64_in_string() noexcept {
  auto result = numberparsing::parse_integer_in_string(peek_non_root_scalar("int64"));
  if(result.error() == SUCCESS) { advance_non_root_scalar("int64"); }
  return result;
}

// Parses with numberparsing::parse_double.
simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_double() noexcept {
  auto result = numberparsing::parse_double(peek_non_root_scalar("double"));
  if(result.error() == SUCCESS) { advance_non_root_scalar("double"); }
  return result;
}

// Parses with numberparsing::parse_double_in_string.
simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_double_in_string() noexcept {
  auto result = numberparsing::parse_double_in_string(peek_non_root_scalar("double"));
  if(result.error() == SUCCESS) { advance_non_root_scalar("double"); }
  return result;
}

// Parses with parse_bool().
simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_bool() noexcept {
  auto result = parse_bool(peek_non_root_scalar("bool"));
  if(result.error() == SUCCESS) { advance_non_root_scalar("bool"); }
  return result;
}

// Returns whether the value is null; the token is consumed only when it is.
// Errors from parse_null() are propagated.
simdjson_inline simdjson_result value_iterator::is_null() noexcept {
  bool is_null_value;
  SIMDJSON_TRY(parse_null(peek_non_root_scalar("null")).get(is_null_value));
  if(is_null_value) { advance_non_root_scalar("null"); }
  return is_null_value;
}

// The inspection helpers below peek at the token and do not call any advance_* routine.

// Forwards the peeked non-root token to numberparsing::is_negative.
simdjson_inline bool value_iterator::is_negative() noexcept {
  return numberparsing::is_negative(peek_non_root_scalar("numbersign"));
}

// Forwards the peeked root token to numberparsing::is_negative.
simdjson_inline bool value_iterator::is_root_negative() noexcept {
  return numberparsing::is_negative(peek_root_scalar("numbersign"));
}

// Forwards the peeked non-root token to numberparsing::is_integer.
simdjson_inline simdjson_result value_iterator::is_integer() noexcept {
  return numberparsing::is_integer(peek_non_root_scalar("integer"));
}

// Forwards the peeked non-root token to numberparsing::get_number_type.
simdjson_inline simdjson_result value_iterator::get_number_type() noexcept {
  return numberparsing::get_number_type(peek_non_root_scalar("integer"));
}

// Parses the peeked non-root token with numberparsing::parse_number and returns the number or the error.
simdjson_inline simdjson_result value_iterator::get_number() noexcept {
  number num;
  error_code error = numberparsing::parse_number(peek_non_root_scalar("number"), num);
  if(error) { return error; }
  return num;
}

// The root-scalar routines below copy the token into a local, null-terminated buffer with
// copy_to_buffer() and parse from that copy rather than from the document bytes.

// Root-level is_integer(): a token that does not fit the buffer is reported as "not an integer"
// (false). With check_trailing, a successful answer on a multi-token document becomes TRAILING_CONTENT.
simdjson_inline simdjson_result value_iterator::is_root_integer(bool check_trailing) noexcept {
  auto max_len = peek_start_length();
  auto json = peek_root_scalar("is_root_integer");
  uint8_t tmpbuf[20+1+1]{}; // <20 digits> is the longest possible unsigned integer
  tmpbuf[20+1] = '\0'; // make sure that buffer is always null terminated.
  if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 20+1)) {
    return false; // if there are more than 20 characters, it cannot be represented as an integer.
  }
  auto answer = numberparsing::is_integer(tmpbuf);
  // If the parsing was a success, we must still check that it is
  // a single scalar. Note that we parse first because of cases like '[]' where
  // getting TRAILING_CONTENT is wrong.
  if(check_trailing && (answer.error() == SUCCESS) && (!_json_iter->is_single_token())) { return TRAILING_CONTENT; }
  return answer;
}

// Root-level get_number_type(): NUMBER_ERROR when the token does not fit the buffer,
// TRAILING_CONTENT when check_trailing is set and the document is not a single token.
simdjson_inline simdjson_result value_iterator::get_root_number_type(bool check_trailing) noexcept {
  auto max_len = peek_start_length();
  auto json = peek_root_scalar("number");
  // Per https://www.exploringbinary.com/maximum-number-of-decimal-digits-in-binary-floating-point-numbers/,
  // 1074 is the maximum number of significant fractional digits. Add 8 more digits for the biggest
  // number: -0.e-308.
  uint8_t tmpbuf[1074+8+1+1];
  tmpbuf[1074+8+1] = '\0'; // make sure that buffer is always null terminated.
  if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 1074+8+1)) {
    logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 1082 characters");
    return NUMBER_ERROR;
  }
  auto answer = numberparsing::get_number_type(tmpbuf);
  if (check_trailing && (answer.error() == SUCCESS) && !_json_iter->is_single_token()) { return TRAILING_CONTENT; }
  return answer;
}

// Root-level get_number(): parses from the buffered copy and, unlike get_number(),
// advances past the root scalar on success.
simdjson_inline simdjson_result value_iterator::get_root_number(bool check_trailing) noexcept {
  auto max_len = peek_start_length();
  auto json = peek_root_scalar("number");
  // Per https://www.exploringbinary.com/maximum-number-of-decimal-digits-in-binary-floating-point-numbers/,
  // 1074 is the maximum number of significant fractional digits. Add 8 more digits for the biggest
  // number: -0.e-308.
  uint8_t tmpbuf[1074+8+1+1];
  tmpbuf[1074+8+1] = '\0'; // make sure that buffer is always null terminated.
  if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 1074+8+1)) {
    logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 1082 characters");
    return NUMBER_ERROR;
  }
  number num;
  error_code error = numberparsing::parse_number(tmpbuf, num);
  if(error) { return error; }
  if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; }
  advance_root_scalar("number");
  return num;
}

// Root-level get_string(): unescapes the result of get_root_raw_json_string().
simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_string(bool check_trailing, bool allow_replacement) noexcept {
  return get_root_raw_json_string(check_trailing).unescape(json_iter(), allow_replacement);
}

// Reads the root string as a std::string_view and assigns it to `receiver`; returns the error, if any.
template simdjson_warn_unused simdjson_inline error_code value_iterator::get_root_string(string_type& receiver, bool check_trailing, bool allow_replacement) noexcept {
  std::string_view content;
  auto err = get_root_string(check_trailing, allow_replacement).get(content);
  if (err) { return err; }
  receiver = content;
  return SUCCESS;
}

// Root-level get_wobbly_string().
simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_wobbly_string(bool check_trailing) noexcept {
  return get_root_raw_json_string(check_trailing).unescape_wobbly(json_iter());
}

// Root-level get_raw_json_string(): the type check comes first, then the optional
// single-token check; the scalar is consumed only when both pass.
simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_raw_json_string(bool check_trailing) noexcept {
  auto json = peek_scalar("string");
  if (*json != '"') { return incorrect_type_error("Not a string"); }
  if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; }
  advance_scalar("string");
  return raw_json_string(json+1);
}

// The root getters below share one shape: buffer the token (NUMBER_ERROR if it does not fit),
// parse the copy, and on success apply the optional trailing-content check before advancing.

// Parses with numberparsing::parse_unsigned.
simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_uint64(bool check_trailing) noexcept {
  auto max_len = peek_start_length();
  auto json = peek_root_scalar("uint64");
  uint8_t tmpbuf[20+1+1]{}; // <20 digits> is the longest possible unsigned integer
  tmpbuf[20+1] = '\0'; // make sure that buffer is always null terminated.
  if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 20+1)) {
    logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 20 characters");
    return NUMBER_ERROR;
  }
  auto result = numberparsing::parse_unsigned(tmpbuf);
  if(result.error() == SUCCESS) {
    if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; }
    advance_root_scalar("uint64");
  }
  return result;
}

// Parses with numberparsing::parse_unsigned_in_string.
simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_uint64_in_string(bool check_trailing) noexcept {
  auto max_len = peek_start_length();
  auto json = peek_root_scalar("uint64");
  uint8_t tmpbuf[20+1+1]{}; // <20 digits> is the longest possible unsigned integer
  tmpbuf[20+1] = '\0'; // make sure that buffer is always null terminated.
  if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 20+1)) {
    logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 20 characters");
    return NUMBER_ERROR;
  }
  auto result = numberparsing::parse_unsigned_in_string(tmpbuf);
  if(result.error() == SUCCESS) {
    if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; }
    advance_root_scalar("uint64");
  }
  return result;
}

// Parses with numberparsing::parse_integer.
simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_int64(bool check_trailing) noexcept {
  auto max_len = peek_start_length();
  auto json = peek_root_scalar("int64");
  uint8_t tmpbuf[20+1+1]; // -<19 digits> is the longest possible integer
  tmpbuf[20+1] = '\0'; // make sure that buffer is always null terminated.
  if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 20+1)) {
    logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 20 characters");
    return NUMBER_ERROR;
  }

  auto result = numberparsing::parse_integer(tmpbuf);
  if(result.error() == SUCCESS) {
    if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; }
    advance_root_scalar("int64");
  }
  return result;
}

// Parses with numberparsing::parse_integer_in_string.
simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_int64_in_string(bool check_trailing) noexcept {
  auto max_len = peek_start_length();
  auto json = peek_root_scalar("int64");
  uint8_t tmpbuf[20+1+1]; // -<19 digits> is the longest possible integer
  tmpbuf[20+1] = '\0'; // make sure that buffer is always null terminated.
  if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 20+1)) {
    logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 20 characters");
    return NUMBER_ERROR;
  }

  auto result = numberparsing::parse_integer_in_string(tmpbuf);
  if(result.error() == SUCCESS) {
    if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; }
    advance_root_scalar("int64");
  }
  return result;
}

// Parses with numberparsing::parse_double.
simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_double(bool check_trailing) noexcept {
  auto max_len = peek_start_length();
  auto json = peek_root_scalar("double");
  // Per https://www.exploringbinary.com/maximum-number-of-decimal-digits-in-binary-floating-point-numbers/,
  // 1074 is the maximum number of significant fractional digits. Add 8 more digits for the biggest
  // number: -0.e-308.
  uint8_t tmpbuf[1074+8+1+1]; // +1 for null termination.
  tmpbuf[1074+8+1] = '\0'; // make sure that buffer is always null terminated.
  if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 1074+8+1)) {
    logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 1082 characters");
    return NUMBER_ERROR;
  }
  auto result = numberparsing::parse_double(tmpbuf);
  if(result.error() == SUCCESS) {
    if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; }
    advance_root_scalar("double");
  }
  return result;
}

// Parses with numberparsing::parse_double_in_string.
simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_double_in_string(bool check_trailing) noexcept {
  auto max_len = peek_start_length();
  auto json = peek_root_scalar("double");
  // Per https://www.exploringbinary.com/maximum-number-of-decimal-digits-in-binary-floating-point-numbers/,
  // 1074 is the maximum number of significant fractional digits. Add 8 more digits for the biggest
  // number: -0.e-308.
  uint8_t tmpbuf[1074+8+1+1]; // +1 for null termination.
  tmpbuf[1074+8+1] = '\0'; // make sure that buffer is always null terminated.
  if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 1074+8+1)) {
    logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 1082 characters");
    return NUMBER_ERROR;
  }
  auto result = numberparsing::parse_double_in_string(tmpbuf);
  if(result.error() == SUCCESS) {
    if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; }
    advance_root_scalar("double");
  }
  return result;
}

// Parses with parse_bool(); a token that does not fit the buffer is an incorrect-type error
// here rather than NUMBER_ERROR.
simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_bool(bool check_trailing) noexcept {
  auto max_len = peek_start_length();
  auto json = peek_root_scalar("bool");
  uint8_t tmpbuf[5+1+1]; // +1 for null termination
  tmpbuf[5+1] = '\0'; // make sure that buffer is always null terminated.
  if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 5+1)) { return incorrect_type_error("Not a boolean"); }
  auto result = parse_bool(tmpbuf);
  if(result.error() == SUCCESS) {
    if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; }
    advance_root_scalar("bool");
  }
  return result;
}

// Root-level null test. No buffer copy is made: json[4] is only read when max_len is
// greater than 4. The token is consumed only when it is a null.
simdjson_inline simdjson_result value_iterator::is_root_null(bool check_trailing) noexcept {
  auto max_len = peek_start_length();
  auto json = peek_root_scalar("null");
  bool result = (max_len >= 4 && !atomparsing::str4ncmp(json, "null") &&
         (max_len == 4 || jsoncharutils::is_structural_or_whitespace(json[4])));
  if(result) { // we have something that looks like a null.
    if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; }
    advance_root_scalar("null");
  }
  return result;
}

// Asks the json_iterator to skip whatever remains of the child below depth().
simdjson_warn_unused simdjson_inline error_code value_iterator::skip_child() noexcept {
  SIMDJSON_ASSUME( _json_iter->token._position > _start_position );
  SIMDJSON_ASSUME( _json_iter->_depth >= _depth );

  return _json_iter->skip_child(depth());
}

// Creates a value_iterator for the child value at the current token, one level deeper.
simdjson_inline value_iterator value_iterator::child() const noexcept {
  assert_at_child();
  return { _json_iter, depth()+1, _json_iter->token.position() };
}

// GCC 7 warns when the first line of this function is inlined away into oblivion due to the caller
// relating depth and iterator depth, which is a desired effect. It does not happen if is_open is
// marked non-inline.
SIMDJSON_PUSH_DISABLE_WARNINGS
SIMDJSON_DISABLE_STRICT_OVERFLOW_WARNING
// True while the json_iterator's depth is at least this value's depth.
simdjson_inline bool value_iterator::is_open() const noexcept {
  return _json_iter->depth() >= depth();
}
SIMDJSON_POP_DISABLE_WARNINGS

// Forwards to json_iterator::at_end().
simdjson_inline bool value_iterator::at_end() const noexcept {
  return _json_iter->at_end();
}

// True when the current token position equals this value's start position.
simdjson_inline bool value_iterator::at_start() const noexcept {
  return _json_iter->token.position() == start_position();
}

// True when the current token is the one immediately after the start position.
simdjson_inline bool value_iterator::at_first_field() const noexcept {
  SIMDJSON_ASSUME( _json_iter->token._position > _start_position );
  return _json_iter->token.position() == start_position() + 1;
}

// Forwards to json_iterator::abandon().
simdjson_inline void value_iterator::abandon() noexcept {
  _json_iter->abandon();
}

// Depth of this value.
simdjson_warn_unused simdjson_inline depth_t value_iterator::depth() const noexcept {
  return _depth;
}
// Error currently stored on the underlying json_iterator.
simdjson_warn_unused simdjson_inline error_code value_iterator::error() const noexcept {
  return _json_iter->error;
}
// Reference to the json_iterator's string buffer write location.
simdjson_warn_unused simdjson_inline uint8_t *&value_iterator::string_buf_loc() noexcept {
  return _json_iter->string_buf_loc();
}
// Underlying json_iterator (const access).
simdjson_warn_unused simdjson_inline const json_iterator &value_iterator::json_iter() const noexcept {
  return *_json_iter;
}
// Underlying json_iterator (mutable access).
simdjson_warn_unused simdjson_inline json_iterator &value_iterator::json_iter() noexcept {
  return *_json_iter;
}

// Pointer to the JSON text at this value's start position.
simdjson_inline const uint8_t *value_iterator::peek_start() const noexcept {
  return _json_iter->peek(start_position());
}
// Result of json_iterator::peek_length() for this value's start position.
simdjson_inline uint32_t value_iterator::peek_start_length() const noexcept {
  return _json_iter->peek_length(start_position());
}

// Returns a pointer to this scalar's JSON text without moving the cursor.
simdjson_inline const uint8_t *value_iterator::peek_scalar(const char *type) noexcept {
  logger::log_value(*_json_iter, start_position(), depth(), type);
  // If we're not at the position anymore, we don't want to advance the cursor.
  if (!is_at_start()) { return peek_start(); }

  // Still at the start: return the current token; advancing is left to advance_scalar().
  assert_at_start();
  return _json_iter->peek();
}

// Consumes this scalar if the cursor is still on it; does nothing otherwise.
simdjson_inline void value_iterator::advance_scalar(const char *type) noexcept {
  logger::log_value(*_json_iter, start_position(), depth(), type);
  // If we're not at the position anymore, we don't want to advance the cursor.
  if (!is_at_start()) { return; }

  // Advance the cursor, decreasing depth to signify that we have retrieved the value.
  assert_at_start();
  _json_iter->return_current_and_advance();
  _json_iter->ascend_to(depth()-1);
}

// Verifies that this value begins with `start_char`, returning an incorrect-type error with
// `incorrect_type_message` otherwise. The opening character is consumed only when the cursor
// is still at the start of the value; if the cursor has already moved on, only the check is done.
simdjson_inline error_code value_iterator::start_container(uint8_t start_char, const char *incorrect_type_message, const char *type) noexcept {
  logger::log_start_value(*_json_iter, start_position(), depth(), type);
  // If we're not at the position anymore, we don't want to advance the cursor.
  const uint8_t *json;
  if (!is_at_start()) {
#if SIMDJSON_DEVELOPMENT_CHECKS
    if (!is_at_iterator_start()) { return OUT_OF_ORDER_ITERATION; }
#endif
    json = peek_start();
    if (*json != start_char) { return incorrect_type_error(incorrect_type_message); }
  } else {
    assert_at_start();
    /**
     * We should be prudent. Let us peek. If it is not the right type, we
     * return an error. Only once we have determined that we have the right
     * type are we allowed to advance!
     */
    json = _json_iter->peek();
    if (*json != start_char) { return incorrect_type_error(incorrect_type_message); }
    _json_iter->return_current_and_advance();
  }

  return SUCCESS;
}

// Like peek_scalar(), but asserts the value is at the root when still at its start.
simdjson_inline const uint8_t *value_iterator::peek_root_scalar(const char *type) noexcept {
  logger::log_value(*_json_iter, start_position(), depth(), type);
  if (!is_at_start()) { return peek_start(); }

  assert_at_root();
  return _json_iter->peek();
}
// Like peek_scalar(), but asserts the value is below the root when still at its start.
simdjson_inline const uint8_t *value_iterator::peek_non_root_scalar(const char *type) noexcept {
  logger::log_value(*_json_iter, start_position(), depth(), type);
  if (!is_at_start()) { return peek_start(); }

  assert_at_non_root_start();
  return _json_iter->peek();
}

// Like advance_scalar(), but asserts the value is at the root.
simdjson_inline void value_iterator::advance_root_scalar(const char *type) noexcept {
  logger::log_value(*_json_iter, start_position(), depth(), type);
  if (!is_at_start()) { return; }

  assert_at_root();
  _json_iter->return_current_and_advance();
  _json_iter->ascend_to(depth()-1);
}
// Like advance_scalar(), but asserts the value is below the root.
simdjson_inline void value_iterator::advance_non_root_scalar(const char *type) noexcept {
  logger::log_value(*_json_iter, start_position(), depth(), type);
  if (!is_at_start()) { return; }

  assert_at_non_root_start();
  _json_iter->return_current_and_advance();
  _json_iter->ascend_to(depth()-1);
}

// Logs `message` and returns INCORRECT_TYPE.
simdjson_inline error_code value_iterator::incorrect_type_error(const char *message) const noexcept {
  logger::log_error(*_json_iter, start_position(), depth(), message);
  return INCORRECT_TYPE;
}

// True when the cursor is at this value's start position.
simdjson_inline bool value_iterator::is_at_start() const noexcept {
  return position() == start_position();
}

// True when the iterator depth equals this value's depth and the current token starts with '"'.
simdjson_inline bool value_iterator::is_at_key() const noexcept {
  // Keys are at the same depth as the object.
  // Note here that we could be safer and check that we are within an object,
  // but we do not.
  return _depth == _json_iter->_depth && *_json_iter->peek() == '"';
}

// True when the cursor is one or two tokens past the start position.
simdjson_inline bool value_iterator::is_at_iterator_start() const noexcept {
  // We can legitimately be either at the first value ([1]), or after the array if it's empty ([]).
  auto delta = position() - start_position();
  return delta == 1 || delta == 2;
}

// The assert_* helpers state position/depth invariants through SIMDJSON_ASSUME.

// Cursor is on the start token, at this value's depth.
inline void value_iterator::assert_at_start() const noexcept {
  SIMDJSON_ASSUME( _json_iter->token._position == _start_position );
  SIMDJSON_ASSUME( _json_iter->_depth == _depth );
  SIMDJSON_ASSUME( _depth > 0 );
}

// Cursor is on the token right after the start token, at this value's depth.
inline void value_iterator::assert_at_container_start() const noexcept {
  SIMDJSON_ASSUME( _json_iter->token._position == _start_position + 1 );
  SIMDJSON_ASSUME( _json_iter->_depth == _depth );
  SIMDJSON_ASSUME( _depth > 0 );
}

// Cursor is somewhere past the start token, at this value's depth.
inline void value_iterator::assert_at_next() const noexcept {
  SIMDJSON_ASSUME( _json_iter->token._position > _start_position );
  SIMDJSON_ASSUME( _json_iter->_depth == _depth );
  SIMDJSON_ASSUME( _depth > 0 );
}

// Rewinds the json_iterator to this value's start token and depth.
simdjson_inline void value_iterator::move_at_start() noexcept {
  _json_iter->_depth = _depth;
  _json_iter->token.set_position(_start_position);
}

// Rewinds the json_iterator to the token right after the start token, at this value's depth.
simdjson_inline void value_iterator::move_at_container_start() noexcept {
  _json_iter->_depth = _depth;
  _json_iter->token.set_position(_start_position + 1);
}

// Returns the stored error if there is one; otherwise rewinds to just inside the array
// and re-runs started_array().
simdjson_inline simdjson_result value_iterator::reset_array() noexcept {
  if(error()) { return error(); }
  move_at_container_start();
  return started_array();
}

// Returns the stored error if there is one; otherwise rewinds to just inside the object
// and re-runs started_object().
simdjson_inline simdjson_result value_iterator::reset_object() noexcept {
  if(error()) { return error(); }
  move_at_container_start();
  return started_object();
}

// Cursor is past the start token and exactly one level below this value's depth.
inline void value_iterator::assert_at_child() const noexcept {
  SIMDJSON_ASSUME( _json_iter->token._position > _start_position );
  SIMDJSON_ASSUME( _json_iter->_depth == _depth + 1 );
  SIMDJSON_ASSUME( _depth > 0 );
}

// At the start of a value whose depth is 1.
inline void value_iterator::assert_at_root() const noexcept {
  assert_at_start();
  SIMDJSON_ASSUME( _depth == 1 );
}

// At the start of a value whose depth is greater than 1.
inline void value_iterator::assert_at_non_root_start() const noexcept {
  assert_at_start();
  SIMDJSON_ASSUME( _depth > 1 );
}

// The iterator is bound to a json_iterator.
inline void value_iterator::assert_is_valid() const noexcept {
  SIMDJSON_ASSUME( _json_iter != nullptr );
}

// True when the iterator is bound to a json_iterator.
simdjson_inline bool value_iterator::is_valid() const noexcept {
  return _json_iter != nullptr;
}

// Classifies the value from the first character at its start position;
// any character not listed below yields TAPE_ERROR.
simdjson_inline simdjson_result value_iterator::type() const noexcept {
  switch (*peek_start()) {
    case '{':
      return json_type::object;
    case '[':
      return json_type::array;
    case '"':
      return json_type::string;
    case 'n':
      return json_type::null;
    case 't': case 'f':
      return json_type::boolean;
    case '-':
    case '0': case '1': case '2': case '3': case '4':
    case '5': case '6': case '7': case '8': case '9':
      return json_type::number;
    default:
      return TAPE_ERROR;
  }
}

// Token position where this value starts.
simdjson_inline token_position value_iterator::start_position() const noexcept {
  return _start_position;
}

// Current token position of the underlying json_iterator.
simdjson_inline token_position value_iterator::position() const noexcept {
  return _json_iter->position();
}

// Forwards to json_iterator::end_position().
simdjson_inline token_position value_iterator::end_position() const noexcept {
  return _json_iter->end_position();
}

// Forwards to json_iterator::last_position().
simdjson_inline token_position value_iterator::last_position() const noexcept {
  return _json_iter->last_position();
}

// Forwards to json_iterator::report_error().
simdjson_inline error_code value_iterator::report_error(error_code error, const char *message) noexcept {
  return _json_iter->report_error(error, message);
}

} // namespace ondemand
} // namespace fallback
} // namespace simdjson

namespace simdjson {

// Wraps a value_iterator (success case) by forwarding it to the result base class.
simdjson_inline simdjson_result::simdjson_result(fallback::ondemand::value_iterator &&value) noexcept
    : implementation_simdjson_result_base(std::forward(value)) {}
// Wraps an error code (failure case).
simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept
    : implementation_simdjson_result_base(error) {}

} // namespace simdjson

#endif // SIMDJSON_GENERIC_ONDEMAND_VALUE_ITERATOR_INL_H
/* end file simdjson/generic/ondemand/value_iterator-inl.h for fallback */
/* end file simdjson/generic/ondemand/amalgamated.h for fallback */
/* including simdjson/fallback/end.h: #include "simdjson/fallback/end.h" */
/*
begin file simdjson/fallback/end.h */ /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #include "simdjson/fallback/base.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ /* undefining SIMDJSON_IMPLEMENTATION from "fallback" */ #undef SIMDJSON_IMPLEMENTATION /* end file simdjson/fallback/end.h */ #endif // SIMDJSON_FALLBACK_ONDEMAND_H /* end file simdjson/fallback/ondemand.h */ #elif SIMDJSON_BUILTIN_IMPLEMENTATION_IS(haswell) /* including simdjson/haswell/ondemand.h: #include "simdjson/haswell/ondemand.h" */ /* begin file simdjson/haswell/ondemand.h */ #ifndef SIMDJSON_HASWELL_ONDEMAND_H #define SIMDJSON_HASWELL_ONDEMAND_H /* including simdjson/haswell/begin.h: #include "simdjson/haswell/begin.h" */ /* begin file simdjson/haswell/begin.h */ /* defining SIMDJSON_IMPLEMENTATION to "haswell" */ #define SIMDJSON_IMPLEMENTATION haswell /* including simdjson/haswell/base.h: #include "simdjson/haswell/base.h" */ /* begin file simdjson/haswell/base.h */ #ifndef SIMDJSON_HASWELL_BASE_H #define SIMDJSON_HASWELL_BASE_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #include "simdjson/base.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ // The constructor may be executed on any host, so we take care not to use SIMDJSON_TARGET_HASWELL namespace simdjson { /** * Implementation for Haswell (Intel AVX2). 
*/ namespace haswell { class implementation; namespace { namespace simd { template struct simd8; template struct simd8x64; } // namespace simd } // unnamed namespace } // namespace haswell } // namespace simdjson #endif // SIMDJSON_HASWELL_BASE_H /* end file simdjson/haswell/base.h */ /* including simdjson/haswell/intrinsics.h: #include "simdjson/haswell/intrinsics.h" */ /* begin file simdjson/haswell/intrinsics.h */ #ifndef SIMDJSON_HASWELL_INTRINSICS_H #define SIMDJSON_HASWELL_INTRINSICS_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #include "simdjson/haswell/base.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ #if SIMDJSON_VISUAL_STUDIO // under clang within visual studio, this will include #include // visual studio or clang #else #include // elsewhere #endif // SIMDJSON_VISUAL_STUDIO #if SIMDJSON_CLANG_VISUAL_STUDIO /** * You are not supposed, normally, to include these * headers directly. Instead you should either include intrin.h * or x86intrin.h. However, when compiling with clang * under Windows (i.e., when _MSC_VER is set), these headers * only get included *if* the corresponding features are detected * from macros: * e.g., if __AVX2__ is set... in turn, we normally set these * macros by compiling against the corresponding architecture * (e.g., arch:AVX2, -mavx2, etc.) which compiles the whole * software with these advanced instructions. In simdjson, we * want to compile the whole program for a generic target, * and only target our specific kernels. As a workaround, * we directly include the needed headers. These headers would * normally guard against such usage, but we carefully included * (or ) before, so the headers * are fooled. 
*/ #include // for _blsr_u64 #include // for __lzcnt64 #include // for most things (AVX2, AVX512, _popcnt64) #include #include #include #include #include // for _mm_clmulepi64_si128 // unfortunately, we may not get _blsr_u64, but, thankfully, clang // has it as a macro. #ifndef _blsr_u64 // we roll our own #define _blsr_u64(n) ((n - 1) & n) #endif // _blsr_u64 #endif // SIMDJSON_CLANG_VISUAL_STUDIO static_assert(sizeof(__m256i) <= simdjson::SIMDJSON_PADDING, "insufficient padding for haswell kernel."); #endif // SIMDJSON_HASWELL_INTRINSICS_H /* end file simdjson/haswell/intrinsics.h */ #if !SIMDJSON_CAN_ALWAYS_RUN_HASWELL SIMDJSON_TARGET_REGION("avx2,bmi,pclmul,lzcnt,popcnt") #endif /* including simdjson/haswell/bitmanipulation.h: #include "simdjson/haswell/bitmanipulation.h" */ /* begin file simdjson/haswell/bitmanipulation.h */ #ifndef SIMDJSON_HASWELL_BITMANIPULATION_H #define SIMDJSON_HASWELL_BITMANIPULATION_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #include "simdjson/haswell/base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/haswell/intrinsics.h" */ /* amalgamation skipped (editor-only): #include "simdjson/haswell/bitmask.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { namespace haswell { namespace { // We sometimes call trailing_zero on inputs that are zero, // but the algorithms do not end up using the returned value. // Sadly, sanitizers are not smart enough to figure it out. SIMDJSON_NO_SANITIZE_UNDEFINED // This function can be used safely even if not all bytes have been // initialized. 
// See issue https://github.com/simdjson/simdjson/issues/1965 SIMDJSON_NO_SANITIZE_MEMORY simdjson_inline int trailing_zeroes(uint64_t input_num) { #if SIMDJSON_REGULAR_VISUAL_STUDIO return (int)_tzcnt_u64(input_num); #else // SIMDJSON_REGULAR_VISUAL_STUDIO //////// // You might expect the next line to be equivalent to // return (int)_tzcnt_u64(input_num); // but the generated code differs and might be less efficient? //////// return __builtin_ctzll(input_num); #endif // SIMDJSON_REGULAR_VISUAL_STUDIO } /* result might be undefined when input_num is zero */ simdjson_inline uint64_t clear_lowest_bit(uint64_t input_num) { return _blsr_u64(input_num); } /* result might be undefined when input_num is zero */ simdjson_inline int leading_zeroes(uint64_t input_num) { return int(_lzcnt_u64(input_num)); } #if SIMDJSON_REGULAR_VISUAL_STUDIO simdjson_inline unsigned __int64 count_ones(uint64_t input_num) { // note: we do not support legacy 32-bit Windows in this kernel return __popcnt64(input_num);// Visual Studio wants two underscores } #else simdjson_inline long long int count_ones(uint64_t input_num) { return _popcnt64(input_num); } #endif simdjson_inline bool add_overflow(uint64_t value1, uint64_t value2, uint64_t *result) { #if SIMDJSON_REGULAR_VISUAL_STUDIO return _addcarry_u64(0, value1, value2, reinterpret_cast(result)); #else return __builtin_uaddll_overflow(value1, value2, reinterpret_cast(result)); #endif } } // unnamed namespace } // namespace haswell } // namespace simdjson #endif // SIMDJSON_HASWELL_BITMANIPULATION_H /* end file simdjson/haswell/bitmanipulation.h */ /* including simdjson/haswell/bitmask.h: #include "simdjson/haswell/bitmask.h" */ /* begin file simdjson/haswell/bitmask.h */ #ifndef SIMDJSON_HASWELL_BITMASK_H #define SIMDJSON_HASWELL_BITMASK_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #include "simdjson/haswell/base.h" */ /* amalgamation skipped (editor-only): #include 
"simdjson/haswell/intrinsics.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { namespace haswell { namespace { // // Perform a "cumulative bitwise xor," flipping bits each time a 1 is encountered. // // For example, prefix_xor(00100100) == 00011100 // simdjson_inline uint64_t prefix_xor(const uint64_t bitmask) { // There should be no such thing with a processor supporting avx2 // but not clmul. __m128i all_ones = _mm_set1_epi8('\xFF'); __m128i result = _mm_clmulepi64_si128(_mm_set_epi64x(0ULL, bitmask), all_ones, 0); return _mm_cvtsi128_si64(result); } } // unnamed namespace } // namespace haswell } // namespace simdjson #endif // SIMDJSON_HASWELL_BITMASK_H /* end file simdjson/haswell/bitmask.h */ /* including simdjson/haswell/numberparsing_defs.h: #include "simdjson/haswell/numberparsing_defs.h" */ /* begin file simdjson/haswell/numberparsing_defs.h */ #ifndef SIMDJSON_HASWELL_NUMBERPARSING_DEFS_H #define SIMDJSON_HASWELL_NUMBERPARSING_DEFS_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #include "simdjson/haswell/base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/haswell/intrinsics.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #include "simdjson/internal/numberparsing_tables.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { namespace haswell { namespace numberparsing { /** @private */ static simdjson_inline uint32_t parse_eight_digits_unrolled(const uint8_t *chars) { // this actually computes *16* values so we are being wasteful. 
const __m128i ascii0 = _mm_set1_epi8('0'); const __m128i mul_1_10 = _mm_setr_epi8(10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1); const __m128i mul_1_100 = _mm_setr_epi16(100, 1, 100, 1, 100, 1, 100, 1); const __m128i mul_1_10000 = _mm_setr_epi16(10000, 1, 10000, 1, 10000, 1, 10000, 1); const __m128i input = _mm_sub_epi8( _mm_loadu_si128(reinterpret_cast(chars)), ascii0); const __m128i t1 = _mm_maddubs_epi16(input, mul_1_10); const __m128i t2 = _mm_madd_epi16(t1, mul_1_100); const __m128i t3 = _mm_packus_epi32(t2, t2); const __m128i t4 = _mm_madd_epi16(t3, mul_1_10000); return _mm_cvtsi128_si32( t4); // only captures the sum of the first 8 digits, drop the rest } /** @private */ simdjson_inline internal::value128 full_multiplication(uint64_t value1, uint64_t value2) { internal::value128 answer; #if SIMDJSON_REGULAR_VISUAL_STUDIO || SIMDJSON_IS_32BITS #ifdef _M_ARM64 // ARM64 has native support for 64-bit multiplications, no need to emultate answer.high = __umulh(value1, value2); answer.low = value1 * value2; #else answer.low = _umul128(value1, value2, &answer.high); // _umul128 not available on ARM64 #endif // _M_ARM64 #else // SIMDJSON_REGULAR_VISUAL_STUDIO || SIMDJSON_IS_32BITS __uint128_t r = (static_cast<__uint128_t>(value1)) * value2; answer.low = uint64_t(r); answer.high = uint64_t(r >> 64); #endif return answer; } } // namespace numberparsing } // namespace haswell } // namespace simdjson #define SIMDJSON_SWAR_NUMBER_PARSING 1 #endif // SIMDJSON_HASWELL_NUMBERPARSING_DEFS_H /* end file simdjson/haswell/numberparsing_defs.h */ /* including simdjson/haswell/simd.h: #include "simdjson/haswell/simd.h" */ /* begin file simdjson/haswell/simd.h */ #ifndef SIMDJSON_HASWELL_SIMD_H #define SIMDJSON_HASWELL_SIMD_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #include "simdjson/haswell/base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/haswell/intrinsics.h" */ /* 
amalgamation skipped (editor-only): #include "simdjson/haswell/bitmanipulation.h" */
/* amalgamation skipped (editor-only): #include "simdjson/internal/simdprune_tables.h" */
/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */

namespace simdjson {
namespace haswell {
namespace {
namespace simd {

// Common base of the 256-bit SIMD wrappers: stores the raw __m256i and provides
// the bitwise operators. Child is the deriving type, so that the operators can
// return (and the compound assignments can assign through) the derived type.
template<typename Child>
struct base {
  __m256i value;

  // Zero constructor
  simdjson_inline base() : value{__m256i()} {}

  // Conversion from SIMD register
  simdjson_inline base(const __m256i _value) : value(_value) {}

  // Conversion to SIMD register
  simdjson_inline operator const __m256i&() const { return this->value; }
  simdjson_inline operator __m256i&() { return this->value; }

  // Bit operations
  simdjson_inline Child operator|(const Child other) const { return _mm256_or_si256(*this, other); }
  simdjson_inline Child operator&(const Child other) const { return _mm256_and_si256(*this, other); }
  simdjson_inline Child operator^(const Child other) const { return _mm256_xor_si256(*this, other); }
  // Computes (*this) & ~other; note that the intrinsic negates its FIRST operand.
  simdjson_inline Child bit_andnot(const Child other) const { return _mm256_andnot_si256(other, *this); }
  simdjson_inline Child& operator|=(const Child other) { auto this_cast = static_cast<Child*>(this); *this_cast = *this_cast | other; return *this_cast; }
  simdjson_inline Child& operator&=(const Child other) { auto this_cast = static_cast<Child*>(this); *this_cast = *this_cast & other; return *this_cast; }
  simdjson_inline Child& operator^=(const Child other) { auto this_cast = static_cast<Child*>(this); *this_cast = *this_cast ^ other; return *this_cast; }
};

// Forward-declared so they can be used by splat and friends.
template struct simd8; template> struct base8: base> { typedef uint32_t bitmask_t; typedef uint64_t bitmask2_t; simdjson_inline base8() : base>() {} simdjson_inline base8(const __m256i _value) : base>(_value) {} friend simdjson_really_inline Mask operator==(const simd8 lhs, const simd8 rhs) { return _mm256_cmpeq_epi8(lhs, rhs); } static const int SIZE = sizeof(base::value); template simdjson_inline simd8 prev(const simd8 prev_chunk) const { return _mm256_alignr_epi8(*this, _mm256_permute2x128_si256(prev_chunk, *this, 0x21), 16 - N); } }; // SIMD byte mask type (returned by things like eq and gt) template<> struct simd8: base8 { static simdjson_inline simd8 splat(bool _value) { return _mm256_set1_epi8(uint8_t(-(!!_value))); } simdjson_inline simd8() : base8() {} simdjson_inline simd8(const __m256i _value) : base8(_value) {} // Splat constructor simdjson_inline simd8(bool _value) : base8(splat(_value)) {} simdjson_inline int to_bitmask() const { return _mm256_movemask_epi8(*this); } simdjson_inline bool any() const { return !_mm256_testz_si256(*this, *this); } simdjson_inline simd8 operator~() const { return *this ^ true; } }; template struct base8_numeric: base8 { static simdjson_inline simd8 splat(T _value) { return _mm256_set1_epi8(_value); } static simdjson_inline simd8 zero() { return _mm256_setzero_si256(); } static simdjson_inline simd8 load(const T values[32]) { return _mm256_loadu_si256(reinterpret_cast(values)); } // Repeat 16 values as many times as necessary (usually for lookup tables) static simdjson_inline simd8 repeat_16( T v0, T v1, T v2, T v3, T v4, T v5, T v6, T v7, T v8, T v9, T v10, T v11, T v12, T v13, T v14, T v15 ) { return simd8( v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10,v11,v12,v13,v14,v15, v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10,v11,v12,v13,v14,v15 ); } simdjson_inline base8_numeric() : base8() {} simdjson_inline base8_numeric(const __m256i _value) : base8(_value) {} // Store to array simdjson_inline void store(T dst[32]) const { 
return _mm256_storeu_si256(reinterpret_cast<__m256i *>(dst), *this); } // Addition/subtraction are the same for signed and unsigned simdjson_inline simd8 operator+(const simd8 other) const { return _mm256_add_epi8(*this, other); } simdjson_inline simd8 operator-(const simd8 other) const { return _mm256_sub_epi8(*this, other); } simdjson_inline simd8& operator+=(const simd8 other) { *this = *this + other; return *static_cast*>(this); } simdjson_inline simd8& operator-=(const simd8 other) { *this = *this - other; return *static_cast*>(this); } // Override to distinguish from bool version simdjson_inline simd8 operator~() const { return *this ^ 0xFFu; } // Perform a lookup assuming the value is between 0 and 16 (undefined behavior for out of range values) template simdjson_inline simd8 lookup_16(simd8 lookup_table) const { return _mm256_shuffle_epi8(lookup_table, *this); } // Copies to 'output" all bytes corresponding to a 0 in the mask (interpreted as a bitset). // Passing a 0 value for mask would be equivalent to writing out every byte to output. // Only the first 32 - count_ones(mask) bytes of the result are significant but 32 bytes // get written. // Design consideration: it seems like a function with the // signature simd8 compress(uint32_t mask) would be // sensible, but the AVX ISA makes this kind of approach difficult. template simdjson_inline void compress(uint32_t mask, L * output) const { using internal::thintable_epi8; using internal::BitsSetTable256mul2; using internal::pshufb_combine_table; // this particular implementation was inspired by work done by @animetosho // we do it in four steps, first 8 bytes and then second 8 bytes... uint8_t mask1 = uint8_t(mask); // least significant 8 bits uint8_t mask2 = uint8_t(mask >> 8); // second least significant 8 bits uint8_t mask3 = uint8_t(mask >> 16); // ... uint8_t mask4 = uint8_t(mask >> 24); // ... 
// next line just loads the 64-bit values thintable_epi8[mask1] and // thintable_epi8[mask2] into a 128-bit register, using only // two instructions on most compilers. __m256i shufmask = _mm256_set_epi64x(thintable_epi8[mask4], thintable_epi8[mask3], thintable_epi8[mask2], thintable_epi8[mask1]); // we increment by 0x08 the second half of the mask and so forth shufmask = _mm256_add_epi8(shufmask, _mm256_set_epi32(0x18181818, 0x18181818, 0x10101010, 0x10101010, 0x08080808, 0x08080808, 0, 0)); // this is the version "nearly pruned" __m256i pruned = _mm256_shuffle_epi8(*this, shufmask); // we still need to put the pieces back together. // we compute the popcount of the first words: int pop1 = BitsSetTable256mul2[mask1]; int pop3 = BitsSetTable256mul2[mask3]; // then load the corresponding mask // could be done with _mm256_loadu2_m128i but many standard libraries omit this intrinsic. __m256i v256 = _mm256_castsi128_si256( _mm_loadu_si128(reinterpret_cast(pshufb_combine_table + pop1 * 8))); __m256i compactmask = _mm256_insertf128_si256(v256, _mm_loadu_si128(reinterpret_cast(pshufb_combine_table + pop3 * 8)), 1); __m256i almostthere = _mm256_shuffle_epi8(pruned, compactmask); // We just need to write out the result. // This is the tricky bit that is hard to do // if we want to return a SIMD register, since there // is no single-instruction approach to recombine // the two 128-bit lanes with an offset. 
__m128i v128; v128 = _mm256_castsi256_si128(almostthere); _mm_storeu_si128( reinterpret_cast<__m128i *>(output), v128); v128 = _mm256_extractf128_si256(almostthere, 1); _mm_storeu_si128( reinterpret_cast<__m128i *>(output + 16 - count_ones(mask & 0xFFFF)), v128); } template simdjson_inline simd8 lookup_16( L replace0, L replace1, L replace2, L replace3, L replace4, L replace5, L replace6, L replace7, L replace8, L replace9, L replace10, L replace11, L replace12, L replace13, L replace14, L replace15) const { return lookup_16(simd8::repeat_16( replace0, replace1, replace2, replace3, replace4, replace5, replace6, replace7, replace8, replace9, replace10, replace11, replace12, replace13, replace14, replace15 )); } }; // Signed bytes template<> struct simd8 : base8_numeric { simdjson_inline simd8() : base8_numeric() {} simdjson_inline simd8(const __m256i _value) : base8_numeric(_value) {} // Splat constructor simdjson_inline simd8(int8_t _value) : simd8(splat(_value)) {} // Array constructor simdjson_inline simd8(const int8_t values[32]) : simd8(load(values)) {} // Member-by-member initialization simdjson_inline simd8( int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15, int8_t v16, int8_t v17, int8_t v18, int8_t v19, int8_t v20, int8_t v21, int8_t v22, int8_t v23, int8_t v24, int8_t v25, int8_t v26, int8_t v27, int8_t v28, int8_t v29, int8_t v30, int8_t v31 ) : simd8(_mm256_setr_epi8( v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10,v11,v12,v13,v14,v15, v16,v17,v18,v19,v20,v21,v22,v23, v24,v25,v26,v27,v28,v29,v30,v31 )) {} // Repeat 16 values as many times as necessary (usually for lookup tables) simdjson_inline static simd8 repeat_16( int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15 ) { return simd8( v0, v1, v2, v3, 
v4, v5, v6, v7, v8, v9, v10,v11,v12,v13,v14,v15, v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10,v11,v12,v13,v14,v15 ); } // Order-sensitive comparisons simdjson_inline simd8 max_val(const simd8 other) const { return _mm256_max_epi8(*this, other); } simdjson_inline simd8 min_val(const simd8 other) const { return _mm256_min_epi8(*this, other); } simdjson_inline simd8 operator>(const simd8 other) const { return _mm256_cmpgt_epi8(*this, other); } simdjson_inline simd8 operator<(const simd8 other) const { return _mm256_cmpgt_epi8(other, *this); } }; // Unsigned bytes template<> struct simd8: base8_numeric { simdjson_inline simd8() : base8_numeric() {} simdjson_inline simd8(const __m256i _value) : base8_numeric(_value) {} // Splat constructor simdjson_inline simd8(uint8_t _value) : simd8(splat(_value)) {} // Array constructor simdjson_inline simd8(const uint8_t values[32]) : simd8(load(values)) {} // Member-by-member initialization simdjson_inline simd8( uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15, uint8_t v16, uint8_t v17, uint8_t v18, uint8_t v19, uint8_t v20, uint8_t v21, uint8_t v22, uint8_t v23, uint8_t v24, uint8_t v25, uint8_t v26, uint8_t v27, uint8_t v28, uint8_t v29, uint8_t v30, uint8_t v31 ) : simd8(_mm256_setr_epi8( v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10,v11,v12,v13,v14,v15, v16,v17,v18,v19,v20,v21,v22,v23, v24,v25,v26,v27,v28,v29,v30,v31 )) {} // Repeat 16 values as many times as necessary (usually for lookup tables) simdjson_inline static simd8 repeat_16( uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15 ) { return simd8( v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10,v11,v12,v13,v14,v15, v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10,v11,v12,v13,v14,v15 ); } // 
Saturated math simdjson_inline simd8 saturating_add(const simd8 other) const { return _mm256_adds_epu8(*this, other); } simdjson_inline simd8 saturating_sub(const simd8 other) const { return _mm256_subs_epu8(*this, other); } // Order-specific operations simdjson_inline simd8 max_val(const simd8 other) const { return _mm256_max_epu8(*this, other); } simdjson_inline simd8 min_val(const simd8 other) const { return _mm256_min_epu8(other, *this); } // Same as >, but only guarantees true is nonzero (< guarantees true = -1) simdjson_inline simd8 gt_bits(const simd8 other) const { return this->saturating_sub(other); } // Same as <, but only guarantees true is nonzero (< guarantees true = -1) simdjson_inline simd8 lt_bits(const simd8 other) const { return other.saturating_sub(*this); } simdjson_inline simd8 operator<=(const simd8 other) const { return other.max_val(*this) == other; } simdjson_inline simd8 operator>=(const simd8 other) const { return other.min_val(*this) == other; } simdjson_inline simd8 operator>(const simd8 other) const { return this->gt_bits(other).any_bits_set(); } simdjson_inline simd8 operator<(const simd8 other) const { return this->lt_bits(other).any_bits_set(); } // Bit-specific operations simdjson_inline simd8 bits_not_set() const { return *this == uint8_t(0); } simdjson_inline simd8 bits_not_set(simd8 bits) const { return (*this & bits).bits_not_set(); } simdjson_inline simd8 any_bits_set() const { return ~this->bits_not_set(); } simdjson_inline simd8 any_bits_set(simd8 bits) const { return ~this->bits_not_set(bits); } simdjson_inline bool is_ascii() const { return _mm256_movemask_epi8(*this) == 0; } simdjson_inline bool bits_not_set_anywhere() const { return _mm256_testz_si256(*this, *this); } simdjson_inline bool any_bits_set_anywhere() const { return !bits_not_set_anywhere(); } simdjson_inline bool bits_not_set_anywhere(simd8 bits) const { return _mm256_testz_si256(*this, bits); } simdjson_inline bool any_bits_set_anywhere(simd8 bits) const { 
return !bits_not_set_anywhere(bits); } template simdjson_inline simd8 shr() const { return simd8(_mm256_srli_epi16(*this, N)) & uint8_t(0xFFu >> N); } template simdjson_inline simd8 shl() const { return simd8(_mm256_slli_epi16(*this, N)) & uint8_t(0xFFu << N); } // Get one of the bits and make a bitmask out of it. // e.g. value.get_bit<7>() gets the high bit template simdjson_inline int get_bit() const { return _mm256_movemask_epi8(_mm256_slli_epi16(*this, 7-N)); } }; template struct simd8x64 { static constexpr int NUM_CHUNKS = 64 / sizeof(simd8); static_assert(NUM_CHUNKS == 2, "Haswell kernel should use two registers per 64-byte block."); const simd8 chunks[NUM_CHUNKS]; simd8x64(const simd8x64& o) = delete; // no copy allowed simd8x64& operator=(const simd8& other) = delete; // no assignment allowed simd8x64() = delete; // no default constructor allowed simdjson_inline simd8x64(const simd8 chunk0, const simd8 chunk1) : chunks{chunk0, chunk1} {} simdjson_inline simd8x64(const T ptr[64]) : chunks{simd8::load(ptr), simd8::load(ptr+32)} {} simdjson_inline uint64_t compress(uint64_t mask, T * output) const { uint32_t mask1 = uint32_t(mask); uint32_t mask2 = uint32_t(mask >> 32); this->chunks[0].compress(mask1, output); this->chunks[1].compress(mask2, output + 32 - count_ones(mask1)); return 64 - count_ones(mask); } simdjson_inline void store(T ptr[64]) const { this->chunks[0].store(ptr+sizeof(simd8)*0); this->chunks[1].store(ptr+sizeof(simd8)*1); } simdjson_inline uint64_t to_bitmask() const { uint64_t r_lo = uint32_t(this->chunks[0].to_bitmask()); uint64_t r_hi = this->chunks[1].to_bitmask(); return r_lo | (r_hi << 32); } simdjson_inline simd8 reduce_or() const { return this->chunks[0] | this->chunks[1]; } simdjson_inline simd8x64 bit_or(const T m) const { const simd8 mask = simd8::splat(m); return simd8x64( this->chunks[0] | mask, this->chunks[1] | mask ); } simdjson_inline uint64_t eq(const T m) const { const simd8 mask = simd8::splat(m); return simd8x64( 
this->chunks[0] == mask, this->chunks[1] == mask ).to_bitmask(); } simdjson_inline uint64_t eq(const simd8x64 &other) const { return simd8x64( this->chunks[0] == other.chunks[0], this->chunks[1] == other.chunks[1] ).to_bitmask(); } simdjson_inline uint64_t lteq(const T m) const { const simd8 mask = simd8::splat(m); return simd8x64( this->chunks[0] <= mask, this->chunks[1] <= mask ).to_bitmask(); } }; // struct simd8x64 } // namespace simd } // unnamed namespace } // namespace haswell } // namespace simdjson #endif // SIMDJSON_HASWELL_SIMD_H /* end file simdjson/haswell/simd.h */ /* including simdjson/haswell/stringparsing_defs.h: #include "simdjson/haswell/stringparsing_defs.h" */ /* begin file simdjson/haswell/stringparsing_defs.h */ #ifndef SIMDJSON_HASWELL_STRINGPARSING_DEFS_H #define SIMDJSON_HASWELL_STRINGPARSING_DEFS_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #include "simdjson/haswell/base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/haswell/simd.h" */ /* amalgamation skipped (editor-only): #include "simdjson/haswell/bitmanipulation.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { namespace haswell { namespace { using namespace simd; // Holds backslashes and quotes locations. 
struct backslash_and_quote { public: static constexpr uint32_t BYTES_PROCESSED = 32; simdjson_inline static backslash_and_quote copy_and_find(const uint8_t *src, uint8_t *dst); simdjson_inline bool has_quote_first() { return ((bs_bits - 1) & quote_bits) != 0; } simdjson_inline bool has_backslash() { return ((quote_bits - 1) & bs_bits) != 0; } simdjson_inline int quote_index() { return trailing_zeroes(quote_bits); } simdjson_inline int backslash_index() { return trailing_zeroes(bs_bits); } uint32_t bs_bits; uint32_t quote_bits; }; // struct backslash_and_quote simdjson_inline backslash_and_quote backslash_and_quote::copy_and_find(const uint8_t *src, uint8_t *dst) { // this can read up to 15 bytes beyond the buffer size, but we require // SIMDJSON_PADDING of padding static_assert(SIMDJSON_PADDING >= (BYTES_PROCESSED - 1), "backslash and quote finder must process fewer than SIMDJSON_PADDING bytes"); simd8 v(src); // store to dest unconditionally - we can overwrite the bits we don't like later v.store(dst); return { static_cast((v == '\\').to_bitmask()), // bs_bits static_cast((v == '"').to_bitmask()), // quote_bits }; } } // unnamed namespace } // namespace haswell } // namespace simdjson #endif // SIMDJSON_HASWELL_STRINGPARSING_DEFS_H /* end file simdjson/haswell/stringparsing_defs.h */ /* end file simdjson/haswell/begin.h */ /* including simdjson/generic/ondemand/amalgamated.h for haswell: #include "simdjson/generic/ondemand/amalgamated.h" */ /* begin file simdjson/generic/ondemand/amalgamated.h for haswell */ #if defined(SIMDJSON_CONDITIONAL_INCLUDE) && !defined(SIMDJSON_GENERIC_ONDEMAND_DEPENDENCIES_H) #error simdjson/generic/ondemand/dependencies.h must be included before simdjson/generic/ondemand/amalgamated.h! 
#endif

// Stuff other things depend on
/* including simdjson/generic/ondemand/base.h for haswell: #include "simdjson/generic/ondemand/base.h" */
/* begin file simdjson/generic/ondemand/base.h for haswell */
#ifndef SIMDJSON_GENERIC_ONDEMAND_BASE_H

/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */
/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_BASE_H */
/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */
/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */

namespace simdjson {
namespace haswell {
/**
 * A fast, simple, DOM-like interface that parses JSON as you use it.
 *
 * Designed for maximum speed and a lower memory profile.
 */
namespace ondemand {

/** Represents the depth of a JSON value (number of nested arrays/objects). */
using depth_t = int32_t;

/** @copydoc simdjson::haswell::number_type */
using number_type = simdjson::haswell::number_type;

/** @private Position in the JSON buffer indexes */
using token_position = const uint32_t *;

// Forward declarations of the On Demand types, so the headers that follow can
// refer to each other regardless of inclusion order.
class array;
class array_iterator;
class document;
class document_reference;
class document_stream;
class field;
class json_iterator;
enum class json_type;
struct number;
class object;
class object_iterator;
class parser;
class raw_json_string;
class token_iterator;
class value;
class value_iterator;

} // namespace ondemand
} // namespace haswell
} // namespace simdjson

#endif // SIMDJSON_GENERIC_ONDEMAND_BASE_H
/* end file simdjson/generic/ondemand/base.h for haswell */
/* including simdjson/generic/ondemand/value_iterator.h for haswell: #include "simdjson/generic/ondemand/value_iterator.h" */
/* begin file simdjson/generic/ondemand/value_iterator.h for haswell */
#ifndef SIMDJSON_GENERIC_ONDEMAND_VALUE_ITERATOR_H

/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */
/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_VALUE_ITERATOR_H */
/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */
/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */
/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */

namespace simdjson {
namespace haswell {
namespace ondemand {

/**
 * Iterates through a single JSON value at a particular depth.
 *
 * Does not keep track of the type of value: provides methods for objects, arrays and scalars and expects
 * the caller to call the right ones.
 *
 * NOTE(review): in this copy of the file the template argument lists appear to be missing
 * (e.g. `simdjson_result type()`, `template simdjson_warn_unused ...`). The declarations below are
 * reproduced as found; confirm the `<...>` arguments against upstream simdjson before compiling.
 *
 * @private This is not intended for external use.
 */
class value_iterator {
protected:
  /** The underlying JSON iterator */
  json_iterator *_json_iter{};
  /** The depth of this value */
  depth_t _depth{};
  /**
   * The starting token index for this value
   */
  token_position _start_position{};

public:
  simdjson_inline value_iterator() noexcept = default;

  /**
   * Denote that we're starting a document.
   */
  simdjson_inline void start_document() noexcept;

  /**
   * Skips a non-iterated or partially-iterated JSON value, whether it is a scalar, array or object.
   *
   * Optimized for scalars.
   */
  simdjson_warn_unused simdjson_inline error_code skip_child() noexcept;

  /**
   * Tell whether the iterator is at the EOF mark
   */
  simdjson_inline bool at_end() const noexcept;

  /**
   * Tell whether the iterator is at the start of the value
   */
  simdjson_inline bool at_start() const noexcept;

  /**
   * Tell whether the value is open--if the value has not been used, or the array/object is still open.
   */
  simdjson_inline bool is_open() const noexcept;

  /**
   * Tell whether the value is at an object's first field (just after the {).
   */
  simdjson_inline bool at_first_field() const noexcept;

  /**
   * Abandon all iteration.
   */
  simdjson_inline void abandon() noexcept;

  /**
   * Get the child value as a value_iterator.
   */
  simdjson_inline value_iterator child_value() const noexcept;

  /**
   * Get the depth of this value.
   */
  simdjson_inline int32_t depth() const noexcept;

  /**
   * Get the JSON type of this value.
   *
   * @error TAPE_ERROR when the JSON value is a bad token like "}" "," or "alse".
   */
  simdjson_inline simdjson_result type() const noexcept;

  /**
   * @addtogroup object Object iteration
   *
   * Methods to iterate and find object fields. These methods generally *assume* the value is
   * actually an object; the caller is responsible for keeping track of that fact.
   *
   * @{
   */

  /**
   * Start an object iteration.
   *
   * @returns Whether the object had any fields (returns false for empty).
   * @error INCORRECT_TYPE if there is no opening {
   */
  simdjson_warn_unused simdjson_inline simdjson_result start_object() noexcept;
  /**
   * Start an object iteration from the root.
   *
   * @returns Whether the object had any fields (returns false for empty).
   * @error INCORRECT_TYPE if there is no opening {
   * @error TAPE_ERROR if there is no matching } at end of document
   */
  simdjson_warn_unused simdjson_inline simdjson_result start_root_object() noexcept;
  /**
   * Checks whether an object could be started from the root. May be called by start_root_object.
   *
   * @returns SUCCESS if it is possible to safely start an object from the root (document level).
   * @error INCORRECT_TYPE if there is no opening {
   * @error TAPE_ERROR if there is no matching } at end of document
   */
  simdjson_warn_unused simdjson_inline error_code check_root_object() noexcept;
  /**
   * Start an object iteration after the user has already checked and moved past the {.
   *
   * Does not move the iterator unless the object is empty ({}).
   *
   * @returns Whether the object had any fields (returns false for empty).
   * @error INCOMPLETE_ARRAY_OR_OBJECT If there are no more tokens (implying the *parent*
   *        array or object is incomplete).
   */
  simdjson_warn_unused simdjson_inline simdjson_result started_object() noexcept;
  /**
   * Start an object iteration from the root, after the user has already checked and moved past the {.
   *
   * Does not move the iterator unless the object is empty ({}).
   *
   * @returns Whether the object had any fields (returns false for empty).
   * @error INCOMPLETE_ARRAY_OR_OBJECT If there are no more tokens (implying the *parent*
   *        array or object is incomplete).
   */
  simdjson_warn_unused simdjson_inline simdjson_result started_root_object() noexcept;

  /**
   * Moves to the next field in an object.
   *
   * Looks for , and }. If } is found, the object is finished and the iterator advances past it.
   * Otherwise, it advances to the next value.
   *
   * @return whether there is another field in the object.
   * @error TAPE_ERROR If there is a comma missing between fields.
   * @error TAPE_ERROR If there is a comma, but not enough tokens remaining to have a key, :, and value.
   */
  simdjson_warn_unused simdjson_inline simdjson_result has_next_field() noexcept;

  /**
   * Get the current field's key.
   */
  simdjson_warn_unused simdjson_inline simdjson_result field_key() noexcept;

  /**
   * Pass the : in the field and move to its value.
   */
  simdjson_warn_unused simdjson_inline error_code field_value() noexcept;

  /**
   * Find the next field with the given key.
   *
   * Assumes you have called next_field() or otherwise matched the previous value.
   *
   * This means the iterator must be sitting at the next key:
   *
   * ```
   * { "a": 1, "b": 2 }
   *           ^
   * ```
   *
   * Key is *raw JSON,* meaning it will be matched against the verbatim JSON without attempting to
   * unescape it. This works well for typical ASCII and UTF-8 keys (almost all of them), but may
   * fail to match some keys with escapes (\u, \n, etc.).
   */
  simdjson_warn_unused simdjson_inline error_code find_field(const std::string_view key) noexcept;

  /**
   * Find the next field with the given key, *without* unescaping. This assumes object order: it
   * will not find the field if it was already passed when looking for some *other* field.
   *
   * Assumes you have called next_field() or otherwise matched the previous value.
   *
   * This means the iterator must be sitting at the next key:
   *
   * ```
   * { "a": 1, "b": 2 }
   *           ^
   * ```
   *
   * Key is *raw JSON,* meaning it will be matched against the verbatim JSON without attempting to
   * unescape it. This works well for typical ASCII and UTF-8 keys (almost all of them), but may
   * fail to match some keys with escapes (\u, \n, etc.).
   */
  simdjson_warn_unused simdjson_inline simdjson_result find_field_raw(const std::string_view key) noexcept;

  /**
   * Find the field with the given key without regard to order, and *without* unescaping.
   *
   * This is an unordered object lookup: if the field is not found initially, it will cycle around and scan from the beginning.
   *
   * Assumes you have called next_field() or otherwise matched the previous value.
   *
   * This means the iterator must be sitting at the next key:
   *
   * ```
   * { "a": 1, "b": 2 }
   *           ^
   * ```
   *
   * Key is *raw JSON,* meaning it will be matched against the verbatim JSON without attempting to
   * unescape it. This works well for typical ASCII and UTF-8 keys (almost all of them), but may
   * fail to match some keys with escapes (\u, \n, etc.).
   */
  simdjson_warn_unused simdjson_inline simdjson_result find_field_unordered_raw(const std::string_view key) noexcept;

  /** @} */

  /**
   * @addtogroup array Array iteration
   * Methods to iterate over array elements. These methods generally *assume* the value is actually
   * an array; the caller is responsible for keeping track of that fact.
   * @{
   */

  /**
   * Check for an opening [ and start an array iteration.
   *
   * @returns Whether the array had any elements (returns false for empty).
   * @error INCORRECT_TYPE If there is no [.
   */
  simdjson_warn_unused simdjson_inline simdjson_result start_array() noexcept;
  /**
   * Check for an opening [ and start an array iteration while at the root.
   *
   * @returns Whether the array had any elements (returns false for empty).
   * @error INCORRECT_TYPE If there is no [.
   * @error TAPE_ERROR if there is no matching ] at end of document
   */
  simdjson_warn_unused simdjson_inline simdjson_result start_root_array() noexcept;
  /**
   * Checks whether an array could be started from the root. May be called by start_root_array.
   *
   * @returns SUCCESS if it is possible to safely start an array from the root (document level).
   * @error INCORRECT_TYPE If there is no [.
   * @error TAPE_ERROR if there is no matching ] at end of document
   */
  simdjson_warn_unused simdjson_inline error_code check_root_array() noexcept;
  /**
   * Start an array iteration, after the user has already checked and moved past the [.
   *
   * Does not move the iterator unless the array is empty ([]).
   *
   * @returns Whether the array had any elements (returns false for empty).
   * @error INCOMPLETE_ARRAY_OR_OBJECT If there are no more tokens (implying the *parent*
   *        array or object is incomplete).
   */
  simdjson_warn_unused simdjson_inline simdjson_result started_array() noexcept;
  /**
   * Start an array iteration from the root, after the user has already checked and moved past the [.
   *
   * Does not move the iterator unless the array is empty ([]).
   *
   * @returns Whether the array had any elements (returns false for empty).
   * @error INCOMPLETE_ARRAY_OR_OBJECT If there are no more tokens (implying the *parent*
   *        array or object is incomplete).
   */
  simdjson_warn_unused simdjson_inline simdjson_result started_root_array() noexcept;

  /**
   * Moves to the next element in an array.
   *
   * Looks for , and ]. If ] is found, the array is finished and the iterator advances past it.
   * Otherwise, it advances to the next value.
   *
   * @return Whether there is another element in the array.
   * @error TAPE_ERROR If there is a comma missing between elements.
   */
  simdjson_warn_unused simdjson_inline simdjson_result has_next_element() noexcept;

  /**
   * Get a child value iterator.
   */
  simdjson_warn_unused simdjson_inline value_iterator child() const noexcept;

  /** @} */

  /**
   * @defgroup scalar Scalar values
   * @addtogroup scalar
   * @{
   */

  // NOTE(review): the scalar accessors below carry no per-method documentation in this
  // header. The get_root_* / is_root_* declarations presumably mirror the plain ones for a
  // value at the document root; most take an extra `check_trailing` flag whose exact
  // semantics are not visible from these declarations -- confirm against the definitions.
  simdjson_warn_unused simdjson_inline simdjson_result get_string(bool allow_replacement) noexcept;
  template simdjson_warn_unused simdjson_inline error_code get_string(string_type& receiver, bool allow_replacement) noexcept;
  simdjson_warn_unused simdjson_inline simdjson_result get_wobbly_string() noexcept;
  simdjson_warn_unused simdjson_inline simdjson_result get_raw_json_string() noexcept;
  simdjson_warn_unused simdjson_inline simdjson_result get_uint64() noexcept;
  simdjson_warn_unused simdjson_inline simdjson_result get_uint64_in_string() noexcept;
  simdjson_warn_unused simdjson_inline simdjson_result get_int64() noexcept;
  simdjson_warn_unused simdjson_inline simdjson_result get_int64_in_string() noexcept;
  simdjson_warn_unused simdjson_inline simdjson_result get_double() noexcept;
  simdjson_warn_unused simdjson_inline simdjson_result get_double_in_string() noexcept;
  simdjson_warn_unused simdjson_inline simdjson_result get_bool() noexcept;
  simdjson_warn_unused simdjson_inline simdjson_result is_null() noexcept;
  simdjson_warn_unused simdjson_inline bool is_negative() noexcept;
  simdjson_warn_unused simdjson_inline simdjson_result is_integer() noexcept;
  simdjson_warn_unused simdjson_inline simdjson_result get_number_type() noexcept;
  simdjson_warn_unused simdjson_inline simdjson_result get_number() noexcept;

  simdjson_warn_unused simdjson_inline simdjson_result get_root_string(bool check_trailing, bool allow_replacement) noexcept;
  template simdjson_warn_unused simdjson_inline error_code get_root_string(string_type& receiver, bool check_trailing, bool allow_replacement) noexcept;
  simdjson_warn_unused simdjson_inline simdjson_result get_root_wobbly_string(bool check_trailing) noexcept;
  simdjson_warn_unused simdjson_inline simdjson_result get_root_raw_json_string(bool check_trailing) noexcept;
  simdjson_warn_unused simdjson_inline simdjson_result get_root_uint64(bool check_trailing) noexcept;
  simdjson_warn_unused simdjson_inline simdjson_result get_root_uint64_in_string(bool check_trailing) noexcept;
  simdjson_warn_unused simdjson_inline simdjson_result get_root_int64(bool check_trailing) noexcept;
  simdjson_warn_unused simdjson_inline simdjson_result get_root_int64_in_string(bool check_trailing) noexcept;
  simdjson_warn_unused simdjson_inline simdjson_result get_root_double(bool check_trailing) noexcept;
  simdjson_warn_unused simdjson_inline simdjson_result get_root_double_in_string(bool check_trailing) noexcept;
  simdjson_warn_unused simdjson_inline simdjson_result get_root_bool(bool check_trailing) noexcept;
  simdjson_warn_unused simdjson_inline bool is_root_negative() noexcept;
  simdjson_warn_unused simdjson_inline simdjson_result is_root_integer(bool check_trailing) noexcept;
  simdjson_warn_unused simdjson_inline simdjson_result get_root_number_type(bool check_trailing) noexcept;
  simdjson_warn_unused simdjson_inline simdjson_result get_root_number(bool check_trailing) noexcept;
  simdjson_warn_unused simdjson_inline simdjson_result is_root_null(bool check_trailing) noexcept;

  simdjson_inline error_code error() const noexcept;
  simdjson_inline uint8_t *&string_buf_loc() noexcept;
  simdjson_inline const json_iterator &json_iter() const noexcept;
  simdjson_inline json_iterator &json_iter() noexcept;

  simdjson_inline void assert_is_valid() const noexcept;
  simdjson_inline bool is_valid() const noexcept;

  /** @} */
protected:
  /**
   * Restarts an array iteration.
   * @returns Whether the array has any elements (returns false for empty).
   */
  simdjson_inline simdjson_result reset_array() noexcept;
  /**
   * Restarts an object iteration.
   * @returns Whether the object has any fields (returns false for empty).
   */
  simdjson_inline simdjson_result reset_object() noexcept;
  /**
   * move_at_start(): moves us so that we are pointing at the beginning of
   * the container. It updates the index so that at_start() is true and it
   * syncs the depth. The user can then create a new container instance.
   *
   * Usage: used with value::count_elements().
   **/
  simdjson_inline void move_at_start() noexcept;

  /**
   * move_at_container_start(): moves us so that we are pointing at the beginning of
   * the container so that assert_at_container_start() passes.
   *
   * Usage: used with reset_array() and reset_object().
   **/
  simdjson_inline void move_at_container_start() noexcept;
  /* Useful for debugging and logging purposes. */
  inline std::string to_string() const noexcept;
  simdjson_inline value_iterator(json_iterator *json_iter, depth_t depth, token_position start_index) noexcept;

  simdjson_inline simdjson_result parse_null(const uint8_t *json) const noexcept;
  simdjson_inline simdjson_result parse_bool(const uint8_t *json) const noexcept;
  simdjson_inline const uint8_t *peek_start() const noexcept;
  simdjson_inline uint32_t peek_start_length() const noexcept;

  /**
   * The general idea of the advance_... methods and the peek_* methods
   * is that you first peek and check that you have desired type. If you do,
   * and only if you do, then you advance.
   *
   * We used to unconditionally advance. But this made reasoning about our
   * current state difficult.
   * Suppose you always advance. Look at the 'value' matching the key
   * "shadowable" in the following example...
   *
   * ({"globals":{"a":{"shadowable":[}}}})
   *
   * If the user thinks it is a Boolean and asks for it, then we check the '[',
   * decide it is not a Boolean, but still move into the next character ('}'). Now
   * we are left pointing at '}' right after a '['. And we have not yet reported
   * an error, only that we do not have a Boolean.
   *
   * If, instead, you just stand your ground until it is content that you know, then
   * you will only ever move beyond the '[' if the user tells you that you have an
   * array. So you will be at the '}' character inside the array and, hopefully, you
   * will then catch the error because an array cannot start with '}', but the code
   * processing Boolean values does not know this.
   *
   * So the contract is: first call 'peek_...' and then call 'advance_...' only
   * if you have determined that it is a type you can handle.
   *
   * Unfortunately, it makes the code more verbose, longer and maybe more error prone.
   */

  simdjson_inline void advance_scalar(const char *type) noexcept;
  simdjson_inline void advance_root_scalar(const char *type) noexcept;
  simdjson_inline void advance_non_root_scalar(const char *type) noexcept;

  simdjson_inline const uint8_t *peek_scalar(const char *type) noexcept;
  simdjson_inline const uint8_t *peek_root_scalar(const char *type) noexcept;
  simdjson_inline const uint8_t *peek_non_root_scalar(const char *type) noexcept;

  simdjson_inline error_code start_container(uint8_t start_char, const char *incorrect_type_message, const char *type) noexcept;
  simdjson_inline error_code end_container() noexcept;

  /**
   * Advance to a place expecting a value (increasing depth).
   *
   * @return The current token (the one left behind).
   * @error TAPE_ERROR If the document ended early.
   */
  simdjson_inline simdjson_result advance_to_value() noexcept;

  simdjson_inline error_code incorrect_type_error(const char *message) const noexcept;
  simdjson_inline error_code error_unless_more_tokens(uint32_t tokens=1) const noexcept;

  simdjson_inline bool is_at_start() const noexcept;
  /**
   * is_at_iterator_start() returns true on an array or object after it has just been
   * created, whether the instance is empty or not.
   *
   * Usage: used by array::begin() in debug mode (SIMDJSON_DEVELOPMENT_CHECKS)
   */
  simdjson_inline bool is_at_iterator_start() const noexcept;

  /**
   * Assuming that we are within an object, this returns true if we
   * are pointing at a key.
   *
   * Usage: the skip_child() method should never be used while we are pointing
   * at a key inside an object.
   */
  simdjson_inline bool is_at_key() const noexcept;

  inline void assert_at_start() const noexcept;
  inline void assert_at_container_start() const noexcept;
  inline void assert_at_root() const noexcept;
  inline void assert_at_child() const noexcept;
  inline void assert_at_next() const noexcept;
  inline void assert_at_non_root_start() const noexcept;

  /** Get the starting position of this value */
  simdjson_inline token_position start_position() const noexcept;

  /** @copydoc error_code json_iterator::position() const noexcept; */
  simdjson_inline token_position position() const noexcept;
  /** @copydoc error_code json_iterator::end_position() const noexcept; */
  simdjson_inline token_position last_position() const noexcept;
  /** @copydoc error_code json_iterator::end_position() const noexcept; */
  simdjson_inline token_position end_position() const noexcept;
  /** @copydoc error_code json_iterator::report_error(error_code error, const char *message) noexcept; */
  simdjson_inline error_code report_error(error_code error, const char *message) noexcept;

  friend class document;
  friend class object;
  friend class array;
  friend class value;
}; // value_iterator

} // namespace ondemand
} // namespace haswell
} // namespace simdjson

namespace simdjson {

// Result wrapper for value_iterator: constructible from a value, from an error code,
// or default-constructed.
// NOTE(review): the specialization's template arguments appear to be missing in this copy
// of the file (see the note on value_iterator) -- confirm against upstream.
template<>
struct simdjson_result : public haswell::implementation_simdjson_result_base {
public:
  simdjson_inline simdjson_result(haswell::ondemand::value_iterator &&value) noexcept; ///< @private
  simdjson_inline simdjson_result(error_code error) noexcept; ///< @private
  simdjson_inline simdjson_result() noexcept = default;
};

} // namespace simdjson

#endif // SIMDJSON_GENERIC_ONDEMAND_VALUE_ITERATOR_H
/* end file simdjson/generic/ondemand/value_iterator.h for haswell */
/* including simdjson/generic/ondemand/value.h for haswell: #include "simdjson/generic/ondemand/value.h" */
/* begin file simdjson/generic/ondemand/value.h for haswell */
#ifndef SIMDJSON_GENERIC_ONDEMAND_VALUE_H

/* amalgamation skipped (editor-only): #ifndef
SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_VALUE_H */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { namespace haswell { namespace ondemand { /** * An ephemeral JSON value returned during iteration. It is only valid for as long as you do * not access more data in the JSON document. */ class value { public: /** * Create a new invalid value. * * Exists so you can declare a variable and later assign to it before use. */ simdjson_inline value() noexcept = default; /** * Get this value as the given type. * * Supported types: object, array, raw_json_string, string_view, uint64_t, int64_t, double, bool * * You may use get_double(), get_bool(), get_uint64(), get_int64(), * get_object(), get_array(), get_raw_json_string(), or get_string() instead. * * @returns A value of the given type, parsed from the JSON. * @returns INCORRECT_TYPE If the JSON value is not the given type. */ template simdjson_inline simdjson_result get() noexcept { // Unless the simdjson library provides an inline implementation, calling this method should // immediately fail. static_assert(!sizeof(T), "The get method with given type is not implemented by the simdjson library. " "The supported types are ondemand::object, ondemand::array, raw_json_string, std::string_view, uint64_t, " "int64_t, double, and bool. We recommend you use get_double(), get_bool(), get_uint64(), get_int64(), " " get_object(), get_array(), get_raw_json_string(), or get_string() instead of the get template."); } /** * Get this value as the given type. 
* * Supported types: object, array, raw_json_string, string_view, uint64_t, int64_t, double, bool * * @param out This is set to a value of the given type, parsed from the JSON. If there is an error, this may not be initialized. * @returns INCORRECT_TYPE If the JSON value is not an object. * @returns SUCCESS If the parse succeeded and the out parameter was set to the value. */ template simdjson_inline error_code get(T &out) noexcept; /** * Cast this JSON value to an array. * * @returns An object that can be used to iterate the array. * @returns INCORRECT_TYPE If the JSON value is not an array. */ simdjson_inline simdjson_result get_array() noexcept; /** * Cast this JSON value to an object. * * @returns An object that can be used to look up or iterate fields. * @returns INCORRECT_TYPE If the JSON value is not an object. */ simdjson_inline simdjson_result get_object() noexcept; /** * Cast this JSON value to an unsigned integer. * * @returns A unsigned 64-bit integer. * @returns INCORRECT_TYPE If the JSON value is not a 64-bit unsigned integer. */ simdjson_inline simdjson_result get_uint64() noexcept; /** * Cast this JSON value (inside string) to a unsigned integer. * * @returns A unsigned 64-bit integer. * @returns INCORRECT_TYPE If the JSON value is not a 64-bit unsigned integer. */ simdjson_inline simdjson_result get_uint64_in_string() noexcept; /** * Cast this JSON value to a signed integer. * * @returns A signed 64-bit integer. * @returns INCORRECT_TYPE If the JSON value is not a 64-bit integer. */ simdjson_inline simdjson_result get_int64() noexcept; /** * Cast this JSON value (inside string) to a signed integer. * * @returns A signed 64-bit integer. * @returns INCORRECT_TYPE If the JSON value is not a 64-bit integer. */ simdjson_inline simdjson_result get_int64_in_string() noexcept; /** * Cast this JSON value to a double. * * @returns A double. * @returns INCORRECT_TYPE If the JSON value is not a valid floating-point number. 
*/ simdjson_inline simdjson_result get_double() noexcept; /** * Cast this JSON value (inside string) to a double * * @returns A double. * @returns INCORRECT_TYPE If the JSON value is not a valid floating-point number. */ simdjson_inline simdjson_result get_double_in_string() noexcept; /** * Cast this JSON value to a string. * * The string is guaranteed to be valid UTF-8. * * Equivalent to get(). * * Important: a value should be consumed once. Calling get_string() twice on the same value * is an error. * * @returns An UTF-8 string. The string is stored in the parser and will be invalidated the next * time it parses a document or when it is destroyed. * @returns INCORRECT_TYPE if the JSON value is not a string. */ simdjson_inline simdjson_result get_string(bool allow_replacement = false) noexcept; /** * Attempts to fill the provided std::string reference with the parsed value of the current string. * * The string is guaranteed to be valid UTF-8. * * Important: a value should be consumed once. Calling get_string() twice on the same value * is an error. * * Performance: This method may be slower than get_string() or get_string(bool) because it may need to allocate memory. * We recommend you avoid allocating an std::string unless you need to. * * @returns INCORRECT_TYPE if the JSON value is not a string. Otherwise, we return SUCCESS. */ template simdjson_inline error_code get_string(string_type& receiver, bool allow_replacement = false) noexcept; /** * Cast this JSON value to a "wobbly" string. * * The string is may not be a valid UTF-8 string. * See https://simonsapin.github.io/wtf-8/ * * Important: a value should be consumed once. Calling get_wobbly_string() twice on the same value * is an error. * * @returns An UTF-8 string. The string is stored in the parser and will be invalidated the next * time it parses a document or when it is destroyed. * @returns INCORRECT_TYPE if the JSON value is not a string. 
*/ simdjson_inline simdjson_result get_wobbly_string() noexcept; /** * Cast this JSON value to a raw_json_string. * * The string is guaranteed to be valid UTF-8, and may have escapes in it (e.g. \\ or \n). * * @returns A pointer to the raw JSON for the given string. * @returns INCORRECT_TYPE if the JSON value is not a string. */ simdjson_inline simdjson_result get_raw_json_string() noexcept; /** * Cast this JSON value to a bool. * * @returns A bool value. * @returns INCORRECT_TYPE if the JSON value is not true or false. */ simdjson_inline simdjson_result get_bool() noexcept; /** * Checks if this JSON value is null. If and only if the value is * null, then it is consumed (we advance). If we find a token that * begins with 'n' but is not 'null', then an error is returned. * * @returns Whether the value is null. * @returns INCORRECT_TYPE If the JSON value begins with 'n' and is not 'null'. */ simdjson_inline simdjson_result is_null() noexcept; #if SIMDJSON_EXCEPTIONS /** * Cast this JSON value to an array. * * @returns An object that can be used to iterate the array. * @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not an array. */ simdjson_inline operator array() noexcept(false); /** * Cast this JSON value to an object. * * @returns An object that can be used to look up or iterate fields. * @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not an object. */ simdjson_inline operator object() noexcept(false); /** * Cast this JSON value to an unsigned integer. * * @returns A signed 64-bit integer. * @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not a 64-bit unsigned integer. */ simdjson_inline operator uint64_t() noexcept(false); /** * Cast this JSON value to a signed integer. * * @returns A signed 64-bit integer. * @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not a 64-bit integer. */ simdjson_inline operator int64_t() noexcept(false); /** * Cast this JSON value to a double. * * @returns A double. 
* @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not a valid floating-point number. */ simdjson_inline operator double() noexcept(false); /** * Cast this JSON value to a string. * * The string is guaranteed to be valid UTF-8. * * Equivalent to get(). * * @returns An UTF-8 string. The string is stored in the parser and will be invalidated the next * time it parses a document or when it is destroyed. * @exception simdjson_error(INCORRECT_TYPE) if the JSON value is not a string. */ simdjson_inline operator std::string_view() noexcept(false); /** * Cast this JSON value to a raw_json_string. * * The string is guaranteed to be valid UTF-8, and may have escapes in it (e.g. \\ or \n). * * @returns A pointer to the raw JSON for the given string. * @exception simdjson_error(INCORRECT_TYPE) if the JSON value is not a string. */ simdjson_inline operator raw_json_string() noexcept(false); /** * Cast this JSON value to a bool. * * @returns A bool value. * @exception simdjson_error(INCORRECT_TYPE) if the JSON value is not true or false. */ simdjson_inline operator bool() noexcept(false); #endif /** * Begin array iteration. * * Part of the std::iterable interface. * * @returns INCORRECT_TYPE If the JSON value is not an array. */ simdjson_inline simdjson_result begin() & noexcept; /** * Sentinel representing the end of the array. * * Part of the std::iterable interface. */ simdjson_inline simdjson_result end() & noexcept; /** * This method scans the array and counts the number of elements. * The count_elements method should always be called before you have begun * iterating through the array: it is expected that you are pointing at * the beginning of the array. * The runtime complexity is linear in the size of the array. After * calling this function, if successful, the array is 'rewinded' at its * beginning as if it had never been accessed. If the JSON is malformed (e.g., * there is a missing comma), then an error is returned and it is no longer * safe to continue. 
* * Performance hint: You should only call count_elements() as a last * resort as it may require scanning the document twice or more. */ simdjson_inline simdjson_result count_elements() & noexcept; /** * This method scans the object and counts the number of key-value pairs. * The count_fields method should always be called before you have begun * iterating through the object: it is expected that you are pointing at * the beginning of the object. * The runtime complexity is linear in the size of the object. After * calling this function, if successful, the object is 'rewinded' at its * beginning as if it had never been accessed. If the JSON is malformed (e.g., * there is a missing comma), then an error is returned and it is no longer * safe to continue. * * To check that an object is empty, it is more performant to use * the is_empty() method on the object instance. * * Performance hint: You should only call count_fields() as a last * resort as it may require scanning the document twice or more. */ simdjson_inline simdjson_result count_fields() & noexcept; /** * Get the value at the given index in the array. This function has linear-time complexity. * This function should only be called once on an array instance since the array iterator is not reset between each call. * * @return The value at the given index, or: * - INDEX_OUT_OF_BOUNDS if the array index is larger than an array length */ simdjson_inline simdjson_result at(size_t index) noexcept; /** * Look up a field by name on an object (order-sensitive). 
* * The following code reads z, then y, then x, and thus will not retrieve x or y if fed the * JSON `{ "x": 1, "y": 2, "z": 3 }`: * * ```c++ * simdjson::ondemand::parser parser; * auto obj = parser.parse(R"( { "x": 1, "y": 2, "z": 3 } )"_padded); * double z = obj.find_field("z"); * double y = obj.find_field("y"); * double x = obj.find_field("x"); * ``` * If you have multiple fields with a matching key ({"x": 1, "x": 1}) be mindful * that only one field is returned. * **Raw Keys:** The lookup will be done against the *raw* key, and will not unescape keys. * e.g. `object["a"]` will match `{ "a": 1 }`, but will *not* match `{ "\u0061": 1 }`. * * @param key The key to look up. * @returns The value of the field, or NO_SUCH_FIELD if the field is not in the object. */ simdjson_inline simdjson_result find_field(std::string_view key) noexcept; /** @overload simdjson_inline simdjson_result find_field(std::string_view key) noexcept; */ simdjson_inline simdjson_result find_field(const char *key) noexcept; /** * Look up a field by name on an object, without regard to key order. * * **Performance Notes:** This is a bit less performant than find_field(), though its effect varies * and often appears negligible. It starts out normally, starting out at the last field; but if * the field is not found, it scans from the beginning of the object to see if it missed it. That * missing case has a non-cache-friendly bump and lots of extra scanning, especially if the object * in question is large. The fact that the extra code is there also bumps the executable size. * * It is the default, however, because it would be highly surprising (and hard to debug) if the * default behavior failed to look up a field just because it was in the wrong order--and many * APIs assume this. Therefore, you must be explicit if you want to treat objects as out of order. * * If you have multiple fields with a matching key ({"x": 1, "x": 1}) be mindful * that only one field is returned. 
* * Use find_field() if you are sure fields will be in order (or are willing to treat it as if the * field wasn't there when they aren't). * * @param key The key to look up. * @returns The value of the field, or NO_SUCH_FIELD if the field is not in the object. */ simdjson_inline simdjson_result find_field_unordered(std::string_view key) noexcept; /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) noexcept; */ simdjson_inline simdjson_result find_field_unordered(const char *key) noexcept; /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) noexcept; */ simdjson_inline simdjson_result operator[](std::string_view key) noexcept; /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) noexcept; */ simdjson_inline simdjson_result operator[](const char *key) noexcept; /** * Get the type of this JSON value. It does not validate or consume the value. * E.g., you must still call "is_null()" to check that a value is null even if * "type()" returns json_type::null. * * NOTE: If you're only expecting a value to be one type (a typical case), it's generally * better to just call .get_double, .get_string, etc. and check for INCORRECT_TYPE (or just * let it throw an exception). * * @return The type of JSON value (json_type::array, json_type::object, json_type::string, * json_type::number, json_type::boolean, or json_type::null). * @error TAPE_ERROR when the JSON value is a bad token like "}" "," or "alse". */ simdjson_inline simdjson_result type() noexcept; /** * Checks whether the value is a scalar (string, number, null, Boolean). * Returns false when it is an array or object. * * @returns true if the type is string, number, null, Boolean * @error TAPE_ERROR when the JSON value is a bad token like "}" "," or "alse". */ simdjson_inline simdjson_result is_scalar() noexcept; /** * Checks whether the value is a negative number. * * @returns true if the number is negative.
*/ simdjson_inline bool is_negative() noexcept; /** * Checks whether the value is an integer number. Note that * this requires partially parsing the number string. If * the value is determined to be an integer, it may still * not parse properly as an integer in subsequent steps * (e.g., it might overflow). * * Performance note: if you call this function systematically * before parsing a number, you may have fallen for a performance * anti-pattern. * * @returns true if the number is an integer. */ simdjson_inline simdjson_result is_integer() noexcept; /** * Determine the number type (integer or floating-point number) as quickly * as possible. This function does not fully validate the input. It is * useful when you only need to classify the numbers, without parsing them. * * If you are planning to retrieve the value or you need full validation, * consider using the get_number() method instead: it will fully parse * and validate the input, and give you access to the type: * get_number().get_number_type(). * * get_number_type() is number_type::unsigned_integer if we have * an integer greater than or equal to 9223372036854775808 * get_number_type() is number_type::signed_integer if we have an * integer that is less than 9223372036854775808 * Otherwise, get_number_type() has value number_type::floating_point_number * * This function requires processing the number string, but it is expected * to be faster than get_number().get_number_type() because it does not * parse the number value. * * @returns the type of the number */ simdjson_inline simdjson_result get_number_type() noexcept; /** * Attempt to parse an ondemand::number. An ondemand::number may * contain an integer value or a floating-point value, the simdjson * library will autodetect the type. Thus it is a dynamically typed * number. Before accessing the value, you must determine the detected * type.
* * number.get_number_type() is number_type::signed_integer if we have * an integer in [-9223372036854775808,9223372036854775808) * You can recover the value by calling number.get_int64() and you * have that number.is_int64() is true. * * number.get_number_type() is number_type::unsigned_integer if we have * an integer in [9223372036854775808,18446744073709551616) * You can recover the value by calling number.get_uint64() and you * have that number.is_uint64() is true. * * Otherwise, number.get_number_type() has value number_type::floating_point_number * and we have a binary64 number. * You can recover the value by calling number.get_double() and you * have that number.is_double() is true. * * You must check the type before accessing the value: it is an error * to call "get_int64()" when number.get_number_type() is not * number_type::signed_integer and when number.is_int64() is false. * * Performance note: this is designed with performance in mind. When * calling 'get_number()', you scan the number string only once, determining * efficiently the type and storing it in an efficient manner. */ simdjson_warn_unused simdjson_inline simdjson_result get_number() noexcept; /** * Get the raw JSON for this token. * * The string_view will always point into the input buffer. * * The string_view will start at the beginning of the token, and include the entire token * *as well as all spaces until the next token (or EOF).* This means, for example, that a * string token always begins with a " and is always terminated by the final ", possibly * followed by a number of spaces. * * The string_view is *not* null-terminated. However, if this is a scalar (string, number, * boolean, or null), the character after the end of the string_view is guaranteed to be * a non-space token. * * Tokens include: * - { * - [ * - "a string (possibly with UTF-8 or backslashed characters like \\\")". * - -1.2e-100 * - true * - false * - null * * See also value::raw_json(). 
*/ simdjson_inline std::string_view raw_json_token() noexcept; /** * Get a string_view pointing at this value in the JSON document. * If this element is an array or an object, it consumes the array or the object * and returns a string_view instance corresponding to the * array or object as represented in JSON. It points inside the original document. * If this element is a scalar (string, number, Boolean, null), it returns what * raw_json_token() would return. */ simdjson_inline simdjson_result raw_json() noexcept; /** * Returns the current location in the document if in bounds. */ simdjson_inline simdjson_result current_location() noexcept; /** * Returns the current depth in the document if in bounds. * * E.g., * 0 = finished with document * 1 = document root value (could be [ or {, not yet known) * 2 = , or } inside root array/object * 3 = key or value inside root array/object. */ simdjson_inline int32_t current_depth() const noexcept; /** * Get the value associated with the given JSON pointer. We use the RFC 6901 * https://tools.ietf.org/html/rfc6901 standard. * * ondemand::parser parser; * auto json = R"({ "foo": { "a": [ 10, 20, 30 ] }})"_padded; * auto doc = parser.iterate(json); * doc.at_pointer("/foo/a/1") == 20 * * It is allowed for a key to be the empty string: * * ondemand::parser parser; * auto json = R"({ "": { "a": [ 10, 20, 30 ] }})"_padded; * auto doc = parser.iterate(json); * doc.at_pointer("//a/1") == 20 * * Note that at_pointer() called on the document automatically calls the document's rewind * method between each call. It invalidates all previously accessed arrays, objects and values * that have not been consumed. * * Calling at_pointer() on non-document instances (e.g., arrays and objects) is not * standardized (by RFC 6901). We provide some experimental support for JSON pointers * on non-document instances. Unlike the document case, calling at_pointer on an array * or an object instance performs no rewind and no invalidation.
* * You may only call at_pointer on an array after it has been created, but before it has * been first accessed. When calling at_pointer on an array, the pointer is advanced to * the location indicated by the JSON pointer (in case of success). It is no longer possible * to call at_pointer on the same array. * * You may call at_pointer more than once on an object, but each time the pointer is advanced * to be within the value matched by the key indicated by the JSON pointer query. Thus any preceding * key (as well as the current key) can no longer be used with following JSON pointer calls. * * Also note that at_pointer() relies on find_field() which implies that we do not unescape keys when matching * * @return The value associated with the given JSON pointer, or: * - NO_SUCH_FIELD if a field does not exist in an object * - INDEX_OUT_OF_BOUNDS if an array index is larger than an array length * - INCORRECT_TYPE if a non-integer is used to access an array * - INVALID_JSON_POINTER if the JSON pointer is invalid and cannot be parsed */ simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; protected: /** * Create a value. */ simdjson_inline value(const value_iterator &iter) noexcept; /** * Skip this value, allowing iteration to continue. */ simdjson_inline void skip() noexcept; /** * Start a value at the current position. * * (It should already be started; this is just a self-documentation method.) */ static simdjson_inline value start(const value_iterator &iter) noexcept; /** * Resume a value. 
*/ static simdjson_inline value resume(const value_iterator &iter) noexcept; /** * Get the object, starting or resuming it as necessary */ simdjson_inline simdjson_result start_or_resume_object() noexcept; // simdjson_inline void log_value(const char *type) const noexcept; // simdjson_inline void log_error(const char *message) const noexcept; value_iterator iter{}; friend class document; friend class array_iterator; friend class field; friend class object; friend struct simdjson_result; friend struct simdjson_result; }; } // namespace ondemand } // namespace haswell } // namespace simdjson namespace simdjson { template<> struct simdjson_result : public haswell::implementation_simdjson_result_base { public: simdjson_inline simdjson_result(haswell::ondemand::value &&value) noexcept; ///< @private simdjson_inline simdjson_result(error_code error) noexcept; ///< @private simdjson_inline simdjson_result() noexcept = default; simdjson_inline simdjson_result get_array() noexcept; simdjson_inline simdjson_result get_object() noexcept; simdjson_inline simdjson_result get_uint64() noexcept; simdjson_inline simdjson_result get_uint64_in_string() noexcept; simdjson_inline simdjson_result get_int64() noexcept; simdjson_inline simdjson_result get_int64_in_string() noexcept; simdjson_inline simdjson_result get_double() noexcept; simdjson_inline simdjson_result get_double_in_string() noexcept; simdjson_inline simdjson_result get_string(bool allow_replacement = false) noexcept; template simdjson_inline error_code get_string(string_type& receiver, bool allow_replacement = false) noexcept; simdjson_inline simdjson_result get_wobbly_string() noexcept; simdjson_inline simdjson_result get_raw_json_string() noexcept; simdjson_inline simdjson_result get_bool() noexcept; simdjson_inline simdjson_result is_null() noexcept; template simdjson_inline simdjson_result get() noexcept; template simdjson_inline error_code get(T &out) noexcept; #if SIMDJSON_EXCEPTIONS simdjson_inline operator 
haswell::ondemand::array() noexcept(false); simdjson_inline operator haswell::ondemand::object() noexcept(false); simdjson_inline operator uint64_t() noexcept(false); simdjson_inline operator int64_t() noexcept(false); simdjson_inline operator double() noexcept(false); simdjson_inline operator std::string_view() noexcept(false); simdjson_inline operator haswell::ondemand::raw_json_string() noexcept(false); simdjson_inline operator bool() noexcept(false); #endif simdjson_inline simdjson_result count_elements() & noexcept; simdjson_inline simdjson_result count_fields() & noexcept; simdjson_inline simdjson_result at(size_t index) noexcept; simdjson_inline simdjson_result begin() & noexcept; simdjson_inline simdjson_result end() & noexcept; /** * Look up a field by name on an object (order-sensitive). * * The following code reads z, then y, then x, and thus will not retrieve x or y if fed the * JSON `{ "x": 1, "y": 2, "z": 3 }`: * * ```c++ * simdjson::ondemand::parser parser; * auto obj = parser.parse(R"( { "x": 1, "y": 2, "z": 3 } )"_padded); * double z = obj.find_field("z"); * double y = obj.find_field("y"); * double x = obj.find_field("x"); * ``` * * **Raw Keys:** The lookup will be done against the *raw* key, and will not unescape keys. * e.g. `object["a"]` will match `{ "a": 1 }`, but will *not* match `{ "\u0061": 1 }`. * * @param key The key to look up. * @returns The value of the field, or NO_SUCH_FIELD if the field is not in the object. */ simdjson_inline simdjson_result find_field(std::string_view key) noexcept; /** @overload simdjson_inline simdjson_result find_field(std::string_view key) noexcept; */ simdjson_inline simdjson_result find_field(const char *key) noexcept; /** * Look up a field by name on an object, without regard to key order. * * **Performance Notes:** This is a bit less performant than find_field(), though its effect varies * and often appears negligible. 
It starts out normally, starting out at the last field; but if * the field is not found, it scans from the beginning of the object to see if it missed it. That * missing case has a non-cache-friendly bump and lots of extra scanning, especially if the object * in question is large. The fact that the extra code is there also bumps the executable size. * * It is the default, however, because it would be highly surprising (and hard to debug) if the * default behavior failed to look up a field just because it was in the wrong order--and many * APIs assume this. Therefore, you must be explicit if you want to treat objects as out of order. * * Use find_field() if you are sure fields will be in order (or are willing to treat it as if the * field wasn't there when they aren't). * * @param key The key to look up. * @returns The value of the field, or NO_SUCH_FIELD if the field is not in the object. */ simdjson_inline simdjson_result find_field_unordered(std::string_view key) noexcept; /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) noexcept; */ simdjson_inline simdjson_result find_field_unordered(const char *key) noexcept; /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) noexcept; */ simdjson_inline simdjson_result operator[](std::string_view key) noexcept; /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) noexcept; */ simdjson_inline simdjson_result operator[](const char *key) noexcept; /** * Get the type of this JSON value. * * NOTE: If you're only expecting a value to be one type (a typical case), it's generally * better to just call .get_double, .get_string, etc. and check for INCORRECT_TYPE (or just * let it throw an exception). 
*/ simdjson_inline simdjson_result type() noexcept; simdjson_inline simdjson_result is_scalar() noexcept; simdjson_inline simdjson_result is_negative() noexcept; simdjson_inline simdjson_result is_integer() noexcept; simdjson_inline simdjson_result get_number_type() noexcept; simdjson_inline simdjson_result get_number() noexcept; /** @copydoc simdjson_inline std::string_view value::raw_json_token() noexcept */ simdjson_inline simdjson_result raw_json_token() noexcept; simdjson_inline simdjson_result raw_json() noexcept; /** @copydoc simdjson_inline simdjson_result value::current_location() noexcept */ simdjson_inline simdjson_result current_location() noexcept; /** @copydoc simdjson_inline int32_t value::current_depth() const noexcept */ simdjson_inline simdjson_result current_depth() const noexcept; simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; }; } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_VALUE_H /* end file simdjson/generic/ondemand/value.h for haswell */ /* including simdjson/generic/ondemand/logger.h for haswell: #include "simdjson/generic/ondemand/logger.h" */ /* begin file simdjson/generic/ondemand/logger.h for haswell */ #ifndef SIMDJSON_GENERIC_ONDEMAND_LOGGER_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_LOGGER_H */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { namespace haswell { namespace ondemand { // Logging should be free unless SIMDJSON_VERBOSE_LOGGING is set. Importantly, it is critical // that the call to the log functions be side-effect free. Thus, for example, you should not // create temporary std::string instances.
namespace logger { /** Severity levels accepted by the `level` parameter of log_line(). */ enum class log_level : int32_t { info = 0, error = 1 }; /** LOG_ENABLED is true only when SIMDJSON_VERBOSE_LOGGING evaluates to true in the #if below. */ #if SIMDJSON_VERBOSE_LOGGING static constexpr const bool LOG_ENABLED = true; #else static constexpr const bool LOG_ENABLED = false; #endif // We do not want these functions to be 'really inlined' since real inlining is // for performance purposes and if you are using the loggers, you do not care about // performance (or should not). static inline void log_headers() noexcept; // If args are provided, title will be treated as format string template static inline void log_line(const json_iterator &iter, token_position index, depth_t depth, const char *title_prefix, const char *title, std::string_view detail, logger::log_level level, Args&&... args) noexcept; template static inline void log_line(const json_iterator &iter, const char *title_prefix, const char *title, std::string_view detail, int delta, int depth_delta, logger::log_level level, Args&&... args) noexcept; /* The json_iterator overloads below come in two forms: one takes an explicit token_position and depth_t, the other takes delta/depth_delta ints (presumably offsets relative to the iterator's current token and depth; confirm against the definitions). */ static inline void log_event(const json_iterator &iter, const char *type, std::string_view detail="", int delta=0, int depth_delta=0) noexcept; static inline void log_value(const json_iterator &iter, token_position index, depth_t depth, const char *type, std::string_view detail="") noexcept; static inline void log_value(const json_iterator &iter, const char *type, std::string_view detail="", int delta=-1, int depth_delta=0) noexcept; static inline void log_start_value(const json_iterator &iter, token_position index, depth_t depth, const char *type, std::string_view detail="") noexcept; static inline void log_start_value(const json_iterator &iter, const char *type, int delta=-1, int depth_delta=0) noexcept; static inline void log_end_value(const json_iterator &iter, const char *type, int delta=-1, int depth_delta=0) noexcept; static inline void log_error(const json_iterator &iter, token_position index, depth_t depth, const char *error, const char *detail="") noexcept; static inline void log_error(const json_iterator &iter, const
char *error, const char *detail="", int delta=-1, int depth_delta=0) noexcept; /** Overloads of the functions above that take a value_iterator instead of a json_iterator. */ static inline void log_event(const value_iterator &iter, const char *type, std::string_view detail="", int delta=0, int depth_delta=0) noexcept; static inline void log_value(const value_iterator &iter, const char *type, std::string_view detail="", int delta=-1, int depth_delta=0) noexcept; static inline void log_start_value(const value_iterator &iter, const char *type, int delta=-1, int depth_delta=0) noexcept; static inline void log_end_value(const value_iterator &iter, const char *type, int delta=-1, int depth_delta=0) noexcept; static inline void log_error(const value_iterator &iter, const char *error, const char *detail="", int delta=-1, int depth_delta=0) noexcept; } // namespace logger } // namespace ondemand } // namespace haswell } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_LOGGER_H /* end file simdjson/generic/ondemand/logger.h for haswell */ /* including simdjson/generic/ondemand/token_iterator.h for haswell: #include "simdjson/generic/ondemand/token_iterator.h" */ /* begin file simdjson/generic/ondemand/token_iterator.h for haswell */ #ifndef SIMDJSON_GENERIC_ONDEMAND_TOKEN_ITERATOR_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_TOKEN_ITERATOR_H */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/logger.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { namespace haswell { namespace ondemand { /** * Iterates through JSON tokens (`{` `}` `[` `]` `,` `:` `""` `123` `true` `false` `null`) * detected by stage 1. * * @private This is not intended for external use.
*/ class token_iterator { public: /** * Create a new invalid token_iterator. * * Exists so you can declare a variable and later assign to it before use. */ simdjson_inline token_iterator() noexcept = default; simdjson_inline token_iterator(token_iterator &&other) noexcept = default; simdjson_inline token_iterator &operator=(token_iterator &&other) noexcept = default; simdjson_inline token_iterator(const token_iterator &other) noexcept = default; simdjson_inline token_iterator &operator=(const token_iterator &other) noexcept = default; /** * Advance to the next token (returning the current one). */ simdjson_inline const uint8_t *return_current_and_advance() noexcept; /** * Reports the current offset in bytes from the start of the underlying buffer. */ simdjson_inline uint32_t current_offset() const noexcept; /** * Get the JSON text for a given token (relative). * * This is not null-terminated; it is a view into the JSON. * * @param delta The relative position of the token to retrieve. e.g. 0 = current token, * 1 = next token, -1 = prev token. * * TODO consider a string_view, assuming the length will get stripped out by the optimizer when * it isn't used ... */ simdjson_inline const uint8_t *peek(int32_t delta=0) const noexcept; /** * Get the maximum length of the JSON text for a given token. * * The length will include any whitespace at the end of the token. * * @param delta The relative position of the token to retrieve. e.g. 0 = current token, * 1 = next token, -1 = prev token. */ simdjson_inline uint32_t peek_length(int32_t delta=0) const noexcept; /** * Get the JSON text for a given token. * * This is not null-terminated; it is a view into the JSON. * * @param position The position of the token. * */ simdjson_inline const uint8_t *peek(token_position position) const noexcept; /** * Get the maximum length of the JSON text for a given token. * * The length will include any whitespace at the end of the token. * * @param position The position of the token. 
*/ simdjson_inline uint32_t peek_length(token_position position) const noexcept; /** * Return the current index. */ simdjson_inline token_position position() const noexcept; /** * Reset to a previously saved index. */ simdjson_inline void set_position(token_position target_position) noexcept; // NOTE: we don't support a full C++ iterator interface, because we expect people to make // different calls to advance the iterator based on *their own* state. simdjson_inline bool operator==(const token_iterator &other) const noexcept; simdjson_inline bool operator!=(const token_iterator &other) const noexcept; simdjson_inline bool operator>(const token_iterator &other) const noexcept; simdjson_inline bool operator>=(const token_iterator &other) const noexcept; simdjson_inline bool operator<(const token_iterator &other) const noexcept; simdjson_inline bool operator<=(const token_iterator &other) const noexcept; protected: simdjson_inline token_iterator(const uint8_t *buf, token_position position) noexcept; /** * Get the index of the JSON text for a given token (relative). * * The result is a numeric index (uint32_t), not a pointer into the JSON text. * * @param delta The relative position of the token to retrieve. e.g. 0 = current token, * 1 = next token, -1 = prev token. */ simdjson_inline uint32_t peek_index(int32_t delta=0) const noexcept; /** * Get the index of the JSON text for a given token. * * The result is a numeric index (uint32_t), not a pointer into the JSON text. * * @param position The position of the token. * */ simdjson_inline uint32_t peek_index(token_position position) const noexcept; const uint8_t *buf{}; token_position _position{}; friend class json_iterator; friend class value_iterator; friend class object; template friend simdjson_inline void logger::log_line(const json_iterator &iter, const char *title_prefix, const char *title, std::string_view detail, int delta, int depth_delta, logger::log_level level, Args&&... 
args) noexcept; template friend simdjson_inline void logger::log_line(const json_iterator &iter, token_position index, depth_t depth, const char *title_prefix, const char *title, std::string_view detail, logger::log_level level, Args&&... args) noexcept; }; } // namespace ondemand } // namespace haswell } // namespace simdjson namespace simdjson { template<> struct simdjson_result : public haswell::implementation_simdjson_result_base { public: simdjson_inline simdjson_result(haswell::ondemand::token_iterator &&value) noexcept; ///< @private simdjson_inline simdjson_result(error_code error) noexcept; ///< @private simdjson_inline simdjson_result() noexcept = default; simdjson_inline ~simdjson_result() noexcept = default; ///< @private }; } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_TOKEN_ITERATOR_H /* end file simdjson/generic/ondemand/token_iterator.h for haswell */ /* including simdjson/generic/ondemand/json_iterator.h for haswell: #include "simdjson/generic/ondemand/json_iterator.h" */ /* begin file simdjson/generic/ondemand/json_iterator.h for haswell */ #ifndef SIMDJSON_GENERIC_ONDEMAND_JSON_ITERATOR_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_JSON_ITERATOR_H */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/token_iterator.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { namespace haswell { namespace ondemand { /** * Iterates through JSON tokens, keeping track of depth and string buffer. * * @private This is not intended for external use. 
*/ class json_iterator { protected: token_iterator token{}; ondemand::parser *parser{}; /** * Next free location in the string buffer. * * Used by raw_json_string::unescape() to have a place to unescape strings to. */ uint8_t *_string_buf_loc{}; /** * JSON error, if there is one. * * INCORRECT_TYPE and NO_SUCH_FIELD are *not* stored here, ever. * * PERF NOTE: we *hope* this will be elided into control flow, as it is only used (a) in the first * iteration of the loop, or (b) for the final iteration after a missing comma is found in ++. If * this is not elided, we should make sure it's at least not using up a register. Failing that, * we should store it in document so there's only one of them. */ error_code error{SUCCESS}; /** * Depth of the current token in the JSON. * * - 0 = finished with document * - 1 = document root value (could be [ or {, not yet known) * - 2 = , or } inside root array/object * - 3 = key or value inside root array/object. */ depth_t _depth{}; /** * Beginning of the document indexes. * Normally we have root == parser->implementation->structural_indexes.get() * but this may differ, especially in streaming mode (where we have several * documents); */ token_position _root{}; /** * Normally, a json_iterator operates over a single document, but in * some cases, we may have a stream of documents. This attribute is meant * as meta-data: the json_iterator works the same irrespective of the * value of this attribute. */ bool _streaming{false}; public: simdjson_inline json_iterator() noexcept = default; simdjson_inline json_iterator(json_iterator &&other) noexcept; simdjson_inline json_iterator &operator=(json_iterator &&other) noexcept; simdjson_inline explicit json_iterator(const json_iterator &other) noexcept = default; simdjson_inline json_iterator &operator=(const json_iterator &other) noexcept = default; /** * Skips a JSON value, whether it is a scalar, array or object. 
*/ simdjson_warn_unused simdjson_inline error_code skip_child(depth_t parent_depth) noexcept; /** * Tell whether the iterator is still at the start */ simdjson_inline bool at_root() const noexcept; /** * Tell whether we should be expected to run in streaming * mode (iterating over many documents). It is pure metadata * that does not affect how the iterator works. It is used by * start_root_array() and start_root_object(). */ simdjson_inline bool streaming() const noexcept; /** * Get the root value iterator */ simdjson_inline token_position root_position() const noexcept; /** * Assert that we are at the document depth (== 1) */ simdjson_inline void assert_at_document_depth() const noexcept; /** * Assert that we are at the root of the document */ simdjson_inline void assert_at_root() const noexcept; /** * Tell whether the iterator is at the EOF mark */ simdjson_inline bool at_end() const noexcept; /** * Tell whether the iterator is live (has not been moved). */ simdjson_inline bool is_alive() const noexcept; /** * Abandon this iterator, setting depth to 0 (as if the document is finished). */ simdjson_inline void abandon() noexcept; /** * Advance the current token without modifying depth. */ simdjson_inline const uint8_t *return_current_and_advance() noexcept; /** * Returns true if there is a single token in the index (i.e., it is * a JSON with a scalar value such as a single number). * * @return whether there is a single token */ simdjson_inline bool is_single_token() const noexcept; /** * Assert that there are at least the given number of tokens left. * * Has no effect in release builds. */ simdjson_inline void assert_more_tokens(uint32_t required_tokens=1) const noexcept; /** * Assert that the given position addresses an actual token (is within bounds). * * Has no effect in release builds. */ simdjson_inline void assert_valid_position(token_position position) const noexcept; /** * Get the JSON text for a given token (relative). 
* * This is not null-terminated; it is a view into the JSON. * * @param delta The relative position of the token to retrieve. e.g. 0 = next token, -1 = prev token. * * TODO consider a string_view, assuming the length will get stripped out by the optimizer when * it isn't used ... */ simdjson_inline const uint8_t *peek(int32_t delta=0) const noexcept; /** * Get the maximum length of the JSON text for the current token (or relative). * * The length will include any whitespace at the end of the token. * * @param delta The relative position of the token to retrieve. e.g. 0 = next token, -1 = prev token. */ simdjson_inline uint32_t peek_length(int32_t delta=0) const noexcept; /** * Get a pointer to the current location in the input buffer. * * This is not null-terminated; it is a view into the JSON. * * You may be pointing outside of the input buffer: it is not generally * safe to dereference this pointer. */ simdjson_inline const uint8_t *unsafe_pointer() const noexcept; /** * Get the JSON text for a given token. * * This is not null-terminated; it is a view into the JSON. * * @param position The position of the token to retrieve. * * TODO consider a string_view, assuming the length will get stripped out by the optimizer when * it isn't used ... */ simdjson_inline const uint8_t *peek(token_position position) const noexcept; /** * Get the maximum length of the JSON text for the current token (or relative). * * The length will include any whitespace at the end of the token. * * @param position The position of the token to retrieve. */ simdjson_inline uint32_t peek_length(token_position position) const noexcept; /** * Get the JSON text for the last token in the document. * * This is not null-terminated; it is a view into the JSON. * * TODO consider a string_view, assuming the length will get stripped out by the optimizer when * it isn't used ... */ simdjson_inline const uint8_t *peek_last() const noexcept; /** * Ascend one level. 
* * Validates that the depth - 1 == parent_depth. * * @param parent_depth the expected parent depth. */ simdjson_inline void ascend_to(depth_t parent_depth) noexcept; /** * Descend one level. * * Validates that the new depth == child_depth. * * @param child_depth the expected child depth. */ simdjson_inline void descend_to(depth_t child_depth) noexcept; simdjson_inline void descend_to(depth_t child_depth, int32_t delta) noexcept; /** * Get current depth. */ simdjson_inline depth_t depth() const noexcept; /** * Get current (writeable) location in the string buffer. */ simdjson_inline uint8_t *&string_buf_loc() noexcept; /** * Report an unrecoverable error, preventing further iteration. * * @param error The error to report. Must not be SUCCESS, UNINITIALIZED, INCORRECT_TYPE, or NO_SUCH_FIELD. * @param message An error message to report with the error. */ simdjson_inline error_code report_error(error_code error, const char *message) noexcept; /** * Log error, but don't stop iteration. * @param error The error to report. Must be INCORRECT_TYPE, or NO_SUCH_FIELD. * @param message An error message to report with the error. */ simdjson_inline error_code optional_error(error_code error, const char *message) noexcept; /** * Take an input in json containing max_len characters and attempt to copy it over to tmpbuf, a buffer with * N bytes of capacity. It will return false if N is too small (smaller than max_len) of if it is zero. * The buffer (tmpbuf) is padded with space characters. */ simdjson_warn_unused simdjson_inline bool copy_to_buffer(const uint8_t *json, uint32_t max_len, uint8_t *tmpbuf, size_t N) noexcept; simdjson_inline token_position position() const noexcept; /** * Write the raw_json_string to the string buffer and return a string_view. * Each raw_json_string should be unescaped once, or else the string buffer might * overflow. 
*/ simdjson_inline simdjson_result unescape(raw_json_string in, bool allow_replacement) noexcept; simdjson_inline simdjson_result unescape_wobbly(raw_json_string in) noexcept; simdjson_inline void reenter_child(token_position position, depth_t child_depth) noexcept; simdjson_inline error_code consume_character(char c) noexcept; #if SIMDJSON_DEVELOPMENT_CHECKS simdjson_inline token_position start_position(depth_t depth) const noexcept; simdjson_inline void set_start_position(depth_t depth, token_position position) noexcept; #endif /* Useful for debugging and logging purposes. */ inline std::string to_string() const noexcept; /** * Returns the current location in the document if in bounds. */ inline simdjson_result current_location() const noexcept; /** * Updates this json iterator so that it is back at the beginning of the document, * as if it had just been created. */ inline void rewind() noexcept; /** * This checks whether the {,},[,] are balanced so that the document * ends with proper zero depth. This requires scanning the whole document * and it may be expensive. It is expected that it will be rarely called. * It does not attempt to match { with } and [ with ]. */ inline bool balanced() const noexcept; protected: simdjson_inline json_iterator(const uint8_t *buf, ondemand::parser *parser) noexcept; /// The last token before the end simdjson_inline token_position last_position() const noexcept; /// The token *at* the end. This points at gibberish and should only be used for comparison. simdjson_inline token_position end_position() const noexcept; /// The end of the buffer. 
simdjson_inline token_position end() const noexcept; friend class document; friend class document_stream; friend class object; friend class array; friend class value; friend class raw_json_string; friend class parser; friend class value_iterator; template friend simdjson_inline void logger::log_line(const json_iterator &iter, const char *title_prefix, const char *title, std::string_view detail, int delta, int depth_delta, logger::log_level level, Args&&... args) noexcept; template friend simdjson_inline void logger::log_line(const json_iterator &iter, token_position index, depth_t depth, const char *title_prefix, const char *title, std::string_view detail, logger::log_level level, Args&&... args) noexcept; }; // json_iterator } // namespace ondemand } // namespace haswell } // namespace simdjson namespace simdjson { template<> struct simdjson_result : public haswell::implementation_simdjson_result_base { public: simdjson_inline simdjson_result(haswell::ondemand::json_iterator &&value) noexcept; ///< @private simdjson_inline simdjson_result(error_code error) noexcept; ///< @private simdjson_inline simdjson_result() noexcept = default; }; } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_JSON_ITERATOR_H /* end file simdjson/generic/ondemand/json_iterator.h for haswell */ /* including simdjson/generic/ondemand/json_type.h for haswell: #include "simdjson/generic/ondemand/json_type.h" */ /* begin file simdjson/generic/ondemand/json_type.h for haswell */ #ifndef SIMDJSON_GENERIC_ONDEMAND_JSON_TYPE_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_JSON_TYPE_H */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/numberparsing.h" */ /* amalgamation skipped 
(editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */

namespace simdjson {
namespace haswell {
namespace ondemand {

/**
 * The type of a JSON value.
 */
enum class json_type {
    // Start at 1 to catch uninitialized / default values more easily
    array=1, ///< A JSON array   ( [ 1, 2, 3 ... ] )
    object,  ///< A JSON object  ( { "a": 1, "b": 2, ... } )
    number,  ///< A JSON number  ( 1 or -2.3 or 4.5e6 ...)
    string,  ///< A JSON string  ( "a" or "hello world\n" ...)
    boolean, ///< A JSON boolean (true or false)
    null     ///< A JSON null    (null)
};

/**
 * A type representing a JSON number.
 * The design of the struct is deliberately straight-forward. All
 * functions return standard values with no error check.
 */
struct number {

  /**
   * return the automatically determined type of
   * the number: number_type::floating_point_number,
   * number_type::signed_integer or number_type::unsigned_integer.
   *
   *    enum class number_type {
   *        floating_point_number=1, /// a binary64 number
   *        signed_integer,          /// a signed integer that fits in a 64-bit word using two's complement
   *        unsigned_integer         /// a positive integer larger or equal to 1<<63
   *    };
   */
  simdjson_inline ondemand::number_type get_number_type() const noexcept;
  /**
   * return true if the automatically determined type of
   * the number is number_type::unsigned_integer.
   */
  simdjson_inline bool is_uint64() const noexcept;
  /**
   * return the value as a uint64_t, only valid if is_uint64() is true.
   */
  simdjson_inline uint64_t get_uint64() const noexcept;
  simdjson_inline operator uint64_t() const noexcept;

  /**
   * return true if the automatically determined type of
   * the number is number_type::signed_integer.
   */
  simdjson_inline bool is_int64() const noexcept;
  /**
   * return the value as a int64_t, only valid if is_int64() is true.
   */
  simdjson_inline int64_t get_int64() const noexcept;
  simdjson_inline operator int64_t() const noexcept;

  /**
   * return true if the automatically determined type of
   * the number is number_type::floating_point_number.
   */
  simdjson_inline bool is_double() const noexcept;
  /**
   * return the value as a double, only valid if is_double() is true.
   */
  simdjson_inline double get_double() const noexcept;
  simdjson_inline operator double() const noexcept;

  /**
   * Convert the number to a double. Though it always succeeds, the conversion
   * may be lossy if the number cannot be represented exactly.
   */
  simdjson_inline double as_double() const noexcept;


protected:
  /**
   * The next block of declaration is designed so that we can call the number parsing
   * functions on a number type. They are protected and should never be used outside
   * of the core simdjson library.
   */
  friend class value_iterator;
  // Each numberparsing function is a template over the writer type W, so every
  // friend declaration needs its own template header.
  template<typename W>
  friend error_code numberparsing::slow_float_parsing(simdjson_unused const uint8_t * src, W writer);
  template<typename W>
  friend error_code numberparsing::write_float(const uint8_t *const src, bool negative, uint64_t i, const uint8_t * start_digits, size_t digit_count, int64_t exponent, W &writer);
  template<typename W>
  friend error_code numberparsing::parse_number(const uint8_t *const src, W &writer);
  /** Store a signed 64-bit value to the number. */
  simdjson_inline void append_s64(int64_t value) noexcept;
  /** Store an unsigned 64-bit value to the number. */
  simdjson_inline void append_u64(uint64_t value) noexcept;
  /** Store a double value to the number. */
  simdjson_inline void append_double(double value) noexcept;
  /** Specifies that the value is a double, but leave it undefined. */
  simdjson_inline void skip_double() noexcept;
  /**
   * End of friend declarations.
   */

  /**
   * Our attributes are a union type (size = 64 bits)
   * followed by a type indicator.
   */
  union {
    double floating_point_number;
    int64_t signed_integer;
    uint64_t unsigned_integer;
  } payload{0};
  number_type type{number_type::signed_integer};
};

/**
 * Write the JSON type to the output stream
 *
 * @param out The output stream.
 * @param type The json_type.
*/ inline std::ostream& operator<<(std::ostream& out, json_type type) noexcept; #if SIMDJSON_EXCEPTIONS /** * Send JSON type to an output stream. * * @param out The output stream. * @param type The json_type. * @throw simdjson_error if the result being printed has an error. If there is an error with the * underlying output stream, that error will be propagated (simdjson_error will not be * thrown). */ inline std::ostream& operator<<(std::ostream& out, simdjson_result &type) noexcept(false); #endif } // namespace ondemand } // namespace haswell } // namespace simdjson namespace simdjson { template<> struct simdjson_result : public haswell::implementation_simdjson_result_base { public: simdjson_inline simdjson_result(haswell::ondemand::json_type &&value) noexcept; ///< @private simdjson_inline simdjson_result(error_code error) noexcept; ///< @private simdjson_inline simdjson_result() noexcept = default; simdjson_inline ~simdjson_result() noexcept = default; ///< @private }; } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_JSON_TYPE_H /* end file simdjson/generic/ondemand/json_type.h for haswell */ /* including simdjson/generic/ondemand/raw_json_string.h for haswell: #include "simdjson/generic/ondemand/raw_json_string.h" */ /* begin file simdjson/generic/ondemand/raw_json_string.h for haswell */ #ifndef SIMDJSON_GENERIC_ONDEMAND_RAW_JSON_STRING_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_RAW_JSON_STRING_H */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { namespace haswell { namespace ondemand { /** * A string escaped per JSON rules, terminated with quote ("). 
They are used to represent * unescaped keys inside JSON documents. * * (In other words, a pointer to the beginning of a string, just after the start quote, inside a * JSON file.) * * This class is deliberately simplistic and has little functionality. You can * compare a raw_json_string instance with an unescaped C string, but * that is nearly all you can do. * * The raw_json_string is unescaped. If you wish to write an unescaped version of it to your own * buffer, you may do so using the parser.unescape(string, buff) method, using an ondemand::parser * instance. Doing so requires you to have a sufficiently large buffer. * * The raw_json_string instances originate typically from field instance which in turn represent * key-value pairs from object instances. From a field instance, you get the raw_json_string * instance by calling key(). You can, if you want a more usable string_view instance, call * the unescaped_key() method on the field instance. You may also create a raw_json_string from * any other string value, with the value.get_raw_json_string() method. Again, you can get * a more usable string_view instance by calling get_string(). * */ class raw_json_string { public: /** * Create a new invalid raw_json_string. * * Exists so you can declare a variable and later assign to it before use. */ simdjson_inline raw_json_string() noexcept = default; /** * Create a new invalid raw_json_string pointed at the given location in the JSON. * * The given location must be just *after* the beginning quote (") in the JSON file. * * It *must* be terminated by a ", and be a valid JSON string. */ simdjson_inline raw_json_string(const uint8_t * _buf) noexcept; /** * Get the raw pointer to the beginning of the string in the JSON (just after the "). * * It is possible for this function to return a null pointer if the instance * has outlived its existence. 
*/ simdjson_inline const char * raw() const noexcept; /** * This compares the current instance to the std::string_view target: returns true if * they are byte-by-byte equal (no escaping is done) on target.size() characters, * and if the raw_json_string instance has a quote character at byte index target.size(). * We never read more than length + 1 bytes in the raw_json_string instance. * If length is smaller than target.size(), this will return false. * * The std::string_view instance may contain any characters. However, the caller * is responsible for setting length so that length bytes may be read in the * raw_json_string. * * Performance: the comparison may be done using memcmp which may be efficient * for long strings. */ simdjson_inline bool unsafe_is_equal(size_t length, std::string_view target) const noexcept; /** * This compares the current instance to the std::string_view target: returns true if * they are byte-by-byte equal (no escaping is done). * The std::string_view instance should not contain unescaped quote characters: * the caller is responsible for this check. See is_free_from_unescaped_quote. * * Performance: the comparison is done byte-by-byte which might be inefficient for * long strings. * * If target is a compile-time constant, and your compiler likes you, * you should be able to do the following without performance penalty... * * static_assert(raw_json_string::is_free_from_unescaped_quote(target), ""); * s.unsafe_is_equal(target); */ simdjson_inline bool unsafe_is_equal(std::string_view target) const noexcept; /** * This compares the current instance to the C string target: returns true if * they are byte-by-byte equal (no escaping is done). * The provided C string should not contain an unescaped quote character: * the caller is responsible for this check. See is_free_from_unescaped_quote. * * If target is a compile-time constant, and your compiler likes you, * you should be able to do the following without performance penalty... 
* * static_assert(raw_json_string::is_free_from_unescaped_quote(target), ""); * s.unsafe_is_equal(target); */ simdjson_inline bool unsafe_is_equal(const char* target) const noexcept; /** * This compares the current instance to the std::string_view target: returns true if * they are byte-by-byte equal (no escaping is done). */ simdjson_inline bool is_equal(std::string_view target) const noexcept; /** * This compares the current instance to the C string target: returns true if * they are byte-by-byte equal (no escaping is done). */ simdjson_inline bool is_equal(const char* target) const noexcept; /** * Returns true if target is free from unescaped quote. If target is known at * compile-time, we might expect the computation to happen at compile time with * many compilers (not all!). */ static simdjson_inline bool is_free_from_unescaped_quote(std::string_view target) noexcept; static simdjson_inline bool is_free_from_unescaped_quote(const char* target) noexcept; private: /** * This will set the inner pointer to zero, effectively making * this instance unusable. */ simdjson_inline void consume() noexcept { buf = nullptr; } /** * Checks whether the inner pointer is non-null and thus usable. */ simdjson_inline simdjson_warn_unused bool alive() const noexcept { return buf != nullptr; } /** * Unescape this JSON string, replacing \\ with \, \n with newline, etc. * The result will be a valid UTF-8. * * ## IMPORTANT: string_view lifetime * * The string_view is only valid until the next parse() call on the parser. * * @param iter A json_iterator, which contains a buffer where the string will be written. * @param allow_replacement Whether we allow replacement of invalid surrogate pairs. */ simdjson_inline simdjson_warn_unused simdjson_result unescape(json_iterator &iter, bool allow_replacement) const noexcept; /** * Unescape this JSON string, replacing \\ with \, \n with newline, etc. * The result may not be a valid UTF-8. 
https://simonsapin.github.io/wtf-8/ * * ## IMPORTANT: string_view lifetime * * The string_view is only valid until the next parse() call on the parser. * * @param iter A json_iterator, which contains a buffer where the string will be written. */ simdjson_inline simdjson_warn_unused simdjson_result unescape_wobbly(json_iterator &iter) const noexcept; const uint8_t * buf{}; friend class object; friend class field; friend class parser; friend struct simdjson_result; }; simdjson_unused simdjson_inline std::ostream &operator<<(std::ostream &, const raw_json_string &) noexcept; /** * Comparisons between raw_json_string and std::string_view instances are potentially unsafe: the user is responsible * for providing a string with no unescaped quote. Note that unescaped quotes cannot be present in valid JSON strings. */ simdjson_unused simdjson_inline bool operator==(const raw_json_string &a, std::string_view c) noexcept; simdjson_unused simdjson_inline bool operator==(std::string_view c, const raw_json_string &a) noexcept; simdjson_unused simdjson_inline bool operator!=(const raw_json_string &a, std::string_view c) noexcept; simdjson_unused simdjson_inline bool operator!=(std::string_view c, const raw_json_string &a) noexcept; } // namespace ondemand } // namespace haswell } // namespace simdjson namespace simdjson { template<> struct simdjson_result : public haswell::implementation_simdjson_result_base { public: simdjson_inline simdjson_result(haswell::ondemand::raw_json_string &&value) noexcept; ///< @private simdjson_inline simdjson_result(error_code error) noexcept; ///< @private simdjson_inline simdjson_result() noexcept = default; simdjson_inline ~simdjson_result() noexcept = default; ///< @private simdjson_inline simdjson_result raw() const noexcept; simdjson_inline simdjson_warn_unused simdjson_result unescape(haswell::ondemand::json_iterator &iter, bool allow_replacement) const noexcept; simdjson_inline simdjson_warn_unused simdjson_result 
unescape_wobbly(haswell::ondemand::json_iterator &iter) const noexcept; }; } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_RAW_JSON_STRING_H /* end file simdjson/generic/ondemand/raw_json_string.h for haswell */ /* including simdjson/generic/ondemand/parser.h for haswell: #include "simdjson/generic/ondemand/parser.h" */ /* begin file simdjson/generic/ondemand/parser.h for haswell */ #ifndef SIMDJSON_GENERIC_ONDEMAND_PARSER_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_PARSER_H */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ #include namespace simdjson { namespace haswell { namespace ondemand { /** * The default batch size for document_stream instances for this On Demand kernel. * Note that different On Demand kernel may use a different DEFAULT_BATCH_SIZE value * in the future. */ static constexpr size_t DEFAULT_BATCH_SIZE = 1000000; /** * Some adversary might try to set the batch size to 0 or 1, which might cause problems. * We set a minimum of 32B since anything else is highly likely to be an error. In practice, * most users will want a much larger batch size. * * All non-negative MINIMAL_BATCH_SIZE values should be 'safe' except that, obviously, no JSON * document can ever span 0 or 1 byte and that very large values would create memory allocation issues. */ static constexpr size_t MINIMAL_BATCH_SIZE = 32; /** * A JSON fragment iterator. * * This holds the actual iterator as well as the buffer for writing strings. */ class parser { public: /** * Create a JSON parser. * * The new parser will have zero capacity. 
*/ inline explicit parser(size_t max_capacity = SIMDJSON_MAXSIZE_BYTES) noexcept; inline parser(parser &&other) noexcept = default; simdjson_inline parser(const parser &other) = delete; simdjson_inline parser &operator=(const parser &other) = delete; simdjson_inline parser &operator=(parser &&other) noexcept = default; /** Deallocate the JSON parser. */ inline ~parser() noexcept = default; /** * Start iterating an on-demand JSON document. * * ondemand::parser parser; * document doc = parser.iterate(json); * * It is expected that the content is a valid UTF-8 file, containing a valid JSON document. * Otherwise the iterate method may return an error. In particular, the whole input should be * valid: we do not attempt to tolerate incorrect content either before or after a JSON * document. If there is a UTF-8 BOM, the parser skips it. * * ### IMPORTANT: Validate what you use * * Calling iterate on an invalid JSON document may not immediately trigger an error. The call to * iterate does not parse and validate the whole document. * * ### IMPORTANT: Buffer Lifetime * * Because parsing is done while you iterate, you *must* keep the JSON buffer around at least as * long as the document iteration. * * ### IMPORTANT: Document Lifetime * * Only one iteration at a time can happen per parser, and the parser *must* be kept alive during * iteration to ensure intermediate buffers can be accessed. Any document must be destroyed before * you call parse() again or destroy the parser. * * ### REQUIRED: Buffer Padding * * The buffer must have at least SIMDJSON_PADDING extra allocated bytes. It does not matter what * those bytes are initialized to, as long as they are allocated. These bytes will be read: if you * using a sanitizer that verifies that no uninitialized byte is read, then you should initialize the * SIMDJSON_PADDING bytes to avoid runtime warnings. * * @param json The JSON to parse. * @param len The length of the JSON. 
* @param capacity The number of bytes allocated in the JSON (must be at least len+SIMDJSON_PADDING). * * @return The document, or an error: * - INSUFFICIENT_PADDING if the input has less than SIMDJSON_PADDING extra bytes. * - MEMALLOC if realloc_if_needed the parser does not have enough capacity, and memory * allocation fails. * - EMPTY if the document is all whitespace. * - UTF8_ERROR if the document is not valid UTF-8. * - UNESCAPED_CHARS if a string contains control characters that must be escaped * - UNCLOSED_STRING if there is an unclosed string in the document. */ simdjson_warn_unused simdjson_result iterate(padded_string_view json) & noexcept; /** @overload simdjson_result iterate(padded_string_view json) & noexcept */ simdjson_warn_unused simdjson_result iterate(const char *json, size_t len, size_t capacity) & noexcept; /** @overload simdjson_result iterate(padded_string_view json) & noexcept */ simdjson_warn_unused simdjson_result iterate(const uint8_t *json, size_t len, size_t capacity) & noexcept; /** @overload simdjson_result iterate(padded_string_view json) & noexcept */ simdjson_warn_unused simdjson_result iterate(std::string_view json, size_t capacity) & noexcept; /** @overload simdjson_result iterate(padded_string_view json) & noexcept */ simdjson_warn_unused simdjson_result iterate(const std::string &json) & noexcept; /** @overload simdjson_result iterate(padded_string_view json) & noexcept */ simdjson_warn_unused simdjson_result iterate(std::string &json) & noexcept; /** @overload simdjson_result iterate(padded_string_view json) & noexcept */ simdjson_warn_unused simdjson_result iterate(const simdjson_result &json) & noexcept; /** @overload simdjson_result iterate(padded_string_view json) & noexcept */ simdjson_warn_unused simdjson_result iterate(const simdjson_result &json) & noexcept; /** @overload simdjson_result iterate(padded_string_view json) & noexcept */ simdjson_warn_unused simdjson_result iterate(padded_string &&json) & noexcept = 
delete; /** * @private * * Start iterating an on-demand JSON document. * * ondemand::parser parser; * json_iterator doc = parser.iterate(json); * * ### IMPORTANT: Buffer Lifetime * * Because parsing is done while you iterate, you *must* keep the JSON buffer around at least as * long as the document iteration. * * ### IMPORTANT: Document Lifetime * * Only one iteration at a time can happen per parser, and the parser *must* be kept alive during * iteration to ensure intermediate buffers can be accessed. Any document must be destroyed before * you call parse() again or destroy the parser. * * The ondemand::document instance holds the iterator. The document must remain in scope * while you are accessing instances of ondemand::value, ondemand::object, ondemand::array. * * ### REQUIRED: Buffer Padding * * The buffer must have at least SIMDJSON_PADDING extra allocated bytes. It does not matter what * those bytes are initialized to, as long as they are allocated. These bytes will be read: if you * using a sanitizer that verifies that no uninitialized byte is read, then you should initialize the * SIMDJSON_PADDING bytes to avoid runtime warnings. * * @param json The JSON to parse. * * @return The iterator, or an error: * - INSUFFICIENT_PADDING if the input has less than SIMDJSON_PADDING extra bytes. * - MEMALLOC if realloc_if_needed the parser does not have enough capacity, and memory * allocation fails. * - EMPTY if the document is all whitespace. * - UTF8_ERROR if the document is not valid UTF-8. * - UNESCAPED_CHARS if a string contains control characters that must be escaped * - UNCLOSED_STRING if there is an unclosed string in the document. */ simdjson_warn_unused simdjson_result iterate_raw(padded_string_view json) & noexcept; /** * Parse a buffer containing many JSON documents. 
* * auto json = R"({ "foo": 1 } { "foo": 2 } { "foo": 3 } )"_padded; * ondemand::parser parser; * ondemand::document_stream docs = parser.iterate_many(json); * for (auto & doc : docs) { * std::cout << doc["foo"] << std::endl; * } * // Prints 1 2 3 * * No copy of the input buffer is made. * * The function is lazy: it may be that no more than one JSON document at a time is parsed. * * The caller is responsible for ensuring that the input string data remains unchanged and is * not deleted during the loop. * * ### Format * * The buffer must contain a series of one or more JSON documents, concatenated into a single * buffer, separated by ASCII whitespace. It effectively parses until it has a fully valid document, * then starts parsing the next document at that point. (It does this with more parallelism and * lookahead than you might think, though.) * * Documents that consist of an object or array may omit the whitespace between them, concatenating * with no separator. Documents that consist of a single primitive (i.e. documents that are not * arrays or objects) MUST be separated with ASCII whitespace. * * The characters inside a JSON document, and between JSON documents, must be valid Unicode (UTF-8). * If there is a UTF-8 BOM, the parser skips it. * * The documents must not exceed batch_size bytes (by default 1MB) or they will fail to parse. * Setting batch_size to excessively large or excessively small values may negatively impact * performance. * * ### REQUIRED: Buffer Padding * * The buffer must have at least SIMDJSON_PADDING extra allocated bytes. It does not matter what * those bytes are initialized to, as long as they are allocated. These bytes will be read: if you * are using a sanitizer that verifies that no uninitialized byte is read, then you should initialize the * SIMDJSON_PADDING bytes to avoid runtime warnings. * * ### Threads * * When compiled with SIMDJSON_THREADS_ENABLED, this method will use a single thread under the * hood to do some lookahead.
* * ### Parser Capacity * * If the parser's current capacity is less than batch_size, it will allocate enough capacity * to handle it (up to max_capacity). * * @param buf The concatenated JSON to parse. * @param len The length of the concatenated JSON. * @param batch_size The batch size to use. MUST be larger than the largest document. The sweet * spot is cache-related: small enough to fit in cache, yet big enough to * parse as many documents as possible in one tight loop. * Defaults to 1MB (DEFAULT_BATCH_SIZE), which has been a reasonable sweet spot in our tests. * @param allow_comma_separated (defaults to false) This allows a mode where the documents are * separated by commas instead of whitespace. It comes with a performance * penalty because the entire document is indexed at once (and the document must be * less than 4 GB), and there is no multithreading. In this mode, the batch_size parameter * is effectively ignored, as it is set to at least the document size. * @return The stream, or an error. An empty input will yield 0 documents rather than an EMPTY error. Errors: * - MEMALLOC if the parser does not have enough capacity and memory allocation fails * - CAPACITY if the parser does not have enough capacity and batch_size > max_capacity. * - other json errors if parsing fails. You should not rely on these errors to always be the same for the * same document: they may vary under runtime dispatch (so they may vary depending on your system and hardware).
*/ inline simdjson_result iterate_many(const uint8_t *buf, size_t len, size_t batch_size = DEFAULT_BATCH_SIZE, bool allow_comma_separated = false) noexcept; /** @overload parse_many(const uint8_t *buf, size_t len, size_t batch_size) */ inline simdjson_result iterate_many(const char *buf, size_t len, size_t batch_size = DEFAULT_BATCH_SIZE, bool allow_comma_separated = false) noexcept; /** @overload parse_many(const uint8_t *buf, size_t len, size_t batch_size) */ inline simdjson_result iterate_many(const std::string &s, size_t batch_size = DEFAULT_BATCH_SIZE, bool allow_comma_separated = false) noexcept; inline simdjson_result iterate_many(const std::string &&s, size_t batch_size, bool allow_comma_separated = false) = delete;// unsafe /** @overload parse_many(const uint8_t *buf, size_t len, size_t batch_size) */ inline simdjson_result iterate_many(const padded_string &s, size_t batch_size = DEFAULT_BATCH_SIZE, bool allow_comma_separated = false) noexcept; inline simdjson_result iterate_many(const padded_string &&s, size_t batch_size, bool allow_comma_separated = false) = delete;// unsafe /** @private We do not want to allow implicit conversion from C string to std::string. */ simdjson_result iterate_many(const char *buf, size_t batch_size = DEFAULT_BATCH_SIZE) noexcept = delete; /** The capacity of this parser (the largest document it can process). */ simdjson_inline size_t capacity() const noexcept; /** The maximum capacity of this parser (the largest document it is allowed to process). */ simdjson_inline size_t max_capacity() const noexcept; simdjson_inline void set_max_capacity(size_t max_capacity) noexcept; /** * The maximum depth of this parser (the most deeply nested objects and arrays it can process). * This parameter is only relevant when the macro SIMDJSON_DEVELOPMENT_CHECKS is set to true. * The document's instance current_depth() method should be used to monitor the parsing * depth and limit it if desired. 
*/ simdjson_inline size_t max_depth() const noexcept; /** * Ensure this parser has enough memory to process JSON documents up to `capacity` bytes in length * and `max_depth` depth. * * The max_depth parameter is only relevant when the macro SIMDJSON_DEVELOPMENT_CHECKS is set to true. * The document's instance current_depth() method should be used to monitor the parsing * depth and limit it if desired. * * @param capacity The new capacity. * @param max_depth The new max_depth. Defaults to DEFAULT_MAX_DEPTH. * @return The error, if there is one. */ simdjson_warn_unused error_code allocate(size_t capacity, size_t max_depth=DEFAULT_MAX_DEPTH) noexcept; #ifdef SIMDJSON_THREADS_ENABLED /** * The parser instance can use threads when they are available to speed up some * operations. It is enabled by default. Changing this attribute will change the * behavior of the parser for future operations. */ bool threaded{true}; #endif /** * Unescape this JSON string, replacing \\ with \, \n with newline, etc. to a user-provided buffer. * The result must be valid UTF-8. * The provided pointer is advanced to the end of the string by reference, and a string_view instance * is returned. You can ensure that your buffer is large enough by allocating a block of memory at least * as large as the input JSON plus SIMDJSON_PADDING and then unescape all strings to this one buffer. * * This unescape function is a low-level function. If you want a more user-friendly approach, you should * avoid raw_json_string instances (e.g., by calling unescaped_key() instead of key() or get_string() * instead of get_raw_json_string()). * * ## IMPORTANT: string_view lifetime * * The string_view is only valid as long as the bytes in dst. * * @param in The raw JSON string to unescape. * @param dst A pointer to a buffer at least large enough to write this string as well as * an additional SIMDJSON_PADDING bytes. * @param allow_replacement Whether we allow a replacement if the input string contains unmatched surrogate pairs.
* @return A string_view pointing at the unescaped string in dst * @error STRING_ERROR if escapes are incorrect. */ simdjson_inline simdjson_result unescape(raw_json_string in, uint8_t *&dst, bool allow_replacement = false) const noexcept; /** * Unescape this JSON string, replacing \\ with \, \n with newline, etc. to a user-provided buffer. * The result may not be valid UTF-8. See https://simonsapin.github.io/wtf-8/ * The provided pointer is advanced to the end of the string by reference, and a string_view instance * is returned. You can ensure that your buffer is large enough by allocating a block of memory at least * as large as the input JSON plus SIMDJSON_PADDING and then unescape all strings to this one buffer. * * This unescape function is a low-level function. If you want a more user-friendly approach, you should * avoid raw_json_string instances (e.g., by calling unescaped_key() instead of key() or get_string() * instead of get_raw_json_string()). * * ## IMPORTANT: string_view lifetime * * The string_view is only valid as long as the bytes in dst. * * @param in The raw JSON string to unescape. * @param dst A pointer to a buffer at least large enough to write this string as well as * an additional SIMDJSON_PADDING bytes. * @return A string_view pointing at the unescaped string in dst * @error STRING_ERROR if escapes are incorrect.
*/ simdjson_inline simdjson_result unescape_wobbly(raw_json_string in, uint8_t *&dst) const noexcept; private: /** @private [for benchmarking access] The implementation to use */ std::unique_ptr implementation{}; size_t _capacity{0}; size_t _max_capacity; size_t _max_depth{DEFAULT_MAX_DEPTH}; std::unique_ptr string_buf{}; #if SIMDJSON_DEVELOPMENT_CHECKS std::unique_ptr start_positions{}; #endif friend class json_iterator; friend class document_stream; }; } // namespace ondemand } // namespace haswell } // namespace simdjson namespace simdjson { template<> struct simdjson_result : public haswell::implementation_simdjson_result_base { public: simdjson_inline simdjson_result(haswell::ondemand::parser &&value) noexcept; ///< @private simdjson_inline simdjson_result(error_code error) noexcept; ///< @private simdjson_inline simdjson_result() noexcept = default; }; } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_PARSER_H /* end file simdjson/generic/ondemand/parser.h for haswell */ // All other declarations /* including simdjson/generic/ondemand/array.h for haswell: #include "simdjson/generic/ondemand/array.h" */ /* begin file simdjson/generic/ondemand/array.h for haswell */ #ifndef SIMDJSON_GENERIC_ONDEMAND_ARRAY_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_ARRAY_H */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { namespace haswell { namespace ondemand { /** * A forward-only JSON array. */ class array { public: /** * Create a new invalid array. 
* * Exists so you can declare a variable and later assign to it before use. */ simdjson_inline array() noexcept = default; /** * Begin array iteration. * * Part of the std::iterable interface. */ simdjson_inline simdjson_result begin() noexcept; /** * Sentinel representing the end of the array. * * Part of the std::iterable interface. */ simdjson_inline simdjson_result end() noexcept; /** * This method scans the array and counts the number of elements. * The count_elements method should always be called before you have begun * iterating through the array: it is expected that you are pointing at * the beginning of the array. * The runtime complexity is linear in the size of the array. After * calling this function, if successful, the array is 'rewinded' at its * beginning as if it had never been accessed. If the JSON is malformed (e.g., * there is a missing comma), then an error is returned and it is no longer * safe to continue. * * To check that an array is empty, it is more performant to use * the is_empty() method. */ simdjson_inline simdjson_result count_elements() & noexcept; /** * This method scans the beginning of the array and checks whether the * array is empty. * The runtime complexity is constant time. After * calling this function, if successful, the array is 'rewinded' at its * beginning as if it had never been accessed. If the JSON is malformed (e.g., * there is a missing comma), then an error is returned and it is no longer * safe to continue. */ simdjson_inline simdjson_result is_empty() & noexcept; /** * Reset the iterator so that we are pointing back at the * beginning of the array. You should still consume values only once even if you * can iterate through the array more than once. If you unescape a string * within the array more than once, you have unsafe code. Note that rewinding * an array means that you may need to reparse it anew: it is not a free * operation. 
* * @returns true if the array contains some elements (not empty) */ inline simdjson_result reset() & noexcept; /** * Get the value associated with the given JSON pointer. We use the RFC 6901 * https://tools.ietf.org/html/rfc6901 standard, interpreting the current node * as the root of its own JSON document. * * ondemand::parser parser; * auto json = R"([ { "foo": { "a": [ 10, 20, 30 ] }} ])"_padded; * auto doc = parser.iterate(json); * doc.at_pointer("/0/foo/a/1") == 20 * * Note that at_pointer() called on the document automatically calls the document's rewind * method between each call. It invalidates all previously accessed arrays, objects and values * that have not been consumed. Yet it is not the case when calling at_pointer on an array * instance: there is no rewind and no invalidation. * * You may only call at_pointer on an array after it has been created, but before it has * been first accessed. When calling at_pointer on an array, the pointer is advanced to * the location indicated by the JSON pointer (in case of success). It is no longer possible * to call at_pointer on the same array. * * Also note that at_pointer() relies on find_field() which implies that we do not unescape keys when matching. * * @return The value associated with the given JSON pointer, or: * - NO_SUCH_FIELD if a field does not exist in an object * - INDEX_OUT_OF_BOUNDS if an array index is larger than an array length * - INCORRECT_TYPE if a non-integer is used to access an array * - INVALID_JSON_POINTER if the JSON pointer is invalid and cannot be parsed */ inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; /** * Consumes the array and returns a string_view instance corresponding to the * array as represented in JSON. It points inside the original document. */ simdjson_inline simdjson_result raw_json() noexcept; /** * Get the value at the given index. This function has linear-time complexity. 
* This function should only be called once on an array instance since the array iterator is not reset between each call. * * @return The value at the given index, or: * - INDEX_OUT_OF_BOUNDS if the array index is larger than an array length */ simdjson_inline simdjson_result at(size_t index) noexcept; protected: /** * Go to the end of the array, no matter where you are right now. */ simdjson_inline error_code consume() noexcept; /** * Begin array iteration. * * @param iter The iterator. Must be where the initial [ is expected. Will be *moved* into the * resulting array. * @error INCORRECT_TYPE if the iterator is not at [. */ static simdjson_inline simdjson_result start(value_iterator &iter) noexcept; /** * Begin array iteration from the root. * * @param iter The iterator. Must be where the initial [ is expected. Will be *moved* into the * resulting array. * @error INCORRECT_TYPE if the iterator is not at [. * @error TAPE_ERROR if there is no closing ] at the end of the document. */ static simdjson_inline simdjson_result start_root(value_iterator &iter) noexcept; /** * Begin array iteration. * * This version of the method should be called after the initial [ has been verified, and is * intended for use by switch statements that check the type of a value. * * @param iter The iterator. Must be after the initial [. Will be *moved* into the resulting array. */ static simdjson_inline simdjson_result started(value_iterator &iter) noexcept; /** * Internal array creation: create an array at the given iterator. Call array::start() or array::started() instead of this. * * @param iter The iterator. Must either be at the start of the first element with iter.is_alive() * == true, or past the [] with is_alive() == false if the array is empty. Will be *moved* * into the resulting array. */ simdjson_inline array(const value_iterator &iter) noexcept; /** * Iterator marking current position. * * iter.is_alive() == false indicates iteration is complete.
*/ value_iterator iter{}; friend class value; friend class document; friend struct simdjson_result; friend struct simdjson_result; friend class array_iterator; }; } // namespace ondemand } // namespace haswell } // namespace simdjson namespace simdjson { template<> struct simdjson_result : public haswell::implementation_simdjson_result_base { public: simdjson_inline simdjson_result(haswell::ondemand::array &&value) noexcept; ///< @private simdjson_inline simdjson_result(error_code error) noexcept; ///< @private simdjson_inline simdjson_result() noexcept = default; simdjson_inline simdjson_result begin() noexcept; simdjson_inline simdjson_result end() noexcept; inline simdjson_result count_elements() & noexcept; inline simdjson_result is_empty() & noexcept; inline simdjson_result reset() & noexcept; simdjson_inline simdjson_result at(size_t index) noexcept; simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; simdjson_inline simdjson_result raw_json() noexcept; }; } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_ARRAY_H /* end file simdjson/generic/ondemand/array.h for haswell */ /* including simdjson/generic/ondemand/array_iterator.h for haswell: #include "simdjson/generic/ondemand/array_iterator.h" */ /* begin file simdjson/generic/ondemand/array_iterator.h for haswell */ #ifndef SIMDJSON_GENERIC_ONDEMAND_ARRAY_ITERATOR_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_ARRAY_ITERATOR_H */ /* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { namespace haswell { namespace ondemand { /** * A 
forward-only JSON array. * * This is an input_iterator, meaning: * - It is forward-only * - * must be called exactly once per element. * - ++ must be called exactly once in between each * (*, ++, *, ++, * ...) */ class array_iterator { public: /** Create a new, invalid array iterator. */ simdjson_inline array_iterator() noexcept = default; // // Iterator interface // /** * Get the current element. * * Part of the std::iterator interface. */ simdjson_inline simdjson_result operator*() noexcept; // MUST ONLY BE CALLED ONCE PER ITERATION. /** * Check if we are at the end of the JSON. * * Part of the std::iterator interface. * * @return true if there are no more elements in the JSON array. */ simdjson_inline bool operator==(const array_iterator &) const noexcept; /** * Check if there are more elements in the JSON array. * * Part of the std::iterator interface. * * @return true if there are more elements in the JSON array. */ simdjson_inline bool operator!=(const array_iterator &) const noexcept; /** * Move to the next element. * * Part of the std::iterator interface. */ simdjson_inline array_iterator &operator++() noexcept; private: value_iterator iter{}; simdjson_inline array_iterator(const value_iterator &iter) noexcept; friend class array; friend class value; friend struct simdjson_result; }; } // namespace ondemand } // namespace haswell } // namespace simdjson namespace simdjson { template<> struct simdjson_result : public haswell::implementation_simdjson_result_base { public: simdjson_inline simdjson_result(haswell::ondemand::array_iterator &&value) noexcept; ///< @private simdjson_inline simdjson_result(error_code error) noexcept; ///< @private simdjson_inline simdjson_result() noexcept = default; // // Iterator interface // simdjson_inline simdjson_result operator*() noexcept; // MUST ONLY BE CALLED ONCE PER ITERATION. 
simdjson_inline bool operator==(const simdjson_result &) const noexcept; simdjson_inline bool operator!=(const simdjson_result &) const noexcept; simdjson_inline simdjson_result &operator++() noexcept; }; } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_ARRAY_ITERATOR_H /* end file simdjson/generic/ondemand/array_iterator.h for haswell */ /* including simdjson/generic/ondemand/document.h for haswell: #include "simdjson/generic/ondemand/document.h" */ /* begin file simdjson/generic/ondemand/document.h for haswell */ #ifndef SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_H */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { namespace haswell { namespace ondemand { /** * A JSON document. It holds a json_iterator instance. * * Used by tokens to get text, and string buffer location. * * You must keep the document around during iteration. */ class document { public: /** * Create a new invalid document. * * Exists so you can declare a variable and later assign to it before use. */ simdjson_inline document() noexcept = default; simdjson_inline document(const document &other) noexcept = delete; // pass your documents by reference, not by copy simdjson_inline document(document &&other) noexcept = default; simdjson_inline document &operator=(const document &other) noexcept = delete; simdjson_inline document &operator=(document &&other) noexcept = default; /** * Cast this JSON value to an array. * * @returns An object that can be used to iterate the array. * @returns INCORRECT_TYPE If the JSON value is not an array. 
*/ simdjson_inline simdjson_result get_array() & noexcept; /** * Cast this JSON value to an object. * * @returns An object that can be used to look up or iterate fields. * @returns INCORRECT_TYPE If the JSON value is not an object. */ simdjson_inline simdjson_result get_object() & noexcept; /** * Cast this JSON value to an unsigned integer. * * @returns An unsigned 64-bit integer. * @returns INCORRECT_TYPE If the JSON value is not a 64-bit unsigned integer. */ simdjson_inline simdjson_result get_uint64() noexcept; /** * Cast this JSON value (inside string) to an unsigned integer. * * @returns An unsigned 64-bit integer. * @returns INCORRECT_TYPE If the JSON value is not a 64-bit unsigned integer. */ simdjson_inline simdjson_result get_uint64_in_string() noexcept; /** * Cast this JSON value to a signed integer. * * @returns A signed 64-bit integer. * @returns INCORRECT_TYPE If the JSON value is not a 64-bit integer. */ simdjson_inline simdjson_result get_int64() noexcept; /** * Cast this JSON value (inside string) to a signed integer. * * @returns A signed 64-bit integer. * @returns INCORRECT_TYPE If the JSON value is not a 64-bit integer. */ simdjson_inline simdjson_result get_int64_in_string() noexcept; /** * Cast this JSON value to a double. * * @returns A double. * @returns INCORRECT_TYPE If the JSON value is not a valid floating-point number. */ simdjson_inline simdjson_result get_double() noexcept; /** * Cast this JSON value (inside string) to a double. * * @returns A double. * @returns INCORRECT_TYPE If the JSON value is not a valid floating-point number. */ simdjson_inline simdjson_result get_double_in_string() noexcept; /** * Cast this JSON value to a string. * * The string is guaranteed to be valid UTF-8. * * Important: Calling get_string() twice on the same document is an error. * * @param allow_replacement Whether to allow a replacement character for unmatched surrogate pairs. * @returns An UTF-8 string.
The string is stored in the parser and will be invalidated the next * time it parses a document or when it is destroyed. * @returns INCORRECT_TYPE if the JSON value is not a string. */ simdjson_inline simdjson_result get_string(bool allow_replacement = false) noexcept; /** * Attempts to fill the provided std::string reference with the parsed value of the current string. * * The string is guaranteed to be valid UTF-8. * * Important: a value should be consumed once. Calling get_string() twice on the same value * is an error. * * Performance: This method may be slower than get_string() or get_string(bool) because it may need to allocate memory. * We recommend you avoid allocating an std::string unless you need to. * * @returns INCORRECT_TYPE if the JSON value is not a string. Otherwise, we return SUCCESS. */ template simdjson_inline error_code get_string(string_type& receiver, bool allow_replacement = false) noexcept; /** * Cast this JSON value to a string. * * The string is not guaranteed to be valid UTF-8. See https://simonsapin.github.io/wtf-8/ * * Important: Calling get_wobbly_string() twice on the same document is an error. * * @returns An UTF-8 string. The string is stored in the parser and will be invalidated the next * time it parses a document or when it is destroyed. * @returns INCORRECT_TYPE if the JSON value is not a string. */ simdjson_inline simdjson_result get_wobbly_string() noexcept; /** * Cast this JSON value to a raw_json_string. * * The string is guaranteed to be valid UTF-8, and may have escapes in it (e.g. \\ or \n). * * @returns A pointer to the raw JSON for the given string. * @returns INCORRECT_TYPE if the JSON value is not a string. */ simdjson_inline simdjson_result get_raw_json_string() noexcept; /** * Cast this JSON value to a bool. * * @returns A bool value. * @returns INCORRECT_TYPE if the JSON value is not true or false. 
*/ simdjson_inline simdjson_result get_bool() noexcept; /** * Cast this JSON value to a value when the document is an object or an array. * * You must not have begun iterating through the object or array. When * SIMDJSON_DEVELOPMENT_CHECKS is set to 1 (which is the case when building in Debug mode * by default), and you have already begun iterating, * you will get an OUT_OF_ORDER_ITERATION error. If you have begun iterating, you can use * rewind() to reset the document to its initial state before calling this method. * * @returns A value if the document is a JSON array or object. * @returns SCALAR_DOCUMENT_AS_VALUE error if the document is a scalar (see is_scalar() function). */ simdjson_inline simdjson_result get_value() noexcept; /** * Checks if this JSON value is null. If and only if the value is * null, then it is consumed (we advance). If we find a token that * begins with 'n' but is not 'null', then an error is returned. * * @returns Whether the value is null. * @returns INCORRECT_TYPE If the JSON value begins with 'n' and is not 'null'. */ simdjson_inline simdjson_result is_null() noexcept; /** * Get this value as the given type. * * Supported types: object, array, raw_json_string, string_view, uint64_t, int64_t, double, bool * * You may use get_double(), get_bool(), get_uint64(), get_int64(), * get_object(), get_array(), get_raw_json_string(), or get_string() instead. * * @returns A value of the given type, parsed from the JSON. * @returns INCORRECT_TYPE If the JSON value is not the given type. */ template simdjson_inline simdjson_result get() & noexcept { // Unless the simdjson library provides an inline implementation, calling this method should // immediately fail. static_assert(!sizeof(T), "The get method with given type is not implemented by the simdjson library. " "The supported types are ondemand::object, ondemand::array, raw_json_string, std::string_view, uint64_t, " "int64_t, double, and bool.
We recommend you use get_double(), get_bool(), get_uint64(), get_int64(), " " get_object(), get_array(), get_raw_json_string(), or get_string() instead of the get template."); } /** @overload template simdjson_result get() & noexcept */ template simdjson_inline simdjson_result get() && noexcept { // Unless the simdjson library provides an inline implementation, calling this method should // immediately fail. static_assert(!sizeof(T), "The get method with given type is not implemented by the simdjson library. " "The supported types are ondemand::object, ondemand::array, raw_json_string, std::string_view, uint64_t, " "int64_t, double, and bool. We recommend you use get_double(), get_bool(), get_uint64(), get_int64(), " " get_object(), get_array(), get_raw_json_string(), or get_string() instead of the get template."); } /** * Get this value as the given type. * * Supported types: object, array, raw_json_string, string_view, uint64_t, int64_t, double, bool, value * * Be mindful that the document instance must remain in scope while you are accessing object, array and value instances. * * @param out This is set to a value of the given type, parsed from the JSON. If there is an error, this may not be initialized. * @returns INCORRECT_TYPE If the JSON value is not an object. * @returns SUCCESS If the parse succeeded and the out parameter was set to the value. */ template simdjson_inline error_code get(T &out) & noexcept; /** @overload template error_code get(T &out) & noexcept */ template simdjson_inline error_code get(T &out) && noexcept; #if SIMDJSON_EXCEPTIONS /** * Cast this JSON value to an array. * * @returns An object that can be used to iterate the array. * @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not an array. */ simdjson_inline operator array() & noexcept(false); /** * Cast this JSON value to an object. * * @returns An object that can be used to look up or iterate fields. 
* @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not an object. */ simdjson_inline operator object() & noexcept(false); /** * Cast this JSON value to an unsigned integer. * * @returns An unsigned 64-bit integer. * @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not a 64-bit unsigned integer. */ simdjson_inline operator uint64_t() noexcept(false); /** * Cast this JSON value to a signed integer. * * @returns A signed 64-bit integer. * @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not a 64-bit integer. */ simdjson_inline operator int64_t() noexcept(false); /** * Cast this JSON value to a double. * * @returns A double. * @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not a valid floating-point number. */ simdjson_inline operator double() noexcept(false); /** * Cast this JSON value to a string. * * The string is guaranteed to be valid UTF-8. * * @returns An UTF-8 string. The string is stored in the parser and will be invalidated the next * time it parses a document or when it is destroyed. * @exception simdjson_error(INCORRECT_TYPE) if the JSON value is not a string. */ simdjson_inline operator std::string_view() noexcept(false); /** * Cast this JSON value to a raw_json_string. * * The string is guaranteed to be valid UTF-8, and may have escapes in it (e.g. \\ or \n). * * @returns A pointer to the raw JSON for the given string. * @exception simdjson_error(INCORRECT_TYPE) if the JSON value is not a string. */ simdjson_inline operator raw_json_string() noexcept(false); /** * Cast this JSON value to a bool. * * @returns A bool value. * @exception simdjson_error(INCORRECT_TYPE) if the JSON value is not true or false. */ simdjson_inline operator bool() noexcept(false); /** * Cast this JSON value to a value when the document is an object or an array. * * You must not have begun iterating through the object or array.
When * SIMDJSON_DEVELOPMENT_CHECKS is set to 1, and you have already begun iterating, * you will get an OUT_OF_ORDER_ITERATION error. If you have begun iterating, you can use * rewind() to reset the document to its initial state before calling this method. * * @returns A value if the document is a JSON array or object. * @exception SCALAR_DOCUMENT_AS_VALUE error if the document is a scalar (see is_scalar() function). */ simdjson_inline operator value() noexcept(false); #endif /** * This method scans the array and counts the number of elements. * The count_elements method should always be called before you have begun * iterating through the array: it is expected that you are pointing at * the beginning of the array. * The runtime complexity is linear in the size of the array. After * calling this function, if successful, the array is 'rewinded' at its * beginning as if it had never been accessed. If the JSON is malformed (e.g., * there is a missing comma), then an error is returned and it is no longer * safe to continue. */ simdjson_inline simdjson_result count_elements() & noexcept; /** * This method scans the object and counts the number of key-value pairs. * The count_fields method should always be called before you have begun * iterating through the object: it is expected that you are pointing at * the beginning of the object. * The runtime complexity is linear in the size of the object. After * calling this function, if successful, the object is 'rewinded' at its * beginning as if it had never been accessed. If the JSON is malformed (e.g., * there is a missing comma), then an error is returned and it is no longer * safe to continue. * * To check that an object is empty, it is more performant to use * the is_empty() method. */ simdjson_inline simdjson_result count_fields() & noexcept; /** * Get the value at the given index in the array. This function has linear-time complexity.
* This function should only be called once on an array instance since the array iterator is not reset between each call. * * @return The value at the given index, or: * - INDEX_OUT_OF_BOUNDS if the array index is larger than an array length */ simdjson_inline simdjson_result at(size_t index) & noexcept; /** * Begin array iteration. * * Part of the std::iterable interface. */ simdjson_inline simdjson_result begin() & noexcept; /** * Sentinel representing the end of the array. * * Part of the std::iterable interface. */ simdjson_inline simdjson_result end() & noexcept; /** * Look up a field by name on an object (order-sensitive). * * The following code reads z, then y, then x, and thus will not retrieve x or y if fed the * JSON `{ "x": 1, "y": 2, "z": 3 }`: * * ```c++ * simdjson::ondemand::parser parser; * auto obj = parser.parse(R"( { "x": 1, "y": 2, "z": 3 } )"_padded); * double z = obj.find_field("z"); * double y = obj.find_field("y"); * double x = obj.find_field("x"); * ``` * * **Raw Keys:** The lookup will be done against the *raw* key, and will not unescape keys. * e.g. `object["a"]` will match `{ "a": 1 }`, but will *not* match `{ "\u0061": 1 }`. * * * You must consume the fields on an object one at a time. A request for a new key * invalidates previous field values: it makes them unsafe. E.g., the array * given by content["bids"].get_array() should not be accessed after you have called * content["asks"].get_array(). You can detect such mistakes by first compiling and running * the code in Debug mode (or with the macro `SIMDJSON_DEVELOPMENT_CHECKS` set to 1): an * OUT_OF_ORDER_ITERATION error is generated. * * You are expected to access keys only once. You should access the value corresponding to * a key a single time. Doing object["mykey"].to_string()and then again object["mykey"].to_string() * is an error. * * @param key The key to look up. * @returns The value of the field, or NO_SUCH_FIELD if the field is not in the object. 
*/ simdjson_inline simdjson_result find_field(std::string_view key) & noexcept; /** @overload simdjson_inline simdjson_result find_field(std::string_view key) & noexcept; */ simdjson_inline simdjson_result find_field(const char *key) & noexcept; /** * Look up a field by name on an object, without regard to key order. * * **Performance Notes:** This is a bit less performant than find_field(), though its effect varies * and often appears negligible. It starts out normally, starting out at the last field; but if * the field is not found, it scans from the beginning of the object to see if it missed it. That * missing case has a non-cache-friendly bump and lots of extra scanning, especially if the object * in question is large. The fact that the extra code is there also bumps the executable size. * * It is the default, however, because it would be highly surprising (and hard to debug) if the * default behavior failed to look up a field just because it was in the wrong order--and many * APIs assume this. Therefore, you must be explicit if you want to treat objects as out of order. * * Use find_field() if you are sure fields will be in order (or are willing to treat it as if the * field wasn't there when they aren't). * * You must consume the fields on an object one at a time. A request for a new key * invalidates previous field values: it makes them unsafe. E.g., the array * given by content["bids"].get_array() should not be accessed after you have called * content["asks"].get_array(). You can detect such mistakes by first compiling and running * the code in Debug mode (or with the macro `SIMDJSON_DEVELOPMENT_CHECKS` set to 1): an * OUT_OF_ORDER_ITERATION error is generated. * * You are expected to access keys only once. You should access the value corresponding to a key * a single time. Doing object["mykey"].to_string() and then again object["mykey"].to_string() * is an error. * * @param key The key to look up. 
* @returns The value of the field, or NO_SUCH_FIELD if the field is not in the object. */ simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; */ simdjson_inline simdjson_result find_field_unordered(const char *key) & noexcept; /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; */ simdjson_inline simdjson_result operator[](std::string_view key) & noexcept; /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; */ simdjson_inline simdjson_result operator[](const char *key) & noexcept; /** * Get the type of this JSON value. It does not validate or consume the value. * E.g., you must still call "is_null()" to check that a value is null even if * "type()" returns json_type::null. * * NOTE: If you're only expecting a value to be one type (a typical case), it's generally * better to just call .get_double, .get_string, etc. and check for INCORRECT_TYPE (or just * let it throw an exception). * * @error TAPE_ERROR when the JSON value is a bad token like "}" "," or "alse". */ simdjson_inline simdjson_result type() noexcept; /** * Checks whether the document is a scalar (string, number, null, Boolean). * Returns false when it is an array or object. * * @returns true if the type is string, number, null, Boolean * @error TAPE_ERROR when the JSON value is a bad token like "}" "," or "alse". */ simdjson_inline simdjson_result is_scalar() noexcept; /** * Checks whether the document is a negative number. * * @returns true if the number is negative. */ simdjson_inline bool is_negative() noexcept; /** * Checks whether the document is an integer number. Note that * this requires partially parsing the number string.
If * the value is determined to be an integer, it may still * not parse properly as an integer in subsequent steps * (e.g., it might overflow). * * @returns true if the number is an integer. */ simdjson_inline simdjson_result is_integer() noexcept; /** * Determine the number type (integer or floating-point number) as quickly * as possible. This function does not fully validate the input. It is * useful when you only need to classify the numbers, without parsing them. * * If you are planning to retrieve the value or you need full validation, * consider using the get_number() method instead: it will fully parse * and validate the input, and give you access to the type: * get_number().get_number_type(). * * get_number_type() is number_type::unsigned_integer if we have * an integer greater or equal to 9223372036854775808 * get_number_type() is number_type::signed_integer if we have an * integer that is less than 9223372036854775808 * Otherwise, get_number_type() has value number_type::floating_point_number * * This function requires processing the number string, but it is expected * to be faster than get_number().get_number_type() because it does not * parse the number value. * * @returns the type of the number */ simdjson_inline simdjson_result get_number_type() noexcept; /** * Attempt to parse an ondemand::number. An ondemand::number may * contain an integer value or a floating-point value, the simdjson * library will autodetect the type. Thus it is a dynamically typed * number. Before accessing the value, you must determine the detected * type. * * number.get_number_type() is number_type::signed_integer if we have * an integer in [-9223372036854775808,9223372036854775808) * You can recover the value by calling number.get_int64() and you * have that number.is_int64() is true.
* * number.get_number_type() is number_type::unsigned_integer if we have * an integer in [9223372036854775808,18446744073709551616) * You can recover the value by calling number.get_uint64() and you * have that number.is_uint64() is true. * * Otherwise, number.get_number_type() has value number_type::floating_point_number * and we have a binary64 number. * You can recover the value by calling number.get_double() and you * have that number.is_double() is true. * * You must check the type before accessing the value: it is an error * to call "get_int64()" when number.get_number_type() is not * number_type::signed_integer and when number.is_int64() is false. */ simdjson_warn_unused simdjson_inline simdjson_result get_number() noexcept; /** * Get the raw JSON for this token. * * The string_view will always point into the input buffer. * * The string_view will start at the beginning of the token, and include the entire token * *as well as all spaces until the next token (or EOF).* This means, for example, that a * string token always begins with a " and is always terminated by the final ", possibly * followed by a number of spaces. * * The string_view is *not* null-terminated. If this is a scalar (string, number, * boolean, or null), the character after the end of the string_view may be the padded buffer. * * Tokens include: * - { * - [ * - "a string (possibly with UTF-8 or backslashed characters like \\\")". * - -1.2e-100 * - true * - false * - null */ simdjson_inline simdjson_result raw_json_token() noexcept; /** * Reset the iterator inside the document instance so we are pointing back at the * beginning of the document, as if it had just been created. It invalidates all * values, objects and arrays that you have created so far (including unescaped strings). */ inline void rewind() noexcept; /** * Returns debugging information. */ inline std::string to_debug_string() noexcept; /** * Some unrecoverable error conditions may render the document instance unusable. 
* The is_alive() method returns true when the document is still suitable. */ inline bool is_alive() noexcept; /** * Returns the current location in the document if in bounds. */ inline simdjson_result current_location() const noexcept; /** * Returns true if this document has been fully parsed. * If you have consumed the whole document and at_end() returns * false, then there may be trailing content. */ inline bool at_end() const noexcept; /** * Returns the current depth in the document if in bounds. * * E.g., * 0 = finished with document * 1 = document root value (could be [ or {, not yet known) * 2 = , or } inside root array/object * 3 = key or value inside root array/object. */ simdjson_inline int32_t current_depth() const noexcept; /** * Get the value associated with the given JSON pointer. We use the RFC 6901 * https://tools.ietf.org/html/rfc6901 standard. * * ondemand::parser parser; * auto json = R"({ "foo": { "a": [ 10, 20, 30 ] }})"_padded; * auto doc = parser.iterate(json); * doc.at_pointer("/foo/a/1") == 20 * * It is allowed for a key to be the empty string: * * ondemand::parser parser; * auto json = R"({ "": { "a": [ 10, 20, 30 ] }})"_padded; * auto doc = parser.iterate(json); * doc.at_pointer("//a/1") == 20 * * Note that at_pointer() automatically calls rewind between each call. Thus * all values, objects and arrays that you have created so far (including unescaped strings) * are invalidated. After calling at_pointer, you need to consume the result: string values * should be stored in your own variables, arrays should be decoded and stored in your own array-like * structures and so forth. 
* * Also note that at_pointer() relies on find_field() which implies that we do not unescape keys when matching * * @return The value associated with the given JSON pointer, or: * - NO_SUCH_FIELD if a field does not exist in an object * - INDEX_OUT_OF_BOUNDS if an array index is larger than an array length * - INCORRECT_TYPE if a non-integer is used to access an array * - INVALID_JSON_POINTER if the JSON pointer is invalid and cannot be parsed * - SCALAR_DOCUMENT_AS_VALUE if the json_pointer is empty and the document is not a scalar (see is_scalar() function). */ simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; /** * Consumes the document and returns a string_view instance corresponding to the * document as represented in JSON. It points inside the original byte array containing * the JSON document. */ simdjson_inline simdjson_result raw_json() noexcept; protected: /** * Consumes the document. */ simdjson_inline error_code consume() noexcept; simdjson_inline document(ondemand::json_iterator &&iter) noexcept; simdjson_inline const uint8_t *text(uint32_t idx) const noexcept; simdjson_inline value_iterator resume_value_iterator() noexcept; simdjson_inline value_iterator get_root_value_iterator() noexcept; simdjson_inline simdjson_result start_or_resume_object() noexcept; static simdjson_inline document start(ondemand::json_iterator &&iter) noexcept; // // Fields // json_iterator iter{}; ///< Current position in the document static constexpr depth_t DOCUMENT_DEPTH = 0; ///< document depth is always 0 friend class array_iterator; friend class value; friend class ondemand::parser; friend class object; friend class array; friend class field; friend class token; friend class document_stream; friend class document_reference; }; /** * A document_reference is a thin wrapper around a document reference instance. 
*/ class document_reference { public: simdjson_inline document_reference() noexcept; simdjson_inline document_reference(document &d) noexcept; simdjson_inline document_reference(const document_reference &other) noexcept = default; simdjson_inline document_reference& operator=(const document_reference &other) noexcept = default; simdjson_inline void rewind() noexcept; simdjson_inline simdjson_result get_array() & noexcept; simdjson_inline simdjson_result get_object() & noexcept; simdjson_inline simdjson_result get_uint64() noexcept; simdjson_inline simdjson_result get_uint64_in_string() noexcept; simdjson_inline simdjson_result get_int64() noexcept; simdjson_inline simdjson_result get_int64_in_string() noexcept; simdjson_inline simdjson_result get_double() noexcept; simdjson_inline simdjson_result get_double_in_string() noexcept; simdjson_inline simdjson_result get_string(bool allow_replacement = false) noexcept; template simdjson_inline error_code get_string(string_type& receiver, bool allow_replacement = false) noexcept; simdjson_inline simdjson_result get_wobbly_string() noexcept; simdjson_inline simdjson_result get_raw_json_string() noexcept; simdjson_inline simdjson_result get_bool() noexcept; simdjson_inline simdjson_result get_value() noexcept; simdjson_inline simdjson_result is_null() noexcept; simdjson_inline simdjson_result raw_json() noexcept; simdjson_inline operator document&() const noexcept; #if SIMDJSON_EXCEPTIONS simdjson_inline operator array() & noexcept(false); simdjson_inline operator object() & noexcept(false); simdjson_inline operator uint64_t() noexcept(false); simdjson_inline operator int64_t() noexcept(false); simdjson_inline operator double() noexcept(false); simdjson_inline operator std::string_view() noexcept(false); simdjson_inline operator raw_json_string() noexcept(false); simdjson_inline operator bool() noexcept(false); simdjson_inline operator value() noexcept(false); #endif simdjson_inline simdjson_result count_elements() & 
noexcept; simdjson_inline simdjson_result count_fields() & noexcept; simdjson_inline simdjson_result at(size_t index) & noexcept; simdjson_inline simdjson_result begin() & noexcept; simdjson_inline simdjson_result end() & noexcept; simdjson_inline simdjson_result find_field(std::string_view key) & noexcept; simdjson_inline simdjson_result find_field(const char *key) & noexcept; simdjson_inline simdjson_result operator[](std::string_view key) & noexcept; simdjson_inline simdjson_result operator[](const char *key) & noexcept; simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; simdjson_inline simdjson_result find_field_unordered(const char *key) & noexcept; simdjson_inline simdjson_result type() noexcept; simdjson_inline simdjson_result is_scalar() noexcept; simdjson_inline simdjson_result current_location() noexcept; simdjson_inline int32_t current_depth() const noexcept; simdjson_inline bool is_negative() noexcept; simdjson_inline simdjson_result is_integer() noexcept; simdjson_inline simdjson_result get_number_type() noexcept; simdjson_inline simdjson_result get_number() noexcept; simdjson_inline simdjson_result raw_json_token() noexcept; simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; private: document *doc{nullptr}; }; } // namespace ondemand } // namespace haswell } // namespace simdjson namespace simdjson { template<> struct simdjson_result : public haswell::implementation_simdjson_result_base { public: simdjson_inline simdjson_result(haswell::ondemand::document &&value) noexcept; ///< @private simdjson_inline simdjson_result(error_code error) noexcept; ///< @private simdjson_inline simdjson_result() noexcept = default; simdjson_inline error_code rewind() noexcept; simdjson_inline simdjson_result get_array() & noexcept; simdjson_inline simdjson_result get_object() & noexcept; simdjson_inline simdjson_result get_uint64() noexcept; simdjson_inline simdjson_result get_uint64_in_string() 
noexcept; simdjson_inline simdjson_result get_int64() noexcept; simdjson_inline simdjson_result get_int64_in_string() noexcept; simdjson_inline simdjson_result get_double() noexcept; simdjson_inline simdjson_result get_double_in_string() noexcept; simdjson_inline simdjson_result get_string(bool allow_replacement = false) noexcept; template simdjson_inline error_code get_string(string_type& receiver, bool allow_replacement = false) noexcept; simdjson_inline simdjson_result get_wobbly_string() noexcept; simdjson_inline simdjson_result get_raw_json_string() noexcept; simdjson_inline simdjson_result get_bool() noexcept; simdjson_inline simdjson_result get_value() noexcept; simdjson_inline simdjson_result is_null() noexcept; template simdjson_inline simdjson_result get() & noexcept; template simdjson_inline simdjson_result get() && noexcept; template simdjson_inline error_code get(T &out) & noexcept; template simdjson_inline error_code get(T &out) && noexcept; #if SIMDJSON_EXCEPTIONS simdjson_inline operator haswell::ondemand::array() & noexcept(false); simdjson_inline operator haswell::ondemand::object() & noexcept(false); simdjson_inline operator uint64_t() noexcept(false); simdjson_inline operator int64_t() noexcept(false); simdjson_inline operator double() noexcept(false); simdjson_inline operator std::string_view() noexcept(false); simdjson_inline operator haswell::ondemand::raw_json_string() noexcept(false); simdjson_inline operator bool() noexcept(false); simdjson_inline operator haswell::ondemand::value() noexcept(false); #endif simdjson_inline simdjson_result count_elements() & noexcept; simdjson_inline simdjson_result count_fields() & noexcept; simdjson_inline simdjson_result at(size_t index) & noexcept; simdjson_inline simdjson_result begin() & noexcept; simdjson_inline simdjson_result end() & noexcept; simdjson_inline simdjson_result find_field(std::string_view key) & noexcept; simdjson_inline simdjson_result find_field(const char *key) & noexcept; 
simdjson_inline simdjson_result operator[](std::string_view key) & noexcept; simdjson_inline simdjson_result operator[](const char *key) & noexcept; simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; simdjson_inline simdjson_result find_field_unordered(const char *key) & noexcept; simdjson_inline simdjson_result type() noexcept; simdjson_inline simdjson_result is_scalar() noexcept; simdjson_inline simdjson_result current_location() noexcept; simdjson_inline int32_t current_depth() const noexcept; simdjson_inline bool at_end() const noexcept; simdjson_inline bool is_negative() noexcept; simdjson_inline simdjson_result is_integer() noexcept; simdjson_inline simdjson_result get_number_type() noexcept; simdjson_inline simdjson_result get_number() noexcept; /** @copydoc simdjson_inline std::string_view document::raw_json_token() const noexcept */ simdjson_inline simdjson_result raw_json_token() noexcept; simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; }; } // namespace simdjson namespace simdjson { template<> struct simdjson_result : public haswell::implementation_simdjson_result_base { public: simdjson_inline simdjson_result(haswell::ondemand::document_reference value, error_code error) noexcept; simdjson_inline simdjson_result() noexcept = default; simdjson_inline error_code rewind() noexcept; simdjson_inline simdjson_result get_array() & noexcept; simdjson_inline simdjson_result get_object() & noexcept; simdjson_inline simdjson_result get_uint64() noexcept; simdjson_inline simdjson_result get_uint64_in_string() noexcept; simdjson_inline simdjson_result get_int64() noexcept; simdjson_inline simdjson_result get_int64_in_string() noexcept; simdjson_inline simdjson_result get_double() noexcept; simdjson_inline simdjson_result get_double_in_string() noexcept; simdjson_inline simdjson_result get_string(bool allow_replacement = false) noexcept; template simdjson_inline error_code get_string(string_type& 
receiver, bool allow_replacement = false) noexcept; simdjson_inline simdjson_result get_wobbly_string() noexcept; simdjson_inline simdjson_result get_raw_json_string() noexcept; simdjson_inline simdjson_result get_bool() noexcept; simdjson_inline simdjson_result get_value() noexcept; simdjson_inline simdjson_result is_null() noexcept; #if SIMDJSON_EXCEPTIONS simdjson_inline operator haswell::ondemand::array() & noexcept(false); simdjson_inline operator haswell::ondemand::object() & noexcept(false); simdjson_inline operator uint64_t() noexcept(false); simdjson_inline operator int64_t() noexcept(false); simdjson_inline operator double() noexcept(false); simdjson_inline operator std::string_view() noexcept(false); simdjson_inline operator haswell::ondemand::raw_json_string() noexcept(false); simdjson_inline operator bool() noexcept(false); simdjson_inline operator haswell::ondemand::value() noexcept(false); #endif simdjson_inline simdjson_result count_elements() & noexcept; simdjson_inline simdjson_result count_fields() & noexcept; simdjson_inline simdjson_result at(size_t index) & noexcept; simdjson_inline simdjson_result begin() & noexcept; simdjson_inline simdjson_result end() & noexcept; simdjson_inline simdjson_result find_field(std::string_view key) & noexcept; simdjson_inline simdjson_result find_field(const char *key) & noexcept; simdjson_inline simdjson_result operator[](std::string_view key) & noexcept; simdjson_inline simdjson_result operator[](const char *key) & noexcept; simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; simdjson_inline simdjson_result find_field_unordered(const char *key) & noexcept; simdjson_inline simdjson_result type() noexcept; simdjson_inline simdjson_result is_scalar() noexcept; simdjson_inline simdjson_result current_location() noexcept; simdjson_inline simdjson_result current_depth() const noexcept; simdjson_inline simdjson_result is_negative() noexcept; simdjson_inline simdjson_result 
is_integer() noexcept; simdjson_inline simdjson_result get_number_type() noexcept; simdjson_inline simdjson_result get_number() noexcept; /** @copydoc simdjson_inline std::string_view document_reference::raw_json_token() const noexcept */ simdjson_inline simdjson_result raw_json_token() noexcept; simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; }; } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_H /* end file simdjson/generic/ondemand/document.h for haswell */ /* including simdjson/generic/ondemand/document_stream.h for haswell: #include "simdjson/generic/ondemand/document_stream.h" */ /* begin file simdjson/generic/ondemand/document_stream.h for haswell */ #ifndef SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_STREAM_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_STREAM_H */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/document.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/parser.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ #ifdef SIMDJSON_THREADS_ENABLED #include #include #include #endif namespace simdjson { namespace haswell { namespace ondemand { #ifdef SIMDJSON_THREADS_ENABLED /** @private Custom worker class: runs stage 1 on a background thread for a document_stream. **/ struct stage1_worker { stage1_worker() noexcept = default; /** Workers are neither copyable nor movable; the deleted copy assignment uses the conventional reference return type. */ stage1_worker(const stage1_worker&) = delete; stage1_worker(stage1_worker&&) = delete; stage1_worker& operator=(const stage1_worker&) = delete; ~stage1_worker(); /** * We only start the thread when it is needed, not at object construction, this may throw. * You should only call this once. **/ void start_thread(); /** * Start a stage 1 job.
You should first call 'run', then 'finish'. * You must call start_thread once before. */ void run(document_stream * ds, parser * stage1, size_t next_batch_start); /** Wait for the run to finish (blocking). You should first call 'run', then 'finish'. **/ void finish(); private: /** * Normally, we would never stop the thread. But we do in the destructor. * This function is only safe assuming that you are not waiting for results. You * should have called run, then finish, and be done. **/ void stop_thread(); std::thread thread{}; /** These three variables define the work done by the thread. **/ ondemand::parser * stage1_thread_parser{}; size_t _next_batch_start{}; document_stream * owner{}; /** * We have two state variables. This could be streamlined to one variable in the future but * we use two for clarity. */ bool has_work{false}; bool can_work{true}; /** * We lock using a mutex. */ std::mutex locking_mutex{}; std::condition_variable cond_var{}; friend class document_stream; }; #endif // SIMDJSON_THREADS_ENABLED /** * A forward-only stream of documents. * * Produced by parser::iterate_many. * */ class document_stream { public: /** * Construct an uninitialized document_stream. * * ```c++ * document_stream docs; * auto error = parser.iterate_many(json).get(docs); * ``` */ simdjson_inline document_stream() noexcept; /** Move one document_stream to another. */ simdjson_inline document_stream(document_stream &&other) noexcept = default; /** Move one document_stream to another. */ simdjson_inline document_stream &operator=(document_stream &&other) noexcept = default; simdjson_inline ~document_stream() noexcept; /** * Returns the input size in bytes. */ inline size_t size_in_bytes() const noexcept; /** * After iterating through the stream, this method * returns the number of bytes that were not parsed at the end * of the stream. 
If truncated_bytes() differs from zero, * then the input was truncated maybe because incomplete JSON * documents were found at the end of the stream. You * may need to process the bytes in the interval [size_in_bytes()-truncated_bytes(), size_in_bytes()). * * You should only call truncated_bytes() after streaming through all * documents, like so: * * document_stream stream = parser.iterate_many(json,window); * for(auto & doc : stream) { * // do something with doc * } * size_t truncated = stream.truncated_bytes(); * */ inline size_t truncated_bytes() const noexcept; class iterator { public: /** Iterator traits: an input iterator whose reference type is its value type. */ using value_type = simdjson_result; using reference = value_type; using difference_type = std::ptrdiff_t; using iterator_category = std::input_iterator_tag; /** * Default constructor. */ simdjson_inline iterator() noexcept; /** * Get the current document (or error). */ simdjson_inline simdjson_result operator*() noexcept; /** * Advance to the next document (prefix). */ inline iterator& operator++() noexcept; /** * Check if we're at the end yet. * @param other the end iterator to compare to. */ simdjson_inline bool operator!=(const iterator &other) const noexcept; /** * @private * * Gives the current index in the input document in bytes. * * document_stream stream = parser.iterate_many(json,window); * for(auto i = stream.begin(); i != stream.end(); ++i) { * auto doc = *i; * size_t index = i.current_index(); * } * * This function (current_index()) is experimental and the usage * may change in future versions of simdjson: we find the API somewhat * awkward and we would like to offer something friendlier. */ simdjson_inline size_t current_index() const noexcept; /** * @private * * Gives a view of the current document at the current position.
* * document_stream stream = parser.iterate_many(json,window); * for(auto i = stream.begin(); i != stream.end(); ++i) { * std::string_view v = i.source(); * } * * The returned string_view instance is simply a map to the (unparsed) * source string: it may thus include white-space characters and all manner * of padding. * * This function (source()) is experimental and the usage * may change in future versions of simdjson: we find the API somewhat * awkward and we would like to offer something friendlier. * */ simdjson_inline std::string_view source() const noexcept; /** * Returns error of the stream (if any). */ inline error_code error() const noexcept; private: simdjson_inline iterator(document_stream *s, bool finished) noexcept; /** The document_stream we're iterating through. */ document_stream* stream; /** Whether we're finished or not. */ bool finished; friend class document; friend class document_stream; friend class json_iterator; }; /** * Start iterating the documents in the stream. */ simdjson_inline iterator begin() noexcept; /** * The end of the stream, for iterator comparison purposes. */ simdjson_inline iterator end() noexcept; private: document_stream &operator=(const document_stream &) = delete; // Disallow copying document_stream(const document_stream &other) = delete; // Disallow copying /** * Construct a document_stream. Does not allocate or parse anything until the iterator is * used. * * @param parser is a reference to the parser instance used to generate this document_stream * @param buf is the raw byte buffer we need to process * @param len is the length of the raw byte buffer in bytes * @param batch_size is the size of the windows (must be strictly greater or equal to the largest JSON document) */ simdjson_inline document_stream( ondemand::parser &parser, const uint8_t *buf, size_t len, size_t batch_size, bool allow_comma_separated ) noexcept; /** * Parse the first document in the buffer. 
Used by begin(), to handle allocation and * initialization. */ inline void start() noexcept; /** * Parse the next document found in the buffer previously given to document_stream. * * The content should be a valid JSON document encoded as UTF-8. If there is a * UTF-8 BOM, the parser skips it. * * You do NOT need to pre-allocate a parser. This function takes care of * pre-allocating a capacity defined by the batch_size defined when creating the * document_stream object. * * The function returns simdjson::EMPTY if there is no more data to be parsed. * * The function returns simdjson::SUCCESS (as integer = 0) in case of success * and indicates that the buffer has successfully been parsed to the end. * Every document it contained has been parsed without error. * * The function returns an error code from simdjson/simdjson.h in case of failure * such as simdjson::CAPACITY, simdjson::MEMALLOC, simdjson::DEPTH_ERROR and so forth * (the simdjson::error_message function converts these error codes into a string). * * You can also check validity by calling parser.is_valid(). The same parser can * and should be reused for the other documents in the buffer. * * NOTE(review): next() is declared `void`, so none of the codes above is returned to the * caller; presumably the outcome is stored in the `error` member of the stream -- confirm * against the implementation. */ inline void next() noexcept; /** Move the json_iterator of the document to the location of the next document in the stream. */ inline void next_document() noexcept; /** Get the next document index. */ inline size_t next_batch_start() const noexcept; /** Pass the next batch through stage 1 with the given parser. */ inline error_code run_stage1(ondemand::parser &p, size_t batch_start) noexcept; // Fields ondemand::parser *parser; const uint8_t *buf; size_t len; size_t batch_size; bool allow_comma_separated; /** * We are going to use just one document instance. The document owns * the json_iterator. It implies that we only ever pass a reference * to the document to the users. */ document doc{}; /** The error (or lack thereof) from the current document.
*/ error_code error; size_t batch_start{0}; size_t doc_index{}; #ifdef SIMDJSON_THREADS_ENABLED /** Indicates whether we use threads. Note that this needs to be a constant during the execution of the parsing. */ bool use_thread; inline void load_from_stage1_thread() noexcept; /** Start a thread to run stage 1 on the next batch. */ inline void start_stage1_thread() noexcept; /** Wait for the stage 1 thread to finish and capture the results. */ inline void finish_stage1_thread() noexcept; /** The error returned from the stage 1 thread. */ error_code stage1_thread_error{UNINITIALIZED}; /** The thread used to run stage 1 against the next batch in the background. */ std::unique_ptr worker{new(std::nothrow) stage1_worker()}; /** * The parser used to run stage 1 in the background. Will be swapped * with the regular parser when finished. */ ondemand::parser stage1_thread_parser{}; friend struct stage1_worker; #endif // SIMDJSON_THREADS_ENABLED friend class parser; friend class document; friend class json_iterator; friend struct simdjson_result; friend struct internal::simdjson_result_base; }; // document_stream } // namespace ondemand } // namespace haswell } // namespace simdjson namespace simdjson { template<> struct simdjson_result : public haswell::implementation_simdjson_result_base { public: simdjson_inline simdjson_result(haswell::ondemand::document_stream &&value) noexcept; ///< @private simdjson_inline simdjson_result(error_code error) noexcept; ///< @private simdjson_inline simdjson_result() noexcept = default; }; } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_STREAM_H /* end file simdjson/generic/ondemand/document_stream.h for haswell */ /* including simdjson/generic/ondemand/field.h for haswell: #include "simdjson/generic/ondemand/field.h" */ /* begin file simdjson/generic/ondemand/field.h for haswell */ #ifndef SIMDJSON_GENERIC_ONDEMAND_FIELD_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation 
skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_FIELD_H */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { namespace haswell { namespace ondemand { /** * A JSON field (key/value pair) in an object. * * Returned from object iteration. * * Extends from std::pair so you can use C++ algorithms that rely on pairs. */ class field : public std::pair { public: /** * Create a new invalid field. * * Exists so you can declare a variable and later assign to it before use. */ simdjson_inline field() noexcept; /** * Get the key as a string_view (for higher speed, consider key(), which returns the raw_json_string). * We deliberately use a more cumbersome name (unescaped_key) to force users * to think twice about using it. * * This consumes the key: once you have called unescaped_key(), you cannot * call it again nor can you call key(). * * @param allow_replacement Has no default value here, unlike simdjson_result<field>::unescaped_key(), * so it must be passed explicitly. NOTE(review): the meaning of the flag is not described in this * part of the header -- confirm against the implementation. */ simdjson_inline simdjson_warn_unused simdjson_result unescaped_key(bool allow_replacement) noexcept; /** * Get the key as a raw_json_string. Can be used for direct comparison with * an unescaped C string: e.g., key() == "test". */ simdjson_inline raw_json_string key() const noexcept; /** * Get the field value.
*/ simdjson_inline ondemand::value &value() & noexcept; /** * @overload ondemand::value &ondemand::value() & noexcept */ simdjson_inline ondemand::value value() && noexcept; protected: simdjson_inline field(raw_json_string key, ondemand::value &&value) noexcept; static simdjson_inline simdjson_result start(value_iterator &parent_iter) noexcept; static simdjson_inline simdjson_result start(const value_iterator &parent_iter, raw_json_string key) noexcept; friend struct simdjson_result; friend class object_iterator; }; } // namespace ondemand } // namespace haswell } // namespace simdjson namespace simdjson { template<> struct simdjson_result : public haswell::implementation_simdjson_result_base { public: simdjson_inline simdjson_result(haswell::ondemand::field &&value) noexcept; ///< @private simdjson_inline simdjson_result(error_code error) noexcept; ///< @private simdjson_inline simdjson_result() noexcept = default; simdjson_inline simdjson_result unescaped_key(bool allow_replacement = false) noexcept; simdjson_inline simdjson_result key() noexcept; simdjson_inline simdjson_result value() noexcept; }; } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_FIELD_H /* end file simdjson/generic/ondemand/field.h for haswell */ /* including simdjson/generic/ondemand/object.h for haswell: #include "simdjson/generic/ondemand/object.h" */ /* begin file simdjson/generic/ondemand/object.h for haswell */ #ifndef SIMDJSON_GENERIC_ONDEMAND_OBJECT_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_OBJECT_H */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ 
namespace simdjson {
namespace haswell {
namespace ondemand {

/**
 * A forward-only JSON object field iterator.
 */
class object {
public:
  /**
   * Create a new invalid object.
   *
   * Exists so you can declare a variable and later assign to it before use.
   */
  simdjson_inline object() noexcept = default;

  /** Iterator to the current position in the object. */
  simdjson_inline simdjson_result begin() noexcept;
  /** Sentinel iterator used as the end of iteration. */
  simdjson_inline simdjson_result end() noexcept;
  /**
   * Look up a field by name on an object (order-sensitive).
   *
   * The following code reads z, then y, then x, and thus will not retrieve x or y if fed the
   * JSON `{ "x": 1, "y": 2, "z": 3 }`:
   *
   * ```c++
   * simdjson::ondemand::parser parser;
   * auto obj = parser.parse(R"( { "x": 1, "y": 2, "z": 3 } )"_padded);
   * double z = obj.find_field("z");
   * double y = obj.find_field("y");
   * double x = obj.find_field("x");
   * ```
   * If you have multiple fields with a matching key ({"x": 1, "x": 1}) be mindful
   * that only one field is returned.
   *
   * **Raw Keys:** The lookup will be done against the *raw* key, and will not unescape keys.
   * e.g. `object["a"]` will match `{ "a": 1 }`, but will *not* match `{ "\u0061": 1 }`.
   *
   * You must consume the fields on an object one at a time. A request for a new key
   * invalidates previous field values: it makes them unsafe. The value instance you get
   * from `content["bids"]` becomes invalid when you call `content["asks"]`. The array
   * given by content["bids"].get_array() should not be accessed after you have called
   * content["asks"].get_array(). You can detect such mistakes by first compiling and running
   * the code in Debug mode (or with the macro `SIMDJSON_DEVELOPMENT_CHECKS` set to 1): an
   * OUT_OF_ORDER_ITERATION error is generated.
   *
   * You are expected to access keys only once. You should access the value corresponding to a
   * key a single time. Doing object["mykey"].to_string() and then again object["mykey"].to_string()
   * is an error.
   *
   * @param key The key to look up.
   * @returns The value of the field, or NO_SUCH_FIELD if the field is not in the object.
   */
  simdjson_inline simdjson_result find_field(std::string_view key) & noexcept;
  /** @overload simdjson_inline simdjson_result find_field(std::string_view key) & noexcept; */
  simdjson_inline simdjson_result find_field(std::string_view key) && noexcept;

  /**
   * Look up a field by name on an object, without regard to key order.
   *
   * **Performance Notes:** This is a bit less performant than find_field(), though its effect varies
   * and often appears negligible. It starts out normally, starting out at the last field; but if
   * the field is not found, it scans from the beginning of the object to see if it missed it. That
   * missing case has a non-cache-friendly bump and lots of extra scanning, especially if the object
   * in question is large. The fact that the extra code is there also bumps the executable size.
   *
   * It is the default, however, because it would be highly surprising (and hard to debug) if the
   * default behavior failed to look up a field just because it was in the wrong order--and many
   * APIs assume this. Therefore, you must be explicit if you want to treat objects as out of order.
   *
   * Use find_field() if you are sure fields will be in order (or are willing to treat it as if the
   * field wasn't there when they aren't).
   *
   * If you have multiple fields with a matching key ({"x": 1, "x": 1}) be mindful
   * that only one field is returned.
   *
   * You must consume the fields on an object one at a time. A request for a new key
   * invalidates previous field values: it makes them unsafe. The value instance you get
   * from `content["bids"]` becomes invalid when you call `content["asks"]`. The array
   * given by content["bids"].get_array() should not be accessed after you have called
   * content["asks"].get_array(). You can detect such mistakes by first compiling and running
   * the code in Debug mode (or with the macro `SIMDJSON_DEVELOPMENT_CHECKS` set to 1): an
   * OUT_OF_ORDER_ITERATION error is generated.
   *
   * You are expected to access keys only once. You should access the value corresponding to a key
   * a single time. Doing object["mykey"].to_string() and then again object["mykey"].to_string() is an error.
   *
   * @param key The key to look up.
   * @returns The value of the field, or NO_SUCH_FIELD if the field is not in the object.
   */
  simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept;
  /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; */
  simdjson_inline simdjson_result find_field_unordered(std::string_view key) && noexcept;
  /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; */
  simdjson_inline simdjson_result operator[](std::string_view key) & noexcept;
  /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; */
  simdjson_inline simdjson_result operator[](std::string_view key) && noexcept;

  /**
   * Get the value associated with the given JSON pointer. We use the RFC 6901
   * https://tools.ietf.org/html/rfc6901 standard, interpreting the current node
   * as the root of its own JSON document.
   *
   *   ondemand::parser parser;
   *   auto json = R"({ "foo": { "a": [ 10, 20, 30 ] }})"_padded;
   *   auto doc = parser.iterate(json);
   *   doc.at_pointer("/foo/a/1") == 20
   *
   * It is allowed for a key to be the empty string:
   *
   *   ondemand::parser parser;
   *   auto json = R"({ "": { "a": [ 10, 20, 30 ] }})"_padded;
   *   auto doc = parser.iterate(json);
   *   doc.at_pointer("//a/1") == 20
   *
   * Note that at_pointer() called on the document automatically calls the document's rewind
   * method between each call. It invalidates all previously accessed arrays, objects and values
   * that have not been consumed. Yet it is not the case when calling at_pointer on an object
   * instance: there is no rewind and no invalidation.
   *
   * You may call at_pointer more than once on an object, but each time the pointer is advanced
   * to be within the value matched by the key indicated by the JSON pointer query. Thus any preceding
   * key (as well as the current key) can no longer be used with following JSON pointer calls.
   *
   * Also note that at_pointer() relies on find_field() which implies that we do not unescape keys when matching.
   *
   * @param json_pointer The JSON pointer (RFC 6901) to resolve, relative to this object.
   * @return The value associated with the given JSON pointer, or:
   *         - NO_SUCH_FIELD if a field does not exist in an object
   *         - INDEX_OUT_OF_BOUNDS if an array index is larger than an array length
   *         - INCORRECT_TYPE if a non-integer is used to access an array
   *         - INVALID_JSON_POINTER if the JSON pointer is invalid and cannot be parsed
   */
  inline simdjson_result at_pointer(std::string_view json_pointer) noexcept;

  /**
   * Reset the iterator so that we are pointing back at the
   * beginning of the object. You should still consume values only once even if you
   * can iterate through the object more than once. If you unescape a string within
   * the object more than once, you have unsafe code. Note that rewinding an object
   * means that you may need to reparse it anew: it is not a free operation.
   *
   * @returns true if the object contains some elements (not empty)
   */
  inline simdjson_result reset() & noexcept;
  /**
   * This method scans the beginning of the object and checks whether the
   * object is empty.
   * The runtime complexity is constant time. After
   * calling this function, if successful, the object is rewound to its
   * beginning as if it had never been accessed. If the JSON is malformed (e.g.,
   * there is a missing comma), then an error is returned and it is no longer
   * safe to continue.
   */
  inline simdjson_result is_empty() & noexcept;
  /**
   * This method scans the object and counts the number of key-value pairs.
   * The count_fields method should always be called before you have begun
   * iterating through the object: it is expected that you are pointing at
   * the beginning of the object.
   * The runtime complexity is linear in the size of the object. After
   * calling this function, if successful, the object is rewound to its
   * beginning as if it had never been accessed. If the JSON is malformed (e.g.,
   * there is a missing comma), then an error is returned and it is no longer
   * safe to continue.
   *
   * To check that an object is empty, it is more performant to use
   * the is_empty() method.
   *
   * Performance hint: You should only call count_fields() as a last
   * resort as it may require scanning the document twice or more.
   */
  simdjson_inline simdjson_result count_fields() & noexcept;
  /**
   * Consumes the object and returns a string_view instance corresponding to the
   * object as represented in JSON. It points inside the original byte array containing
   * the JSON document.
   */
  simdjson_inline simdjson_result raw_json() noexcept;

protected:
  /**
   * Go to the end of the object, no matter where you are right now.
   */
  simdjson_inline error_code consume() noexcept;
  // Factory helpers taking the value_iterator positioned on the object.
  // NOTE(review): start/start_root/started presumably mirror the array equivalents
  // (start_root for a root-level object, started when the opening brace was already
  // consumed) -- confirm against object-inl.h.
  static simdjson_inline simdjson_result start(value_iterator &iter) noexcept;
  static simdjson_inline simdjson_result start_root(value_iterator &iter) noexcept;
  static simdjson_inline simdjson_result started(value_iterator &iter) noexcept;
  static simdjson_inline object resume(const value_iterator &iter) noexcept;
  simdjson_inline object(const value_iterator &iter) noexcept;

  simdjson_warn_unused simdjson_inline error_code find_field_raw(const std::string_view key) noexcept;

  /** The underlying iterator; value-initialized by the default constructor. */
  value_iterator iter{};

  friend class value;
  friend class document;
  friend struct simdjson_result;
};

} // namespace ondemand
} // namespace haswell
} // namespace simdjson

namespace simdjson {

/**
 * Result specialization wrapping a haswell::ondemand::object (the template argument list is
 * not visible in this view of the file). It re-declares the object's public operations so
 * they can be called on the result directly.
 *
 * Note that reset() and is_empty() are declared here without the `&` ref-qualifier that
 * the corresponding object methods carry, while count_fields() keeps it.
 */
template<>
struct simdjson_result : public haswell::implementation_simdjson_result_base {
public:
  simdjson_inline simdjson_result(haswell::ondemand::object &&value) noexcept; ///< @private
  simdjson_inline simdjson_result(error_code error) noexcept; ///< @private
  simdjson_inline simdjson_result() noexcept = default;

  simdjson_inline simdjson_result begin() noexcept;
  simdjson_inline simdjson_result end() noexcept;
  simdjson_inline simdjson_result find_field(std::string_view key) & noexcept;
  simdjson_inline simdjson_result find_field(std::string_view key) && noexcept;
  simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept;
  simdjson_inline simdjson_result find_field_unordered(std::string_view key) && noexcept;
  simdjson_inline simdjson_result operator[](std::string_view key) & noexcept;
  simdjson_inline simdjson_result operator[](std::string_view key) && noexcept;
  simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept;
  inline simdjson_result reset() noexcept;
  inline simdjson_result is_empty() noexcept;
  inline simdjson_result count_fields() & noexcept;
  inline simdjson_result raw_json() noexcept;

};

} // namespace simdjson

#endif // SIMDJSON_GENERIC_ONDEMAND_OBJECT_H
/* end file simdjson/generic/ondemand/object.h for haswell */
/* including simdjson/generic/ondemand/object_iterator.h for haswell: #include "simdjson/generic/ondemand/object_iterator.h" */
/* begin file simdjson/generic/ondemand/object_iterator.h for haswell */
#ifndef SIMDJSON_GENERIC_ONDEMAND_OBJECT_ITERATOR_H

/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */
/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_OBJECT_ITERATOR_H */
/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */
/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */
/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator.h" */
/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */

namespace simdjson {
namespace haswell {
namespace ondemand {

class object_iterator {
public:
  /**
   * Create a new invalid object_iterator.
   *
   * Exists so you can declare a variable and later assign to it before use.
   */
  simdjson_inline object_iterator() noexcept = default;

  //
  // Iterator interface
  //

  // Reads key and value, yielding them to the user.
  // MUST ONLY BE CALLED ONCE PER ITERATION.
  simdjson_inline simdjson_result operator*() noexcept;
  // Assumes it's being compared with the end. true if depth < iter->depth.
  simdjson_inline bool operator==(const object_iterator &) const noexcept;
  // Assumes it's being compared with the end. true if depth >= iter->depth.
  simdjson_inline bool operator!=(const object_iterator &) const noexcept;
  // Checks for '}' and ','
  // NOTE(review): the comment previously said ']', which looks copied from the array
  // iterator; an object is closed by '}' -- confirm against object_iterator-inl.h.
  simdjson_inline object_iterator &operator++() noexcept;

private:
  /**
   * The underlying JSON iterator.
   *
   * PERF NOTE: expected to be elided in favor of the parent document: this is set when the object
   * is first used, and never changes afterwards.
   */
  value_iterator iter{};

  simdjson_inline object_iterator(const value_iterator &iter) noexcept;
  friend struct simdjson_result;
  friend class object;
};

} // namespace ondemand
} // namespace haswell
} // namespace simdjson

namespace simdjson {

/**
 * Result specialization wrapping a haswell::ondemand::object_iterator (the template argument
 * list is not visible in this view of the file). It exposes the same iterator interface as
 * object_iterator so that it can be used directly in a range-based for loop.
 */
template<>
struct simdjson_result : public haswell::implementation_simdjson_result_base {
public:
  simdjson_inline simdjson_result(haswell::ondemand::object_iterator &&value) noexcept; ///< @private
  simdjson_inline simdjson_result(error_code error) noexcept; ///< @private
  simdjson_inline simdjson_result() noexcept = default;

  //
  // Iterator interface
  //

  // Reads key and value, yielding them to the user.
  simdjson_inline simdjson_result operator*() noexcept; // MUST ONLY BE CALLED ONCE PER ITERATION.
  // Assumes it's being compared with the end. true if depth < iter->depth.
  simdjson_inline bool operator==(const simdjson_result &) const noexcept;
  // Assumes it's being compared with the end. true if depth >= iter->depth.
  simdjson_inline bool operator!=(const simdjson_result &) const noexcept;
  // Checks for '}' and ','
  // NOTE(review): the comment previously said ']' -- see the note on object_iterator::operator++.
  simdjson_inline simdjson_result &operator++() noexcept;
};

} // namespace simdjson

#endif // SIMDJSON_GENERIC_ONDEMAND_OBJECT_ITERATOR_H
/* end file simdjson/generic/ondemand/object_iterator.h for haswell */
/* including simdjson/generic/ondemand/serialization.h for haswell: #include "simdjson/generic/ondemand/serialization.h" */
/* begin file simdjson/generic/ondemand/serialization.h for haswell */
#ifndef SIMDJSON_GENERIC_ONDEMAND_SERIALIZATION_H

/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */
/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_SERIALIZATION_H */
/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */
/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */

namespace simdjson {
/**
 * Create a string-view instance out of a document instance. The string-view instance
 * contains JSON text that is suitable to be parsed as JSON again. It does not
 * validate the content.
 */
inline simdjson_result to_json_string(haswell::ondemand::document& x) noexcept;
/**
 * Create a string-view instance out of a value instance. The string-view instance
 * contains JSON text that is suitable to be parsed as JSON again. The value must
 * not have been accessed previously. It does not
 * validate the content.
 */
inline simdjson_result to_json_string(haswell::ondemand::value& x) noexcept;
/**
 * Create a string-view instance out of an object instance. The string-view instance
 * contains JSON text that is suitable to be parsed as JSON again. It does not
 * validate the content.
 */
inline simdjson_result to_json_string(haswell::ondemand::object& x) noexcept;
/**
 * Create a string-view instance out of an array instance. The string-view instance
 * contains JSON text that is suitable to be parsed as JSON again. It does not
 * validate the content.
 */
inline simdjson_result to_json_string(haswell::ondemand::array& x) noexcept;
// Overloads taking a simdjson_result by value. Unlike the overloads above, these are
// not declared noexcept.
inline simdjson_result to_json_string(simdjson_result x);
inline simdjson_result to_json_string(simdjson_result x);
inline simdjson_result to_json_string(simdjson_result x);
inline simdjson_result to_json_string(simdjson_result x);
} // namespace simdjson

/**
 * We want to support argument-dependent lookup (ADL).
 * Hence we should define operator<< in the namespace
 * where the argument (here value, object, etc.) resides.
 * Credit: @madhur4127
 * See https://github.com/simdjson/simdjson/issues/1768
 */
namespace simdjson { namespace haswell { namespace ondemand {

/**
 * Print JSON to an output stream.  It does not
 * validate the content.
 *
 * @param out The output stream.
 * @param x The value.
 * @throw if there is an error with the underlying output stream. simdjson itself will not throw.
 */
inline std::ostream& operator<<(std::ostream& out, simdjson::haswell::ondemand::value x);
#if SIMDJSON_EXCEPTIONS
inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x);
#endif
/**
 * Print JSON to an output stream. It does not
 * validate the content.
 *
 * @param out The output stream.
 * @param value The array.
 * @throw if there is an error with the underlying output stream. simdjson itself will not throw.
 */
inline std::ostream& operator<<(std::ostream& out, simdjson::haswell::ondemand::array value);
#if SIMDJSON_EXCEPTIONS
inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x);
#endif
/**
 * Print JSON to an output stream. It does not
 * validate the content.
 *
 * @param out The output stream.
 * @param value The document.
 * @throw if there is an error with the underlying output stream. simdjson itself will not throw.
 */
inline std::ostream& operator<<(std::ostream& out, simdjson::haswell::ondemand::document& value);
#if SIMDJSON_EXCEPTIONS
inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result&& x);
#endif
// Same as above, for a document_reference.
inline std::ostream& operator<<(std::ostream& out, simdjson::haswell::ondemand::document_reference& value);
#if SIMDJSON_EXCEPTIONS
inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result&& x);
#endif
/**
 * Print JSON to an output stream. It does not
 * validate the content.
 *
 * @param out The output stream.
 * @param value The object.
 * @throw if there is an error with the underlying output stream. simdjson itself will not throw.
 */
inline std::ostream& operator<<(std::ostream& out, simdjson::haswell::ondemand::object value);
#if SIMDJSON_EXCEPTIONS
inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x);
#endif
}}} // namespace simdjson::haswell::ondemand

#endif // SIMDJSON_GENERIC_ONDEMAND_SERIALIZATION_H
/* end file simdjson/generic/ondemand/serialization.h for haswell */

// Inline definitions
/* including simdjson/generic/ondemand/array-inl.h for haswell: #include "simdjson/generic/ondemand/array-inl.h" */
/* begin file simdjson/generic/ondemand/array-inl.h for haswell */
#ifndef SIMDJSON_GENERIC_ONDEMAND_ARRAY_INL_H

/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */
/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_ARRAY_INL_H */
/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */
/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array.h" */
/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array_iterator-inl.h" */
/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator.h" */
/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value.h" */
/* amalgamation skipped (editor-only): #include
"simdjson/generic/ondemand/value_iterator-inl.h" */
/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */

namespace simdjson {
namespace haswell {
namespace ondemand {

//
// ## Live States
//
// While iterating or looking up values, depth >= iter->depth. at_start may vary. Error is
// always SUCCESS:
//
// - Start: This is the state when the array is first found and the iterator is just past the `[`.
//   In this state, at_start == true.
// - Next: After we hand a scalar value to the user, or an array/object which they then fully
//   iterate over, the iterator is at the `,` before the next value (or `]`). In this state,
//   depth == iter->depth, at_start == false, and error == SUCCESS.
// - Unfinished Business: When we hand an array/object to the user which they do not fully
//   iterate over, we need to finish that iteration by skipping child values until we reach the
//   Next state. In this state, depth > iter->depth, at_start == false, and error == SUCCESS.
//
// ## Error States
//
// In error states, we will yield exactly one more value before stopping. iter->depth == depth
// and at_start is always false. We decrement after yielding the error, moving to the Finished
// state.
//
// - Chained Error: When the array iterator is part of an error chain--for example, in
//   `for (auto tweet : doc["tweets"])`, where the tweet element may be missing or not be an
//   array--we yield that error in the loop, exactly once. In this state, error != SUCCESS and
//   iter->depth == depth, and at_start == false. We decrement depth when we yield the error.
// - Missing Comma Error: When the iterator ++ method discovers there is no comma between elements,
//   we flag that as an error and treat it exactly the same as a Chained Error. In this state,
//   error == TAPE_ERROR, iter->depth == depth, and at_start == false.
//
// ## Terminal State
//
// The terminal state has iter->depth < depth. at_start is always false.
// // - Finished: When we have reached a `]` or have reported an error, we are finished. We signal this // by decrementing depth. In this state, iter->depth < depth, at_start == false, and // error == SUCCESS. // simdjson_inline array::array(const value_iterator &_iter) noexcept : iter{_iter} { } simdjson_inline simdjson_result array::start(value_iterator &iter) noexcept { // We don't need to know if the array is empty to start iteration, but we do want to know if there // is an error--thus `simdjson_unused`. simdjson_unused bool has_value; SIMDJSON_TRY( iter.start_array().get(has_value) ); return array(iter); } simdjson_inline simdjson_result array::start_root(value_iterator &iter) noexcept { simdjson_unused bool has_value; SIMDJSON_TRY( iter.start_root_array().get(has_value) ); return array(iter); } simdjson_inline simdjson_result array::started(value_iterator &iter) noexcept { bool has_value; SIMDJSON_TRY(iter.started_array().get(has_value)); return array(iter); } simdjson_inline simdjson_result array::begin() noexcept { #if SIMDJSON_DEVELOPMENT_CHECKS if (!iter.is_at_iterator_start()) { return OUT_OF_ORDER_ITERATION; } #endif return array_iterator(iter); } simdjson_inline simdjson_result array::end() noexcept { return array_iterator(iter); } simdjson_inline error_code array::consume() noexcept { auto error = iter.json_iter().skip_child(iter.depth()-1); if(error) { iter.abandon(); } return error; } simdjson_inline simdjson_result array::raw_json() noexcept { const uint8_t * starting_point{iter.peek_start()}; auto error = consume(); if(error) { return error; } // After 'consume()', we could be left pointing just beyond the document, but that // is ok because we are not going to dereference the final pointer position, we just // use it to compute the length in bytes. 
const uint8_t * final_point{iter._json_iter->unsafe_pointer()}; return std::string_view(reinterpret_cast(starting_point), size_t(final_point - starting_point)); } SIMDJSON_PUSH_DISABLE_WARNINGS SIMDJSON_DISABLE_STRICT_OVERFLOW_WARNING simdjson_inline simdjson_result array::count_elements() & noexcept { size_t count{0}; // Important: we do not consume any of the values. for(simdjson_unused auto v : *this) { count++; } // The above loop will always succeed, but we want to report errors. if(iter.error()) { return iter.error(); } // We need to move back at the start because we expect users to iterate through // the array after counting the number of elements. iter.reset_array(); return count; } SIMDJSON_POP_DISABLE_WARNINGS simdjson_inline simdjson_result array::is_empty() & noexcept { bool is_not_empty; auto error = iter.reset_array().get(is_not_empty); if(error) { return error; } return !is_not_empty; } inline simdjson_result array::reset() & noexcept { return iter.reset_array(); } inline simdjson_result array::at_pointer(std::string_view json_pointer) noexcept { if (json_pointer[0] != '/') { return INVALID_JSON_POINTER; } json_pointer = json_pointer.substr(1); // - means "the append position" or "the element after the end of the array" // We don't support this, because we're returning a real element, not a position. if (json_pointer == "-") { return INDEX_OUT_OF_BOUNDS; } // Read the array index size_t array_index = 0; size_t i; for (i = 0; i < json_pointer.length() && json_pointer[i] != '/'; i++) { uint8_t digit = uint8_t(json_pointer[i] - '0'); // Check for non-digit in array index. 
If it's there, we're trying to get a field in an object if (digit > 9) { return INCORRECT_TYPE; } array_index = array_index*10 + digit; } // 0 followed by other digits is invalid if (i > 1 && json_pointer[0] == '0') { return INVALID_JSON_POINTER; } // "JSON pointer array index has other characters after 0" // Empty string is invalid; so is a "/" with no digits before it if (i == 0) { return INVALID_JSON_POINTER; } // "Empty string in JSON pointer array index" // Get the child auto child = at(array_index); // If there is an error, it ends here if(child.error()) { return child; } // If there is a /, we're not done yet, call recursively. if (i < json_pointer.length()) { child = child.at_pointer(json_pointer.substr(i)); } return child; } simdjson_inline simdjson_result array::at(size_t index) noexcept { size_t i = 0; for (auto value : *this) { if (i == index) { return value; } i++; } return INDEX_OUT_OF_BOUNDS; } } // namespace ondemand } // namespace haswell } // namespace simdjson namespace simdjson { simdjson_inline simdjson_result::simdjson_result( haswell::ondemand::array &&value ) noexcept : implementation_simdjson_result_base( std::forward(value) ) { } simdjson_inline simdjson_result::simdjson_result( error_code error ) noexcept : implementation_simdjson_result_base(error) { } simdjson_inline simdjson_result simdjson_result::begin() noexcept { if (error()) { return error(); } return first.begin(); } simdjson_inline simdjson_result simdjson_result::end() noexcept { if (error()) { return error(); } return first.end(); } simdjson_inline simdjson_result simdjson_result::count_elements() & noexcept { if (error()) { return error(); } return first.count_elements(); } simdjson_inline simdjson_result simdjson_result::is_empty() & noexcept { if (error()) { return error(); } return first.is_empty(); } simdjson_inline simdjson_result simdjson_result::at(size_t index) noexcept { if (error()) { return error(); } return first.at(index); } simdjson_inline simdjson_result 
simdjson_result::at_pointer(std::string_view json_pointer) noexcept { if (error()) { return error(); } return first.at_pointer(json_pointer); } simdjson_inline simdjson_result simdjson_result::raw_json() noexcept { if (error()) { return error(); } return first.raw_json(); } } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_ARRAY_INL_H /* end file simdjson/generic/ondemand/array-inl.h for haswell */ /* including simdjson/generic/ondemand/array_iterator-inl.h for haswell: #include "simdjson/generic/ondemand/array_iterator-inl.h" */ /* begin file simdjson/generic/ondemand/array_iterator-inl.h for haswell */ #ifndef SIMDJSON_GENERIC_ONDEMAND_ARRAY_ITERATOR_INL_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_ARRAY_ITERATOR_INL_H */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array_iterator.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value-inl.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator-inl.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { namespace haswell { namespace ondemand { simdjson_inline array_iterator::array_iterator(const value_iterator &_iter) noexcept : iter{_iter} {} simdjson_inline simdjson_result array_iterator::operator*() noexcept { if (iter.error()) { iter.abandon(); return iter.error(); } return value(iter.child()); } simdjson_inline bool array_iterator::operator==(const array_iterator &other) const noexcept { return !(*this != other); } simdjson_inline bool array_iterator::operator!=(const array_iterator &) const noexcept { return iter.is_open(); } simdjson_inline array_iterator &array_iterator::operator++() noexcept { error_code error; // PERF NOTE this is a safety rail ... 
users should exit loops as soon as they receive an error, so we'll never get here. // However, it does not seem to make a perf difference, so we add it out of an abundance of caution. if (( error = iter.error() )) { return *this; } if (( error = iter.skip_child() )) { return *this; } if (( error = iter.has_next_element().error() )) { return *this; } return *this; } } // namespace ondemand } // namespace haswell } // namespace simdjson namespace simdjson { simdjson_inline simdjson_result::simdjson_result( haswell::ondemand::array_iterator &&value ) noexcept : haswell::implementation_simdjson_result_base(std::forward(value)) { first.iter.assert_is_valid(); } simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept : haswell::implementation_simdjson_result_base({}, error) { } simdjson_inline simdjson_result simdjson_result::operator*() noexcept { if (error()) { return error(); } return *first; } simdjson_inline bool simdjson_result::operator==(const simdjson_result &other) const noexcept { if (!first.iter.is_valid()) { return !error(); } return first == other.first; } simdjson_inline bool simdjson_result::operator!=(const simdjson_result &other) const noexcept { if (!first.iter.is_valid()) { return error(); } return first != other.first; } simdjson_inline simdjson_result &simdjson_result::operator++() noexcept { // Clear the error if there is one, so we don't yield it twice if (error()) { second = SUCCESS; return *this; } ++(first); return *this; } } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_ARRAY_ITERATOR_INL_H /* end file simdjson/generic/ondemand/array_iterator-inl.h for haswell */ /* including simdjson/generic/ondemand/document-inl.h for haswell: #include "simdjson/generic/ondemand/document-inl.h" */ /* begin file simdjson/generic/ondemand/document-inl.h for haswell */ #ifndef SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_INL_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped 
(editor-only): #define SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_INL_H */
/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */
/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array-inl.h" */
/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array_iterator.h" */
/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/document.h" */
/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator-inl.h" */
/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_type.h" */
/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/object-inl.h" */
/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string.h" */
/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value.h" */
/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator-inl.h" */
/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */

namespace simdjson {
namespace haswell {
namespace ondemand {

/** Take ownership of the JSON iterator and log the start of the document. */
simdjson_inline document::document(ondemand::json_iterator &&_iter) noexcept
  : iter{std::forward(_iter)}
{
  logger::log_start_value(iter, "document");
}

/** Factory: wrap the given JSON iterator in a document. */
simdjson_inline document document::start(json_iterator &&iter) noexcept {
  return document(std::forward(iter));
}

// The following members forward directly to the underlying JSON iterator.

inline void document::rewind() noexcept {
  iter.rewind();
}

inline std::string document::to_debug_string() noexcept {
  return iter.to_string();
}

inline simdjson_result document::current_location() const noexcept {
  return iter.current_location();
}

inline int32_t document::current_depth() const noexcept {
  return iter.depth();
}

inline bool document::at_end() const noexcept {
  return iter.at_end();
}

inline bool document::is_alive() noexcept {
  return iter.is_alive();
}

/** Build a value iterator at depth 1, anchored at the iterator's root position. */
simdjson_inline value_iterator document::resume_value_iterator() noexcept {
  return value_iterator(&iter, 1, iter.root_position());
}

/** Currently identical to resume_value_iterator(). */
simdjson_inline value_iterator document::get_root_value_iterator() noexcept {
  return resume_value_iterator();
}

/** Start the root object when at the root; otherwise resume the object in progress. */
simdjson_inline simdjson_result document::start_or_resume_object() noexcept {
  if (!iter.at_root()) {
    return object::resume(resume_value_iterator());
  }
  return get_object();
}

/**
 * View the document as a value. This only succeeds for documents whose first byte is
 * '[' or '{'.
 *
 * @return The root value, the error reported by the root array/object check,
 *         SCALAR_DOCUMENT_AS_VALUE for any other document, or (with
 *         SIMDJSON_DEVELOPMENT_CHECKS) OUT_OF_ORDER_ITERATION when not at the root.
 */
simdjson_inline simdjson_result document::get_value() noexcept {
  // Make sure we start any arrays or objects before returning, so that start_root_<object/array>()
  // gets called.

  // It is the convention throughout the code that the macro `SIMDJSON_DEVELOPMENT_CHECKS` determines whether
  // we check for OUT_OF_ORDER_ITERATION. Proper on::demand code should never trigger this error.
#if SIMDJSON_DEVELOPMENT_CHECKS
  if (!iter.at_root()) { return OUT_OF_ORDER_ITERATION; }
#endif
  // assert_at_root() serves two purposes: in Debug mode, whether or not
  // SIMDJSON_DEVELOPMENT_CHECKS is set or not, it checks that we are at the root of
  // the document (this will typically be redundant). In release mode, it generates
  // SIMDJSON_ASSUME statements to allow the compiler to make assumptions.
  iter.assert_at_root();
  const auto first_byte = *iter.peek();
  if (first_byte == '[') {
    // Check that the document ends with ].
    auto root = get_root_value_iterator();
    auto root_error = root.check_root_array();
    if (root_error) { return root_error; }
  } else if (first_byte == '{') {
    // Check that the document ends with }.
    auto root = get_root_value_iterator();
    auto root_error = root.check_root_object();
    if (root_error) { return root_error; }
  } else {
    // Unfortunately, scalar documents are a special case in simdjson and they cannot
    // be safely converted to value instances.
    return SCALAR_DOCUMENT_AS_VALUE;
  }
  // The check passed: hand out a value built on a fresh root iterator.
  return value(get_root_value_iterator());
}

/** Start the document as a root array. */
simdjson_inline simdjson_result document::get_array() & noexcept {
  auto root_iter = get_root_value_iterator();
  return array::start_root(root_iter);
}

/** Start the document as a root object. */
simdjson_inline simdjson_result document::get_object() & noexcept {
  auto root_iter = get_root_value_iterator();
  return object::start_root(root_iter);
}

/**
 * We decided that calling 'get_double()' on the JSON document '1.233 blabla' should
 * give an error, so we check for trailing content. We want to disallow trailing
 * content.
 * Thus, in several implementations below, we pass a 'true' argument to the
 * get_root_*() method called on the root value iterator: this indicates that we
 * disallow trailing content.
 */
simdjson_inline simdjson_result document::get_uint64() noexcept {
  return get_root_value_iterator().get_root_uint64(true);
}
simdjson_inline simdjson_result document::get_uint64_in_string() noexcept {
  return get_root_value_iterator().get_root_uint64_in_string(true);
}
simdjson_inline simdjson_result document::get_int64() noexcept {
  return get_root_value_iterator().get_root_int64(true);
}
simdjson_inline simdjson_result document::get_int64_in_string() noexcept {
  return get_root_value_iterator().get_root_int64_in_string(true);
}
simdjson_inline simdjson_result document::get_double() noexcept {
  return get_root_value_iterator().get_root_double(true);
}
simdjson_inline simdjson_result document::get_double_in_string() noexcept {
  return get_root_value_iterator().get_root_double_in_string(true);
}
simdjson_inline simdjson_result document::get_string(bool allow_replacement) noexcept {
  return get_root_value_iterator().get_root_string(true, allow_replacement);
}
template
simdjson_inline error_code document::get_string(string_type& receiver, bool allow_replacement) noexcept {
  return get_root_value_iterator().get_root_string(receiver, true, allow_replacement);
}
simdjson_inline simdjson_result document::get_wobbly_string() noexcept {
  return get_root_value_iterator().get_root_wobbly_string(true);
}
simdjson_inline
simdjson_result document::get_raw_json_string() noexcept { return get_root_value_iterator().get_root_raw_json_string(true); } simdjson_inline simdjson_result document::get_bool() noexcept { return get_root_value_iterator().get_root_bool(true); } simdjson_inline simdjson_result document::is_null() noexcept { return get_root_value_iterator().is_root_null(true); } template<> simdjson_inline simdjson_result document::get() & noexcept { return get_array(); } template<> simdjson_inline simdjson_result document::get() & noexcept { return get_object(); } template<> simdjson_inline simdjson_result document::get() & noexcept { return get_raw_json_string(); } template<> simdjson_inline simdjson_result document::get() & noexcept { return get_string(false); } template<> simdjson_inline simdjson_result document::get() & noexcept { return get_double(); } template<> simdjson_inline simdjson_result document::get() & noexcept { return get_uint64(); } template<> simdjson_inline simdjson_result document::get() & noexcept { return get_int64(); } template<> simdjson_inline simdjson_result document::get() & noexcept { return get_bool(); } template<> simdjson_inline simdjson_result document::get() & noexcept { return get_value(); } template<> simdjson_inline simdjson_result document::get() && noexcept { return get_raw_json_string(); } template<> simdjson_inline simdjson_result document::get() && noexcept { return get_string(false); } template<> simdjson_inline simdjson_result document::get() && noexcept { return std::forward(*this).get_double(); } template<> simdjson_inline simdjson_result document::get() && noexcept { return std::forward(*this).get_uint64(); } template<> simdjson_inline simdjson_result document::get() && noexcept { return std::forward(*this).get_int64(); } template<> simdjson_inline simdjson_result document::get() && noexcept { return std::forward(*this).get_bool(); } template<> simdjson_inline simdjson_result document::get() && noexcept { return get_value(); } template 
simdjson_inline error_code document::get(T &out) & noexcept { return get().get(out); } template simdjson_inline error_code document::get(T &out) && noexcept { return std::forward(*this).get().get(out); } #if SIMDJSON_EXCEPTIONS simdjson_inline document::operator array() & noexcept(false) { return get_array(); } simdjson_inline document::operator object() & noexcept(false) { return get_object(); } simdjson_inline document::operator uint64_t() noexcept(false) { return get_uint64(); } simdjson_inline document::operator int64_t() noexcept(false) { return get_int64(); } simdjson_inline document::operator double() noexcept(false) { return get_double(); } simdjson_inline document::operator std::string_view() noexcept(false) { return get_string(false); } simdjson_inline document::operator raw_json_string() noexcept(false) { return get_raw_json_string(); } simdjson_inline document::operator bool() noexcept(false) { return get_bool(); } simdjson_inline document::operator value() noexcept(false) { return get_value(); } #endif simdjson_inline simdjson_result document::count_elements() & noexcept { auto a = get_array(); simdjson_result answer = a.count_elements(); /* If there was an array, we are now left pointing at its first element. */ if(answer.error() == SUCCESS) { rewind(); } return answer; } simdjson_inline simdjson_result document::count_fields() & noexcept { auto a = get_object(); simdjson_result answer = a.count_fields(); /* If there was an object, we are now left pointing at its first element. 
*/ if(answer.error() == SUCCESS) { rewind(); } return answer; } simdjson_inline simdjson_result document::at(size_t index) & noexcept { auto a = get_array(); return a.at(index); } simdjson_inline simdjson_result document::begin() & noexcept { return get_array().begin(); } simdjson_inline simdjson_result document::end() & noexcept { return {}; } simdjson_inline simdjson_result document::find_field(std::string_view key) & noexcept { return start_or_resume_object().find_field(key); } simdjson_inline simdjson_result document::find_field(const char *key) & noexcept { return start_or_resume_object().find_field(key); } simdjson_inline simdjson_result document::find_field_unordered(std::string_view key) & noexcept { return start_or_resume_object().find_field_unordered(key); } simdjson_inline simdjson_result document::find_field_unordered(const char *key) & noexcept { return start_or_resume_object().find_field_unordered(key); } simdjson_inline simdjson_result document::operator[](std::string_view key) & noexcept { return start_or_resume_object()[key]; } simdjson_inline simdjson_result document::operator[](const char *key) & noexcept { return start_or_resume_object()[key]; } simdjson_inline error_code document::consume() noexcept { auto error = iter.skip_child(0); if(error) { iter.abandon(); } return error; } simdjson_inline simdjson_result document::raw_json() noexcept { auto _iter = get_root_value_iterator(); const uint8_t * starting_point{_iter.peek_start()}; auto error = consume(); if(error) { return error; } // After 'consume()', we could be left pointing just beyond the document, but that // is ok because we are not going to dereference the final pointer position, we just // use it to compute the length in bytes. 
const uint8_t * final_point{iter.unsafe_pointer()}; return std::string_view(reinterpret_cast(starting_point), size_t(final_point - starting_point)); } simdjson_inline simdjson_result document::type() noexcept { return get_root_value_iterator().type(); } simdjson_inline simdjson_result document::is_scalar() noexcept { json_type this_type; auto error = type().get(this_type); if(error) { return error; } return ! ((this_type == json_type::array) || (this_type == json_type::object)); } simdjson_inline bool document::is_negative() noexcept { return get_root_value_iterator().is_root_negative(); } simdjson_inline simdjson_result document::is_integer() noexcept { return get_root_value_iterator().is_root_integer(true); } simdjson_inline simdjson_result document::get_number_type() noexcept { return get_root_value_iterator().get_root_number_type(true); } simdjson_inline simdjson_result document::get_number() noexcept { return get_root_value_iterator().get_root_number(true); } simdjson_inline simdjson_result document::raw_json_token() noexcept { auto _iter = get_root_value_iterator(); return std::string_view(reinterpret_cast(_iter.peek_start()), _iter.peek_start_length()); } simdjson_inline simdjson_result document::at_pointer(std::string_view json_pointer) noexcept { rewind(); // Rewind the document each time at_pointer is called if (json_pointer.empty()) { return this->get_value(); } json_type t; SIMDJSON_TRY(type().get(t)); switch (t) { case json_type::array: return (*this).get_array().at_pointer(json_pointer); case json_type::object: return (*this).get_object().at_pointer(json_pointer); default: return INVALID_JSON_POINTER; } } } // namespace ondemand } // namespace haswell } // namespace simdjson namespace simdjson { simdjson_inline simdjson_result::simdjson_result( haswell::ondemand::document &&value ) noexcept : implementation_simdjson_result_base( std::forward(value) ) { } simdjson_inline simdjson_result::simdjson_result( error_code error ) noexcept : 
implementation_simdjson_result_base( error ) { } simdjson_inline simdjson_result simdjson_result::count_elements() & noexcept { if (error()) { return error(); } return first.count_elements(); } simdjson_inline simdjson_result simdjson_result::count_fields() & noexcept { if (error()) { return error(); } return first.count_fields(); } simdjson_inline simdjson_result simdjson_result::at(size_t index) & noexcept { if (error()) { return error(); } return first.at(index); } simdjson_inline error_code simdjson_result::rewind() noexcept { if (error()) { return error(); } first.rewind(); return SUCCESS; } simdjson_inline simdjson_result simdjson_result::begin() & noexcept { if (error()) { return error(); } return first.begin(); } simdjson_inline simdjson_result simdjson_result::end() & noexcept { return {}; } simdjson_inline simdjson_result simdjson_result::find_field_unordered(std::string_view key) & noexcept { if (error()) { return error(); } return first.find_field_unordered(key); } simdjson_inline simdjson_result simdjson_result::find_field_unordered(const char *key) & noexcept { if (error()) { return error(); } return first.find_field_unordered(key); } simdjson_inline simdjson_result simdjson_result::operator[](std::string_view key) & noexcept { if (error()) { return error(); } return first[key]; } simdjson_inline simdjson_result simdjson_result::operator[](const char *key) & noexcept { if (error()) { return error(); } return first[key]; } simdjson_inline simdjson_result simdjson_result::find_field(std::string_view key) & noexcept { if (error()) { return error(); } return first.find_field(key); } simdjson_inline simdjson_result simdjson_result::find_field(const char *key) & noexcept { if (error()) { return error(); } return first.find_field(key); } simdjson_inline simdjson_result simdjson_result::get_array() & noexcept { if (error()) { return error(); } return first.get_array(); } simdjson_inline simdjson_result simdjson_result::get_object() & noexcept { if (error()) { 
return error(); } return first.get_object(); } simdjson_inline simdjson_result simdjson_result::get_uint64() noexcept { if (error()) { return error(); } return first.get_uint64(); } simdjson_inline simdjson_result simdjson_result::get_uint64_in_string() noexcept { if (error()) { return error(); } return first.get_uint64_in_string(); } simdjson_inline simdjson_result simdjson_result::get_int64() noexcept { if (error()) { return error(); } return first.get_int64(); } simdjson_inline simdjson_result simdjson_result::get_int64_in_string() noexcept { if (error()) { return error(); } return first.get_int64_in_string(); } simdjson_inline simdjson_result simdjson_result::get_double() noexcept { if (error()) { return error(); } return first.get_double(); } simdjson_inline simdjson_result simdjson_result::get_double_in_string() noexcept { if (error()) { return error(); } return first.get_double_in_string(); } simdjson_inline simdjson_result simdjson_result::get_string(bool allow_replacement) noexcept { if (error()) { return error(); } return first.get_string(allow_replacement); } template simdjson_inline error_code simdjson_result::get_string(string_type& receiver, bool allow_replacement) noexcept { if (error()) { return error(); } return first.get_string(receiver, allow_replacement); } simdjson_inline simdjson_result simdjson_result::get_wobbly_string() noexcept { if (error()) { return error(); } return first.get_wobbly_string(); } simdjson_inline simdjson_result simdjson_result::get_raw_json_string() noexcept { if (error()) { return error(); } return first.get_raw_json_string(); } simdjson_inline simdjson_result simdjson_result::get_bool() noexcept { if (error()) { return error(); } return first.get_bool(); } simdjson_inline simdjson_result simdjson_result::get_value() noexcept { if (error()) { return error(); } return first.get_value(); } simdjson_inline simdjson_result simdjson_result::is_null() noexcept { if (error()) { return error(); } return first.is_null(); } 
template simdjson_inline simdjson_result simdjson_result::get() & noexcept { if (error()) { return error(); } return first.get(); } template simdjson_inline simdjson_result simdjson_result::get() && noexcept { if (error()) { return error(); } return std::forward(first).get(); } template simdjson_inline error_code simdjson_result::get(T &out) & noexcept { if (error()) { return error(); } return first.get(out); } template simdjson_inline error_code simdjson_result::get(T &out) && noexcept { if (error()) { return error(); } return std::forward(first).get(out); } template<> simdjson_inline simdjson_result simdjson_result::get() & noexcept = delete; template<> simdjson_inline simdjson_result simdjson_result::get() && noexcept { if (error()) { return error(); } return std::forward(first); } template<> simdjson_inline error_code simdjson_result::get(haswell::ondemand::document &out) & noexcept = delete; template<> simdjson_inline error_code simdjson_result::get(haswell::ondemand::document &out) && noexcept { if (error()) { return error(); } out = std::forward(first); return SUCCESS; } simdjson_inline simdjson_result simdjson_result::type() noexcept { if (error()) { return error(); } return first.type(); } simdjson_inline simdjson_result simdjson_result::is_scalar() noexcept { if (error()) { return error(); } return first.is_scalar(); } simdjson_inline bool simdjson_result::is_negative() noexcept { if (error()) { return error(); } return first.is_negative(); } simdjson_inline simdjson_result simdjson_result::is_integer() noexcept { if (error()) { return error(); } return first.is_integer(); } simdjson_inline simdjson_result simdjson_result::get_number_type() noexcept { if (error()) { return error(); } return first.get_number_type(); } simdjson_inline simdjson_result simdjson_result::get_number() noexcept { if (error()) { return error(); } return first.get_number(); } #if SIMDJSON_EXCEPTIONS simdjson_inline simdjson_result::operator haswell::ondemand::array() & 
noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } simdjson_inline simdjson_result::operator haswell::ondemand::object() & noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } simdjson_inline simdjson_result::operator uint64_t() noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } simdjson_inline simdjson_result::operator int64_t() noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } simdjson_inline simdjson_result::operator double() noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } simdjson_inline simdjson_result::operator std::string_view() noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } simdjson_inline simdjson_result::operator haswell::ondemand::raw_json_string() noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } simdjson_inline simdjson_result::operator bool() noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } simdjson_inline simdjson_result::operator haswell::ondemand::value() noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } #endif simdjson_inline simdjson_result simdjson_result::current_location() noexcept { if (error()) { return error(); } return first.current_location(); } simdjson_inline bool simdjson_result::at_end() const noexcept { if (error()) { return error(); } return first.at_end(); } simdjson_inline int32_t simdjson_result::current_depth() const noexcept { if (error()) { return error(); } return first.current_depth(); } simdjson_inline simdjson_result simdjson_result::raw_json_token() noexcept { if (error()) { return error(); } return first.raw_json_token(); } simdjson_inline simdjson_result simdjson_result::at_pointer(std::string_view json_pointer) noexcept { if (error()) { return error(); } return first.at_pointer(json_pointer); } } // namespace simdjson namespace 
simdjson { namespace haswell { namespace ondemand { simdjson_inline document_reference::document_reference() noexcept : doc{nullptr} {} simdjson_inline document_reference::document_reference(document &d) noexcept : doc(&d) {} simdjson_inline void document_reference::rewind() noexcept { doc->rewind(); } simdjson_inline simdjson_result document_reference::get_array() & noexcept { return doc->get_array(); } simdjson_inline simdjson_result document_reference::get_object() & noexcept { return doc->get_object(); } /** * The document_reference instances are used primarily/solely for streams of JSON * documents. * We decided that calling 'get_double()' on the JSON document '1.233 blabla' should * give an error, so we check for trailing content. * * However, for streams of JSON documents, we want to be able to start from * "321" "321" "321" * and parse it successfully as a stream of JSON documents, calling get_uint64_in_string() * successfully each time. * * To achieve this result, we pass a 'false' to a get_root_value_iterator() method: * this indicates that we allow trailing content. 
*/ simdjson_inline simdjson_result document_reference::get_uint64() noexcept { return doc->get_root_value_iterator().get_root_uint64(false); } simdjson_inline simdjson_result document_reference::get_uint64_in_string() noexcept { return doc->get_root_value_iterator().get_root_uint64_in_string(false); } simdjson_inline simdjson_result document_reference::get_int64() noexcept { return doc->get_root_value_iterator().get_root_int64(false); } simdjson_inline simdjson_result document_reference::get_int64_in_string() noexcept { return doc->get_root_value_iterator().get_root_int64_in_string(false); } simdjson_inline simdjson_result document_reference::get_double() noexcept { return doc->get_root_value_iterator().get_root_double(false); } simdjson_inline simdjson_result document_reference::get_double_in_string() noexcept { return doc->get_root_value_iterator().get_root_double(false); } simdjson_inline simdjson_result document_reference::get_string(bool allow_replacement) noexcept { return doc->get_root_value_iterator().get_root_string(false, allow_replacement); } template simdjson_inline error_code document_reference::get_string(string_type& receiver, bool allow_replacement) noexcept { return doc->get_root_value_iterator().get_root_string(receiver, false, allow_replacement); } simdjson_inline simdjson_result document_reference::get_wobbly_string() noexcept { return doc->get_root_value_iterator().get_root_wobbly_string(false); } simdjson_inline simdjson_result document_reference::get_raw_json_string() noexcept { return doc->get_root_value_iterator().get_root_raw_json_string(false); } simdjson_inline simdjson_result document_reference::get_bool() noexcept { return doc->get_root_value_iterator().get_root_bool(false); } simdjson_inline simdjson_result document_reference::get_value() noexcept { return doc->get_value(); } simdjson_inline simdjson_result document_reference::is_null() noexcept { return doc->get_root_value_iterator().is_root_null(false); } #if SIMDJSON_EXCEPTIONS 
simdjson_inline document_reference::operator array() & noexcept(false) { return array(*doc); } simdjson_inline document_reference::operator object() & noexcept(false) { return object(*doc); } simdjson_inline document_reference::operator uint64_t() noexcept(false) { return get_uint64(); } simdjson_inline document_reference::operator int64_t() noexcept(false) { return get_int64(); } simdjson_inline document_reference::operator double() noexcept(false) { return get_double(); } simdjson_inline document_reference::operator std::string_view() noexcept(false) { return std::string_view(*doc); } simdjson_inline document_reference::operator raw_json_string() noexcept(false) { return raw_json_string(*doc); } simdjson_inline document_reference::operator bool() noexcept(false) { return get_bool(); } simdjson_inline document_reference::operator value() noexcept(false) { return value(*doc); } #endif simdjson_inline simdjson_result document_reference::count_elements() & noexcept { return doc->count_elements(); } simdjson_inline simdjson_result document_reference::count_fields() & noexcept { return doc->count_fields(); } simdjson_inline simdjson_result document_reference::at(size_t index) & noexcept { return doc->at(index); } simdjson_inline simdjson_result document_reference::begin() & noexcept { return doc->begin(); } simdjson_inline simdjson_result document_reference::end() & noexcept { return doc->end(); } simdjson_inline simdjson_result document_reference::find_field(std::string_view key) & noexcept { return doc->find_field(key); } simdjson_inline simdjson_result document_reference::find_field(const char *key) & noexcept { return doc->find_field(key); } simdjson_inline simdjson_result document_reference::operator[](std::string_view key) & noexcept { return (*doc)[key]; } simdjson_inline simdjson_result document_reference::operator[](const char *key) & noexcept { return (*doc)[key]; } simdjson_inline simdjson_result document_reference::find_field_unordered(std::string_view key) 
& noexcept { return doc->find_field_unordered(key); } simdjson_inline simdjson_result document_reference::find_field_unordered(const char *key) & noexcept { return doc->find_field_unordered(key); } simdjson_inline simdjson_result document_reference::type() noexcept { return doc->type(); } simdjson_inline simdjson_result document_reference::is_scalar() noexcept { return doc->is_scalar(); } simdjson_inline simdjson_result document_reference::current_location() noexcept { return doc->current_location(); } simdjson_inline int32_t document_reference::current_depth() const noexcept { return doc->current_depth(); } simdjson_inline bool document_reference::is_negative() noexcept { return doc->is_negative(); } simdjson_inline simdjson_result document_reference::is_integer() noexcept { return doc->get_root_value_iterator().is_root_integer(false); } simdjson_inline simdjson_result document_reference::get_number_type() noexcept { return doc->get_root_value_iterator().get_root_number_type(false); } simdjson_inline simdjson_result document_reference::get_number() noexcept { return doc->get_root_value_iterator().get_root_number(false); } simdjson_inline simdjson_result document_reference::raw_json_token() noexcept { return doc->raw_json_token(); } simdjson_inline simdjson_result document_reference::at_pointer(std::string_view json_pointer) noexcept { return doc->at_pointer(json_pointer); } simdjson_inline simdjson_result document_reference::raw_json() noexcept { return doc->raw_json();} simdjson_inline document_reference::operator document&() const noexcept { return *doc; } } // namespace ondemand } // namespace haswell } // namespace simdjson namespace simdjson { simdjson_inline simdjson_result::simdjson_result(haswell::ondemand::document_reference value, error_code error) noexcept : implementation_simdjson_result_base(std::forward(value), error) {} simdjson_inline simdjson_result simdjson_result::count_elements() & noexcept { if (error()) { return error(); } return 
first.count_elements(); } simdjson_inline simdjson_result simdjson_result::count_fields() & noexcept { if (error()) { return error(); } return first.count_fields(); } simdjson_inline simdjson_result simdjson_result::at(size_t index) & noexcept { if (error()) { return error(); } return first.at(index); } simdjson_inline error_code simdjson_result::rewind() noexcept { if (error()) { return error(); } first.rewind(); return SUCCESS; } simdjson_inline simdjson_result simdjson_result::begin() & noexcept { if (error()) { return error(); } return first.begin(); } simdjson_inline simdjson_result simdjson_result::end() & noexcept { return {}; } simdjson_inline simdjson_result simdjson_result::find_field_unordered(std::string_view key) & noexcept { if (error()) { return error(); } return first.find_field_unordered(key); } simdjson_inline simdjson_result simdjson_result::find_field_unordered(const char *key) & noexcept { if (error()) { return error(); } return first.find_field_unordered(key); } simdjson_inline simdjson_result simdjson_result::operator[](std::string_view key) & noexcept { if (error()) { return error(); } return first[key]; } simdjson_inline simdjson_result simdjson_result::operator[](const char *key) & noexcept { if (error()) { return error(); } return first[key]; } simdjson_inline simdjson_result simdjson_result::find_field(std::string_view key) & noexcept { if (error()) { return error(); } return first.find_field(key); } simdjson_inline simdjson_result simdjson_result::find_field(const char *key) & noexcept { if (error()) { return error(); } return first.find_field(key); } simdjson_inline simdjson_result simdjson_result::get_array() & noexcept { if (error()) { return error(); } return first.get_array(); } simdjson_inline simdjson_result simdjson_result::get_object() & noexcept { if (error()) { return error(); } return first.get_object(); } simdjson_inline simdjson_result simdjson_result::get_uint64() noexcept { if (error()) { return error(); } return 
first.get_uint64(); } simdjson_inline simdjson_result simdjson_result::get_uint64_in_string() noexcept { if (error()) { return error(); } return first.get_uint64_in_string(); } simdjson_inline simdjson_result simdjson_result::get_int64() noexcept { if (error()) { return error(); } return first.get_int64(); } simdjson_inline simdjson_result simdjson_result::get_int64_in_string() noexcept { if (error()) { return error(); } return first.get_int64_in_string(); } simdjson_inline simdjson_result simdjson_result::get_double() noexcept { if (error()) { return error(); } return first.get_double(); } simdjson_inline simdjson_result simdjson_result::get_double_in_string() noexcept { if (error()) { return error(); } return first.get_double_in_string(); } simdjson_inline simdjson_result simdjson_result::get_string(bool allow_replacement) noexcept { if (error()) { return error(); } return first.get_string(allow_replacement); } template simdjson_inline error_code simdjson_result::get_string(string_type& receiver, bool allow_replacement) noexcept { if (error()) { return error(); } return first.get_string(receiver, allow_replacement); } simdjson_inline simdjson_result simdjson_result::get_wobbly_string() noexcept { if (error()) { return error(); } return first.get_wobbly_string(); } simdjson_inline simdjson_result simdjson_result::get_raw_json_string() noexcept { if (error()) { return error(); } return first.get_raw_json_string(); } simdjson_inline simdjson_result simdjson_result::get_bool() noexcept { if (error()) { return error(); } return first.get_bool(); } simdjson_inline simdjson_result simdjson_result::get_value() noexcept { if (error()) { return error(); } return first.get_value(); } simdjson_inline simdjson_result simdjson_result::is_null() noexcept { if (error()) { return error(); } return first.is_null(); } simdjson_inline simdjson_result simdjson_result::type() noexcept { if (error()) { return error(); } return first.type(); } simdjson_inline simdjson_result 
simdjson_result::is_scalar() noexcept { if (error()) { return error(); } return first.is_scalar(); } simdjson_inline simdjson_result simdjson_result::is_negative() noexcept { if (error()) { return error(); } return first.is_negative(); } simdjson_inline simdjson_result simdjson_result::is_integer() noexcept { if (error()) { return error(); } return first.is_integer(); } simdjson_inline simdjson_result simdjson_result::get_number_type() noexcept { if (error()) { return error(); } return first.get_number_type(); } simdjson_inline simdjson_result simdjson_result::get_number() noexcept { if (error()) { return error(); } return first.get_number(); } #if SIMDJSON_EXCEPTIONS simdjson_inline simdjson_result::operator haswell::ondemand::array() & noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } simdjson_inline simdjson_result::operator haswell::ondemand::object() & noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } simdjson_inline simdjson_result::operator uint64_t() noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } simdjson_inline simdjson_result::operator int64_t() noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } simdjson_inline simdjson_result::operator double() noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } simdjson_inline simdjson_result::operator std::string_view() noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } simdjson_inline simdjson_result::operator haswell::ondemand::raw_json_string() noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } simdjson_inline simdjson_result::operator bool() noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } simdjson_inline simdjson_result::operator haswell::ondemand::value() noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } #endif simdjson_inline 
simdjson_result simdjson_result::current_location() noexcept {
  // Report a held error first; otherwise delegate to the wrapped document.
  if (error()) { return error(); }
  return first.current_location();
}

// Forwards raw_json_token() to the wrapped document unless this result holds an error.
simdjson_inline simdjson_result simdjson_result::raw_json_token() noexcept {
  if (error()) { return error(); }
  return first.raw_json_token();
}

// Forwards at_pointer(json_pointer) to the wrapped document unless this result holds an error.
simdjson_inline simdjson_result simdjson_result::at_pointer(std::string_view json_pointer) noexcept {
  if (error()) { return error(); }
  return first.at_pointer(json_pointer);
}

} // namespace simdjson

#endif // SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_INL_H
/* end file simdjson/generic/ondemand/document-inl.h for haswell */
/* including simdjson/generic/ondemand/document_stream-inl.h for haswell: #include "simdjson/generic/ondemand/document_stream-inl.h" */
/* begin file simdjson/generic/ondemand/document_stream-inl.h for haswell */
#ifndef SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_STREAM_INL_H

/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */
/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_STREAM_INL_H */
/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */
/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/document_stream.h" */
/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/document-inl.h" */
/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base-inl.h" */
/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */

#include
#include

namespace simdjson {
namespace haswell {
namespace ondemand {

#ifdef SIMDJSON_THREADS_ENABLED

// Blocks the caller until the worker has no pending work (has_work == false).
inline void stage1_worker::finish() {
  // After calling "run" someone would call finish() to wait
  // for the end of the processing.
  // This function will wait until either the thread has done
  // the processing or, else, the destructor has been called.
  std::unique_lock lock(locking_mutex);
  cond_var.wait(lock, [this]{return has_work == false;});
}

// Stops and joins the worker thread (see stop_thread()).
inline stage1_worker::~stage1_worker() {
  // The thread may never outlive the stage1_worker instance
  // and will always be stopped/joined before the stage1_worker
  // instance is gone.
  stop_thread();
}

// Launches the worker thread unless one is already joinable. The thread loops:
// wait for work (or a stop request), run stage 1 for the owner, clear has_work,
// and notify whoever waits in finish().
inline void stage1_worker::start_thread() {
  std::unique_lock lock(locking_mutex);
  if(thread.joinable()) {
    return; // This should never happen but we never want to create more than one thread.
  }
  thread = std::thread([this]{
      while(true) {
        std::unique_lock thread_lock(locking_mutex);
        // We wait for either "run" or "stop_thread" to be called.
        cond_var.wait(thread_lock, [this]{return has_work || !can_work;});
        // If, for some reason, the stop_thread() method was called (i.e., the
        // destructor of stage1_worker is called), then we want to immediately destroy
        // the thread (and not do any more processing).
        if(!can_work) {
          break;
        }
        // The stage 1 result is published through the owner's stage1_thread_error.
        this->owner->stage1_thread_error = this->owner->run_stage1(*this->stage1_thread_parser,
              this->_next_batch_start);
        this->has_work = false;
        // The condition variable call should be moved after thread_lock.unlock() for performance
        // reasons but thread sanitizers may report it as a data race if we do.
        // See https://stackoverflow.com/questions/35775501/c-should-condition-variable-be-notified-under-lock
        cond_var.notify_one(); // will notify "finish"
        thread_lock.unlock();
      }
    }
  );
}

// Asks the worker thread to exit (can_work = false), wakes every waiter, and joins
// the thread if it is joinable.
inline void stage1_worker::stop_thread() {
  std::unique_lock lock(locking_mutex);
  // We have to make sure that all locks can be released.
  can_work = false;
  has_work = false;
  cond_var.notify_all();
  // Release the mutex before joining so the worker can re-acquire it and leave its loop.
  lock.unlock();
  if(thread.joinable()) {
    thread.join();
  }
}

// Hands one unit of work to the worker thread.
//   ds               - stream whose run_stage1() will be invoked and whose
//                      stage1_thread_error receives the result
//   stage1           - parser passed to run_stage1()
//   next_batch_start - batch offset passed to run_stage1()
inline void stage1_worker::run(document_stream * ds, parser * stage1, size_t next_batch_start) {
  std::unique_lock lock(locking_mutex);
  owner = ds;
  _next_batch_start = next_batch_start;
  stage1_thread_parser = stage1;
  has_work = true;
  // The condition variable call should be moved after lock.unlock() for performance
  // reasons but thread sanitizers may report it as a data race if we do.
  // See https://stackoverflow.com/questions/35775501/c-should-condition-variable-be-notified-under-lock
  cond_var.notify_one(); // will notify the thread lock that we have work
  lock.unlock();
}

#endif  // SIMDJSON_THREADS_ENABLED

// Builds a stream over `_len` bytes at `_buf`, parsed with `_parser`.
// A `_batch_size` at or below MINIMAL_BATCH_SIZE is raised to MINIMAL_BATCH_SIZE.
// With threads enabled, a null `worker` is reported as MEMALLOC through `error`.
simdjson_inline document_stream::document_stream(
  ondemand::parser &_parser,
  const uint8_t *_buf,
  size_t _len,
  size_t _batch_size,
  bool _allow_comma_separated
) noexcept
  : parser{&_parser},
    buf{_buf},
    len{_len},
    batch_size{_batch_size <= MINIMAL_BATCH_SIZE ? MINIMAL_BATCH_SIZE : _batch_size},
    allow_comma_separated{_allow_comma_separated},
    error{SUCCESS}
    #ifdef SIMDJSON_THREADS_ENABLED
    , use_thread(_parser.threaded) // we need to make a copy because _parser.threaded can change
    #endif
{
#ifdef SIMDJSON_THREADS_ENABLED
  if(worker.get() == nullptr) {
    error = MEMALLOC;
  }
#endif
}

// Default-constructed stream: no parser, no buffer, error == UNINITIALIZED.
simdjson_inline document_stream::document_stream() noexcept
  : parser{nullptr},
    buf{nullptr},
    len{0},
    batch_size{0},
    allow_comma_separated{false},
    error{UNINITIALIZED}
    #ifdef SIMDJSON_THREADS_ENABLED
    , use_thread(false)
    #endif
{
}

// Releases the worker (when threads are enabled).
simdjson_inline document_stream::~document_stream() noexcept
{
#ifdef SIMDJSON_THREADS_ENABLED
  worker.reset();
#endif
}

// Returns the total length of the input buffer, as given at construction.
inline size_t document_stream::size_in_bytes() const noexcept {
  return len;
}

// Returns len - batch_start when the stream stopped with CAPACITY; otherwise the
// difference between the two structural index entries at n_structural_indexes and
// n_structural_indexes + 1.
// NOTE(review): dereferences `parser`, which is nullptr on a default-constructed
// stream - confirm callers never reach this in that state.
inline size_t document_stream::truncated_bytes() const noexcept {
  if(error == CAPACITY) { return len - batch_start; }
  return parser->implementation->structural_indexes[parser->implementation->n_structural_indexes] -
parser->implementation->structural_indexes[parser->implementation->n_structural_indexes + 1];
}

// Default iterator: not attached to any stream and already finished.
simdjson_inline document_stream::iterator::iterator() noexcept
  : stream{nullptr}, finished{true} {
}

// Iterator bound to `_stream`; `is_end` marks it as finished from the start.
simdjson_inline document_stream::iterator::iterator(document_stream* _stream, bool is_end) noexcept
  : stream{_stream}, finished{is_end} {
}

// Returns the stream's current document paired with the stream's current error,
// so an error state is delivered to the caller through the result.
simdjson_inline simdjson_result document_stream::iterator::operator*() noexcept {
  //if(stream->error) { return stream->error; }
  return simdjson_result(stream->doc, stream->error);
}

// Advances the stream to the next document and updates `finished`.
simdjson_inline document_stream::iterator& document_stream::iterator::operator++() noexcept {
  // If there is an error, then we want the iterator
  // to be finished, no matter what. (E.g., we do not
  // keep generating documents with errors, or go beyond
  // a document with errors.)
  //
  // Users do not have to call "operator*()" when they use operator++,
  // so we need to end the stream in the operator++ function.
  //
  // Note that setting finished = true is essential otherwise
  // we would enter an infinite loop.
  if (stream->error) { finished = true; }
  // Note that stream->next() is guarded against error conditions
  // (it returns immediately when stream->error is set).
  // In effect, this next function does nothing when (stream->error)
  // is true (hence the risk of an infinite loop).
  stream->next();
  // If that was the last document, we're finished.
  // It is the only type of error we do not want to appear
  // in operator*.
  if (stream->error == EMPTY) { finished = true; }
  // If we had any other kind of error (not EMPTY) then we want
  // to pass it along to the operator* and we cannot mark the result
  // as "finished" just yet.
  return *this;
}

// Two iterators differ exactly when their `finished` flags differ; the stream
// pointer is not compared.
simdjson_inline bool document_stream::iterator::operator!=(const document_stream::iterator &other) const noexcept {
  return finished != other.finished;
}

// Starts the stream (see start()) and returns an iterator on it.
simdjson_inline document_stream::iterator document_stream::begin() noexcept {
  start();
  // If there are no documents, we're finished.
  return iterator(this, error == EMPTY);
}

// Returns an iterator that is already marked as finished.
simdjson_inline document_stream::iterator document_stream::end() noexcept {
  return iterator(this, true);
}

// Prepares the stream for iteration: allocates the parser for batch_size, runs
// stage 1 on the first non-empty batch, and positions `doc` at the start of that
// batch. With threads enabled, it also kicks off stage 1 for the following batch.
// Any failure is left in `error`.
inline void document_stream::start() noexcept {
  if (error) { return; }
  error = parser->allocate(batch_size);
  if (error) { return; }
  // Always run the first stage 1 parse immediately
  batch_start = 0;
  error = run_stage1(*parser, batch_start);
  while(error == EMPTY) {
    // In exceptional cases, we may start with an empty block
    batch_start = next_batch_start();
    if (batch_start >= len) { return; }
    error = run_stage1(*parser, batch_start);
  }
  if (error) { return; }
  doc_index = batch_start;
  doc = document(json_iterator(&buf[batch_start], parser));
  doc.iter._streaming = true;

  #ifdef SIMDJSON_THREADS_ENABLED
  if (use_thread && next_batch_start() < len) {
    // Kick off the first thread on next batch if needed
    error = stage1_thread_parser.allocate(batch_size);
    if (error) { return; }
    worker->start_thread();
    start_stage1_thread();
    if (error) { return; }
  }
  #endif // SIMDJSON_THREADS_ENABLED
}

// Moves to the next document, loading further batches when the current one is
// exhausted. Leaves EMPTY in `error` when no batch is left to load.
inline void document_stream::next() noexcept {
  // Bail out immediately if the stream is already in an error state.
  if (error) { return; }
  next_document();
  if (error) { return; }
  // Index of the new document root within the structural index array.
  auto cur_struct_index = doc.iter._root - parser->implementation->structural_indexes.get();
  doc_index = batch_start + parser->implementation->structural_indexes[cur_struct_index];

  // Check if at end of structural indexes (i.e. at end of batch)
  if(cur_struct_index >= static_cast(parser->implementation->n_structural_indexes)) {
    error = EMPTY;
    // Load another batch (if available)
    while (error == EMPTY) {
      batch_start = next_batch_start();
      if (batch_start >= len) { break; }
      #ifdef SIMDJSON_THREADS_ENABLED
      if(use_thread) {
        load_from_stage1_thread();
      } else {
        error = run_stage1(*parser, batch_start);
      }
      #else
      error = run_stage1(*parser, batch_start);
      #endif
      /**
       * Whenever we move to another window, we need to update all pointers to make
       * it appear as if the input buffer started at the beginning of the window.
       *
       * Take this input:
       *
       * {"z":5}  {"1":1,"2":2,"4":4} [7,  10,   9]  [15,  11,   12, 13]  [154,  110,   112, 1311]
       *
       * Say you process the following window...
       *
       * '{"z":5}  {"1":1,"2":2,"4":4} [7,  10,   9]'
       *
       * When you do so, the json_iterator has a pointer at the beginning of the memory region
       * (pointing at the beginning of '{"z"...').
       *
       * When you move to the window that starts at...
       *
       * '[7,  10,   9]  [15,  11,   12, 13] ...
       *
       * then it is not sufficient to just run stage 1. You also need to re-anchor the
       * json_iterator so that it believes we are starting at '[7,  10,   9]...'.
       *
       * Under the DOM front-end, this gets done automatically because the parser owns
       * the pointer to the data, and when you call stage1 and then stage2 on the same
       * parser, then stage2 will run on the pointer acquired by stage1.
       *
       * That is, stage1 calls "this->buf = _buf" so the parser remembers the buffer that
       * we used. But json_iterator has no callback when stage1 is called on the parser.
       * In fact, I think that the parser is unaware of json_iterator.
       *
       *
       * So we need to re-anchor the json_iterator after each call to stage 1 so that
       * all of the pointers are in sync.
       */
      doc.iter = json_iterator(&buf[batch_start], parser);
      doc.iter._streaming = true;
      /**
       * End of resync.
       */

      if (error) { continue; } // If the error was EMPTY, we may want to load another batch.
doc_index = batch_start; } } } inline void document_stream::next_document() noexcept { // Go to next place where depth=0 (document depth) error = doc.iter.skip_child(0); if (error) { return; } // Always set depth=1 at the start of document doc.iter._depth = 1; // consume comma if comma separated is allowed if (allow_comma_separated) { doc.iter.consume_character(','); } // Resets the string buffer at the beginning, thus invalidating the strings. doc.iter._string_buf_loc = parser->string_buf.get(); doc.iter._root = doc.iter.position(); } inline size_t document_stream::next_batch_start() const noexcept { return batch_start + parser->implementation->structural_indexes[parser->implementation->n_structural_indexes]; } inline error_code document_stream::run_stage1(ondemand::parser &p, size_t _batch_start) noexcept { // This code only updates the structural index in the parser, it does not update any json_iterator // instance. size_t remaining = len - _batch_start; if (remaining <= batch_size) { return p.implementation->stage1(&buf[_batch_start], remaining, stage1_mode::streaming_final); } else { return p.implementation->stage1(&buf[_batch_start], batch_size, stage1_mode::streaming_partial); } } simdjson_inline size_t document_stream::iterator::current_index() const noexcept { return stream->doc_index; } simdjson_inline std::string_view document_stream::iterator::source() const noexcept { auto depth = stream->doc.iter.depth(); auto cur_struct_index = stream->doc.iter._root - stream->parser->implementation->structural_indexes.get(); // If at root, process the first token to determine if scalar value if (stream->doc.iter.at_root()) { switch (stream->buf[stream->batch_start + stream->parser->implementation->structural_indexes[cur_struct_index]]) { case '{': case '[': // Depth=1 already at start of document break; case '}': case ']': depth--; break; default: // Scalar value document // TODO: Remove any trailing whitespaces // This returns a string spanning from start of value 
to the beginning of the next document (excluded) return std::string_view(reinterpret_cast(stream->buf) + current_index(), stream->parser->implementation->structural_indexes[++cur_struct_index] - current_index() - 1); } cur_struct_index++; } while (cur_struct_index <= static_cast(stream->parser->implementation->n_structural_indexes)) { switch (stream->buf[stream->batch_start + stream->parser->implementation->structural_indexes[cur_struct_index]]) { case '{': case '[': depth++; break; case '}': case ']': depth--; break; } if (depth == 0) { break; } cur_struct_index++; } return std::string_view(reinterpret_cast(stream->buf) + current_index(), stream->parser->implementation->structural_indexes[cur_struct_index] - current_index() + stream->batch_start + 1);; } inline error_code document_stream::iterator::error() const noexcept { return stream->error; } #ifdef SIMDJSON_THREADS_ENABLED inline void document_stream::load_from_stage1_thread() noexcept { worker->finish(); // Swap to the parser that was loaded up in the thread. Make sure the parser has // enough memory to swap to, as well. std::swap(stage1_thread_parser,*parser); error = stage1_thread_error; if (error) { return; } // If there's anything left, start the stage 1 thread! if (next_batch_start() < len) { start_stage1_thread(); } } inline void document_stream::start_stage1_thread() noexcept { // we call the thread on a lambda that will update // this->stage1_thread_error // there is only one thread that may write to this value // TODO this is NOT exception-safe. 
  this->stage1_thread_error = UNINITIALIZED; // In case something goes wrong, make sure it's an error
  // Capture the offset now, then hand it to the worker together with the spare parser.
  size_t _next_batch_start = this->next_batch_start();

  worker->run(this, & this->stage1_thread_parser, _next_batch_start);
}

#endif // SIMDJSON_THREADS_ENABLED

} // namespace ondemand
} // namespace haswell
} // namespace simdjson

namespace simdjson {

// Result holding only an error.
simdjson_inline simdjson_result::simdjson_result(
  error_code error
) noexcept :
    implementation_simdjson_result_base(error)
{
}
// Result holding a document_stream, forwarded into the base.
simdjson_inline simdjson_result::simdjson_result(
  haswell::ondemand::document_stream &&value
) noexcept :
    implementation_simdjson_result_base(
      std::forward(value)
    )
{
}

}

#endif // SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_STREAM_INL_H
/* end file simdjson/generic/ondemand/document_stream-inl.h for haswell */
/* including simdjson/generic/ondemand/field-inl.h for haswell: #include "simdjson/generic/ondemand/field-inl.h" */
/* begin file simdjson/generic/ondemand/field-inl.h for haswell */
#ifndef SIMDJSON_GENERIC_ONDEMAND_FIELD_INL_H

/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */
/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_FIELD_INL_H */
/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */
/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/field.h" */
/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value-inl.h" */
/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator-inl.h" */
/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */

namespace simdjson {
namespace haswell {
namespace ondemand {

// clang 6 doesn't think the default constructor can be noexcept, so we make it explicit
simdjson_inline field::field() noexcept : std::pair() {}

// Builds a field from its raw key and its value; the value is forwarded into the pair.
simdjson_inline field::field(raw_json_string key, ondemand::value &&value) noexcept
  : std::pair(key, std::forward(value))
{
}

simdjson_inline simdjson_result
field::start(value_iterator &parent_iter) noexcept {
  // Read the key, then advance to the field's value; either step may fail and
  // SIMDJSON_TRY returns that error to the caller.
  raw_json_string key;
  SIMDJSON_TRY( parent_iter.field_key().get(key) );
  SIMDJSON_TRY( parent_iter.field_value() );
  return field::start(parent_iter, key);
}

// Builds a field from an already-read key and the child of `parent_iter`.
simdjson_inline simdjson_result field::start(const value_iterator &parent_iter, raw_json_string key) noexcept {
    return field(key, parent_iter.child());
}

// Unescapes the key using the iterator's string buffer, then consumes the stored
// raw key (first.consume()).
simdjson_inline simdjson_warn_unused simdjson_result field::unescaped_key(bool allow_replacement) noexcept {
  SIMDJSON_ASSUME(first.buf != nullptr); // We would like to call .alive() but Visual Studio won't let us.
  simdjson_result answer = first.unescape(second.iter.json_iter(), allow_replacement);
  first.consume();
  return answer;
}

// Returns the raw (still escaped) key.
simdjson_inline raw_json_string field::key() const noexcept {
  SIMDJSON_ASSUME(first.buf != nullptr); // We would like to call .alive() but Visual Studio won't let us.
  return first;
}

// Lvalue access: reference to the stored value.
simdjson_inline value &field::value() & noexcept {
  return second;
}

// Rvalue access: the stored value is forwarded out of the expiring field.
simdjson_inline value field::value() && noexcept {
  return std::forward(*this).second;
}

} // namespace ondemand
} // namespace haswell
} // namespace simdjson

namespace simdjson {

// Result holding a field, forwarded into the base.
simdjson_inline simdjson_result::simdjson_result(
  haswell::ondemand::field &&value
) noexcept :
    implementation_simdjson_result_base(
      std::forward(value)
    )
{
}
// Result holding only an error.
simdjson_inline simdjson_result::simdjson_result(
  error_code error
) noexcept :
    implementation_simdjson_result_base(error)
{
}

// The accessors below report a held error, otherwise delegate to the wrapped field.
simdjson_inline simdjson_result simdjson_result::key() noexcept {
  if (error()) { return error(); }
  return first.key();
}
simdjson_inline simdjson_result simdjson_result::unescaped_key(bool allow_replacement) noexcept {
  if (error()) { return error(); }
  return first.unescaped_key(allow_replacement);
}
simdjson_inline simdjson_result simdjson_result::value() noexcept {
  if (error()) { return error(); }
  // first.value() yields an lvalue reference here; it is moved into the returned result.
  return std::move(first.value());
}

} // namespace simdjson

#endif // SIMDJSON_GENERIC_ONDEMAND_FIELD_INL_H
/* end file simdjson/generic/ondemand/field-inl.h for haswell */
/* including
simdjson/generic/ondemand/json_iterator-inl.h for haswell: #include "simdjson/generic/ondemand/json_iterator-inl.h" */ /* begin file simdjson/generic/ondemand/json_iterator-inl.h for haswell */ #ifndef SIMDJSON_GENERIC_ONDEMAND_JSON_ITERATOR_INL_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_JSON_ITERATOR_INL_H */ /* amalgamation skipped (editor-only): #include "simdjson/internal/dom_parser_implementation.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/parser.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/logger-inl.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/parser-inl.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/token_iterator-inl.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { namespace haswell { namespace ondemand { simdjson_inline json_iterator::json_iterator(json_iterator &&other) noexcept : token(std::forward(other.token)), parser{other.parser}, _string_buf_loc{other._string_buf_loc}, error{other.error}, _depth{other._depth}, _root{other._root}, _streaming{other._streaming} { other.parser = nullptr; } simdjson_inline json_iterator &json_iterator::operator=(json_iterator &&other) noexcept { token = other.token; parser = other.parser; _string_buf_loc = other._string_buf_loc; error = other.error; _depth = other._depth; _root = other._root; _streaming = other._streaming; other.parser = nullptr; return *this; } simdjson_inline json_iterator::json_iterator(const uint8_t *buf, 
ondemand::parser *_parser) noexcept
  : token(buf, &_parser->implementation->structural_indexes[0]),
    parser{_parser},
    _string_buf_loc{parser->string_buf.get()},
    _depth{1},
    _root{parser->implementation->structural_indexes.get()},
    _streaming{false}

{
  // The iterator starts at the first structural index, at depth 1, not streaming.
  logger::log_headers();
#if SIMDJSON_CHECK_EOF
  assert_more_tokens();
#endif
}

// Returns the iterator to the document root: position, string buffer and depth
// are reset to their starting values.
inline void json_iterator::rewind() noexcept {
  token.set_position( root_position() );
  logger::log_headers(); // We start again
  _string_buf_loc = parser->string_buf.get();
  _depth = 1;
}

// Returns true when, from the root to the last structural, every '[' / '{' is
// matched in number by a ']' / '}'. Works on a copy of the token iterator, so
// this iterator is not moved.
inline bool json_iterator::balanced() const noexcept {
  token_iterator ti(token);
  int32_t count{0};
  ti.set_position( root_position() );
  while(ti.peek() <= peek_last()) {
    switch (*ti.return_current_and_advance())
    {
    case '[': case '{':
      count++;
      break;
    case ']': case '}':
      count--;
      break;
    default:
      break;
    }
  }
  return count == 0;
}


// GCC 7 warns when the first line of this function is inlined away into oblivion due to the caller
// relating depth and parent_depth, which is a desired effect. (The warning does not show up if the
// skip_child() function is not marked inline.)
SIMDJSON_PUSH_DISABLE_WARNINGS
SIMDJSON_DISABLE_STRICT_OVERFLOW_WARNING
// Advances past the current value until depth() drops to `parent_depth` or below.
// Returns SUCCESS on reaching that depth, or TAPE_ERROR (through report_error) when
// the tokens run out first. The first token is handled separately because the
// caller has already accounted for it in the depth.
simdjson_warn_unused simdjson_inline error_code json_iterator::skip_child(depth_t parent_depth) noexcept {
  if (depth() <= parent_depth) { return SUCCESS; }
  switch (*return_current_and_advance()) {
    // TODO consider whether matching braces is a requirement: if non-matching braces indicates
    // *missing* braces, then future lookups are not in the object/arrays they think they are,
    // violating the rule "validate enough structure that the user can be confident they are
    // looking at the right values."
    // PERF TODO we can eliminate the switch here with a lookup of how much to add to depth

    // For the first open array/object in a value, we've already incremented depth, so keep it the same
    // We never stop at colon, but if we did, it wouldn't affect depth
    case '[': case '{': case ':':
      logger::log_start_value(*this, "skip");
      break;
    // If there is a comma, we have just finished a value in an array/object, and need to get back in
    case ',':
      logger::log_value(*this, "skip");
      break;
    // ] or } means we just finished a value and need to jump out of the array/object
    case ']': case '}':
      logger::log_end_value(*this, "skip");
      _depth--;
      if (depth() <= parent_depth) { return SUCCESS; }
#if SIMDJSON_CHECK_EOF
      // If there are no more tokens, the parent is incomplete.
      if (at_end()) { return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "Missing [ or { at start"); }
#endif // SIMDJSON_CHECK_EOF
      break;
    case '"':
      if(*peek() == ':') {
        // We are at a key!!!
        // This might happen if you just started an object and you skip it immediately.
        // Performance note: it would be nice to get rid of this check as it is somewhat
        // expensive.
        // https://github.com/simdjson/simdjson/issues/1742
        logger::log_value(*this, "key");
        return_current_and_advance(); // eat up the ':'
        break; // important!!!
      }
      // A string that is not a key is a scalar: handle it in the default branch.
      simdjson_fallthrough;
    // Anything else must be a scalar value
    default:
      // For the first scalar, we will have incremented depth already, so we decrement it here.
      logger::log_value(*this, "skip");
      _depth--;
      if (depth() <= parent_depth) { return SUCCESS; }
      break;
  }

  // Now that we've considered the first value, we only increment/decrement for arrays/objects
  while (position() < end_position()) {
    switch (*return_current_and_advance()) {
      case '[': case '{':
        logger::log_start_value(*this, "skip");
        _depth++;
        break;
      // TODO consider whether matching braces is a requirement: if non-matching braces indicates
      // *missing* braces, then future lookups are not in the object/arrays they think they are,
      // violating the rule "validate enough structure that the user can be confident they are
      // looking at the right values."
      // PERF TODO we can eliminate the switch here with a lookup of how much to add to depth
      case ']': case '}':
        logger::log_end_value(*this, "skip");
        _depth--;
        if (depth() <= parent_depth) { return SUCCESS; }
        break;
      default:
        logger::log_value(*this, "skip", "");
        break;
    }
  }

  // Ran out of tokens before returning to parent_depth.
  return report_error(TAPE_ERROR, "not enough close braces");
}

SIMDJSON_POP_DISABLE_WARNINGS

// True when the iterator sits exactly on the document root token.
simdjson_inline bool json_iterator::at_root() const noexcept {
  return position() == root_position();
}

// True when the parser reports exactly one structural index.
simdjson_inline bool json_iterator::is_single_token() const noexcept {
  return parser->implementation->n_structural_indexes == 1;
}

// True when this iterator was flagged as part of a document stream.
simdjson_inline bool json_iterator::streaming() const noexcept {
  return _streaming;
}

// Position of the root token of the current document.
simdjson_inline token_position json_iterator::root_position() const noexcept {
  return _root;
}

// Assumes the iterator is at document depth (1).
simdjson_inline void json_iterator::assert_at_document_depth() const noexcept {
  SIMDJSON_ASSUME( _depth == 1 );
}

// Assumes the iterator is at depth 1 and, where the compiler allows the check,
// positioned on the root token.
simdjson_inline void json_iterator::assert_at_root() const noexcept {
  SIMDJSON_ASSUME( _depth == 1 );
#ifndef SIMDJSON_CLANG_VISUAL_STUDIO
  // Under Visual Studio, the next SIMDJSON_ASSUME fails with: the argument
  // has side effects that will be discarded.
SIMDJSON_ASSUME( token.position() == _root ); #endif } simdjson_inline void json_iterator::assert_more_tokens(uint32_t required_tokens) const noexcept { assert_valid_position(token._position + required_tokens - 1); } simdjson_inline void json_iterator::assert_valid_position(token_position position) const noexcept { #ifndef SIMDJSON_CLANG_VISUAL_STUDIO SIMDJSON_ASSUME( position >= &parser->implementation->structural_indexes[0] ); SIMDJSON_ASSUME( position < &parser->implementation->structural_indexes[parser->implementation->n_structural_indexes] ); #endif } simdjson_inline bool json_iterator::at_end() const noexcept { return position() == end_position(); } simdjson_inline token_position json_iterator::end_position() const noexcept { uint32_t n_structural_indexes{parser->implementation->n_structural_indexes}; return &parser->implementation->structural_indexes[n_structural_indexes]; } inline std::string json_iterator::to_string() const noexcept { if( !is_alive() ) { return "dead json_iterator instance"; } const char * current_structural = reinterpret_cast(token.peek()); return std::string("json_iterator [ depth : ") + std::to_string(_depth) + std::string(", structural : '") + std::string(current_structural,1) + std::string("', offset : ") + std::to_string(token.current_offset()) + std::string("', error : ") + error_message(error) + std::string(" ]"); } inline simdjson_result json_iterator::current_location() const noexcept { if (!is_alive()) { // Unrecoverable error if (!at_root()) { return reinterpret_cast(token.peek(-1)); } else { return reinterpret_cast(token.peek()); } } if (at_end()) { return OUT_OF_BOUNDS; } return reinterpret_cast(token.peek()); } simdjson_inline bool json_iterator::is_alive() const noexcept { return parser; } simdjson_inline void json_iterator::abandon() noexcept { parser = nullptr; _depth = 0; } simdjson_inline const uint8_t *json_iterator::return_current_and_advance() noexcept { #if SIMDJSON_CHECK_EOF assert_more_tokens(); #endif // 
SIMDJSON_CHECK_EOF return token.return_current_and_advance(); } simdjson_inline const uint8_t *json_iterator::unsafe_pointer() const noexcept { // deliberately done without safety guard: return token.peek(); } simdjson_inline const uint8_t *json_iterator::peek(int32_t delta) const noexcept { #if SIMDJSON_CHECK_EOF assert_more_tokens(delta+1); #endif // SIMDJSON_CHECK_EOF return token.peek(delta); } simdjson_inline uint32_t json_iterator::peek_length(int32_t delta) const noexcept { #if SIMDJSON_CHECK_EOF assert_more_tokens(delta+1); #endif // #if SIMDJSON_CHECK_EOF return token.peek_length(delta); } simdjson_inline const uint8_t *json_iterator::peek(token_position position) const noexcept { // todo: currently we require end-of-string buffering, but the following // assert_valid_position should be turned on if/when we lift that condition. // assert_valid_position(position); // This is almost surely related to SIMDJSON_CHECK_EOF but given that SIMDJSON_CHECK_EOF // is ON by default, we have no choice but to disable it for real with a comment. return token.peek(position); } simdjson_inline uint32_t json_iterator::peek_length(token_position position) const noexcept { #if SIMDJSON_CHECK_EOF assert_valid_position(position); #endif // SIMDJSON_CHECK_EOF return token.peek_length(position); } simdjson_inline token_position json_iterator::last_position() const noexcept { // The following line fails under some compilers... // SIMDJSON_ASSUME(parser->implementation->n_structural_indexes > 0); // since it has side-effects. 
  // Copy the count into a local first so the assumption below has no side effects.
  uint32_t n_structural_indexes{parser->implementation->n_structural_indexes};
  SIMDJSON_ASSUME(n_structural_indexes > 0);
  return &parser->implementation->structural_indexes[n_structural_indexes - 1];
}

// Returns the last token of the current parse.
simdjson_inline const uint8_t *json_iterator::peek_last() const noexcept {
  return token.peek(last_position());
}

// Moves up exactly one level: the current depth must be parent_depth + 1.
simdjson_inline void json_iterator::ascend_to(depth_t parent_depth) noexcept {
  SIMDJSON_ASSUME(parent_depth >= 0 && parent_depth < INT32_MAX - 1);
  SIMDJSON_ASSUME(_depth == parent_depth + 1);
  _depth = parent_depth;
}

// Moves down exactly one level: the current depth must be child_depth - 1.
simdjson_inline void json_iterator::descend_to(depth_t child_depth) noexcept {
  SIMDJSON_ASSUME(child_depth >= 1 && child_depth < INT32_MAX);
  SIMDJSON_ASSUME(_depth == child_depth - 1);
  _depth = child_depth;
}

// Current nesting depth.
simdjson_inline depth_t json_iterator::depth() const noexcept {
  return _depth;
}

// Mutable reference to the write cursor of the string buffer.
simdjson_inline uint8_t *&json_iterator::string_buf_loc() noexcept {
  return _string_buf_loc;
}

// Logs `message`, stores `_error` in the iterator and returns it.
// Not meant for SUCCESS, UNINITIALIZED, INCORRECT_TYPE or NO_SUCH_FIELD
// (the latter two go through optional_error(), which does not store the error).
simdjson_inline error_code json_iterator::report_error(error_code _error, const char *message) noexcept {
  SIMDJSON_ASSUME(_error != SUCCESS && _error != UNINITIALIZED && _error != INCORRECT_TYPE && _error != NO_SUCH_FIELD);
  logger::log_error(*this, message);
  error = _error;
  return error;
}

// Current token position.
simdjson_inline token_position json_iterator::position() const noexcept {
  return token.position();
}

// Unescapes `in` through the parser, writing at the iterator's string buffer cursor.
simdjson_inline simdjson_result json_iterator::unescape(raw_json_string in, bool allow_replacement) noexcept {
  return parser->unescape(in, _string_buf_loc, allow_replacement);
}

// Same as unescape(), using the parser's unescape_wobbly().
simdjson_inline simdjson_result json_iterator::unescape_wobbly(raw_json_string in) noexcept {
  return parser->unescape_wobbly(in, _string_buf_loc);
}

// Jumps back to `position` inside a child at `child_depth`; the iterator must
// currently be one level above that child.
simdjson_inline void json_iterator::reenter_child(token_position position, depth_t child_depth) noexcept {
  SIMDJSON_ASSUME(child_depth >= 1 && child_depth < INT32_MAX);
  SIMDJSON_ASSUME(_depth == child_depth - 1);
#if SIMDJSON_DEVELOPMENT_CHECKS
#ifndef SIMDJSON_CLANG_VISUAL_STUDIO
  SIMDJSON_ASSUME(size_t(child_depth) < parser->max_depth());
SIMDJSON_ASSUME(position >= parser->start_positions[child_depth]); #endif #endif token.set_position(position); _depth = child_depth; } simdjson_inline error_code json_iterator::consume_character(char c) noexcept { if (*peek() == c) { return_current_and_advance(); return SUCCESS; } return TAPE_ERROR; } #if SIMDJSON_DEVELOPMENT_CHECKS simdjson_inline token_position json_iterator::start_position(depth_t depth) const noexcept { SIMDJSON_ASSUME(size_t(depth) < parser->max_depth()); return size_t(depth) < parser->max_depth() ? parser->start_positions[depth] : 0; } simdjson_inline void json_iterator::set_start_position(depth_t depth, token_position position) noexcept { SIMDJSON_ASSUME(size_t(depth) < parser->max_depth()); if(size_t(depth) < parser->max_depth()) { parser->start_positions[depth] = position; } } #endif simdjson_inline error_code json_iterator::optional_error(error_code _error, const char *message) noexcept { SIMDJSON_ASSUME(_error == INCORRECT_TYPE || _error == NO_SUCH_FIELD); logger::log_error(*this, message); return _error; } simdjson_warn_unused simdjson_inline bool json_iterator::copy_to_buffer(const uint8_t *json, uint32_t max_len, uint8_t *tmpbuf, size_t N) noexcept { // This function is not expected to be called in performance-sensitive settings. // Let us guard against silly cases: if((N < max_len) || (N == 0)) { return false; } // Copy to the buffer. std::memcpy(tmpbuf, json, max_len); if(N > max_len) { // We pad whatever remains with ' '. 
std::memset(tmpbuf + max_len, ' ', N - max_len); } return true; } } // namespace ondemand } // namespace haswell } // namespace simdjson namespace simdjson { simdjson_inline simdjson_result::simdjson_result(haswell::ondemand::json_iterator &&value) noexcept : implementation_simdjson_result_base(std::forward(value)) {} simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept : implementation_simdjson_result_base(error) {} } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_JSON_ITERATOR_INL_H /* end file simdjson/generic/ondemand/json_iterator-inl.h for haswell */ /* including simdjson/generic/ondemand/json_type-inl.h for haswell: #include "simdjson/generic/ondemand/json_type-inl.h" */ /* begin file simdjson/generic/ondemand/json_type-inl.h for haswell */ #ifndef SIMDJSON_GENERIC_ONDEMAND_JSON_TYPE_INL_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_JSON_TYPE_INL_H */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_type.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base-inl.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { namespace haswell { namespace ondemand { inline std::ostream& operator<<(std::ostream& out, json_type type) noexcept { switch (type) { case json_type::array: out << "array"; break; case json_type::object: out << "object"; break; case json_type::number: out << "number"; break; case json_type::string: out << "string"; break; case json_type::boolean: out << "boolean"; break; case json_type::null: out << "null"; break; default: SIMDJSON_UNREACHABLE(); } return out; } #if SIMDJSON_EXCEPTIONS inline std::ostream& operator<<(std::ostream& out, simdjson_result &type) noexcept(false) { return out 
<< type.value(); } #endif simdjson_inline number_type number::get_number_type() const noexcept { return type; } simdjson_inline bool number::is_uint64() const noexcept { return get_number_type() == number_type::unsigned_integer; } simdjson_inline uint64_t number::get_uint64() const noexcept { return payload.unsigned_integer; } simdjson_inline number::operator uint64_t() const noexcept { return get_uint64(); } simdjson_inline bool number::is_int64() const noexcept { return get_number_type() == number_type::signed_integer; } simdjson_inline int64_t number::get_int64() const noexcept { return payload.signed_integer; } simdjson_inline number::operator int64_t() const noexcept { return get_int64(); } simdjson_inline bool number::is_double() const noexcept { return get_number_type() == number_type::floating_point_number; } simdjson_inline double number::get_double() const noexcept { return payload.floating_point_number; } simdjson_inline number::operator double() const noexcept { return get_double(); } simdjson_inline double number::as_double() const noexcept { if(is_double()) { return payload.floating_point_number; } if(is_int64()) { return double(payload.signed_integer); } return double(payload.unsigned_integer); } simdjson_inline void number::append_s64(int64_t value) noexcept { payload.signed_integer = value; type = number_type::signed_integer; } simdjson_inline void number::append_u64(uint64_t value) noexcept { payload.unsigned_integer = value; type = number_type::unsigned_integer; } simdjson_inline void number::append_double(double value) noexcept { payload.floating_point_number = value; type = number_type::floating_point_number; } simdjson_inline void number::skip_double() noexcept { type = number_type::floating_point_number; } } // namespace ondemand } // namespace haswell } // namespace simdjson namespace simdjson { simdjson_inline simdjson_result::simdjson_result(haswell::ondemand::json_type &&value) noexcept : 
implementation_simdjson_result_base(std::forward(value)) {} simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept : implementation_simdjson_result_base(error) {} } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_JSON_TYPE_INL_H /* end file simdjson/generic/ondemand/json_type-inl.h for haswell */ /* including simdjson/generic/ondemand/logger-inl.h for haswell: #include "simdjson/generic/ondemand/logger-inl.h" */ /* begin file simdjson/generic/ondemand/logger-inl.h for haswell */ #ifndef SIMDJSON_GENERIC_ONDEMAND_LOGGER_INL_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_LOGGER_INL_H */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/logger.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ #include #include namespace simdjson { namespace haswell { namespace ondemand { namespace logger { static constexpr const char * DASHES = "----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------"; static constexpr const int LOG_EVENT_LEN = 20; static constexpr const int LOG_BUFFER_LEN = 30; static constexpr const int LOG_SMALL_BUFFER_LEN = 10; static int log_depth = 0; // Not threadsafe. Log only. 
// Helper to turn unprintable or newline characters into spaces static inline char printable_char(char c) { if (c >= 0x20) { return c; } else { return ' '; } } template static inline std::string string_format(const std::string& format, const Args&... args) { SIMDJSON_PUSH_DISABLE_ALL_WARNINGS int size_s = std::snprintf(nullptr, 0, format.c_str(), args...) + 1; auto size = static_cast(size_s); if (size <= 0) return std::string(); std::unique_ptr buf(new char[size]); std::snprintf(buf.get(), size, format.c_str(), args...); SIMDJSON_POP_DISABLE_WARNINGS return std::string(buf.get(), buf.get() + size - 1); } static inline log_level get_log_level_from_env() { SIMDJSON_PUSH_DISABLE_WARNINGS SIMDJSON_DISABLE_DEPRECATED_WARNING // Disable CRT_SECURE warning on MSVC: manually verified this is safe char *lvl = getenv("SIMDJSON_LOG_LEVEL"); SIMDJSON_POP_DISABLE_WARNINGS if (lvl && simdjson_strcasecmp(lvl, "ERROR") == 0) { return log_level::error; } return log_level::info; } static inline log_level log_threshold() { static log_level threshold = get_log_level_from_env(); return threshold; } static inline bool should_log(log_level level) { return level >= log_threshold(); } inline void log_event(const json_iterator &iter, const char *type, std::string_view detail, int delta, int depth_delta) noexcept { log_line(iter, "", type, detail, delta, depth_delta, log_level::info); } inline void log_value(const json_iterator &iter, token_position index, depth_t depth, const char *type, std::string_view detail) noexcept { log_line(iter, index, depth, "", type, detail, log_level::info); } inline void log_value(const json_iterator &iter, const char *type, std::string_view detail, int delta, int depth_delta) noexcept { log_line(iter, "", type, detail, delta, depth_delta, log_level::info); } inline void log_start_value(const json_iterator &iter, token_position index, depth_t depth, const char *type, std::string_view detail) noexcept { log_line(iter, index, depth, "+", type, detail, 
log_level::info); if (LOG_ENABLED) { log_depth++; } } inline void log_start_value(const json_iterator &iter, const char *type, int delta, int depth_delta) noexcept { log_line(iter, "+", type, "", delta, depth_delta, log_level::info); if (LOG_ENABLED) { log_depth++; } } inline void log_end_value(const json_iterator &iter, const char *type, int delta, int depth_delta) noexcept { if (LOG_ENABLED) { log_depth--; } log_line(iter, "-", type, "", delta, depth_delta, log_level::info); } inline void log_error(const json_iterator &iter, const char *error, const char *detail, int delta, int depth_delta) noexcept { log_line(iter, "ERROR: ", error, detail, delta, depth_delta, log_level::error); } inline void log_error(const json_iterator &iter, token_position index, depth_t depth, const char *error, const char *detail) noexcept { log_line(iter, index, depth, "ERROR: ", error, detail, log_level::error); } inline void log_event(const value_iterator &iter, const char *type, std::string_view detail, int delta, int depth_delta) noexcept { log_event(iter.json_iter(), type, detail, delta, depth_delta); } inline void log_value(const value_iterator &iter, const char *type, std::string_view detail, int delta, int depth_delta) noexcept { log_value(iter.json_iter(), type, detail, delta, depth_delta); } inline void log_start_value(const value_iterator &iter, const char *type, int delta, int depth_delta) noexcept { log_start_value(iter.json_iter(), type, delta, depth_delta); } inline void log_end_value(const value_iterator &iter, const char *type, int delta, int depth_delta) noexcept { log_end_value(iter.json_iter(), type, delta, depth_delta); } inline void log_error(const value_iterator &iter, const char *error, const char *detail, int delta, int depth_delta) noexcept { log_error(iter.json_iter(), error, detail, delta, depth_delta); } inline void log_headers() noexcept { if (LOG_ENABLED) { if (simdjson_unlikely(should_log(log_level::info))) { // Technically a static variable is not 
thread-safe, but if you are using threads and logging... well... static bool displayed_hint{false}; log_depth = 0; printf("\n"); if (!displayed_hint) { // We only print this helpful header once. printf("# Logging provides the depth and position of the iterator user-visible steps:\n"); printf("# +array says 'this is where we were when we discovered the start array'\n"); printf( "# -array says 'this is where we were when we ended the array'\n"); printf("# skip says 'this is a structural or value I am skipping'\n"); printf("# +/-skip says 'this is a start/end array or object I am skipping'\n"); printf("#\n"); printf("# The indentation of the terms (array, string,...) indicates the depth,\n"); printf("# in addition to the depth being displayed.\n"); printf("#\n"); printf("# Every token in the document has a single depth determined by the tokens before it,\n"); printf("# and is not affected by what the token actually is.\n"); printf("#\n"); printf("# Not all structural elements are presented as tokens in the logs.\n"); printf("#\n"); printf("# We never give control to the user within an empty array or an empty object.\n"); printf("#\n"); printf("# Inside an array, having a depth greater than the array's depth means that\n"); printf("# we are pointing inside a value.\n"); printf("# Having a depth equal to the array means that we are pointing right before a value.\n"); printf("# Having a depth smaller than the array means that we have moved beyond the array.\n"); displayed_hint = true; } printf("\n"); printf("| %-*s ", LOG_EVENT_LEN, "Event"); printf("| %-*s ", LOG_BUFFER_LEN, "Buffer"); printf("| %-*s ", LOG_SMALL_BUFFER_LEN, "Next"); // printf("| %-*s ", 5, "Next#"); printf("| %-*s ", 5, "Depth"); printf("| Detail "); printf("|\n"); printf("|%.*s", LOG_EVENT_LEN + 2, DASHES); printf("|%.*s", LOG_BUFFER_LEN + 2, DASHES); printf("|%.*s", LOG_SMALL_BUFFER_LEN + 2, DASHES); // printf("|%.*s", 5+2, DASHES); printf("|%.*s", 5 + 2, DASHES); printf("|--------"); printf("|\n"); 
fflush(stdout); } } } template inline void log_line(const json_iterator &iter, const char *title_prefix, const char *title, std::string_view detail, int delta, int depth_delta, log_level level, Args&&... args) noexcept { log_line(iter, iter.position()+delta, depth_t(iter.depth()+depth_delta), title_prefix, title, detail, level, std::forward(args)...); } template inline void log_line(const json_iterator &iter, token_position index, depth_t depth, const char *title_prefix, const char *title, std::string_view detail, log_level level, Args&&... args) noexcept { if (LOG_ENABLED) { if (simdjson_unlikely(should_log(level))) { const int indent = depth * 2; const auto buf = iter.token.buf; auto msg = string_format(title, std::forward(args)...); printf("| %*s%s%-*s ", indent, "", title_prefix, LOG_EVENT_LEN - indent - int(strlen(title_prefix)), msg.c_str()); { // Print the current structural. printf("| "); // Before we begin, the index might point right before the document. // This could be unsafe, see https://github.com/simdjson/simdjson/discussions/1938 if (index < iter._root) { printf("%*s", LOG_BUFFER_LEN, ""); } else { auto current_structural = &buf[*index]; for (int i = 0; i < LOG_BUFFER_LEN; i++) { printf("%c", printable_char(current_structural[i])); } } printf(" "); } { // Print the next structural. 
printf("| "); auto next_structural = &buf[*(index + 1)]; for (int i = 0; i < LOG_SMALL_BUFFER_LEN; i++) { printf("%c", printable_char(next_structural[i])); } printf(" "); } // printf("| %5u ", *(index+1)); printf("| %5i ", depth); printf("| %6.*s ", int(detail.size()), detail.data()); printf("|\n"); fflush(stdout); } } } } // namespace logger } // namespace ondemand } // namespace haswell } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_LOGGER_INL_H /* end file simdjson/generic/ondemand/logger-inl.h for haswell */ /* including simdjson/generic/ondemand/object-inl.h for haswell: #include "simdjson/generic/ondemand/object-inl.h" */ /* begin file simdjson/generic/ondemand/object-inl.h for haswell */ #ifndef SIMDJSON_GENERIC_ONDEMAND_OBJECT_INL_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_OBJECT_INL_H */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/field.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/object.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/object_iterator.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value-inl.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { namespace haswell { namespace ondemand { simdjson_inline simdjson_result object::find_field_unordered(const std::string_view key) & noexcept { bool has_value; SIMDJSON_TRY( iter.find_field_unordered_raw(key).get(has_value) ); if (!has_value) { logger::log_line(iter.json_iter(), "ERROR: ", "Cannot find key %.*s", "", -1, 0, 
logger::log_level::error, static_cast(key.size()), key.data()); return NO_SUCH_FIELD; } return value(iter.child()); } simdjson_inline simdjson_result object::find_field_unordered(const std::string_view key) && noexcept { bool has_value; SIMDJSON_TRY( iter.find_field_unordered_raw(key).get(has_value) ); if (!has_value) { logger::log_line(iter.json_iter(), "ERROR: ", "Cannot find key %.*s", "", -1, 0, logger::log_level::error, static_cast(key.size()), key.data()); return NO_SUCH_FIELD; } return value(iter.child()); } simdjson_inline simdjson_result object::operator[](const std::string_view key) & noexcept { return find_field_unordered(key); } simdjson_inline simdjson_result object::operator[](const std::string_view key) && noexcept { return std::forward(*this).find_field_unordered(key); } simdjson_inline simdjson_result object::find_field(const std::string_view key) & noexcept { bool has_value; SIMDJSON_TRY( iter.find_field_raw(key).get(has_value) ); if (!has_value) { logger::log_line(iter.json_iter(), "ERROR: ", "Cannot find key %.*s", "", -1, 0, logger::log_level::error, static_cast(key.size()), key.data()); return NO_SUCH_FIELD; } return value(iter.child()); } simdjson_inline simdjson_result object::find_field(const std::string_view key) && noexcept { bool has_value; SIMDJSON_TRY( iter.find_field_raw(key).get(has_value) ); if (!has_value) { logger::log_line(iter.json_iter(), "ERROR: ", "Cannot find key %.*s", "", -1, 0, logger::log_level::error, static_cast(key.size()), key.data()); return NO_SUCH_FIELD; } return value(iter.child()); } simdjson_inline simdjson_result object::start(value_iterator &iter) noexcept { SIMDJSON_TRY( iter.start_object().error() ); return object(iter); } simdjson_inline simdjson_result object::start_root(value_iterator &iter) noexcept { SIMDJSON_TRY( iter.start_root_object().error() ); return object(iter); } simdjson_inline error_code object::consume() noexcept { if(iter.is_at_key()) { /** * whenever you are pointing at a key, calling 
skip_child() is * unsafe because you will hit a string and you will assume that * it is string value, and this mistake will lead you to make bad * depth computation. */ /** * We want to 'consume' the key. We could really * just do _json_iter->return_current_and_advance(); at this * point, but, for clarity, we will use the high-level API to * eat the key. We assume that the compiler optimizes away * most of the work. */ simdjson_unused raw_json_string actual_key; auto error = iter.field_key().get(actual_key); if (error) { iter.abandon(); return error; }; // Let us move to the value while we are at it. if ((error = iter.field_value())) { iter.abandon(); return error; } } auto error_skip = iter.json_iter().skip_child(iter.depth()-1); if(error_skip) { iter.abandon(); } return error_skip; } simdjson_inline simdjson_result object::raw_json() noexcept { const uint8_t * starting_point{iter.peek_start()}; auto error = consume(); if(error) { return error; } const uint8_t * final_point{iter._json_iter->peek()}; return std::string_view(reinterpret_cast(starting_point), size_t(final_point - starting_point)); } simdjson_inline simdjson_result object::started(value_iterator &iter) noexcept { SIMDJSON_TRY( iter.started_object().error() ); return object(iter); } simdjson_inline object object::resume(const value_iterator &iter) noexcept { return iter; } simdjson_inline object::object(const value_iterator &_iter) noexcept : iter{_iter} { } simdjson_inline simdjson_result object::begin() noexcept { #if SIMDJSON_DEVELOPMENT_CHECKS if (!iter.is_at_iterator_start()) { return OUT_OF_ORDER_ITERATION; } #endif return object_iterator(iter); } simdjson_inline simdjson_result object::end() noexcept { return object_iterator(iter); } inline simdjson_result object::at_pointer(std::string_view json_pointer) noexcept { if (json_pointer[0] != '/') { return INVALID_JSON_POINTER; } json_pointer = json_pointer.substr(1); size_t slash = json_pointer.find('/'); std::string_view key = 
json_pointer.substr(0, slash); // Grab the child with the given key simdjson_result child; // If there is an escape character in the key, unescape it and then get the child. size_t escape = key.find('~'); if (escape != std::string_view::npos) { // Unescape the key std::string unescaped(key); do { switch (unescaped[escape+1]) { case '0': unescaped.replace(escape, 2, "~"); break; case '1': unescaped.replace(escape, 2, "/"); break; default: return INVALID_JSON_POINTER; // "Unexpected ~ escape character in JSON pointer"); } escape = unescaped.find('~', escape+1); } while (escape != std::string::npos); child = find_field(unescaped); // Take note find_field does not unescape keys when matching } else { child = find_field(key); } if(child.error()) { return child; // we do not continue if there was an error } // If there is a /, we have to recurse and look up more of the path if (slash != std::string_view::npos) { child = child.at_pointer(json_pointer.substr(slash)); } return child; } simdjson_inline simdjson_result object::count_fields() & noexcept { size_t count{0}; // Important: we do not consume any of the values. for(simdjson_unused auto v : *this) { count++; } // The above loop will always succeed, but we want to report errors. if(iter.error()) { return iter.error(); } // We need to move back at the start because we expect users to iterate through // the object after counting the number of elements. 
iter.reset_object(); return count; } simdjson_inline simdjson_result object::is_empty() & noexcept { bool is_not_empty; auto error = iter.reset_object().get(is_not_empty); if(error) { return error; } return !is_not_empty; } simdjson_inline simdjson_result object::reset() & noexcept { return iter.reset_object(); } } // namespace ondemand } // namespace haswell } // namespace simdjson namespace simdjson { simdjson_inline simdjson_result::simdjson_result(haswell::ondemand::object &&value) noexcept : implementation_simdjson_result_base(std::forward(value)) {} simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept : implementation_simdjson_result_base(error) {} simdjson_inline simdjson_result simdjson_result::begin() noexcept { if (error()) { return error(); } return first.begin(); } simdjson_inline simdjson_result simdjson_result::end() noexcept { if (error()) { return error(); } return first.end(); } simdjson_inline simdjson_result simdjson_result::find_field_unordered(std::string_view key) & noexcept { if (error()) { return error(); } return first.find_field_unordered(key); } simdjson_inline simdjson_result simdjson_result::find_field_unordered(std::string_view key) && noexcept { if (error()) { return error(); } return std::forward(first).find_field_unordered(key); } simdjson_inline simdjson_result simdjson_result::operator[](std::string_view key) & noexcept { if (error()) { return error(); } return first[key]; } simdjson_inline simdjson_result simdjson_result::operator[](std::string_view key) && noexcept { if (error()) { return error(); } return std::forward(first)[key]; } simdjson_inline simdjson_result simdjson_result::find_field(std::string_view key) & noexcept { if (error()) { return error(); } return first.find_field(key); } simdjson_inline simdjson_result simdjson_result::find_field(std::string_view key) && noexcept { if (error()) { return error(); } return std::forward(first).find_field(key); } simdjson_inline simdjson_result 
simdjson_result::at_pointer(std::string_view json_pointer) noexcept { if (error()) { return error(); } return first.at_pointer(json_pointer); } inline simdjson_result simdjson_result::reset() noexcept { if (error()) { return error(); } return first.reset(); } inline simdjson_result simdjson_result::is_empty() noexcept { if (error()) { return error(); } return first.is_empty(); } simdjson_inline simdjson_result simdjson_result::count_fields() & noexcept { if (error()) { return error(); } return first.count_fields(); } simdjson_inline simdjson_result simdjson_result::raw_json() noexcept { if (error()) { return error(); } return first.raw_json(); } } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_OBJECT_INL_H /* end file simdjson/generic/ondemand/object-inl.h for haswell */ /* including simdjson/generic/ondemand/object_iterator-inl.h for haswell: #include "simdjson/generic/ondemand/object_iterator-inl.h" */ /* begin file simdjson/generic/ondemand/object_iterator-inl.h for haswell */ #ifndef SIMDJSON_GENERIC_ONDEMAND_OBJECT_ITERATOR_INL_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_OBJECT_ITERATOR_INL_H */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/object_iterator.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/field-inl.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator-inl.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { namespace haswell { namespace ondemand { // // object_iterator // simdjson_inline object_iterator::object_iterator(const value_iterator &_iter) noexcept : iter{_iter} {} simdjson_inline simdjson_result object_iterator::operator*() noexcept { error_code error = iter.error(); if 
(error) { iter.abandon(); return error; } auto result = field::start(iter); // TODO this is a safety rail ... users should exit loops as soon as they receive an error. // Nonetheless, let's see if performance is OK with this if statement--the compiler may give it to us for free. if (result.error()) { iter.abandon(); } return result; } simdjson_inline bool object_iterator::operator==(const object_iterator &other) const noexcept { return !(*this != other); } simdjson_inline bool object_iterator::operator!=(const object_iterator &) const noexcept { return iter.is_open(); } SIMDJSON_PUSH_DISABLE_WARNINGS SIMDJSON_DISABLE_STRICT_OVERFLOW_WARNING simdjson_inline object_iterator &object_iterator::operator++() noexcept { // TODO this is a safety rail ... users should exit loops as soon as they receive an error. // Nonetheless, let's see if performance is OK with this if statement--the compiler may give it to us for free. if (!iter.is_open()) { return *this; } // Iterator will be released if there is an error simdjson_unused error_code error; if ((error = iter.skip_child() )) { return *this; } simdjson_unused bool has_value; if ((error = iter.has_next_field().get(has_value) )) { return *this; }; return *this; } SIMDJSON_POP_DISABLE_WARNINGS // // ### Live States // // While iterating or looking up values, depth >= iter.depth. at_start may vary. Error is // always SUCCESS: // // - Start: This is the state when the object is first found and the iterator is just past the {. // In this state, at_start == true. // - Next: After we hand a scalar value to the user, or an array/object which they then fully // iterate over, the iterator is at the , or } before the next value. In this state, // depth == iter.depth, at_start == false, and error == SUCCESS. // - Unfinished Business: When we hand an array/object to the user which they do not fully // iterate over, we need to finish that iteration by skipping child values until we reach the // Next state. 
In this state, depth > iter.depth, at_start == false, and error == SUCCESS. // // ## Error States // // In error states, we will yield exactly one more value before stopping. iter.depth == depth // and at_start is always false. We decrement after yielding the error, moving to the Finished // state. // // - Chained Error: When the object iterator is part of an error chain--for example, in // `for (auto tweet : doc["tweets"])`, where the tweet field may be missing or not be an // object--we yield that error in the loop, exactly once. In this state, error != SUCCESS and // iter.depth == depth, and at_start == false. We decrement depth when we yield the error. // - Missing Comma Error: When the iterator ++ method discovers there is no comma between fields, // we flag that as an error and treat it exactly the same as a Chained Error. In this state, // error == TAPE_ERROR, iter.depth == depth, and at_start == false. // // Errors that occur while reading a field to give to the user (such as when the key is not a // string or the field is missing a colon) are yielded immediately. Depth is then decremented, // moving to the Finished state without transitioning through an Error state at all. // // ## Terminal State // // The terminal state has iter.depth < depth. at_start is always false. // // - Finished: When we have reached a }, we are finished. We signal this by decrementing depth. // In this state, iter.depth < depth, at_start == false, and error == SUCCESS. 
// } // namespace ondemand } // namespace haswell } // namespace simdjson namespace simdjson { simdjson_inline simdjson_result::simdjson_result( haswell::ondemand::object_iterator &&value ) noexcept : implementation_simdjson_result_base(std::forward(value)) { first.iter.assert_is_valid(); } simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept : implementation_simdjson_result_base({}, error) { } simdjson_inline simdjson_result simdjson_result::operator*() noexcept { if (error()) { return error(); } return *first; } // If we're iterating and there is an error, return the error once. simdjson_inline bool simdjson_result::operator==(const simdjson_result &other) const noexcept { if (!first.iter.is_valid()) { return !error(); } return first == other.first; } // If we're iterating and there is an error, return the error once. simdjson_inline bool simdjson_result::operator!=(const simdjson_result &other) const noexcept { if (!first.iter.is_valid()) { return error(); } return first != other.first; } // Checks for ']' and ',' simdjson_inline simdjson_result &simdjson_result::operator++() noexcept { // Clear the error if there is one, so we don't yield it twice if (error()) { second = SUCCESS; return *this; } ++first; return *this; } } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_OBJECT_ITERATOR_INL_H /* end file simdjson/generic/ondemand/object_iterator-inl.h for haswell */ /* including simdjson/generic/ondemand/parser-inl.h for haswell: #include "simdjson/generic/ondemand/parser-inl.h" */ /* begin file simdjson/generic/ondemand/parser-inl.h for haswell */ #ifndef SIMDJSON_GENERIC_ONDEMAND_PARSER_INL_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_PARSER_INL_H */ /* amalgamation skipped (editor-only): #include "simdjson/padded_string.h" */ /* amalgamation skipped (editor-only): #include "simdjson/padded_string_view.h" */ /* 
amalgamation skipped (editor-only): #include "simdjson/implementation.h" */ /* amalgamation skipped (editor-only): #include "simdjson/internal/dom_parser_implementation.h" */ /* amalgamation skipped (editor-only): #include "simdjson/dom/base.h" // for MINIMAL_DOCUMENT_CAPACITY */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/document_stream.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/parser.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { namespace haswell { namespace ondemand { simdjson_inline parser::parser(size_t max_capacity) noexcept : _max_capacity{max_capacity} { } simdjson_warn_unused simdjson_inline error_code parser::allocate(size_t new_capacity, size_t new_max_depth) noexcept { if (new_capacity > max_capacity()) { return CAPACITY; } if (string_buf && new_capacity == capacity() && new_max_depth == max_depth()) { return SUCCESS; } // string_capacity copied from document::allocate _capacity = 0; size_t string_capacity = SIMDJSON_ROUNDUP_N(5 * new_capacity / 3 + SIMDJSON_PADDING, 64); string_buf.reset(new (std::nothrow) uint8_t[string_capacity]); #if SIMDJSON_DEVELOPMENT_CHECKS start_positions.reset(new (std::nothrow) token_position[new_max_depth]); #endif if (implementation) { SIMDJSON_TRY( implementation->set_capacity(new_capacity) ); SIMDJSON_TRY( implementation->set_max_depth(new_max_depth) ); } else { SIMDJSON_TRY( simdjson::get_active_implementation()->create_dom_parser_implementation(new_capacity, new_max_depth, implementation) ); } _capacity = new_capacity; _max_depth = new_max_depth; return SUCCESS; } simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(padded_string_view json) & noexcept { if (json.padding() < SIMDJSON_PADDING) { 
return INSUFFICIENT_PADDING; } json.remove_utf8_bom(); // Allocate if needed if (capacity() < json.length() || !string_buf) { SIMDJSON_TRY( allocate(json.length(), max_depth()) ); } // Run stage 1. SIMDJSON_TRY( implementation->stage1(reinterpret_cast(json.data()), json.length(), stage1_mode::regular) ); return document::start({ reinterpret_cast(json.data()), this }); } simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(const char *json, size_t len, size_t allocated) & noexcept { return iterate(padded_string_view(json, len, allocated)); } simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(const uint8_t *json, size_t len, size_t allocated) & noexcept { return iterate(padded_string_view(json, len, allocated)); } simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(std::string_view json, size_t allocated) & noexcept { return iterate(padded_string_view(json, allocated)); } simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(std::string &json) & noexcept { if(json.capacity() - json.size() < SIMDJSON_PADDING) { json.reserve(json.size() + SIMDJSON_PADDING); } return iterate(padded_string_view(json)); } simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(const std::string &json) & noexcept { return iterate(padded_string_view(json)); } simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(const simdjson_result &result) & noexcept { // We don't presently have a way to temporarily get a const T& from a simdjson_result without throwing an exception SIMDJSON_TRY( result.error() ); padded_string_view json = result.value_unsafe(); return iterate(json); } simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(const simdjson_result &result) & noexcept { // We don't presently have a way to temporarily get a const T& from a simdjson_result without throwing an exception SIMDJSON_TRY( result.error() ); const padded_string &json = result.value_unsafe(); return 
iterate(json); } simdjson_warn_unused simdjson_inline simdjson_result parser::iterate_raw(padded_string_view json) & noexcept { if (json.padding() < SIMDJSON_PADDING) { return INSUFFICIENT_PADDING; } json.remove_utf8_bom(); // Allocate if needed if (capacity() < json.length()) { SIMDJSON_TRY( allocate(json.length(), max_depth()) ); } // Run stage 1. SIMDJSON_TRY( implementation->stage1(reinterpret_cast(json.data()), json.length(), stage1_mode::regular) ); return json_iterator(reinterpret_cast(json.data()), this); } inline simdjson_result parser::iterate_many(const uint8_t *buf, size_t len, size_t batch_size, bool allow_comma_separated) noexcept { if(batch_size < MINIMAL_BATCH_SIZE) { batch_size = MINIMAL_BATCH_SIZE; } if((len >= 3) && (std::memcmp(buf, "\xEF\xBB\xBF", 3) == 0)) { buf += 3; len -= 3; } if(allow_comma_separated && batch_size < len) { batch_size = len; } return document_stream(*this, buf, len, batch_size, allow_comma_separated); } inline simdjson_result parser::iterate_many(const char *buf, size_t len, size_t batch_size, bool allow_comma_separated) noexcept { return iterate_many(reinterpret_cast(buf), len, batch_size, allow_comma_separated); } inline simdjson_result parser::iterate_many(const std::string &s, size_t batch_size, bool allow_comma_separated) noexcept { return iterate_many(s.data(), s.length(), batch_size, allow_comma_separated); } inline simdjson_result parser::iterate_many(const padded_string &s, size_t batch_size, bool allow_comma_separated) noexcept { return iterate_many(s.data(), s.length(), batch_size, allow_comma_separated); } simdjson_inline size_t parser::capacity() const noexcept { return _capacity; } simdjson_inline size_t parser::max_capacity() const noexcept { return _max_capacity; } simdjson_inline size_t parser::max_depth() const noexcept { return _max_depth; } simdjson_inline void parser::set_max_capacity(size_t max_capacity) noexcept { if(max_capacity < dom::MINIMAL_DOCUMENT_CAPACITY) { _max_capacity = max_capacity; } else 
{ _max_capacity = dom::MINIMAL_DOCUMENT_CAPACITY; } } simdjson_inline simdjson_warn_unused simdjson_result parser::unescape(raw_json_string in, uint8_t *&dst, bool allow_replacement) const noexcept { uint8_t *end = implementation->parse_string(in.buf, dst, allow_replacement); if (!end) { return STRING_ERROR; } std::string_view result(reinterpret_cast(dst), end-dst); dst = end; return result; } simdjson_inline simdjson_warn_unused simdjson_result parser::unescape_wobbly(raw_json_string in, uint8_t *&dst) const noexcept { uint8_t *end = implementation->parse_wobbly_string(in.buf, dst); if (!end) { return STRING_ERROR; } std::string_view result(reinterpret_cast(dst), end-dst); dst = end; return result; } } // namespace ondemand } // namespace haswell } // namespace simdjson namespace simdjson { simdjson_inline simdjson_result::simdjson_result(haswell::ondemand::parser &&value) noexcept : implementation_simdjson_result_base(std::forward(value)) {} simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept : implementation_simdjson_result_base(error) {} } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_PARSER_INL_H /* end file simdjson/generic/ondemand/parser-inl.h for haswell */ /* including simdjson/generic/ondemand/raw_json_string-inl.h for haswell: #include "simdjson/generic/ondemand/raw_json_string-inl.h" */ /* begin file simdjson/generic/ondemand/raw_json_string-inl.h for haswell */ #ifndef SIMDJSON_GENERIC_ONDEMAND_RAW_JSON_STRING_INL_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_RAW_JSON_STRING_INL_H */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator-inl.h" */ /* amalgamation skipped (editor-only): #include 
"simdjson/generic/implementation_simdjson_result_base-inl.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { namespace haswell { namespace ondemand { simdjson_inline raw_json_string::raw_json_string(const uint8_t * _buf) noexcept : buf{_buf} {} simdjson_inline const char * raw_json_string::raw() const noexcept { return reinterpret_cast(buf); } simdjson_inline bool raw_json_string::is_free_from_unescaped_quote(std::string_view target) noexcept { size_t pos{0}; // if the content has no escape character, just scan through it quickly! for(;pos < target.size() && target[pos] != '\\';pos++) {} // slow path may begin. bool escaping{false}; for(;pos < target.size();pos++) { if((target[pos] == '"') && !escaping) { return false; } else if(target[pos] == '\\') { escaping = !escaping; } else { escaping = false; } } return true; } simdjson_inline bool raw_json_string::is_free_from_unescaped_quote(const char* target) noexcept { size_t pos{0}; // if the content has no escape character, just scan through it quickly! for(;target[pos] && target[pos] != '\\';pos++) {} // slow path may begin. bool escaping{false}; for(;target[pos];pos++) { if((target[pos] == '"') && !escaping) { return false; } else if(target[pos] == '\\') { escaping = !escaping; } else { escaping = false; } } return true; } simdjson_inline bool raw_json_string::unsafe_is_equal(size_t length, std::string_view target) const noexcept { // If we are going to call memcmp, then we must know something about the length of the raw_json_string. return (length >= target.size()) && (raw()[target.size()] == '"') && !memcmp(raw(), target.data(), target.size()); } simdjson_inline bool raw_json_string::unsafe_is_equal(std::string_view target) const noexcept { // Assumptions: does not contain unescaped quote characters, and // the raw content is quote terminated within a valid JSON string. 
if(target.size() <= SIMDJSON_PADDING) { return (raw()[target.size()] == '"') && !memcmp(raw(), target.data(), target.size()); } const char * r{raw()}; size_t pos{0}; for(;pos < target.size();pos++) { if(r[pos] != target[pos]) { return false; } } if(r[pos] != '"') { return false; } return true; } simdjson_inline bool raw_json_string::is_equal(std::string_view target) const noexcept { const char * r{raw()}; size_t pos{0}; bool escaping{false}; for(;pos < target.size();pos++) { if(r[pos] != target[pos]) { return false; } // if target is a compile-time constant and it is free from // quotes, then the next part could get optimized away through // inlining. if((target[pos] == '"') && !escaping) { // We have reached the end of the raw_json_string but // the target is not done. return false; } else if(target[pos] == '\\') { escaping = !escaping; } else { escaping = false; } } if(r[pos] != '"') { return false; } return true; } simdjson_inline bool raw_json_string::unsafe_is_equal(const char * target) const noexcept { // Assumptions: 'target' does not contain unescaped quote characters, is null terminated and // the raw content is quote terminated within a valid JSON string. const char * r{raw()}; size_t pos{0}; for(;target[pos];pos++) { if(r[pos] != target[pos]) { return false; } } if(r[pos] != '"') { return false; } return true; } simdjson_inline bool raw_json_string::is_equal(const char* target) const noexcept { // Assumptions: does not contain unescaped quote characters, and // the raw content is quote terminated within a valid JSON string. const char * r{raw()}; size_t pos{0}; bool escaping{false}; for(;target[pos];pos++) { if(r[pos] != target[pos]) { return false; } // if target is a compile-time constant and it is free from // quotes, then the next part could get optimized away through // inlining. if((target[pos] == '"') && !escaping) { // We have reached the end of the raw_json_string but // the target is not done. 
return false; } else if(target[pos] == '\\') { escaping = !escaping; } else { escaping = false; } } if(r[pos] != '"') { return false; } return true; } simdjson_unused simdjson_inline bool operator==(const raw_json_string &a, std::string_view c) noexcept { return a.unsafe_is_equal(c); } simdjson_unused simdjson_inline bool operator==(std::string_view c, const raw_json_string &a) noexcept { return a == c; } simdjson_unused simdjson_inline bool operator!=(const raw_json_string &a, std::string_view c) noexcept { return !(a == c); } simdjson_unused simdjson_inline bool operator!=(std::string_view c, const raw_json_string &a) noexcept { return !(a == c); } simdjson_inline simdjson_warn_unused simdjson_result raw_json_string::unescape(json_iterator &iter, bool allow_replacement) const noexcept { return iter.unescape(*this, allow_replacement); } simdjson_inline simdjson_warn_unused simdjson_result raw_json_string::unescape_wobbly(json_iterator &iter) const noexcept { return iter.unescape_wobbly(*this); } simdjson_unused simdjson_inline std::ostream &operator<<(std::ostream &out, const raw_json_string &str) noexcept { bool in_escape = false; const char *s = str.raw(); while (true) { switch (*s) { case '\\': in_escape = !in_escape; break; case '"': if (in_escape) { in_escape = false; } else { return out; } break; default: if (in_escape) { in_escape = false; } } out << *s; s++; } } } // namespace ondemand } // namespace haswell } // namespace simdjson namespace simdjson { simdjson_inline simdjson_result::simdjson_result(haswell::ondemand::raw_json_string &&value) noexcept : implementation_simdjson_result_base(std::forward(value)) {} simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept : implementation_simdjson_result_base(error) {} simdjson_inline simdjson_result simdjson_result::raw() const noexcept { if (error()) { return error(); } return first.raw(); } simdjson_inline simdjson_warn_unused simdjson_result 
simdjson_result::unescape(haswell::ondemand::json_iterator &iter, bool allow_replacement) const noexcept { if (error()) { return error(); } return first.unescape(iter, allow_replacement); } simdjson_inline simdjson_warn_unused simdjson_result simdjson_result::unescape_wobbly(haswell::ondemand::json_iterator &iter) const noexcept { if (error()) { return error(); } return first.unescape_wobbly(iter); } } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_RAW_JSON_STRING_INL_H /* end file simdjson/generic/ondemand/raw_json_string-inl.h for haswell */ /* including simdjson/generic/ondemand/serialization-inl.h for haswell: #include "simdjson/generic/ondemand/serialization-inl.h" */ /* begin file simdjson/generic/ondemand/serialization-inl.h for haswell */ #ifndef SIMDJSON_GENERIC_ONDEMAND_SERIALIZATION_INL_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_SERIALIZATION_INL_H */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/document-inl.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_type.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/object.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/serialization.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { inline std::string_view trim(const std::string_view str) noexcept { // We can almost surely do better by rolling our own find_first_not_of function. 
size_t first = str.find_first_not_of(" \t\n\r"); // If we have the empty string (just white space), then no trimming is possible, and // we return the empty string_view. if (std::string_view::npos == first) { return std::string_view(); } size_t last = str.find_last_not_of(" \t\n\r"); return str.substr(first, (last - first + 1)); } inline simdjson_result to_json_string(haswell::ondemand::document& x) noexcept { std::string_view v; auto error = x.raw_json().get(v); if(error) {return error; } return trim(v); } inline simdjson_result to_json_string(haswell::ondemand::document_reference& x) noexcept { std::string_view v; auto error = x.raw_json().get(v); if(error) {return error; } return trim(v); } inline simdjson_result to_json_string(haswell::ondemand::value& x) noexcept { /** * If we somehow receive a value that has already been consumed, * then the following code could be in trouble. E.g., we create * an array as needed, but if an array was already created, then * it could be bad. */ using namespace haswell::ondemand; haswell::ondemand::json_type t; auto error = x.type().get(t); if(error != SUCCESS) { return error; } switch (t) { case json_type::array: { haswell::ondemand::array array; error = x.get_array().get(array); if(error) { return error; } return to_json_string(array); } case json_type::object: { haswell::ondemand::object object; error = x.get_object().get(object); if(error) { return error; } return to_json_string(object); } default: return trim(x.raw_json_token()); } } inline simdjson_result to_json_string(haswell::ondemand::object& x) noexcept { std::string_view v; auto error = x.raw_json().get(v); if(error) {return error; } return trim(v); } inline simdjson_result to_json_string(haswell::ondemand::array& x) noexcept { std::string_view v; auto error = x.raw_json().get(v); if(error) {return error; } return trim(v); } inline simdjson_result to_json_string(simdjson_result x) { if (x.error()) { return x.error(); } return to_json_string(x.value_unsafe()); } 
inline simdjson_result to_json_string(simdjson_result x) { if (x.error()) { return x.error(); } return to_json_string(x.value_unsafe()); } inline simdjson_result to_json_string(simdjson_result x) { if (x.error()) { return x.error(); } return to_json_string(x.value_unsafe()); } inline simdjson_result to_json_string(simdjson_result x) { if (x.error()) { return x.error(); } return to_json_string(x.value_unsafe()); } inline simdjson_result to_json_string(simdjson_result x) { if (x.error()) { return x.error(); } return to_json_string(x.value_unsafe()); } } // namespace simdjson namespace simdjson { namespace haswell { namespace ondemand { #if SIMDJSON_EXCEPTIONS inline std::ostream& operator<<(std::ostream& out, simdjson::haswell::ondemand::value x) { std::string_view v; auto error = simdjson::to_json_string(x).get(v); if(error == simdjson::SUCCESS) { return (out << v); } else { throw simdjson::simdjson_error(error); } } inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x) { if (x.error()) { throw simdjson::simdjson_error(x.error()); } return (out << x.value()); } #else inline std::ostream& operator<<(std::ostream& out, simdjson::haswell::ondemand::value x) { std::string_view v; auto error = simdjson::to_json_string(x).get(v); if(error == simdjson::SUCCESS) { return (out << v); } else { return (out << error); } } #endif #if SIMDJSON_EXCEPTIONS inline std::ostream& operator<<(std::ostream& out, simdjson::haswell::ondemand::array value) { std::string_view v; auto error = simdjson::to_json_string(value).get(v); if(error == simdjson::SUCCESS) { return (out << v); } else { throw simdjson::simdjson_error(error); } } inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x) { if (x.error()) { throw simdjson::simdjson_error(x.error()); } return (out << x.value()); } #else inline std::ostream& operator<<(std::ostream& out, simdjson::haswell::ondemand::array value) { std::string_view v; auto error = 
simdjson::to_json_string(value).get(v); if(error == simdjson::SUCCESS) { return (out << v); } else { return (out << error); } } #endif #if SIMDJSON_EXCEPTIONS inline std::ostream& operator<<(std::ostream& out, simdjson::haswell::ondemand::document& value) { std::string_view v; auto error = simdjson::to_json_string(value).get(v); if(error == simdjson::SUCCESS) { return (out << v); } else { throw simdjson::simdjson_error(error); } } inline std::ostream& operator<<(std::ostream& out, simdjson::haswell::ondemand::document_reference& value) { std::string_view v; auto error = simdjson::to_json_string(value).get(v); if(error == simdjson::SUCCESS) { return (out << v); } else { throw simdjson::simdjson_error(error); } } inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result&& x) { if (x.error()) { throw simdjson::simdjson_error(x.error()); } return (out << x.value()); } inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result&& x) { if (x.error()) { throw simdjson::simdjson_error(x.error()); } return (out << x.value()); } #else inline std::ostream& operator<<(std::ostream& out, simdjson::haswell::ondemand::document& value) { std::string_view v; auto error = simdjson::to_json_string(value).get(v); if(error == simdjson::SUCCESS) { return (out << v); } else { return (out << error); } } #endif #if SIMDJSON_EXCEPTIONS inline std::ostream& operator<<(std::ostream& out, simdjson::haswell::ondemand::object value) { std::string_view v; auto error = simdjson::to_json_string(value).get(v); if(error == simdjson::SUCCESS) { return (out << v); } else { throw simdjson::simdjson_error(error); } } inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x) { if (x.error()) { throw simdjson::simdjson_error(x.error()); } return (out << x.value()); } #else inline std::ostream& operator<<(std::ostream& out, simdjson::haswell::ondemand::object value) { std::string_view v; auto error = simdjson::to_json_string(value).get(v); if(error 
== simdjson::SUCCESS) { return (out << v); } else { return (out << error); } } #endif }}} // namespace simdjson::haswell::ondemand #endif // SIMDJSON_GENERIC_ONDEMAND_SERIALIZATION_INL_H /* end file simdjson/generic/ondemand/serialization-inl.h for haswell */ /* including simdjson/generic/ondemand/token_iterator-inl.h for haswell: #include "simdjson/generic/ondemand/token_iterator-inl.h" */ /* begin file simdjson/generic/ondemand/token_iterator-inl.h for haswell */ #ifndef SIMDJSON_GENERIC_ONDEMAND_TOKEN_ITERATOR_INL_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_TOKEN_ITERATOR_INL_H */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/token_iterator.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base-inl.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { namespace haswell { namespace ondemand { simdjson_inline token_iterator::token_iterator( const uint8_t *_buf, token_position position ) noexcept : buf{_buf}, _position{position} { } simdjson_inline uint32_t token_iterator::current_offset() const noexcept { return *(_position); } simdjson_inline const uint8_t *token_iterator::return_current_and_advance() noexcept { return &buf[*(_position++)]; } simdjson_inline const uint8_t *token_iterator::peek(token_position position) const noexcept { return &buf[*position]; } simdjson_inline uint32_t token_iterator::peek_index(token_position position) const noexcept { return *position; } simdjson_inline uint32_t token_iterator::peek_length(token_position position) const noexcept { return *(position+1) - *position; } simdjson_inline const uint8_t *token_iterator::peek(int32_t delta) const noexcept { return &buf[*(_position+delta)]; } simdjson_inline 
uint32_t token_iterator::peek_index(int32_t delta) const noexcept { return *(_position+delta); } simdjson_inline uint32_t token_iterator::peek_length(int32_t delta) const noexcept { return *(_position+delta+1) - *(_position+delta); } simdjson_inline token_position token_iterator::position() const noexcept { return _position; } simdjson_inline void token_iterator::set_position(token_position target_position) noexcept { _position = target_position; } simdjson_inline bool token_iterator::operator==(const token_iterator &other) const noexcept { return _position == other._position; } simdjson_inline bool token_iterator::operator!=(const token_iterator &other) const noexcept { return _position != other._position; } simdjson_inline bool token_iterator::operator>(const token_iterator &other) const noexcept { return _position > other._position; } simdjson_inline bool token_iterator::operator>=(const token_iterator &other) const noexcept { return _position >= other._position; } simdjson_inline bool token_iterator::operator<(const token_iterator &other) const noexcept { return _position < other._position; } simdjson_inline bool token_iterator::operator<=(const token_iterator &other) const noexcept { return _position <= other._position; } } // namespace ondemand } // namespace haswell } // namespace simdjson namespace simdjson { simdjson_inline simdjson_result::simdjson_result(haswell::ondemand::token_iterator &&value) noexcept : implementation_simdjson_result_base(std::forward(value)) {} simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept : implementation_simdjson_result_base(error) {} } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_TOKEN_ITERATOR_INL_H /* end file simdjson/generic/ondemand/token_iterator-inl.h for haswell */ /* including simdjson/generic/ondemand/value-inl.h for haswell: #include "simdjson/generic/ondemand/value-inl.h" */ /* begin file simdjson/generic/ondemand/value-inl.h for haswell */ #ifndef 
SIMDJSON_GENERIC_ONDEMAND_VALUE_INL_H

/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */
/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_VALUE_INL_H */
/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */
/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array.h" */
/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array_iterator.h" */
/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator.h" */
/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_type.h" */
/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/object.h" */
/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string.h" */
/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value.h" */
/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */

namespace simdjson {
namespace haswell {
namespace ondemand {

// NOTE(review): angle-bracket contents (template parameter lists and template
// arguments such as the payload type of simdjson_result) are missing from this
// text; they look stripped by extraction. The tokens are left exactly as found;
// confirm against the upstream header before relying on any signature here.

// Builds a value around a copy of the given value_iterator. Every accessor
// below operates through `iter`.
simdjson_inline value::value(const value_iterator &_iter) noexcept
  : iter{_iter}
{
}
// start() and resume() both return a value constructed from the given iterator.
simdjson_inline value value::start(const value_iterator &iter) noexcept {
  return iter;
}
simdjson_inline value value::resume(const value_iterator &iter) noexcept {
  return iter;
}

// Container accessors: delegate to array::start() / object::start() on `iter`.
simdjson_inline simdjson_result value::get_array() noexcept {
  return array::start(iter);
}
simdjson_inline simdjson_result value::get_object() noexcept {
  return object::start(iter);
}
// Starts the object when the iterator reports at_start(); otherwise resumes it
// through object::resume(). Used by the field-lookup members further down.
simdjson_inline simdjson_result value::start_or_resume_object() noexcept {
  if (iter.at_start()) {
    return get_object();
  } else {
    return object::resume(iter);
  }
}

// Scalar accessors: each one forwards to the value_iterator member of the same name.
simdjson_inline simdjson_result value::get_raw_json_string() noexcept {
  return iter.get_raw_json_string();
}
simdjson_inline simdjson_result value::get_string(bool allow_replacement) noexcept {
  return iter.get_string(allow_replacement);
}
// Overload that writes into a caller-supplied receiver and reports only an error_code.
template
simdjson_inline error_code value::get_string(string_type& receiver, bool allow_replacement) noexcept {
  return iter.get_string(receiver, allow_replacement);
}
simdjson_inline simdjson_result value::get_wobbly_string() noexcept {
  return iter.get_wobbly_string();
}
simdjson_inline simdjson_result value::get_double() noexcept {
  return iter.get_double();
}
simdjson_inline simdjson_result value::get_double_in_string() noexcept {
  return iter.get_double_in_string();
}
simdjson_inline simdjson_result value::get_uint64() noexcept {
  return iter.get_uint64();
}
simdjson_inline simdjson_result value::get_uint64_in_string() noexcept {
  return iter.get_uint64_in_string();
}
simdjson_inline simdjson_result value::get_int64() noexcept {
  return iter.get_int64();
}
simdjson_inline simdjson_result value::get_int64_in_string() noexcept {
  return iter.get_int64_in_string();
}
simdjson_inline simdjson_result value::get_bool() noexcept {
  return iter.get_bool();
}
simdjson_inline simdjson_result value::is_null() noexcept {
  return iter.is_null();
}

// Explicit specializations of value::get(): each one forwards to the named
// accessor in its body. The string specialization calls get_string(false),
// i.e. with allow_replacement disabled.
// NOTE(review): the type each specialization targets is not visible in this
// text (see the note at the top of the namespace); the bodies are the only
// reliable guide here.
template<> simdjson_inline simdjson_result value::get() noexcept { return get_array(); }
template<> simdjson_inline simdjson_result value::get() noexcept { return get_object(); }
template<> simdjson_inline simdjson_result value::get() noexcept { return get_raw_json_string(); }
template<> simdjson_inline simdjson_result value::get() noexcept { return get_string(false); }
template<> simdjson_inline simdjson_result value::get() noexcept { return get_number(); }
template<> simdjson_inline simdjson_result value::get() noexcept { return get_double(); }
template<> simdjson_inline simdjson_result value::get() noexcept { return get_uint64(); }
template<> simdjson_inline simdjson_result value::get() noexcept { return get_int64(); }
template<> simdjson_inline simdjson_result value::get() noexcept { return get_bool(); }

// Out-parameter form: runs get() and stores its result into `out`, returning
// the error_code produced by the result's own get(out).
template simdjson_inline error_code value::get(T &out) noexcept {
  return get().get(out);
}

#if SIMDJSON_EXCEPTIONS
// Conversion operators, compiled only when SIMDJSON_EXCEPTIONS is set. They are
// declared noexcept(false) and return the result of the matching accessor.
simdjson_inline value::operator array() noexcept(false) {
  return get_array();
}
simdjson_inline value::operator object() noexcept(false) {
  return get_object();
}
simdjson_inline value::operator uint64_t() noexcept(false) {
  return get_uint64();
}
simdjson_inline value::operator int64_t() noexcept(false) {
  return get_int64();
}
simdjson_inline value::operator double() noexcept(false) {
  return get_double();
}
simdjson_inline value::operator std::string_view() noexcept(false) {
  return get_string(false);
}
simdjson_inline value::operator raw_json_string() noexcept(false) {
  return get_raw_json_string();
}
simdjson_inline value::operator bool() noexcept(false) {
  return get_bool();
}
#endif

// Iteration support: begin() treats this value as an array and returns that
// array's begin(); end() returns a default-constructed result.
simdjson_inline simdjson_result value::begin() & noexcept {
  return get_array().begin();
}
simdjson_inline simdjson_result value::end() & noexcept {
  return {};
}
// Counts the elements of this value viewed as an array, then rewinds the
// iterator to the start of the value.
simdjson_inline simdjson_result value::count_elements() & noexcept {
  simdjson_result answer;
  auto a = get_array();
  answer = a.count_elements();
  // count_elements leaves you pointing inside the array, at the first element.
  // We need to move back so that the user can create a new array (which requires that
  // we point at '[').
  iter.move_at_start();
  return answer;
}
// Object counterpart of count_elements(): counts fields, then rewinds with
// iter.move_at_start().
simdjson_inline simdjson_result value::count_fields() & noexcept {
  simdjson_result answer;
  auto a = get_object();
  answer = a.count_fields();
  iter.move_at_start();
  return answer;
}
// Indexes into this value viewed as an array.
simdjson_inline simdjson_result value::at(size_t index) noexcept {
  auto a = get_array();
  return a.at(index);
}

// Field lookups: all of them go through start_or_resume_object() and then call
// the same-named member (or operator[]) on the resulting object.
simdjson_inline simdjson_result value::find_field(std::string_view key) noexcept {
  return start_or_resume_object().find_field(key);
}
simdjson_inline simdjson_result value::find_field(const char *key) noexcept {
  return start_or_resume_object().find_field(key);
}

simdjson_inline simdjson_result value::find_field_unordered(std::string_view key) noexcept {
  return start_or_resume_object().find_field_unordered(key);
}
simdjson_inline simdjson_result value::find_field_unordered(const char *key) noexcept {
  return start_or_resume_object().find_field_unordered(key);
}

simdjson_inline simdjson_result value::operator[](std::string_view key) noexcept {
  return start_or_resume_object()[key];
}
simdjson_inline simdjson_result value::operator[](const char *key) noexcept {
  return start_or_resume_object()[key];
}

// Type inspection, forwarded to the iterator.
simdjson_inline simdjson_result value::type() noexcept {
  return iter.type();
}

// True when type() succeeds and reports neither json_type::array nor
// json_type::object; the error from type() is returned unchanged otherwise.
simdjson_inline simdjson_result value::is_scalar() noexcept {
  json_type this_type;
  auto error = type().get(this_type);
  if(error) { return error; }
  return ! ((this_type == json_type::array) || (this_type == json_type::object));
}

simdjson_inline bool value::is_negative() noexcept {
  return iter.is_negative();
}

simdjson_inline simdjson_result value::is_integer() noexcept {
  return iter.is_integer();
}
simdjson_warn_unused simdjson_inline simdjson_result value::get_number_type() noexcept {
  return iter.get_number_type();
}
simdjson_warn_unused simdjson_inline simdjson_result value::get_number() noexcept {
  return iter.get_number();
}

// Returns a string_view built from iter.peek_start() and iter.peek_start_length().
simdjson_inline std::string_view value::raw_json_token() noexcept {
  return std::string_view(reinterpret_cast(iter.peek_start()), iter.peek_start_length());
}

// Returns the raw JSON of this value: arrays and objects delegate to their own
// raw_json(); every other type falls back to raw_json_token(). Errors from
// type(), get_array() or get_object() are returned through SIMDJSON_TRY.
simdjson_inline simdjson_result value::raw_json() noexcept {
  json_type t;
  SIMDJSON_TRY(type().get(t));
  switch (t)
  {
    case json_type::array: {
      ondemand::array array;
      SIMDJSON_TRY(get_array().get(array));
      return array.raw_json();
    }
    case json_type::object: {
      ondemand::object object;
      SIMDJSON_TRY(get_object().get(object));
      return object.raw_json();
    }
    default:
      return raw_json_token();
  }
}

// Location and depth are read from the underlying json_iterator.
simdjson_inline simdjson_result value::current_location() noexcept {
  return iter.json_iter().current_location();
}

simdjson_inline int32_t value::current_depth() const noexcept{
  return iter.json_iter().depth();
}

// Resolves a JSON pointer against this value. Only arrays and objects are
// accepted; any other type yields INVALID_JSON_POINTER.
simdjson_inline simdjson_result value::at_pointer(std::string_view json_pointer) noexcept {
  json_type t;
  SIMDJSON_TRY(type().get(t));
  switch (t)
  {
    case json_type::array:
      return (*this).get_array().at_pointer(json_pointer);
    case json_type::object:
      return (*this).get_object().at_pointer(json_pointer);
    default:
      return INVALID_JSON_POINTER;
  }
}

} // namespace ondemand
} // namespace haswell
} // namespace simdjson

namespace simdjson {

// simdjson_result wrapper around haswell::ondemand::value. The two constructors
// hand either a value or an error_code to implementation_simdjson_result_base.
simdjson_inline simdjson_result::simdjson_result(
  haswell::ondemand::value &&value
) noexcept :
    implementation_simdjson_result_base(
      std::forward(value)
    )
{
}
simdjson_inline simdjson_result::simdjson_result(
  error_code error
) noexcept :
    implementation_simdjson_result_base(error)
{
}

// Every member below follows one pattern: if error() is set, return that error;
// otherwise forward the call to the wrapped value `first`.
simdjson_inline simdjson_result simdjson_result::count_elements() & noexcept {
  if (error()) { return error(); }
  return first.count_elements();
}
simdjson_inline simdjson_result simdjson_result::count_fields() & noexcept {
  if (error()) { return error(); }
  return first.count_fields();
}
simdjson_inline simdjson_result simdjson_result::at(size_t index) noexcept {
  if (error()) { return error(); }
  return first.at(index);
}
simdjson_inline simdjson_result simdjson_result::begin() & noexcept {
  if (error()) { return error(); }
  return first.begin();
}
// end() does not consult `first`: on success it returns a default-constructed result.
simdjson_inline simdjson_result simdjson_result::end() & noexcept {
  if (error()) { return error(); }
  return {};
}

simdjson_inline simdjson_result simdjson_result::find_field(std::string_view key) noexcept {
  if (error()) { return error(); }
  return first.find_field(key);
}
simdjson_inline simdjson_result simdjson_result::find_field(const char *key) noexcept {
  if (error()) { return error(); }
  return first.find_field(key);
}

simdjson_inline simdjson_result simdjson_result::find_field_unordered(std::string_view key) noexcept {
  if (error()) { return error(); }
  return first.find_field_unordered(key);
}
simdjson_inline simdjson_result simdjson_result::find_field_unordered(const char *key) noexcept {
  if (error()) { return error(); }
  return first.find_field_unordered(key);
}

simdjson_inline simdjson_result simdjson_result::operator[](std::string_view key) noexcept {
  if (error()) { return error(); }
  return first[key];
}
simdjson_inline simdjson_result simdjson_result::operator[](const char *key) noexcept {
  if (error()) { return error(); }
  return first[key];
}

simdjson_inline simdjson_result simdjson_result::get_array() noexcept {
  if (error()) { return error(); }
  return first.get_array();
}
simdjson_inline simdjson_result simdjson_result::get_object() noexcept {
  if (error()) { return error(); }
  return first.get_object();
}
simdjson_inline simdjson_result simdjson_result::get_uint64() noexcept {
  if (error()) { return error(); }
  return first.get_uint64();
}
simdjson_inline simdjson_result simdjson_result::get_uint64_in_string() noexcept { if (error()) { return error(); } return first.get_uint64_in_string(); } simdjson_inline simdjson_result simdjson_result::get_int64() noexcept { if (error()) { return error(); } return first.get_int64(); } simdjson_inline simdjson_result simdjson_result::get_int64_in_string() noexcept { if (error()) { return error(); } return first.get_int64_in_string(); } simdjson_inline simdjson_result simdjson_result::get_double() noexcept { if (error()) { return error(); } return first.get_double(); } simdjson_inline simdjson_result simdjson_result::get_double_in_string() noexcept { if (error()) { return error(); } return first.get_double_in_string(); } simdjson_inline simdjson_result simdjson_result::get_string(bool allow_replacement) noexcept { if (error()) { return error(); } return first.get_string(allow_replacement); } template simdjson_inline error_code simdjson_result::get_string(string_type& receiver, bool allow_replacement) noexcept { if (error()) { return error(); } return first.get_string(receiver, allow_replacement); } simdjson_inline simdjson_result simdjson_result::get_wobbly_string() noexcept { if (error()) { return error(); } return first.get_wobbly_string(); } simdjson_inline simdjson_result simdjson_result::get_raw_json_string() noexcept { if (error()) { return error(); } return first.get_raw_json_string(); } simdjson_inline simdjson_result simdjson_result::get_bool() noexcept { if (error()) { return error(); } return first.get_bool(); } simdjson_inline simdjson_result simdjson_result::is_null() noexcept { if (error()) { return error(); } return first.is_null(); } template simdjson_inline simdjson_result simdjson_result::get() noexcept { if (error()) { return error(); } return first.get(); } template simdjson_inline error_code simdjson_result::get(T &out) noexcept { if (error()) { return error(); } return first.get(out); } template<> simdjson_inline simdjson_result 
simdjson_result::get() noexcept { if (error()) { return error(); } return std::move(first); } template<> simdjson_inline error_code simdjson_result::get(haswell::ondemand::value &out) noexcept { if (error()) { return error(); } out = first; return SUCCESS; } simdjson_inline simdjson_result simdjson_result::type() noexcept { if (error()) { return error(); } return first.type(); } simdjson_inline simdjson_result simdjson_result::is_scalar() noexcept { if (error()) { return error(); } return first.is_scalar(); } simdjson_inline simdjson_result simdjson_result::is_negative() noexcept { if (error()) { return error(); } return first.is_negative(); } simdjson_inline simdjson_result simdjson_result::is_integer() noexcept { if (error()) { return error(); } return first.is_integer(); } simdjson_inline simdjson_result simdjson_result::get_number_type() noexcept { if (error()) { return error(); } return first.get_number_type(); } simdjson_inline simdjson_result simdjson_result::get_number() noexcept { if (error()) { return error(); } return first.get_number(); } #if SIMDJSON_EXCEPTIONS simdjson_inline simdjson_result::operator haswell::ondemand::array() noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } simdjson_inline simdjson_result::operator haswell::ondemand::object() noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } simdjson_inline simdjson_result::operator uint64_t() noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } simdjson_inline simdjson_result::operator int64_t() noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } simdjson_inline simdjson_result::operator double() noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } simdjson_inline simdjson_result::operator std::string_view() noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } simdjson_inline simdjson_result::operator 
haswell::ondemand::raw_json_string() noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } simdjson_inline simdjson_result::operator bool() noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } #endif simdjson_inline simdjson_result simdjson_result::raw_json_token() noexcept { if (error()) { return error(); } return first.raw_json_token(); } simdjson_inline simdjson_result simdjson_result::raw_json() noexcept { if (error()) { return error(); } return first.raw_json(); } simdjson_inline simdjson_result simdjson_result::current_location() noexcept { if (error()) { return error(); } return first.current_location(); } simdjson_inline simdjson_result simdjson_result::current_depth() const noexcept { if (error()) { return error(); } return first.current_depth(); } simdjson_inline simdjson_result simdjson_result::at_pointer(std::string_view json_pointer) noexcept { if (error()) { return error(); } return first.at_pointer(json_pointer); } } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_VALUE_INL_H /* end file simdjson/generic/ondemand/value-inl.h for haswell */ /* including simdjson/generic/ondemand/value_iterator-inl.h for haswell: #include "simdjson/generic/ondemand/value_iterator-inl.h" */ /* begin file simdjson/generic/ondemand/value_iterator-inl.h for haswell */ #ifndef SIMDJSON_GENERIC_ONDEMAND_VALUE_ITERATOR_INL_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_VALUE_ITERATOR_INL_H */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/atomparsing.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/numberparsing.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator.h" */ /* amalgamation skipped (editor-only): #include 
"simdjson/generic/ondemand/json_type-inl.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string-inl.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { namespace haswell { namespace ondemand { simdjson_inline value_iterator::value_iterator( json_iterator *json_iter, depth_t depth, token_position start_position ) noexcept : _json_iter{json_iter}, _depth{depth}, _start_position{start_position} { } simdjson_warn_unused simdjson_inline simdjson_result value_iterator::start_object() noexcept { SIMDJSON_TRY( start_container('{', "Not an object", "object") ); return started_object(); } simdjson_warn_unused simdjson_inline simdjson_result value_iterator::start_root_object() noexcept { SIMDJSON_TRY( start_container('{', "Not an object", "object") ); return started_root_object(); } simdjson_warn_unused simdjson_inline simdjson_result value_iterator::started_object() noexcept { assert_at_container_start(); #if SIMDJSON_DEVELOPMENT_CHECKS _json_iter->set_start_position(_depth, start_position()); #endif if (*_json_iter->peek() == '}') { logger::log_value(*_json_iter, "empty object"); _json_iter->return_current_and_advance(); end_container(); return false; } return true; } simdjson_warn_unused simdjson_inline error_code value_iterator::check_root_object() noexcept { // When in streaming mode, we cannot expect peek_last() to be the last structural element of the // current document. It only works in the normal mode where we have indexed a single document. // Note that adding a check for 'streaming' is not expensive since we only have at most // one root element. if ( ! _json_iter->streaming() ) { // The following lines do not fully protect against garbage content within the // object: e.g., `{"a":2} foo }`. 
Users concerned with garbage content should // call `at_end()` on the document instance at the end of the processing to // ensure that the processing has finished at the end. // if (*_json_iter->peek_last() != '}') { _json_iter->abandon(); return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "missing } at end"); } // If the last character is } *and* the first gibberish character is also '}' // then on-demand could accidentally go over. So we need additional checks. // https://github.com/simdjson/simdjson/issues/1834 // Checking that the document is balanced requires a full scan which is potentially // expensive, but it only happens in edge cases where the first padding character is // a closing bracket. if ((*_json_iter->peek(_json_iter->end_position()) == '}') && (!_json_iter->balanced())) { _json_iter->abandon(); // The exact error would require more work. It will typically be an unclosed object. return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "the document is unbalanced"); } } return SUCCESS; } simdjson_warn_unused simdjson_inline simdjson_result value_iterator::started_root_object() noexcept { auto error = check_root_object(); if(error) { return error; } return started_object(); } simdjson_warn_unused simdjson_inline error_code value_iterator::end_container() noexcept { #if SIMDJSON_CHECK_EOF if (depth() > 1 && at_end()) { return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "missing parent ] or }"); } // if (depth() <= 1 && !at_end()) { return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "missing [ or { at start"); } #endif // SIMDJSON_CHECK_EOF _json_iter->ascend_to(depth()-1); return SUCCESS; } simdjson_warn_unused simdjson_inline simdjson_result value_iterator::has_next_field() noexcept { assert_at_next(); // It's illegal to call this unless there are more tokens: anything that ends in } or ] is // obligated to verify there are more tokens if they are not the top level. 
switch (*_json_iter->return_current_and_advance()) { case '}': logger::log_end_value(*_json_iter, "object"); SIMDJSON_TRY( end_container() ); return false; case ',': return true; default: return report_error(TAPE_ERROR, "Missing comma between object fields"); } } simdjson_warn_unused simdjson_inline simdjson_result value_iterator::find_field_raw(const std::string_view key) noexcept { error_code error; bool has_value; // // Initially, the object can be in one of a few different places: // // 1. The start of the object, at the first field: // // ``` // { "a": [ 1, 2 ], "b": [ 3, 4 ] } // ^ (depth 2, index 1) // ``` if (at_first_field()) { has_value = true; // // 2. When a previous search did not yield a value or the object is empty: // // ``` // { "a": [ 1, 2 ], "b": [ 3, 4 ] } // ^ (depth 0) // { } // ^ (depth 0, index 2) // ``` // } else if (!is_open()) { #if SIMDJSON_DEVELOPMENT_CHECKS // If we're past the end of the object, we're being iterated out of order. // Note: this isn't perfect detection. It's possible the user is inside some other object; if so, // this object iterator will blithely scan that object for fields. if (_json_iter->depth() < depth() - 1) { return OUT_OF_ORDER_ITERATION; } #endif return false; // 3. When a previous search found a field or an iterator yielded a value: // // ``` // // When a field was not fully consumed (or not even touched at all) // { "a": [ 1, 2 ], "b": [ 3, 4 ] } // ^ (depth 2) // // When a field was fully consumed // { "a": [ 1, 2 ], "b": [ 3, 4 ] } // ^ (depth 1) // // When the last field was fully consumed // { "a": [ 1, 2 ], "b": [ 3, 4 ] } // ^ (depth 1) // ``` // } else { if ((error = skip_child() )) { abandon(); return error; } if ((error = has_next_field().get(has_value) )) { abandon(); return error; } #if SIMDJSON_DEVELOPMENT_CHECKS if (_json_iter->start_position(_depth) != start_position()) { return OUT_OF_ORDER_ITERATION; } #endif } while (has_value) { // Get the key and colon, stopping at the value. 
raw_json_string actual_key; // size_t max_key_length = _json_iter->peek_length() - 2; // -2 for the two quotes // Note: _json_iter->peek_length() - 2 might overflow if _json_iter->peek_length() < 2. // field_key() advances the pointer and checks that '"' is found (corresponding to a key). // The depth is left unchanged by field_key(). if ((error = field_key().get(actual_key) )) { abandon(); return error; }; // field_value() will advance and check that we find a ':' separating the // key and the value. It will also increment the depth by one. if ((error = field_value() )) { abandon(); return error; } // If it matches, stop and return // We could do it this way if we wanted to allow arbitrary // key content (including escaped quotes). //if (actual_key.unsafe_is_equal(max_key_length, key)) { // Instead we do the following which may trigger buffer overruns if the // user provides an adversarial key (containing a well placed unescaped quote // character and being longer than the number of bytes remaining in the JSON // input). if (actual_key.unsafe_is_equal(key)) { logger::log_event(*this, "match", key, -2); // If we return here, then we return while pointing at the ':' that we just checked. return true; } // No match: skip the value and see if , or } is next logger::log_event(*this, "no match", key, -2); // The call to skip_child is meant to skip over the value corresponding to the key. // After skip_child(), we are right before the next comma (',') or the final brace ('}'). SIMDJSON_TRY( skip_child() ); // Skip the value entirely // The has_next_field() advances the pointer and check that either ',' or '}' is found. // It returns true if ',' is found, false otherwise. If anything other than ',' or '}' is found, // then we are in error and we abort. if ((error = has_next_field().get(has_value) )) { abandon(); return error; } } // If the loop ended, we're out of fields to look at. 
return false; } SIMDJSON_PUSH_DISABLE_WARNINGS SIMDJSON_DISABLE_STRICT_OVERFLOW_WARNING simdjson_warn_unused simdjson_inline simdjson_result value_iterator::find_field_unordered_raw(const std::string_view key) noexcept { /** * When find_field_unordered_raw is called, we can either be pointing at the * first key, pointing outside (at the closing brace) or if a key was matched * we can be either pointing right afterthe ':' right before the value (that we need skip), * or we may have consumed the value and we might be at a comma or at the * final brace (ready for a call to has_next_field()). */ error_code error; bool has_value; // First, we scan from that point to the end. // If we don't find a match, we may loop back around, and scan from the beginning to that point. token_position search_start = _json_iter->position(); // We want to know whether we need to go back to the beginning. bool at_first = at_first_field(); /////////////// // Initially, the object can be in one of a few different places: // // 1. At the first key: // // ``` // { "a": [ 1, 2 ], "b": [ 3, 4 ] } // ^ (depth 2, index 1) // ``` // if (at_first) { has_value = true; // 2. When a previous search did not yield a value or the object is empty: // // ``` // { "a": [ 1, 2 ], "b": [ 3, 4 ] } // ^ (depth 0) // { } // ^ (depth 0, index 2) // ``` // } else if (!is_open()) { #if SIMDJSON_DEVELOPMENT_CHECKS // If we're past the end of the object, we're being iterated out of order. // Note: this isn't perfect detection. It's possible the user is inside some other object; if so, // this object iterator will blithely scan that object for fields. if (_json_iter->depth() < depth() - 1) { return OUT_OF_ORDER_ITERATION; } #endif SIMDJSON_TRY(reset_object().get(has_value)); at_first = true; // 3. 
When a previous search found a field or an iterator yielded a value: // // ``` // // When a field was not fully consumed (or not even touched at all) // { "a": [ 1, 2 ], "b": [ 3, 4 ] } // ^ (depth 2) // // When a field was fully consumed // { "a": [ 1, 2 ], "b": [ 3, 4 ] } // ^ (depth 1) // // When the last field was fully consumed // { "a": [ 1, 2 ], "b": [ 3, 4 ] } // ^ (depth 1) // ``` // } else { // If someone queried a key but they not did access the value, then we are left pointing // at the ':' and we need to move forward through the value... If the value was // processed then skip_child() does not move the iterator (but may adjust the depth). if ((error = skip_child() )) { abandon(); return error; } search_start = _json_iter->position(); if ((error = has_next_field().get(has_value) )) { abandon(); return error; } #if SIMDJSON_DEVELOPMENT_CHECKS if (_json_iter->start_position(_depth) != start_position()) { return OUT_OF_ORDER_ITERATION; } #endif } // After initial processing, we will be in one of two states: // // ``` // // At the beginning of a field // { "a": [ 1, 2 ], "b": [ 3, 4 ] } // ^ (depth 1) // { "a": [ 1, 2 ], "b": [ 3, 4 ] } // ^ (depth 1) // // At the end of the object // { "a": [ 1, 2 ], "b": [ 3, 4 ] } // ^ (depth 0) // ``` // // Next, we find a match starting from the current position. while (has_value) { SIMDJSON_ASSUME( _json_iter->_depth == _depth ); // We must be at the start of a field // Get the key and colon, stopping at the value. raw_json_string actual_key; // size_t max_key_length = _json_iter->peek_length() - 2; // -2 for the two quotes // Note: _json_iter->peek_length() - 2 might overflow if _json_iter->peek_length() < 2. // field_key() advances the pointer and checks that '"' is found (corresponding to a key). // The depth is left unchanged by field_key(). if ((error = field_key().get(actual_key) )) { abandon(); return error; }; // field_value() will advance and check that we find a ':' separating the // key and the value. 
It will also increment the depth by one. if ((error = field_value() )) { abandon(); return error; } // If it matches, stop and return // We could do it this way if we wanted to allow arbitrary // key content (including escaped quotes). // if (actual_key.unsafe_is_equal(max_key_length, key)) { // Instead we do the following which may trigger buffer overruns if the // user provides an adversarial key (containing a well placed unescaped quote // character and being longer than the number of bytes remaining in the JSON // input). if (actual_key.unsafe_is_equal(key)) { logger::log_event(*this, "match", key, -2); // If we return here, then we return while pointing at the ':' that we just checked. return true; } // No match: skip the value and see if , or } is next logger::log_event(*this, "no match", key, -2); // The call to skip_child is meant to skip over the value corresponding to the key. // After skip_child(), we are right before the next comma (',') or the final brace ('}'). SIMDJSON_TRY( skip_child() ); // The has_next_field() advances the pointer and check that either ',' or '}' is found. // It returns true if ',' is found, false otherwise. If anything other than ',' or '}' is found, // then we are in error and we abort. if ((error = has_next_field().get(has_value) )) { abandon(); return error; } } // Performance note: it maybe wasteful to rewind to the beginning when there might be // no other query following. Indeed, it would require reskipping the whole object. // Instead, you can just stay where you are. If there is a new query, there is always time // to rewind. if(at_first) { return false; } // If we reach the end without finding a match, search the rest of the fields starting at the // beginning of the object. // (We have already run through the object before, so we've already validated its structure. We // don't check errors in this bit.) 
SIMDJSON_TRY(reset_object().get(has_value)); while (true) { SIMDJSON_ASSUME(has_value); // we should reach search_start before ever reaching the end of the object SIMDJSON_ASSUME( _json_iter->_depth == _depth ); // We must be at the start of a field // Get the key and colon, stopping at the value. raw_json_string actual_key; // size_t max_key_length = _json_iter->peek_length() - 2; // -2 for the two quotes // Note: _json_iter->peek_length() - 2 might overflow if _json_iter->peek_length() < 2. // field_key() advances the pointer and checks that '"' is found (corresponding to a key). // The depth is left unchanged by field_key(). error = field_key().get(actual_key); SIMDJSON_ASSUME(!error); // field_value() will advance and check that we find a ':' separating the // key and the value. It will also increment the depth by one. error = field_value(); SIMDJSON_ASSUME(!error); // If it matches, stop and return // We could do it this way if we wanted to allow arbitrary // key content (including escaped quotes). // if (actual_key.unsafe_is_equal(max_key_length, key)) { // Instead we do the following which may trigger buffer overruns if the // user provides an adversarial key (containing a well placed unescaped quote // character and being longer than the number of bytes remaining in the JSON // input). if (actual_key.unsafe_is_equal(key)) { logger::log_event(*this, "match", key, -2); // If we return here, then we return while pointing at the ':' that we just checked. return true; } // No match: skip the value and see if , or } is next logger::log_event(*this, "no match", key, -2); // The call to skip_child is meant to skip over the value corresponding to the key. // After skip_child(), we are right before the next comma (',') or the final brace ('}'). SIMDJSON_TRY( skip_child() ); // If we reached the end of the key-value pair we started from, then we know // that the key is not there so we return false. We are either right before // the next comma or the final brace. 
if(_json_iter->position() == search_start) { return false; } // The has_next_field() advances the pointer and check that either ',' or '}' is found. // It returns true if ',' is found, false otherwise. If anything other than ',' or '}' is found, // then we are in error and we abort. error = has_next_field().get(has_value); SIMDJSON_ASSUME(!error); // If we make the mistake of exiting here, then we could be left pointing at a key // in the middle of an object. That's not an allowable state. } // If the loop ended, we're out of fields to look at. The program should // never reach this point. return false; } SIMDJSON_POP_DISABLE_WARNINGS simdjson_warn_unused simdjson_inline simdjson_result value_iterator::field_key() noexcept { assert_at_next(); const uint8_t *key = _json_iter->return_current_and_advance(); if (*(key++) != '"') { return report_error(TAPE_ERROR, "Object key is not a string"); } return raw_json_string(key); } simdjson_warn_unused simdjson_inline error_code value_iterator::field_value() noexcept { assert_at_next(); if (*_json_iter->return_current_and_advance() != ':') { return report_error(TAPE_ERROR, "Missing colon in object field"); } _json_iter->descend_to(depth()+1); return SUCCESS; } simdjson_warn_unused simdjson_inline simdjson_result value_iterator::start_array() noexcept { SIMDJSON_TRY( start_container('[', "Not an array", "array") ); return started_array(); } simdjson_warn_unused simdjson_inline simdjson_result value_iterator::start_root_array() noexcept { SIMDJSON_TRY( start_container('[', "Not an array", "array") ); return started_root_array(); } inline std::string value_iterator::to_string() const noexcept { auto answer = std::string("value_iterator [ depth : ") + std::to_string(_depth) + std::string(", "); if(_json_iter != nullptr) { answer += _json_iter->to_string(); } answer += std::string(" ]"); return answer; } simdjson_warn_unused simdjson_inline simdjson_result value_iterator::started_array() noexcept { assert_at_container_start(); if 
(*_json_iter->peek() == ']') { logger::log_value(*_json_iter, "empty array"); _json_iter->return_current_and_advance(); SIMDJSON_TRY( end_container() ); return false; } _json_iter->descend_to(depth()+1); #if SIMDJSON_DEVELOPMENT_CHECKS _json_iter->set_start_position(_depth, start_position()); #endif return true; } simdjson_warn_unused simdjson_inline error_code value_iterator::check_root_array() noexcept { // When in streaming mode, we cannot expect peek_last() to be the last structural element of the // current document. It only works in the normal mode where we have indexed a single document. // Note that adding a check for 'streaming' is not expensive since we only have at most // one root element. if ( ! _json_iter->streaming() ) { // The following lines do not fully protect against garbage content within the // array: e.g., `[1, 2] foo]`. Users concerned with garbage content should // also call `at_end()` on the document instance at the end of the processing to // ensure that the processing has finished at the end. // if (*_json_iter->peek_last() != ']') { _json_iter->abandon(); return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "missing ] at end"); } // If the last character is ] *and* the first gibberish character is also ']' // then on-demand could accidentally go over. So we need additional checks. // https://github.com/simdjson/simdjson/issues/1834 // Checking that the document is balanced requires a full scan which is potentially // expensive, but it only happens in edge cases where the first padding character is // a closing bracket. if ((*_json_iter->peek(_json_iter->end_position()) == ']') && (!_json_iter->balanced())) { _json_iter->abandon(); // The exact error would require more work. It will typically be an unclosed array. 
return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "the document is unbalanced");
    }
  }
  return SUCCESS;
}

// Root-array variant of started_array(): check_root_array() runs first and its
// error, if any, is returned without touching the array.
simdjson_warn_unused simdjson_inline simdjson_result value_iterator::started_root_array() noexcept {
  auto error = check_root_array();
  if (error) { return error; }
  return started_array();
}

// Consumes the token that follows an array element.
//   ']' -> the array is closed through end_container(); returns false.
//   ',' -> descends to depth()+1 for the next element; returns true.
//   anything else -> TAPE_ERROR ("Missing comma between array elements").
simdjson_warn_unused simdjson_inline simdjson_result value_iterator::has_next_element() noexcept {
  assert_at_next();

  logger::log_event(*this, "has_next_element");
  switch (*_json_iter->return_current_and_advance()) {
    case ']':
      logger::log_end_value(*_json_iter, "array");
      SIMDJSON_TRY( end_container() );
      return false;
    case ',':
      _json_iter->descend_to(depth()+1);
      return true;
    default:
      return report_error(TAPE_ERROR, "Missing comma between array elements");
  }
}

// Parses the literal `true` or `false` at `json`.
// not_true / not_false are treated as "no match" flags: a zero value means the
// corresponding literal matched (the result is built from !not_true).
// The byte right after the literal (index 4 for "true", 5 for "false") must not
// be flagged by is_not_structural_or_whitespace(), otherwise the token is
// rejected as "Not a boolean".
simdjson_warn_unused simdjson_inline simdjson_result value_iterator::parse_bool(const uint8_t *json) const noexcept {
  auto not_true = atomparsing::str4ncmp(json, "true");
  // "false" has five letters: compare the first four, then fold in the fifth.
  auto not_false = atomparsing::str4ncmp(json, "fals") | (json[4] ^ 'e');
  bool error = (not_true && not_false) || jsoncharutils::is_not_structural_or_whitespace(json[not_true ?
5 : 4]);
  if (error) { return incorrect_type_error("Not a boolean"); }
  return simdjson_result(!not_true);
}

// Tells whether the token at `json` is the literal `null` followed by a byte
// accepted by is_structural_or_whitespace().
// A token that starts with 'n' without being a proper null is reported as an
// INCORRECT_TYPE error rather than as `false`.
simdjson_warn_unused simdjson_inline simdjson_result value_iterator::parse_null(const uint8_t *json) const noexcept {
  bool is_null_string = !atomparsing::str4ncmp(json, "null") && jsoncharutils::is_structural_or_whitespace(json[4]);
  // if we start with 'n', we must be a null
  if(!is_null_string && json[0]=='n') { return incorrect_type_error("Not a null but starts with n"); }
  return is_null_string;
}

// Reads this value as a string: fetches the raw JSON string, then unescapes it
// through unescape(), forwarding `allow_replacement`.
simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_string(bool allow_replacement) noexcept {
  return get_raw_json_string().unescape(json_iter(), allow_replacement);
}

// Convenience overload: reads the string as above and assigns it to `receiver`.
// Returns the error from get_string() unchanged, or SUCCESS once assigned.
template
simdjson_warn_unused simdjson_inline error_code value_iterator::get_string(string_type& receiver, bool allow_replacement) noexcept {
  std::string_view content;
  auto err = get_string(allow_replacement).get(content);
  if (err) { return err; }
  receiver = content;
  return SUCCESS;
}

// Same as get_string(), but unescapes through unescape_wobbly().
simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_wobbly_string() noexcept {
  return get_raw_json_string().unescape_wobbly(json_iter());
}

// Returns the string token without unescaping it. The token must begin with a
// double quote ("Not a string" otherwise); the cursor is only advanced after
// that check, and the result points just past the opening quote.
simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_raw_json_string() noexcept {
  auto json = peek_scalar("string");
  if (*json != '"') { return incorrect_type_error("Not a string"); }
  advance_scalar("string");
  return raw_json_string(json+1);
}

// Parses this (non-root) value with parse_unsigned(). The cursor advances only
// when parsing succeeded, so a failed read leaves the value available.
simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_uint64() noexcept {
  auto result = numberparsing::parse_unsigned(peek_non_root_scalar("uint64"));
  if(result.error() == SUCCESS) { advance_non_root_scalar("uint64"); }
  return result;
}

// Same contract as get_uint64(), using parse_unsigned_in_string().
simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_uint64_in_string() noexcept {
  auto result = numberparsing::parse_unsigned_in_string(peek_non_root_scalar("uint64"));
  if(result.error() == SUCCESS) { advance_non_root_scalar("uint64"); }
  return result;
}

simdjson_warn_unused simdjson_inline simdjson_result
value_iterator::get_int64() noexcept { auto result = numberparsing::parse_integer(peek_non_root_scalar("int64")); if(result.error() == SUCCESS) { advance_non_root_scalar("int64"); } return result; } simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_int64_in_string() noexcept { auto result = numberparsing::parse_integer_in_string(peek_non_root_scalar("int64")); if(result.error() == SUCCESS) { advance_non_root_scalar("int64"); } return result; } simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_double() noexcept { auto result = numberparsing::parse_double(peek_non_root_scalar("double")); if(result.error() == SUCCESS) { advance_non_root_scalar("double"); } return result; } simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_double_in_string() noexcept { auto result = numberparsing::parse_double_in_string(peek_non_root_scalar("double")); if(result.error() == SUCCESS) { advance_non_root_scalar("double"); } return result; } simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_bool() noexcept { auto result = parse_bool(peek_non_root_scalar("bool")); if(result.error() == SUCCESS) { advance_non_root_scalar("bool"); } return result; } simdjson_inline simdjson_result value_iterator::is_null() noexcept { bool is_null_value; SIMDJSON_TRY(parse_null(peek_non_root_scalar("null")).get(is_null_value)); if(is_null_value) { advance_non_root_scalar("null"); } return is_null_value; } simdjson_inline bool value_iterator::is_negative() noexcept { return numberparsing::is_negative(peek_non_root_scalar("numbersign")); } simdjson_inline bool value_iterator::is_root_negative() noexcept { return numberparsing::is_negative(peek_root_scalar("numbersign")); } simdjson_inline simdjson_result value_iterator::is_integer() noexcept { return numberparsing::is_integer(peek_non_root_scalar("integer")); } simdjson_inline simdjson_result value_iterator::get_number_type() noexcept { return 
numberparsing::get_number_type(peek_non_root_scalar("integer"));
}

// Parses this (non-root) value into a `number`. Returns the parser's error
// unchanged. Note that this function only peeks: it never calls
// advance_non_root_scalar().
simdjson_inline simdjson_result value_iterator::get_number() noexcept {
  number num;
  error_code error = numberparsing::parse_number(peek_non_root_scalar("number"), num);
  if(error) { return error; }
  return num;
}

// Tells whether the root value is an integer.
// The token is first copied into a local, null-terminated buffer and the test
// runs on that copy. When `check_trailing` is set and the test succeeded, the
// document must also consist of a single token, otherwise TRAILING_CONTENT is
// returned.
simdjson_inline simdjson_result value_iterator::is_root_integer(bool check_trailing) noexcept {
  auto max_len = peek_start_length();
  auto json = peek_root_scalar("is_root_integer");
  uint8_t tmpbuf[20+1+1]{}; // <20 digits> is the longest possible unsigned integer
  tmpbuf[20+1] = '\0'; // make sure that buffer is always null terminated.
  if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 20+1)) {
    return false; // if there are more than 20 characters, it cannot be represented as an integer.
  }
  auto answer = numberparsing::is_integer(tmpbuf);
  // If the parsing was a success, we must still check that it is
  // a single scalar. Note that we parse first because of cases like '[]' where
  // getting TRAILING_CONTENT is wrong.
  if(check_trailing && (answer.error() == SUCCESS) && (!_json_iter->is_single_token())) { return TRAILING_CONTENT; }
  return answer;
}

// Classifies the root number through numberparsing::get_number_type().
// As with the other root accessors, the token is copied into a local,
// null-terminated buffer before it is examined.
simdjson_inline simdjson_result value_iterator::get_root_number_type(bool check_trailing) noexcept {
  auto max_len = peek_start_length();
  auto json = peek_root_scalar("number");
  // Per https://www.exploringbinary.com/maximum-number-of-decimal-digits-in-binary-floating-point-numbers/,
  // 1074 is the maximum number of significant fractional digits. Add 8 more digits for the biggest
  // number: -0.<fraction>e-308.
  uint8_t tmpbuf[1074+8+1+1];
  tmpbuf[1074+8+1] = '\0'; // make sure that buffer is always null terminated.
if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 1074+8+1)) {
    // The token did not fit in the local buffer: treat it as an invalid number.
    logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 1082 characters");
    return NUMBER_ERROR;
  }
  auto answer = numberparsing::get_number_type(tmpbuf);
  if (check_trailing && (answer.error() == SUCCESS) && !_json_iter->is_single_token()) { return TRAILING_CONTENT; }
  return answer;
}

// Parses the root value into a `number`.
// Steps: copy the token into a local null-terminated buffer (NUMBER_ERROR if it
// does not fit), parse the copy, optionally require a single-token document
// (TRAILING_CONTENT otherwise), and only then advance past the scalar.
simdjson_inline simdjson_result value_iterator::get_root_number(bool check_trailing) noexcept {
  auto max_len = peek_start_length();
  auto json = peek_root_scalar("number");
  // Per https://www.exploringbinary.com/maximum-number-of-decimal-digits-in-binary-floating-point-numbers/,
  // 1074 is the maximum number of significant fractional digits. Add 8 more digits for the biggest
  // number: -0.<fraction>e-308.
  uint8_t tmpbuf[1074+8+1+1];
  tmpbuf[1074+8+1] = '\0'; // make sure that buffer is always null terminated.
  if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 1074+8+1)) {
    logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 1082 characters");
    return NUMBER_ERROR;
  }
  number num;
  error_code error = numberparsing::parse_number(tmpbuf, num);
  if(error) { return error; }
  if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; }
  advance_root_scalar("number");
  return num;
}

// Reads the root value as a string: fetches the raw root string (forwarding
// `check_trailing`), then unescapes it with `allow_replacement`.
simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_string(bool check_trailing, bool allow_replacement) noexcept {
  return get_root_raw_json_string(check_trailing).unescape(json_iter(), allow_replacement);
}

// Convenience overload: reads the root string as above and assigns it to
// `receiver`. Returns the underlying error unchanged, or SUCCESS once assigned.
template
simdjson_warn_unused simdjson_inline error_code value_iterator::get_root_string(string_type& receiver, bool check_trailing, bool allow_replacement) noexcept {
  std::string_view content;
  auto err = get_root_string(check_trailing, allow_replacement).get(content);
  if (err) { return err; }
  receiver = content;
  return SUCCESS;
}

// Same as get_root_string(), but unescapes through unescape_wobbly().
simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_wobbly_string(bool check_trailing) noexcept {
  return
get_root_raw_json_string(check_trailing).unescape_wobbly(json_iter());
}

// Returns the root string token without unescaping it.
// Checks, in order: the token starts with a double quote ("Not a string"
// otherwise); when `check_trailing` is set, the document is a single token
// (TRAILING_CONTENT otherwise). Only then is the cursor advanced. The result
// points just past the opening quote.
simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_raw_json_string(bool check_trailing) noexcept {
  auto json = peek_scalar("string");
  if (*json != '"') { return incorrect_type_error("Not a string"); }
  if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; }
  advance_scalar("string");
  return raw_json_string(json+1);
}

// Parses the root value with parse_unsigned().
// The token is copied into a local null-terminated buffer first (NUMBER_ERROR
// if copy_to_buffer() refuses it). On a successful parse the optional
// single-token check runs and the cursor advances; on a failed parse the cursor
// stays in place and the parse error is returned.
simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_uint64(bool check_trailing) noexcept {
  auto max_len = peek_start_length();
  auto json = peek_root_scalar("uint64");
  uint8_t tmpbuf[20+1+1]{}; // <20 digits> is the longest possible unsigned integer
  tmpbuf[20+1] = '\0'; // make sure that buffer is always null terminated.
  if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 20+1)) {
    logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 20 characters");
    return NUMBER_ERROR;
  }
  auto result = numberparsing::parse_unsigned(tmpbuf);
  if(result.error() == SUCCESS) {
    if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; }
    advance_root_scalar("uint64");
  }
  return result;
}

// Same contract as get_root_uint64(), using parse_unsigned_in_string().
simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_uint64_in_string(bool check_trailing) noexcept {
  auto max_len = peek_start_length();
  auto json = peek_root_scalar("uint64");
  uint8_t tmpbuf[20+1+1]{}; // <20 digits> is the longest possible unsigned integer
  tmpbuf[20+1] = '\0'; // make sure that buffer is always null terminated.
if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 20+1)) { logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 20 characters"); return NUMBER_ERROR; } auto result = numberparsing::parse_unsigned_in_string(tmpbuf); if(result.error() == SUCCESS) { if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } advance_root_scalar("uint64"); } return result; } simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_int64(bool check_trailing) noexcept { auto max_len = peek_start_length(); auto json = peek_root_scalar("int64"); uint8_t tmpbuf[20+1+1]; // -<19 digits> is the longest possible integer tmpbuf[20+1] = '\0'; // make sure that buffer is always null terminated. if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 20+1)) { logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 20 characters"); return NUMBER_ERROR; } auto result = numberparsing::parse_integer(tmpbuf); if(result.error() == SUCCESS) { if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } advance_root_scalar("int64"); } return result; } simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_int64_in_string(bool check_trailing) noexcept { auto max_len = peek_start_length(); auto json = peek_root_scalar("int64"); uint8_t tmpbuf[20+1+1]; // -<19 digits> is the longest possible integer tmpbuf[20+1] = '\0'; // make sure that buffer is always null terminated. 
if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 20+1)) { logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 20 characters"); return NUMBER_ERROR; } auto result = numberparsing::parse_integer_in_string(tmpbuf); if(result.error() == SUCCESS) { if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } advance_root_scalar("int64"); } return result; } simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_double(bool check_trailing) noexcept { auto max_len = peek_start_length(); auto json = peek_root_scalar("double"); // Per https://www.exploringbinary.com/maximum-number-of-decimal-digits-in-binary-floating-point-numbers/, // 1074 is the maximum number of significant fractional digits. Add 8 more digits for the biggest // number: -0.e-308. uint8_t tmpbuf[1074+8+1+1]; // +1 for null termination. tmpbuf[1074+8+1] = '\0'; // make sure that buffer is always null terminated. if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 1074+8+1)) { logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 1082 characters"); return NUMBER_ERROR; } auto result = numberparsing::parse_double(tmpbuf); if(result.error() == SUCCESS) { if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } advance_root_scalar("double"); } return result; } simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_double_in_string(bool check_trailing) noexcept { auto max_len = peek_start_length(); auto json = peek_root_scalar("double"); // Per https://www.exploringbinary.com/maximum-number-of-decimal-digits-in-binary-floating-point-numbers/, // 1074 is the maximum number of significant fractional digits. Add 8 more digits for the biggest // number: -0.e-308. uint8_t tmpbuf[1074+8+1+1]; // +1 for null termination. tmpbuf[1074+8+1] = '\0'; // make sure that buffer is always null terminated. 
if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 1074+8+1)) {
    logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 1082 characters");
    return NUMBER_ERROR;
  }
  auto result = numberparsing::parse_double_in_string(tmpbuf);
  if(result.error() == SUCCESS) {
    // Only a successfully parsed value is checked for trailing content and consumed.
    if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; }
    advance_root_scalar("double");
  }
  return result;
}

// Parses the root value with parse_bool().
// The token is copied into a local null-terminated buffer first; a token that
// copy_to_buffer() refuses is reported as "Not a boolean". After a successful
// parse, `check_trailing` requires the document to be a single token
// (TRAILING_CONTENT otherwise) and the cursor advances.
simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_bool(bool check_trailing) noexcept {
  auto max_len = peek_start_length();
  auto json = peek_root_scalar("bool");
  uint8_t tmpbuf[5+1+1]; // +1 for null termination
  tmpbuf[5+1] = '\0'; // make sure that buffer is always null terminated.
  if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 5+1)) { return incorrect_type_error("Not a boolean"); }
  auto result = parse_bool(tmpbuf);
  if(result.error() == SUCCESS) {
    if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; }
    advance_root_scalar("bool");
  }
  return result;
}

// Tells whether the root value is the literal `null`.
// Unlike the other root accessors this works directly on the input, so every
// access is guarded by `max_len`: the token must hold at least four bytes equal
// to "null", and either end right there or be followed by a byte accepted by
// is_structural_or_whitespace().
simdjson_inline simdjson_result value_iterator::is_root_null(bool check_trailing) noexcept {
  auto max_len = peek_start_length();
  auto json = peek_root_scalar("null");
  bool result = (max_len >= 4 && !atomparsing::str4ncmp(json, "null") &&
         (max_len == 4 || jsoncharutils::is_structural_or_whitespace(json[4])));
  if(result) { // we have something that looks like a null.
if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } advance_root_scalar("null"); } return result; } simdjson_warn_unused simdjson_inline error_code value_iterator::skip_child() noexcept { SIMDJSON_ASSUME( _json_iter->token._position > _start_position ); SIMDJSON_ASSUME( _json_iter->_depth >= _depth ); return _json_iter->skip_child(depth()); } simdjson_inline value_iterator value_iterator::child() const noexcept { assert_at_child(); return { _json_iter, depth()+1, _json_iter->token.position() }; } // GCC 7 warns when the first line of this function is inlined away into oblivion due to the caller // relating depth and iterator depth, which is a desired effect. It does not happen if is_open is // marked non-inline. SIMDJSON_PUSH_DISABLE_WARNINGS SIMDJSON_DISABLE_STRICT_OVERFLOW_WARNING simdjson_inline bool value_iterator::is_open() const noexcept { return _json_iter->depth() >= depth(); } SIMDJSON_POP_DISABLE_WARNINGS simdjson_inline bool value_iterator::at_end() const noexcept { return _json_iter->at_end(); } simdjson_inline bool value_iterator::at_start() const noexcept { return _json_iter->token.position() == start_position(); } simdjson_inline bool value_iterator::at_first_field() const noexcept { SIMDJSON_ASSUME( _json_iter->token._position > _start_position ); return _json_iter->token.position() == start_position() + 1; } simdjson_inline void value_iterator::abandon() noexcept { _json_iter->abandon(); } simdjson_warn_unused simdjson_inline depth_t value_iterator::depth() const noexcept { return _depth; } simdjson_warn_unused simdjson_inline error_code value_iterator::error() const noexcept { return _json_iter->error; } simdjson_warn_unused simdjson_inline uint8_t *&value_iterator::string_buf_loc() noexcept { return _json_iter->string_buf_loc(); } simdjson_warn_unused simdjson_inline const json_iterator &value_iterator::json_iter() const noexcept { return *_json_iter; } simdjson_warn_unused simdjson_inline json_iterator 
&value_iterator::json_iter() noexcept { return *_json_iter; } simdjson_inline const uint8_t *value_iterator::peek_start() const noexcept { return _json_iter->peek(start_position()); } simdjson_inline uint32_t value_iterator::peek_start_length() const noexcept { return _json_iter->peek_length(start_position()); } simdjson_inline const uint8_t *value_iterator::peek_scalar(const char *type) noexcept { logger::log_value(*_json_iter, start_position(), depth(), type); // If we're not at the position anymore, we don't want to advance the cursor. if (!is_at_start()) { return peek_start(); } // Get the JSON and advance the cursor, decreasing depth to signify that we have retrieved the value. assert_at_start(); return _json_iter->peek(); } simdjson_inline void value_iterator::advance_scalar(const char *type) noexcept { logger::log_value(*_json_iter, start_position(), depth(), type); // If we're not at the position anymore, we don't want to advance the cursor. if (!is_at_start()) { return; } // Get the JSON and advance the cursor, decreasing depth to signify that we have retrieved the value. assert_at_start(); _json_iter->return_current_and_advance(); _json_iter->ascend_to(depth()-1); } simdjson_inline error_code value_iterator::start_container(uint8_t start_char, const char *incorrect_type_message, const char *type) noexcept { logger::log_start_value(*_json_iter, start_position(), depth(), type); // If we're not at the position anymore, we don't want to advance the cursor. const uint8_t *json; if (!is_at_start()) { #if SIMDJSON_DEVELOPMENT_CHECKS if (!is_at_iterator_start()) { return OUT_OF_ORDER_ITERATION; } #endif json = peek_start(); if (*json != start_char) { return incorrect_type_error(incorrect_type_message); } } else { assert_at_start(); /** * We should be prudent. Let us peek. If it is not the right type, we * return an error. Only once we have determined that we have the right * type are we allowed to advance! 
*/
    json = _json_iter->peek();
    if (*json != start_char) { return incorrect_type_error(incorrect_type_message); }
    _json_iter->return_current_and_advance();
  }

  return SUCCESS;
}

// Returns a pointer to the root scalar's token without consuming it.
// When the cursor has already moved, the token is re-read from the value's
// start position. `type` is only used for logging.
simdjson_inline const uint8_t *value_iterator::peek_root_scalar(const char *type) noexcept {
  logger::log_value(*_json_iter, start_position(), depth(), type);
  if (!is_at_start()) { return peek_start(); }

  assert_at_root();
  return _json_iter->peek();
}

// Non-root counterpart of peek_root_scalar(): same behavior, but asserts that
// the value is not at the root.
simdjson_inline const uint8_t *value_iterator::peek_non_root_scalar(const char *type) noexcept {
  logger::log_value(*_json_iter, start_position(), depth(), type);
  if (!is_at_start()) { return peek_start(); }

  assert_at_non_root_start();
  return _json_iter->peek();
}

// Consumes the root scalar if the cursor is still on it, then ascends to
// depth()-1. Does nothing when the value was already consumed.
simdjson_inline void value_iterator::advance_root_scalar(const char *type) noexcept {
  logger::log_value(*_json_iter, start_position(), depth(), type);
  if (!is_at_start()) { return; }

  assert_at_root();
  _json_iter->return_current_and_advance();
  _json_iter->ascend_to(depth()-1);
}

// Non-root counterpart of advance_root_scalar().
simdjson_inline void value_iterator::advance_non_root_scalar(const char *type) noexcept {
  logger::log_value(*_json_iter, start_position(), depth(), type);
  if (!is_at_start()) { return; }

  assert_at_non_root_start();
  _json_iter->return_current_and_advance();
  _json_iter->ascend_to(depth()-1);
}

// Logs `message` against this value and returns INCORRECT_TYPE.
simdjson_inline error_code value_iterator::incorrect_type_error(const char *message) const noexcept {
  logger::log_error(*_json_iter, start_position(), depth(), message);
  return INCORRECT_TYPE;
}

// True when the cursor sits exactly on this value's first token.
simdjson_inline bool value_iterator::is_at_start() const noexcept {
  return position() == start_position();
}

// True when the iterator is at this value's depth and the current token
// begins with a double quote.
simdjson_inline bool value_iterator::is_at_key() const noexcept {
  // Keys are at the same depth as the object.
  // Note here that we could be safer and check that we are within an object,
  // but we do not.
return _depth == _json_iter->_depth && *_json_iter->peek() == '"';
}

// True when the cursor is one or two tokens past this value's start.
simdjson_inline bool value_iterator::is_at_iterator_start() const noexcept {
  // We can legitimately be either at the first value ([1]), or after the array if it's empty ([]).
  auto delta = position() - start_position();
  return delta == 1 || delta == 2;
}

// Assumes the cursor is on this value's first token, at this value's depth,
// and that the depth is positive.
inline void value_iterator::assert_at_start() const noexcept {
  SIMDJSON_ASSUME( _json_iter->token._position == _start_position );
  SIMDJSON_ASSUME( _json_iter->_depth == _depth );
  SIMDJSON_ASSUME( _depth > 0 );
}

// Assumes the cursor is exactly one token past this value's start, at this
// value's depth.
inline void value_iterator::assert_at_container_start() const noexcept {
  SIMDJSON_ASSUME( _json_iter->token._position == _start_position + 1 );
  SIMDJSON_ASSUME( _json_iter->_depth == _depth );
  SIMDJSON_ASSUME( _depth > 0 );
}

// Assumes the cursor is somewhere past this value's start, at this value's
// depth.
inline void value_iterator::assert_at_next() const noexcept {
  SIMDJSON_ASSUME( _json_iter->token._position > _start_position );
  SIMDJSON_ASSUME( _json_iter->_depth == _depth );
  SIMDJSON_ASSUME( _depth > 0 );
}

// Rewinds the underlying iterator to this value's first token and depth.
simdjson_inline void value_iterator::move_at_start() noexcept {
  _json_iter->_depth = _depth;
  _json_iter->token.set_position(_start_position);
}

// Rewinds the underlying iterator to the token right after this value's first
// token, at this value's depth.
simdjson_inline void value_iterator::move_at_container_start() noexcept {
  _json_iter->_depth = _depth;
  _json_iter->token.set_position(_start_position + 1);
}

// Restarts iteration over this array: returns the stored error if there is
// one, otherwise rewinds to the container start and re-runs started_array().
simdjson_inline simdjson_result value_iterator::reset_array() noexcept {
  if(error()) { return error(); }
  move_at_container_start();
  return started_array();
}

// Restarts iteration over this object: returns the stored error if there is
// one, otherwise rewinds to the container start and re-runs started_object().
simdjson_inline simdjson_result value_iterator::reset_object() noexcept {
  if(error()) { return error(); }
  move_at_container_start();
  return started_object();
}

// Assumes the cursor is past this value's start and exactly one level deeper.
inline void value_iterator::assert_at_child() const noexcept {
  SIMDJSON_ASSUME( _json_iter->token._position > _start_position );
  SIMDJSON_ASSUME( _json_iter->_depth == _depth + 1 );
  SIMDJSON_ASSUME( _depth > 0 );
}

// assert_at_start() plus the assumption that this value is at depth 1.
inline void value_iterator::assert_at_root() const noexcept {
  assert_at_start();
  SIMDJSON_ASSUME( _depth == 1 );
}

// assert_at_start() plus the assumption that this value is deeper than depth 1.
inline void value_iterator::assert_at_non_root_start() const noexcept {
assert_at_start();
  SIMDJSON_ASSUME( _depth > 1 );
}

// Assumes this iterator is bound to a json_iterator.
inline void value_iterator::assert_is_valid() const noexcept {
  SIMDJSON_ASSUME( _json_iter != nullptr );
}

// True when this iterator is bound to a json_iterator.
simdjson_inline bool value_iterator::is_valid() const noexcept {
  return _json_iter != nullptr;
}

// Classifies this value from the first byte of its first token.
// Only that single byte is inspected here; any byte not listed below yields
// TAPE_ERROR.
simdjson_inline simdjson_result value_iterator::type() const noexcept {
  switch (*peek_start()) {
    case '{':
      return json_type::object;
    case '[':
      return json_type::array;
    case '"':
      return json_type::string;
    case 'n':
      return json_type::null;
    case 't': case 'f':
      return json_type::boolean;
    case '-':
    case '0': case '1': case '2': case '3': case '4':
    case '5': case '6': case '7': case '8': case '9':
      return json_type::number;
    default:
      return TAPE_ERROR;
  }
}

// Token position at which this value starts.
simdjson_inline token_position value_iterator::start_position() const noexcept {
  return _start_position;
}

// Current token position of the underlying iterator.
simdjson_inline token_position value_iterator::position() const noexcept {
  return _json_iter->position();
}

// Forwards to json_iterator::end_position().
simdjson_inline token_position value_iterator::end_position() const noexcept {
  return _json_iter->end_position();
}

// Forwards to json_iterator::last_position().
simdjson_inline token_position value_iterator::last_position() const noexcept {
  return _json_iter->last_position();
}

// Forwards to json_iterator::report_error() and returns its result.
simdjson_inline error_code value_iterator::report_error(error_code error, const char *message) noexcept {
  return _json_iter->report_error(error, message);
}

} // namespace ondemand
} // namespace haswell
} // namespace simdjson

namespace simdjson {

// Result wrapper holding a value_iterator.
simdjson_inline simdjson_result::simdjson_result(haswell::ondemand::value_iterator &&value) noexcept
    : implementation_simdjson_result_base(std::forward(value)) {}
// Result wrapper holding only an error code.
simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept
    : implementation_simdjson_result_base(error) {}

} // namespace simdjson

#endif // SIMDJSON_GENERIC_ONDEMAND_VALUE_ITERATOR_INL_H
/* end file simdjson/generic/ondemand/value_iterator-inl.h for haswell */
/* end file simdjson/generic/ondemand/amalgamated.h for haswell */
/* including simdjson/haswell/end.h: #include "simdjson/haswell/end.h" */
/* begin
file simdjson/haswell/end.h */ /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #include "simdjson/haswell/base.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ #if !SIMDJSON_CAN_ALWAYS_RUN_HASWELL SIMDJSON_UNTARGET_REGION #endif /* undefining SIMDJSON_IMPLEMENTATION from "haswell" */ #undef SIMDJSON_IMPLEMENTATION /* end file simdjson/haswell/end.h */ #endif // SIMDJSON_HASWELL_ONDEMAND_H /* end file simdjson/haswell/ondemand.h */ #elif SIMDJSON_BUILTIN_IMPLEMENTATION_IS(icelake) /* including simdjson/icelake/ondemand.h: #include "simdjson/icelake/ondemand.h" */ /* begin file simdjson/icelake/ondemand.h */ #ifndef SIMDJSON_ICELAKE_ONDEMAND_H #define SIMDJSON_ICELAKE_ONDEMAND_H /* including simdjson/icelake/begin.h: #include "simdjson/icelake/begin.h" */ /* begin file simdjson/icelake/begin.h */ /* defining SIMDJSON_IMPLEMENTATION to "icelake" */ #define SIMDJSON_IMPLEMENTATION icelake /* including simdjson/icelake/base.h: #include "simdjson/icelake/base.h" */ /* begin file simdjson/icelake/base.h */ #ifndef SIMDJSON_ICELAKE_BASE_H #define SIMDJSON_ICELAKE_BASE_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #include "simdjson/base.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ // The constructor may be executed on any host, so we take care not to use SIMDJSON_TARGET_ICELAKE namespace simdjson { /** * Implementation for Icelake (Intel AVX512). 
*/ namespace icelake { class implementation; } // namespace icelake } // namespace simdjson #endif // SIMDJSON_ICELAKE_BASE_H /* end file simdjson/icelake/base.h */ /* including simdjson/icelake/intrinsics.h: #include "simdjson/icelake/intrinsics.h" */ /* begin file simdjson/icelake/intrinsics.h */ #ifndef SIMDJSON_ICELAKE_INTRINSICS_H #define SIMDJSON_ICELAKE_INTRINSICS_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #include "simdjson/icelake/base.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ #if SIMDJSON_VISUAL_STUDIO // under clang within visual studio, this will include #include // visual studio or clang #else #include // elsewhere #endif // SIMDJSON_VISUAL_STUDIO #if SIMDJSON_CLANG_VISUAL_STUDIO /** * You are not supposed, normally, to include these * headers directly. Instead you should either include intrin.h * or x86intrin.h. However, when compiling with clang * under Windows (i.e., when _MSC_VER is set), these headers * only get included *if* the corresponding features are detected * from macros: * e.g., if __AVX2__ is set... in turn, we normally set these * macros by compiling against the corresponding architecture * (e.g., arch:AVX2, -mavx2, etc.) which compiles the whole * software with these advanced instructions. In simdjson, we * want to compile the whole program for a generic target, * and only target our specific kernels. As a workaround, * we directly include the needed headers. These headers would * normally guard against such usage, but we carefully included * (or ) before, so the headers * are fooled. 
*/ #include // for _blsr_u64 #include // for __lzcnt64 #include // for most things (AVX2, AVX512, _popcnt64) #include #include #include #include #include // for _mm_clmulepi64_si128 // Important: we need the AVX-512 headers: #include #include #include #include #include #include #include // unfortunately, we may not get _blsr_u64, but, thankfully, clang // has it as a macro. #ifndef _blsr_u64 // we roll our own #define _blsr_u64(n) ((n - 1) & n) #endif // _blsr_u64 #endif // SIMDJSON_CLANG_VISUAL_STUDIO static_assert(sizeof(__m512i) <= simdjson::SIMDJSON_PADDING, "insufficient padding for icelake"); #endif // SIMDJSON_ICELAKE_INTRINSICS_H /* end file simdjson/icelake/intrinsics.h */ #if !SIMDJSON_CAN_ALWAYS_RUN_ICELAKE SIMDJSON_TARGET_REGION("avx512f,avx512dq,avx512cd,avx512bw,avx512vbmi,avx512vbmi2,avx512vl,avx2,bmi,pclmul,lzcnt,popcnt") #endif /* including simdjson/icelake/bitmanipulation.h: #include "simdjson/icelake/bitmanipulation.h" */ /* begin file simdjson/icelake/bitmanipulation.h */ #ifndef SIMDJSON_ICELAKE_BITMANIPULATION_H #define SIMDJSON_ICELAKE_BITMANIPULATION_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #include "simdjson/icelake/base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/icelake/intrinsics.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { namespace icelake { namespace { // We sometimes call trailing_zero on inputs that are zero, // but the algorithms do not end up using the returned value. // Sadly, sanitizers are not smart enough to figure it out. SIMDJSON_NO_SANITIZE_UNDEFINED // This function can be used safely even if not all bytes have been // initialized. 
// See issue https://github.com/simdjson/simdjson/issues/1965 SIMDJSON_NO_SANITIZE_MEMORY simdjson_inline int trailing_zeroes(uint64_t input_num) { #if SIMDJSON_REGULAR_VISUAL_STUDIO return (int)_tzcnt_u64(input_num); #else // SIMDJSON_REGULAR_VISUAL_STUDIO //////// // You might expect the next line to be equivalent to // return (int)_tzcnt_u64(input_num); // but the generated code differs and might be less efficient? //////// return __builtin_ctzll(input_num); #endif // SIMDJSON_REGULAR_VISUAL_STUDIO } /* result might be undefined when input_num is zero */ simdjson_inline uint64_t clear_lowest_bit(uint64_t input_num) { return _blsr_u64(input_num); } /* result might be undefined when input_num is zero */ simdjson_inline int leading_zeroes(uint64_t input_num) { return int(_lzcnt_u64(input_num)); } #if SIMDJSON_REGULAR_VISUAL_STUDIO simdjson_inline unsigned __int64 count_ones(uint64_t input_num) { // note: we do not support legacy 32-bit Windows return __popcnt64(input_num);// Visual Studio wants two underscores } #else simdjson_inline long long int count_ones(uint64_t input_num) { return _popcnt64(input_num); } #endif simdjson_inline bool add_overflow(uint64_t value1, uint64_t value2, uint64_t *result) { #if SIMDJSON_REGULAR_VISUAL_STUDIO return _addcarry_u64(0, value1, value2, reinterpret_cast(result)); #else return __builtin_uaddll_overflow(value1, value2, reinterpret_cast(result)); #endif } } // unnamed namespace } // namespace icelake } // namespace simdjson #endif // SIMDJSON_ICELAKE_BITMANIPULATION_H /* end file simdjson/icelake/bitmanipulation.h */ /* including simdjson/icelake/bitmask.h: #include "simdjson/icelake/bitmask.h" */ /* begin file simdjson/icelake/bitmask.h */ #ifndef SIMDJSON_ICELAKE_BITMASK_H #define SIMDJSON_ICELAKE_BITMASK_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #include "simdjson/icelake/base.h" */ /* amalgamation skipped (editor-only): #include 
"simdjson/icelake/intrinsics.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { namespace icelake { namespace { // // Perform a "cumulative bitwise xor," flipping bits each time a 1 is encountered. // // For example, prefix_xor(00100100) == 00011100 // simdjson_inline uint64_t prefix_xor(const uint64_t bitmask) { // There should be no such thing with a processor supporting avx2 // but not clmul. __m128i all_ones = _mm_set1_epi8('\xFF'); __m128i result = _mm_clmulepi64_si128(_mm_set_epi64x(0ULL, bitmask), all_ones, 0); return _mm_cvtsi128_si64(result); } } // unnamed namespace } // namespace icelake } // namespace simdjson #endif // SIMDJSON_ICELAKE_BITMASK_H /* end file simdjson/icelake/bitmask.h */ /* including simdjson/icelake/simd.h: #include "simdjson/icelake/simd.h" */ /* begin file simdjson/icelake/simd.h */ #ifndef SIMDJSON_ICELAKE_SIMD_H #define SIMDJSON_ICELAKE_SIMD_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #include "simdjson/icelake/base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/icelake/intrinsics.h" */ /* amalgamation skipped (editor-only): #include "simdjson/icelake/bitmanipulation.h" */ /* amalgamation skipped (editor-only): #include "simdjson/internal/simdprune_tables.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ #if defined(__GNUC__) && !defined(__clang__) #if __GNUC__ == 8 #define SIMDJSON_GCC8 1 #endif // __GNUC__ == 8 #endif // defined(__GNUC__) && !defined(__clang__) #if SIMDJSON_GCC8 /** * GCC 8 fails to provide _mm512_set_epi8. We roll our own. 
*/ inline __m512i _mm512_set_epi8(uint8_t a0, uint8_t a1, uint8_t a2, uint8_t a3, uint8_t a4, uint8_t a5, uint8_t a6, uint8_t a7, uint8_t a8, uint8_t a9, uint8_t a10, uint8_t a11, uint8_t a12, uint8_t a13, uint8_t a14, uint8_t a15, uint8_t a16, uint8_t a17, uint8_t a18, uint8_t a19, uint8_t a20, uint8_t a21, uint8_t a22, uint8_t a23, uint8_t a24, uint8_t a25, uint8_t a26, uint8_t a27, uint8_t a28, uint8_t a29, uint8_t a30, uint8_t a31, uint8_t a32, uint8_t a33, uint8_t a34, uint8_t a35, uint8_t a36, uint8_t a37, uint8_t a38, uint8_t a39, uint8_t a40, uint8_t a41, uint8_t a42, uint8_t a43, uint8_t a44, uint8_t a45, uint8_t a46, uint8_t a47, uint8_t a48, uint8_t a49, uint8_t a50, uint8_t a51, uint8_t a52, uint8_t a53, uint8_t a54, uint8_t a55, uint8_t a56, uint8_t a57, uint8_t a58, uint8_t a59, uint8_t a60, uint8_t a61, uint8_t a62, uint8_t a63) { return _mm512_set_epi64(uint64_t(a7) + (uint64_t(a6) << 8) + (uint64_t(a5) << 16) + (uint64_t(a4) << 24) + (uint64_t(a3) << 32) + (uint64_t(a2) << 40) + (uint64_t(a1) << 48) + (uint64_t(a0) << 56), uint64_t(a15) + (uint64_t(a14) << 8) + (uint64_t(a13) << 16) + (uint64_t(a12) << 24) + (uint64_t(a11) << 32) + (uint64_t(a10) << 40) + (uint64_t(a9) << 48) + (uint64_t(a8) << 56), uint64_t(a23) + (uint64_t(a22) << 8) + (uint64_t(a21) << 16) + (uint64_t(a20) << 24) + (uint64_t(a19) << 32) + (uint64_t(a18) << 40) + (uint64_t(a17) << 48) + (uint64_t(a16) << 56), uint64_t(a31) + (uint64_t(a30) << 8) + (uint64_t(a29) << 16) + (uint64_t(a28) << 24) + (uint64_t(a27) << 32) + (uint64_t(a26) << 40) + (uint64_t(a25) << 48) + (uint64_t(a24) << 56), uint64_t(a39) + (uint64_t(a38) << 8) + (uint64_t(a37) << 16) + (uint64_t(a36) << 24) + (uint64_t(a35) << 32) + (uint64_t(a34) << 40) + (uint64_t(a33) << 48) + (uint64_t(a32) << 56), uint64_t(a47) + (uint64_t(a46) << 8) + (uint64_t(a45) << 16) + (uint64_t(a44) << 24) + (uint64_t(a43) << 32) + (uint64_t(a42) << 40) + (uint64_t(a41) << 48) + (uint64_t(a40) << 56), uint64_t(a55) + (uint64_t(a54) << 
8) + (uint64_t(a53) << 16) + (uint64_t(a52) << 24) + (uint64_t(a51) << 32) + (uint64_t(a50) << 40) + (uint64_t(a49) << 48) + (uint64_t(a48) << 56), uint64_t(a63) + (uint64_t(a62) << 8) + (uint64_t(a61) << 16) + (uint64_t(a60) << 24) + (uint64_t(a59) << 32) + (uint64_t(a58) << 40) + (uint64_t(a57) << 48) + (uint64_t(a56) << 56)); } #endif // SIMDJSON_GCC8 namespace simdjson { namespace icelake { namespace { namespace simd { // Forward-declared so they can be used by splat and friends. template struct base { __m512i value; // Zero constructor simdjson_inline base() : value{__m512i()} {} // Conversion from SIMD register simdjson_inline base(const __m512i _value) : value(_value) {} // Conversion to SIMD register simdjson_inline operator const __m512i&() const { return this->value; } simdjson_inline operator __m512i&() { return this->value; } // Bit operations simdjson_inline Child operator|(const Child other) const { return _mm512_or_si512(*this, other); } simdjson_inline Child operator&(const Child other) const { return _mm512_and_si512(*this, other); } simdjson_inline Child operator^(const Child other) const { return _mm512_xor_si512(*this, other); } simdjson_inline Child bit_andnot(const Child other) const { return _mm512_andnot_si512(other, *this); } simdjson_inline Child& operator|=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast | other; return *this_cast; } simdjson_inline Child& operator&=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast & other; return *this_cast; } simdjson_inline Child& operator^=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast ^ other; return *this_cast; } }; // Forward-declared so they can be used by splat and friends. 
template struct simd8; template> struct base8: base> { typedef uint32_t bitmask_t; typedef uint64_t bitmask2_t; simdjson_inline base8() : base>() {} simdjson_inline base8(const __m512i _value) : base>(_value) {} friend simdjson_really_inline uint64_t operator==(const simd8 lhs, const simd8 rhs) { return _mm512_cmpeq_epi8_mask(lhs, rhs); } static const int SIZE = sizeof(base::value); template simdjson_inline simd8 prev(const simd8 prev_chunk) const { // workaround for compilers unable to figure out that 16 - N is a constant (GCC 8) constexpr int shift = 16 - N; return _mm512_alignr_epi8(*this, _mm512_permutex2var_epi64(prev_chunk, _mm512_set_epi64(13, 12, 11, 10, 9, 8, 7, 6), *this), shift); } }; // SIMD byte mask type (returned by things like eq and gt) template<> struct simd8: base8 { static simdjson_inline simd8 splat(bool _value) { return _mm512_set1_epi8(uint8_t(-(!!_value))); } simdjson_inline simd8() : base8() {} simdjson_inline simd8(const __m512i _value) : base8(_value) {} // Splat constructor simdjson_inline simd8(bool _value) : base8(splat(_value)) {} simdjson_inline bool any() const { return !!_mm512_test_epi8_mask (*this, *this); } simdjson_inline simd8 operator~() const { return *this ^ true; } }; template struct base8_numeric: base8 { static simdjson_inline simd8 splat(T _value) { return _mm512_set1_epi8(_value); } static simdjson_inline simd8 zero() { return _mm512_setzero_si512(); } static simdjson_inline simd8 load(const T values[64]) { return _mm512_loadu_si512(reinterpret_cast(values)); } // Repeat 16 values as many times as necessary (usually for lookup tables) static simdjson_inline simd8 repeat_16( T v0, T v1, T v2, T v3, T v4, T v5, T v6, T v7, T v8, T v9, T v10, T v11, T v12, T v13, T v14, T v15 ) { return simd8( v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10,v11,v12,v13,v14,v15, v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10,v11,v12,v13,v14,v15, v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10,v11,v12,v13,v14,v15, v0, v1, v2, v3, v4, v5, v6, v7, v8, 
v9, v10,v11,v12,v13,v14,v15 ); } simdjson_inline base8_numeric() : base8() {} simdjson_inline base8_numeric(const __m512i _value) : base8(_value) {} // Store to array simdjson_inline void store(T dst[64]) const { return _mm512_storeu_si512(reinterpret_cast<__m512i *>(dst), *this); } // Addition/subtraction are the same for signed and unsigned simdjson_inline simd8 operator+(const simd8 other) const { return _mm512_add_epi8(*this, other); } simdjson_inline simd8 operator-(const simd8 other) const { return _mm512_sub_epi8(*this, other); } simdjson_inline simd8& operator+=(const simd8 other) { *this = *this + other; return *static_cast*>(this); } simdjson_inline simd8& operator-=(const simd8 other) { *this = *this - other; return *static_cast*>(this); } // Override to distinguish from bool version simdjson_inline simd8 operator~() const { return *this ^ 0xFFu; } // Perform a lookup assuming the value is between 0 and 16 (undefined behavior for out of range values) template simdjson_inline simd8 lookup_16(simd8 lookup_table) const { return _mm512_shuffle_epi8(lookup_table, *this); } // Copies to 'output" all bytes corresponding to a 0 in the mask (interpreted as a bitset). // Passing a 0 value for mask would be equivalent to writing out every byte to output. // Only the first 32 - count_ones(mask) bytes of the result are significant but 32 bytes // get written. // Design consideration: it seems like a function with the // signature simd8 compress(uint32_t mask) would be // sensible, but the AVX ISA makes this kind of approach difficult. 
template simdjson_inline void compress(uint64_t mask, L * output) const { _mm512_mask_compressstoreu_epi8 (output,~mask,*this); } template simdjson_inline simd8 lookup_16( L replace0, L replace1, L replace2, L replace3, L replace4, L replace5, L replace6, L replace7, L replace8, L replace9, L replace10, L replace11, L replace12, L replace13, L replace14, L replace15) const { return lookup_16(simd8::repeat_16( replace0, replace1, replace2, replace3, replace4, replace5, replace6, replace7, replace8, replace9, replace10, replace11, replace12, replace13, replace14, replace15 )); } }; // Signed bytes template<> struct simd8 : base8_numeric { simdjson_inline simd8() : base8_numeric() {} simdjson_inline simd8(const __m512i _value) : base8_numeric(_value) {} // Splat constructor simdjson_inline simd8(int8_t _value) : simd8(splat(_value)) {} // Array constructor simdjson_inline simd8(const int8_t values[64]) : simd8(load(values)) {} // Member-by-member initialization simdjson_inline simd8( int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15, int8_t v16, int8_t v17, int8_t v18, int8_t v19, int8_t v20, int8_t v21, int8_t v22, int8_t v23, int8_t v24, int8_t v25, int8_t v26, int8_t v27, int8_t v28, int8_t v29, int8_t v30, int8_t v31, int8_t v32, int8_t v33, int8_t v34, int8_t v35, int8_t v36, int8_t v37, int8_t v38, int8_t v39, int8_t v40, int8_t v41, int8_t v42, int8_t v43, int8_t v44, int8_t v45, int8_t v46, int8_t v47, int8_t v48, int8_t v49, int8_t v50, int8_t v51, int8_t v52, int8_t v53, int8_t v54, int8_t v55, int8_t v56, int8_t v57, int8_t v58, int8_t v59, int8_t v60, int8_t v61, int8_t v62, int8_t v63 ) : simd8(_mm512_set_epi8( v63, v62, v61, v60, v59, v58, v57, v56, v55, v54, v53, v52, v51, v50, v49, v48, v47, v46, v45, v44, v43, v42, v41, v40, v39, v38, v37, v36, v35, v34, v33, v32, v31, v30, v29, v28, v27, v26, v25, v24, v23, v22, v21, v20, 
v19, v18, v17, v16, v15, v14, v13, v12, v11, v10, v9, v8, v7, v6, v5, v4, v3, v2, v1, v0 )) {} // Repeat 16 values as many times as necessary (usually for lookup tables) simdjson_inline static simd8 repeat_16( int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15 ) { return simd8( v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10,v11,v12,v13,v14,v15, v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10,v11,v12,v13,v14,v15, v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10,v11,v12,v13,v14,v15, v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10,v11,v12,v13,v14,v15 ); } // Order-sensitive comparisons simdjson_inline simd8 max_val(const simd8 other) const { return _mm512_max_epi8(*this, other); } simdjson_inline simd8 min_val(const simd8 other) const { return _mm512_min_epi8(*this, other); } simdjson_inline simd8 operator>(const simd8 other) const { return _mm512_maskz_abs_epi8(_mm512_cmpgt_epi8_mask(*this, other),_mm512_set1_epi8(uint8_t(0x80))); } simdjson_inline simd8 operator<(const simd8 other) const { return _mm512_maskz_abs_epi8(_mm512_cmpgt_epi8_mask(other, *this),_mm512_set1_epi8(uint8_t(0x80))); } }; // Unsigned bytes template<> struct simd8: base8_numeric { simdjson_inline simd8() : base8_numeric() {} simdjson_inline simd8(const __m512i _value) : base8_numeric(_value) {} // Splat constructor simdjson_inline simd8(uint8_t _value) : simd8(splat(_value)) {} // Array constructor simdjson_inline simd8(const uint8_t values[64]) : simd8(load(values)) {} // Member-by-member initialization simdjson_inline simd8( uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15, uint8_t v16, uint8_t v17, uint8_t v18, uint8_t v19, uint8_t v20, uint8_t v21, uint8_t v22, uint8_t v23, uint8_t v24, uint8_t v25, uint8_t v26, uint8_t v27, uint8_t 
v28, uint8_t v29, uint8_t v30, uint8_t v31, uint8_t v32, uint8_t v33, uint8_t v34, uint8_t v35, uint8_t v36, uint8_t v37, uint8_t v38, uint8_t v39, uint8_t v40, uint8_t v41, uint8_t v42, uint8_t v43, uint8_t v44, uint8_t v45, uint8_t v46, uint8_t v47, uint8_t v48, uint8_t v49, uint8_t v50, uint8_t v51, uint8_t v52, uint8_t v53, uint8_t v54, uint8_t v55, uint8_t v56, uint8_t v57, uint8_t v58, uint8_t v59, uint8_t v60, uint8_t v61, uint8_t v62, uint8_t v63 ) : simd8(_mm512_set_epi8( v63, v62, v61, v60, v59, v58, v57, v56, v55, v54, v53, v52, v51, v50, v49, v48, v47, v46, v45, v44, v43, v42, v41, v40, v39, v38, v37, v36, v35, v34, v33, v32, v31, v30, v29, v28, v27, v26, v25, v24, v23, v22, v21, v20, v19, v18, v17, v16, v15, v14, v13, v12, v11, v10, v9, v8, v7, v6, v5, v4, v3, v2, v1, v0 )) {} // Repeat 16 values as many times as necessary (usually for lookup tables) simdjson_inline static simd8 repeat_16( uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15 ) { return simd8( v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10,v11,v12,v13,v14,v15, v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10,v11,v12,v13,v14,v15, v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10,v11,v12,v13,v14,v15, v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10,v11,v12,v13,v14,v15 ); } // Saturated math simdjson_inline simd8 saturating_add(const simd8 other) const { return _mm512_adds_epu8(*this, other); } simdjson_inline simd8 saturating_sub(const simd8 other) const { return _mm512_subs_epu8(*this, other); } // Order-specific operations simdjson_inline simd8 max_val(const simd8 other) const { return _mm512_max_epu8(*this, other); } simdjson_inline simd8 min_val(const simd8 other) const { return _mm512_min_epu8(other, *this); } // Same as >, but only guarantees true is nonzero (< guarantees true = -1) simdjson_inline simd8 gt_bits(const simd8 other) const { return 
this->saturating_sub(other); } // Same as <, but only guarantees true is nonzero (< guarantees true = -1) simdjson_inline simd8 lt_bits(const simd8 other) const { return other.saturating_sub(*this); } simdjson_inline uint64_t operator<=(const simd8 other) const { return other.max_val(*this) == other; } simdjson_inline uint64_t operator>=(const simd8 other) const { return other.min_val(*this) == other; } simdjson_inline simd8 operator>(const simd8 other) const { return this->gt_bits(other).any_bits_set(); } simdjson_inline simd8 operator<(const simd8 other) const { return this->lt_bits(other).any_bits_set(); } // Bit-specific operations simdjson_inline simd8 bits_not_set() const { return _mm512_mask_blend_epi8(*this == uint8_t(0), _mm512_set1_epi8(0), _mm512_set1_epi8(-1)); } simdjson_inline simd8 bits_not_set(simd8 bits) const { return (*this & bits).bits_not_set(); } simdjson_inline simd8 any_bits_set() const { return ~this->bits_not_set(); } simdjson_inline simd8 any_bits_set(simd8 bits) const { return ~this->bits_not_set(bits); } simdjson_inline bool is_ascii() const { return _mm512_movepi8_mask(*this) == 0; } simdjson_inline bool bits_not_set_anywhere() const { return !_mm512_test_epi8_mask(*this, *this); } simdjson_inline bool any_bits_set_anywhere() const { return !bits_not_set_anywhere(); } simdjson_inline bool bits_not_set_anywhere(simd8 bits) const { return !_mm512_test_epi8_mask(*this, bits); } simdjson_inline bool any_bits_set_anywhere(simd8 bits) const { return !bits_not_set_anywhere(bits); } template simdjson_inline simd8 shr() const { return simd8(_mm512_srli_epi16(*this, N)) & uint8_t(0xFFu >> N); } template simdjson_inline simd8 shl() const { return simd8(_mm512_slli_epi16(*this, N)) & uint8_t(0xFFu << N); } // Get one of the bits and make a bitmask out of it. // e.g. 
value.get_bit<7>() gets the high bit template simdjson_inline uint64_t get_bit() const { return _mm512_movepi8_mask(_mm512_slli_epi16(*this, 7-N)); } }; template struct simd8x64 { static constexpr int NUM_CHUNKS = 64 / sizeof(simd8); static_assert(NUM_CHUNKS == 1, "Icelake kernel should use one register per 64-byte block."); const simd8 chunks[NUM_CHUNKS]; simd8x64(const simd8x64& o) = delete; // no copy allowed simd8x64& operator=(const simd8& other) = delete; // no assignment allowed simd8x64() = delete; // no default constructor allowed simdjson_inline simd8x64(const simd8 chunk0, const simd8 chunk1) : chunks{chunk0, chunk1} {} simdjson_inline simd8x64(const simd8 chunk0) : chunks{chunk0} {} simdjson_inline simd8x64(const T ptr[64]) : chunks{simd8::load(ptr)} {} simdjson_inline uint64_t compress(uint64_t mask, T * output) const { this->chunks[0].compress(mask, output); return 64 - count_ones(mask); } simdjson_inline void store(T ptr[64]) const { this->chunks[0].store(ptr+sizeof(simd8)*0); } simdjson_inline simd8 reduce_or() const { return this->chunks[0]; } simdjson_inline simd8x64 bit_or(const T m) const { const simd8 mask = simd8::splat(m); return simd8x64( this->chunks[0] | mask ); } simdjson_inline uint64_t eq(const T m) const { const simd8 mask = simd8::splat(m); return this->chunks[0] == mask; } simdjson_inline uint64_t eq(const simd8x64 &other) const { return this->chunks[0] == other.chunks[0]; } simdjson_inline uint64_t lteq(const T m) const { const simd8 mask = simd8::splat(m); return this->chunks[0] <= mask; } }; // struct simd8x64 } // namespace simd } // unnamed namespace } // namespace icelake } // namespace simdjson #endif // SIMDJSON_ICELAKE_SIMD_H /* end file simdjson/icelake/simd.h */ /* including simdjson/icelake/stringparsing_defs.h: #include "simdjson/icelake/stringparsing_defs.h" */ /* begin file simdjson/icelake/stringparsing_defs.h */ #ifndef SIMDJSON_ICELAKE_STRINGPARSING_DEFS_H #define SIMDJSON_ICELAKE_STRINGPARSING_DEFS_H /* 
amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #include "simdjson/icelake/base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/icelake/simd.h" */ /* amalgamation skipped (editor-only): #include "simdjson/icelake/bitmanipulation.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { namespace icelake { namespace { using namespace simd; // Holds backslashes and quotes locations. struct backslash_and_quote { public: static constexpr uint32_t BYTES_PROCESSED = 64; simdjson_inline static backslash_and_quote copy_and_find(const uint8_t *src, uint8_t *dst); simdjson_inline bool has_quote_first() { return ((bs_bits - 1) & quote_bits) != 0; } simdjson_inline bool has_backslash() { return ((quote_bits - 1) & bs_bits) != 0; } simdjson_inline int quote_index() { return trailing_zeroes(quote_bits); } simdjson_inline int backslash_index() { return trailing_zeroes(bs_bits); } uint64_t bs_bits; uint64_t quote_bits; }; // struct backslash_and_quote simdjson_inline backslash_and_quote backslash_and_quote::copy_and_find(const uint8_t *src, uint8_t *dst) { // this can read up to 15 bytes beyond the buffer size, but we require // SIMDJSON_PADDING of padding static_assert(SIMDJSON_PADDING >= (BYTES_PROCESSED - 1), "backslash and quote finder must process fewer than SIMDJSON_PADDING bytes"); simd8 v(src); // store to dest unconditionally - we can overwrite the bits we don't like later v.store(dst); return { static_cast(v == '\\'), // bs_bits static_cast(v == '"'), // quote_bits }; } } // unnamed namespace } // namespace icelake } // namespace simdjson #endif // SIMDJSON_ICELAKE_STRINGPARSING_DEFS_H /* end file simdjson/icelake/stringparsing_defs.h */ /* including simdjson/icelake/numberparsing_defs.h: #include "simdjson/icelake/numberparsing_defs.h" */ /* begin file simdjson/icelake/numberparsing_defs.h */ #ifndef SIMDJSON_ICELAKE_NUMBERPARSING_DEFS_H 
#define SIMDJSON_ICELAKE_NUMBERPARSING_DEFS_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #include "simdjson/icelake/base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/icelake/intrinsics.h" */ /* amalgamation skipped (editor-only): #include "simdjson/internal/numberparsing_tables.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { namespace icelake { namespace numberparsing { static simdjson_inline uint32_t parse_eight_digits_unrolled(const uint8_t *chars) { // this actually computes *16* values so we are being wasteful. const __m128i ascii0 = _mm_set1_epi8('0'); const __m128i mul_1_10 = _mm_setr_epi8(10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1); const __m128i mul_1_100 = _mm_setr_epi16(100, 1, 100, 1, 100, 1, 100, 1); const __m128i mul_1_10000 = _mm_setr_epi16(10000, 1, 10000, 1, 10000, 1, 10000, 1); const __m128i input = _mm_sub_epi8( _mm_loadu_si128(reinterpret_cast(chars)), ascii0); const __m128i t1 = _mm_maddubs_epi16(input, mul_1_10); const __m128i t2 = _mm_madd_epi16(t1, mul_1_100); const __m128i t3 = _mm_packus_epi32(t2, t2); const __m128i t4 = _mm_madd_epi16(t3, mul_1_10000); return _mm_cvtsi128_si32( t4); // only captures the sum of the first 8 digits, drop the rest } /** @private */ simdjson_inline internal::value128 full_multiplication(uint64_t value1, uint64_t value2) { internal::value128 answer; #if SIMDJSON_REGULAR_VISUAL_STUDIO || SIMDJSON_IS_32BITS #ifdef _M_ARM64 // ARM64 has native support for 64-bit multiplications, no need to emultate answer.high = __umulh(value1, value2); answer.low = value1 * value2; #else answer.low = _umul128(value1, value2, &answer.high); // _umul128 not available on ARM64 #endif // _M_ARM64 #else // SIMDJSON_REGULAR_VISUAL_STUDIO || SIMDJSON_IS_32BITS __uint128_t r = (static_cast<__uint128_t>(value1)) * value2; answer.low = uint64_t(r); answer.high = uint64_t(r >> 
64); #endif return answer; } } // namespace numberparsing } // namespace icelake } // namespace simdjson #define SIMDJSON_SWAR_NUMBER_PARSING 1 #endif // SIMDJSON_ICELAKE_NUMBERPARSING_DEFS_H /* end file simdjson/icelake/numberparsing_defs.h */ /* end file simdjson/icelake/begin.h */ /* including simdjson/generic/ondemand/amalgamated.h for icelake: #include "simdjson/generic/ondemand/amalgamated.h" */ /* begin file simdjson/generic/ondemand/amalgamated.h for icelake */ #if defined(SIMDJSON_CONDITIONAL_INCLUDE) && !defined(SIMDJSON_GENERIC_ONDEMAND_DEPENDENCIES_H) #error simdjson/generic/ondemand/dependencies.h must be included before simdjson/generic/ondemand/amalgamated.h! #endif // Stuff other things depend on /* including simdjson/generic/ondemand/base.h for icelake: #include "simdjson/generic/ondemand/base.h" */ /* begin file simdjson/generic/ondemand/base.h for icelake */ #ifndef SIMDJSON_GENERIC_ONDEMAND_BASE_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_BASE_H */ /* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { namespace icelake { /** * A fast, simple, DOM-like interface that parses JSON as you use it. * * Designed for maximum speed and a lower memory profile. */ namespace ondemand { /** Represents the depth of a JSON value (number of nested arrays/objects). 
*/ using depth_t = int32_t; /** @copydoc simdjson::icelake::number_type */ using number_type = simdjson::icelake::number_type; /** @private Position in the JSON buffer indexes */ using token_position = const uint32_t *; class array; class array_iterator; class document; class document_reference; class document_stream; class field; class json_iterator; enum class json_type; struct number; class object; class object_iterator; class parser; class raw_json_string; class token_iterator; class value; class value_iterator; } // namespace ondemand } // namespace icelake } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_BASE_H /* end file simdjson/generic/ondemand/base.h for icelake */ /* including simdjson/generic/ondemand/value_iterator.h for icelake: #include "simdjson/generic/ondemand/value_iterator.h" */ /* begin file simdjson/generic/ondemand/value_iterator.h for icelake */ #ifndef SIMDJSON_GENERIC_ONDEMAND_VALUE_ITERATOR_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_VALUE_ITERATOR_H */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { namespace icelake { namespace ondemand { /** * Iterates through a single JSON value at a particular depth. * * Does not keep track of the type of value: provides methods for objects, arrays and scalars and expects * the caller to call the right ones. * * @private This is not intended for external use. 
*/ class value_iterator { protected: /** The underlying JSON iterator */ json_iterator *_json_iter{}; /** The depth of this value */ depth_t _depth{}; /** * The starting token index for this value */ token_position _start_position{}; public: simdjson_inline value_iterator() noexcept = default; /** * Denote that we're starting a document. */ simdjson_inline void start_document() noexcept; /** * Skips a non-iterated or partially-iterated JSON value, whether it is a scalar, array or object. * * Optimized for scalars. */ simdjson_warn_unused simdjson_inline error_code skip_child() noexcept; /** * Tell whether the iterator is at the EOF mark */ simdjson_inline bool at_end() const noexcept; /** * Tell whether the iterator is at the start of the value */ simdjson_inline bool at_start() const noexcept; /** * Tell whether the value is open--if the value has not been used, or the array/object is still open. */ simdjson_inline bool is_open() const noexcept; /** * Tell whether the value is at an object's first field (just after the {). */ simdjson_inline bool at_first_field() const noexcept; /** * Abandon all iteration. */ simdjson_inline void abandon() noexcept; /** * Get the child value as a value_iterator. */ simdjson_inline value_iterator child_value() const noexcept; /** * Get the depth of this value. */ simdjson_inline int32_t depth() const noexcept; /** * Get the JSON type of this value. * * @error TAPE_ERROR when the JSON value is a bad token like "}" "," or "alse". */ simdjson_inline simdjson_result type() const noexcept; /** * @addtogroup object Object iteration * * Methods to iterate and find object fields. These methods generally *assume* the value is * actually an object; the caller is responsible for keeping track of that fact. * * @{ */ /** * Start an object iteration. * * @returns Whether the object had any fields (returns false for empty). 
* @error INCORRECT_TYPE if there is no opening { */ simdjson_warn_unused simdjson_inline simdjson_result start_object() noexcept; /** * Start an object iteration from the root. * * @returns Whether the object had any fields (returns false for empty). * @error INCORRECT_TYPE if there is no opening { * @error TAPE_ERROR if there is no matching } at end of document */ simdjson_warn_unused simdjson_inline simdjson_result start_root_object() noexcept; /** * Checks whether an object could be started from the root. May be called by start_root_object. * * @returns SUCCESS if it is possible to safely start an object from the root (document level). * @error INCORRECT_TYPE if there is no opening { * @error TAPE_ERROR if there is no matching } at end of document */ simdjson_warn_unused simdjson_inline error_code check_root_object() noexcept; /** * Start an object iteration after the user has already checked and moved past the {. * * Does not move the iterator unless the object is empty ({}). * * @returns Whether the object had any fields (returns false for empty). * @error INCOMPLETE_ARRAY_OR_OBJECT If there are no more tokens (implying the *parent* * array or object is incomplete). */ simdjson_warn_unused simdjson_inline simdjson_result started_object() noexcept; /** * Start an object iteration from the root, after the user has already checked and moved past the {. * * Does not move the iterator unless the object is empty ({}). * * @returns Whether the object had any fields (returns false for empty). * @error INCOMPLETE_ARRAY_OR_OBJECT If there are no more tokens (implying the *parent* * array or object is incomplete). */ simdjson_warn_unused simdjson_inline simdjson_result started_root_object() noexcept; /** * Moves to the next field in an object. * * Looks for , and }. If } is found, the object is finished and the iterator advances past it. * Otherwise, it advances to the next value. * * @return whether there is another field in the object. 
* @error TAPE_ERROR If there is a comma missing between fields. * @error TAPE_ERROR If there is a comma, but not enough tokens remaining to have a key, :, and value. */ simdjson_warn_unused simdjson_inline simdjson_result has_next_field() noexcept; /** * Get the current field's key. */ simdjson_warn_unused simdjson_inline simdjson_result field_key() noexcept; /** * Pass the : in the field and move to its value. */ simdjson_warn_unused simdjson_inline error_code field_value() noexcept; /** * Find the next field with the given key. * * Assumes you have called next_field() or otherwise matched the previous value. * * This means the iterator must be sitting at the next key: * * ``` * { "a": 1, "b": 2 } * ^ * ``` * * Key is *raw JSON,* meaning it will be matched against the verbatim JSON without attempting to * unescape it. This works well for typical ASCII and UTF-8 keys (almost all of them), but may * fail to match some keys with escapes (\u, \n, etc.). */ simdjson_warn_unused simdjson_inline error_code find_field(const std::string_view key) noexcept; /** * Find the next field with the given key, *without* unescaping. This assumes object order: it * will not find the field if it was already passed when looking for some *other* field. * * Assumes you have called next_field() or otherwise matched the previous value. * * This means the iterator must be sitting at the next key: * * ``` * { "a": 1, "b": 2 } * ^ * ``` * * Key is *raw JSON,* meaning it will be matched against the verbatim JSON without attempting to * unescape it. This works well for typical ASCII and UTF-8 keys (almost all of them), but may * fail to match some keys with escapes (\u, \n, etc.). */ simdjson_warn_unused simdjson_inline simdjson_result find_field_raw(const std::string_view key) noexcept; /** * Find the field with the given key without regard to order, and *without* unescaping. 
* * This is an unordered object lookup: if the field is not found initially, it will cycle around and scan from the beginning. * * Assumes you have called next_field() or otherwise matched the previous value. * * This means the iterator must be sitting at the next key: * * ``` * { "a": 1, "b": 2 } * ^ * ``` * * Key is *raw JSON,* meaning it will be matched against the verbatim JSON without attempting to * unescape it. This works well for typical ASCII and UTF-8 keys (almost all of them), but may * fail to match some keys with escapes (\u, \n, etc.). */ simdjson_warn_unused simdjson_inline simdjson_result find_field_unordered_raw(const std::string_view key) noexcept; /** @} */ /** * @addtogroup array Array iteration * Methods to iterate over array elements. These methods generally *assume* the value is actually * an array; the caller is responsible for keeping track of that fact. * @{ */ /** * Check for an opening [ and start an array iteration. * * @returns Whether the array had any elements (returns false for empty). * @error INCORRECT_TYPE If there is no [. */ simdjson_warn_unused simdjson_inline simdjson_result start_array() noexcept; /** * Check for an opening [ and start an array iteration while at the root. * * @returns Whether the array had any elements (returns false for empty). * @error INCORRECT_TYPE If there is no [. * @error TAPE_ERROR if there is no matching ] at end of document */ simdjson_warn_unused simdjson_inline simdjson_result start_root_array() noexcept; /** * Checks whether an array could be started from the root. May be called by start_root_array. * * @returns SUCCESS if it is possible to safely start an array from the root (document level). * @error INCORRECT_TYPE If there is no [. * @error TAPE_ERROR if there is no matching ] at end of document */ simdjson_warn_unused simdjson_inline error_code check_root_array() noexcept; /** * Start an array iteration, after the user has already checked and moved past the [.
* * Does not move the iterator unless the array is empty ([]). * * @returns Whether the array had any elements (returns false for empty). * @error INCOMPLETE_ARRAY_OR_OBJECT If there are no more tokens (implying the *parent* * array or object is incomplete). */ simdjson_warn_unused simdjson_inline simdjson_result started_array() noexcept; /** * Start an array iteration from the root, after the user has already checked and moved past the [. * * Does not move the iterator unless the array is empty ([]). * * @returns Whether the array had any elements (returns false for empty). * @error INCOMPLETE_ARRAY_OR_OBJECT If there are no more tokens (implying the *parent* * array or object is incomplete). */ simdjson_warn_unused simdjson_inline simdjson_result started_root_array() noexcept; /** * Moves to the next element in an array. * * Looks for , and ]. If ] is found, the array is finished and the iterator advances past it. * Otherwise, it advances to the next value. * * @return Whether there is another element in the array. * @error TAPE_ERROR If there is a comma missing between elements. */ simdjson_warn_unused simdjson_inline simdjson_result has_next_element() noexcept; /** * Get a child value iterator. 
*/ simdjson_warn_unused simdjson_inline value_iterator child() const noexcept; /** @} */ /** * @defgroup scalar Scalar values * @addtogroup scalar * @{ */ simdjson_warn_unused simdjson_inline simdjson_result get_string(bool allow_replacement) noexcept; template simdjson_warn_unused simdjson_inline error_code get_string(string_type& receiver, bool allow_replacement) noexcept; simdjson_warn_unused simdjson_inline simdjson_result get_wobbly_string() noexcept; simdjson_warn_unused simdjson_inline simdjson_result get_raw_json_string() noexcept; simdjson_warn_unused simdjson_inline simdjson_result get_uint64() noexcept; simdjson_warn_unused simdjson_inline simdjson_result get_uint64_in_string() noexcept; simdjson_warn_unused simdjson_inline simdjson_result get_int64() noexcept; simdjson_warn_unused simdjson_inline simdjson_result get_int64_in_string() noexcept; simdjson_warn_unused simdjson_inline simdjson_result get_double() noexcept; simdjson_warn_unused simdjson_inline simdjson_result get_double_in_string() noexcept; simdjson_warn_unused simdjson_inline simdjson_result get_bool() noexcept; simdjson_warn_unused simdjson_inline simdjson_result is_null() noexcept; simdjson_warn_unused simdjson_inline bool is_negative() noexcept; simdjson_warn_unused simdjson_inline simdjson_result is_integer() noexcept; simdjson_warn_unused simdjson_inline simdjson_result get_number_type() noexcept; simdjson_warn_unused simdjson_inline simdjson_result get_number() noexcept; simdjson_warn_unused simdjson_inline simdjson_result get_root_string(bool check_trailing, bool allow_replacement) noexcept; template simdjson_warn_unused simdjson_inline error_code get_root_string(string_type& receiver, bool check_trailing, bool allow_replacement) noexcept; simdjson_warn_unused simdjson_inline simdjson_result get_root_wobbly_string(bool check_trailing) noexcept; simdjson_warn_unused simdjson_inline simdjson_result get_root_raw_json_string(bool check_trailing) noexcept; simdjson_warn_unused 
simdjson_inline simdjson_result get_root_uint64(bool check_trailing) noexcept; simdjson_warn_unused simdjson_inline simdjson_result get_root_uint64_in_string(bool check_trailing) noexcept; simdjson_warn_unused simdjson_inline simdjson_result get_root_int64(bool check_trailing) noexcept; simdjson_warn_unused simdjson_inline simdjson_result get_root_int64_in_string(bool check_trailing) noexcept; simdjson_warn_unused simdjson_inline simdjson_result get_root_double(bool check_trailing) noexcept; simdjson_warn_unused simdjson_inline simdjson_result get_root_double_in_string(bool check_trailing) noexcept; simdjson_warn_unused simdjson_inline simdjson_result get_root_bool(bool check_trailing) noexcept; simdjson_warn_unused simdjson_inline bool is_root_negative() noexcept; simdjson_warn_unused simdjson_inline simdjson_result is_root_integer(bool check_trailing) noexcept; simdjson_warn_unused simdjson_inline simdjson_result get_root_number_type(bool check_trailing) noexcept; simdjson_warn_unused simdjson_inline simdjson_result get_root_number(bool check_trailing) noexcept; simdjson_warn_unused simdjson_inline simdjson_result is_root_null(bool check_trailing) noexcept; simdjson_inline error_code error() const noexcept; simdjson_inline uint8_t *&string_buf_loc() noexcept; simdjson_inline const json_iterator &json_iter() const noexcept; simdjson_inline json_iterator &json_iter() noexcept; simdjson_inline void assert_is_valid() const noexcept; simdjson_inline bool is_valid() const noexcept; /** @} */ protected: /** * Restarts an array iteration. * @returns Whether the array has any elements (returns false for empty). */ simdjson_inline simdjson_result reset_array() noexcept; /** * Restarts an object iteration. * @returns Whether the object has any fields (returns false for empty). */ simdjson_inline simdjson_result reset_object() noexcept; /** * move_at_start(): moves us so that we are pointing at the beginning of * the container. 
It updates the index so that at_start() is true and it * syncs the depth. The user can then create a new container instance. * * Usage: used with value::count_elements(). **/ simdjson_inline void move_at_start() noexcept; /** * move_at_container_start(): moves us so that we are pointing at the beginning of * the container so that assert_at_container_start() passes. * * Usage: used with reset_array() and reset_object(). **/ simdjson_inline void move_at_container_start() noexcept; /* Useful for debugging and logging purposes. */ inline std::string to_string() const noexcept; simdjson_inline value_iterator(json_iterator *json_iter, depth_t depth, token_position start_index) noexcept; simdjson_inline simdjson_result parse_null(const uint8_t *json) const noexcept; simdjson_inline simdjson_result parse_bool(const uint8_t *json) const noexcept; simdjson_inline const uint8_t *peek_start() const noexcept; simdjson_inline uint32_t peek_start_length() const noexcept; /** * The general idea of the advance_... methods and the peek_* methods * is that you first peek and check that you have desired type. If you do, * and only if you do, then you advance. * * We used to unconditionally advance. But this made reasoning about our * current state difficult. * Suppose you always advance. Look at the 'value' matching the key * "shadowable" in the following example... * * ({"globals":{"a":{"shadowable":[}}}}) * * If the user thinks it is a Boolean and asks for it, then we check the '[', * decide it is not a Boolean, but still move into the next character ('}'). Now * we are left pointing at '}' right after a '['. And we have not yet reported * an error, only that we do not have a Boolean. * * If, instead, you just stand your ground until it is content that you know, then * you will only even move beyond the '[' if the user tells you that you have an * array. 
So you will be at the '}' character inside the array and, hopefully, you * will then catch the error because an array cannot start with '}', but the code * processing Boolean values does not know this. * * So the contract is: first call 'peek_...' and then call 'advance_...' only * if you have determined that it is a type you can handle. * * Unfortunately, it makes the code more verbose, longer and maybe more error prone. */ simdjson_inline void advance_scalar(const char *type) noexcept; simdjson_inline void advance_root_scalar(const char *type) noexcept; simdjson_inline void advance_non_root_scalar(const char *type) noexcept; simdjson_inline const uint8_t *peek_scalar(const char *type) noexcept; simdjson_inline const uint8_t *peek_root_scalar(const char *type) noexcept; simdjson_inline const uint8_t *peek_non_root_scalar(const char *type) noexcept; simdjson_inline error_code start_container(uint8_t start_char, const char *incorrect_type_message, const char *type) noexcept; simdjson_inline error_code end_container() noexcept; /** * Advance to a place expecting a value (increasing depth). * * @return The current token (the one left behind). * @error TAPE_ERROR If the document ended early. */ simdjson_inline simdjson_result advance_to_value() noexcept; simdjson_inline error_code incorrect_type_error(const char *message) const noexcept; simdjson_inline error_code error_unless_more_tokens(uint32_t tokens=1) const noexcept; simdjson_inline bool is_at_start() const noexcept; /** * is_at_iterator_start() returns true on an array or object after it has just been * created, whether the instance is empty or not. * * Usage: used by array::begin() in debug mode (SIMDJSON_DEVELOPMENT_CHECKS) */ simdjson_inline bool is_at_iterator_start() const noexcept; /** * Assuming that we are within an object, this returns true if we * are pointing at a key. * * Usage: the skip_child() method should never be used while we are pointing * at a key inside an object. 
*/ simdjson_inline bool is_at_key() const noexcept; inline void assert_at_start() const noexcept; inline void assert_at_container_start() const noexcept; inline void assert_at_root() const noexcept; inline void assert_at_child() const noexcept; inline void assert_at_next() const noexcept; inline void assert_at_non_root_start() const noexcept; /** Get the starting position of this value */ simdjson_inline token_position start_position() const noexcept; /** @copydoc error_code json_iterator::position() const noexcept; */ simdjson_inline token_position position() const noexcept; /* NOTE(review): the @copydoc that follows targets end_position() although it documents last_position(); confirm the intended target. */ /** @copydoc error_code json_iterator::end_position() const noexcept; */ simdjson_inline token_position last_position() const noexcept; /** @copydoc error_code json_iterator::end_position() const noexcept; */ simdjson_inline token_position end_position() const noexcept; /** @copydoc error_code json_iterator::report_error(error_code error, const char *message) noexcept; */ simdjson_inline error_code report_error(error_code error, const char *message) noexcept; friend class document; friend class object; friend class array; friend class value; }; // value_iterator } // namespace ondemand } // namespace icelake } // namespace simdjson namespace simdjson { /** simdjson_result specialization for icelake::ondemand::value_iterator: constructible from a value_iterator rvalue, from an error_code, or by default. */ template<> struct simdjson_result : public icelake::implementation_simdjson_result_base { public: simdjson_inline simdjson_result(icelake::ondemand::value_iterator &&value) noexcept; ///< @private simdjson_inline simdjson_result(error_code error) noexcept; ///< @private simdjson_inline simdjson_result() noexcept = default; }; } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_VALUE_ITERATOR_H /* end file simdjson/generic/ondemand/value_iterator.h for icelake */ /* including simdjson/generic/ondemand/value.h for icelake: #include "simdjson/generic/ondemand/value.h" */ /* begin file simdjson/generic/ondemand/value.h for icelake */ #ifndef SIMDJSON_GENERIC_ONDEMAND_VALUE_H /* amalgamation skipped (editor-only): #ifndef
SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_VALUE_H */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { namespace icelake { namespace ondemand { /** * An ephemeral JSON value returned during iteration. It is only valid for as long as you do * not access more data in the JSON document. */ class value { public: /** * Create a new invalid value. * * Exists so you can declare a variable and later assign to it before use. */ simdjson_inline value() noexcept = default; /** * Get this value as the given type. * * Supported types: object, array, raw_json_string, string_view, uint64_t, int64_t, double, bool * * You may use get_double(), get_bool(), get_uint64(), get_int64(), * get_object(), get_array(), get_raw_json_string(), or get_string() instead. * * @returns A value of the given type, parsed from the JSON. * @returns INCORRECT_TYPE If the JSON value is not the given type. */ template simdjson_inline simdjson_result get() noexcept { // Unless the simdjson library provides an inline implementation, calling this method should // immediately fail. static_assert(!sizeof(T), "The get method with given type is not implemented by the simdjson library. " "The supported types are ondemand::object, ondemand::array, raw_json_string, std::string_view, uint64_t, " "int64_t, double, and bool. We recommend you use get_double(), get_bool(), get_uint64(), get_int64(), " " get_object(), get_array(), get_raw_json_string(), or get_string() instead of the get template."); } /** * Get this value as the given type. 
* * Supported types: object, array, raw_json_string, string_view, uint64_t, int64_t, double, bool * * @param out This is set to a value of the given type, parsed from the JSON. If there is an error, this may not be initialized. * @returns INCORRECT_TYPE If the JSON value is not the given type. * @returns SUCCESS If the parse succeeded and the out parameter was set to the value. */ template simdjson_inline error_code get(T &out) noexcept; /** * Cast this JSON value to an array. * * @returns An object that can be used to iterate the array. * @returns INCORRECT_TYPE If the JSON value is not an array. */ simdjson_inline simdjson_result get_array() noexcept; /** * Cast this JSON value to an object. * * @returns An object that can be used to look up or iterate fields. * @returns INCORRECT_TYPE If the JSON value is not an object. */ simdjson_inline simdjson_result get_object() noexcept; /** * Cast this JSON value to an unsigned integer. * * @returns An unsigned 64-bit integer. * @returns INCORRECT_TYPE If the JSON value is not a 64-bit unsigned integer. */ simdjson_inline simdjson_result get_uint64() noexcept; /** * Cast this JSON value (inside string) to an unsigned integer. * * @returns An unsigned 64-bit integer. * @returns INCORRECT_TYPE If the JSON value is not a 64-bit unsigned integer. */ simdjson_inline simdjson_result get_uint64_in_string() noexcept; /** * Cast this JSON value to a signed integer. * * @returns A signed 64-bit integer. * @returns INCORRECT_TYPE If the JSON value is not a 64-bit integer. */ simdjson_inline simdjson_result get_int64() noexcept; /** * Cast this JSON value (inside string) to a signed integer. * * @returns A signed 64-bit integer. * @returns INCORRECT_TYPE If the JSON value is not a 64-bit integer. */ simdjson_inline simdjson_result get_int64_in_string() noexcept; /** * Cast this JSON value to a double. * * @returns A double. * @returns INCORRECT_TYPE If the JSON value is not a valid floating-point number.
*/ simdjson_inline simdjson_result get_double() noexcept; /** * Cast this JSON value (inside string) to a double. * * @returns A double. * @returns INCORRECT_TYPE If the JSON value is not a valid floating-point number. */ simdjson_inline simdjson_result get_double_in_string() noexcept; /** * Cast this JSON value to a string. * * The string is guaranteed to be valid UTF-8. * * Equivalent to get(). * * Important: a value should be consumed once. Calling get_string() twice on the same value * is an error. * * @returns A UTF-8 string. The string is stored in the parser and will be invalidated the next * time it parses a document or when it is destroyed. * @returns INCORRECT_TYPE if the JSON value is not a string. */ simdjson_inline simdjson_result get_string(bool allow_replacement = false) noexcept; /** * Attempts to fill the provided std::string reference with the parsed value of the current string. * * The string is guaranteed to be valid UTF-8. * * Important: a value should be consumed once. Calling get_string() twice on the same value * is an error. * * Performance: This method may be slower than get_string() or get_string(bool) because it may need to allocate memory. * We recommend you avoid allocating an std::string unless you need to. * * @returns INCORRECT_TYPE if the JSON value is not a string. Otherwise, we return SUCCESS. */ template simdjson_inline error_code get_string(string_type& receiver, bool allow_replacement = false) noexcept; /** * Cast this JSON value to a "wobbly" string. * * The string may not be a valid UTF-8 string. * See https://simonsapin.github.io/wtf-8/ * * Important: a value should be consumed once. Calling get_wobbly_string() twice on the same value * is an error. * * @returns A string that may not be valid UTF-8. The string is stored in the parser and will be invalidated the next * time it parses a document or when it is destroyed. * @returns INCORRECT_TYPE if the JSON value is not a string.
*/ simdjson_inline simdjson_result get_wobbly_string() noexcept; /** * Cast this JSON value to a raw_json_string. * * The string is guaranteed to be valid UTF-8, and may have escapes in it (e.g. \\ or \n). * * @returns A pointer to the raw JSON for the given string. * @returns INCORRECT_TYPE if the JSON value is not a string. */ simdjson_inline simdjson_result get_raw_json_string() noexcept; /** * Cast this JSON value to a bool. * * @returns A bool value. * @returns INCORRECT_TYPE if the JSON value is not true or false. */ simdjson_inline simdjson_result get_bool() noexcept; /** * Checks if this JSON value is null. If and only if the value is * null, then it is consumed (we advance). If we find a token that * begins with 'n' but is not 'null', then an error is returned. * * @returns Whether the value is null. * @returns INCORRECT_TYPE If the JSON value begins with 'n' and is not 'null'. */ simdjson_inline simdjson_result is_null() noexcept; #if SIMDJSON_EXCEPTIONS /** * Cast this JSON value to an array. * * @returns An object that can be used to iterate the array. * @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not an array. */ simdjson_inline operator array() noexcept(false); /** * Cast this JSON value to an object. * * @returns An object that can be used to look up or iterate fields. * @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not an object. */ simdjson_inline operator object() noexcept(false); /** * Cast this JSON value to an unsigned integer. * * @returns An unsigned 64-bit integer. * @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not a 64-bit unsigned integer. */ simdjson_inline operator uint64_t() noexcept(false); /** * Cast this JSON value to a signed integer. * * @returns A signed 64-bit integer. * @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not a 64-bit integer. */ simdjson_inline operator int64_t() noexcept(false); /** * Cast this JSON value to a double. * * @returns A double.
* @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not a valid floating-point number. */ simdjson_inline operator double() noexcept(false); /** * Cast this JSON value to a string. * * The string is guaranteed to be valid UTF-8. * * Equivalent to get(). * * @returns An UTF-8 string. The string is stored in the parser and will be invalidated the next * time it parses a document or when it is destroyed. * @exception simdjson_error(INCORRECT_TYPE) if the JSON value is not a string. */ simdjson_inline operator std::string_view() noexcept(false); /** * Cast this JSON value to a raw_json_string. * * The string is guaranteed to be valid UTF-8, and may have escapes in it (e.g. \\ or \n). * * @returns A pointer to the raw JSON for the given string. * @exception simdjson_error(INCORRECT_TYPE) if the JSON value is not a string. */ simdjson_inline operator raw_json_string() noexcept(false); /** * Cast this JSON value to a bool. * * @returns A bool value. * @exception simdjson_error(INCORRECT_TYPE) if the JSON value is not true or false. */ simdjson_inline operator bool() noexcept(false); #endif /** * Begin array iteration. * * Part of the std::iterable interface. * * @returns INCORRECT_TYPE If the JSON value is not an array. */ simdjson_inline simdjson_result begin() & noexcept; /** * Sentinel representing the end of the array. * * Part of the std::iterable interface. */ simdjson_inline simdjson_result end() & noexcept; /** * This method scans the array and counts the number of elements. * The count_elements method should always be called before you have begun * iterating through the array: it is expected that you are pointing at * the beginning of the array. * The runtime complexity is linear in the size of the array. After * calling this function, if successful, the array is 'rewinded' at its * beginning as if it had never been accessed. If the JSON is malformed (e.g., * there is a missing comma), then an error is returned and it is no longer * safe to continue. 
* * Performance hint: You should only call count_elements() as a last * resort as it may require scanning the document twice or more. */ simdjson_inline simdjson_result count_elements() & noexcept; /** * This method scans the object and counts the number of key-value pairs. * The count_fields method should always be called before you have begun * iterating through the object: it is expected that you are pointing at * the beginning of the object. * The runtime complexity is linear in the size of the object. After * calling this function, if successful, the object is 'rewinded' at its * beginning as if it had never been accessed. If the JSON is malformed (e.g., * there is a missing comma), then an error is returned and it is no longer * safe to continue. * * To check that an object is empty, it is more performant to use * the is_empty() method on the object instance. * * Performance hint: You should only call count_fields() as a last * resort as it may require scanning the document twice or more. */ simdjson_inline simdjson_result count_fields() & noexcept; /** * Get the value at the given index in the array. This function has linear-time complexity. * This function should only be called once on an array instance since the array iterator is not reset between each call. * * @return The value at the given index, or: * - INDEX_OUT_OF_BOUNDS if the array index is larger than an array length */ simdjson_inline simdjson_result at(size_t index) noexcept; /** * Look up a field by name on an object (order-sensitive). 
* * The following code reads z, then y, then x, and thus will not retrieve x or y if fed the * JSON `{ "x": 1, "y": 2, "z": 3 }`: * * ```c++ * simdjson::ondemand::parser parser; * auto obj = parser.parse(R"( { "x": 1, "y": 2, "z": 3 } )"_padded); * double z = obj.find_field("z"); * double y = obj.find_field("y"); * double x = obj.find_field("x"); * ``` * If you have multiple fields with a matching key ({"x": 1, "x": 1}) be mindful * that only one field is returned. * **Raw Keys:** The lookup will be done against the *raw* key, and will not unescape keys. * e.g. `object["a"]` will match `{ "a": 1 }`, but will *not* match `{ "\u0061": 1 }`. * * @param key The key to look up. * @returns The value of the field, or NO_SUCH_FIELD if the field is not in the object. */ simdjson_inline simdjson_result find_field(std::string_view key) noexcept; /** @overload simdjson_inline simdjson_result find_field(std::string_view key) noexcept; */ simdjson_inline simdjson_result find_field(const char *key) noexcept; /** * Look up a field by name on an object, without regard to key order. * * **Performance Notes:** This is a bit less performant than find_field(), though its effect varies * and often appears negligible. It starts out normally, starting out at the last field; but if * the field is not found, it scans from the beginning of the object to see if it missed it. That * missing case has a non-cache-friendly bump and lots of extra scanning, especially if the object * in question is large. The fact that the extra code is there also bumps the executable size. * * It is the default, however, because it would be highly surprising (and hard to debug) if the * default behavior failed to look up a field just because it was in the wrong order--and many * APIs assume this. Therefore, you must be explicit if you want to treat objects as out of order. * * If you have multiple fields with a matching key ({"x": 1, "x": 1}) be mindful * that only one field is returned. 
* * Use find_field() if you are sure fields will be in order (or are willing to treat it as if the * field wasn't there when they aren't). * * @param key The key to look up. * @returns The value of the field, or NO_SUCH_FIELD if the field is not in the object. */ simdjson_inline simdjson_result find_field_unordered(std::string_view key) noexcept; /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) noexcept; */ simdjson_inline simdjson_result find_field_unordered(const char *key) noexcept; /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) noexcept; */ simdjson_inline simdjson_result operator[](std::string_view key) noexcept; /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) noexcept; */ simdjson_inline simdjson_result operator[](const char *key) noexcept; /** * Get the type of this JSON value. It does not validate or consume the value. * E.g., you must still call "is_null()" to check that a value is null even if * "type()" returns json_type::null. * * NOTE: If you're only expecting a value to be one type (a typical case), it's generally * better to just call .get_double, .get_string, etc. and check for INCORRECT_TYPE (or just * let it throw an exception). * * @return The type of JSON value (json_type::array, json_type::object, json_type::string, * json_type::number, json_type::boolean, or json_type::null). * @error TAPE_ERROR when the JSON value is a bad token like "}" "," or "alse". */ simdjson_inline simdjson_result type() noexcept; /** * Checks whether the value is a scalar (string, number, null, Boolean). * Returns false when it is an array or object. * * @returns true if the type is string, number, null, Boolean * @error TAPE_ERROR when the JSON value is a bad token like "}" "," or "alse". */ simdjson_inline simdjson_result is_scalar() noexcept; /** * Checks whether the value is a negative number. * * @returns true if the number is negative.
*/ simdjson_inline bool is_negative() noexcept; /** * Checks whether the value is an integer number. Note that * this requires partially parsing the number string. If * the value is determined to be an integer, it may still * not parse properly as an integer in subsequent steps * (e.g., it might overflow). * * Performance note: if you call this function systematically * before parsing a number, you may have fallen for a performance * anti-pattern. * * @returns true if the number is an integer. */ simdjson_inline simdjson_result is_integer() noexcept; /** * Determine the number type (integer or floating-point number) as quickly * as possible. This function does not fully validate the input. It is * useful when you only need to classify the numbers, without parsing them. * * If you are planning to retrieve the value or you need full validation, * consider using the get_number() method instead: it will fully parse * and validate the input, and give you access to the type: * get_number().get_number_type(). * * get_number_type() is number_type::unsigned_integer if we have * an integer greater or equal to 9223372036854775808 * get_number_type() is number_type::signed_integer if we have an * integer that is less than 9223372036854775808 * Otherwise, get_number_type() has value number_type::floating_point_number * * This function requires processing the number string, but it is expected * to be faster than get_number().get_number_type() because it does not * parse the number value. * * @returns the type of the number */ simdjson_inline simdjson_result get_number_type() noexcept; /** * Attempt to parse an ondemand::number. An ondemand::number may * contain an integer value or a floating-point value, the simdjson * library will autodetect the type. Thus it is a dynamically typed * number. Before accessing the value, you must determine the detected * type.
* * number.get_number_type() is number_type::signed_integer if we have * an integer in [-9223372036854775808,9223372036854775808) * You can recover the value by calling number.get_int64() and you * have that number.is_int64() is true. * * number.get_number_type() is number_type::unsigned_integer if we have * an integer in [9223372036854775808,18446744073709551616) * You can recover the value by calling number.get_uint64() and you * have that number.is_uint64() is true. * * Otherwise, number.get_number_type() has value number_type::floating_point_number * and we have a binary64 number. * You can recover the value by calling number.get_double() and you * have that number.is_double() is true. * * You must check the type before accessing the value: it is an error * to call "get_int64()" when number.get_number_type() is not * number_type::signed_integer and when number.is_int64() is false. * * Performance note: this is designed with performance in mind. When * calling 'get_number()', you scan the number string only once, determining * efficiently the type and storing it in an efficient manner. */ simdjson_warn_unused simdjson_inline simdjson_result get_number() noexcept; /** * Get the raw JSON for this token. * * The string_view will always point into the input buffer. * * The string_view will start at the beginning of the token, and include the entire token * *as well as all spaces until the next token (or EOF).* This means, for example, that a * string token always begins with a " and is always terminated by the final ", possibly * followed by a number of spaces. * * The string_view is *not* null-terminated. However, if this is a scalar (string, number, * boolean, or null), the character after the end of the string_view is guaranteed to be * a non-space token. * * Tokens include: * - { * - [ * - "a string (possibly with UTF-8 or backslashed characters like \\\")". * - -1.2e-100 * - true * - false * - null * * See also value::raw_json(). 
*/ simdjson_inline std::string_view raw_json_token() noexcept; /** * Get a string_view pointing at this value in the JSON document. * If this element is an array or an object, it consumes the array or the object * and returns a string_view instance corresponding to the * array as represented in JSON. It points inside the original document. * If this element is a scalar (string, number, Boolean, null), it returns what * raw_json_token() would return. */ simdjson_inline simdjson_result raw_json() noexcept; /** * Returns the current location in the document if in bounds. */ simdjson_inline simdjson_result current_location() noexcept; /** * Returns the current depth in the document if in bounds. * * E.g., * 0 = finished with document * 1 = document root value (could be [ or {, not yet known) * 2 = , or } inside root array/object * 3 = key or value inside root array/object. */ simdjson_inline int32_t current_depth() const noexcept; /** * Get the value associated with the given JSON pointer. We use the RFC 6901 * https://tools.ietf.org/html/rfc6901 standard. * * ondemand::parser parser; * auto json = R"({ "foo": { "a": [ 10, 20, 30 ] }})"_padded; * auto doc = parser.iterate(json); * doc.at_pointer("/foo/a/1") == 20 * * It is allowed for a key to be the empty string: * * ondemand::parser parser; * auto json = R"({ "": { "a": [ 10, 20, 30 ] }})"_padded; * auto doc = parser.iterate(json); * doc.at_pointer("//a/1") == 20 * * Note that at_pointer() called on the document automatically calls the document's rewind * method between each call. It invalidates all previously accessed arrays, objects and values * that have not been consumed. * * Calling at_pointer() on non-document instances (e.g., arrays and objects) is not * standardized (by RFC 6901). We provide some experimental support for JSON pointers * on non-document instances. Yet it is not the case when calling at_pointer on an array * or an object instance: there is no rewind and no invalidation. 
* * You may only call at_pointer on an array after it has been created, but before it has * been first accessed. When calling at_pointer on an array, the pointer is advanced to * the location indicated by the JSON pointer (in case of success). It is no longer possible * to call at_pointer on the same array. * * You may call at_pointer more than once on an object, but each time the pointer is advanced * to be within the value matched by the key indicated by the JSON pointer query. Thus any preceding * key (as well as the current key) can no longer be used with following JSON pointer calls. * * Also note that at_pointer() relies on find_field() which implies that we do not unescape keys when matching * * @return The value associated with the given JSON pointer, or: * - NO_SUCH_FIELD if a field does not exist in an object * - INDEX_OUT_OF_BOUNDS if an array index is larger than an array length * - INCORRECT_TYPE if a non-integer is used to access an array * - INVALID_JSON_POINTER if the JSON pointer is invalid and cannot be parsed */ simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; protected: /** * Create a value. */ simdjson_inline value(const value_iterator &iter) noexcept; /** * Skip this value, allowing iteration to continue. */ simdjson_inline void skip() noexcept; /** * Start a value at the current position. * * (It should already be started; this is just a self-documentation method.) */ static simdjson_inline value start(const value_iterator &iter) noexcept; /** * Resume a value. 
*/ static simdjson_inline value resume(const value_iterator &iter) noexcept; /** * Get the object, starting or resuming it as necessary */ simdjson_inline simdjson_result start_or_resume_object() noexcept; // simdjson_inline void log_value(const char *type) const noexcept; // simdjson_inline void log_error(const char *message) const noexcept; value_iterator iter{}; friend class document; friend class array_iterator; friend class field; friend class object; friend struct simdjson_result; friend struct simdjson_result; }; } // namespace ondemand } // namespace icelake } // namespace simdjson namespace simdjson { template<> struct simdjson_result : public icelake::implementation_simdjson_result_base { public: simdjson_inline simdjson_result(icelake::ondemand::value &&value) noexcept; ///< @private simdjson_inline simdjson_result(error_code error) noexcept; ///< @private simdjson_inline simdjson_result() noexcept = default; simdjson_inline simdjson_result get_array() noexcept; simdjson_inline simdjson_result get_object() noexcept; simdjson_inline simdjson_result get_uint64() noexcept; simdjson_inline simdjson_result get_uint64_in_string() noexcept; simdjson_inline simdjson_result get_int64() noexcept; simdjson_inline simdjson_result get_int64_in_string() noexcept; simdjson_inline simdjson_result get_double() noexcept; simdjson_inline simdjson_result get_double_in_string() noexcept; simdjson_inline simdjson_result get_string(bool allow_replacement = false) noexcept; template simdjson_inline error_code get_string(string_type& receiver, bool allow_replacement = false) noexcept; simdjson_inline simdjson_result get_wobbly_string() noexcept; simdjson_inline simdjson_result get_raw_json_string() noexcept; simdjson_inline simdjson_result get_bool() noexcept; simdjson_inline simdjson_result is_null() noexcept; template simdjson_inline simdjson_result get() noexcept; template simdjson_inline error_code get(T &out) noexcept; #if SIMDJSON_EXCEPTIONS simdjson_inline operator 
icelake::ondemand::array() noexcept(false); simdjson_inline operator icelake::ondemand::object() noexcept(false); simdjson_inline operator uint64_t() noexcept(false); simdjson_inline operator int64_t() noexcept(false); simdjson_inline operator double() noexcept(false); simdjson_inline operator std::string_view() noexcept(false); simdjson_inline operator icelake::ondemand::raw_json_string() noexcept(false); simdjson_inline operator bool() noexcept(false); #endif simdjson_inline simdjson_result count_elements() & noexcept; simdjson_inline simdjson_result count_fields() & noexcept; simdjson_inline simdjson_result at(size_t index) noexcept; simdjson_inline simdjson_result begin() & noexcept; simdjson_inline simdjson_result end() & noexcept; /** * Look up a field by name on an object (order-sensitive). * * The following code reads z, then y, then x, and thus will not retrieve x or y if fed the * JSON `{ "x": 1, "y": 2, "z": 3 }`: * * ```c++ * simdjson::ondemand::parser parser; * auto obj = parser.parse(R"( { "x": 1, "y": 2, "z": 3 } )"_padded); * double z = obj.find_field("z"); * double y = obj.find_field("y"); * double x = obj.find_field("x"); * ``` * * **Raw Keys:** The lookup will be done against the *raw* key, and will not unescape keys. * e.g. `object["a"]` will match `{ "a": 1 }`, but will *not* match `{ "\u0061": 1 }`. * * @param key The key to look up. * @returns The value of the field, or NO_SUCH_FIELD if the field is not in the object. */ simdjson_inline simdjson_result find_field(std::string_view key) noexcept; /** @overload simdjson_inline simdjson_result find_field(std::string_view key) noexcept; */ simdjson_inline simdjson_result find_field(const char *key) noexcept; /** * Look up a field by name on an object, without regard to key order. * * **Performance Notes:** This is a bit less performant than find_field(), though its effect varies * and often appears negligible. 
It starts out normally, starting out at the last field; but if * the field is not found, it scans from the beginning of the object to see if it missed it. That * missing case has a non-cache-friendly bump and lots of extra scanning, especially if the object * in question is large. The fact that the extra code is there also bumps the executable size. * * It is the default, however, because it would be highly surprising (and hard to debug) if the * default behavior failed to look up a field just because it was in the wrong order--and many * APIs assume this. Therefore, you must be explicit if you want to treat objects as out of order. * * Use find_field() if you are sure fields will be in order (or are willing to treat it as if the * field wasn't there when they aren't). * * @param key The key to look up. * @returns The value of the field, or NO_SUCH_FIELD if the field is not in the object. */ simdjson_inline simdjson_result find_field_unordered(std::string_view key) noexcept; /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) noexcept; */ simdjson_inline simdjson_result find_field_unordered(const char *key) noexcept; /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) noexcept; */ simdjson_inline simdjson_result operator[](std::string_view key) noexcept; /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) noexcept; */ simdjson_inline simdjson_result operator[](const char *key) noexcept; /** * Get the type of this JSON value. * * NOTE: If you're only expecting a value to be one type (a typical case), it's generally * better to just call .get_double, .get_string, etc. and check for INCORRECT_TYPE (or just * let it throw an exception). 
*/ simdjson_inline simdjson_result type() noexcept; simdjson_inline simdjson_result is_scalar() noexcept; simdjson_inline simdjson_result is_negative() noexcept; simdjson_inline simdjson_result is_integer() noexcept; simdjson_inline simdjson_result get_number_type() noexcept; simdjson_inline simdjson_result get_number() noexcept; /** @copydoc simdjson_inline std::string_view value::raw_json_token() const noexcept */ simdjson_inline simdjson_result raw_json_token() noexcept; simdjson_inline simdjson_result raw_json() noexcept; /** @copydoc simdjson_inline simdjson_result current_location() noexcept */ simdjson_inline simdjson_result current_location() noexcept; /** @copydoc simdjson_inline int32_t current_depth() const noexcept */ simdjson_inline simdjson_result current_depth() const noexcept; simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; }; } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_VALUE_H /* end file simdjson/generic/ondemand/value.h for icelake */ /* including simdjson/generic/ondemand/logger.h for icelake: #include "simdjson/generic/ondemand/logger.h" */ /* begin file simdjson/generic/ondemand/logger.h for icelake */ #ifndef SIMDJSON_GENERIC_ONDEMAND_LOGGER_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_LOGGER_H */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { namespace icelake { namespace ondemand { // Logging should be free unless SIMDJSON_VERBOSE_LOGGING is set. Importantly, it is critical // that the call to the log functions be side-effect free. Thus, for example, you should not // create temporary std::string instances. 
namespace logger { enum class log_level : int32_t { info = 0, error = 1 }; #if SIMDJSON_VERBOSE_LOGGING static constexpr const bool LOG_ENABLED = true; #else static constexpr const bool LOG_ENABLED = false; #endif // We do not want these functions to be 'really inlined' since real inlining is // for performance purposes and if you are using the loggers, you do not care about // performance (or should not). static inline void log_headers() noexcept; // If args are provided, title will be treated as format string template static inline void log_line(const json_iterator &iter, token_position index, depth_t depth, const char *title_prefix, const char *title, std::string_view detail, logger::log_level level, Args&&... args) noexcept; template static inline void log_line(const json_iterator &iter, const char *title_prefix, const char *title, std::string_view detail, int delta, int depth_delta, logger::log_level level, Args&&... args) noexcept; static inline void log_event(const json_iterator &iter, const char *type, std::string_view detail="", int delta=0, int depth_delta=0) noexcept; static inline void log_value(const json_iterator &iter, token_position index, depth_t depth, const char *type, std::string_view detail="") noexcept; static inline void log_value(const json_iterator &iter, const char *type, std::string_view detail="", int delta=-1, int depth_delta=0) noexcept; static inline void log_start_value(const json_iterator &iter, token_position index, depth_t depth, const char *type, std::string_view detail="") noexcept; static inline void log_start_value(const json_iterator &iter, const char *type, int delta=-1, int depth_delta=0) noexcept; static inline void log_end_value(const json_iterator &iter, const char *type, int delta=-1, int depth_delta=0) noexcept; static inline void log_error(const json_iterator &iter, token_position index, depth_t depth, const char *error, const char *detail="") noexcept; static inline void log_error(const json_iterator &iter, const 
char *error, const char *detail="", int delta=-1, int depth_delta=0) noexcept; static inline void log_event(const value_iterator &iter, const char *type, std::string_view detail="", int delta=0, int depth_delta=0) noexcept; static inline void log_value(const value_iterator &iter, const char *type, std::string_view detail="", int delta=-1, int depth_delta=0) noexcept; static inline void log_start_value(const value_iterator &iter, const char *type, int delta=-1, int depth_delta=0) noexcept; static inline void log_end_value(const value_iterator &iter, const char *type, int delta=-1, int depth_delta=0) noexcept; static inline void log_error(const value_iterator &iter, const char *error, const char *detail="", int delta=-1, int depth_delta=0) noexcept; } // namespace logger } // namespace ondemand } // namespace icelake } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_LOGGER_H /* end file simdjson/generic/ondemand/logger.h for icelake */ /* including simdjson/generic/ondemand/token_iterator.h for icelake: #include "simdjson/generic/ondemand/token_iterator.h" */ /* begin file simdjson/generic/ondemand/token_iterator.h for icelake */ #ifndef SIMDJSON_GENERIC_ONDEMAND_TOKEN_ITERATOR_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_TOKEN_ITERATOR_H */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/logger.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { namespace icelake { namespace ondemand { /** * Iterates through JSON tokens (`{` `}` `[` `]` `,` `:` `""` `123` `true` `false` `null`) * detected by stage 1. * * @private This is not intended for external use. 
*/ class token_iterator { public: /** * Create a new invalid token_iterator. * * Exists so you can declare a variable and later assign to it before use. */ simdjson_inline token_iterator() noexcept = default; simdjson_inline token_iterator(token_iterator &&other) noexcept = default; simdjson_inline token_iterator &operator=(token_iterator &&other) noexcept = default; simdjson_inline token_iterator(const token_iterator &other) noexcept = default; simdjson_inline token_iterator &operator=(const token_iterator &other) noexcept = default; /** * Advance to the next token (returning the current one). */ simdjson_inline const uint8_t *return_current_and_advance() noexcept; /** * Reports the current offset in bytes from the start of the underlying buffer. */ simdjson_inline uint32_t current_offset() const noexcept; /** * Get the JSON text for a given token (relative). * * This is not null-terminated; it is a view into the JSON. * * @param delta The relative position of the token to retrieve. e.g. 0 = current token, * 1 = next token, -1 = prev token. * * TODO consider a string_view, assuming the length will get stripped out by the optimizer when * it isn't used ... */ simdjson_inline const uint8_t *peek(int32_t delta=0) const noexcept; /** * Get the maximum length of the JSON text for a given token. * * The length will include any whitespace at the end of the token. * * @param delta The relative position of the token to retrieve. e.g. 0 = current token, * 1 = next token, -1 = prev token. */ simdjson_inline uint32_t peek_length(int32_t delta=0) const noexcept; /** * Get the JSON text for a given token. * * This is not null-terminated; it is a view into the JSON. * * @param position The position of the token. * */ simdjson_inline const uint8_t *peek(token_position position) const noexcept; /** * Get the maximum length of the JSON text for a given token. * * The length will include any whitespace at the end of the token. * * @param position The position of the token. 
*/ simdjson_inline uint32_t peek_length(token_position position) const noexcept; /** * Return the current index. */ simdjson_inline token_position position() const noexcept; /** * Reset to a previously saved index. */ simdjson_inline void set_position(token_position target_position) noexcept; // NOTE: we don't support a full C++ iterator interface, because we expect people to make // different calls to advance the iterator based on *their own* state. simdjson_inline bool operator==(const token_iterator &other) const noexcept; simdjson_inline bool operator!=(const token_iterator &other) const noexcept; simdjson_inline bool operator>(const token_iterator &other) const noexcept; simdjson_inline bool operator>=(const token_iterator &other) const noexcept; simdjson_inline bool operator<(const token_iterator &other) const noexcept; simdjson_inline bool operator<=(const token_iterator &other) const noexcept; protected: simdjson_inline token_iterator(const uint8_t *buf, token_position position) noexcept; /** * Get the index of the JSON text for a given token (relative). * * The result is a numeric index (uint32_t), not a pointer into the JSON. * * @param delta The relative position of the token to retrieve. e.g. 0 = current token, * 1 = next token, -1 = prev token. */ simdjson_inline uint32_t peek_index(int32_t delta=0) const noexcept; /** * Get the index of the JSON text for a given token. * * The result is a numeric index (uint32_t), not a pointer into the JSON. * * @param position The position of the token. * */ simdjson_inline uint32_t peek_index(token_position position) const noexcept; const uint8_t *buf{}; token_position _position{}; friend class json_iterator; friend class value_iterator; friend class object; template friend simdjson_inline void logger::log_line(const json_iterator &iter, const char *title_prefix, const char *title, std::string_view detail, int delta, int depth_delta, logger::log_level level, Args&&...
args) noexcept; template friend simdjson_inline void logger::log_line(const json_iterator &iter, token_position index, depth_t depth, const char *title_prefix, const char *title, std::string_view detail, logger::log_level level, Args&&... args) noexcept; }; } // namespace ondemand } // namespace icelake } // namespace simdjson namespace simdjson { template<> struct simdjson_result : public icelake::implementation_simdjson_result_base { public: simdjson_inline simdjson_result(icelake::ondemand::token_iterator &&value) noexcept; ///< @private simdjson_inline simdjson_result(error_code error) noexcept; ///< @private simdjson_inline simdjson_result() noexcept = default; simdjson_inline ~simdjson_result() noexcept = default; ///< @private }; } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_TOKEN_ITERATOR_H /* end file simdjson/generic/ondemand/token_iterator.h for icelake */ /* including simdjson/generic/ondemand/json_iterator.h for icelake: #include "simdjson/generic/ondemand/json_iterator.h" */ /* begin file simdjson/generic/ondemand/json_iterator.h for icelake */ #ifndef SIMDJSON_GENERIC_ONDEMAND_JSON_ITERATOR_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_JSON_ITERATOR_H */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/token_iterator.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { namespace icelake { namespace ondemand { /** * Iterates through JSON tokens, keeping track of depth and string buffer. * * @private This is not intended for external use. 
*/ class json_iterator { protected: token_iterator token{}; ondemand::parser *parser{}; /** * Next free location in the string buffer. * * Used by raw_json_string::unescape() to have a place to unescape strings to. */ uint8_t *_string_buf_loc{}; /** * JSON error, if there is one. * * INCORRECT_TYPE and NO_SUCH_FIELD are *not* stored here, ever. * * PERF NOTE: we *hope* this will be elided into control flow, as it is only used (a) in the first * iteration of the loop, or (b) for the final iteration after a missing comma is found in ++. If * this is not elided, we should make sure it's at least not using up a register. Failing that, * we should store it in document so there's only one of them. */ error_code error{SUCCESS}; /** * Depth of the current token in the JSON. * * - 0 = finished with document * - 1 = document root value (could be [ or {, not yet known) * - 2 = , or } inside root array/object * - 3 = key or value inside root array/object. */ depth_t _depth{}; /** * Beginning of the document indexes. * Normally we have root == parser->implementation->structural_indexes.get() * but this may differ, especially in streaming mode (where we have several * documents); */ token_position _root{}; /** * Normally, a json_iterator operates over a single document, but in * some cases, we may have a stream of documents. This attribute is meant * as meta-data: the json_iterator works the same irrespective of the * value of this attribute. */ bool _streaming{false}; public: simdjson_inline json_iterator() noexcept = default; simdjson_inline json_iterator(json_iterator &&other) noexcept; simdjson_inline json_iterator &operator=(json_iterator &&other) noexcept; simdjson_inline explicit json_iterator(const json_iterator &other) noexcept = default; simdjson_inline json_iterator &operator=(const json_iterator &other) noexcept = default; /** * Skips a JSON value, whether it is a scalar, array or object. 
*/ simdjson_warn_unused simdjson_inline error_code skip_child(depth_t parent_depth) noexcept; /** * Tell whether the iterator is still at the start */ simdjson_inline bool at_root() const noexcept; /** * Tell whether we should be expected to run in streaming * mode (iterating over many documents). It is pure metadata * that does not affect how the iterator works. It is used by * start_root_array() and start_root_object(). */ simdjson_inline bool streaming() const noexcept; /** * Get the root value iterator */ simdjson_inline token_position root_position() const noexcept; /** * Assert that we are at the document depth (== 1) */ simdjson_inline void assert_at_document_depth() const noexcept; /** * Assert that we are at the root of the document */ simdjson_inline void assert_at_root() const noexcept; /** * Tell whether the iterator is at the EOF mark */ simdjson_inline bool at_end() const noexcept; /** * Tell whether the iterator is live (has not been moved). */ simdjson_inline bool is_alive() const noexcept; /** * Abandon this iterator, setting depth to 0 (as if the document is finished). */ simdjson_inline void abandon() noexcept; /** * Advance the current token without modifying depth. */ simdjson_inline const uint8_t *return_current_and_advance() noexcept; /** * Returns true if there is a single token in the index (i.e., it is * a JSON with a scalar value such as a single number). * * @return whether there is a single token */ simdjson_inline bool is_single_token() const noexcept; /** * Assert that there are at least the given number of tokens left. * * Has no effect in release builds. */ simdjson_inline void assert_more_tokens(uint32_t required_tokens=1) const noexcept; /** * Assert that the given position addresses an actual token (is within bounds). * * Has no effect in release builds. */ simdjson_inline void assert_valid_position(token_position position) const noexcept; /** * Get the JSON text for a given token (relative). 
* * This is not null-terminated; it is a view into the JSON. * * @param delta The relative position of the token to retrieve. e.g. 0 = next token, -1 = prev token. * * TODO consider a string_view, assuming the length will get stripped out by the optimizer when * it isn't used ... */ simdjson_inline const uint8_t *peek(int32_t delta=0) const noexcept; /** * Get the maximum length of the JSON text for the current token (or relative). * * The length will include any whitespace at the end of the token. * * @param delta The relative position of the token to retrieve. e.g. 0 = next token, -1 = prev token. */ simdjson_inline uint32_t peek_length(int32_t delta=0) const noexcept; /** * Get a pointer to the current location in the input buffer. * * This is not null-terminated; it is a view into the JSON. * * You may be pointing outside of the input buffer: it is not generally * safe to dereference this pointer. */ simdjson_inline const uint8_t *unsafe_pointer() const noexcept; /** * Get the JSON text for a given token. * * This is not null-terminated; it is a view into the JSON. * * @param position The position of the token to retrieve. * * TODO consider a string_view, assuming the length will get stripped out by the optimizer when * it isn't used ... */ simdjson_inline const uint8_t *peek(token_position position) const noexcept; /** * Get the maximum length of the JSON text for the current token (or relative). * * The length will include any whitespace at the end of the token. * * @param position The position of the token to retrieve. */ simdjson_inline uint32_t peek_length(token_position position) const noexcept; /** * Get the JSON text for the last token in the document. * * This is not null-terminated; it is a view into the JSON. * * TODO consider a string_view, assuming the length will get stripped out by the optimizer when * it isn't used ... */ simdjson_inline const uint8_t *peek_last() const noexcept; /** * Ascend one level. 
* * Validates that the depth - 1 == parent_depth. * * @param parent_depth the expected parent depth. */ simdjson_inline void ascend_to(depth_t parent_depth) noexcept; /** * Descend one level. * * Validates that the new depth == child_depth. * * @param child_depth the expected child depth. */ simdjson_inline void descend_to(depth_t child_depth) noexcept; simdjson_inline void descend_to(depth_t child_depth, int32_t delta) noexcept; /** * Get current depth. */ simdjson_inline depth_t depth() const noexcept; /** * Get current (writeable) location in the string buffer. */ simdjson_inline uint8_t *&string_buf_loc() noexcept; /** * Report an unrecoverable error, preventing further iteration. * * @param error The error to report. Must not be SUCCESS, UNINITIALIZED, INCORRECT_TYPE, or NO_SUCH_FIELD. * @param message An error message to report with the error. */ simdjson_inline error_code report_error(error_code error, const char *message) noexcept; /** * Log error, but don't stop iteration. * @param error The error to report. Must be INCORRECT_TYPE or NO_SUCH_FIELD. * @param message An error message to report with the error. */ simdjson_inline error_code optional_error(error_code error, const char *message) noexcept; /** * Take an input in json containing max_len characters and attempt to copy it over to tmpbuf, a buffer with * N bytes of capacity. It will return false if N is too small (smaller than max_len) or if it is zero. * The buffer (tmpbuf) is padded with space characters. */ simdjson_warn_unused simdjson_inline bool copy_to_buffer(const uint8_t *json, uint32_t max_len, uint8_t *tmpbuf, size_t N) noexcept; simdjson_inline token_position position() const noexcept; /** * Write the raw_json_string to the string buffer and return a string_view. * Each raw_json_string should be unescaped once, or else the string buffer might * overflow.
*/ simdjson_inline simdjson_result unescape(raw_json_string in, bool allow_replacement) noexcept; simdjson_inline simdjson_result unescape_wobbly(raw_json_string in) noexcept; simdjson_inline void reenter_child(token_position position, depth_t child_depth) noexcept; simdjson_inline error_code consume_character(char c) noexcept; #if SIMDJSON_DEVELOPMENT_CHECKS simdjson_inline token_position start_position(depth_t depth) const noexcept; simdjson_inline void set_start_position(depth_t depth, token_position position) noexcept; #endif /* Useful for debugging and logging purposes. */ inline std::string to_string() const noexcept; /** * Returns the current location in the document if in bounds. */ inline simdjson_result current_location() const noexcept; /** * Updates this json iterator so that it is back at the beginning of the document, * as if it had just been created. */ inline void rewind() noexcept; /** * This checks whether the {,},[,] are balanced so that the document * ends with proper zero depth. This requires scanning the whole document * and it may be expensive. It is expected that it will be rarely called. * It does not attempt to match { with } and [ with ]. */ inline bool balanced() const noexcept; protected: simdjson_inline json_iterator(const uint8_t *buf, ondemand::parser *parser) noexcept; /// The last token before the end simdjson_inline token_position last_position() const noexcept; /// The token *at* the end. This points at gibberish and should only be used for comparison. simdjson_inline token_position end_position() const noexcept; /// The end of the buffer. 
simdjson_inline token_position end() const noexcept; friend class document; friend class document_stream; friend class object; friend class array; friend class value; friend class raw_json_string; friend class parser; friend class value_iterator; template friend simdjson_inline void logger::log_line(const json_iterator &iter, const char *title_prefix, const char *title, std::string_view detail, int delta, int depth_delta, logger::log_level level, Args&&... args) noexcept; template friend simdjson_inline void logger::log_line(const json_iterator &iter, token_position index, depth_t depth, const char *title_prefix, const char *title, std::string_view detail, logger::log_level level, Args&&... args) noexcept; }; // json_iterator } // namespace ondemand } // namespace icelake } // namespace simdjson namespace simdjson { template<> struct simdjson_result : public icelake::implementation_simdjson_result_base { public: simdjson_inline simdjson_result(icelake::ondemand::json_iterator &&value) noexcept; ///< @private simdjson_inline simdjson_result(error_code error) noexcept; ///< @private simdjson_inline simdjson_result() noexcept = default; }; } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_JSON_ITERATOR_H /* end file simdjson/generic/ondemand/json_iterator.h for icelake */ /* including simdjson/generic/ondemand/json_type.h for icelake: #include "simdjson/generic/ondemand/json_type.h" */ /* begin file simdjson/generic/ondemand/json_type.h for icelake */ #ifndef SIMDJSON_GENERIC_ONDEMAND_JSON_TYPE_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_JSON_TYPE_H */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/numberparsing.h" */ /* amalgamation skipped 
(editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { namespace icelake { namespace ondemand { /** * The type of a JSON value. */ enum class json_type { // Start at 1 to catch uninitialized / default values more easily array=1, ///< A JSON array ( [ 1, 2, 3 ... ] ) object, ///< A JSON object ( { "a": 1, "b": 2, ... } ) number, ///< A JSON number ( 1 or -2.3 or 4.5e6 ...) string, ///< A JSON string ( "a" or "hello world\n" ...) boolean, ///< A JSON boolean (true or false) null ///< A JSON null (null) }; /** * A type representing a JSON number. * The design of the struct is deliberately straight-forward. All * functions return standard values with no error check. */ struct number { /** * return the automatically determined type of * the number: number_type::floating_point_number, * number_type::signed_integer or number_type::unsigned_integer. * * enum class number_type { * floating_point_number=1, /// a binary64 number * signed_integer, /// a signed integer that fits in a 64-bit word using two's complement * unsigned_integer /// a positive integer larger or equal to 1<<63 * }; */ simdjson_inline ondemand::number_type get_number_type() const noexcept; /** * return true if the automatically determined type of * the number is number_type::unsigned_integer. */ simdjson_inline bool is_uint64() const noexcept; /** * return the value as a uint64_t, only valid if is_uint64() is true. */ simdjson_inline uint64_t get_uint64() const noexcept; simdjson_inline operator uint64_t() const noexcept; /** * return true if the automatically determined type of * the number is number_type::signed_integer. */ simdjson_inline bool is_int64() const noexcept; /** * return the value as an int64_t, only valid if is_int64() is true. */ simdjson_inline int64_t get_int64() const noexcept; simdjson_inline operator int64_t() const noexcept; /** * return true if the automatically determined type of * the number is number_type::floating_point_number.
*/ simdjson_inline bool is_double() const noexcept; /** * return the value as a double, only valid if is_double() is true. */ simdjson_inline double get_double() const noexcept; simdjson_inline operator double() const noexcept; /** * Convert the number to a double. Though it always succeeds, the conversion * may be lossy if the number cannot be represented exactly. */ simdjson_inline double as_double() const noexcept; protected: /** * The next block of declarations is designed so that we can call the number parsing * functions on a number type. They are protected and should never be used outside * of the core simdjson library. */ friend class value_iterator; template friend error_code numberparsing::slow_float_parsing(simdjson_unused const uint8_t * src, W writer); template friend error_code numberparsing::write_float(const uint8_t *const src, bool negative, uint64_t i, const uint8_t * start_digits, size_t digit_count, int64_t exponent, W &writer); template friend error_code numberparsing::parse_number(const uint8_t *const src, W &writer); /** Store a signed 64-bit value to the number. */ simdjson_inline void append_s64(int64_t value) noexcept; /** Store an unsigned 64-bit value to the number. */ simdjson_inline void append_u64(uint64_t value) noexcept; /** Store a double value to the number. */ simdjson_inline void append_double(double value) noexcept; /** Specifies that the value is a double, but leave it undefined. */ simdjson_inline void skip_double() noexcept; /** * End of friend declarations. */ /** * Our attributes are a union type (size = 64 bits) * followed by a type indicator. */ union { double floating_point_number; int64_t signed_integer; uint64_t unsigned_integer; } payload{0}; number_type type{number_type::signed_integer}; }; /** * Write the JSON type to the output stream * * @param out The output stream. * @param type The json_type.
*/ inline std::ostream& operator<<(std::ostream& out, json_type type) noexcept; #if SIMDJSON_EXCEPTIONS /** * Send JSON type to an output stream. * * @param out The output stream. * @param type The json_type. * @throw simdjson_error if the result being printed has an error. If there is an error with the * underlying output stream, that error will be propagated (simdjson_error will not be * thrown). */ inline std::ostream& operator<<(std::ostream& out, simdjson_result &type) noexcept(false); #endif } // namespace ondemand } // namespace icelake } // namespace simdjson namespace simdjson { template<> struct simdjson_result : public icelake::implementation_simdjson_result_base { public: simdjson_inline simdjson_result(icelake::ondemand::json_type &&value) noexcept; ///< @private simdjson_inline simdjson_result(error_code error) noexcept; ///< @private simdjson_inline simdjson_result() noexcept = default; simdjson_inline ~simdjson_result() noexcept = default; ///< @private }; } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_JSON_TYPE_H /* end file simdjson/generic/ondemand/json_type.h for icelake */ /* including simdjson/generic/ondemand/raw_json_string.h for icelake: #include "simdjson/generic/ondemand/raw_json_string.h" */ /* begin file simdjson/generic/ondemand/raw_json_string.h for icelake */ #ifndef SIMDJSON_GENERIC_ONDEMAND_RAW_JSON_STRING_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_RAW_JSON_STRING_H */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { namespace icelake { namespace ondemand { /** * A string escaped per JSON rules, terminated with a quote (").
They are used to represent * unescaped keys inside JSON documents. * * (In other words, a pointer to the beginning of a string, just after the start quote, inside a * JSON file.) * * This class is deliberately simplistic and has little functionality. You can * compare a raw_json_string instance with an unescaped C string, but * that is nearly all you can do. * * The raw_json_string is unescaped. If you wish to write an unescaped version of it to your own * buffer, you may do so using the parser.unescape(string, buff) method, using an ondemand::parser * instance. Doing so requires you to have a sufficiently large buffer. * * The raw_json_string instances originate typically from field instance which in turn represent * key-value pairs from object instances. From a field instance, you get the raw_json_string * instance by calling key(). You can, if you want a more usable string_view instance, call * the unescaped_key() method on the field instance. You may also create a raw_json_string from * any other string value, with the value.get_raw_json_string() method. Again, you can get * a more usable string_view instance by calling get_string(). * */ class raw_json_string { public: /** * Create a new invalid raw_json_string. * * Exists so you can declare a variable and later assign to it before use. */ simdjson_inline raw_json_string() noexcept = default; /** * Create a new raw_json_string pointed at the given location in the JSON. * * The given location must be just *after* the beginning quote (") in the JSON file. * * It *must* be terminated by a ", and be a valid JSON string. */ simdjson_inline raw_json_string(const uint8_t * _buf) noexcept; /** * Get the raw pointer to the beginning of the string in the JSON (just after the "). * * It is possible for this function to return a null pointer if the instance * has outlived its existence.
*/ simdjson_inline const char * raw() const noexcept; /** * This compares the current instance to the std::string_view target: returns true if * they are byte-by-byte equal (no escaping is done) on target.size() characters, * and if the raw_json_string instance has a quote character at byte index target.size(). * We never read more than length + 1 bytes in the raw_json_string instance. * If length is smaller than target.size(), this will return false. * * The std::string_view instance may contain any characters. However, the caller * is responsible for setting length so that length bytes may be read in the * raw_json_string. * * Performance: the comparison may be done using memcmp which may be efficient * for long strings. */ simdjson_inline bool unsafe_is_equal(size_t length, std::string_view target) const noexcept; /** * This compares the current instance to the std::string_view target: returns true if * they are byte-by-byte equal (no escaping is done). * The std::string_view instance should not contain unescaped quote characters: * the caller is responsible for this check. See is_free_from_unescaped_quote. * * Performance: the comparison is done byte-by-byte which might be inefficient for * long strings. * * If target is a compile-time constant, and your compiler likes you, * you should be able to do the following without performance penalty... * * static_assert(raw_json_string::is_free_from_unescaped_quote(target), ""); * s.unsafe_is_equal(target); */ simdjson_inline bool unsafe_is_equal(std::string_view target) const noexcept; /** * This compares the current instance to the C string target: returns true if * they are byte-by-byte equal (no escaping is done). * The provided C string should not contain an unescaped quote character: * the caller is responsible for this check. See is_free_from_unescaped_quote. * * If target is a compile-time constant, and your compiler likes you, * you should be able to do the following without performance penalty...
* * static_assert(raw_json_string::is_free_from_unescaped_quote(target), ""); * s.unsafe_is_equal(target); */ simdjson_inline bool unsafe_is_equal(const char* target) const noexcept; /** * This compares the current instance to the std::string_view target: returns true if * they are byte-by-byte equal (no escaping is done). */ simdjson_inline bool is_equal(std::string_view target) const noexcept; /** * This compares the current instance to the C string target: returns true if * they are byte-by-byte equal (no escaping is done). */ simdjson_inline bool is_equal(const char* target) const noexcept; /** * Returns true if target is free from unescaped quotes. If target is known at * compile-time, we might expect the computation to happen at compile time with * many compilers (not all!). */ static simdjson_inline bool is_free_from_unescaped_quote(std::string_view target) noexcept; static simdjson_inline bool is_free_from_unescaped_quote(const char* target) noexcept; private: /** * This will set the inner pointer to zero, effectively making * this instance unusable. */ simdjson_inline void consume() noexcept { buf = nullptr; } /** * Checks whether the inner pointer is non-null and thus usable. */ simdjson_inline simdjson_warn_unused bool alive() const noexcept { return buf != nullptr; } /** * Unescape this JSON string, replacing \\ with \, \n with newline, etc. * The result will be a valid UTF-8. * * ## IMPORTANT: string_view lifetime * * The string_view is only valid until the next parse() call on the parser. * * @param iter A json_iterator, which contains a buffer where the string will be written. * @param allow_replacement Whether we allow replacement of invalid surrogate pairs. */ simdjson_inline simdjson_warn_unused simdjson_result unescape(json_iterator &iter, bool allow_replacement) const noexcept; /** * Unescape this JSON string, replacing \\ with \, \n with newline, etc. * The result may not be a valid UTF-8.
https://simonsapin.github.io/wtf-8/ * * ## IMPORTANT: string_view lifetime * * The string_view is only valid until the next parse() call on the parser. * * @param iter A json_iterator, which contains a buffer where the string will be written. */ simdjson_inline simdjson_warn_unused simdjson_result unescape_wobbly(json_iterator &iter) const noexcept; const uint8_t * buf{}; friend class object; friend class field; friend class parser; friend struct simdjson_result; }; simdjson_unused simdjson_inline std::ostream &operator<<(std::ostream &, const raw_json_string &) noexcept; /** * Comparisons between raw_json_string and std::string_view instances are potentially unsafe: the user is responsible * for providing a string with no unescaped quote. Note that unescaped quotes cannot be present in valid JSON strings. */ simdjson_unused simdjson_inline bool operator==(const raw_json_string &a, std::string_view c) noexcept; simdjson_unused simdjson_inline bool operator==(std::string_view c, const raw_json_string &a) noexcept; simdjson_unused simdjson_inline bool operator!=(const raw_json_string &a, std::string_view c) noexcept; simdjson_unused simdjson_inline bool operator!=(std::string_view c, const raw_json_string &a) noexcept; } // namespace ondemand } // namespace icelake } // namespace simdjson namespace simdjson { template<> struct simdjson_result : public icelake::implementation_simdjson_result_base { public: simdjson_inline simdjson_result(icelake::ondemand::raw_json_string &&value) noexcept; ///< @private simdjson_inline simdjson_result(error_code error) noexcept; ///< @private simdjson_inline simdjson_result() noexcept = default; simdjson_inline ~simdjson_result() noexcept = default; ///< @private simdjson_inline simdjson_result raw() const noexcept; simdjson_inline simdjson_warn_unused simdjson_result unescape(icelake::ondemand::json_iterator &iter, bool allow_replacement) const noexcept; simdjson_inline simdjson_warn_unused simdjson_result
unescape_wobbly(icelake::ondemand::json_iterator &iter) const noexcept; }; } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_RAW_JSON_STRING_H /* end file simdjson/generic/ondemand/raw_json_string.h for icelake */ /* including simdjson/generic/ondemand/parser.h for icelake: #include "simdjson/generic/ondemand/parser.h" */ /* begin file simdjson/generic/ondemand/parser.h for icelake */ #ifndef SIMDJSON_GENERIC_ONDEMAND_PARSER_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_PARSER_H */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ #include namespace simdjson { namespace icelake { namespace ondemand { /** * The default batch size for document_stream instances for this On Demand kernel. * Note that different On Demand kernels may use a different DEFAULT_BATCH_SIZE value * in the future. */ static constexpr size_t DEFAULT_BATCH_SIZE = 1000000; /** * Some adversary might try to set the batch size to 0 or 1, which might cause problems. * We set a minimum of 32B since anything else is highly likely to be an error. In practice, * most users will want a much larger batch size. * * All non-negative MINIMAL_BATCH_SIZE values should be 'safe' except that, obviously, no JSON * document can ever span 0 or 1 byte and that very large values would create memory allocation issues. */ static constexpr size_t MINIMAL_BATCH_SIZE = 32; /** * A JSON fragment iterator. * * This holds the actual iterator as well as the buffer for writing strings. */ class parser { public: /** * Create a JSON parser. * * The new parser will have zero capacity.
*/ inline explicit parser(size_t max_capacity = SIMDJSON_MAXSIZE_BYTES) noexcept; inline parser(parser &&other) noexcept = default; simdjson_inline parser(const parser &other) = delete; simdjson_inline parser &operator=(const parser &other) = delete; simdjson_inline parser &operator=(parser &&other) noexcept = default; /** Deallocate the JSON parser. */ inline ~parser() noexcept = default; /** * Start iterating an on-demand JSON document. * * ondemand::parser parser; * document doc = parser.iterate(json); * * It is expected that the content is a valid UTF-8 file, containing a valid JSON document. * Otherwise the iterate method may return an error. In particular, the whole input should be * valid: we do not attempt to tolerate incorrect content either before or after a JSON * document. If there is a UTF-8 BOM, the parser skips it. * * ### IMPORTANT: Validate what you use * * Calling iterate on an invalid JSON document may not immediately trigger an error. The call to * iterate does not parse and validate the whole document. * * ### IMPORTANT: Buffer Lifetime * * Because parsing is done while you iterate, you *must* keep the JSON buffer around at least as * long as the document iteration. * * ### IMPORTANT: Document Lifetime * * Only one iteration at a time can happen per parser, and the parser *must* be kept alive during * iteration to ensure intermediate buffers can be accessed. Any document must be destroyed before * you call iterate() again or destroy the parser. * * ### REQUIRED: Buffer Padding * * The buffer must have at least SIMDJSON_PADDING extra allocated bytes. It does not matter what * those bytes are initialized to, as long as they are allocated. These bytes will be read: if you * are using a sanitizer that verifies that no uninitialized byte is read, then you should initialize the * SIMDJSON_PADDING bytes to avoid runtime warnings. * * @param json The JSON to parse. * @param len The length of the JSON.
* @param capacity The number of bytes allocated in the JSON (must be at least len+SIMDJSON_PADDING). * * @return The document, or an error: * - INSUFFICIENT_PADDING if the input has less than SIMDJSON_PADDING extra bytes. * - MEMALLOC if the parser does not have enough capacity, and memory * allocation fails. * - EMPTY if the document is all whitespace. * - UTF8_ERROR if the document is not valid UTF-8. * - UNESCAPED_CHARS if a string contains control characters that must be escaped * - UNCLOSED_STRING if there is an unclosed string in the document. */ simdjson_warn_unused simdjson_result iterate(padded_string_view json) & noexcept; /** @overload simdjson_result iterate(padded_string_view json) & noexcept */ simdjson_warn_unused simdjson_result iterate(const char *json, size_t len, size_t capacity) & noexcept; /** @overload simdjson_result iterate(padded_string_view json) & noexcept */ simdjson_warn_unused simdjson_result iterate(const uint8_t *json, size_t len, size_t capacity) & noexcept; /** @overload simdjson_result iterate(padded_string_view json) & noexcept */ simdjson_warn_unused simdjson_result iterate(std::string_view json, size_t capacity) & noexcept; /** @overload simdjson_result iterate(padded_string_view json) & noexcept */ simdjson_warn_unused simdjson_result iterate(const std::string &json) & noexcept; /** @overload simdjson_result iterate(padded_string_view json) & noexcept */ simdjson_warn_unused simdjson_result iterate(std::string &json) & noexcept; /** @overload simdjson_result iterate(padded_string_view json) & noexcept */ simdjson_warn_unused simdjson_result iterate(const simdjson_result &json) & noexcept; /** @overload simdjson_result iterate(padded_string_view json) & noexcept */ simdjson_warn_unused simdjson_result iterate(const simdjson_result &json) & noexcept; /** @overload simdjson_result iterate(padded_string_view json) & noexcept */ simdjson_warn_unused simdjson_result iterate(padded_string &&json) & noexcept =
delete; /** * @private * * Start iterating an on-demand JSON document. * * ondemand::parser parser; * json_iterator doc = parser.iterate_raw(json); * * ### IMPORTANT: Buffer Lifetime * * Because parsing is done while you iterate, you *must* keep the JSON buffer around at least as * long as the document iteration. * * ### IMPORTANT: Document Lifetime * * Only one iteration at a time can happen per parser, and the parser *must* be kept alive during * iteration to ensure intermediate buffers can be accessed. Any document must be destroyed before * you call iterate() again or destroy the parser. * * The ondemand::document instance holds the iterator. The document must remain in scope * while you are accessing instances of ondemand::value, ondemand::object, ondemand::array. * * ### REQUIRED: Buffer Padding * * The buffer must have at least SIMDJSON_PADDING extra allocated bytes. It does not matter what * those bytes are initialized to, as long as they are allocated. These bytes will be read: if you * are using a sanitizer that verifies that no uninitialized byte is read, then you should initialize the * SIMDJSON_PADDING bytes to avoid runtime warnings. * * @param json The JSON to parse. * * @return The iterator, or an error: * - INSUFFICIENT_PADDING if the input has less than SIMDJSON_PADDING extra bytes. * - MEMALLOC if the parser does not have enough capacity, and memory * allocation fails. * - EMPTY if the document is all whitespace. * - UTF8_ERROR if the document is not valid UTF-8. * - UNESCAPED_CHARS if a string contains control characters that must be escaped * - UNCLOSED_STRING if there is an unclosed string in the document. */ simdjson_warn_unused simdjson_result iterate_raw(padded_string_view json) & noexcept; /** * Parse a buffer containing many JSON documents.
* * auto json = R"({ "foo": 1 } { "foo": 2 } { "foo": 3 } )"_padded; * ondemand::parser parser; * ondemand::document_stream docs = parser.iterate_many(json); * for (auto & doc : docs) { * std::cout << doc["foo"] << std::endl; * } * // Prints 1 2 3 * * No copy of the input buffer is made. * * The function is lazy: it may be that no more than one JSON document at a time is parsed. * * The caller is responsible for ensuring that the input string data remains unchanged and is * not deleted during the loop. * * ### Format * * The buffer must contain a series of one or more JSON documents, concatenated into a single * buffer, separated by ASCII whitespace. It effectively parses until it has a fully valid document, * then starts parsing the next document at that point. (It does this with more parallelism and * lookahead than you might think, though.) * * Documents that consist of an object or array may omit the whitespace between them, concatenating * with no separator. Documents that consist of a single primitive (i.e. documents that are not * arrays or objects) MUST be separated with ASCII whitespace. * * The characters inside a JSON document, and between JSON documents, must be valid Unicode (UTF-8). * If there is a UTF-8 BOM, the parser skips it. * * The documents must not exceed batch_size bytes (by default 1MB) or they will fail to parse. * Setting batch_size to excessively large or excessively small values may impact negatively the * performance. * * ### REQUIRED: Buffer Padding * * The buffer must have at least SIMDJSON_PADDING extra allocated bytes. It does not matter what * those bytes are initialized to, as long as they are allocated. These bytes will be read: if you * are using a sanitizer that verifies that no uninitialized byte is read, then you should initialize the * SIMDJSON_PADDING bytes to avoid runtime warnings. * * ### Threads * * When compiled with SIMDJSON_THREADS_ENABLED, this method will use a single thread under the * hood to do some lookahead.
* * ### Parser Capacity * * If the parser's current capacity is less than batch_size, it will allocate enough capacity * to handle it (up to max_capacity). * * @param buf The concatenated JSON to parse. * @param len The length of the concatenated JSON. * @param batch_size The batch size to use. MUST be larger than the largest document. The sweet * spot is cache-related: small enough to fit in cache, yet big enough to * parse as many documents as possible in one tight loop. * Defaults to 1MB (DEFAULT_BATCH_SIZE), which has been a reasonable sweet spot in our tests. * @param allow_comma_separated (defaults to false) This allows a mode where the documents are * separated by commas instead of whitespace. It comes with a performance * penalty because the entire document is indexed at once (and the document must be * less than 4 GB), and there is no multithreading. In this mode, the batch_size parameter * is effectively ignored, as it is set to at least the document size. * @return The stream, or an error. An empty input will yield 0 documents rather than an EMPTY error. Errors: * - MEMALLOC if the parser does not have enough capacity and memory allocation fails * - CAPACITY if the parser does not have enough capacity and batch_size > max_capacity. * - other json errors if parsing fails. You should not rely on these errors to always be the same for the * same document: they may vary under runtime dispatch (so they may vary depending on your system and hardware).
*/ inline simdjson_result iterate_many(const uint8_t *buf, size_t len, size_t batch_size = DEFAULT_BATCH_SIZE, bool allow_comma_separated = false) noexcept; /** @overload iterate_many(const uint8_t *buf, size_t len, size_t batch_size) */ inline simdjson_result iterate_many(const char *buf, size_t len, size_t batch_size = DEFAULT_BATCH_SIZE, bool allow_comma_separated = false) noexcept; /** @overload iterate_many(const uint8_t *buf, size_t len, size_t batch_size) */ inline simdjson_result iterate_many(const std::string &s, size_t batch_size = DEFAULT_BATCH_SIZE, bool allow_comma_separated = false) noexcept; inline simdjson_result iterate_many(const std::string &&s, size_t batch_size, bool allow_comma_separated = false) = delete;// unsafe /** @overload iterate_many(const uint8_t *buf, size_t len, size_t batch_size) */ inline simdjson_result iterate_many(const padded_string &s, size_t batch_size = DEFAULT_BATCH_SIZE, bool allow_comma_separated = false) noexcept; inline simdjson_result iterate_many(const padded_string &&s, size_t batch_size, bool allow_comma_separated = false) = delete;// unsafe /** @private We do not want to allow implicit conversion from C string to std::string. */ simdjson_result iterate_many(const char *buf, size_t batch_size = DEFAULT_BATCH_SIZE) noexcept = delete; /** The capacity of this parser (the largest document it can process). */ simdjson_inline size_t capacity() const noexcept; /** The maximum capacity of this parser (the largest document it is allowed to process). */ simdjson_inline size_t max_capacity() const noexcept; simdjson_inline void set_max_capacity(size_t max_capacity) noexcept; /** * The maximum depth of this parser (the most deeply nested objects and arrays it can process). * This parameter is only relevant when the macro SIMDJSON_DEVELOPMENT_CHECKS is set to true. * The document's instance current_depth() method should be used to monitor the parsing * depth and limit it if desired.
*/ simdjson_inline size_t max_depth() const noexcept; /** * Ensure this parser has enough memory to process JSON documents up to `capacity` bytes in length * and `max_depth` depth. * * The max_depth parameter is only relevant when the macro SIMDJSON_DEVELOPMENT_CHECKS is set to true. * The document's instance current_depth() method should be used to monitor the parsing * depth and limit it if desired. * * @param capacity The new capacity. * @param max_depth The new max_depth. Defaults to DEFAULT_MAX_DEPTH. * @return The error, if there is one. */ simdjson_warn_unused error_code allocate(size_t capacity, size_t max_depth=DEFAULT_MAX_DEPTH) noexcept; #ifdef SIMDJSON_THREADS_ENABLED /** * The parser instance can use threads when they are available to speed up some * operations. It is enabled by default. Changing this attribute will change the * behavior of the parser for future operations. */ bool threaded{true}; #endif /** * Unescape this JSON string, replacing \\ with \, \n with newline, etc. to a user-provided buffer. * The result must be valid UTF-8. * The provided pointer is advanced to the end of the string by reference, and a string_view instance * is returned. You can ensure that your buffer is large enough by allocating a block of memory at least * as large as the input JSON plus SIMDJSON_PADDING and then unescape all strings to this one buffer. * * This unescape function is a low-level function. If you want a more user-friendly approach, you should * avoid raw_json_string instances (e.g., by calling unescaped_key() instead of key() or get_string() * instead of get_raw_json_string()). * * ## IMPORTANT: string_view lifetime * * The string_view is only valid as long as the bytes in dst. * * @param in The raw_json_string to unescape. * @param dst A pointer to a buffer at least large enough to write this string as well as * an additional SIMDJSON_PADDING bytes. * @param allow_replacement Whether we allow a replacement if the input string contains unmatched surrogate pairs.
* @return A string_view pointing at the unescaped string in dst * @error STRING_ERROR if escapes are incorrect. */ simdjson_inline simdjson_result unescape(raw_json_string in, uint8_t *&dst, bool allow_replacement = false) const noexcept; /** * Unescape this JSON string, replacing \\ with \, \n with newline, etc. to a user-provided buffer. * The result may not be valid UTF-8. See https://simonsapin.github.io/wtf-8/ * The provided pointer is advanced to the end of the string by reference, and a string_view instance * is returned. You can ensure that your buffer is large enough by allocating a block of memory at least * as large as the input JSON plus SIMDJSON_PADDING and then unescape all strings to this one buffer. * * This unescape function is a low-level function. If you want a more user-friendly approach, you should * avoid raw_json_string instances (e.g., by calling unescaped_key() instead of key() or get_string() * instead of get_raw_json_string()). * * ## IMPORTANT: string_view lifetime * * The string_view is only valid as long as the bytes in dst. * * @param in The raw_json_string to unescape. * @param dst A pointer to a buffer at least large enough to write this string as well as * an additional SIMDJSON_PADDING bytes. * @return A string_view pointing at the unescaped string in dst * @error STRING_ERROR if escapes are incorrect.
*/ simdjson_inline simdjson_result unescape_wobbly(raw_json_string in, uint8_t *&dst) const noexcept; private: /** @private [for benchmarking access] The implementation to use */ std::unique_ptr implementation{}; size_t _capacity{0}; size_t _max_capacity; size_t _max_depth{DEFAULT_MAX_DEPTH}; std::unique_ptr string_buf{}; #if SIMDJSON_DEVELOPMENT_CHECKS std::unique_ptr start_positions{}; #endif friend class json_iterator; friend class document_stream; }; } // namespace ondemand } // namespace icelake } // namespace simdjson namespace simdjson { template<> struct simdjson_result : public icelake::implementation_simdjson_result_base { public: simdjson_inline simdjson_result(icelake::ondemand::parser &&value) noexcept; ///< @private simdjson_inline simdjson_result(error_code error) noexcept; ///< @private simdjson_inline simdjson_result() noexcept = default; }; } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_PARSER_H /* end file simdjson/generic/ondemand/parser.h for icelake */ // All other declarations /* including simdjson/generic/ondemand/array.h for icelake: #include "simdjson/generic/ondemand/array.h" */ /* begin file simdjson/generic/ondemand/array.h for icelake */ #ifndef SIMDJSON_GENERIC_ONDEMAND_ARRAY_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_ARRAY_H */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { namespace icelake { namespace ondemand { /** * A forward-only JSON array. */ class array { public: /** * Create a new invalid array. 
* * Exists so you can declare a variable and later assign to it before use. */ simdjson_inline array() noexcept = default; /** * Begin array iteration. * * Part of the std::iterable interface. */ simdjson_inline simdjson_result begin() noexcept; /** * Sentinel representing the end of the array. * * Part of the std::iterable interface. */ simdjson_inline simdjson_result end() noexcept; /** * This method scans the array and counts the number of elements. * The count_elements method should always be called before you have begun * iterating through the array: it is expected that you are pointing at * the beginning of the array. * The runtime complexity is linear in the size of the array. After * calling this function, if successful, the array is 'rewinded' at its * beginning as if it had never been accessed. If the JSON is malformed (e.g., * there is a missing comma), then an error is returned and it is no longer * safe to continue. * * To check that an array is empty, it is more performant to use * the is_empty() method. */ simdjson_inline simdjson_result count_elements() & noexcept; /** * This method scans the beginning of the array and checks whether the * array is empty. * The runtime complexity is constant time. After * calling this function, if successful, the array is 'rewinded' at its * beginning as if it had never been accessed. If the JSON is malformed (e.g., * there is a missing comma), then an error is returned and it is no longer * safe to continue. */ simdjson_inline simdjson_result is_empty() & noexcept; /** * Reset the iterator so that we are pointing back at the * beginning of the array. You should still consume values only once even if you * can iterate through the array more than once. If you unescape a string * within the array more than once, you have unsafe code. Note that rewinding * an array means that you may need to reparse it anew: it is not a free * operation. 
* * @returns true if the array contains some elements (not empty) */ inline simdjson_result reset() & noexcept; /** * Get the value associated with the given JSON pointer. We use the RFC 6901 * https://tools.ietf.org/html/rfc6901 standard, interpreting the current node * as the root of its own JSON document. * * ondemand::parser parser; * auto json = R"([ { "foo": { "a": [ 10, 20, 30 ] }} ])"_padded; * auto doc = parser.iterate(json); * doc.at_pointer("/0/foo/a/1") == 20 * * Note that at_pointer() called on the document automatically calls the document's rewind * method between each call. It invalidates all previously accessed arrays, objects and values * that have not been consumed. Yet it is not the case when calling at_pointer on an array * instance: there is no rewind and no invalidation. * * You may only call at_pointer on an array after it has been created, but before it has * been first accessed. When calling at_pointer on an array, the pointer is advanced to * the location indicated by the JSON pointer (in case of success). It is no longer possible * to call at_pointer on the same array. * * Also note that at_pointer() relies on find_field() which implies that we do not unescape keys when matching. * * @return The value associated with the given JSON pointer, or: * - NO_SUCH_FIELD if a field does not exist in an object * - INDEX_OUT_OF_BOUNDS if an array index is larger than an array length * - INCORRECT_TYPE if a non-integer is used to access an array * - INVALID_JSON_POINTER if the JSON pointer is invalid and cannot be parsed */ inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; /** * Consumes the array and returns a string_view instance corresponding to the * array as represented in JSON. It points inside the original document. */ simdjson_inline simdjson_result raw_json() noexcept; /** * Get the value at the given index. This function has linear-time complexity. 
* This function should only be called once on an array instance since the array iterator is not reset between each call. * * @return The value at the given index, or: * - INDEX_OUT_OF_BOUNDS if the array index is larger than an array length */ simdjson_inline simdjson_result at(size_t index) noexcept; protected: /** * Go to the end of the array, no matter where you are right now. */ simdjson_inline error_code consume() noexcept; /** * Begin array iteration. * * @param iter The iterator. Must be where the initial [ is expected. Will be *moved* into the * resulting array. * @error INCORRECT_TYPE if the iterator is not at [. */ static simdjson_inline simdjson_result start(value_iterator &iter) noexcept; /** * Begin array iteration from the root. * * @param iter The iterator. Must be where the initial [ is expected. Will be *moved* into the * resulting array. * @error INCORRECT_TYPE if the iterator is not at [. * @error TAPE_ERROR if there is no closing ] at the end of the document. */ static simdjson_inline simdjson_result start_root(value_iterator &iter) noexcept; /** * Begin array iteration. * * This version of the method should be called after the initial [ has been verified, and is * intended for use by switch statements that check the type of a value. * * @param iter The iterator. Must be after the initial [. Will be *moved* into the resulting array. */ static simdjson_inline simdjson_result started(value_iterator &iter) noexcept; /** * Create an array from the given iterator. This is internal array creation: call array::start() or * array::started() instead of this. * * @param iter The iterator. Must either be at the start of the first element with iter.is_alive() * == true, or past the [] with is_alive() == false if the array is empty. Will be *moved* * into the resulting array. */ simdjson_inline array(const value_iterator &iter) noexcept; /** * Iterator marking current position. * * iter.is_alive() == false indicates iteration is complete.
*/ value_iterator iter{}; friend class value; friend class document; friend struct simdjson_result; friend struct simdjson_result; friend class array_iterator; }; } // namespace ondemand } // namespace icelake } // namespace simdjson namespace simdjson { template<> struct simdjson_result : public icelake::implementation_simdjson_result_base { public: simdjson_inline simdjson_result(icelake::ondemand::array &&value) noexcept; ///< @private simdjson_inline simdjson_result(error_code error) noexcept; ///< @private simdjson_inline simdjson_result() noexcept = default; simdjson_inline simdjson_result begin() noexcept; simdjson_inline simdjson_result end() noexcept; inline simdjson_result count_elements() & noexcept; inline simdjson_result is_empty() & noexcept; inline simdjson_result reset() & noexcept; simdjson_inline simdjson_result at(size_t index) noexcept; simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; simdjson_inline simdjson_result raw_json() noexcept; }; } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_ARRAY_H /* end file simdjson/generic/ondemand/array.h for icelake */ /* including simdjson/generic/ondemand/array_iterator.h for icelake: #include "simdjson/generic/ondemand/array_iterator.h" */ /* begin file simdjson/generic/ondemand/array_iterator.h for icelake */ #ifndef SIMDJSON_GENERIC_ONDEMAND_ARRAY_ITERATOR_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_ARRAY_ITERATOR_H */ /* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { namespace icelake { namespace ondemand { /** * A 
forward-only JSON array. * * This is an input_iterator, meaning: * - It is forward-only * - * must be called exactly once per element. * - ++ must be called exactly once in between each * (*, ++, *, ++, * ...) */ class array_iterator { public: /** Create a new, invalid array iterator. */ simdjson_inline array_iterator() noexcept = default; // // Iterator interface // /** * Get the current element. * * Part of the std::iterator interface. */ simdjson_inline simdjson_result operator*() noexcept; // MUST ONLY BE CALLED ONCE PER ITERATION. /** * Check if we are at the end of the JSON. * * Part of the std::iterator interface. * * @return true if there are no more elements in the JSON array. */ simdjson_inline bool operator==(const array_iterator &) const noexcept; /** * Check if there are more elements in the JSON array. * * Part of the std::iterator interface. * * @return true if there are more elements in the JSON array. */ simdjson_inline bool operator!=(const array_iterator &) const noexcept; /** * Move to the next element. * * Part of the std::iterator interface. */ simdjson_inline array_iterator &operator++() noexcept; private: value_iterator iter{}; simdjson_inline array_iterator(const value_iterator &iter) noexcept; friend class array; friend class value; friend struct simdjson_result; }; } // namespace ondemand } // namespace icelake } // namespace simdjson namespace simdjson { template<> struct simdjson_result : public icelake::implementation_simdjson_result_base { public: simdjson_inline simdjson_result(icelake::ondemand::array_iterator &&value) noexcept; ///< @private simdjson_inline simdjson_result(error_code error) noexcept; ///< @private simdjson_inline simdjson_result() noexcept = default; // // Iterator interface // simdjson_inline simdjson_result operator*() noexcept; // MUST ONLY BE CALLED ONCE PER ITERATION. 
simdjson_inline bool operator==(const simdjson_result &) const noexcept; simdjson_inline bool operator!=(const simdjson_result &) const noexcept; simdjson_inline simdjson_result &operator++() noexcept; }; } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_ARRAY_ITERATOR_H /* end file simdjson/generic/ondemand/array_iterator.h for icelake */ /* including simdjson/generic/ondemand/document.h for icelake: #include "simdjson/generic/ondemand/document.h" */ /* begin file simdjson/generic/ondemand/document.h for icelake */ #ifndef SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_H */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { namespace icelake { namespace ondemand { /** * A JSON document. It holds a json_iterator instance. * * Used by tokens to get text, and string buffer location. * * You must keep the document around during iteration. */ class document { public: /** * Create a new invalid document. * * Exists so you can declare a variable and later assign to it before use. */ simdjson_inline document() noexcept = default; simdjson_inline document(const document &other) noexcept = delete; // pass your documents by reference, not by copy simdjson_inline document(document &&other) noexcept = default; simdjson_inline document &operator=(const document &other) noexcept = delete; simdjson_inline document &operator=(document &&other) noexcept = default; /** * Cast this JSON value to an array. * * @returns An object that can be used to iterate the array. * @returns INCORRECT_TYPE If the JSON value is not an array. 
*/ simdjson_inline simdjson_result get_array() & noexcept; /** * Cast this JSON value to an object. * * @returns An object that can be used to look up or iterate fields. * @returns INCORRECT_TYPE If the JSON value is not an object. */ simdjson_inline simdjson_result get_object() & noexcept; /** * Cast this JSON value to an unsigned integer. * * @returns An unsigned 64-bit integer. * @returns INCORRECT_TYPE If the JSON value is not a 64-bit unsigned integer. */ simdjson_inline simdjson_result get_uint64() noexcept; /** * Cast this JSON value (inside string) to an unsigned integer. * * @returns An unsigned 64-bit integer. * @returns INCORRECT_TYPE If the JSON value is not a 64-bit unsigned integer. */ simdjson_inline simdjson_result get_uint64_in_string() noexcept; /** * Cast this JSON value to a signed integer. * * @returns A signed 64-bit integer. * @returns INCORRECT_TYPE If the JSON value is not a 64-bit integer. */ simdjson_inline simdjson_result get_int64() noexcept; /** * Cast this JSON value (inside string) to a signed integer. * * @returns A signed 64-bit integer. * @returns INCORRECT_TYPE If the JSON value is not a 64-bit integer. */ simdjson_inline simdjson_result get_int64_in_string() noexcept; /** * Cast this JSON value to a double. * * @returns A double. * @returns INCORRECT_TYPE If the JSON value is not a valid floating-point number. */ simdjson_inline simdjson_result get_double() noexcept; /** * Cast this JSON value (inside string) to a double. * * @returns A double. * @returns INCORRECT_TYPE If the JSON value is not a valid floating-point number. */ simdjson_inline simdjson_result get_double_in_string() noexcept; /** * Cast this JSON value to a string. * * The string is guaranteed to be valid UTF-8. * * Important: Calling get_string() twice on the same document is an error. * * @param allow_replacement Whether to allow a replacement character for unmatched surrogate pairs. * @returns An UTF-8 string.
The string is stored in the parser and will be invalidated the next * time it parses a document or when it is destroyed. * @returns INCORRECT_TYPE if the JSON value is not a string. */ simdjson_inline simdjson_result get_string(bool allow_replacement = false) noexcept; /** * Attempts to fill the provided std::string reference with the parsed value of the current string. * * The string is guaranteed to be valid UTF-8. * * Important: a value should be consumed once. Calling get_string() twice on the same value * is an error. * * Performance: This method may be slower than get_string() or get_string(bool) because it may need to allocate memory. * We recommend you avoid allocating an std::string unless you need to. * * @returns INCORRECT_TYPE if the JSON value is not a string. Otherwise, we return SUCCESS. */ template simdjson_inline error_code get_string(string_type& receiver, bool allow_replacement = false) noexcept; /** * Cast this JSON value to a string. * * The string is not guaranteed to be valid UTF-8. See https://simonsapin.github.io/wtf-8/ * * Important: Calling get_wobbly_string() twice on the same document is an error. * * @returns An UTF-8 string. The string is stored in the parser and will be invalidated the next * time it parses a document or when it is destroyed. * @returns INCORRECT_TYPE if the JSON value is not a string. */ simdjson_inline simdjson_result get_wobbly_string() noexcept; /** * Cast this JSON value to a raw_json_string. * * The string is guaranteed to be valid UTF-8, and may have escapes in it (e.g. \\ or \n). * * @returns A pointer to the raw JSON for the given string. * @returns INCORRECT_TYPE if the JSON value is not a string. */ simdjson_inline simdjson_result get_raw_json_string() noexcept; /** * Cast this JSON value to a bool. * * @returns A bool value. * @returns INCORRECT_TYPE if the JSON value is not true or false. 
*/ simdjson_inline simdjson_result get_bool() noexcept; /** * Cast this JSON value to a value when the document is an object or an array. * * You must not have begun iterating through the object or array. When * SIMDJSON_DEVELOPMENT_CHECKS is set to 1 (which is the case when building in Debug mode * by default), and you have already begun iterating, * you will get an OUT_OF_ORDER_ITERATION error. If you have begun iterating, you can use * rewind() to reset the document to its initial state before calling this method. * * @returns A value if the document is a JSON array or object. * @returns SCALAR_DOCUMENT_AS_VALUE error if the document is a scalar (see is_scalar() function). */ simdjson_inline simdjson_result get_value() noexcept; /** * Checks if this JSON value is null. If and only if the value is * null, then it is consumed (we advance). If we find a token that * begins with 'n' but is not 'null', then an error is returned. * * @returns Whether the value is null. * @returns INCORRECT_TYPE If the JSON value begins with 'n' and is not 'null'. */ simdjson_inline simdjson_result is_null() noexcept; /** * Get this value as the given type. * * Supported types: object, array, raw_json_string, string_view, uint64_t, int64_t, double, bool * * You may use get_double(), get_bool(), get_uint64(), get_int64(), * get_object(), get_array(), get_raw_json_string(), or get_string() instead. * * @returns A value of the given type, parsed from the JSON. * @returns INCORRECT_TYPE If the JSON value is not the given type. */ template simdjson_inline simdjson_result get() & noexcept { // Unless the simdjson library provides an inline implementation, calling this method should // immediately fail. static_assert(!sizeof(T), "The get method with given type is not implemented by the simdjson library. " "The supported types are ondemand::object, ondemand::array, raw_json_string, std::string_view, uint64_t, " "int64_t, double, and bool.
We recommend you use get_double(), get_bool(), get_uint64(), get_int64(), " " get_object(), get_array(), get_raw_json_string(), or get_string() instead of the get template."); } /** @overload template simdjson_result get() & noexcept */ template simdjson_inline simdjson_result get() && noexcept { // Unless the simdjson library provides an inline implementation, calling this method should // immediately fail. static_assert(!sizeof(T), "The get method with given type is not implemented by the simdjson library. " "The supported types are ondemand::object, ondemand::array, raw_json_string, std::string_view, uint64_t, " "int64_t, double, and bool. We recommend you use get_double(), get_bool(), get_uint64(), get_int64(), " " get_object(), get_array(), get_raw_json_string(), or get_string() instead of the get template."); } /** * Get this value as the given type. * * Supported types: object, array, raw_json_string, string_view, uint64_t, int64_t, double, bool, value * * Be mindful that the document instance must remain in scope while you are accessing object, array and value instances. * * @param out This is set to a value of the given type, parsed from the JSON. If there is an error, this may not be initialized. * @returns INCORRECT_TYPE If the JSON value is not an object. * @returns SUCCESS If the parse succeeded and the out parameter was set to the value. */ template simdjson_inline error_code get(T &out) & noexcept; /** @overload template error_code get(T &out) & noexcept */ template simdjson_inline error_code get(T &out) && noexcept; #if SIMDJSON_EXCEPTIONS /** * Cast this JSON value to an array. * * @returns An object that can be used to iterate the array. * @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not an array. */ simdjson_inline operator array() & noexcept(false); /** * Cast this JSON value to an object. * * @returns An object that can be used to look up or iterate fields. 
* @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not an object. */ simdjson_inline operator object() & noexcept(false); /** * Cast this JSON value to an unsigned integer. * * @returns An unsigned 64-bit integer. * @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not a 64-bit unsigned integer. */ simdjson_inline operator uint64_t() noexcept(false); /** * Cast this JSON value to a signed integer. * * @returns A signed 64-bit integer. * @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not a 64-bit integer. */ simdjson_inline operator int64_t() noexcept(false); /** * Cast this JSON value to a double. * * @returns A double. * @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not a valid floating-point number. */ simdjson_inline operator double() noexcept(false); /** * Cast this JSON value to a string. * * The string is guaranteed to be valid UTF-8. * * @returns An UTF-8 string. The string is stored in the parser and will be invalidated the next * time it parses a document or when it is destroyed. * @exception simdjson_error(INCORRECT_TYPE) if the JSON value is not a string. */ simdjson_inline operator std::string_view() noexcept(false); /** * Cast this JSON value to a raw_json_string. * * The string is guaranteed to be valid UTF-8, and may have escapes in it (e.g. \\ or \n). * * @returns A pointer to the raw JSON for the given string. * @exception simdjson_error(INCORRECT_TYPE) if the JSON value is not a string. */ simdjson_inline operator raw_json_string() noexcept(false); /** * Cast this JSON value to a bool. * * @returns A bool value. * @exception simdjson_error(INCORRECT_TYPE) if the JSON value is not true or false. */ simdjson_inline operator bool() noexcept(false); /** * Cast this JSON value to a value when the document is an object or an array. * * You must not have begun iterating through the object or array.
When * SIMDJSON_DEVELOPMENT_CHECKS is defined, and you have already begun iterating, * you will get an OUT_OF_ORDER_ITERATION error. If you have begun iterating, you can use * rewind() to reset the document to its initial state before calling this method. * * @returns A value if the document is a JSON array or object. * @exception SCALAR_DOCUMENT_AS_VALUE error if the document is a scalar (see is_scalar() function). */ simdjson_inline operator value() noexcept(false); #endif /** * This method scans the array and counts the number of elements. * The count_elements method should always be called before you have begun * iterating through the array: it is expected that you are pointing at * the beginning of the array. * The runtime complexity is linear in the size of the array. After * calling this function, if successful, the array is 'rewinded' at its * beginning as if it had never been accessed. If the JSON is malformed (e.g., * there is a missing comma), then an error is returned and it is no longer * safe to continue. */ simdjson_inline simdjson_result count_elements() & noexcept; /** * This method scans the object and counts the number of key-value pairs. * The count_fields method should always be called before you have begun * iterating through the object: it is expected that you are pointing at * the beginning of the object. * The runtime complexity is linear in the size of the object. After * calling this function, if successful, the object is 'rewinded' at its * beginning as if it had never been accessed. If the JSON is malformed (e.g., * there is a missing comma), then an error is returned and it is no longer * safe to continue. * * To check that an object is empty, it is more performant to use * the is_empty() method. */ simdjson_inline simdjson_result count_fields() & noexcept; /** * Get the value at the given index in the array. This function has linear-time complexity.
* This function should only be called once on an array instance since the array iterator is not reset between each call. * * @return The value at the given index, or: * - INDEX_OUT_OF_BOUNDS if the array index is larger than an array length */ simdjson_inline simdjson_result at(size_t index) & noexcept; /** * Begin array iteration. * * Part of the std::iterable interface. */ simdjson_inline simdjson_result begin() & noexcept; /** * Sentinel representing the end of the array. * * Part of the std::iterable interface. */ simdjson_inline simdjson_result end() & noexcept; /** * Look up a field by name on an object (order-sensitive). * * The following code reads z, then y, then x, and thus will not retrieve x or y if fed the * JSON `{ "x": 1, "y": 2, "z": 3 }`: * * ```c++ * simdjson::ondemand::parser parser; * auto obj = parser.parse(R"( { "x": 1, "y": 2, "z": 3 } )"_padded); * double z = obj.find_field("z"); * double y = obj.find_field("y"); * double x = obj.find_field("x"); * ``` * * **Raw Keys:** The lookup will be done against the *raw* key, and will not unescape keys. * e.g. `object["a"]` will match `{ "a": 1 }`, but will *not* match `{ "\u0061": 1 }`. * * * You must consume the fields on an object one at a time. A request for a new key * invalidates previous field values: it makes them unsafe. E.g., the array * given by content["bids"].get_array() should not be accessed after you have called * content["asks"].get_array(). You can detect such mistakes by first compiling and running * the code in Debug mode (or with the macro `SIMDJSON_DEVELOPMENT_CHECKS` set to 1): an * OUT_OF_ORDER_ITERATION error is generated. * * You are expected to access keys only once. You should access the value corresponding to * a key a single time. Doing object["mykey"].to_string() and then again object["mykey"].to_string() * is an error. * * @param key The key to look up. * @returns The value of the field, or NO_SUCH_FIELD if the field is not in the object.
*/ simdjson_inline simdjson_result find_field(std::string_view key) & noexcept; /** @overload simdjson_inline simdjson_result find_field(std::string_view key) & noexcept; */ simdjson_inline simdjson_result find_field(const char *key) & noexcept; /** * Look up a field by name on an object, without regard to key order. * * **Performance Notes:** This is a bit less performant than find_field(), though its effect varies * and often appears negligible. It starts out normally, starting out at the last field; but if * the field is not found, it scans from the beginning of the object to see if it missed it. That * missing case has a non-cache-friendly bump and lots of extra scanning, especially if the object * in question is large. The fact that the extra code is there also bumps the executable size. * * It is the default, however, because it would be highly surprising (and hard to debug) if the * default behavior failed to look up a field just because it was in the wrong order--and many * APIs assume this. Therefore, you must be explicit if you want to treat objects as out of order. * * Use find_field() if you are sure fields will be in order (or are willing to treat it as if the * field wasn't there when they aren't). * * You must consume the fields on an object one at a time. A request for a new key * invalidates previous field values: it makes them unsafe. E.g., the array * given by content["bids"].get_array() should not be accessed after you have called * content["asks"].get_array(). You can detect such mistakes by first compiling and running * the code in Debug mode (or with the macro `SIMDJSON_DEVELOPMENT_CHECKS` set to 1): an * OUT_OF_ORDER_ITERATION error is generated. * * You are expected to access keys only once. You should access the value corresponding to a key * a single time. Doing object["mykey"].to_string() and then again object["mykey"].to_string() * is an error. * * @param key The key to look up. 
* @returns The value of the field, or NO_SUCH_FIELD if the field is not in the object. */ simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; */ simdjson_inline simdjson_result find_field_unordered(const char *key) & noexcept; /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; */ simdjson_inline simdjson_result operator[](std::string_view key) & noexcept; /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; */ simdjson_inline simdjson_result operator[](const char *key) & noexcept; /** * Get the type of this JSON value. It does not validate or consume the value. * E.g., you must still call "is_null()" to check that a value is null even if * "type()" returns json_type::null. * * NOTE: If you're only expecting a value to be one type (a typical case), it's generally * better to just call .get_double, .get_string, etc. and check for INCORRECT_TYPE (or just * let it throw an exception). * * @error TAPE_ERROR when the JSON value is a bad token like "}" "," or "alse". */ simdjson_inline simdjson_result type() noexcept; /** * Checks whether the document is a scalar (string, number, null, Boolean). * Returns false when it is an array or object. * * @returns true if the type is string, number, null, Boolean * @error TAPE_ERROR when the JSON value is a bad token like "}" "," or "alse". */ simdjson_inline simdjson_result is_scalar() noexcept; /** * Checks whether the document is a negative number. * * @returns true if the number is negative. */ simdjson_inline bool is_negative() noexcept; /** * Checks whether the document is an integer number. Note that * this requires partially parsing the number string.
If * the value is determined to be an integer, it may still * not parse properly as an integer in subsequent steps * (e.g., it might overflow). * * @returns true if the number is an integer. */ simdjson_inline simdjson_result is_integer() noexcept; /** * Determine the number type (integer or floating-point number) as quickly * as possible. This function does not fully validate the input. It is * useful when you only need to classify the numbers, without parsing them. * * If you are planning to retrieve the value or you need full validation, * consider using the get_number() method instead: it will fully parse * and validate the input, and give you access to the type: * get_number().get_number_type(). * * get_number_type() is number_type::unsigned_integer if we have * an integer greater or equal to 9223372036854775808 * get_number_type() is number_type::signed_integer if we have an * integer that is less than 9223372036854775808 * Otherwise, get_number_type() has value number_type::floating_point_number * * This function requires processing the number string, but it is expected * to be faster than get_number().get_number_type() because it does not * parse the number value. * * @returns the type of the number */ simdjson_inline simdjson_result get_number_type() noexcept; /** * Attempt to parse an ondemand::number. An ondemand::number may * contain an integer value or a floating-point value, the simdjson * library will autodetect the type. Thus it is a dynamically typed * number. Before accessing the value, you must determine the detected * type. * * number.get_number_type() is number_type::signed_integer if we have * an integer in [-9223372036854775808,9223372036854775808) * You can recover the value by calling number.get_int64() and you * have that number.is_int64() is true.
* * number.get_number_type() is number_type::unsigned_integer if we have * an integer in [9223372036854775808,18446744073709551616) * You can recover the value by calling number.get_uint64() and you * have that number.is_uint64() is true. * * Otherwise, number.get_number_type() has value number_type::floating_point_number * and we have a binary64 number. * You can recover the value by calling number.get_double() and you * have that number.is_double() is true. * * You must check the type before accessing the value: it is an error * to call "get_int64()" when number.get_number_type() is not * number_type::signed_integer and when number.is_int64() is false. */ simdjson_warn_unused simdjson_inline simdjson_result get_number() noexcept; /** * Get the raw JSON for this token. * * The string_view will always point into the input buffer. * * The string_view will start at the beginning of the token, and include the entire token * *as well as all spaces until the next token (or EOF).* This means, for example, that a * string token always begins with a " and is always terminated by the final ", possibly * followed by a number of spaces. * * The string_view is *not* null-terminated. If this is a scalar (string, number, * boolean, or null), the character after the end of the string_view may be the padded buffer. * * Tokens include: * - { * - [ * - "a string (possibly with UTF-8 or backslashed characters like \\\")". * - -1.2e-100 * - true * - false * - null */ simdjson_inline simdjson_result raw_json_token() noexcept; /** * Reset the iterator inside the document instance so we are pointing back at the * beginning of the document, as if it had just been created. It invalidates all * values, objects and arrays that you have created so far (including unescaped strings). */ inline void rewind() noexcept; /** * Returns debugging information. */ inline std::string to_debug_string() noexcept; /** * Some unrecoverable error conditions may render the document instance unusable. 
* The is_alive() method returns true when the document is still suitable. */ inline bool is_alive() noexcept; /** * Returns the current location in the document if in bounds. */ inline simdjson_result current_location() const noexcept; /** * Returns true if this document has been fully parsed. * If you have consumed the whole document and at_end() returns * false, then there may be trailing content. */ inline bool at_end() const noexcept; /** * Returns the current depth in the document if in bounds. * * E.g., * 0 = finished with document * 1 = document root value (could be [ or {, not yet known) * 2 = , or } inside root array/object * 3 = key or value inside root array/object. */ simdjson_inline int32_t current_depth() const noexcept; /** * Get the value associated with the given JSON pointer. We use the RFC 6901 * https://tools.ietf.org/html/rfc6901 standard. * * ondemand::parser parser; * auto json = R"({ "foo": { "a": [ 10, 20, 30 ] }})"_padded; * auto doc = parser.iterate(json); * doc.at_pointer("/foo/a/1") == 20 * * It is allowed for a key to be the empty string: * * ondemand::parser parser; * auto json = R"({ "": { "a": [ 10, 20, 30 ] }})"_padded; * auto doc = parser.iterate(json); * doc.at_pointer("//a/1") == 20 * * Note that at_pointer() automatically calls rewind between each call. Thus * all values, objects and arrays that you have created so far (including unescaped strings) * are invalidated. After calling at_pointer, you need to consume the result: string values * should be stored in your own variables, arrays should be decoded and stored in your own array-like * structures and so forth. 
* * Also note that at_pointer() relies on find_field() which implies that we do not unescape keys when matching * * @return The value associated with the given JSON pointer, or: * - NO_SUCH_FIELD if a field does not exist in an object * - INDEX_OUT_OF_BOUNDS if an array index is larger than an array length * - INCORRECT_TYPE if a non-integer is used to access an array * - INVALID_JSON_POINTER if the JSON pointer is invalid and cannot be parsed * - SCALAR_DOCUMENT_AS_VALUE if the json_pointer is empty and the document is not a scalar (see is_scalar() function). */ simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; /** * Consumes the document and returns a string_view instance corresponding to the * document as represented in JSON. It points inside the original byte array containing * the JSON document. */ simdjson_inline simdjson_result raw_json() noexcept; protected: /** * Consumes the document. */ simdjson_inline error_code consume() noexcept; simdjson_inline document(ondemand::json_iterator &&iter) noexcept; simdjson_inline const uint8_t *text(uint32_t idx) const noexcept; simdjson_inline value_iterator resume_value_iterator() noexcept; simdjson_inline value_iterator get_root_value_iterator() noexcept; simdjson_inline simdjson_result start_or_resume_object() noexcept; static simdjson_inline document start(ondemand::json_iterator &&iter) noexcept; // // Fields // json_iterator iter{}; ///< Current position in the document static constexpr depth_t DOCUMENT_DEPTH = 0; ///< document depth is always 0 friend class array_iterator; friend class value; friend class ondemand::parser; friend class object; friend class array; friend class field; friend class token; friend class document_stream; friend class document_reference; }; /** * A document_reference is a thin wrapper around a document reference instance. 
*/ class document_reference { public: simdjson_inline document_reference() noexcept; simdjson_inline document_reference(document &d) noexcept; simdjson_inline document_reference(const document_reference &other) noexcept = default; simdjson_inline document_reference& operator=(const document_reference &other) noexcept = default; simdjson_inline void rewind() noexcept; simdjson_inline simdjson_result get_array() & noexcept; simdjson_inline simdjson_result get_object() & noexcept; simdjson_inline simdjson_result get_uint64() noexcept; simdjson_inline simdjson_result get_uint64_in_string() noexcept; simdjson_inline simdjson_result get_int64() noexcept; simdjson_inline simdjson_result get_int64_in_string() noexcept; simdjson_inline simdjson_result get_double() noexcept; simdjson_inline simdjson_result get_double_in_string() noexcept; simdjson_inline simdjson_result get_string(bool allow_replacement = false) noexcept; template simdjson_inline error_code get_string(string_type& receiver, bool allow_replacement = false) noexcept; simdjson_inline simdjson_result get_wobbly_string() noexcept; simdjson_inline simdjson_result get_raw_json_string() noexcept; simdjson_inline simdjson_result get_bool() noexcept; simdjson_inline simdjson_result get_value() noexcept; simdjson_inline simdjson_result is_null() noexcept; simdjson_inline simdjson_result raw_json() noexcept; simdjson_inline operator document&() const noexcept; #if SIMDJSON_EXCEPTIONS simdjson_inline operator array() & noexcept(false); simdjson_inline operator object() & noexcept(false); simdjson_inline operator uint64_t() noexcept(false); simdjson_inline operator int64_t() noexcept(false); simdjson_inline operator double() noexcept(false); simdjson_inline operator std::string_view() noexcept(false); simdjson_inline operator raw_json_string() noexcept(false); simdjson_inline operator bool() noexcept(false); simdjson_inline operator value() noexcept(false); #endif simdjson_inline simdjson_result count_elements() & 
noexcept; simdjson_inline simdjson_result count_fields() & noexcept; simdjson_inline simdjson_result at(size_t index) & noexcept; simdjson_inline simdjson_result begin() & noexcept; simdjson_inline simdjson_result end() & noexcept; simdjson_inline simdjson_result find_field(std::string_view key) & noexcept; simdjson_inline simdjson_result find_field(const char *key) & noexcept; simdjson_inline simdjson_result operator[](std::string_view key) & noexcept; simdjson_inline simdjson_result operator[](const char *key) & noexcept; simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; simdjson_inline simdjson_result find_field_unordered(const char *key) & noexcept; simdjson_inline simdjson_result type() noexcept; simdjson_inline simdjson_result is_scalar() noexcept; simdjson_inline simdjson_result current_location() noexcept; simdjson_inline int32_t current_depth() const noexcept; simdjson_inline bool is_negative() noexcept; simdjson_inline simdjson_result is_integer() noexcept; simdjson_inline simdjson_result get_number_type() noexcept; simdjson_inline simdjson_result get_number() noexcept; simdjson_inline simdjson_result raw_json_token() noexcept; simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; private: /** Pointer to the wrapped document; nullptr by default (presumably set by the document& constructor -- TODO confirm). */ document *doc{nullptr}; }; } // namespace ondemand } // namespace icelake } // namespace simdjson namespace simdjson { template<> struct simdjson_result : public icelake::implementation_simdjson_result_base { public: simdjson_inline simdjson_result(icelake::ondemand::document &&value) noexcept; ///< @private simdjson_inline simdjson_result(error_code error) noexcept; ///< @private simdjson_inline simdjson_result() noexcept = default; simdjson_inline error_code rewind() noexcept; simdjson_inline simdjson_result get_array() & noexcept; simdjson_inline simdjson_result get_object() & noexcept; simdjson_inline simdjson_result get_uint64() noexcept; simdjson_inline simdjson_result get_uint64_in_string()
noexcept; simdjson_inline simdjson_result get_int64() noexcept; simdjson_inline simdjson_result get_int64_in_string() noexcept; simdjson_inline simdjson_result get_double() noexcept; simdjson_inline simdjson_result get_double_in_string() noexcept; simdjson_inline simdjson_result get_string(bool allow_replacement = false) noexcept; template simdjson_inline error_code get_string(string_type& receiver, bool allow_replacement = false) noexcept; simdjson_inline simdjson_result get_wobbly_string() noexcept; simdjson_inline simdjson_result get_raw_json_string() noexcept; simdjson_inline simdjson_result get_bool() noexcept; simdjson_inline simdjson_result get_value() noexcept; simdjson_inline simdjson_result is_null() noexcept; template simdjson_inline simdjson_result get() & noexcept; template simdjson_inline simdjson_result get() && noexcept; template simdjson_inline error_code get(T &out) & noexcept; template simdjson_inline error_code get(T &out) && noexcept; #if SIMDJSON_EXCEPTIONS simdjson_inline operator icelake::ondemand::array() & noexcept(false); simdjson_inline operator icelake::ondemand::object() & noexcept(false); simdjson_inline operator uint64_t() noexcept(false); simdjson_inline operator int64_t() noexcept(false); simdjson_inline operator double() noexcept(false); simdjson_inline operator std::string_view() noexcept(false); simdjson_inline operator icelake::ondemand::raw_json_string() noexcept(false); simdjson_inline operator bool() noexcept(false); simdjson_inline operator icelake::ondemand::value() noexcept(false); #endif simdjson_inline simdjson_result count_elements() & noexcept; simdjson_inline simdjson_result count_fields() & noexcept; simdjson_inline simdjson_result at(size_t index) & noexcept; simdjson_inline simdjson_result begin() & noexcept; simdjson_inline simdjson_result end() & noexcept; simdjson_inline simdjson_result find_field(std::string_view key) & noexcept; simdjson_inline simdjson_result find_field(const char *key) & noexcept; 
simdjson_inline simdjson_result operator[](std::string_view key) & noexcept; simdjson_inline simdjson_result operator[](const char *key) & noexcept; simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; simdjson_inline simdjson_result find_field_unordered(const char *key) & noexcept; simdjson_inline simdjson_result type() noexcept; simdjson_inline simdjson_result is_scalar() noexcept; simdjson_inline simdjson_result current_location() noexcept; simdjson_inline int32_t current_depth() const noexcept; simdjson_inline bool at_end() const noexcept; simdjson_inline bool is_negative() noexcept; simdjson_inline simdjson_result is_integer() noexcept; simdjson_inline simdjson_result get_number_type() noexcept; simdjson_inline simdjson_result get_number() noexcept; /** @copydoc simdjson_inline std::string_view document::raw_json_token() const noexcept */ simdjson_inline simdjson_result raw_json_token() noexcept; simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; }; } // namespace simdjson namespace simdjson { template<> struct simdjson_result : public icelake::implementation_simdjson_result_base { public: simdjson_inline simdjson_result(icelake::ondemand::document_reference value, error_code error) noexcept; simdjson_inline simdjson_result() noexcept = default; simdjson_inline error_code rewind() noexcept; simdjson_inline simdjson_result get_array() & noexcept; simdjson_inline simdjson_result get_object() & noexcept; simdjson_inline simdjson_result get_uint64() noexcept; simdjson_inline simdjson_result get_uint64_in_string() noexcept; simdjson_inline simdjson_result get_int64() noexcept; simdjson_inline simdjson_result get_int64_in_string() noexcept; simdjson_inline simdjson_result get_double() noexcept; simdjson_inline simdjson_result get_double_in_string() noexcept; simdjson_inline simdjson_result get_string(bool allow_replacement = false) noexcept; template simdjson_inline error_code get_string(string_type& 
receiver, bool allow_replacement = false) noexcept; simdjson_inline simdjson_result get_wobbly_string() noexcept; simdjson_inline simdjson_result get_raw_json_string() noexcept; simdjson_inline simdjson_result get_bool() noexcept; simdjson_inline simdjson_result get_value() noexcept; simdjson_inline simdjson_result is_null() noexcept; #if SIMDJSON_EXCEPTIONS simdjson_inline operator icelake::ondemand::array() & noexcept(false); simdjson_inline operator icelake::ondemand::object() & noexcept(false); simdjson_inline operator uint64_t() noexcept(false); simdjson_inline operator int64_t() noexcept(false); simdjson_inline operator double() noexcept(false); simdjson_inline operator std::string_view() noexcept(false); simdjson_inline operator icelake::ondemand::raw_json_string() noexcept(false); simdjson_inline operator bool() noexcept(false); simdjson_inline operator icelake::ondemand::value() noexcept(false); #endif simdjson_inline simdjson_result count_elements() & noexcept; simdjson_inline simdjson_result count_fields() & noexcept; simdjson_inline simdjson_result at(size_t index) & noexcept; simdjson_inline simdjson_result begin() & noexcept; simdjson_inline simdjson_result end() & noexcept; simdjson_inline simdjson_result find_field(std::string_view key) & noexcept; simdjson_inline simdjson_result find_field(const char *key) & noexcept; simdjson_inline simdjson_result operator[](std::string_view key) & noexcept; simdjson_inline simdjson_result operator[](const char *key) & noexcept; simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; simdjson_inline simdjson_result find_field_unordered(const char *key) & noexcept; simdjson_inline simdjson_result type() noexcept; simdjson_inline simdjson_result is_scalar() noexcept; simdjson_inline simdjson_result current_location() noexcept; simdjson_inline simdjson_result current_depth() const noexcept; simdjson_inline simdjson_result is_negative() noexcept; simdjson_inline simdjson_result 
is_integer() noexcept; simdjson_inline simdjson_result get_number_type() noexcept; simdjson_inline simdjson_result get_number() noexcept; /** @copydoc simdjson_inline std::string_view document_reference::raw_json_token() const noexcept */ simdjson_inline simdjson_result raw_json_token() noexcept; simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; }; } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_H /* end file simdjson/generic/ondemand/document.h for icelake */ /* including simdjson/generic/ondemand/document_stream.h for icelake: #include "simdjson/generic/ondemand/document_stream.h" */ /* begin file simdjson/generic/ondemand/document_stream.h for icelake */ #ifndef SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_STREAM_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_STREAM_H */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/document.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/parser.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ #ifdef SIMDJSON_THREADS_ENABLED #include #include #include #endif namespace simdjson { namespace icelake { namespace ondemand { #ifdef SIMDJSON_THREADS_ENABLED /** @private Custom worker class **/ struct stage1_worker { stage1_worker() noexcept = default; stage1_worker(const stage1_worker&) = delete; stage1_worker(stage1_worker&&) = delete; stage1_worker operator=(const stage1_worker&) = delete; ~stage1_worker(); /** * We only start the thread when it is needed, not at object construction, this may throw. * You should only call this once. **/ void start_thread(); /** * Start a stage 1 job. 
You should first call 'run', then 'finish'. * You must call start_thread once before. */ void run(document_stream * ds, parser * stage1, size_t next_batch_start); /** Wait for the run to finish (blocking). You should first call 'run', then 'finish'. **/ void finish(); private: /** * Normally, we would never stop the thread. But we do in the destructor. * This function is only safe assuming that you are not waiting for results. You * should have called run, then finish, and be done. **/ void stop_thread(); std::thread thread{}; /** These three variables define the work done by the thread. **/ ondemand::parser * stage1_thread_parser{}; size_t _next_batch_start{}; document_stream * owner{}; /** * We have two state variables. This could be streamlined to one variable in the future but * we use two for clarity. */ bool has_work{false}; bool can_work{true}; /** * We lock using a mutex. */ std::mutex locking_mutex{}; std::condition_variable cond_var{}; friend class document_stream; }; #endif // SIMDJSON_THREADS_ENABLED /** * A forward-only stream of documents. * * Produced by parser::iterate_many. * */ class document_stream { public: /** * Construct an uninitialized document_stream. * * ```c++ * document_stream docs; * auto error = parser.iterate_many(json).get(docs); * ``` */ simdjson_inline document_stream() noexcept; /** Move one document_stream to another. */ simdjson_inline document_stream(document_stream &&other) noexcept = default; /** Move one document_stream to another. */ simdjson_inline document_stream &operator=(document_stream &&other) noexcept = default; simdjson_inline ~document_stream() noexcept; /** * Returns the input size in bytes. */ inline size_t size_in_bytes() const noexcept; /** * After iterating through the stream, this method * returns the number of bytes that were not parsed at the end * of the stream. 
If truncated_bytes() differs from zero, * then the input was truncated, possibly because incomplete JSON * documents were found at the end of the stream. You * may need to process the bytes in the interval [size_in_bytes()-truncated_bytes(), size_in_bytes()). * * You should only call truncated_bytes() after streaming through all * documents, like so: * * document_stream stream = parser.iterate_many(json,window); * for(auto & doc : stream) { * // do something with doc * } * size_t truncated = stream.truncated_bytes(); * */ inline size_t truncated_bytes() const noexcept; class iterator { public: using value_type = simdjson_result; using reference = value_type; using difference_type = std::ptrdiff_t; using iterator_category = std::input_iterator_tag; /** * Default constructor. */ simdjson_inline iterator() noexcept; /** * Get the current document (or error). */ simdjson_inline simdjson_result operator*() noexcept; /** * Advance to the next document (prefix). */ inline iterator& operator++() noexcept; /** * Check if we're at the end yet. * @param other the end iterator to compare to. */ simdjson_inline bool operator!=(const iterator &other) const noexcept; /** * @private * * Gives the current index in the input document in bytes. * * document_stream stream = parser.iterate_many(json,window); * for(auto i = stream.begin(); i != stream.end(); ++i) { * auto doc = *i; * size_t index = i.current_index(); * } * * This function (current_index()) is experimental and the usage * may change in future versions of simdjson: we find the API somewhat * awkward and we would like to offer something friendlier. */ simdjson_inline size_t current_index() const noexcept; /** * @private * * Gives a view of the current document at the current position.
* * document_stream stream = parser.iterate_many(json,window); * for(auto i = stream.begin(); i != stream.end(); ++i) { * std::string_view v = i.source(); * } * * The returned string_view instance is simply a view into the (unparsed) * source string: it may thus include white-space characters and all manner * of padding. * * This function (source()) is experimental and the usage * may change in future versions of simdjson: we find the API somewhat * awkward and we would like to offer something friendlier. * */ simdjson_inline std::string_view source() const noexcept; /** * Returns error of the stream (if any). */ inline error_code error() const noexcept; private: simdjson_inline iterator(document_stream *s, bool finished) noexcept; /** The document_stream we're iterating through. */ document_stream* stream; /** Whether we're finished or not. */ bool finished; friend class document; friend class document_stream; friend class json_iterator; }; /** * Start iterating the documents in the stream. */ simdjson_inline iterator begin() noexcept; /** * The end of the stream, for iterator comparison purposes. */ simdjson_inline iterator end() noexcept; private: document_stream &operator=(const document_stream &) = delete; // Disallow copying document_stream(const document_stream &other) = delete; // Disallow copying /** * Construct a document_stream. Does not allocate or parse anything until the iterator is * used. * * @param parser is a reference to the parser instance used to generate this document_stream * @param buf is the raw byte buffer we need to process * @param len is the length of the raw byte buffer in bytes * @param batch_size is the size of the windows (must be greater than or equal to the size of the largest JSON document) * @param allow_comma_separated presumably whether documents in the buffer may be separated by commas -- TODO confirm */ simdjson_inline document_stream( ondemand::parser &parser, const uint8_t *buf, size_t len, size_t batch_size, bool allow_comma_separated ) noexcept; /** * Parse the first document in the buffer.
Used by begin(), to handle allocation and * initialization. */ inline void start() noexcept; /** * Parse the next document found in the buffer previously given to document_stream. * * The content should be a valid JSON document encoded as UTF-8. If there is a * UTF-8 BOM, the parser skips it. * * You do NOT need to pre-allocate a parser. This function takes care of * pre-allocating a capacity defined by the batch_size defined when creating the * document_stream object. * * NOTE(review): next() is declared void, so the codes described below are not * returned to the caller; presumably they are recorded in the `error` member -- confirm. * * The function returns simdjson::EMPTY if there is no more data to be parsed. * * The function returns simdjson::SUCCESS (as integer = 0) in case of success * and indicates that the buffer has successfully been parsed to the end. * Every document it contained has been parsed without error. * * The function returns an error code from simdjson/simdjson.h in case of failure * such as simdjson::CAPACITY, simdjson::MEMALLOC, simdjson::DEPTH_ERROR and so forth * (the simdjson::error_message function converts these error codes into a string). * * You can also check validity by calling parser.is_valid(). The same parser can * and should be reused for the other documents in the buffer. */ inline void next() noexcept; /** Move the json_iterator of the document to the location of the next document in the stream. */ inline void next_document() noexcept; /** Get the next document index. */ inline size_t next_batch_start() const noexcept; /** Pass the next batch through stage 1 with the given parser. */ inline error_code run_stage1(ondemand::parser &p, size_t batch_start) noexcept; // Fields ondemand::parser *parser; const uint8_t *buf; size_t len; size_t batch_size; bool allow_comma_separated; /** * We are going to use just one document instance. The document owns * the json_iterator. It implies that we only ever pass a reference * to the document to the users. */ document doc{}; /** The error (or lack thereof) from the current document.
*/ error_code error; size_t batch_start{0}; size_t doc_index{}; #ifdef SIMDJSON_THREADS_ENABLED /** Indicates whether we use threads. Note that this needs to be a constant during the execution of the parsing. */ bool use_thread; inline void load_from_stage1_thread() noexcept; /** Start a thread to run stage 1 on the next batch. */ inline void start_stage1_thread() noexcept; /** Wait for the stage 1 thread to finish and capture the results. */ inline void finish_stage1_thread() noexcept; /** The error returned from the stage 1 thread. */ error_code stage1_thread_error{UNINITIALIZED}; /** The thread used to run stage 1 against the next batch in the background. */ std::unique_ptr worker{new(std::nothrow) stage1_worker()}; /** * The parser used to run stage 1 in the background. Will be swapped * with the regular parser when finished. */ ondemand::parser stage1_thread_parser{}; friend struct stage1_worker; #endif // SIMDJSON_THREADS_ENABLED friend class parser; friend class document; friend class json_iterator; friend struct simdjson_result; friend struct internal::simdjson_result_base; }; // document_stream } // namespace ondemand } // namespace icelake } // namespace simdjson namespace simdjson { template<> struct simdjson_result : public icelake::implementation_simdjson_result_base { public: simdjson_inline simdjson_result(icelake::ondemand::document_stream &&value) noexcept; ///< @private simdjson_inline simdjson_result(error_code error) noexcept; ///< @private simdjson_inline simdjson_result() noexcept = default; }; } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_STREAM_H /* end file simdjson/generic/ondemand/document_stream.h for icelake */ /* including simdjson/generic/ondemand/field.h for icelake: #include "simdjson/generic/ondemand/field.h" */ /* begin file simdjson/generic/ondemand/field.h for icelake */ #ifndef SIMDJSON_GENERIC_ONDEMAND_FIELD_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation 
skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_FIELD_H */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { namespace icelake { namespace ondemand { /** * A JSON field (key/value pair) in an object. * * Returned from object iteration. * * Derives from std::pair so you can use C++ algorithms that rely on pairs. */ class field : public std::pair { public: /** * Create a new invalid field. * * Exists so you can declare a variable and later assign to it before use. */ simdjson_inline field() noexcept; /** * Get the key as a string_view (for higher speed, consider key(), which returns the raw_json_string). * We deliberately use a more cumbersome name (unescaped_key) to force users * to think twice about using it. * * This consumes the key: once you have called unescaped_key(), you cannot * call it again nor can you call key(). * * @param allow_replacement NOTE(review): semantics are not visible in this declaration; * presumably controls substitution of invalid sequences while unescaping -- TODO confirm. * No default is given here, whereas the simdjson_result wrapper for field defaults it to false. */ simdjson_inline simdjson_warn_unused simdjson_result unescaped_key(bool allow_replacement) noexcept; /** * Get the key as a raw_json_string. Can be used for direct comparison with * an unescaped C string: e.g., key() == "test". */ simdjson_inline raw_json_string key() const noexcept; /** * Get the field value.
*/ simdjson_inline ondemand::value &value() & noexcept; /** * @overload ondemand::value &ondemand::value() & noexcept */ simdjson_inline ondemand::value value() && noexcept; protected: simdjson_inline field(raw_json_string key, ondemand::value &&value) noexcept; static simdjson_inline simdjson_result start(value_iterator &parent_iter) noexcept; static simdjson_inline simdjson_result start(const value_iterator &parent_iter, raw_json_string key) noexcept; friend struct simdjson_result; friend class object_iterator; }; } // namespace ondemand } // namespace icelake } // namespace simdjson namespace simdjson { template<> struct simdjson_result : public icelake::implementation_simdjson_result_base { public: simdjson_inline simdjson_result(icelake::ondemand::field &&value) noexcept; ///< @private simdjson_inline simdjson_result(error_code error) noexcept; ///< @private simdjson_inline simdjson_result() noexcept = default; simdjson_inline simdjson_result unescaped_key(bool allow_replacement = false) noexcept; simdjson_inline simdjson_result key() noexcept; simdjson_inline simdjson_result value() noexcept; }; } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_FIELD_H /* end file simdjson/generic/ondemand/field.h for icelake */ /* including simdjson/generic/ondemand/object.h for icelake: #include "simdjson/generic/ondemand/object.h" */ /* begin file simdjson/generic/ondemand/object.h for icelake */ #ifndef SIMDJSON_GENERIC_ONDEMAND_OBJECT_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_OBJECT_H */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ 
namespace simdjson { namespace icelake { namespace ondemand { /** * A forward-only JSON object field iterator. */ class object { public: /** * Create a new invalid object. * * Exists so you can declare a variable and later assign to it before use. */ simdjson_inline object() noexcept = default; simdjson_inline simdjson_result begin() noexcept; simdjson_inline simdjson_result end() noexcept; /** * Look up a field by name on an object (order-sensitive). * * The following code reads z, then y, then x, and thus will not retrieve x or y if fed the * JSON `{ "x": 1, "y": 2, "z": 3 }`: * * ```c++ * simdjson::ondemand::parser parser; * auto obj = parser.parse(R"( { "x": 1, "y": 2, "z": 3 } )"_padded); * double z = obj.find_field("z"); * double y = obj.find_field("y"); * double x = obj.find_field("x"); * ``` * If you have multiple fields with a matching key ({"x": 1, "x": 1}) be mindful * that only one field is returned. * * **Raw Keys:** The lookup will be done against the *raw* key, and will not unescape keys. * e.g. `object["a"]` will match `{ "a": 1 }`, but will *not* match `{ "\u0061": 1 }`. * * You must consume the fields on an object one at a time. A request for a new key * invalidates previous field values: it makes them unsafe. The value instance you get * from `content["bids"]` becomes invalid when you call `content["asks"]`. The array * given by content["bids"].get_array() should not be accessed after you have called * content["asks"].get_array(). You can detect such mistakes by first compiling and running * the code in Debug mode (or with the macro `SIMDJSON_DEVELOPMENT_CHECKS` set to 1): an * OUT_OF_ORDER_ITERATION error is generated. * * You are expected to access keys only once. You should access the value corresponding to a * key a single time. Doing object["mykey"].to_string() and then again object["mykey"].to_string() * is an error. * * @param key The key to look up. * @returns The value of the field, or NO_SUCH_FIELD if the field is not in the object. 
*/ simdjson_inline simdjson_result find_field(std::string_view key) & noexcept; /** @overload simdjson_inline simdjson_result find_field(std::string_view key) & noexcept; */ simdjson_inline simdjson_result find_field(std::string_view key) && noexcept; /** * Look up a field by name on an object, without regard to key order. * * **Performance Notes:** This is a bit less performant than find_field(), though its effect varies * and often appears negligible. It starts out normally, starting out at the last field; but if * the field is not found, it scans from the beginning of the object to see if it missed it. That * missing case has a non-cache-friendly bump and lots of extra scanning, especially if the object * in question is large. The fact that the extra code is there also bumps the executable size. * * It is the default, however, because it would be highly surprising (and hard to debug) if the * default behavior failed to look up a field just because it was in the wrong order--and many * APIs assume this. Therefore, you must be explicit if you want to treat objects as out of order. * * Use find_field() if you are sure fields will be in order (or are willing to treat it as if the * field wasn't there when they aren't). * * If you have multiple fields with a matching key ({"x": 1, "x": 1}) be mindful * that only one field is returned. * * You must consume the fields on an object one at a time. A request for a new key * invalidates previous field values: it makes them unsafe. The value instance you get * from `content["bids"]` becomes invalid when you call `content["asks"]`. The array * given by content["bids"].get_array() should not be accessed after you have called * content["asks"].get_array(). You can detect such mistakes by first compiling and running * the code in Debug mode (or with the macro `SIMDJSON_DEVELOPMENT_CHECKS` set to 1): an * OUT_OF_ORDER_ITERATION error is generated. * * You are expected to access keys only once. 
You should access the value corresponding to a key * a single time. Doing object["mykey"].to_string() and then again object["mykey"].to_string() is an error. * * @param key The key to look up. * @returns The value of the field, or NO_SUCH_FIELD if the field is not in the object. */ simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; */ simdjson_inline simdjson_result find_field_unordered(std::string_view key) && noexcept; /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; */ simdjson_inline simdjson_result operator[](std::string_view key) & noexcept; /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; */ simdjson_inline simdjson_result operator[](std::string_view key) && noexcept; /** * Get the value associated with the given JSON pointer. We use the RFC 6901 * https://tools.ietf.org/html/rfc6901 standard, interpreting the current node * as the root of its own JSON document. * * ondemand::parser parser; * auto json = R"({ "foo": { "a": [ 10, 20, 30 ] }})"_padded; * auto doc = parser.iterate(json); * doc.at_pointer("/foo/a/1") == 20 * * It is allowed for a key to be the empty string: * * ondemand::parser parser; * auto json = R"({ "": { "a": [ 10, 20, 30 ] }})"_padded; * auto doc = parser.iterate(json); * doc.at_pointer("//a/1") == 20 * * Note that at_pointer() called on the document automatically calls the document's rewind * method between each call. It invalidates all previously accessed arrays, objects and values * that have not been consumed. Yet it is not the case when calling at_pointer on an object * instance: there is no rewind and no invalidation. * * You may call at_pointer more than once on an object, but each time the pointer is advanced * to be within the value matched by the key indicated by the JSON pointer query. 
Thus any preceding * key (as well as the current key) can no longer be used with following JSON pointer calls. * * Also note that at_pointer() relies on find_field() which implies that we do not unescape keys when matching. * * @return The value associated with the given JSON pointer, or: * - NO_SUCH_FIELD if a field does not exist in an object * - INDEX_OUT_OF_BOUNDS if an array index is larger than an array length * - INCORRECT_TYPE if a non-integer is used to access an array * - INVALID_JSON_POINTER if the JSON pointer is invalid and cannot be parsed */ inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; /** * Reset the iterator so that we are pointing back at the * beginning of the object. You should still consume values only once even if you * can iterate through the object more than once. If you unescape a string within * the object more than once, you have unsafe code. Note that rewinding an object * means that you may need to reparse it anew: it is not a free operation. * * @returns true if the object contains some elements (not empty) */ inline simdjson_result reset() & noexcept; /** * This method scans the beginning of the object and checks whether the * object is empty. * The runtime complexity is constant time. After * calling this function, if successful, the object is 'rewinded' at its * beginning as if it had never been accessed. If the JSON is malformed (e.g., * there is a missing comma), then an error is returned and it is no longer * safe to continue. */ inline simdjson_result is_empty() & noexcept; /** * This method scans the object and counts the number of key-value pairs. * The count_fields method should always be called before you have begun * iterating through the object: it is expected that you are pointing at * the beginning of the object. * The runtime complexity is linear in the size of the object. 
After * calling this function, if successful, the object is 'rewinded' at its * beginning as if it had never been accessed. If the JSON is malformed (e.g., * there is a missing comma), then an error is returned and it is no longer * safe to continue. * * To check that an object is empty, it is more performant to use * the is_empty() method. * * Performance hint: You should only call count_fields() as a last * resort as it may require scanning the document twice or more. */ simdjson_inline simdjson_result count_fields() & noexcept; /** * Consumes the object and returns a string_view instance corresponding to the * object as represented in JSON. It points inside the original byte array containing * the JSON document. */ simdjson_inline simdjson_result raw_json() noexcept; protected: /** * Go to the end of the object, no matter where you are right now. */ simdjson_inline error_code consume() noexcept; static simdjson_inline simdjson_result start(value_iterator &iter) noexcept; static simdjson_inline simdjson_result start_root(value_iterator &iter) noexcept; static simdjson_inline simdjson_result started(value_iterator &iter) noexcept; static simdjson_inline object resume(const value_iterator &iter) noexcept; simdjson_inline object(const value_iterator &iter) noexcept; simdjson_warn_unused simdjson_inline error_code find_field_raw(const std::string_view key) noexcept; value_iterator iter{}; friend class value; friend class document; friend struct simdjson_result; }; } // namespace ondemand } // namespace icelake } // namespace simdjson namespace simdjson { template<> struct simdjson_result : public icelake::implementation_simdjson_result_base { public: simdjson_inline simdjson_result(icelake::ondemand::object &&value) noexcept; ///< @private simdjson_inline simdjson_result(error_code error) noexcept; ///< @private simdjson_inline simdjson_result() noexcept = default; simdjson_inline simdjson_result begin() noexcept; simdjson_inline simdjson_result end() noexcept; 
simdjson_inline simdjson_result find_field(std::string_view key) & noexcept; simdjson_inline simdjson_result find_field(std::string_view key) && noexcept; simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; simdjson_inline simdjson_result find_field_unordered(std::string_view key) && noexcept; simdjson_inline simdjson_result operator[](std::string_view key) & noexcept; simdjson_inline simdjson_result operator[](std::string_view key) && noexcept; simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; inline simdjson_result reset() noexcept; inline simdjson_result is_empty() noexcept; inline simdjson_result count_fields() & noexcept; inline simdjson_result raw_json() noexcept; }; } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_OBJECT_H /* end file simdjson/generic/ondemand/object.h for icelake */ /* including simdjson/generic/ondemand/object_iterator.h for icelake: #include "simdjson/generic/ondemand/object_iterator.h" */ /* begin file simdjson/generic/ondemand/object_iterator.h for icelake */ #ifndef SIMDJSON_GENERIC_ONDEMAND_OBJECT_ITERATOR_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_OBJECT_ITERATOR_H */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { namespace icelake { namespace ondemand { class object_iterator { public: /** * Create a new invalid object_iterator. * * Exists so you can declare a variable and later assign to it before use. 
*/ simdjson_inline object_iterator() noexcept = default; // // Iterator interface // // Reads key and value, yielding them to the user. // MUST ONLY BE CALLED ONCE PER ITERATION. simdjson_inline simdjson_result operator*() noexcept; // Assumes it's being compared with the end. true if depth < iter->depth. simdjson_inline bool operator==(const object_iterator &) const noexcept; // Assumes it's being compared with the end. true if depth >= iter->depth. simdjson_inline bool operator!=(const object_iterator &) const noexcept; // Checks for '}' and ',' simdjson_inline object_iterator &operator++() noexcept; private: /** * The underlying JSON iterator. * * PERF NOTE: expected to be elided in favor of the parent document: this is set when the object * is first used, and never changes afterwards. */ value_iterator iter{}; simdjson_inline object_iterator(const value_iterator &iter) noexcept; friend struct simdjson_result; friend class object; }; } // namespace ondemand } // namespace icelake } // namespace simdjson namespace simdjson { template<> struct simdjson_result : public icelake::implementation_simdjson_result_base { public: simdjson_inline simdjson_result(icelake::ondemand::object_iterator &&value) noexcept; ///< @private simdjson_inline simdjson_result(error_code error) noexcept; ///< @private simdjson_inline simdjson_result() noexcept = default; // // Iterator interface // // Reads key and value, yielding them to the user. simdjson_inline simdjson_result operator*() noexcept; // MUST ONLY BE CALLED ONCE PER ITERATION. // Assumes it's being compared with the end. true if depth < iter->depth. simdjson_inline bool operator==(const simdjson_result &) const noexcept; // Assumes it's being compared with the end. true if depth >= iter->depth. 
simdjson_inline bool operator!=(const simdjson_result &) const noexcept; // Checks for ']' and ',' simdjson_inline simdjson_result &operator++() noexcept; }; } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_OBJECT_ITERATOR_H /* end file simdjson/generic/ondemand/object_iterator.h for icelake */ /* including simdjson/generic/ondemand/serialization.h for icelake: #include "simdjson/generic/ondemand/serialization.h" */ /* begin file simdjson/generic/ondemand/serialization.h for icelake */ #ifndef SIMDJSON_GENERIC_ONDEMAND_SERIALIZATION_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_SERIALIZATION_H */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { /** * Create a string-view instance out of a document instance. The string-view instance * contains JSON text that is suitable to be parsed as JSON again. It does not * validate the content. */ inline simdjson_result to_json_string(icelake::ondemand::document& x) noexcept; /** * Create a string-view instance out of a value instance. The string-view instance * contains JSON text that is suitable to be parsed as JSON again. The value must * not have been accessed previously. It does not * validate the content. */ inline simdjson_result to_json_string(icelake::ondemand::value& x) noexcept; /** * Create a string-view instance out of an object instance. The string-view instance * contains JSON text that is suitable to be parsed as JSON again. It does not * validate the content. */ inline simdjson_result to_json_string(icelake::ondemand::object& x) noexcept; /** * Create a string-view instance out of an array instance. The string-view instance * contains JSON text that is suitable to be parsed as JSON again. It does not * validate the content. 
*/ inline simdjson_result to_json_string(icelake::ondemand::array& x) noexcept; inline simdjson_result to_json_string(simdjson_result x); inline simdjson_result to_json_string(simdjson_result x); inline simdjson_result to_json_string(simdjson_result x); inline simdjson_result to_json_string(simdjson_result x); } // namespace simdjson /** * We want to support argument-dependent lookup (ADL). * Hence we should define operator<< in the namespace * where the argument (here value, object, etc.) resides. * Credit: @madhur4127 * See https://github.com/simdjson/simdjson/issues/1768 */ namespace simdjson { namespace icelake { namespace ondemand { /** * Print JSON to an output stream. It does not * validate the content. * * @param out The output stream. * @param x The value. * @throw if there is an error with the underlying output stream. simdjson itself will not throw. */ inline std::ostream& operator<<(std::ostream& out, simdjson::icelake::ondemand::value x); #if SIMDJSON_EXCEPTIONS inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x); #endif /** * Print JSON to an output stream. It does not * validate the content. * * @param out The output stream. * @param value The array. * @throw if there is an error with the underlying output stream. simdjson itself will not throw. */ inline std::ostream& operator<<(std::ostream& out, simdjson::icelake::ondemand::array value); #if SIMDJSON_EXCEPTIONS inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x); #endif /** * Print JSON to an output stream. It does not * validate the content. * * @param out The output stream. * @param value The document. * @throw if there is an error with the underlying output stream. simdjson itself will not throw. 
*/ inline std::ostream& operator<<(std::ostream& out, simdjson::icelake::ondemand::document& value); #if SIMDJSON_EXCEPTIONS inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result&& x); #endif inline std::ostream& operator<<(std::ostream& out, simdjson::icelake::ondemand::document_reference& value); #if SIMDJSON_EXCEPTIONS inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result&& x); #endif /** * Print JSON to an output stream. It does not * validate the content. * * @param out The output stream. * @param value The object. * @throw if there is an error with the underlying output stream. simdjson itself will not throw. */ inline std::ostream& operator<<(std::ostream& out, simdjson::icelake::ondemand::object value); #if SIMDJSON_EXCEPTIONS inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x); #endif }}} // namespace simdjson::icelake::ondemand #endif // SIMDJSON_GENERIC_ONDEMAND_SERIALIZATION_H /* end file simdjson/generic/ondemand/serialization.h for icelake */ // Inline definitions /* including simdjson/generic/ondemand/array-inl.h for icelake: #include "simdjson/generic/ondemand/array-inl.h" */ /* begin file simdjson/generic/ondemand/array-inl.h for icelake */ #ifndef SIMDJSON_GENERIC_ONDEMAND_ARRAY_INL_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_ARRAY_INL_H */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array_iterator-inl.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value.h" */ /* amalgamation skipped (editor-only): #include 
"simdjson/generic/ondemand/value_iterator-inl.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { namespace icelake { namespace ondemand { // // ## Live States // // While iterating or looking up values, depth >= iter->depth. at_start may vary. Error is // always SUCCESS: // // - Start: This is the state when the array is first found and the iterator is just past the `[`. // In this state, at_start == true. // - Next: After we hand a scalar value to the user, or an array/object which they then fully // iterate over, the iterator is at the `,` before the next value (or `]`). In this state, // depth == iter->depth, at_start == false, and error == SUCCESS. // - Unfinished Business: When we hand an array/object to the user which they do not fully // iterate over, we need to finish that iteration by skipping child values until we reach the // Next state. In this state, depth > iter->depth, at_start == false, and error == SUCCESS. // // ## Error States // // In error states, we will yield exactly one more value before stopping. iter->depth == depth // and at_start is always false. We decrement after yielding the error, moving to the Finished // state. // // - Chained Error: When the array iterator is part of an error chain--for example, in // `for (auto tweet : doc["tweets"])`, where the tweet element may be missing or not be an // array--we yield that error in the loop, exactly once. In this state, error != SUCCESS and // iter->depth == depth, and at_start == false. We decrement depth when we yield the error. // - Missing Comma Error: When the iterator ++ method discovers there is no comma between elements, // we flag that as an error and treat it exactly the same as a Chained Error. In this state, // error == TAPE_ERROR, iter->depth == depth, and at_start == false. // // ## Terminal State // // The terminal state has iter->depth < depth. at_start is always false. 
// // - Finished: When we have reached a `]` or have reported an error, we are finished. We signal this // by decrementing depth. In this state, iter->depth < depth, at_start == false, and // error == SUCCESS. // simdjson_inline array::array(const value_iterator &_iter) noexcept : iter{_iter} { } simdjson_inline simdjson_result array::start(value_iterator &iter) noexcept { // We don't need to know if the array is empty to start iteration, but we do want to know if there // is an error--thus `simdjson_unused`. simdjson_unused bool has_value; SIMDJSON_TRY( iter.start_array().get(has_value) ); return array(iter); } simdjson_inline simdjson_result array::start_root(value_iterator &iter) noexcept { simdjson_unused bool has_value; SIMDJSON_TRY( iter.start_root_array().get(has_value) ); return array(iter); } simdjson_inline simdjson_result array::started(value_iterator &iter) noexcept { bool has_value; SIMDJSON_TRY(iter.started_array().get(has_value)); return array(iter); } simdjson_inline simdjson_result array::begin() noexcept { #if SIMDJSON_DEVELOPMENT_CHECKS if (!iter.is_at_iterator_start()) { return OUT_OF_ORDER_ITERATION; } #endif return array_iterator(iter); } simdjson_inline simdjson_result array::end() noexcept { return array_iterator(iter); } simdjson_inline error_code array::consume() noexcept { auto error = iter.json_iter().skip_child(iter.depth()-1); if(error) { iter.abandon(); } return error; } simdjson_inline simdjson_result array::raw_json() noexcept { const uint8_t * starting_point{iter.peek_start()}; auto error = consume(); if(error) { return error; } // After 'consume()', we could be left pointing just beyond the document, but that // is ok because we are not going to dereference the final pointer position, we just // use it to compute the length in bytes. 
const uint8_t * final_point{iter._json_iter->unsafe_pointer()}; return std::string_view(reinterpret_cast(starting_point), size_t(final_point - starting_point)); } SIMDJSON_PUSH_DISABLE_WARNINGS SIMDJSON_DISABLE_STRICT_OVERFLOW_WARNING simdjson_inline simdjson_result array::count_elements() & noexcept { size_t count{0}; // Important: we do not consume any of the values. for(simdjson_unused auto v : *this) { count++; } // The above loop will always succeed, but we want to report errors. if(iter.error()) { return iter.error(); } // We need to move back at the start because we expect users to iterate through // the array after counting the number of elements. iter.reset_array(); return count; } SIMDJSON_POP_DISABLE_WARNINGS simdjson_inline simdjson_result array::is_empty() & noexcept { bool is_not_empty; auto error = iter.reset_array().get(is_not_empty); if(error) { return error; } return !is_not_empty; } inline simdjson_result array::reset() & noexcept { return iter.reset_array(); } inline simdjson_result array::at_pointer(std::string_view json_pointer) noexcept { if (json_pointer[0] != '/') { return INVALID_JSON_POINTER; } json_pointer = json_pointer.substr(1); // - means "the append position" or "the element after the end of the array" // We don't support this, because we're returning a real element, not a position. if (json_pointer == "-") { return INDEX_OUT_OF_BOUNDS; } // Read the array index size_t array_index = 0; size_t i; for (i = 0; i < json_pointer.length() && json_pointer[i] != '/'; i++) { uint8_t digit = uint8_t(json_pointer[i] - '0'); // Check for non-digit in array index. 
If it's there, we're trying to get a field in an object if (digit > 9) { return INCORRECT_TYPE; } array_index = array_index*10 + digit; } // 0 followed by other digits is invalid if (i > 1 && json_pointer[0] == '0') { return INVALID_JSON_POINTER; } // "JSON pointer array index has other characters after 0" // Empty string is invalid; so is a "/" with no digits before it if (i == 0) { return INVALID_JSON_POINTER; } // "Empty string in JSON pointer array index" // Get the child auto child = at(array_index); // If there is an error, it ends here if(child.error()) { return child; } // If there is a /, we're not done yet, call recursively. if (i < json_pointer.length()) { child = child.at_pointer(json_pointer.substr(i)); } return child; } simdjson_inline simdjson_result array::at(size_t index) noexcept { size_t i = 0; for (auto value : *this) { if (i == index) { return value; } i++; } return INDEX_OUT_OF_BOUNDS; } } // namespace ondemand } // namespace icelake } // namespace simdjson namespace simdjson { simdjson_inline simdjson_result::simdjson_result( icelake::ondemand::array &&value ) noexcept : implementation_simdjson_result_base( std::forward(value) ) { } simdjson_inline simdjson_result::simdjson_result( error_code error ) noexcept : implementation_simdjson_result_base(error) { } simdjson_inline simdjson_result simdjson_result::begin() noexcept { if (error()) { return error(); } return first.begin(); } simdjson_inline simdjson_result simdjson_result::end() noexcept { if (error()) { return error(); } return first.end(); } simdjson_inline simdjson_result simdjson_result::count_elements() & noexcept { if (error()) { return error(); } return first.count_elements(); } simdjson_inline simdjson_result simdjson_result::is_empty() & noexcept { if (error()) { return error(); } return first.is_empty(); } simdjson_inline simdjson_result simdjson_result::at(size_t index) noexcept { if (error()) { return error(); } return first.at(index); } simdjson_inline simdjson_result 
simdjson_result::at_pointer(std::string_view json_pointer) noexcept { if (error()) { return error(); } return first.at_pointer(json_pointer); } simdjson_inline simdjson_result simdjson_result::raw_json() noexcept { if (error()) { return error(); } return first.raw_json(); } } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_ARRAY_INL_H /* end file simdjson/generic/ondemand/array-inl.h for icelake */ /* including simdjson/generic/ondemand/array_iterator-inl.h for icelake: #include "simdjson/generic/ondemand/array_iterator-inl.h" */ /* begin file simdjson/generic/ondemand/array_iterator-inl.h for icelake */ #ifndef SIMDJSON_GENERIC_ONDEMAND_ARRAY_ITERATOR_INL_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_ARRAY_ITERATOR_INL_H */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array_iterator.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value-inl.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator-inl.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { namespace icelake { namespace ondemand { simdjson_inline array_iterator::array_iterator(const value_iterator &_iter) noexcept : iter{_iter} {} simdjson_inline simdjson_result array_iterator::operator*() noexcept { if (iter.error()) { iter.abandon(); return iter.error(); } return value(iter.child()); } simdjson_inline bool array_iterator::operator==(const array_iterator &other) const noexcept { return !(*this != other); } simdjson_inline bool array_iterator::operator!=(const array_iterator &) const noexcept { return iter.is_open(); } simdjson_inline array_iterator &array_iterator::operator++() noexcept { error_code error; // PERF NOTE this is a safety rail ... 
users should exit loops as soon as they receive an error, so we'll never get here. // However, it does not seem to make a perf difference, so we add it out of an abundance of caution. if (( error = iter.error() )) { return *this; } if (( error = iter.skip_child() )) { return *this; } if (( error = iter.has_next_element().error() )) { return *this; } return *this; } } // namespace ondemand } // namespace icelake } // namespace simdjson namespace simdjson { simdjson_inline simdjson_result::simdjson_result( icelake::ondemand::array_iterator &&value ) noexcept : icelake::implementation_simdjson_result_base(std::forward(value)) { first.iter.assert_is_valid(); } simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept : icelake::implementation_simdjson_result_base({}, error) { } simdjson_inline simdjson_result simdjson_result::operator*() noexcept { if (error()) { return error(); } return *first; } simdjson_inline bool simdjson_result::operator==(const simdjson_result &other) const noexcept { if (!first.iter.is_valid()) { return !error(); } return first == other.first; } simdjson_inline bool simdjson_result::operator!=(const simdjson_result &other) const noexcept { if (!first.iter.is_valid()) { return error(); } return first != other.first; } simdjson_inline simdjson_result &simdjson_result::operator++() noexcept { // Clear the error if there is one, so we don't yield it twice if (error()) { second = SUCCESS; return *this; } ++(first); return *this; } } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_ARRAY_ITERATOR_INL_H /* end file simdjson/generic/ondemand/array_iterator-inl.h for icelake */ /* including simdjson/generic/ondemand/document-inl.h for icelake: #include "simdjson/generic/ondemand/document-inl.h" */ /* begin file simdjson/generic/ondemand/document-inl.h for icelake */ #ifndef SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_INL_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped 
(editor-only): #define SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_INL_H */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array-inl.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array_iterator.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/document.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator-inl.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_type.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/object-inl.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator-inl.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { namespace icelake { namespace ondemand { simdjson_inline document::document(ondemand::json_iterator &&_iter) noexcept : iter{std::forward(_iter)} { logger::log_start_value(iter, "document"); } simdjson_inline document document::start(json_iterator &&iter) noexcept { return document(std::forward(iter)); } inline void document::rewind() noexcept { iter.rewind(); } inline std::string document::to_debug_string() noexcept { return iter.to_string(); } inline simdjson_result document::current_location() const noexcept { return iter.current_location(); } inline int32_t document::current_depth() const noexcept { return iter.depth(); } inline bool document::at_end() const noexcept { return iter.at_end(); } inline bool document::is_alive() noexcept { return iter.is_alive(); } simdjson_inline value_iterator document::resume_value_iterator() noexcept { return value_iterator(&iter, 1, 
iter.root_position()); } simdjson_inline value_iterator document::get_root_value_iterator() noexcept { return resume_value_iterator(); } simdjson_inline simdjson_result document::start_or_resume_object() noexcept { if (iter.at_root()) { return get_object(); } else { return object::resume(resume_value_iterator()); } } simdjson_inline simdjson_result document::get_value() noexcept { // Make sure we start any arrays or objects before returning, so that start_root_() // gets called. // It is the convention throughout the code that the macro `SIMDJSON_DEVELOPMENT_CHECKS` determines whether // we check for OUT_OF_ORDER_ITERATION. Proper on::demand code should never trigger this error. #if SIMDJSON_DEVELOPMENT_CHECKS if (!iter.at_root()) { return OUT_OF_ORDER_ITERATION; } #endif // assert_at_root() serves two purposes: in Debug mode, whether or not // SIMDJSON_DEVELOPMENT_CHECKS is set or not, it checks that we are at the root of // the document (this will typically be redundant). In release mode, it generates // SIMDJSON_ASSUME statements to allow the compiler to make assumptions. iter.assert_at_root(); switch (*iter.peek()) { case '[': { // The following lines check that the document ends with ]. auto value_iterator = get_root_value_iterator(); auto error = value_iterator.check_root_array(); if(error) { return error; } return value(get_root_value_iterator()); } case '{': { // The following lines would check that the document ends with }. auto value_iterator = get_root_value_iterator(); auto error = value_iterator.check_root_object(); if(error) { return error; } return value(get_root_value_iterator()); } default: // Unfortunately, scalar documents are a special case in simdjson and they cannot // be safely converted to value instances. 
return SCALAR_DOCUMENT_AS_VALUE;
  }
}

// NOTE(review): template parameter/argument lists appear to be missing in this copy of the
// file (e.g. the bare `template` before the receiver overload of get_string() below, and
// `simdjson_result` without an argument); the tokens are kept exactly as found.

// Treats the root value as an array: obtains a root value_iterator and hands it to
// array::start_root().
simdjson_inline simdjson_result document::get_array() & noexcept {
  auto value = get_root_value_iterator();
  return array::start_root(value);
}

// Object counterpart of get_array(): obtains a root value_iterator and hands it to
// object::start_root().
simdjson_inline simdjson_result document::get_object() & noexcept {
  auto value = get_root_value_iterator();
  return object::start_root(value);
}

/**
 * We decided that calling 'get_double()' on the JSON document '1.233 blabla' should
 * give an error, so we check for trailing content. We want to disallow trailing
 * content.
 * Thus, in the implementations below, we pass 'true' to the get_root_*() method that is
 * called on the iterator returned by get_root_value_iterator(): this indicates that we
 * disallow trailing content.
 */
simdjson_inline simdjson_result document::get_uint64() noexcept {
  return get_root_value_iterator().get_root_uint64(true);
}
simdjson_inline simdjson_result document::get_uint64_in_string() noexcept {
  return get_root_value_iterator().get_root_uint64_in_string(true);
}
simdjson_inline simdjson_result document::get_int64() noexcept {
  return get_root_value_iterator().get_root_int64(true);
}
simdjson_inline simdjson_result document::get_int64_in_string() noexcept {
  return get_root_value_iterator().get_root_int64_in_string(true);
}
simdjson_inline simdjson_result document::get_double() noexcept {
  return get_root_value_iterator().get_root_double(true);
}
simdjson_inline simdjson_result document::get_double_in_string() noexcept {
  return get_root_value_iterator().get_root_double_in_string(true);
}
// For the string getters the 'true' flag comes first and allow_replacement is forwarded
// unchanged after it.
simdjson_inline simdjson_result document::get_string(bool allow_replacement) noexcept {
  return get_root_value_iterator().get_root_string(true, allow_replacement);
}
// Receiver overload: the result is written into `receiver` and only an error_code is
// returned. Here the 'true' flag is the second argument, after the receiver.
template
simdjson_inline error_code document::get_string(string_type& receiver, bool allow_replacement) noexcept {
  return get_root_value_iterator().get_root_string(receiver, true, allow_replacement);
}
simdjson_inline simdjson_result document::get_wobbly_string() noexcept {
  return get_root_value_iterator().get_root_wobbly_string(true);
}
simdjson_inline
simdjson_result document::get_raw_json_string() noexcept {
  return get_root_value_iterator().get_root_raw_json_string(true);
}
// Like the other root getters of document, the next two pass 'true' to the get_root_*/
// is_root_* call made on the root value iterator.
simdjson_inline simdjson_result document::get_bool() noexcept {
  return get_root_value_iterator().get_root_bool(true);
}
simdjson_inline simdjson_result document::is_null() noexcept {
  return get_root_value_iterator().is_root_null(true);
}

// Explicit specializations of get() for lvalue documents. Each one simply forwards to the
// typed getter named in its body.
// NOTE(review): the specialization arguments are not visible in this copy of the file, so
// the type each specialization serves has to be read off the getter it calls.
template<> simdjson_inline simdjson_result document::get() & noexcept { return get_array(); }
template<> simdjson_inline simdjson_result document::get() & noexcept { return get_object(); }
template<> simdjson_inline simdjson_result document::get() & noexcept { return get_raw_json_string(); }
// String retrieval through get() always passes allow_replacement = false.
template<> simdjson_inline simdjson_result document::get() & noexcept { return get_string(false); }
template<> simdjson_inline simdjson_result document::get() & noexcept { return get_double(); }
template<> simdjson_inline simdjson_result document::get() & noexcept { return get_uint64(); }
template<> simdjson_inline simdjson_result document::get() & noexcept { return get_int64(); }
template<> simdjson_inline simdjson_result document::get() & noexcept { return get_bool(); }
template<> simdjson_inline simdjson_result document::get() & noexcept { return get_value(); }

// Explicit specializations of get() for rvalue documents. No array/object form appears in
// this group. The raw-string, string and value forms call the getter directly; the
// numeric and bool forms call it through std::forward(*this).
template<> simdjson_inline simdjson_result document::get() && noexcept { return get_raw_json_string(); }
template<> simdjson_inline simdjson_result document::get() && noexcept { return get_string(false); }
template<> simdjson_inline simdjson_result document::get() && noexcept { return std::forward(*this).get_double(); }
template<> simdjson_inline simdjson_result document::get() && noexcept { return std::forward(*this).get_uint64(); }
template<> simdjson_inline simdjson_result document::get() && noexcept { return std::forward(*this).get_int64(); }
template<> simdjson_inline simdjson_result document::get() && noexcept { return std::forward(*this).get_bool(); }
template<> simdjson_inline simdjson_result document::get() && noexcept { return get_value(); }

template
simdjson_inline error_code document::get(T &out) & noexcept {
  return get().get(out);
}
// The two get(T &out) overloads (lvalue above, rvalue below) obtain the result from get()
// and store it through that result's get(out), returning only the error_code.
// NOTE(review): template parameter/argument lists appear to be missing in this copy of the
// file (bare `template`, `get()` without an argument); tokens are kept exactly as found.
template
simdjson_inline error_code document::get(T &out) && noexcept {
  return std::forward(*this).get().get(out);
}

#if SIMDJSON_EXCEPTIONS
// Conversion operators, only compiled when SIMDJSON_EXCEPTIONS is set. They are declared
// noexcept(false) and return the result of the matching typed getter; presumably the
// conversion of that result to the target type is what throws on error — confirm against
// the simdjson_result definition.
simdjson_inline document::operator array() & noexcept(false) { return get_array(); }
simdjson_inline document::operator object() & noexcept(false) { return get_object(); }
simdjson_inline document::operator uint64_t() noexcept(false) { return get_uint64(); }
simdjson_inline document::operator int64_t() noexcept(false) { return get_int64(); }
simdjson_inline document::operator double() noexcept(false) { return get_double(); }
// The string conversion passes allow_replacement = false.
simdjson_inline document::operator std::string_view() noexcept(false) { return get_string(false); }
simdjson_inline document::operator raw_json_string() noexcept(false) { return get_raw_json_string(); }
simdjson_inline document::operator bool() noexcept(false) { return get_bool(); }
simdjson_inline document::operator value() noexcept(false) { return get_value(); }
#endif

// Counts the elements of the root array via get_array().count_elements(). The document is
// rewound only when the count succeeded; on error the iterator is left where it is.
simdjson_inline simdjson_result document::count_elements() & noexcept {
  auto a = get_array();
  simdjson_result answer = a.count_elements();
  /* If there was an array, we are now left pointing at its first element. */
  if(answer.error() == SUCCESS) { rewind(); }
  return answer;
}

// Object counterpart of count_elements(): counts fields via get_object().count_fields().
simdjson_inline simdjson_result document::count_fields() & noexcept {
  auto a = get_object();
  simdjson_result answer = a.count_fields();
  /* If there was an object, we are now left pointing at its first element.
*/ if(answer.error() == SUCCESS) { rewind(); } return answer; } simdjson_inline simdjson_result document::at(size_t index) & noexcept { auto a = get_array(); return a.at(index); } simdjson_inline simdjson_result document::begin() & noexcept { return get_array().begin(); } simdjson_inline simdjson_result document::end() & noexcept { return {}; } simdjson_inline simdjson_result document::find_field(std::string_view key) & noexcept { return start_or_resume_object().find_field(key); } simdjson_inline simdjson_result document::find_field(const char *key) & noexcept { return start_or_resume_object().find_field(key); } simdjson_inline simdjson_result document::find_field_unordered(std::string_view key) & noexcept { return start_or_resume_object().find_field_unordered(key); } simdjson_inline simdjson_result document::find_field_unordered(const char *key) & noexcept { return start_or_resume_object().find_field_unordered(key); } simdjson_inline simdjson_result document::operator[](std::string_view key) & noexcept { return start_or_resume_object()[key]; } simdjson_inline simdjson_result document::operator[](const char *key) & noexcept { return start_or_resume_object()[key]; } simdjson_inline error_code document::consume() noexcept { auto error = iter.skip_child(0); if(error) { iter.abandon(); } return error; } simdjson_inline simdjson_result document::raw_json() noexcept { auto _iter = get_root_value_iterator(); const uint8_t * starting_point{_iter.peek_start()}; auto error = consume(); if(error) { return error; } // After 'consume()', we could be left pointing just beyond the document, but that // is ok because we are not going to dereference the final pointer position, we just // use it to compute the length in bytes. 
const uint8_t * final_point{iter.unsafe_pointer()}; return std::string_view(reinterpret_cast(starting_point), size_t(final_point - starting_point)); } simdjson_inline simdjson_result document::type() noexcept { return get_root_value_iterator().type(); } simdjson_inline simdjson_result document::is_scalar() noexcept { json_type this_type; auto error = type().get(this_type); if(error) { return error; } return ! ((this_type == json_type::array) || (this_type == json_type::object)); } simdjson_inline bool document::is_negative() noexcept { return get_root_value_iterator().is_root_negative(); } simdjson_inline simdjson_result document::is_integer() noexcept { return get_root_value_iterator().is_root_integer(true); } simdjson_inline simdjson_result document::get_number_type() noexcept { return get_root_value_iterator().get_root_number_type(true); } simdjson_inline simdjson_result document::get_number() noexcept { return get_root_value_iterator().get_root_number(true); } simdjson_inline simdjson_result document::raw_json_token() noexcept { auto _iter = get_root_value_iterator(); return std::string_view(reinterpret_cast(_iter.peek_start()), _iter.peek_start_length()); } simdjson_inline simdjson_result document::at_pointer(std::string_view json_pointer) noexcept { rewind(); // Rewind the document each time at_pointer is called if (json_pointer.empty()) { return this->get_value(); } json_type t; SIMDJSON_TRY(type().get(t)); switch (t) { case json_type::array: return (*this).get_array().at_pointer(json_pointer); case json_type::object: return (*this).get_object().at_pointer(json_pointer); default: return INVALID_JSON_POINTER; } } } // namespace ondemand } // namespace icelake } // namespace simdjson namespace simdjson { simdjson_inline simdjson_result::simdjson_result( icelake::ondemand::document &&value ) noexcept : implementation_simdjson_result_base( std::forward(value) ) { } simdjson_inline simdjson_result::simdjson_result( error_code error ) noexcept : 
implementation_simdjson_result_base( error ) { }

// Forwarding wrappers. Unless noted otherwise, each one returns the stored error when
// error() is set and otherwise calls the same-named method on the wrapped value `first`.
simdjson_inline simdjson_result simdjson_result::count_elements() & noexcept {
  if (error()) { return error(); }
  return first.count_elements();
}
simdjson_inline simdjson_result simdjson_result::count_fields() & noexcept {
  if (error()) { return error(); }
  return first.count_fields();
}
simdjson_inline simdjson_result simdjson_result::at(size_t index) & noexcept {
  if (error()) { return error(); }
  return first.at(index);
}
// first.rewind() returns nothing, so SUCCESS is reported explicitly after calling it.
simdjson_inline error_code simdjson_result::rewind() noexcept {
  if (error()) { return error(); }
  first.rewind();
  return SUCCESS;
}
simdjson_inline simdjson_result simdjson_result::begin() & noexcept {
  if (error()) { return error(); }
  return first.begin();
}
// end() does not consult error() or `first`: it always returns a value-initialized result.
simdjson_inline simdjson_result simdjson_result::end() & noexcept {
  return {};
}
simdjson_inline simdjson_result simdjson_result::find_field_unordered(std::string_view key) & noexcept {
  if (error()) { return error(); }
  return first.find_field_unordered(key);
}
simdjson_inline simdjson_result simdjson_result::find_field_unordered(const char *key) & noexcept {
  if (error()) { return error(); }
  return first.find_field_unordered(key);
}
simdjson_inline simdjson_result simdjson_result::operator[](std::string_view key) & noexcept {
  if (error()) { return error(); }
  return first[key];
}
simdjson_inline simdjson_result simdjson_result::operator[](const char *key) & noexcept {
  if (error()) { return error(); }
  return first[key];
}
simdjson_inline simdjson_result simdjson_result::find_field(std::string_view key) & noexcept {
  if (error()) { return error(); }
  return first.find_field(key);
}
simdjson_inline simdjson_result simdjson_result::find_field(const char *key) & noexcept {
  if (error()) { return error(); }
  return first.find_field(key);
}
simdjson_inline simdjson_result simdjson_result::get_array() & noexcept {
  if (error()) { return error(); }
  return first.get_array();
}
simdjson_inline simdjson_result simdjson_result::get_object() & noexcept {
  if (error()) {
return error(); }
  return first.get_object();
}

// Typed getters. Each one returns the stored error when error() is set and otherwise
// calls the same-named getter on the wrapped value `first`.
// NOTE(review): template parameter/argument lists appear to be missing in this copy of the
// file (bare `template`, `simdjson_result` without an argument); tokens are kept as found.
simdjson_inline simdjson_result simdjson_result::get_uint64() noexcept {
  if (error()) { return error(); }
  return first.get_uint64();
}
simdjson_inline simdjson_result simdjson_result::get_uint64_in_string() noexcept {
  if (error()) { return error(); }
  return first.get_uint64_in_string();
}
simdjson_inline simdjson_result simdjson_result::get_int64() noexcept {
  if (error()) { return error(); }
  return first.get_int64();
}
simdjson_inline simdjson_result simdjson_result::get_int64_in_string() noexcept {
  if (error()) { return error(); }
  return first.get_int64_in_string();
}
simdjson_inline simdjson_result simdjson_result::get_double() noexcept {
  if (error()) { return error(); }
  return first.get_double();
}
simdjson_inline simdjson_result simdjson_result::get_double_in_string() noexcept {
  if (error()) { return error(); }
  return first.get_double_in_string();
}
// allow_replacement is passed through to the wrapped getter unchanged.
simdjson_inline simdjson_result simdjson_result::get_string(bool allow_replacement) noexcept {
  if (error()) { return error(); }
  return first.get_string(allow_replacement);
}
// Receiver overload: the string is written into `receiver`; only an error_code is returned.
template
simdjson_inline error_code simdjson_result::get_string(string_type& receiver, bool allow_replacement) noexcept {
  if (error()) { return error(); }
  return first.get_string(receiver, allow_replacement);
}
simdjson_inline simdjson_result simdjson_result::get_wobbly_string() noexcept {
  if (error()) { return error(); }
  return first.get_wobbly_string();
}
simdjson_inline simdjson_result simdjson_result::get_raw_json_string() noexcept {
  if (error()) { return error(); }
  return first.get_raw_json_string();
}
simdjson_inline simdjson_result simdjson_result::get_bool() noexcept {
  if (error()) { return error(); }
  return first.get_bool();
}
simdjson_inline simdjson_result simdjson_result::get_value() noexcept {
  if (error()) { return error(); }
  return first.get_value();
}
simdjson_inline simdjson_result simdjson_result::is_null() noexcept {
  if (error()) { return error(); }
  return first.is_null();
}
// Generic get() forwarding. Each overload returns the stored error when error() is set and
// otherwise forwards to the matching get() on the wrapped value `first`; the rvalue
// overloads pass `first` through std::forward.
// NOTE(review): template parameter/argument lists appear to be missing in this copy of the
// file (bare `template`, `get()` without an argument); tokens are kept exactly as found.
template
simdjson_inline simdjson_result simdjson_result::get() & noexcept {
  if (error()) { return error(); }
  return first.get();
}
template
simdjson_inline simdjson_result simdjson_result::get() && noexcept {
  if (error()) { return error(); }
  return std::forward(first).get();
}
template
simdjson_inline error_code simdjson_result::get(T &out) & noexcept {
  if (error()) { return error(); }
  return first.get(out);
}
template
simdjson_inline error_code simdjson_result::get(T &out) && noexcept {
  if (error()) { return error(); }
  return std::forward(first).get(out);
}

// Specializations that hand out the wrapped value itself rather than something read from
// it. The lvalue forms are deleted; only the rvalue forms exist, and they forward `first`
// out of this result.
// NOTE(review): presumably the two get() specializations are the document ones, matching
// the get(document &out) overloads below — the template argument is not visible here.
template<> simdjson_inline simdjson_result simdjson_result::get() & noexcept = delete;
template<> simdjson_inline simdjson_result simdjson_result::get() && noexcept {
  if (error()) { return error(); }
  return std::forward(first);
}
template<> simdjson_inline error_code simdjson_result::get(icelake::ondemand::document &out) & noexcept = delete;
template<> simdjson_inline error_code simdjson_result::get(icelake::ondemand::document &out) && noexcept {
  if (error()) { return error(); }
  out = std::forward(first);
  return SUCCESS;
}

simdjson_inline simdjson_result simdjson_result::type() noexcept {
  if (error()) { return error(); }
  return first.type();
}
simdjson_inline simdjson_result simdjson_result::is_scalar() noexcept {
  if (error()) { return error(); }
  return first.is_scalar();
}
// NOTE(review): the return type here is plain bool, so `return error();` converts the
// error code to bool instead of reporting it; presumably any non-SUCCESS code comes out
// as `true` — confirm this is intended.
simdjson_inline bool simdjson_result::is_negative() noexcept {
  if (error()) { return error(); }
  return first.is_negative();
}
simdjson_inline simdjson_result simdjson_result::is_integer() noexcept {
  if (error()) { return error(); }
  return first.is_integer();
}
simdjson_inline simdjson_result simdjson_result::get_number_type() noexcept {
  if (error()) { return error(); }
  return first.get_number_type();
}
simdjson_inline simdjson_result simdjson_result::get_number() noexcept {
  if (error()) { return error(); }
  return first.get_number();
}

#if SIMDJSON_EXCEPTIONS
simdjson_inline simdjson_result::operator icelake::ondemand::array() &
noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } simdjson_inline simdjson_result::operator icelake::ondemand::object() & noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } simdjson_inline simdjson_result::operator uint64_t() noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } simdjson_inline simdjson_result::operator int64_t() noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } simdjson_inline simdjson_result::operator double() noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } simdjson_inline simdjson_result::operator std::string_view() noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } simdjson_inline simdjson_result::operator icelake::ondemand::raw_json_string() noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } simdjson_inline simdjson_result::operator bool() noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } simdjson_inline simdjson_result::operator icelake::ondemand::value() noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } #endif simdjson_inline simdjson_result simdjson_result::current_location() noexcept { if (error()) { return error(); } return first.current_location(); } simdjson_inline bool simdjson_result::at_end() const noexcept { if (error()) { return error(); } return first.at_end(); } simdjson_inline int32_t simdjson_result::current_depth() const noexcept { if (error()) { return error(); } return first.current_depth(); } simdjson_inline simdjson_result simdjson_result::raw_json_token() noexcept { if (error()) { return error(); } return first.raw_json_token(); } simdjson_inline simdjson_result simdjson_result::at_pointer(std::string_view json_pointer) noexcept { if (error()) { return error(); } return first.at_pointer(json_pointer); } } // namespace simdjson namespace 
simdjson {
namespace icelake {
namespace ondemand {

// document_reference holds a plain pointer `doc` to a document and forwards calls to it.
// The default constructor leaves `doc` null; the methods below dereference `doc` without
// checking it.
simdjson_inline document_reference::document_reference() noexcept : doc{nullptr} {}
// Refers to `d` by address; no copy of the document is made.
simdjson_inline document_reference::document_reference(document &d) noexcept : doc(&d) {}
simdjson_inline void document_reference::rewind() noexcept { doc->rewind(); }
simdjson_inline simdjson_result document_reference::get_array() & noexcept { return doc->get_array(); }
simdjson_inline simdjson_result document_reference::get_object() & noexcept { return doc->get_object(); }

/**
 * The document_reference instances are used primarily/solely for streams of JSON
 * documents.
 * We decided that calling 'get_double()' on the JSON document '1.233 blabla' should
 * give an error, so we check for trailing content.
 *
 * However, for streams of JSON documents, we want to be able to start from
 * "321" "321" "321"
 * and parse it successfully as a stream of JSON documents, calling get_uint64_in_string()
 * successfully each time.
 *
 * To achieve this result, we pass 'false' to the get_root_*() method that is called on
 * the iterator returned by get_root_value_iterator():
 * this indicates that we allow trailing content.
*/ simdjson_inline simdjson_result document_reference::get_uint64() noexcept { return doc->get_root_value_iterator().get_root_uint64(false); } simdjson_inline simdjson_result document_reference::get_uint64_in_string() noexcept { return doc->get_root_value_iterator().get_root_uint64_in_string(false); } simdjson_inline simdjson_result document_reference::get_int64() noexcept { return doc->get_root_value_iterator().get_root_int64(false); } simdjson_inline simdjson_result document_reference::get_int64_in_string() noexcept { return doc->get_root_value_iterator().get_root_int64_in_string(false); } simdjson_inline simdjson_result document_reference::get_double() noexcept { return doc->get_root_value_iterator().get_root_double(false); } simdjson_inline simdjson_result document_reference::get_double_in_string() noexcept { return doc->get_root_value_iterator().get_root_double(false); } simdjson_inline simdjson_result document_reference::get_string(bool allow_replacement) noexcept { return doc->get_root_value_iterator().get_root_string(false, allow_replacement); } template simdjson_inline error_code document_reference::get_string(string_type& receiver, bool allow_replacement) noexcept { return doc->get_root_value_iterator().get_root_string(receiver, false, allow_replacement); } simdjson_inline simdjson_result document_reference::get_wobbly_string() noexcept { return doc->get_root_value_iterator().get_root_wobbly_string(false); } simdjson_inline simdjson_result document_reference::get_raw_json_string() noexcept { return doc->get_root_value_iterator().get_root_raw_json_string(false); } simdjson_inline simdjson_result document_reference::get_bool() noexcept { return doc->get_root_value_iterator().get_root_bool(false); } simdjson_inline simdjson_result document_reference::get_value() noexcept { return doc->get_value(); } simdjson_inline simdjson_result document_reference::is_null() noexcept { return doc->get_root_value_iterator().is_root_null(false); } #if SIMDJSON_EXCEPTIONS 
// Conversion operators of document_reference (this group is closed by the #endif below).
// The array, object, std::string_view, raw_json_string and value operators convert the
// referenced document (*doc) to the target type; the uint64_t, int64_t, double and bool
// operators call this class's own getters instead.
// NOTE(review): the std::string_view and raw_json_string operators therefore go through
// *doc rather than document_reference::get_string()/get_raw_json_string(), which pass
// 'false' to the root getters. Presumably the document conversion uses the document's own
// flag instead — confirm whether that difference is intended for document streams.
simdjson_inline document_reference::operator array() & noexcept(false) { return array(*doc); }
simdjson_inline document_reference::operator object() & noexcept(false) { return object(*doc); }
simdjson_inline document_reference::operator uint64_t() noexcept(false) { return get_uint64(); }
simdjson_inline document_reference::operator int64_t() noexcept(false) { return get_int64(); }
simdjson_inline document_reference::operator double() noexcept(false) { return get_double(); }
simdjson_inline document_reference::operator std::string_view() noexcept(false) { return std::string_view(*doc); }
simdjson_inline document_reference::operator raw_json_string() noexcept(false) { return raw_json_string(*doc); }
simdjson_inline document_reference::operator bool() noexcept(false) { return get_bool(); }
simdjson_inline document_reference::operator value() noexcept(false) { return value(*doc); }
#endif

// Plain forwarding: each method below calls the same-named method (or operator[]) on the
// referenced document and returns its result unchanged.
simdjson_inline simdjson_result document_reference::count_elements() & noexcept { return doc->count_elements(); }
simdjson_inline simdjson_result document_reference::count_fields() & noexcept { return doc->count_fields(); }
simdjson_inline simdjson_result document_reference::at(size_t index) & noexcept { return doc->at(index); }
simdjson_inline simdjson_result document_reference::begin() & noexcept { return doc->begin(); }
simdjson_inline simdjson_result document_reference::end() & noexcept { return doc->end(); }
simdjson_inline simdjson_result document_reference::find_field(std::string_view key) & noexcept { return doc->find_field(key); }
simdjson_inline simdjson_result document_reference::find_field(const char *key) & noexcept { return doc->find_field(key); }
simdjson_inline simdjson_result document_reference::operator[](std::string_view key) & noexcept { return (*doc)[key]; }
simdjson_inline simdjson_result document_reference::operator[](const char *key) & noexcept { return (*doc)[key]; }
simdjson_inline simdjson_result document_reference::find_field_unordered(std::string_view key)
& noexcept { return doc->find_field_unordered(key); } simdjson_inline simdjson_result document_reference::find_field_unordered(const char *key) & noexcept { return doc->find_field_unordered(key); } simdjson_inline simdjson_result document_reference::type() noexcept { return doc->type(); } simdjson_inline simdjson_result document_reference::is_scalar() noexcept { return doc->is_scalar(); } simdjson_inline simdjson_result document_reference::current_location() noexcept { return doc->current_location(); } simdjson_inline int32_t document_reference::current_depth() const noexcept { return doc->current_depth(); } simdjson_inline bool document_reference::is_negative() noexcept { return doc->is_negative(); } simdjson_inline simdjson_result document_reference::is_integer() noexcept { return doc->get_root_value_iterator().is_root_integer(false); } simdjson_inline simdjson_result document_reference::get_number_type() noexcept { return doc->get_root_value_iterator().get_root_number_type(false); } simdjson_inline simdjson_result document_reference::get_number() noexcept { return doc->get_root_value_iterator().get_root_number(false); } simdjson_inline simdjson_result document_reference::raw_json_token() noexcept { return doc->raw_json_token(); } simdjson_inline simdjson_result document_reference::at_pointer(std::string_view json_pointer) noexcept { return doc->at_pointer(json_pointer); } simdjson_inline simdjson_result document_reference::raw_json() noexcept { return doc->raw_json();} simdjson_inline document_reference::operator document&() const noexcept { return *doc; } } // namespace ondemand } // namespace icelake } // namespace simdjson namespace simdjson { simdjson_inline simdjson_result::simdjson_result(icelake::ondemand::document_reference value, error_code error) noexcept : implementation_simdjson_result_base(std::forward(value), error) {} simdjson_inline simdjson_result simdjson_result::count_elements() & noexcept { if (error()) { return error(); } return 
first.count_elements();
}

// Forwarding wrappers. Unless noted otherwise, each one returns the stored error when
// error() is set and otherwise calls the same-named method on the wrapped value `first`.
simdjson_inline simdjson_result simdjson_result::count_fields() & noexcept {
  if (error()) { return error(); }
  return first.count_fields();
}
simdjson_inline simdjson_result simdjson_result::at(size_t index) & noexcept {
  if (error()) { return error(); }
  return first.at(index);
}
// first.rewind() returns nothing, so SUCCESS is reported explicitly after calling it.
simdjson_inline error_code simdjson_result::rewind() noexcept {
  if (error()) { return error(); }
  first.rewind();
  return SUCCESS;
}
simdjson_inline simdjson_result simdjson_result::begin() & noexcept {
  if (error()) { return error(); }
  return first.begin();
}
// end() does not consult error() or `first`: it always returns a value-initialized result.
simdjson_inline simdjson_result simdjson_result::end() & noexcept {
  return {};
}
simdjson_inline simdjson_result simdjson_result::find_field_unordered(std::string_view key) & noexcept {
  if (error()) { return error(); }
  return first.find_field_unordered(key);
}
simdjson_inline simdjson_result simdjson_result::find_field_unordered(const char *key) & noexcept {
  if (error()) { return error(); }
  return first.find_field_unordered(key);
}
simdjson_inline simdjson_result simdjson_result::operator[](std::string_view key) & noexcept {
  if (error()) { return error(); }
  return first[key];
}
simdjson_inline simdjson_result simdjson_result::operator[](const char *key) & noexcept {
  if (error()) { return error(); }
  return first[key];
}
simdjson_inline simdjson_result simdjson_result::find_field(std::string_view key) & noexcept {
  if (error()) { return error(); }
  return first.find_field(key);
}
simdjson_inline simdjson_result simdjson_result::find_field(const char *key) & noexcept {
  if (error()) { return error(); }
  return first.find_field(key);
}
simdjson_inline simdjson_result simdjson_result::get_array() & noexcept {
  if (error()) { return error(); }
  return first.get_array();
}
simdjson_inline simdjson_result simdjson_result::get_object() & noexcept {
  if (error()) { return error(); }
  return first.get_object();
}
simdjson_inline simdjson_result simdjson_result::get_uint64() noexcept {
  if (error()) { return error(); }
  return
first.get_uint64();
}

// Typed getters. Each one returns the stored error when error() is set and otherwise
// calls the same-named getter on the wrapped value `first`.
// NOTE(review): template parameter/argument lists appear to be missing in this copy of the
// file (bare `template`, `simdjson_result` without an argument); tokens are kept as found.
simdjson_inline simdjson_result simdjson_result::get_uint64_in_string() noexcept {
  if (error()) { return error(); }
  return first.get_uint64_in_string();
}
simdjson_inline simdjson_result simdjson_result::get_int64() noexcept {
  if (error()) { return error(); }
  return first.get_int64();
}
simdjson_inline simdjson_result simdjson_result::get_int64_in_string() noexcept {
  if (error()) { return error(); }
  return first.get_int64_in_string();
}
simdjson_inline simdjson_result simdjson_result::get_double() noexcept {
  if (error()) { return error(); }
  return first.get_double();
}
simdjson_inline simdjson_result simdjson_result::get_double_in_string() noexcept {
  if (error()) { return error(); }
  return first.get_double_in_string();
}
// allow_replacement is passed through to the wrapped getter unchanged.
simdjson_inline simdjson_result simdjson_result::get_string(bool allow_replacement) noexcept {
  if (error()) { return error(); }
  return first.get_string(allow_replacement);
}
// Receiver overload: the string is written into `receiver`; only an error_code is returned.
template
simdjson_inline error_code simdjson_result::get_string(string_type& receiver, bool allow_replacement) noexcept {
  if (error()) { return error(); }
  return first.get_string(receiver, allow_replacement);
}
simdjson_inline simdjson_result simdjson_result::get_wobbly_string() noexcept {
  if (error()) { return error(); }
  return first.get_wobbly_string();
}
simdjson_inline simdjson_result simdjson_result::get_raw_json_string() noexcept {
  if (error()) { return error(); }
  return first.get_raw_json_string();
}
simdjson_inline simdjson_result simdjson_result::get_bool() noexcept {
  if (error()) { return error(); }
  return first.get_bool();
}
simdjson_inline simdjson_result simdjson_result::get_value() noexcept {
  if (error()) { return error(); }
  return first.get_value();
}
simdjson_inline simdjson_result simdjson_result::is_null() noexcept {
  if (error()) { return error(); }
  return first.is_null();
}
simdjson_inline simdjson_result simdjson_result::type() noexcept {
  if (error()) { return error(); }
  return first.type();
}
simdjson_inline simdjson_result
simdjson_result::is_scalar() noexcept {
  if (error()) { return error(); }
  return first.is_scalar();
}
// Forwarding wrappers: each one returns the stored error when error() is set and otherwise
// calls the same-named method on the wrapped value `first`.
simdjson_inline simdjson_result simdjson_result::is_negative() noexcept {
  if (error()) { return error(); }
  return first.is_negative();
}
simdjson_inline simdjson_result simdjson_result::is_integer() noexcept {
  if (error()) { return error(); }
  return first.is_integer();
}
simdjson_inline simdjson_result simdjson_result::get_number_type() noexcept {
  if (error()) { return error(); }
  return first.get_number_type();
}
simdjson_inline simdjson_result simdjson_result::get_number() noexcept {
  if (error()) { return error(); }
  return first.get_number();
}

#if SIMDJSON_EXCEPTIONS
// Conversion operators, only compiled when SIMDJSON_EXCEPTIONS is set. Unlike the wrappers
// above, a stored error is thrown as simdjson_error instead of being returned; otherwise
// `first` is returned and converted to the operator's target type.
simdjson_inline simdjson_result::operator icelake::ondemand::array() & noexcept(false) {
  if (error()) { throw simdjson_error(error()); }
  return first;
}
simdjson_inline simdjson_result::operator icelake::ondemand::object() & noexcept(false) {
  if (error()) { throw simdjson_error(error()); }
  return first;
}
simdjson_inline simdjson_result::operator uint64_t() noexcept(false) {
  if (error()) { throw simdjson_error(error()); }
  return first;
}
simdjson_inline simdjson_result::operator int64_t() noexcept(false) {
  if (error()) { throw simdjson_error(error()); }
  return first;
}
simdjson_inline simdjson_result::operator double() noexcept(false) {
  if (error()) { throw simdjson_error(error()); }
  return first;
}
simdjson_inline simdjson_result::operator std::string_view() noexcept(false) {
  if (error()) { throw simdjson_error(error()); }
  return first;
}
simdjson_inline simdjson_result::operator icelake::ondemand::raw_json_string() noexcept(false) {
  if (error()) { throw simdjson_error(error()); }
  return first;
}
simdjson_inline simdjson_result::operator bool() noexcept(false) {
  if (error()) { throw simdjson_error(error()); }
  return first;
}
simdjson_inline simdjson_result::operator icelake::ondemand::value() noexcept(false) {
  if (error()) { throw simdjson_error(error()); }
  return first;
}
#endif

simdjson_inline
simdjson_result simdjson_result::current_location() noexcept {
  // Forwarding accessor: stored error first, otherwise delegate to `first`.
  if (error()) { return error(); }
  return first.current_location();
}

simdjson_inline simdjson_result simdjson_result::raw_json_token() noexcept {
  if (error()) { return error(); }
  return first.raw_json_token();
}

// `json_pointer` is passed through unchanged to first.at_pointer().
simdjson_inline simdjson_result simdjson_result::at_pointer(std::string_view json_pointer) noexcept {
  if (error()) { return error(); }
  return first.at_pointer(json_pointer);
}

} // namespace simdjson

#endif // SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_INL_H
/* end file simdjson/generic/ondemand/document-inl.h for icelake */
/* including simdjson/generic/ondemand/document_stream-inl.h for icelake: #include "simdjson/generic/ondemand/document_stream-inl.h" */
/* begin file simdjson/generic/ondemand/document_stream-inl.h for icelake */
#ifndef SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_STREAM_INL_H

/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */
/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_STREAM_INL_H */
/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */
/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/document_stream.h" */
/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/document-inl.h" */
/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base-inl.h" */
/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */

// NOTE(review): the header names of the two #include directives below are
// missing in this copy of the file -- restore them from the upstream source.
#include
#include

namespace simdjson {
namespace icelake {
namespace ondemand {

#ifdef SIMDJSON_THREADS_ENABLED

// Blocks the caller until the worker's pending job is done.
inline void stage1_worker::finish() {
  // After calling "run" someone would call finish() to wait
  // for the end of the processing.
  // This function will wait until either the thread has done
  // the processing or, else, the destructor has been called.
std::unique_lock lock(locking_mutex);
  // Sleep on the condition variable until has_work is false.
  cond_var.wait(lock, [this]{return has_work == false;});
}

// Stops (and joins) the worker thread via stop_thread().
inline stage1_worker::~stage1_worker() {
  // The thread may never outlive the stage1_worker instance
  // and will always be stopped/joined before the stage1_worker
  // instance is gone.
  stop_thread();
}

// Creates the worker thread (at most once). The thread loops: wait for work
// or for a stop request, run stage 1 on behalf of `owner`, store the result
// in owner->stage1_thread_error, clear has_work and notify the waiter.
inline void stage1_worker::start_thread() {
  std::unique_lock lock(locking_mutex);
  if(thread.joinable()) {
    return; // This should never happen but we never want to create more than one thread.
  }
  thread = std::thread([this]{
      while(true) {
        std::unique_lock thread_lock(locking_mutex);
        // We wait for either "run" or "stop_thread" to be called.
        cond_var.wait(thread_lock, [this]{return has_work || !can_work;});
        // If, for some reason, the stop_thread() method was called (i.e., the
        // destructor of stage1_worker is called), then we want to immediately destroy
        // the thread (and not do any more processing).
        if(!can_work) {
          break;
        }
        this->owner->stage1_thread_error = this->owner->run_stage1(*this->stage1_thread_parser,
              this->_next_batch_start);
        this->has_work = false;
        // The condition variable call should be moved after thread_lock.unlock() for performance
        // reasons but thread sanitizers may report it as a data race if we do.
        // See https://stackoverflow.com/questions/35775501/c-should-condition-variable-be-notified-under-lock
        cond_var.notify_one(); // will notify "finish"
        thread_lock.unlock();
      }
    }
  );
}

// Asks the worker thread to exit and joins it if it is joinable.
inline void stage1_worker::stop_thread() {
  std::unique_lock lock(locking_mutex);
  // We have to make sure that all locks can be released.
can_work = false;
  has_work = false;
  // Wake every waiter (the worker loop and any finish() caller) so they can
  // re-evaluate their predicates with the flags set above.
  cond_var.notify_all();
  lock.unlock();
  if(thread.joinable()) {
    thread.join();
  }
}

// Hands a job to the worker thread: records the owning stream, the parser to
// run stage 1 with and the batch offset, sets has_work and wakes the thread.
inline void stage1_worker::run(document_stream * ds, parser * stage1, size_t next_batch_start) {
  std::unique_lock lock(locking_mutex);
  owner = ds;
  _next_batch_start = next_batch_start;
  stage1_thread_parser = stage1;
  has_work = true;
  // The condition variable call should be moved after lock.unlock() for performance
  // reasons but thread sanitizers may report it as a data race if we do.
  // See https://stackoverflow.com/questions/35775501/c-should-condition-variable-be-notified-under-lock
  cond_var.notify_one(); // will notify the thread lock that we have work
  lock.unlock();
}

#endif  // SIMDJSON_THREADS_ENABLED

// Builds a stream over `_buf[0.._len)`. The batch size is raised to
// MINIMAL_BATCH_SIZE when the requested value is not larger than it.
// With threads enabled, a null `worker` is reported as MEMALLOC.
simdjson_inline document_stream::document_stream(
  ondemand::parser &_parser,
  const uint8_t *_buf,
  size_t _len,
  size_t _batch_size,
  bool _allow_comma_separated
) noexcept
  : parser{&_parser},
    buf{_buf},
    len{_len},
    batch_size{_batch_size <= MINIMAL_BATCH_SIZE ? MINIMAL_BATCH_SIZE : _batch_size},
    allow_comma_separated{_allow_comma_separated},
    error{SUCCESS}
    #ifdef SIMDJSON_THREADS_ENABLED
    , use_thread(_parser.threaded) // we need to make a copy because _parser.threaded can change
    #endif
{
#ifdef SIMDJSON_THREADS_ENABLED
  if(worker.get() == nullptr) {
    error = MEMALLOC;
  }
#endif
}

// Default-constructed stream: no parser, no buffer, error == UNINITIALIZED.
simdjson_inline document_stream::document_stream() noexcept
  : parser{nullptr},
    buf{nullptr},
    len{0},
    batch_size{0},
    allow_comma_separated{false},
    error{UNINITIALIZED}
    #ifdef SIMDJSON_THREADS_ENABLED
    , use_thread(false)
    #endif
{
}

// With threads enabled, releases the worker explicitly.
simdjson_inline document_stream::~document_stream() noexcept
{
#ifdef SIMDJSON_THREADS_ENABLED
  worker.reset();
#endif
}

// Returns the length of the input buffer given at construction.
inline size_t document_stream::size_in_bytes() const noexcept {
  return len;
}

// When the stream stopped with CAPACITY, returns everything from the current
// batch start to the end of the buffer; otherwise returns the difference
// between the structural_indexes entries at n_structural_indexes and
// n_structural_indexes + 1.
inline size_t document_stream::truncated_bytes() const noexcept {
  if(error == CAPACITY) { return len - batch_start; }
  return parser->implementation->structural_indexes[parser->implementation->n_structural_indexes] -
parser->implementation->structural_indexes[parser->implementation->n_structural_indexes + 1];
}

// Default iterator: not attached to a stream and already finished.
simdjson_inline document_stream::iterator::iterator() noexcept
  : stream{nullptr}, finished{true} {
}

// Iterator over `_stream`; `is_end` becomes the initial `finished` flag.
simdjson_inline document_stream::iterator::iterator(document_stream* _stream, bool is_end) noexcept
  : stream{_stream}, finished{is_end} {
}

// Returns the stream's current document together with the stream's current
// error code (the error is carried in the result, not checked here).
simdjson_inline simdjson_result document_stream::iterator::operator*() noexcept {
  //if(stream->error) { return stream->error; }
  return simdjson_result(stream->doc, stream->error);
}

// Advances the stream to the next document and updates `finished`.
simdjson_inline document_stream::iterator& document_stream::iterator::operator++() noexcept {
  // If there is an error, then we want the iterator
  // to be finished, no matter what. (E.g., we do not
  // keep generating documents with errors, or go beyond
  // a document with errors.)
  //
  // Users do not have to call "operator*()" when they use operator++,
  // so we need to end the stream in the operator++ function.
  //
  // Note that setting finished = true is essential otherwise
  // we would enter an infinite loop.
  if (stream->error) { finished = true; }
  // Note that stream->next() is guarded against error conditions
  // (it returns immediately when stream->error is set).
  // In effect, this next function does nothing when (stream->error)
  // is true (hence the risk of an infinite loop).
  stream->next();

  // If that was the last document, we're finished.
  // It is the only type of error we do not want to appear
  // in operator*.
  if (stream->error == EMPTY) { finished = true; }
  // If we had any other kind of error (not EMPTY) then we want
  // to pass it along to the operator* and we cannot mark the result
  // as "finished" just yet.
  return *this;
}

// Two iterators differ exactly when their `finished` flags differ; the
// stream pointer is not compared.
simdjson_inline bool document_stream::iterator::operator!=(const document_stream::iterator &other) const noexcept {
  return finished != other.finished;
}

// Starts the stream (allocation + first stage 1) and returns an iterator on it.
simdjson_inline document_stream::iterator document_stream::begin() noexcept {
  start();
  // If there are no documents, we're finished.
return iterator(this, error == EMPTY);
}

// Returns an iterator that is already marked finished.
simdjson_inline document_stream::iterator document_stream::end() noexcept {
  return iterator(this, true);
}

// Allocates the parser, runs stage 1 on the first non-empty batch and sets up
// `doc` on it. Any failure is left in `error` and the function returns early.
inline void document_stream::start() noexcept {
  if (error) { return; }
  error = parser->allocate(batch_size);
  if (error) { return; }
  // Always run the first stage 1 parse immediately
  batch_start = 0;
  error = run_stage1(*parser, batch_start);
  while(error == EMPTY) {
    // In exceptional cases, we may start with an empty block
    batch_start = next_batch_start();
    if (batch_start >= len) { return; }
    error = run_stage1(*parser, batch_start);
  }
  if (error) { return; }
  doc_index = batch_start;
  doc = document(json_iterator(&buf[batch_start], parser));
  doc.iter._streaming = true;

  #ifdef SIMDJSON_THREADS_ENABLED
  if (use_thread && next_batch_start() < len) {
    // Kick off the first thread on next batch if needed
    error = stage1_thread_parser.allocate(batch_size);
    if (error) { return; }
    worker->start_thread();
    start_stage1_thread();
    if (error) { return; }
  }
  #endif // SIMDJSON_THREADS_ENABLED
}

// Moves to the next document, loading further batches when the current one
// is exhausted. Leaves EMPTY in `error` when no batch is left.
inline void document_stream::next() noexcept {
  // Return at once if we are already in an error condition.
  if (error) { return; }
  next_document();
  if (error) { return; }
  auto cur_struct_index = doc.iter._root - parser->implementation->structural_indexes.get();
  doc_index = batch_start + parser->implementation->structural_indexes[cur_struct_index];

  // Check if at end of structural indexes (i.e. at end of batch)
  if(cur_struct_index >= static_cast(parser->implementation->n_structural_indexes)) {
    error = EMPTY;
    // Load another batch (if available)
    while (error == EMPTY) {
      batch_start = next_batch_start();
      if (batch_start >= len) { break; }
      #ifdef SIMDJSON_THREADS_ENABLED
      if(use_thread) {
        load_from_stage1_thread();
      } else {
        error = run_stage1(*parser, batch_start);
      }
      #else
      error = run_stage1(*parser, batch_start);
      #endif
      /**
       * Whenever we move to another window, we need to update all pointers to make
       * it appear as if the input buffer started at the beginning of the window.
       *
       * Take this input:
       *
       * {"z":5}  {"1":1,"2":2,"4":4} [7,  10,   9]  [15,  11,   12, 13]  [154,  110,   112, 1311]
       *
       * Say you process the following window...
       *
       * '{"z":5}  {"1":1,"2":2,"4":4} [7,  10,   9]'
       *
       * When you do so, the json_iterator has a pointer at the beginning of the memory region
       * (pointing at the beginning of '{"z"...'.
       *
       * When you move to the window that starts at...
       *
       * '[7,  10,   9]  [15,  11,   12, 13] ...
       *
       * then it is not sufficient to just run stage 1. You also need to re-anchor the
       * json_iterator so that it believes we are starting at '[7,  10,   9]...'.
       *
       * Under the DOM front-end, this gets done automatically because the parser owns
       * the pointer to the data, and when you call stage1 and then stage2 on the same
       * parser, then stage2 will run on the pointer acquired by stage1.
       *
       * That is, stage1 calls "this->buf = _buf" so the parser remembers the buffer that
       * we used. But json_iterator has no callback when stage1 is called on the parser.
       * In fact, I think that the parser is unaware of json_iterator.
       *
       *
       * So we need to re-anchor the json_iterator after each call to stage 1 so that
       * all of the pointers are in sync.
       */
      doc.iter = json_iterator(&buf[batch_start], parser);
      doc.iter._streaming = true;
      /**
       * End of resync.
       */

      if (error) { continue; } // If the error was EMPTY, we may want to load another batch.
doc_index = batch_start; } } } inline void document_stream::next_document() noexcept { // Go to next place where depth=0 (document depth) error = doc.iter.skip_child(0); if (error) { return; } // Always set depth=1 at the start of document doc.iter._depth = 1; // consume comma if comma separated is allowed if (allow_comma_separated) { doc.iter.consume_character(','); } // Resets the string buffer at the beginning, thus invalidating the strings. doc.iter._string_buf_loc = parser->string_buf.get(); doc.iter._root = doc.iter.position(); } inline size_t document_stream::next_batch_start() const noexcept { return batch_start + parser->implementation->structural_indexes[parser->implementation->n_structural_indexes]; } inline error_code document_stream::run_stage1(ondemand::parser &p, size_t _batch_start) noexcept { // This code only updates the structural index in the parser, it does not update any json_iterator // instance. size_t remaining = len - _batch_start; if (remaining <= batch_size) { return p.implementation->stage1(&buf[_batch_start], remaining, stage1_mode::streaming_final); } else { return p.implementation->stage1(&buf[_batch_start], batch_size, stage1_mode::streaming_partial); } } simdjson_inline size_t document_stream::iterator::current_index() const noexcept { return stream->doc_index; } simdjson_inline std::string_view document_stream::iterator::source() const noexcept { auto depth = stream->doc.iter.depth(); auto cur_struct_index = stream->doc.iter._root - stream->parser->implementation->structural_indexes.get(); // If at root, process the first token to determine if scalar value if (stream->doc.iter.at_root()) { switch (stream->buf[stream->batch_start + stream->parser->implementation->structural_indexes[cur_struct_index]]) { case '{': case '[': // Depth=1 already at start of document break; case '}': case ']': depth--; break; default: // Scalar value document // TODO: Remove any trailing whitespaces // This returns a string spanning from start of value 
to the beginning of the next document (excluded) return std::string_view(reinterpret_cast(stream->buf) + current_index(), stream->parser->implementation->structural_indexes[++cur_struct_index] - current_index() - 1); } cur_struct_index++; } while (cur_struct_index <= static_cast(stream->parser->implementation->n_structural_indexes)) { switch (stream->buf[stream->batch_start + stream->parser->implementation->structural_indexes[cur_struct_index]]) { case '{': case '[': depth++; break; case '}': case ']': depth--; break; } if (depth == 0) { break; } cur_struct_index++; } return std::string_view(reinterpret_cast(stream->buf) + current_index(), stream->parser->implementation->structural_indexes[cur_struct_index] - current_index() + stream->batch_start + 1);; } inline error_code document_stream::iterator::error() const noexcept { return stream->error; } #ifdef SIMDJSON_THREADS_ENABLED inline void document_stream::load_from_stage1_thread() noexcept { worker->finish(); // Swap to the parser that was loaded up in the thread. Make sure the parser has // enough memory to swap to, as well. std::swap(stage1_thread_parser,*parser); error = stage1_thread_error; if (error) { return; } // If there's anything left, start the stage 1 thread! if (next_batch_start() < len) { start_stage1_thread(); } } inline void document_stream::start_stage1_thread() noexcept { // we call the thread on a lambda that will update // this->stage1_thread_error // there is only one thread that may write to this value // TODO this is NOT exception-safe. 
this->stage1_thread_error = UNINITIALIZED; // In case something goes wrong, make sure it's an error
  size_t _next_batch_start = this->next_batch_start();

  // Hand the job to the worker: this stream, the secondary parser, and the
  // offset of the batch to index.
  worker->run(this, & this->stage1_thread_parser, _next_batch_start);
}

#endif // SIMDJSON_THREADS_ENABLED

} // namespace ondemand
} // namespace icelake
} // namespace simdjson

namespace simdjson {

// Error-only result.
simdjson_inline simdjson_result::simdjson_result(
  error_code error
) noexcept :
    implementation_simdjson_result_base(error)
{
}
// Success result taking over `value`.
simdjson_inline simdjson_result::simdjson_result(
  icelake::ondemand::document_stream &&value
) noexcept :
    implementation_simdjson_result_base(
      std::forward(value)
    )
{
}

}

#endif // SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_STREAM_INL_H
/* end file simdjson/generic/ondemand/document_stream-inl.h for icelake */
/* including simdjson/generic/ondemand/field-inl.h for icelake: #include "simdjson/generic/ondemand/field-inl.h" */
/* begin file simdjson/generic/ondemand/field-inl.h for icelake */
#ifndef SIMDJSON_GENERIC_ONDEMAND_FIELD_INL_H

/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */
/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_FIELD_INL_H */
/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */
/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/field.h" */
/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value-inl.h" */
/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator-inl.h" */
/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */

namespace simdjson {
namespace icelake {
namespace ondemand {

// clang 6 doesn't think the default constructor can be noexcept, so we make it explicit
simdjson_inline field::field() noexcept : std::pair() {}

// Builds a field from its raw key and its value (the value is forwarded into the pair).
simdjson_inline field::field(raw_json_string key, ondemand::value &&value) noexcept
  : std::pair(key, std::forward(value))
{
}

simdjson_inline simdjson_result
field::start(value_iterator &parent_iter) noexcept {
  // Read the key, then position the iterator on the value; either step may
  // fail, in which case SIMDJSON_TRY returns the error to the caller.
  raw_json_string key;
  SIMDJSON_TRY( parent_iter.field_key().get(key) );
  SIMDJSON_TRY( parent_iter.field_value() );
  return field::start(parent_iter, key);
}

// Builds a field from an already-read `key` and a child iterator of `parent_iter`.
simdjson_inline simdjson_result field::start(const value_iterator &parent_iter, raw_json_string key) noexcept {
    return field(key, parent_iter.child());
}

// Unescapes the key through the value's json_iterator, then calls
// first.consume() on the raw key before returning the unescaped result.
simdjson_inline simdjson_warn_unused simdjson_result field::unescaped_key(bool allow_replacement) noexcept {
  SIMDJSON_ASSUME(first.buf != nullptr); // We would like to call .alive() but Visual Studio won't let us.
  simdjson_result answer = first.unescape(second.iter.json_iter(), allow_replacement);
  first.consume();
  return answer;
}

// Returns the raw (still escaped) key.
simdjson_inline raw_json_string field::key() const noexcept {
  SIMDJSON_ASSUME(first.buf != nullptr); // We would like to call .alive() but Visual Studio won't let us.
  return first;
}

// Lvalue access: reference to the stored value.
simdjson_inline value &field::value() & noexcept {
  return second;
}

// Rvalue access: the stored value is forwarded out of the expiring field.
simdjson_inline value field::value() && noexcept {
  return std::forward(*this).second;
}

} // namespace ondemand
} // namespace icelake
} // namespace simdjson

namespace simdjson {

// Success result taking over `value`.
simdjson_inline simdjson_result::simdjson_result(
  icelake::ondemand::field &&value
) noexcept :
    implementation_simdjson_result_base(
      std::forward(value)
    )
{
}
// Error-only result.
simdjson_inline simdjson_result::simdjson_result(
  error_code error
) noexcept :
    implementation_simdjson_result_base(error)
{
}

// The three accessors below return the stored error if there is one and
// otherwise forward to the wrapped field `first`.
simdjson_inline simdjson_result simdjson_result::key() noexcept {
  if (error()) { return error(); }
  return first.key();
}
simdjson_inline simdjson_result simdjson_result::unescaped_key(bool allow_replacement) noexcept {
  if (error()) { return error(); }
  return first.unescaped_key(allow_replacement);
}
simdjson_inline simdjson_result simdjson_result::value() noexcept {
  if (error()) { return error(); }
  // The value is moved out of the wrapped field.
  return std::move(first.value());
}

} // namespace simdjson

#endif // SIMDJSON_GENERIC_ONDEMAND_FIELD_INL_H
/* end file simdjson/generic/ondemand/field-inl.h for icelake */
/* including
simdjson/generic/ondemand/json_iterator-inl.h for icelake: #include "simdjson/generic/ondemand/json_iterator-inl.h" */ /* begin file simdjson/generic/ondemand/json_iterator-inl.h for icelake */ #ifndef SIMDJSON_GENERIC_ONDEMAND_JSON_ITERATOR_INL_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_JSON_ITERATOR_INL_H */ /* amalgamation skipped (editor-only): #include "simdjson/internal/dom_parser_implementation.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/parser.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/logger-inl.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/parser-inl.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/token_iterator-inl.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { namespace icelake { namespace ondemand { simdjson_inline json_iterator::json_iterator(json_iterator &&other) noexcept : token(std::forward(other.token)), parser{other.parser}, _string_buf_loc{other._string_buf_loc}, error{other.error}, _depth{other._depth}, _root{other._root}, _streaming{other._streaming} { other.parser = nullptr; } simdjson_inline json_iterator &json_iterator::operator=(json_iterator &&other) noexcept { token = other.token; parser = other.parser; _string_buf_loc = other._string_buf_loc; error = other.error; _depth = other._depth; _root = other._root; _streaming = other._streaming; other.parser = nullptr; return *this; } simdjson_inline json_iterator::json_iterator(const uint8_t *buf, 
ondemand::parser *_parser) noexcept
  : token(buf, &_parser->implementation->structural_indexes[0]),
    parser{_parser},
    _string_buf_loc{parser->string_buf.get()},
    _depth{1},
    _root{parser->implementation->structural_indexes.get()},
    _streaming{false}

{
  // The iterator starts on the first structural index, at depth 1, with the
  // string buffer cursor at the beginning of the parser's string buffer.
  logger::log_headers();
#if SIMDJSON_CHECK_EOF
  assert_more_tokens();
#endif
}

// Puts the iterator back on the root token, at depth 1, and resets the
// string buffer cursor.
inline void json_iterator::rewind() noexcept {
  token.set_position( root_position() );
  logger::log_headers(); // We start again
  _string_buf_loc = parser->string_buf.get();
  _depth = 1;
}

// Scans every token from the root to the last one on a copy of the token
// iterator and reports whether opening and closing brackets/braces cancel
// out. Only the counts are compared; nesting order is not checked.
inline bool json_iterator::balanced() const noexcept {
  token_iterator ti(token);
  int32_t count{0};
  ti.set_position( root_position() );
  while(ti.peek() <= peek_last()) {
    switch (*ti.return_current_and_advance())
    {
    case '[': case '{':
      count++;
      break;
    case ']': case '}':
      count--;
      break;
    default:
      break;
    }
  }
  return count == 0;
}


// GCC 7 warns when the first line of this function is inlined away into oblivion due to the caller
// relating depth and parent_depth, which is a desired effect. The warning does not show up if the
// skip_child() function is not marked inline).
SIMDJSON_PUSH_DISABLE_WARNINGS
SIMDJSON_DISABLE_STRICT_OVERFLOW_WARNING
// Advances the iterator until depth() <= parent_depth, i.e. until the value
// currently being visited (and everything nested in it) has been consumed.
// Returns SUCCESS once that depth is reached; reports TAPE_ERROR if the tokens
// run out first.
simdjson_warn_unused simdjson_inline error_code json_iterator::skip_child(depth_t parent_depth) noexcept {
  if (depth() <= parent_depth) { return SUCCESS; }
  // The first token is special-cased because depth was already adjusted for it.
  switch (*return_current_and_advance()) {
    // TODO consider whether matching braces is a requirement: if non-matching braces indicates
    // *missing* braces, then future lookups are not in the object/arrays they think they are,
    // violating the rule "validate enough structure that the user can be confident they are
    // looking at the right values."
    // PERF TODO we can eliminate the switch here with a lookup of how much to add to depth

    // For the first open array/object in a value, we've already incremented depth, so keep it the same
    // We never stop at colon, but if we did, it wouldn't affect depth
    case '[': case '{': case ':':
      logger::log_start_value(*this, "skip");
      break;
    // If there is a comma, we have just finished a value in an array/object, and need to get back in
    case ',':
      logger::log_value(*this, "skip");
      break;
    // ] or } means we just finished a value and need to jump out of the array/object
    case ']': case '}':
      logger::log_end_value(*this, "skip");
      _depth--;
      if (depth() <= parent_depth) { return SUCCESS; }
#if SIMDJSON_CHECK_EOF
      // If there are no more tokens, the parent is incomplete.
      if (at_end()) { return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "Missing [ or { at start"); }
#endif // SIMDJSON_CHECK_EOF
      break;
    case '"':
      if(*peek() == ':') {
        // We are at a key!!!
        // This might happen if you just started an object and you skip it immediately.
        // Performance note: it would be nice to get rid of this check as it is somewhat
        // expensive.
        // https://github.com/simdjson/simdjson/issues/1742
        logger::log_value(*this, "key");
        return_current_and_advance(); // eat up the ':'
        break; // important!!!
      }
      simdjson_fallthrough;
    // Anything else must be a scalar value
    default:
      // For the first scalar, we will have incremented depth already, so we decrement it here.
      logger::log_value(*this, "skip");
      _depth--;
      if (depth() <= parent_depth) { return SUCCESS; }
      break;
  }

  // Now that we've considered the first value, we only increment/decrement for arrays/objects
  while (position() < end_position()) {
    switch (*return_current_and_advance()) {
      case '[': case '{':
        logger::log_start_value(*this, "skip");
        _depth++;
        break;
      // TODO consider whether matching braces is a requirement: if non-matching braces indicates
      // *missing* braces, then future lookups are not in the object/arrays they think they are,
      // violating the rule "validate enough structure that the user can be confident they are
      // looking at the right values."
      // PERF TODO we can eliminate the switch here with a lookup of how much to add to depth
      case ']': case '}':
        logger::log_end_value(*this, "skip");
        _depth--;
        if (depth() <= parent_depth) { return SUCCESS; }
        break;
      default:
        logger::log_value(*this, "skip", "");
        break;
    }
  }

  // Ran out of tokens before returning to parent_depth.
  return report_error(TAPE_ERROR, "not enough close braces");
}

SIMDJSON_POP_DISABLE_WARNINGS

// True when the current position is the root position.
simdjson_inline bool json_iterator::at_root() const noexcept {
  return position() == root_position();
}

// True when the parser recorded exactly one structural index.
simdjson_inline bool json_iterator::is_single_token() const noexcept {
  return parser->implementation->n_structural_indexes == 1;
}

// Returns the `_streaming` flag.
simdjson_inline bool json_iterator::streaming() const noexcept {
  return _streaming;
}

// Returns the token position recorded as the root of the current document.
simdjson_inline token_position json_iterator::root_position() const noexcept {
  return _root;
}

// Assumes (via SIMDJSON_ASSUME) that the iterator is at depth 1.
simdjson_inline void json_iterator::assert_at_document_depth() const noexcept {
  SIMDJSON_ASSUME( _depth == 1 );
}

// Assumes the iterator is at depth 1 and, except under clang for Visual
// Studio, positioned on the root token.
simdjson_inline void json_iterator::assert_at_root() const noexcept {
  SIMDJSON_ASSUME( _depth == 1 );
#ifndef SIMDJSON_CLANG_VISUAL_STUDIO
  // Under Visual Studio, the next SIMDJSON_ASSUME fails with: the argument
  // has side effects that will be discarded.
SIMDJSON_ASSUME( token.position() == _root );
#endif
}

// Assumes that `required_tokens` tokens, counted from the current one, are
// all valid positions (only the last of them is checked).
simdjson_inline void json_iterator::assert_more_tokens(uint32_t required_tokens) const noexcept {
  assert_valid_position(token._position + required_tokens - 1);
}

// Assumes `position` lies inside [structural_indexes[0],
// structural_indexes[n_structural_indexes]). No-op under clang for Visual Studio.
simdjson_inline void json_iterator::assert_valid_position(token_position position) const noexcept {
#ifndef SIMDJSON_CLANG_VISUAL_STUDIO
  SIMDJSON_ASSUME( position >= &parser->implementation->structural_indexes[0] );
  SIMDJSON_ASSUME( position < &parser->implementation->structural_indexes[parser->implementation->n_structural_indexes] );
#endif
}

// True when the current position equals end_position().
simdjson_inline bool json_iterator::at_end() const noexcept {
  return position() == end_position();
}
// Address of the structural_indexes entry at index n_structural_indexes
// (one past the last token).
simdjson_inline token_position json_iterator::end_position() const noexcept {
  uint32_t n_structural_indexes{parser->implementation->n_structural_indexes};
  return &parser->implementation->structural_indexes[n_structural_indexes];
}

// Human-readable description of the iterator state (depth, current structural
// character, offset and error message), or a fixed message for a dead iterator.
inline std::string json_iterator::to_string() const noexcept {
  if( !is_alive() ) { return "dead json_iterator instance"; }
  const char * current_structural = reinterpret_cast(token.peek());
  return std::string("json_iterator [ depth : ") + std::to_string(_depth)
          + std::string(", structural : '") + std::string(current_structural,1)
          + std::string("', offset : ") + std::to_string(token.current_offset())
          + std::string("', error : ") + error_message(error)
          + std::string(" ]");
}

// Returns a pointer into the JSON text for the current token. For a dead
// iterator the previous token is used unless the iterator is at the root.
// For a live iterator at the end of the tokens, OUT_OF_BOUNDS is returned.
inline simdjson_result json_iterator::current_location() const noexcept {
  if (!is_alive()) {    // Unrecoverable error
    if (!at_root()) {
      return reinterpret_cast(token.peek(-1));
    } else {
      return reinterpret_cast(token.peek());
    }
  }
  if (at_end()) {
    return OUT_OF_BOUNDS;
  }
  return reinterpret_cast(token.peek());
}

// An iterator is alive while it still holds a parser pointer.
simdjson_inline bool json_iterator::is_alive() const noexcept {
  return parser;
}

// Marks the iterator dead: clears the parser pointer and sets depth to 0.
simdjson_inline void json_iterator::abandon() noexcept {
  parser = nullptr;
  _depth = 0;
}

// Returns a pointer to the current token's text and moves to the next token.
simdjson_inline const uint8_t *json_iterator::return_current_and_advance() noexcept {
#if SIMDJSON_CHECK_EOF
  assert_more_tokens();
#endif // SIMDJSON_CHECK_EOF
  return token.return_current_and_advance();
}

// Same as peek() with no bounds assumption.
simdjson_inline const uint8_t *json_iterator::unsafe_pointer() const noexcept {
  // deliberately done without safety guard:
  return token.peek();
}

// Pointer to the text of the token `delta` positions from the current one.
simdjson_inline const uint8_t *json_iterator::peek(int32_t delta) const noexcept {
#if SIMDJSON_CHECK_EOF
  assert_more_tokens(delta+1);
#endif // SIMDJSON_CHECK_EOF
  return token.peek(delta);
}

// Forwards to token.peek_length(delta) for the token `delta` positions from the current one.
simdjson_inline uint32_t json_iterator::peek_length(int32_t delta) const noexcept {
#if SIMDJSON_CHECK_EOF
  assert_more_tokens(delta+1);
#endif // #if SIMDJSON_CHECK_EOF
  return token.peek_length(delta);
}

// Pointer to the text of the token stored at `position`.
simdjson_inline const uint8_t *json_iterator::peek(token_position position) const noexcept {
  // todo: currently we require end-of-string buffering, but the following
  // assert_valid_position should be turned on if/when we lift that condition.
  // assert_valid_position(position);
  // This is almost surely related to SIMDJSON_CHECK_EOF but given that SIMDJSON_CHECK_EOF
  // is ON by default, we have no choice but to disable it for real with a comment.
  return token.peek(position);
}

// Forwards to token.peek_length(position) for the token stored at `position`.
simdjson_inline uint32_t json_iterator::peek_length(token_position position) const noexcept {
#if SIMDJSON_CHECK_EOF
  assert_valid_position(position);
#endif // SIMDJSON_CHECK_EOF
  return token.peek_length(position);
}

// Address of the last structural index (entry n_structural_indexes - 1).
simdjson_inline token_position json_iterator::last_position() const noexcept {
  // The following line fails under some compilers...
  // SIMDJSON_ASSUME(parser->implementation->n_structural_indexes > 0);
  // since it has side-effects.
uint32_t n_structural_indexes{parser->implementation->n_structural_indexes};
  // The count is copied into a local first so the assumption below has no side effects.
  SIMDJSON_ASSUME(n_structural_indexes > 0);
  return &parser->implementation->structural_indexes[n_structural_indexes - 1];
}

// Pointer to the text of the last token.
simdjson_inline const uint8_t *json_iterator::peek_last() const noexcept {
  return token.peek(last_position());
}

// Moves one level up: the current depth must be parent_depth + 1.
simdjson_inline void json_iterator::ascend_to(depth_t parent_depth) noexcept {
  SIMDJSON_ASSUME(parent_depth >= 0 && parent_depth < INT32_MAX - 1);
  SIMDJSON_ASSUME(_depth == parent_depth + 1);
  _depth = parent_depth;
}

// Moves one level down: the current depth must be child_depth - 1.
simdjson_inline void json_iterator::descend_to(depth_t child_depth) noexcept {
  SIMDJSON_ASSUME(child_depth >= 1 && child_depth < INT32_MAX);
  SIMDJSON_ASSUME(_depth == child_depth - 1);
  _depth = child_depth;
}

// Returns the current depth.
simdjson_inline depth_t json_iterator::depth() const noexcept {
  return _depth;
}

// Returns a mutable reference to the string buffer cursor.
simdjson_inline uint8_t *&json_iterator::string_buf_loc() noexcept {
  return _string_buf_loc;
}

// Logs `message`, stores `_error` in the iterator and returns it.
// SUCCESS, UNINITIALIZED, INCORRECT_TYPE and NO_SUCH_FIELD are excluded by the
// assumption below.
simdjson_inline error_code json_iterator::report_error(error_code _error, const char *message) noexcept {
  SIMDJSON_ASSUME(_error != SUCCESS && _error != UNINITIALIZED && _error != INCORRECT_TYPE && _error != NO_SUCH_FIELD);
  logger::log_error(*this, message);
  error = _error;
  return error;
}

// Returns the current token position.
simdjson_inline token_position json_iterator::position() const noexcept {
  return token.position();
}

// Unescapes `in` through the parser, writing at (and advancing) the string
// buffer cursor `_string_buf_loc`.
simdjson_inline simdjson_result json_iterator::unescape(raw_json_string in, bool allow_replacement) noexcept {
  return parser->unescape(in, _string_buf_loc, allow_replacement);
}

// Same as unescape() but forwards to parser->unescape_wobbly().
simdjson_inline simdjson_result json_iterator::unescape_wobbly(raw_json_string in) noexcept {
  return parser->unescape_wobbly(in, _string_buf_loc);
}

// Jumps back to `position` inside a child at `child_depth`; the iterator must
// currently be at child_depth - 1.
simdjson_inline void json_iterator::reenter_child(token_position position, depth_t child_depth) noexcept {
  SIMDJSON_ASSUME(child_depth >= 1 && child_depth < INT32_MAX);
  SIMDJSON_ASSUME(_depth == child_depth - 1);
#if SIMDJSON_DEVELOPMENT_CHECKS
#ifndef SIMDJSON_CLANG_VISUAL_STUDIO
  SIMDJSON_ASSUME(size_t(child_depth) < parser->max_depth());
SIMDJSON_ASSUME(position >= parser->start_positions[child_depth]);
#endif
#endif
  token.set_position(position);
  _depth = child_depth;
}

// If the current token starts with `c`, consumes it and returns SUCCESS;
// otherwise leaves the iterator where it is and returns TAPE_ERROR.
simdjson_inline error_code json_iterator::consume_character(char c) noexcept {
  if (*peek() == c) {
    return_current_and_advance();
    return SUCCESS;
  }
  return TAPE_ERROR;
}

#if SIMDJSON_DEVELOPMENT_CHECKS

// Returns the start position recorded for `depth`, or 0 when `depth` is not
// below the parser's max_depth().
simdjson_inline token_position json_iterator::start_position(depth_t depth) const noexcept {
  SIMDJSON_ASSUME(size_t(depth) < parser->max_depth());
  return size_t(depth) < parser->max_depth() ? parser->start_positions[depth] : 0;
}

// Records `position` as the start position for `depth`; ignored when `depth`
// is not below the parser's max_depth().
simdjson_inline void json_iterator::set_start_position(depth_t depth, token_position position) noexcept {
  SIMDJSON_ASSUME(size_t(depth) < parser->max_depth());
  if(size_t(depth) < parser->max_depth()) { parser->start_positions[depth] = position; }
}

#endif


// Logs `message` and returns `_error` without storing it in the iterator.
// Only INCORRECT_TYPE and NO_SUCH_FIELD are expected here.
simdjson_inline error_code json_iterator::optional_error(error_code _error, const char *message) noexcept {
  SIMDJSON_ASSUME(_error == INCORRECT_TYPE || _error == NO_SUCH_FIELD);
  logger::log_error(*this, message);
  return _error;
}


// Copies `max_len` bytes from `json` into `tmpbuf` (capacity `N`) and fills
// the rest of `tmpbuf` with spaces. Returns false, without copying, when
// N < max_len or N == 0.
simdjson_warn_unused simdjson_inline bool json_iterator::copy_to_buffer(const uint8_t *json, uint32_t max_len, uint8_t *tmpbuf, size_t N) noexcept {
  // This function is not expected to be called in performance-sensitive settings.
  // Let us guard against silly cases:
  if((N < max_len) || (N == 0)) { return false; }
  // Copy to the buffer.
  std::memcpy(tmpbuf, json, max_len);
  if(N > max_len) { // We pad whatever remains with ' '.
std::memset(tmpbuf + max_len, ' ', N - max_len); } return true; } } // namespace ondemand } // namespace icelake } // namespace simdjson namespace simdjson { simdjson_inline simdjson_result::simdjson_result(icelake::ondemand::json_iterator &&value) noexcept : implementation_simdjson_result_base(std::forward(value)) {} simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept : implementation_simdjson_result_base(error) {} } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_JSON_ITERATOR_INL_H /* end file simdjson/generic/ondemand/json_iterator-inl.h for icelake */ /* including simdjson/generic/ondemand/json_type-inl.h for icelake: #include "simdjson/generic/ondemand/json_type-inl.h" */ /* begin file simdjson/generic/ondemand/json_type-inl.h for icelake */ #ifndef SIMDJSON_GENERIC_ONDEMAND_JSON_TYPE_INL_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_JSON_TYPE_INL_H */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_type.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base-inl.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { namespace icelake { namespace ondemand { inline std::ostream& operator<<(std::ostream& out, json_type type) noexcept { switch (type) { case json_type::array: out << "array"; break; case json_type::object: out << "object"; break; case json_type::number: out << "number"; break; case json_type::string: out << "string"; break; case json_type::boolean: out << "boolean"; break; case json_type::null: out << "null"; break; default: SIMDJSON_UNREACHABLE(); } return out; } #if SIMDJSON_EXCEPTIONS inline std::ostream& operator<<(std::ostream& out, simdjson_result &type) noexcept(false) { return out 
<< type.value(); } #endif simdjson_inline number_type number::get_number_type() const noexcept { return type; } simdjson_inline bool number::is_uint64() const noexcept { return get_number_type() == number_type::unsigned_integer; } simdjson_inline uint64_t number::get_uint64() const noexcept { return payload.unsigned_integer; } simdjson_inline number::operator uint64_t() const noexcept { return get_uint64(); } simdjson_inline bool number::is_int64() const noexcept { return get_number_type() == number_type::signed_integer; } simdjson_inline int64_t number::get_int64() const noexcept { return payload.signed_integer; } simdjson_inline number::operator int64_t() const noexcept { return get_int64(); } simdjson_inline bool number::is_double() const noexcept { return get_number_type() == number_type::floating_point_number; } simdjson_inline double number::get_double() const noexcept { return payload.floating_point_number; } simdjson_inline number::operator double() const noexcept { return get_double(); } simdjson_inline double number::as_double() const noexcept { if(is_double()) { return payload.floating_point_number; } if(is_int64()) { return double(payload.signed_integer); } return double(payload.unsigned_integer); } simdjson_inline void number::append_s64(int64_t value) noexcept { payload.signed_integer = value; type = number_type::signed_integer; } simdjson_inline void number::append_u64(uint64_t value) noexcept { payload.unsigned_integer = value; type = number_type::unsigned_integer; } simdjson_inline void number::append_double(double value) noexcept { payload.floating_point_number = value; type = number_type::floating_point_number; } simdjson_inline void number::skip_double() noexcept { type = number_type::floating_point_number; } } // namespace ondemand } // namespace icelake } // namespace simdjson namespace simdjson { simdjson_inline simdjson_result::simdjson_result(icelake::ondemand::json_type &&value) noexcept : 
implementation_simdjson_result_base(std::forward(value)) {} simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept : implementation_simdjson_result_base(error) {} } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_JSON_TYPE_INL_H /* end file simdjson/generic/ondemand/json_type-inl.h for icelake */ /* including simdjson/generic/ondemand/logger-inl.h for icelake: #include "simdjson/generic/ondemand/logger-inl.h" */ /* begin file simdjson/generic/ondemand/logger-inl.h for icelake */ #ifndef SIMDJSON_GENERIC_ONDEMAND_LOGGER_INL_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_LOGGER_INL_H */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/logger.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ #include #include namespace simdjson { namespace icelake { namespace ondemand { namespace logger { static constexpr const char * DASHES = "----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------"; static constexpr const int LOG_EVENT_LEN = 20; static constexpr const int LOG_BUFFER_LEN = 30; static constexpr const int LOG_SMALL_BUFFER_LEN = 10; static int log_depth = 0; // Not threadsafe. Log only. 
// Helper to turn unprintable or newline characters into spaces static inline char printable_char(char c) { if (c >= 0x20) { return c; } else { return ' '; } } template static inline std::string string_format(const std::string& format, const Args&... args) { SIMDJSON_PUSH_DISABLE_ALL_WARNINGS int size_s = std::snprintf(nullptr, 0, format.c_str(), args...) + 1; auto size = static_cast(size_s); if (size <= 0) return std::string(); std::unique_ptr buf(new char[size]); std::snprintf(buf.get(), size, format.c_str(), args...); SIMDJSON_POP_DISABLE_WARNINGS return std::string(buf.get(), buf.get() + size - 1); } static inline log_level get_log_level_from_env() { SIMDJSON_PUSH_DISABLE_WARNINGS SIMDJSON_DISABLE_DEPRECATED_WARNING // Disable CRT_SECURE warning on MSVC: manually verified this is safe char *lvl = getenv("SIMDJSON_LOG_LEVEL"); SIMDJSON_POP_DISABLE_WARNINGS if (lvl && simdjson_strcasecmp(lvl, "ERROR") == 0) { return log_level::error; } return log_level::info; } static inline log_level log_threshold() { static log_level threshold = get_log_level_from_env(); return threshold; } static inline bool should_log(log_level level) { return level >= log_threshold(); } inline void log_event(const json_iterator &iter, const char *type, std::string_view detail, int delta, int depth_delta) noexcept { log_line(iter, "", type, detail, delta, depth_delta, log_level::info); } inline void log_value(const json_iterator &iter, token_position index, depth_t depth, const char *type, std::string_view detail) noexcept { log_line(iter, index, depth, "", type, detail, log_level::info); } inline void log_value(const json_iterator &iter, const char *type, std::string_view detail, int delta, int depth_delta) noexcept { log_line(iter, "", type, detail, delta, depth_delta, log_level::info); } inline void log_start_value(const json_iterator &iter, token_position index, depth_t depth, const char *type, std::string_view detail) noexcept { log_line(iter, index, depth, "+", type, detail, 
log_level::info); if (LOG_ENABLED) { log_depth++; } } inline void log_start_value(const json_iterator &iter, const char *type, int delta, int depth_delta) noexcept { log_line(iter, "+", type, "", delta, depth_delta, log_level::info); if (LOG_ENABLED) { log_depth++; } } inline void log_end_value(const json_iterator &iter, const char *type, int delta, int depth_delta) noexcept { if (LOG_ENABLED) { log_depth--; } log_line(iter, "-", type, "", delta, depth_delta, log_level::info); } inline void log_error(const json_iterator &iter, const char *error, const char *detail, int delta, int depth_delta) noexcept { log_line(iter, "ERROR: ", error, detail, delta, depth_delta, log_level::error); } inline void log_error(const json_iterator &iter, token_position index, depth_t depth, const char *error, const char *detail) noexcept { log_line(iter, index, depth, "ERROR: ", error, detail, log_level::error); } inline void log_event(const value_iterator &iter, const char *type, std::string_view detail, int delta, int depth_delta) noexcept { log_event(iter.json_iter(), type, detail, delta, depth_delta); } inline void log_value(const value_iterator &iter, const char *type, std::string_view detail, int delta, int depth_delta) noexcept { log_value(iter.json_iter(), type, detail, delta, depth_delta); } inline void log_start_value(const value_iterator &iter, const char *type, int delta, int depth_delta) noexcept { log_start_value(iter.json_iter(), type, delta, depth_delta); } inline void log_end_value(const value_iterator &iter, const char *type, int delta, int depth_delta) noexcept { log_end_value(iter.json_iter(), type, delta, depth_delta); } inline void log_error(const value_iterator &iter, const char *error, const char *detail, int delta, int depth_delta) noexcept { log_error(iter.json_iter(), error, detail, delta, depth_delta); } inline void log_headers() noexcept { if (LOG_ENABLED) { if (simdjson_unlikely(should_log(log_level::info))) { // Technically a static variable is not 
thread-safe, but if you are using threads and logging... well... static bool displayed_hint{false}; log_depth = 0; printf("\n"); if (!displayed_hint) { // We only print this helpful header once. printf("# Logging provides the depth and position of the iterator user-visible steps:\n"); printf("# +array says 'this is where we were when we discovered the start array'\n"); printf( "# -array says 'this is where we were when we ended the array'\n"); printf("# skip says 'this is a structural or value I am skipping'\n"); printf("# +/-skip says 'this is a start/end array or object I am skipping'\n"); printf("#\n"); printf("# The indentation of the terms (array, string,...) indicates the depth,\n"); printf("# in addition to the depth being displayed.\n"); printf("#\n"); printf("# Every token in the document has a single depth determined by the tokens before it,\n"); printf("# and is not affected by what the token actually is.\n"); printf("#\n"); printf("# Not all structural elements are presented as tokens in the logs.\n"); printf("#\n"); printf("# We never give control to the user within an empty array or an empty object.\n"); printf("#\n"); printf("# Inside an array, having a depth greater than the array's depth means that\n"); printf("# we are pointing inside a value.\n"); printf("# Having a depth equal to the array means that we are pointing right before a value.\n"); printf("# Having a depth smaller than the array means that we have moved beyond the array.\n"); displayed_hint = true; } printf("\n"); printf("| %-*s ", LOG_EVENT_LEN, "Event"); printf("| %-*s ", LOG_BUFFER_LEN, "Buffer"); printf("| %-*s ", LOG_SMALL_BUFFER_LEN, "Next"); // printf("| %-*s ", 5, "Next#"); printf("| %-*s ", 5, "Depth"); printf("| Detail "); printf("|\n"); printf("|%.*s", LOG_EVENT_LEN + 2, DASHES); printf("|%.*s", LOG_BUFFER_LEN + 2, DASHES); printf("|%.*s", LOG_SMALL_BUFFER_LEN + 2, DASHES); // printf("|%.*s", 5+2, DASHES); printf("|%.*s", 5 + 2, DASHES); printf("|--------"); printf("|\n"); 
fflush(stdout); } } } template inline void log_line(const json_iterator &iter, const char *title_prefix, const char *title, std::string_view detail, int delta, int depth_delta, log_level level, Args&&... args) noexcept { log_line(iter, iter.position()+delta, depth_t(iter.depth()+depth_delta), title_prefix, title, detail, level, std::forward(args)...); } template inline void log_line(const json_iterator &iter, token_position index, depth_t depth, const char *title_prefix, const char *title, std::string_view detail, log_level level, Args&&... args) noexcept { if (LOG_ENABLED) { if (simdjson_unlikely(should_log(level))) { const int indent = depth * 2; const auto buf = iter.token.buf; auto msg = string_format(title, std::forward(args)...); printf("| %*s%s%-*s ", indent, "", title_prefix, LOG_EVENT_LEN - indent - int(strlen(title_prefix)), msg.c_str()); { // Print the current structural. printf("| "); // Before we begin, the index might point right before the document. // This could be unsafe, see https://github.com/simdjson/simdjson/discussions/1938 if (index < iter._root) { printf("%*s", LOG_BUFFER_LEN, ""); } else { auto current_structural = &buf[*index]; for (int i = 0; i < LOG_BUFFER_LEN; i++) { printf("%c", printable_char(current_structural[i])); } } printf(" "); } { // Print the next structural. 
printf("| "); auto next_structural = &buf[*(index + 1)]; for (int i = 0; i < LOG_SMALL_BUFFER_LEN; i++) { printf("%c", printable_char(next_structural[i])); } printf(" "); } // printf("| %5u ", *(index+1)); printf("| %5i ", depth); printf("| %6.*s ", int(detail.size()), detail.data()); printf("|\n"); fflush(stdout); } } } } // namespace logger } // namespace ondemand } // namespace icelake } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_LOGGER_INL_H /* end file simdjson/generic/ondemand/logger-inl.h for icelake */ /* including simdjson/generic/ondemand/object-inl.h for icelake: #include "simdjson/generic/ondemand/object-inl.h" */ /* begin file simdjson/generic/ondemand/object-inl.h for icelake */ #ifndef SIMDJSON_GENERIC_ONDEMAND_OBJECT_INL_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_OBJECT_INL_H */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/field.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/object.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/object_iterator.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value-inl.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { namespace icelake { namespace ondemand { simdjson_inline simdjson_result object::find_field_unordered(const std::string_view key) & noexcept { bool has_value; SIMDJSON_TRY( iter.find_field_unordered_raw(key).get(has_value) ); if (!has_value) { logger::log_line(iter.json_iter(), "ERROR: ", "Cannot find key %.*s", "", -1, 0, 
logger::log_level::error, static_cast(key.size()), key.data()); return NO_SUCH_FIELD; } return value(iter.child()); } simdjson_inline simdjson_result object::find_field_unordered(const std::string_view key) && noexcept { bool has_value; SIMDJSON_TRY( iter.find_field_unordered_raw(key).get(has_value) ); if (!has_value) { logger::log_line(iter.json_iter(), "ERROR: ", "Cannot find key %.*s", "", -1, 0, logger::log_level::error, static_cast(key.size()), key.data()); return NO_SUCH_FIELD; } return value(iter.child()); } simdjson_inline simdjson_result object::operator[](const std::string_view key) & noexcept { return find_field_unordered(key); } simdjson_inline simdjson_result object::operator[](const std::string_view key) && noexcept { return std::forward(*this).find_field_unordered(key); } simdjson_inline simdjson_result object::find_field(const std::string_view key) & noexcept { bool has_value; SIMDJSON_TRY( iter.find_field_raw(key).get(has_value) ); if (!has_value) { logger::log_line(iter.json_iter(), "ERROR: ", "Cannot find key %.*s", "", -1, 0, logger::log_level::error, static_cast(key.size()), key.data()); return NO_SUCH_FIELD; } return value(iter.child()); } simdjson_inline simdjson_result object::find_field(const std::string_view key) && noexcept { bool has_value; SIMDJSON_TRY( iter.find_field_raw(key).get(has_value) ); if (!has_value) { logger::log_line(iter.json_iter(), "ERROR: ", "Cannot find key %.*s", "", -1, 0, logger::log_level::error, static_cast(key.size()), key.data()); return NO_SUCH_FIELD; } return value(iter.child()); } simdjson_inline simdjson_result object::start(value_iterator &iter) noexcept { SIMDJSON_TRY( iter.start_object().error() ); return object(iter); } simdjson_inline simdjson_result object::start_root(value_iterator &iter) noexcept { SIMDJSON_TRY( iter.start_root_object().error() ); return object(iter); } simdjson_inline error_code object::consume() noexcept { if(iter.is_at_key()) { /** * whenever you are pointing at a key, calling 
skip_child() is * unsafe because you will hit a string and you will assume that * it is string value, and this mistake will lead you to make bad * depth computation. */ /** * We want to 'consume' the key. We could really * just do _json_iter->return_current_and_advance(); at this * point, but, for clarity, we will use the high-level API to * eat the key. We assume that the compiler optimizes away * most of the work. */ simdjson_unused raw_json_string actual_key; auto error = iter.field_key().get(actual_key); if (error) { iter.abandon(); return error; }; // Let us move to the value while we are at it. if ((error = iter.field_value())) { iter.abandon(); return error; } } auto error_skip = iter.json_iter().skip_child(iter.depth()-1); if(error_skip) { iter.abandon(); } return error_skip; } simdjson_inline simdjson_result object::raw_json() noexcept { const uint8_t * starting_point{iter.peek_start()}; auto error = consume(); if(error) { return error; } const uint8_t * final_point{iter._json_iter->peek()}; return std::string_view(reinterpret_cast(starting_point), size_t(final_point - starting_point)); } simdjson_inline simdjson_result object::started(value_iterator &iter) noexcept { SIMDJSON_TRY( iter.started_object().error() ); return object(iter); } simdjson_inline object object::resume(const value_iterator &iter) noexcept { return iter; } simdjson_inline object::object(const value_iterator &_iter) noexcept : iter{_iter} { } simdjson_inline simdjson_result object::begin() noexcept { #if SIMDJSON_DEVELOPMENT_CHECKS if (!iter.is_at_iterator_start()) { return OUT_OF_ORDER_ITERATION; } #endif return object_iterator(iter); } simdjson_inline simdjson_result object::end() noexcept { return object_iterator(iter); } inline simdjson_result object::at_pointer(std::string_view json_pointer) noexcept { if (json_pointer[0] != '/') { return INVALID_JSON_POINTER; } json_pointer = json_pointer.substr(1); size_t slash = json_pointer.find('/'); std::string_view key = 
json_pointer.substr(0, slash); // Grab the child with the given key simdjson_result child; // If there is an escape character in the key, unescape it and then get the child. size_t escape = key.find('~'); if (escape != std::string_view::npos) { // Unescape the key std::string unescaped(key); do { switch (unescaped[escape+1]) { case '0': unescaped.replace(escape, 2, "~"); break; case '1': unescaped.replace(escape, 2, "/"); break; default: return INVALID_JSON_POINTER; // "Unexpected ~ escape character in JSON pointer"); } escape = unescaped.find('~', escape+1); } while (escape != std::string::npos); child = find_field(unescaped); // Take note find_field does not unescape keys when matching } else { child = find_field(key); } if(child.error()) { return child; // we do not continue if there was an error } // If there is a /, we have to recurse and look up more of the path if (slash != std::string_view::npos) { child = child.at_pointer(json_pointer.substr(slash)); } return child; } simdjson_inline simdjson_result object::count_fields() & noexcept { size_t count{0}; // Important: we do not consume any of the values. for(simdjson_unused auto v : *this) { count++; } // The above loop will always succeed, but we want to report errors. if(iter.error()) { return iter.error(); } // We need to move back at the start because we expect users to iterate through // the object after counting the number of elements. 
iter.reset_object(); return count; } simdjson_inline simdjson_result object::is_empty() & noexcept { bool is_not_empty; auto error = iter.reset_object().get(is_not_empty); if(error) { return error; } return !is_not_empty; } simdjson_inline simdjson_result object::reset() & noexcept { return iter.reset_object(); } } // namespace ondemand } // namespace icelake } // namespace simdjson namespace simdjson { simdjson_inline simdjson_result::simdjson_result(icelake::ondemand::object &&value) noexcept : implementation_simdjson_result_base(std::forward(value)) {} simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept : implementation_simdjson_result_base(error) {} simdjson_inline simdjson_result simdjson_result::begin() noexcept { if (error()) { return error(); } return first.begin(); } simdjson_inline simdjson_result simdjson_result::end() noexcept { if (error()) { return error(); } return first.end(); } simdjson_inline simdjson_result simdjson_result::find_field_unordered(std::string_view key) & noexcept { if (error()) { return error(); } return first.find_field_unordered(key); } simdjson_inline simdjson_result simdjson_result::find_field_unordered(std::string_view key) && noexcept { if (error()) { return error(); } return std::forward(first).find_field_unordered(key); } simdjson_inline simdjson_result simdjson_result::operator[](std::string_view key) & noexcept { if (error()) { return error(); } return first[key]; } simdjson_inline simdjson_result simdjson_result::operator[](std::string_view key) && noexcept { if (error()) { return error(); } return std::forward(first)[key]; } simdjson_inline simdjson_result simdjson_result::find_field(std::string_view key) & noexcept { if (error()) { return error(); } return first.find_field(key); } simdjson_inline simdjson_result simdjson_result::find_field(std::string_view key) && noexcept { if (error()) { return error(); } return std::forward(first).find_field(key); } simdjson_inline simdjson_result 
simdjson_result::at_pointer(std::string_view json_pointer) noexcept { if (error()) { return error(); } return first.at_pointer(json_pointer); } inline simdjson_result simdjson_result::reset() noexcept { if (error()) { return error(); } return first.reset(); } inline simdjson_result simdjson_result::is_empty() noexcept { if (error()) { return error(); } return first.is_empty(); } simdjson_inline simdjson_result simdjson_result::count_fields() & noexcept { if (error()) { return error(); } return first.count_fields(); } simdjson_inline simdjson_result simdjson_result::raw_json() noexcept { if (error()) { return error(); } return first.raw_json(); } } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_OBJECT_INL_H /* end file simdjson/generic/ondemand/object-inl.h for icelake */ /* including simdjson/generic/ondemand/object_iterator-inl.h for icelake: #include "simdjson/generic/ondemand/object_iterator-inl.h" */ /* begin file simdjson/generic/ondemand/object_iterator-inl.h for icelake */ #ifndef SIMDJSON_GENERIC_ONDEMAND_OBJECT_ITERATOR_INL_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_OBJECT_ITERATOR_INL_H */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/object_iterator.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/field-inl.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator-inl.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { namespace icelake { namespace ondemand { // // object_iterator // simdjson_inline object_iterator::object_iterator(const value_iterator &_iter) noexcept : iter{_iter} {} simdjson_inline simdjson_result object_iterator::operator*() noexcept { error_code error = iter.error(); if 
(error) { iter.abandon(); return error; } auto result = field::start(iter); // TODO this is a safety rail ... users should exit loops as soon as they receive an error. // Nonetheless, let's see if performance is OK with this if statement--the compiler may give it to us for free. if (result.error()) { iter.abandon(); } return result; } simdjson_inline bool object_iterator::operator==(const object_iterator &other) const noexcept { return !(*this != other); } simdjson_inline bool object_iterator::operator!=(const object_iterator &) const noexcept { return iter.is_open(); } SIMDJSON_PUSH_DISABLE_WARNINGS SIMDJSON_DISABLE_STRICT_OVERFLOW_WARNING simdjson_inline object_iterator &object_iterator::operator++() noexcept { // TODO this is a safety rail ... users should exit loops as soon as they receive an error. // Nonetheless, let's see if performance is OK with this if statement--the compiler may give it to us for free. if (!iter.is_open()) { return *this; } // Iterator will be released if there is an error simdjson_unused error_code error; if ((error = iter.skip_child() )) { return *this; } simdjson_unused bool has_value; if ((error = iter.has_next_field().get(has_value) )) { return *this; }; return *this; } SIMDJSON_POP_DISABLE_WARNINGS // // ### Live States // // While iterating or looking up values, depth >= iter.depth. at_start may vary. Error is // always SUCCESS: // // - Start: This is the state when the object is first found and the iterator is just past the {. // In this state, at_start == true. // - Next: After we hand a scalar value to the user, or an array/object which they then fully // iterate over, the iterator is at the , or } before the next value. In this state, // depth == iter.depth, at_start == false, and error == SUCCESS. // - Unfinished Business: When we hand an array/object to the user which they do not fully // iterate over, we need to finish that iteration by skipping child values until we reach the // Next state. 
In this state, depth > iter.depth, at_start == false, and error == SUCCESS. // // ## Error States // // In error states, we will yield exactly one more value before stopping. iter.depth == depth // and at_start is always false. We decrement after yielding the error, moving to the Finished // state. // // - Chained Error: When the object iterator is part of an error chain--for example, in // `for (auto tweet : doc["tweets"])`, where the tweet field may be missing or not be an // object--we yield that error in the loop, exactly once. In this state, error != SUCCESS and // iter.depth == depth, and at_start == false. We decrement depth when we yield the error. // - Missing Comma Error: When the iterator ++ method discovers there is no comma between fields, // we flag that as an error and treat it exactly the same as a Chained Error. In this state, // error == TAPE_ERROR, iter.depth == depth, and at_start == false. // // Errors that occur while reading a field to give to the user (such as when the key is not a // string or the field is missing a colon) are yielded immediately. Depth is then decremented, // moving to the Finished state without transitioning through an Error state at all. // // ## Terminal State // // The terminal state has iter.depth < depth. at_start is always false. // // - Finished: When we have reached a }, we are finished. We signal this by decrementing depth. // In this state, iter.depth < depth, at_start == false, and error == SUCCESS. 
// } // namespace ondemand } // namespace icelake } // namespace simdjson namespace simdjson { simdjson_inline simdjson_result::simdjson_result( icelake::ondemand::object_iterator &&value ) noexcept : implementation_simdjson_result_base(std::forward(value)) { first.iter.assert_is_valid(); } simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept : implementation_simdjson_result_base({}, error) { } simdjson_inline simdjson_result simdjson_result::operator*() noexcept { if (error()) { return error(); } return *first; } // If we're iterating and there is an error, return the error once. simdjson_inline bool simdjson_result::operator==(const simdjson_result &other) const noexcept { if (!first.iter.is_valid()) { return !error(); } return first == other.first; } // If we're iterating and there is an error, return the error once. simdjson_inline bool simdjson_result::operator!=(const simdjson_result &other) const noexcept { if (!first.iter.is_valid()) { return error(); } return first != other.first; } // Checks for ']' and ',' simdjson_inline simdjson_result &simdjson_result::operator++() noexcept { // Clear the error if there is one, so we don't yield it twice if (error()) { second = SUCCESS; return *this; } ++first; return *this; } } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_OBJECT_ITERATOR_INL_H /* end file simdjson/generic/ondemand/object_iterator-inl.h for icelake */ /* including simdjson/generic/ondemand/parser-inl.h for icelake: #include "simdjson/generic/ondemand/parser-inl.h" */ /* begin file simdjson/generic/ondemand/parser-inl.h for icelake */ #ifndef SIMDJSON_GENERIC_ONDEMAND_PARSER_INL_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_PARSER_INL_H */ /* amalgamation skipped (editor-only): #include "simdjson/padded_string.h" */ /* amalgamation skipped (editor-only): #include "simdjson/padded_string_view.h" */ /* 
amalgamation skipped (editor-only): #include "simdjson/implementation.h" */ /* amalgamation skipped (editor-only): #include "simdjson/internal/dom_parser_implementation.h" */ /* amalgamation skipped (editor-only): #include "simdjson/dom/base.h" // for MINIMAL_DOCUMENT_CAPACITY */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/document_stream.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/parser.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { namespace icelake { namespace ondemand { simdjson_inline parser::parser(size_t max_capacity) noexcept : _max_capacity{max_capacity} { } simdjson_warn_unused simdjson_inline error_code parser::allocate(size_t new_capacity, size_t new_max_depth) noexcept { if (new_capacity > max_capacity()) { return CAPACITY; } if (string_buf && new_capacity == capacity() && new_max_depth == max_depth()) { return SUCCESS; } // string_capacity copied from document::allocate _capacity = 0; size_t string_capacity = SIMDJSON_ROUNDUP_N(5 * new_capacity / 3 + SIMDJSON_PADDING, 64); string_buf.reset(new (std::nothrow) uint8_t[string_capacity]); #if SIMDJSON_DEVELOPMENT_CHECKS start_positions.reset(new (std::nothrow) token_position[new_max_depth]); #endif if (implementation) { SIMDJSON_TRY( implementation->set_capacity(new_capacity) ); SIMDJSON_TRY( implementation->set_max_depth(new_max_depth) ); } else { SIMDJSON_TRY( simdjson::get_active_implementation()->create_dom_parser_implementation(new_capacity, new_max_depth, implementation) ); } _capacity = new_capacity; _max_depth = new_max_depth; return SUCCESS; } simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(padded_string_view json) & noexcept { if (json.padding() < SIMDJSON_PADDING) { 
return INSUFFICIENT_PADDING; } json.remove_utf8_bom(); // Allocate if needed if (capacity() < json.length() || !string_buf) { SIMDJSON_TRY( allocate(json.length(), max_depth()) ); } // Run stage 1. SIMDJSON_TRY( implementation->stage1(reinterpret_cast(json.data()), json.length(), stage1_mode::regular) ); return document::start({ reinterpret_cast(json.data()), this }); } simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(const char *json, size_t len, size_t allocated) & noexcept { return iterate(padded_string_view(json, len, allocated)); } simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(const uint8_t *json, size_t len, size_t allocated) & noexcept { return iterate(padded_string_view(json, len, allocated)); } simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(std::string_view json, size_t allocated) & noexcept { return iterate(padded_string_view(json, allocated)); } simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(std::string &json) & noexcept { if(json.capacity() - json.size() < SIMDJSON_PADDING) { json.reserve(json.size() + SIMDJSON_PADDING); } return iterate(padded_string_view(json)); } simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(const std::string &json) & noexcept { return iterate(padded_string_view(json)); } simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(const simdjson_result &result) & noexcept { // We don't presently have a way to temporarily get a const T& from a simdjson_result without throwing an exception SIMDJSON_TRY( result.error() ); padded_string_view json = result.value_unsafe(); return iterate(json); } simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(const simdjson_result &result) & noexcept { // We don't presently have a way to temporarily get a const T& from a simdjson_result without throwing an exception SIMDJSON_TRY( result.error() ); const padded_string &json = result.value_unsafe(); return 
iterate(json); } simdjson_warn_unused simdjson_inline simdjson_result parser::iterate_raw(padded_string_view json) & noexcept { if (json.padding() < SIMDJSON_PADDING) { return INSUFFICIENT_PADDING; } json.remove_utf8_bom(); // Allocate if needed if (capacity() < json.length()) { SIMDJSON_TRY( allocate(json.length(), max_depth()) ); } // Run stage 1. SIMDJSON_TRY( implementation->stage1(reinterpret_cast(json.data()), json.length(), stage1_mode::regular) ); return json_iterator(reinterpret_cast(json.data()), this); } inline simdjson_result parser::iterate_many(const uint8_t *buf, size_t len, size_t batch_size, bool allow_comma_separated) noexcept { if(batch_size < MINIMAL_BATCH_SIZE) { batch_size = MINIMAL_BATCH_SIZE; } if((len >= 3) && (std::memcmp(buf, "\xEF\xBB\xBF", 3) == 0)) { buf += 3; len -= 3; } if(allow_comma_separated && batch_size < len) { batch_size = len; } return document_stream(*this, buf, len, batch_size, allow_comma_separated); } inline simdjson_result parser::iterate_many(const char *buf, size_t len, size_t batch_size, bool allow_comma_separated) noexcept { return iterate_many(reinterpret_cast(buf), len, batch_size, allow_comma_separated); } inline simdjson_result parser::iterate_many(const std::string &s, size_t batch_size, bool allow_comma_separated) noexcept { return iterate_many(s.data(), s.length(), batch_size, allow_comma_separated); } inline simdjson_result parser::iterate_many(const padded_string &s, size_t batch_size, bool allow_comma_separated) noexcept { return iterate_many(s.data(), s.length(), batch_size, allow_comma_separated); } simdjson_inline size_t parser::capacity() const noexcept { return _capacity; } simdjson_inline size_t parser::max_capacity() const noexcept { return _max_capacity; } simdjson_inline size_t parser::max_depth() const noexcept { return _max_depth; } simdjson_inline void parser::set_max_capacity(size_t max_capacity) noexcept { if(max_capacity < dom::MINIMAL_DOCUMENT_CAPACITY) { _max_capacity = max_capacity; } else 
{ _max_capacity = dom::MINIMAL_DOCUMENT_CAPACITY; } } simdjson_inline simdjson_warn_unused simdjson_result parser::unescape(raw_json_string in, uint8_t *&dst, bool allow_replacement) const noexcept { uint8_t *end = implementation->parse_string(in.buf, dst, allow_replacement); if (!end) { return STRING_ERROR; } std::string_view result(reinterpret_cast(dst), end-dst); dst = end; return result; } simdjson_inline simdjson_warn_unused simdjson_result parser::unescape_wobbly(raw_json_string in, uint8_t *&dst) const noexcept { uint8_t *end = implementation->parse_wobbly_string(in.buf, dst); if (!end) { return STRING_ERROR; } std::string_view result(reinterpret_cast(dst), end-dst); dst = end; return result; } } // namespace ondemand } // namespace icelake } // namespace simdjson namespace simdjson { simdjson_inline simdjson_result::simdjson_result(icelake::ondemand::parser &&value) noexcept : implementation_simdjson_result_base(std::forward(value)) {} simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept : implementation_simdjson_result_base(error) {} } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_PARSER_INL_H /* end file simdjson/generic/ondemand/parser-inl.h for icelake */ /* including simdjson/generic/ondemand/raw_json_string-inl.h for icelake: #include "simdjson/generic/ondemand/raw_json_string-inl.h" */ /* begin file simdjson/generic/ondemand/raw_json_string-inl.h for icelake */ #ifndef SIMDJSON_GENERIC_ONDEMAND_RAW_JSON_STRING_INL_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_RAW_JSON_STRING_INL_H */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator-inl.h" */ /* amalgamation skipped (editor-only): #include 
"simdjson/generic/implementation_simdjson_result_base-inl.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { namespace icelake { namespace ondemand { simdjson_inline raw_json_string::raw_json_string(const uint8_t * _buf) noexcept : buf{_buf} {} simdjson_inline const char * raw_json_string::raw() const noexcept { return reinterpret_cast(buf); } simdjson_inline bool raw_json_string::is_free_from_unescaped_quote(std::string_view target) noexcept { size_t pos{0}; // if the content has no escape character, just scan through it quickly! for(;pos < target.size() && target[pos] != '\\';pos++) {} // slow path may begin. bool escaping{false}; for(;pos < target.size();pos++) { if((target[pos] == '"') && !escaping) { return false; } else if(target[pos] == '\\') { escaping = !escaping; } else { escaping = false; } } return true; } simdjson_inline bool raw_json_string::is_free_from_unescaped_quote(const char* target) noexcept { size_t pos{0}; // if the content has no escape character, just scan through it quickly! for(;target[pos] && target[pos] != '\\';pos++) {} // slow path may begin. bool escaping{false}; for(;target[pos];pos++) { if((target[pos] == '"') && !escaping) { return false; } else if(target[pos] == '\\') { escaping = !escaping; } else { escaping = false; } } return true; } simdjson_inline bool raw_json_string::unsafe_is_equal(size_t length, std::string_view target) const noexcept { // If we are going to call memcmp, then we must know something about the length of the raw_json_string. return (length >= target.size()) && (raw()[target.size()] == '"') && !memcmp(raw(), target.data(), target.size()); } simdjson_inline bool raw_json_string::unsafe_is_equal(std::string_view target) const noexcept { // Assumptions: does not contain unescaped quote characters, and // the raw content is quote terminated within a valid JSON string. 
if(target.size() <= SIMDJSON_PADDING) {
    // Short targets: one memcmp plus a check that the closing quote follows immediately.
    return (raw()[target.size()] == '"') && !memcmp(raw(), target.data(), target.size());
  }
  // Longer targets: compare byte by byte, stopping at the first mismatch.
  const char * r{raw()};
  size_t pos{0};
  for(;pos < target.size();pos++) {
    if(r[pos] != target[pos]) { return false; }
  }
  if(r[pos] != '"') { return false; }
  return true;
}

// Compares the raw content with `target` without assuming that `target` is free of
// unescaped quotes: hitting an unescaped quote inside `target` makes the strings unequal.
simdjson_inline bool raw_json_string::is_equal(std::string_view target) const noexcept {
  const char * r{raw()};
  size_t pos{0};
  bool escaping{false};
  for(;pos < target.size();pos++) {
    if(r[pos] != target[pos]) { return false; }
    // if target is a compile-time constant and it is free from
    // quotes, then the next part could get optimized away through
    // inlining.
    if((target[pos] == '"') && !escaping) {
      // We have reached the end of the raw_json_string but
      // the target is not done.
      return false;
    } else if(target[pos] == '\\') {
      escaping = !escaping;
    } else {
      escaping = false;
    }
  }
  if(r[pos] != '"') { return false; }
  return true;
}


// Null-terminated variant of the unchecked comparison.
simdjson_inline bool raw_json_string::unsafe_is_equal(const char * target) const noexcept {
  // Assumptions: 'target' does not contain unescaped quote characters, is null terminated and
  // the raw content is quote terminated within a valid JSON string.
  const char * r{raw()};
  size_t pos{0};
  for(;target[pos];pos++) {
    if(r[pos] != target[pos]) { return false; }
  }
  if(r[pos] != '"') { return false; }
  return true;
}

// Null-terminated variant of is_equal(std::string_view).
simdjson_inline bool raw_json_string::is_equal(const char* target) const noexcept {
  // Assumptions: does not contain unescaped quote characters, and
  // the raw content is quote terminated within a valid JSON string.
  const char * r{raw()};
  size_t pos{0};
  bool escaping{false};
  for(;target[pos];pos++) {
    if(r[pos] != target[pos]) { return false; }
    // if target is a compile-time constant and it is free from
    // quotes, then the next part could get optimized away through
    // inlining.
    if((target[pos] == '"') && !escaping) {
      // We have reached the end of the raw_json_string but
      // the target is not done.
      return false;
    } else if(target[pos] == '\\') {
      escaping = !escaping;
    } else {
      escaping = false;
    }
  }
  if(r[pos] != '"') { return false; }
  return true;
}

// The comparison operators below all reduce to unsafe_is_equal(std::string_view), so they
// inherit its assumptions about the compared string.
simdjson_unused simdjson_inline bool operator==(const raw_json_string &a, std::string_view c) noexcept {
  return a.unsafe_is_equal(c);
}

simdjson_unused simdjson_inline bool operator==(std::string_view c, const raw_json_string &a) noexcept {
  return a == c;
}

simdjson_unused simdjson_inline bool operator!=(const raw_json_string &a, std::string_view c) noexcept {
  return !(a == c);
}

simdjson_unused simdjson_inline bool operator!=(std::string_view c, const raw_json_string &a) noexcept {
  return !(a == c);
}


// Unescaping is delegated to the json_iterator.
simdjson_inline simdjson_warn_unused simdjson_result raw_json_string::unescape(json_iterator &iter, bool allow_replacement) const noexcept {
  return iter.unescape(*this, allow_replacement);
}

simdjson_inline simdjson_warn_unused simdjson_result raw_json_string::unescape_wobbly(json_iterator &iter) const noexcept {
  return iter.unescape_wobbly(*this);
}

// Writes the raw (still escaped) characters to `out`, stopping at the first quote that is
// not escaped by a backslash. The loop has no other exit, so it relies on such a quote
// being present.
simdjson_unused simdjson_inline std::ostream &operator<<(std::ostream &out, const raw_json_string &str) noexcept {
  bool in_escape = false;
  const char *s = str.raw();
  while (true) {
    switch (*s) {
      case '\\': in_escape = !in_escape; break;
      case '"': if (in_escape) { in_escape = false; } else { return out; } break;
      default: if (in_escape) { in_escape = false; }
    }
    out << *s;
    s++;
  }
}

} // namespace ondemand
} // namespace icelake
} // namespace simdjson

namespace simdjson {

// NOTE(review): in this copy `simdjson_result`, `implementation_simdjson_result_base` and
// `std::forward` appear without template arguments -- presumably lost in extraction; confirm
// against the upstream sources.

// Success constructor.
simdjson_inline simdjson_result::simdjson_result(icelake::ondemand::raw_json_string &&value) noexcept
    : implementation_simdjson_result_base(std::forward(value)) {}
// Error constructor.
simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept
    : implementation_simdjson_result_base(error) {}

// Each accessor below returns the stored error if there is one, and otherwise forwards to
// the wrapped raw_json_string.
simdjson_inline simdjson_result simdjson_result::raw() const noexcept {
  if (error()) { return error(); }
  return first.raw();
}
simdjson_inline simdjson_warn_unused simdjson_result simdjson_result::unescape(icelake::ondemand::json_iterator &iter, bool allow_replacement) const noexcept {
  if (error()) { return error(); }
  return first.unescape(iter, allow_replacement);
}
simdjson_inline simdjson_warn_unused simdjson_result simdjson_result::unescape_wobbly(icelake::ondemand::json_iterator &iter) const noexcept {
  if (error()) { return error(); }
  return first.unescape_wobbly(iter);
}
} // namespace simdjson

#endif // SIMDJSON_GENERIC_ONDEMAND_RAW_JSON_STRING_INL_H
/* end file simdjson/generic/ondemand/raw_json_string-inl.h for icelake */
/* including simdjson/generic/ondemand/serialization-inl.h for icelake: #include "simdjson/generic/ondemand/serialization-inl.h" */
/* begin file simdjson/generic/ondemand/serialization-inl.h for icelake */
#ifndef SIMDJSON_GENERIC_ONDEMAND_SERIALIZATION_INL_H

/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */
/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_SERIALIZATION_INL_H */
/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */
/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array.h" */
/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/document-inl.h" */
/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_type.h" */
/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/object.h" */
/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/serialization.h" */
/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value.h" */
/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */

namespace simdjson {

// Returns `str` without leading and trailing space, tab, newline and carriage-return
// characters. The result is a view into the same characters as `str`.
inline std::string_view trim(const std::string_view str) noexcept {
  // We can almost surely do better by rolling our own find_first_not_of function.
  size_t first = str.find_first_not_of(" \t\n\r");
  // If we have the empty string (just white space), then no trimming is possible, and
  // we return the empty string_view.
  if (std::string_view::npos == first) { return std::string_view(); }
  // A non-whitespace character exists, so `last` is found and last >= first.
  size_t last = str.find_last_not_of(" \t\n\r");
  return str.substr(first, (last - first + 1));
}


// Returns the trimmed raw JSON of the document, or the error from raw_json().
inline simdjson_result to_json_string(icelake::ondemand::document& x) noexcept {
  std::string_view v;
  auto error = x.raw_json().get(v);
  if(error) {return error; }
  return trim(v);
}

// Returns the trimmed raw JSON of the referenced document, or the error from raw_json().
inline simdjson_result to_json_string(icelake::ondemand::document_reference& x) noexcept {
  std::string_view v;
  auto error = x.raw_json().get(v);
  if(error) {return error; }
  return trim(v);
}

// Returns the trimmed JSON text of a value: arrays and objects go through their own
// to_json_string overloads, every other type uses the raw token.
inline simdjson_result to_json_string(icelake::ondemand::value& x) noexcept {
  /**
   * If we somehow receive a value that has already been consumed,
   * then the following code could be in trouble. E.g., we create
   * an array as needed, but if an array was already created, then
   * it could be bad.
   */
  using namespace icelake::ondemand;
  icelake::ondemand::json_type t;
  auto error = x.type().get(t);
  if(error != SUCCESS) { return error; }
  switch (t)
  {
    case json_type::array:
    {
      icelake::ondemand::array array;
      error = x.get_array().get(array);
      if(error) { return error; }
      return to_json_string(array);
    }
    case json_type::object:
    {
      icelake::ondemand::object object;
      error = x.get_object().get(object);
      if(error) { return error; }
      return to_json_string(object);
    }
    default:
      return trim(x.raw_json_token());
  }
}

// Returns the trimmed raw JSON of the object, or the error from raw_json().
inline simdjson_result to_json_string(icelake::ondemand::object& x) noexcept {
  std::string_view v;
  auto error = x.raw_json().get(v);
  if(error) {return error; }
  return trim(v);
}

// Returns the trimmed raw JSON of the array, or the error from raw_json().
inline simdjson_result to_json_string(icelake::ondemand::array& x) noexcept {
  std::string_view v;
  auto error = x.raw_json().get(v);
  if(error) {return error; }
  return trim(v);
}

// Result-taking overload: forwards the stored error, otherwise serializes the value.
// NOTE(review): the wrapped type is not visible in this copy (template argument missing).
inline simdjson_result to_json_string(simdjson_result x) {
  if (x.error()) { return x.error(); }
  return to_json_string(x.value_unsafe());
}
// NOTE(review): the four overloads below are textually identical in this copy because the
// template argument of `simdjson_result` is missing -- presumably lost in extraction; confirm
// the wrapped types against the upstream serialization-inl.h.
// Each one forwards the stored error, otherwise serializes the wrapped value.
inline simdjson_result to_json_string(simdjson_result x) {
  if (x.error()) { return x.error(); }
  return to_json_string(x.value_unsafe());
}

inline simdjson_result to_json_string(simdjson_result x) {
  if (x.error()) { return x.error(); }
  return to_json_string(x.value_unsafe());
}

inline simdjson_result to_json_string(simdjson_result x) {
  if (x.error()) { return x.error(); }
  return to_json_string(x.value_unsafe());
}

inline simdjson_result to_json_string(simdjson_result x) {
  if (x.error()) { return x.error(); }
  return to_json_string(x.value_unsafe());
}
} // namespace simdjson

namespace simdjson { namespace icelake { namespace ondemand {

// Stream insertion for On Demand types. Every overload serializes through
// simdjson::to_json_string(). With SIMDJSON_EXCEPTIONS a failure throws simdjson_error;
// without it, the error code itself is written to the stream.

#if SIMDJSON_EXCEPTIONS
inline std::ostream& operator<<(std::ostream& out, simdjson::icelake::ondemand::value x) {
  std::string_view v;
  auto error = simdjson::to_json_string(x).get(v);
  if(error == simdjson::SUCCESS) {
    return (out << v);
  } else {
    throw simdjson::simdjson_error(error);
  }
}
// Result-taking overload: throws the stored error, otherwise streams the value.
inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x) {
  if (x.error()) { throw simdjson::simdjson_error(x.error()); }
  return (out << x.value());
}
#else
inline std::ostream& operator<<(std::ostream& out, simdjson::icelake::ondemand::value x) {
  std::string_view v;
  auto error = simdjson::to_json_string(x).get(v);
  if(error == simdjson::SUCCESS) {
    return (out << v);
  } else {
    return (out << error);
  }
}
#endif

#if SIMDJSON_EXCEPTIONS
inline std::ostream& operator<<(std::ostream& out, simdjson::icelake::ondemand::array value) {
  std::string_view v;
  auto error = simdjson::to_json_string(value).get(v);
  if(error == simdjson::SUCCESS) {
    return (out << v);
  } else {
    throw simdjson::simdjson_error(error);
  }
}
// Result-taking overload: throws the stored error, otherwise streams the value.
inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x) {
  if (x.error()) { throw simdjson::simdjson_error(x.error()); }
  return (out << x.value());
}
#else
inline std::ostream& operator<<(std::ostream& out, simdjson::icelake::ondemand::array value) {
  std::string_view v;
  auto error = simdjson::to_json_string(value).get(v);
  if(error == simdjson::SUCCESS) {
    return (out << v);
  } else {
    return (out << error);
  }
}
#endif

#if SIMDJSON_EXCEPTIONS
inline std::ostream& operator<<(std::ostream& out, simdjson::icelake::ondemand::document& value)  {
  std::string_view v;
  auto error = simdjson::to_json_string(value).get(v);
  if(error == simdjson::SUCCESS) {
    return (out << v);
  } else {
    throw simdjson::simdjson_error(error);
  }
}
inline std::ostream& operator<<(std::ostream& out, simdjson::icelake::ondemand::document_reference& value)  {
  std::string_view v;
  auto error = simdjson::to_json_string(value).get(v);
  if(error == simdjson::SUCCESS) {
    return (out << v);
  } else {
    throw simdjson::simdjson_error(error);
  }
}
// Result-taking overloads (rvalue): throw the stored error, otherwise stream the value.
inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result&& x) {
  if (x.error()) { throw simdjson::simdjson_error(x.error()); }
  return (out << x.value());
}
inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result&& x) {
  if (x.error()) { throw simdjson::simdjson_error(x.error()); }
  return (out << x.value());
}
#else
// Only the document& overload is provided on this branch; there is no document_reference&
// counterpart here.
inline std::ostream& operator<<(std::ostream& out, simdjson::icelake::ondemand::document& value)  {
  std::string_view v;
  auto error = simdjson::to_json_string(value).get(v);
  if(error == simdjson::SUCCESS) {
    return (out << v);
  } else {
    return (out << error);
  }
}
#endif

#if SIMDJSON_EXCEPTIONS
inline std::ostream& operator<<(std::ostream& out, simdjson::icelake::ondemand::object value) {
  std::string_view v;
  auto error = simdjson::to_json_string(value).get(v);
  if(error == simdjson::SUCCESS) {
    return (out << v);
  } else {
    throw simdjson::simdjson_error(error);
  }
}
// Result-taking overload: throws the stored error, otherwise streams the value.
inline std::ostream& operator<<(std::ostream& out,  simdjson::simdjson_result x) {
  if (x.error()) { throw  simdjson::simdjson_error(x.error()); }
  return (out << x.value());
}
#else
inline std::ostream& operator<<(std::ostream& out, simdjson::icelake::ondemand::object value) {
  std::string_view v;
  auto error = simdjson::to_json_string(value).get(v);
  if(error == simdjson::SUCCESS) {
    return (out << v);
  } else {
    return (out << error);
  }
}
#endif
}}} // namespace simdjson::icelake::ondemand

#endif // SIMDJSON_GENERIC_ONDEMAND_SERIALIZATION_INL_H
/* end file simdjson/generic/ondemand/serialization-inl.h for icelake */
/* including simdjson/generic/ondemand/token_iterator-inl.h for icelake: #include "simdjson/generic/ondemand/token_iterator-inl.h" */
/* begin file simdjson/generic/ondemand/token_iterator-inl.h for icelake */
#ifndef SIMDJSON_GENERIC_ONDEMAND_TOKEN_ITERATOR_INL_H

/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */
/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_TOKEN_ITERATOR_INL_H */
/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */
/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/token_iterator.h" */
/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base-inl.h" */
/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */

namespace simdjson {
namespace icelake {
namespace ondemand {

// A token_iterator pairs the JSON buffer (`buf`) with a position in a sequence of offsets
// into that buffer (`_position`); dereferencing a position yields a byte offset.
// None of the accessors below perform bounds checks.
simdjson_inline token_iterator::token_iterator(
  const uint8_t *_buf,
  token_position position
) noexcept : buf{_buf}, _position{position}
{
}

// Byte offset of the current token within the buffer.
simdjson_inline uint32_t token_iterator::current_offset() const noexcept {
  return *(_position);
}


// Returns a pointer to the current token and then moves to the next position.
simdjson_inline const uint8_t *token_iterator::return_current_and_advance() noexcept {
  return &buf[*(_position++)];
}

// Pointer to the token at an explicit position; the iterator itself does not move.
simdjson_inline const uint8_t *token_iterator::peek(token_position position) const noexcept {
  return &buf[*position];
}
// Byte offset of the token at an explicit position.
simdjson_inline uint32_t token_iterator::peek_index(token_position position) const noexcept {
  return *position;
}
// Distance in bytes from the token at `position` to the token that follows it.
simdjson_inline uint32_t token_iterator::peek_length(token_position position) const noexcept {
  return *(position+1) - *position;
}

// Pointer to the token `delta` positions away from the current one.
simdjson_inline const uint8_t *token_iterator::peek(int32_t delta) const noexcept {
  return &buf[*(_position+delta)];
}
// Byte offset of the token `delta` positions away from the current one.
simdjson_inline uint32_t token_iterator::peek_index(int32_t delta) const noexcept {
  return *(_position+delta);
}
// Distance in bytes from the token `delta` positions away to the token that follows it.
simdjson_inline uint32_t token_iterator::peek_length(int32_t delta) const noexcept {
  return *(_position+delta+1) - *(_position+delta);
}

// Current position.
simdjson_inline token_position token_iterator::position() const noexcept {
  return _position;
}
// Moves the iterator to target_position.
simdjson_inline void token_iterator::set_position(token_position target_position) noexcept {
  _position = target_position;
}

// The comparison operators compare positions only; `buf` does not take part.
simdjson_inline bool token_iterator::operator==(const token_iterator &other) const noexcept {
  return _position == other._position;
}
simdjson_inline bool token_iterator::operator!=(const token_iterator &other) const noexcept {
  return _position != other._position;
}
simdjson_inline bool token_iterator::operator>(const token_iterator &other) const noexcept {
  return _position > other._position;
}
simdjson_inline bool token_iterator::operator>=(const token_iterator &other) const noexcept {
  return _position >= other._position;
}
simdjson_inline bool token_iterator::operator<(const token_iterator &other) const noexcept {
  return _position < other._position;
}
simdjson_inline bool token_iterator::operator<=(const token_iterator &other) const noexcept {
  return _position <= other._position;
}

} // namespace ondemand
} // namespace icelake
} // namespace simdjson

namespace simdjson {

// Success constructor.
simdjson_inline simdjson_result::simdjson_result(icelake::ondemand::token_iterator &&value) noexcept
    : implementation_simdjson_result_base(std::forward(value)) {}
// Error constructor.
simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept
    : implementation_simdjson_result_base(error) {}

} // namespace simdjson

#endif // SIMDJSON_GENERIC_ONDEMAND_TOKEN_ITERATOR_INL_H
/* end file simdjson/generic/ondemand/token_iterator-inl.h for icelake */
/* including simdjson/generic/ondemand/value-inl.h for icelake: #include "simdjson/generic/ondemand/value-inl.h" */
/* begin file simdjson/generic/ondemand/value-inl.h for icelake */
#ifndef SIMDJSON_GENERIC_ONDEMAND_VALUE_INL_H

/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */
/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_VALUE_INL_H */
/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */
/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array.h" */
/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array_iterator.h" */
/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator.h" */
/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_type.h" */
/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/object.h" */
/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string.h" */
/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value.h" */
/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */

namespace simdjson {
namespace icelake {
namespace ondemand {

// NOTE(review): return types and template headers below appear without their template
// arguments in this copy (`simdjson_result`, `template`, `get()`) -- presumably lost in
// extraction; confirm against the upstream value-inl.h.

// A value is a thin wrapper around a value_iterator.
simdjson_inline value::value(const value_iterator &_iter) noexcept
  : iter{_iter}
{
}
// start() and resume() both simply wrap the given iterator.
simdjson_inline value value::start(const value_iterator &iter) noexcept {
  return iter;
}
simdjson_inline value value::resume(const value_iterator &iter) noexcept {
  return iter;
}

// Starts an array at this value's iterator.
simdjson_inline simdjson_result value::get_array() noexcept {
  return array::start(iter);
}
// Starts an object at this value's iterator.
simdjson_inline simdjson_result value::get_object() noexcept {
  return object::start(iter);
}
// Starts the object when the iterator is still at its start, otherwise resumes it.
simdjson_inline simdjson_result value::start_or_resume_object() noexcept {
  if (iter.at_start()) {
    return get_object();
  } else {
    return object::resume(iter);
  }
}

// The typed getters below forward directly to the value_iterator method of the same name.
simdjson_inline simdjson_result value::get_raw_json_string() noexcept {
  return iter.get_raw_json_string();
}
simdjson_inline simdjson_result value::get_string(bool allow_replacement) noexcept {
  return iter.get_string(allow_replacement);
}
template
simdjson_inline error_code value::get_string(string_type& receiver, bool allow_replacement) noexcept {
  return iter.get_string(receiver, allow_replacement);
}
simdjson_inline simdjson_result value::get_wobbly_string() noexcept {
  return iter.get_wobbly_string();
}
simdjson_inline simdjson_result value::get_double() noexcept {
  return iter.get_double();
}
simdjson_inline simdjson_result value::get_double_in_string() noexcept {
  return iter.get_double_in_string();
}
simdjson_inline simdjson_result value::get_uint64() noexcept {
  return iter.get_uint64();
}
simdjson_inline simdjson_result value::get_uint64_in_string() noexcept {
  return iter.get_uint64_in_string();
}
simdjson_inline simdjson_result value::get_int64() noexcept {
  return iter.get_int64();
}
simdjson_inline simdjson_result value::get_int64_in_string() noexcept {
  return iter.get_int64_in_string();
}
simdjson_inline simdjson_result value::get_bool() noexcept {
  return iter.get_bool();
}
simdjson_inline simdjson_result value::is_null() noexcept {
  return iter.is_null();
}

// Specializations of get(): each forwards to the getter named in its body. The string
// specialization passes allow_replacement = false.
template<> simdjson_inline simdjson_result value::get() noexcept { return get_array(); }
template<> simdjson_inline simdjson_result value::get() noexcept { return get_object(); }
template<> simdjson_inline simdjson_result value::get() noexcept { return get_raw_json_string(); }
template<> simdjson_inline simdjson_result value::get() noexcept { return get_string(false); }
template<> simdjson_inline simdjson_result value::get() noexcept { return get_number(); }
template<> simdjson_inline simdjson_result value::get() noexcept { return get_double(); }
template<> simdjson_inline simdjson_result value::get() noexcept { return get_uint64(); }
template<> simdjson_inline simdjson_result value::get() noexcept { return get_int64(); }
template<> simdjson_inline simdjson_result value::get() noexcept { return get_bool(); }

// Out-parameter form: stores the converted value in `out` and returns the error code.
template simdjson_inline error_code value::get(T &out) noexcept {
  return get().get(out);
}

#if SIMDJSON_EXCEPTIONS
// Conversion operator; declared noexcept(false).
simdjson_inline value::operator array() noexcept(false) {
  return get_array();
}
simdjson_inline
value::operator object() noexcept(false) { return get_object(); } simdjson_inline value::operator uint64_t() noexcept(false) { return get_uint64(); } simdjson_inline value::operator int64_t() noexcept(false) { return get_int64(); } simdjson_inline value::operator double() noexcept(false) { return get_double(); } simdjson_inline value::operator std::string_view() noexcept(false) { return get_string(false); } simdjson_inline value::operator raw_json_string() noexcept(false) { return get_raw_json_string(); } simdjson_inline value::operator bool() noexcept(false) { return get_bool(); } #endif simdjson_inline simdjson_result value::begin() & noexcept { return get_array().begin(); } simdjson_inline simdjson_result value::end() & noexcept { return {}; } simdjson_inline simdjson_result value::count_elements() & noexcept { simdjson_result answer; auto a = get_array(); answer = a.count_elements(); // count_elements leaves you pointing inside the array, at the first element. // We need to move back so that the user can create a new array (which requires that // we point at '['). 
iter.move_at_start(); return answer; } simdjson_inline simdjson_result value::count_fields() & noexcept { simdjson_result answer; auto a = get_object(); answer = a.count_fields(); iter.move_at_start(); return answer; } simdjson_inline simdjson_result value::at(size_t index) noexcept { auto a = get_array(); return a.at(index); } simdjson_inline simdjson_result value::find_field(std::string_view key) noexcept { return start_or_resume_object().find_field(key); } simdjson_inline simdjson_result value::find_field(const char *key) noexcept { return start_or_resume_object().find_field(key); } simdjson_inline simdjson_result value::find_field_unordered(std::string_view key) noexcept { return start_or_resume_object().find_field_unordered(key); } simdjson_inline simdjson_result value::find_field_unordered(const char *key) noexcept { return start_or_resume_object().find_field_unordered(key); } simdjson_inline simdjson_result value::operator[](std::string_view key) noexcept { return start_or_resume_object()[key]; } simdjson_inline simdjson_result value::operator[](const char *key) noexcept { return start_or_resume_object()[key]; } simdjson_inline simdjson_result value::type() noexcept { return iter.type(); } simdjson_inline simdjson_result value::is_scalar() noexcept { json_type this_type; auto error = type().get(this_type); if(error) { return error; } return ! 
((this_type == json_type::array) || (this_type == json_type::object)); } simdjson_inline bool value::is_negative() noexcept { return iter.is_negative(); } simdjson_inline simdjson_result value::is_integer() noexcept { return iter.is_integer(); } simdjson_warn_unused simdjson_inline simdjson_result value::get_number_type() noexcept { return iter.get_number_type(); } simdjson_warn_unused simdjson_inline simdjson_result value::get_number() noexcept { return iter.get_number(); } simdjson_inline std::string_view value::raw_json_token() noexcept { return std::string_view(reinterpret_cast(iter.peek_start()), iter.peek_start_length()); } simdjson_inline simdjson_result value::raw_json() noexcept { json_type t; SIMDJSON_TRY(type().get(t)); switch (t) { case json_type::array: { ondemand::array array; SIMDJSON_TRY(get_array().get(array)); return array.raw_json(); } case json_type::object: { ondemand::object object; SIMDJSON_TRY(get_object().get(object)); return object.raw_json(); } default: return raw_json_token(); } } simdjson_inline simdjson_result value::current_location() noexcept { return iter.json_iter().current_location(); } simdjson_inline int32_t value::current_depth() const noexcept{ return iter.json_iter().depth(); } simdjson_inline simdjson_result value::at_pointer(std::string_view json_pointer) noexcept { json_type t; SIMDJSON_TRY(type().get(t)); switch (t) { case json_type::array: return (*this).get_array().at_pointer(json_pointer); case json_type::object: return (*this).get_object().at_pointer(json_pointer); default: return INVALID_JSON_POINTER; } } } // namespace ondemand } // namespace icelake } // namespace simdjson namespace simdjson { simdjson_inline simdjson_result::simdjson_result( icelake::ondemand::value &&value ) noexcept : implementation_simdjson_result_base( std::forward(value) ) { } simdjson_inline simdjson_result::simdjson_result( error_code error ) noexcept : implementation_simdjson_result_base(error) { } simdjson_inline simdjson_result 
simdjson_result::count_elements() & noexcept { if (error()) { return error(); } return first.count_elements(); } simdjson_inline simdjson_result simdjson_result::count_fields() & noexcept { if (error()) { return error(); } return first.count_fields(); } simdjson_inline simdjson_result simdjson_result::at(size_t index) noexcept { if (error()) { return error(); } return first.at(index); } simdjson_inline simdjson_result simdjson_result::begin() & noexcept { if (error()) { return error(); } return first.begin(); } simdjson_inline simdjson_result simdjson_result::end() & noexcept { if (error()) { return error(); } return {}; } simdjson_inline simdjson_result simdjson_result::find_field(std::string_view key) noexcept { if (error()) { return error(); } return first.find_field(key); } simdjson_inline simdjson_result simdjson_result::find_field(const char *key) noexcept { if (error()) { return error(); } return first.find_field(key); } simdjson_inline simdjson_result simdjson_result::find_field_unordered(std::string_view key) noexcept { if (error()) { return error(); } return first.find_field_unordered(key); } simdjson_inline simdjson_result simdjson_result::find_field_unordered(const char *key) noexcept { if (error()) { return error(); } return first.find_field_unordered(key); } simdjson_inline simdjson_result simdjson_result::operator[](std::string_view key) noexcept { if (error()) { return error(); } return first[key]; } simdjson_inline simdjson_result simdjson_result::operator[](const char *key) noexcept { if (error()) { return error(); } return first[key]; } simdjson_inline simdjson_result simdjson_result::get_array() noexcept { if (error()) { return error(); } return first.get_array(); } simdjson_inline simdjson_result simdjson_result::get_object() noexcept { if (error()) { return error(); } return first.get_object(); } simdjson_inline simdjson_result simdjson_result::get_uint64() noexcept { if (error()) { return error(); } return first.get_uint64(); } 
simdjson_inline simdjson_result simdjson_result::get_uint64_in_string() noexcept { if (error()) { return error(); } return first.get_uint64_in_string(); } simdjson_inline simdjson_result simdjson_result::get_int64() noexcept { if (error()) { return error(); } return first.get_int64(); } simdjson_inline simdjson_result simdjson_result::get_int64_in_string() noexcept { if (error()) { return error(); } return first.get_int64_in_string(); } simdjson_inline simdjson_result simdjson_result::get_double() noexcept { if (error()) { return error(); } return first.get_double(); } simdjson_inline simdjson_result simdjson_result::get_double_in_string() noexcept { if (error()) { return error(); } return first.get_double_in_string(); } simdjson_inline simdjson_result simdjson_result::get_string(bool allow_replacement) noexcept { if (error()) { return error(); } return first.get_string(allow_replacement); } template simdjson_inline error_code simdjson_result::get_string(string_type& receiver, bool allow_replacement) noexcept { if (error()) { return error(); } return first.get_string(receiver, allow_replacement); } simdjson_inline simdjson_result simdjson_result::get_wobbly_string() noexcept { if (error()) { return error(); } return first.get_wobbly_string(); } simdjson_inline simdjson_result simdjson_result::get_raw_json_string() noexcept { if (error()) { return error(); } return first.get_raw_json_string(); } simdjson_inline simdjson_result simdjson_result::get_bool() noexcept { if (error()) { return error(); } return first.get_bool(); } simdjson_inline simdjson_result simdjson_result::is_null() noexcept { if (error()) { return error(); } return first.is_null(); } template simdjson_inline simdjson_result simdjson_result::get() noexcept { if (error()) { return error(); } return first.get(); } template simdjson_inline error_code simdjson_result::get(T &out) noexcept { if (error()) { return error(); } return first.get(out); } template<> simdjson_inline simdjson_result 
simdjson_result::get() noexcept { if (error()) { return error(); } return std::move(first); } template<> simdjson_inline error_code simdjson_result::get(icelake::ondemand::value &out) noexcept { if (error()) { return error(); } out = first; return SUCCESS; } simdjson_inline simdjson_result simdjson_result::type() noexcept { if (error()) { return error(); } return first.type(); } simdjson_inline simdjson_result simdjson_result::is_scalar() noexcept { if (error()) { return error(); } return first.is_scalar(); } simdjson_inline simdjson_result simdjson_result::is_negative() noexcept { if (error()) { return error(); } return first.is_negative(); } simdjson_inline simdjson_result simdjson_result::is_integer() noexcept { if (error()) { return error(); } return first.is_integer(); } simdjson_inline simdjson_result simdjson_result::get_number_type() noexcept { if (error()) { return error(); } return first.get_number_type(); } simdjson_inline simdjson_result simdjson_result::get_number() noexcept { if (error()) { return error(); } return first.get_number(); } #if SIMDJSON_EXCEPTIONS simdjson_inline simdjson_result::operator icelake::ondemand::array() noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } simdjson_inline simdjson_result::operator icelake::ondemand::object() noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } simdjson_inline simdjson_result::operator uint64_t() noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } simdjson_inline simdjson_result::operator int64_t() noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } simdjson_inline simdjson_result::operator double() noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } simdjson_inline simdjson_result::operator std::string_view() noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } simdjson_inline simdjson_result::operator 
icelake::ondemand::raw_json_string() noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } simdjson_inline simdjson_result::operator bool() noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } #endif simdjson_inline simdjson_result simdjson_result::raw_json_token() noexcept { if (error()) { return error(); } return first.raw_json_token(); } simdjson_inline simdjson_result simdjson_result::raw_json() noexcept { if (error()) { return error(); } return first.raw_json(); } simdjson_inline simdjson_result simdjson_result::current_location() noexcept { if (error()) { return error(); } return first.current_location(); } simdjson_inline simdjson_result simdjson_result::current_depth() const noexcept { if (error()) { return error(); } return first.current_depth(); } simdjson_inline simdjson_result simdjson_result::at_pointer(std::string_view json_pointer) noexcept { if (error()) { return error(); } return first.at_pointer(json_pointer); } } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_VALUE_INL_H /* end file simdjson/generic/ondemand/value-inl.h for icelake */ /* including simdjson/generic/ondemand/value_iterator-inl.h for icelake: #include "simdjson/generic/ondemand/value_iterator-inl.h" */ /* begin file simdjson/generic/ondemand/value_iterator-inl.h for icelake */ #ifndef SIMDJSON_GENERIC_ONDEMAND_VALUE_ITERATOR_INL_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_VALUE_ITERATOR_INL_H */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/atomparsing.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/numberparsing.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator.h" */ /* amalgamation skipped (editor-only): #include 
"simdjson/generic/ondemand/json_type-inl.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string-inl.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { namespace icelake { namespace ondemand { simdjson_inline value_iterator::value_iterator( json_iterator *json_iter, depth_t depth, token_position start_position ) noexcept : _json_iter{json_iter}, _depth{depth}, _start_position{start_position} { } simdjson_warn_unused simdjson_inline simdjson_result value_iterator::start_object() noexcept { SIMDJSON_TRY( start_container('{', "Not an object", "object") ); return started_object(); } simdjson_warn_unused simdjson_inline simdjson_result value_iterator::start_root_object() noexcept { SIMDJSON_TRY( start_container('{', "Not an object", "object") ); return started_root_object(); } simdjson_warn_unused simdjson_inline simdjson_result value_iterator::started_object() noexcept { assert_at_container_start(); #if SIMDJSON_DEVELOPMENT_CHECKS _json_iter->set_start_position(_depth, start_position()); #endif if (*_json_iter->peek() == '}') { logger::log_value(*_json_iter, "empty object"); _json_iter->return_current_and_advance(); end_container(); return false; } return true; } simdjson_warn_unused simdjson_inline error_code value_iterator::check_root_object() noexcept { // When in streaming mode, we cannot expect peek_last() to be the last structural element of the // current document. It only works in the normal mode where we have indexed a single document. // Note that adding a check for 'streaming' is not expensive since we only have at most // one root element. if ( ! _json_iter->streaming() ) { // The following lines do not fully protect against garbage content within the // object: e.g., `{"a":2} foo }`. 
Users concerned with garbage content should // call `at_end()` on the document instance at the end of the processing to // ensure that the processing has finished at the end. // if (*_json_iter->peek_last() != '}') { _json_iter->abandon(); return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "missing } at end"); } // If the last character is } *and* the first gibberish character is also '}' // then on-demand could accidentally go over. So we need additional checks. // https://github.com/simdjson/simdjson/issues/1834 // Checking that the document is balanced requires a full scan which is potentially // expensive, but it only happens in edge cases where the first padding character is // a closing bracket. if ((*_json_iter->peek(_json_iter->end_position()) == '}') && (!_json_iter->balanced())) { _json_iter->abandon(); // The exact error would require more work. It will typically be an unclosed object. return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "the document is unbalanced"); } } return SUCCESS; } simdjson_warn_unused simdjson_inline simdjson_result value_iterator::started_root_object() noexcept { auto error = check_root_object(); if(error) { return error; } return started_object(); } simdjson_warn_unused simdjson_inline error_code value_iterator::end_container() noexcept { #if SIMDJSON_CHECK_EOF if (depth() > 1 && at_end()) { return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "missing parent ] or }"); } // if (depth() <= 1 && !at_end()) { return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "missing [ or { at start"); } #endif // SIMDJSON_CHECK_EOF _json_iter->ascend_to(depth()-1); return SUCCESS; } simdjson_warn_unused simdjson_inline simdjson_result value_iterator::has_next_field() noexcept { assert_at_next(); // It's illegal to call this unless there are more tokens: anything that ends in } or ] is // obligated to verify there are more tokens if they are not the top level. 
switch (*_json_iter->return_current_and_advance()) { case '}': logger::log_end_value(*_json_iter, "object"); SIMDJSON_TRY( end_container() ); return false; case ',': return true; default: return report_error(TAPE_ERROR, "Missing comma between object fields"); } } simdjson_warn_unused simdjson_inline simdjson_result value_iterator::find_field_raw(const std::string_view key) noexcept { error_code error; bool has_value; // // Initially, the object can be in one of a few different places: // // 1. The start of the object, at the first field: // // ``` // { "a": [ 1, 2 ], "b": [ 3, 4 ] } // ^ (depth 2, index 1) // ``` if (at_first_field()) { has_value = true; // // 2. When a previous search did not yield a value or the object is empty: // // ``` // { "a": [ 1, 2 ], "b": [ 3, 4 ] } // ^ (depth 0) // { } // ^ (depth 0, index 2) // ``` // } else if (!is_open()) { #if SIMDJSON_DEVELOPMENT_CHECKS // If we're past the end of the object, we're being iterated out of order. // Note: this isn't perfect detection. It's possible the user is inside some other object; if so, // this object iterator will blithely scan that object for fields. if (_json_iter->depth() < depth() - 1) { return OUT_OF_ORDER_ITERATION; } #endif return false; // 3. When a previous search found a field or an iterator yielded a value: // // ``` // // When a field was not fully consumed (or not even touched at all) // { "a": [ 1, 2 ], "b": [ 3, 4 ] } // ^ (depth 2) // // When a field was fully consumed // { "a": [ 1, 2 ], "b": [ 3, 4 ] } // ^ (depth 1) // // When the last field was fully consumed // { "a": [ 1, 2 ], "b": [ 3, 4 ] } // ^ (depth 1) // ``` // } else { if ((error = skip_child() )) { abandon(); return error; } if ((error = has_next_field().get(has_value) )) { abandon(); return error; } #if SIMDJSON_DEVELOPMENT_CHECKS if (_json_iter->start_position(_depth) != start_position()) { return OUT_OF_ORDER_ITERATION; } #endif } while (has_value) { // Get the key and colon, stopping at the value. 
raw_json_string actual_key; // size_t max_key_length = _json_iter->peek_length() - 2; // -2 for the two quotes // Note: _json_iter->peek_length() - 2 might overflow if _json_iter->peek_length() < 2. // field_key() advances the pointer and checks that '"' is found (corresponding to a key). // The depth is left unchanged by field_key(). if ((error = field_key().get(actual_key) )) { abandon(); return error; }; // field_value() will advance and check that we find a ':' separating the // key and the value. It will also increment the depth by one. if ((error = field_value() )) { abandon(); return error; } // If it matches, stop and return // We could do it this way if we wanted to allow arbitrary // key content (including escaped quotes). //if (actual_key.unsafe_is_equal(max_key_length, key)) { // Instead we do the following which may trigger buffer overruns if the // user provides an adversarial key (containing a well placed unescaped quote // character and being longer than the number of bytes remaining in the JSON // input). if (actual_key.unsafe_is_equal(key)) { logger::log_event(*this, "match", key, -2); // If we return here, then we return while pointing at the ':' that we just checked. return true; } // No match: skip the value and see if , or } is next logger::log_event(*this, "no match", key, -2); // The call to skip_child is meant to skip over the value corresponding to the key. // After skip_child(), we are right before the next comma (',') or the final brace ('}'). SIMDJSON_TRY( skip_child() ); // Skip the value entirely // The has_next_field() advances the pointer and check that either ',' or '}' is found. // It returns true if ',' is found, false otherwise. If anything other than ',' or '}' is found, // then we are in error and we abort. if ((error = has_next_field().get(has_value) )) { abandon(); return error; } } // If the loop ended, we're out of fields to look at. 
return false; } SIMDJSON_PUSH_DISABLE_WARNINGS SIMDJSON_DISABLE_STRICT_OVERFLOW_WARNING simdjson_warn_unused simdjson_inline simdjson_result value_iterator::find_field_unordered_raw(const std::string_view key) noexcept { /** * When find_field_unordered_raw is called, we can either be pointing at the * first key, pointing outside (at the closing brace) or if a key was matched * we can be either pointing right afterthe ':' right before the value (that we need skip), * or we may have consumed the value and we might be at a comma or at the * final brace (ready for a call to has_next_field()). */ error_code error; bool has_value; // First, we scan from that point to the end. // If we don't find a match, we may loop back around, and scan from the beginning to that point. token_position search_start = _json_iter->position(); // We want to know whether we need to go back to the beginning. bool at_first = at_first_field(); /////////////// // Initially, the object can be in one of a few different places: // // 1. At the first key: // // ``` // { "a": [ 1, 2 ], "b": [ 3, 4 ] } // ^ (depth 2, index 1) // ``` // if (at_first) { has_value = true; // 2. When a previous search did not yield a value or the object is empty: // // ``` // { "a": [ 1, 2 ], "b": [ 3, 4 ] } // ^ (depth 0) // { } // ^ (depth 0, index 2) // ``` // } else if (!is_open()) { #if SIMDJSON_DEVELOPMENT_CHECKS // If we're past the end of the object, we're being iterated out of order. // Note: this isn't perfect detection. It's possible the user is inside some other object; if so, // this object iterator will blithely scan that object for fields. if (_json_iter->depth() < depth() - 1) { return OUT_OF_ORDER_ITERATION; } #endif SIMDJSON_TRY(reset_object().get(has_value)); at_first = true; // 3. 
When a previous search found a field or an iterator yielded a value: // // ``` // // When a field was not fully consumed (or not even touched at all) // { "a": [ 1, 2 ], "b": [ 3, 4 ] } // ^ (depth 2) // // When a field was fully consumed // { "a": [ 1, 2 ], "b": [ 3, 4 ] } // ^ (depth 1) // // When the last field was fully consumed // { "a": [ 1, 2 ], "b": [ 3, 4 ] } // ^ (depth 1) // ``` // } else { // If someone queried a key but they not did access the value, then we are left pointing // at the ':' and we need to move forward through the value... If the value was // processed then skip_child() does not move the iterator (but may adjust the depth). if ((error = skip_child() )) { abandon(); return error; } search_start = _json_iter->position(); if ((error = has_next_field().get(has_value) )) { abandon(); return error; } #if SIMDJSON_DEVELOPMENT_CHECKS if (_json_iter->start_position(_depth) != start_position()) { return OUT_OF_ORDER_ITERATION; } #endif } // After initial processing, we will be in one of two states: // // ``` // // At the beginning of a field // { "a": [ 1, 2 ], "b": [ 3, 4 ] } // ^ (depth 1) // { "a": [ 1, 2 ], "b": [ 3, 4 ] } // ^ (depth 1) // // At the end of the object // { "a": [ 1, 2 ], "b": [ 3, 4 ] } // ^ (depth 0) // ``` // // Next, we find a match starting from the current position. while (has_value) { SIMDJSON_ASSUME( _json_iter->_depth == _depth ); // We must be at the start of a field // Get the key and colon, stopping at the value. raw_json_string actual_key; // size_t max_key_length = _json_iter->peek_length() - 2; // -2 for the two quotes // Note: _json_iter->peek_length() - 2 might overflow if _json_iter->peek_length() < 2. // field_key() advances the pointer and checks that '"' is found (corresponding to a key). // The depth is left unchanged by field_key(). if ((error = field_key().get(actual_key) )) { abandon(); return error; }; // field_value() will advance and check that we find a ':' separating the // key and the value. 
It will also increment the depth by one. if ((error = field_value() )) { abandon(); return error; } // If it matches, stop and return // We could do it this way if we wanted to allow arbitrary // key content (including escaped quotes). // if (actual_key.unsafe_is_equal(max_key_length, key)) { // Instead we do the following which may trigger buffer overruns if the // user provides an adversarial key (containing a well placed unescaped quote // character and being longer than the number of bytes remaining in the JSON // input). if (actual_key.unsafe_is_equal(key)) { logger::log_event(*this, "match", key, -2); // If we return here, then we return while pointing at the ':' that we just checked. return true; } // No match: skip the value and see if , or } is next logger::log_event(*this, "no match", key, -2); // The call to skip_child is meant to skip over the value corresponding to the key. // After skip_child(), we are right before the next comma (',') or the final brace ('}'). SIMDJSON_TRY( skip_child() ); // The has_next_field() advances the pointer and check that either ',' or '}' is found. // It returns true if ',' is found, false otherwise. If anything other than ',' or '}' is found, // then we are in error and we abort. if ((error = has_next_field().get(has_value) )) { abandon(); return error; } } // Performance note: it maybe wasteful to rewind to the beginning when there might be // no other query following. Indeed, it would require reskipping the whole object. // Instead, you can just stay where you are. If there is a new query, there is always time // to rewind. if(at_first) { return false; } // If we reach the end without finding a match, search the rest of the fields starting at the // beginning of the object. // (We have already run through the object before, so we've already validated its structure. We // don't check errors in this bit.) 
SIMDJSON_TRY(reset_object().get(has_value)); while (true) { SIMDJSON_ASSUME(has_value); // we should reach search_start before ever reaching the end of the object SIMDJSON_ASSUME( _json_iter->_depth == _depth ); // We must be at the start of a field // Get the key and colon, stopping at the value. raw_json_string actual_key; // size_t max_key_length = _json_iter->peek_length() - 2; // -2 for the two quotes // Note: _json_iter->peek_length() - 2 might overflow if _json_iter->peek_length() < 2. // field_key() advances the pointer and checks that '"' is found (corresponding to a key). // The depth is left unchanged by field_key(). error = field_key().get(actual_key); SIMDJSON_ASSUME(!error); // field_value() will advance and check that we find a ':' separating the // key and the value. It will also increment the depth by one. error = field_value(); SIMDJSON_ASSUME(!error); // If it matches, stop and return // We could do it this way if we wanted to allow arbitrary // key content (including escaped quotes). // if (actual_key.unsafe_is_equal(max_key_length, key)) { // Instead we do the following which may trigger buffer overruns if the // user provides an adversarial key (containing a well placed unescaped quote // character and being longer than the number of bytes remaining in the JSON // input). if (actual_key.unsafe_is_equal(key)) { logger::log_event(*this, "match", key, -2); // If we return here, then we return while pointing at the ':' that we just checked. return true; } // No match: skip the value and see if , or } is next logger::log_event(*this, "no match", key, -2); // The call to skip_child is meant to skip over the value corresponding to the key. // After skip_child(), we are right before the next comma (',') or the final brace ('}'). SIMDJSON_TRY( skip_child() ); // If we reached the end of the key-value pair we started from, then we know // that the key is not there so we return false. We are either right before // the next comma or the final brace. 
if(_json_iter->position() == search_start) { return false; } // The has_next_field() advances the pointer and check that either ',' or '}' is found. // It returns true if ',' is found, false otherwise. If anything other than ',' or '}' is found, // then we are in error and we abort. error = has_next_field().get(has_value); SIMDJSON_ASSUME(!error); // If we make the mistake of exiting here, then we could be left pointing at a key // in the middle of an object. That's not an allowable state. } // If the loop ended, we're out of fields to look at. The program should // never reach this point. return false; } SIMDJSON_POP_DISABLE_WARNINGS simdjson_warn_unused simdjson_inline simdjson_result value_iterator::field_key() noexcept { assert_at_next(); const uint8_t *key = _json_iter->return_current_and_advance(); if (*(key++) != '"') { return report_error(TAPE_ERROR, "Object key is not a string"); } return raw_json_string(key); } simdjson_warn_unused simdjson_inline error_code value_iterator::field_value() noexcept { assert_at_next(); if (*_json_iter->return_current_and_advance() != ':') { return report_error(TAPE_ERROR, "Missing colon in object field"); } _json_iter->descend_to(depth()+1); return SUCCESS; } simdjson_warn_unused simdjson_inline simdjson_result value_iterator::start_array() noexcept { SIMDJSON_TRY( start_container('[', "Not an array", "array") ); return started_array(); } simdjson_warn_unused simdjson_inline simdjson_result value_iterator::start_root_array() noexcept { SIMDJSON_TRY( start_container('[', "Not an array", "array") ); return started_root_array(); } inline std::string value_iterator::to_string() const noexcept { auto answer = std::string("value_iterator [ depth : ") + std::to_string(_depth) + std::string(", "); if(_json_iter != nullptr) { answer += _json_iter->to_string(); } answer += std::string(" ]"); return answer; } simdjson_warn_unused simdjson_inline simdjson_result value_iterator::started_array() noexcept { assert_at_container_start(); if 
(*_json_iter->peek() == ']') { logger::log_value(*_json_iter, "empty array"); _json_iter->return_current_and_advance(); SIMDJSON_TRY( end_container() ); return false; } _json_iter->descend_to(depth()+1); #if SIMDJSON_DEVELOPMENT_CHECKS _json_iter->set_start_position(_depth, start_position()); #endif return true; } simdjson_warn_unused simdjson_inline error_code value_iterator::check_root_array() noexcept { // When in streaming mode, we cannot expect peek_last() to be the last structural element of the // current document. It only works in the normal mode where we have indexed a single document. // Note that adding a check for 'streaming' is not expensive since we only have at most // one root element. if ( ! _json_iter->streaming() ) { // The following lines do not fully protect against garbage content within the // array: e.g., `[1, 2] foo]`. Users concerned with garbage content should // also call `at_end()` on the document instance at the end of the processing to // ensure that the processing has finished at the end. // if (*_json_iter->peek_last() != ']') { _json_iter->abandon(); return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "missing ] at end"); } // If the last character is ] *and* the first gibberish character is also ']' // then on-demand could accidentally go over. So we need additional checks. // https://github.com/simdjson/simdjson/issues/1834 // Checking that the document is balanced requires a full scan which is potentially // expensive, but it only happens in edge cases where the first padding character is // a closing bracket. if ((*_json_iter->peek(_json_iter->end_position()) == ']') && (!_json_iter->balanced())) { _json_iter->abandon(); // The exact error would require more work. It will typically be an unclosed array. 
return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "the document is unbalanced");
    }
  }
  return SUCCESS;
}

/**
 * Root-array variant of started_array(): the end of the document is validated
 * with check_root_array() first, and only then is the array entered.
 */
simdjson_warn_unused simdjson_inline simdjson_result value_iterator::started_root_array() noexcept {
  const auto root_check = check_root_array();
  if (root_check) { return root_check; }
  return started_array();
}

/**
 * Consumes the token that follows an array element.
 *
 * ',' means another element follows: descend to depth()+1 and return true.
 * ']' closes the array: run end_container() and return false.
 * Any other token is reported as TAPE_ERROR.
 */
simdjson_warn_unused simdjson_inline simdjson_result value_iterator::has_next_element() noexcept {
  assert_at_next();
  logger::log_event(*this, "has_next_element");

  const uint8_t separator = *_json_iter->return_current_and_advance();
  if (separator == ',') {
    _json_iter->descend_to(depth()+1);
    return true;
  }
  if (separator == ']') {
    logger::log_end_value(*_json_iter, "array");
    SIMDJSON_TRY( end_container() );
    return false;
  }
  return report_error(TAPE_ERROR, "Missing comma between array elements");
}

/**
 * Decides whether the bytes at `json` spell `true` or `false`, each followed
 * by a structural or whitespace character. Anything else is an
 * INCORRECT_TYPE error ("Not a boolean").
 */
simdjson_warn_unused simdjson_inline simdjson_result value_iterator::parse_bool(const uint8_t *json) const noexcept {
  auto not_true = atomparsing::str4ncmp(json, "true");
  auto not_false = atomparsing::str4ncmp(json, "fals") | (json[4] ^ 'e');
  bool error = (not_true && not_false) || jsoncharutils::is_not_structural_or_whitespace(json[not_true ?
5 : 4]);
  if (error) { return incorrect_type_error("Not a boolean"); }
  // not_true is zero exactly when the text matched "true".
  return simdjson_result(!not_true);
}

/**
 * Checks whether the bytes at `json` spell `null` followed by a structural or
 * whitespace character.
 *
 * Returns true for a null and false for any text that does not start with
 * 'n'. Text that starts with 'n' but is not a valid null is an INCORRECT_TYPE
 * error.
 */
simdjson_warn_unused simdjson_inline simdjson_result value_iterator::parse_null(const uint8_t *json) const noexcept {
  bool is_null_string = !atomparsing::str4ncmp(json, "null") && jsoncharutils::is_structural_or_whitespace(json[4]);
  // if we start with 'n', we must be a null
  if(!is_null_string && json[0]=='n') { return incorrect_type_error("Not a null but starts with n"); }
  return is_null_string;
}

/**
 * Returns the current value as an unescaped string: get_raw_json_string()
 * followed by unescape() with the given allow_replacement flag.
 */
simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_string(bool allow_replacement) noexcept {
  return get_raw_json_string().unescape(json_iter(), allow_replacement);
}

/**
 * Same as get_string(allow_replacement), but assigns the resulting
 * std::string_view to `receiver` and returns only the error code.
 */
// NOTE(review): `string_type` is not declared here; the template parameter list
// after `template` looks stripped from this copy of the file - confirm.
template
simdjson_warn_unused simdjson_inline error_code value_iterator::get_string(string_type& receiver, bool allow_replacement) noexcept {
  std::string_view content;
  auto err = get_string(allow_replacement).get(content);
  if (err) { return err; }
  receiver = content;
  return SUCCESS;
}

/** Like get_string(), but unescapes with unescape_wobbly(). */
simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_wobbly_string() noexcept {
  return get_raw_json_string().unescape_wobbly(json_iter());
}

/**
 * Returns the current value as a raw (still escaped) JSON string.
 *
 * The value must start with a double quote, otherwise INCORRECT_TYPE is
 * returned and the cursor is left untouched. On success the scalar is consumed
 * and the result points just past the opening quote.
 */
simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_raw_json_string() noexcept {
  auto json = peek_scalar("string");
  if (*json != '"') { return incorrect_type_error("Not a string"); }
  advance_scalar("string");
  return raw_json_string(json+1);
}

/**
 * Parses the current non-root scalar with numberparsing::parse_unsigned().
 * The scalar is consumed only when parsing succeeds.
 */
simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_uint64() noexcept {
  auto result = numberparsing::parse_unsigned(peek_non_root_scalar("uint64"));
  if(result.error() == SUCCESS) { advance_non_root_scalar("uint64"); }
  return result;
}

/**
 * Same as get_uint64(), but parses with
 * numberparsing::parse_unsigned_in_string().
 */
simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_uint64_in_string() noexcept {
  auto result = numberparsing::parse_unsigned_in_string(peek_non_root_scalar("uint64"));
  if(result.error() == SUCCESS) { advance_non_root_scalar("uint64"); }
  return result;
}

simdjson_warn_unused simdjson_inline simdjson_result
value_iterator::get_int64() noexcept { auto result = numberparsing::parse_integer(peek_non_root_scalar("int64")); if(result.error() == SUCCESS) { advance_non_root_scalar("int64"); } return result; } simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_int64_in_string() noexcept { auto result = numberparsing::parse_integer_in_string(peek_non_root_scalar("int64")); if(result.error() == SUCCESS) { advance_non_root_scalar("int64"); } return result; } simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_double() noexcept { auto result = numberparsing::parse_double(peek_non_root_scalar("double")); if(result.error() == SUCCESS) { advance_non_root_scalar("double"); } return result; } simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_double_in_string() noexcept { auto result = numberparsing::parse_double_in_string(peek_non_root_scalar("double")); if(result.error() == SUCCESS) { advance_non_root_scalar("double"); } return result; } simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_bool() noexcept { auto result = parse_bool(peek_non_root_scalar("bool")); if(result.error() == SUCCESS) { advance_non_root_scalar("bool"); } return result; } simdjson_inline simdjson_result value_iterator::is_null() noexcept { bool is_null_value; SIMDJSON_TRY(parse_null(peek_non_root_scalar("null")).get(is_null_value)); if(is_null_value) { advance_non_root_scalar("null"); } return is_null_value; } simdjson_inline bool value_iterator::is_negative() noexcept { return numberparsing::is_negative(peek_non_root_scalar("numbersign")); } simdjson_inline bool value_iterator::is_root_negative() noexcept { return numberparsing::is_negative(peek_root_scalar("numbersign")); } simdjson_inline simdjson_result value_iterator::is_integer() noexcept { return numberparsing::is_integer(peek_non_root_scalar("integer")); } simdjson_inline simdjson_result value_iterator::get_number_type() noexcept { return 
numberparsing::get_number_type(peek_non_root_scalar("integer"));
}

/**
 * Parses the current non-root scalar with numberparsing::parse_number() and
 * returns the resulting `number`, or the parse error.
 */
// NOTE(review): unlike the get_* functions nearby, this one does not call
// advance_non_root_scalar() after a successful parse - confirm that is intended.
simdjson_inline simdjson_result value_iterator::get_number() noexcept {
  number num;
  error_code error = numberparsing::parse_number(peek_non_root_scalar("number"), num);
  if(error) { return error; }
  return num;
}

/**
 * Tells whether the root scalar is an integer.
 *
 * The token is first copied into a small null-terminated buffer and
 * numberparsing::is_integer() runs on that copy. If the copy fails the answer
 * is false.
 *
 * @param check_trailing when true, a successful parse that is not the only
 *        token of the document yields TRAILING_CONTENT.
 */
simdjson_inline simdjson_result value_iterator::is_root_integer(bool check_trailing) noexcept {
  auto max_len = peek_start_length();
  auto json = peek_root_scalar("is_root_integer");
  uint8_t tmpbuf[20+1+1]{}; // <20 digits> is the longest possible unsigned integer
  tmpbuf[20+1] = '\0'; // make sure that buffer is always null terminated.
  if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 20+1)) {
    return false; // if there are more than 20 characters, it cannot be represented as an integer.
  }
  auto answer = numberparsing::is_integer(tmpbuf);
  // If the parsing was a success, we must still check that it is
  // a single scalar. Note that we parse first because of cases like '[]' where
  // getting TRAILING_CONTENT is wrong.
  if(check_trailing && (answer.error() == SUCCESS) && (!_json_iter->is_single_token())) { return TRAILING_CONTENT; }
  return answer;
}

/**
 * Classifies the root scalar with numberparsing::get_number_type(), working on
 * a null-terminated copy of the token. The cursor is not advanced.
 *
 * @param check_trailing when true, a successful classification that is not the
 *        only token of the document yields TRAILING_CONTENT.
 */
simdjson_inline simdjson_result value_iterator::get_root_number_type(bool check_trailing) noexcept {
  auto max_len = peek_start_length();
  auto json = peek_root_scalar("number");
  // Per https://www.exploringbinary.com/maximum-number-of-decimal-digits-in-binary-floating-point-numbers/,
  // 1074 is the maximum number of significant fractional digits. Add 8 more digits for the biggest
  // number: -0.<fraction>e-308.
  uint8_t tmpbuf[1074+8+1+1];
  tmpbuf[1074+8+1] = '\0'; // make sure that buffer is always null terminated.
if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 1074+8+1)) {
    logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 1082 characters");
    return NUMBER_ERROR;
  }
  auto answer = numberparsing::get_number_type(tmpbuf);
  if (check_trailing && (answer.error() == SUCCESS) && !_json_iter->is_single_token()) { return TRAILING_CONTENT; }
  return answer;
}

/**
 * Parses the root scalar as a `number`.
 *
 * The token is copied into a null-terminated buffer (NUMBER_ERROR if the copy
 * fails) and parsed with numberparsing::parse_number(). The scalar is consumed
 * only when parsing succeeds and the trailing-content check passes.
 *
 * @param check_trailing when true, TRAILING_CONTENT is returned if the number
 *        is not the only token of the document.
 */
simdjson_inline simdjson_result value_iterator::get_root_number(bool check_trailing) noexcept {
  auto max_len = peek_start_length();
  auto json = peek_root_scalar("number");
  // Per https://www.exploringbinary.com/maximum-number-of-decimal-digits-in-binary-floating-point-numbers/,
  // 1074 is the maximum number of significant fractional digits. Add 8 more digits for the biggest
  // number: -0.<fraction>e-308.
  uint8_t tmpbuf[1074+8+1+1];
  tmpbuf[1074+8+1] = '\0'; // make sure that buffer is always null terminated.
  if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 1074+8+1)) {
    logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 1082 characters");
    return NUMBER_ERROR;
  }
  number num;
  error_code error = numberparsing::parse_number(tmpbuf, num);
  if(error) { return error; }
  if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; }
  advance_root_scalar("number");
  return num;
}

/**
 * Returns the root value as an unescaped string:
 * get_root_raw_json_string(check_trailing) followed by unescape() with the
 * given allow_replacement flag.
 */
simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_string(bool check_trailing, bool allow_replacement) noexcept {
  return get_root_raw_json_string(check_trailing).unescape(json_iter(), allow_replacement);
}

/**
 * Same as get_root_string(check_trailing, allow_replacement), but assigns the
 * resulting std::string_view to `receiver` and returns only the error code.
 */
// NOTE(review): `string_type` is not declared here; the template parameter list
// after `template` looks stripped from this copy of the file - confirm.
template
simdjson_warn_unused simdjson_inline error_code value_iterator::get_root_string(string_type& receiver, bool check_trailing, bool allow_replacement) noexcept {
  std::string_view content;
  auto err = get_root_string(check_trailing, allow_replacement).get(content);
  if (err) { return err; }
  receiver = content;
  return SUCCESS;
}

/** Like get_root_string(), but unescapes with unescape_wobbly(). */
simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_wobbly_string(bool check_trailing) noexcept {
  return
get_root_raw_json_string(check_trailing).unescape_wobbly(json_iter());
}

/**
 * Returns the root value as a raw (still escaped) JSON string.
 *
 * The value must start with a double quote (else INCORRECT_TYPE). With
 * check_trailing set, it must also be the only token of the document (else
 * TRAILING_CONTENT). The scalar is consumed only when both checks pass; the
 * result points just past the opening quote.
 */
simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_raw_json_string(bool check_trailing) noexcept {
  auto json = peek_scalar("string");
  if (*json != '"') { return incorrect_type_error("Not a string"); }
  if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; }
  advance_scalar("string");
  return raw_json_string(json+1);
}

/**
 * Parses the root scalar with numberparsing::parse_unsigned().
 *
 * The token is copied into a null-terminated buffer first (NUMBER_ERROR if the
 * copy fails). The scalar is consumed only when parsing succeeds and the
 * trailing-content check passes.
 *
 * @param check_trailing when true, TRAILING_CONTENT is returned if the number
 *        is not the only token of the document.
 */
simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_uint64(bool check_trailing) noexcept {
  auto max_len = peek_start_length();
  auto json = peek_root_scalar("uint64");
  uint8_t tmpbuf[20+1+1]{}; // <20 digits> is the longest possible unsigned integer
  tmpbuf[20+1] = '\0'; // make sure that buffer is always null terminated.
  if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 20+1)) {
    logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 20 characters");
    return NUMBER_ERROR;
  }
  auto result = numberparsing::parse_unsigned(tmpbuf);
  if(result.error() == SUCCESS) {
    if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; }
    advance_root_scalar("uint64");
  }
  return result;
}

/**
 * Same as get_root_uint64(), but parses with
 * numberparsing::parse_unsigned_in_string().
 */
simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_uint64_in_string(bool check_trailing) noexcept {
  auto max_len = peek_start_length();
  auto json = peek_root_scalar("uint64");
  uint8_t tmpbuf[20+1+1]{}; // <20 digits> is the longest possible unsigned integer
  tmpbuf[20+1] = '\0'; // make sure that buffer is always null terminated.
if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 20+1)) { logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 20 characters"); return NUMBER_ERROR; } auto result = numberparsing::parse_unsigned_in_string(tmpbuf); if(result.error() == SUCCESS) { if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } advance_root_scalar("uint64"); } return result; } simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_int64(bool check_trailing) noexcept { auto max_len = peek_start_length(); auto json = peek_root_scalar("int64"); uint8_t tmpbuf[20+1+1]; // -<19 digits> is the longest possible integer tmpbuf[20+1] = '\0'; // make sure that buffer is always null terminated. if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 20+1)) { logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 20 characters"); return NUMBER_ERROR; } auto result = numberparsing::parse_integer(tmpbuf); if(result.error() == SUCCESS) { if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } advance_root_scalar("int64"); } return result; } simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_int64_in_string(bool check_trailing) noexcept { auto max_len = peek_start_length(); auto json = peek_root_scalar("int64"); uint8_t tmpbuf[20+1+1]; // -<19 digits> is the longest possible integer tmpbuf[20+1] = '\0'; // make sure that buffer is always null terminated. 
if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 20+1)) {
    logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 20 characters");
    return NUMBER_ERROR;
  }

  auto result = numberparsing::parse_integer_in_string(tmpbuf);
  if(result.error() == SUCCESS) {
    if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; }
    advance_root_scalar("int64");
  }
  return result;
}

/**
 * Parses the root scalar with numberparsing::parse_double().
 *
 * The token is copied into a null-terminated buffer first (NUMBER_ERROR if the
 * copy fails). The scalar is consumed only when parsing succeeds and the
 * trailing-content check passes.
 *
 * @param check_trailing when true, TRAILING_CONTENT is returned if the number
 *        is not the only token of the document.
 */
simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_double(bool check_trailing) noexcept {
  auto max_len = peek_start_length();
  auto json = peek_root_scalar("double");
  // Per https://www.exploringbinary.com/maximum-number-of-decimal-digits-in-binary-floating-point-numbers/,
  // 1074 is the maximum number of significant fractional digits. Add 8 more digits for the biggest
  // number: -0.<fraction>e-308.
  uint8_t tmpbuf[1074+8+1+1]; // +1 for null termination.
  tmpbuf[1074+8+1] = '\0'; // make sure that buffer is always null terminated.
  if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 1074+8+1)) {
    logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 1082 characters");
    return NUMBER_ERROR;
  }
  auto result = numberparsing::parse_double(tmpbuf);
  if(result.error() == SUCCESS) {
    if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; }
    advance_root_scalar("double");
  }
  return result;
}

/**
 * Same as get_root_double(), but parses with
 * numberparsing::parse_double_in_string().
 */
simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_double_in_string(bool check_trailing) noexcept {
  auto max_len = peek_start_length();
  auto json = peek_root_scalar("double");
  // Per https://www.exploringbinary.com/maximum-number-of-decimal-digits-in-binary-floating-point-numbers/,
  // 1074 is the maximum number of significant fractional digits. Add 8 more digits for the biggest
  // number: -0.<fraction>e-308.
  uint8_t tmpbuf[1074+8+1+1]; // +1 for null termination.
  tmpbuf[1074+8+1] = '\0'; // make sure that buffer is always null terminated.
if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 1074+8+1)) {
    logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 1082 characters");
    return NUMBER_ERROR;
  }
  auto result = numberparsing::parse_double_in_string(tmpbuf);
  if(result.error() == SUCCESS) {
    if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; }
    advance_root_scalar("double");
  }
  return result;
}

/**
 * Parses the root scalar with parse_bool().
 *
 * The token is copied into a null-terminated buffer first; if the copy fails
 * the value is reported as INCORRECT_TYPE ("Not a boolean"). The scalar is
 * consumed only when parsing succeeds and the trailing-content check passes.
 *
 * @param check_trailing when true, TRAILING_CONTENT is returned if the boolean
 *        is not the only token of the document.
 */
simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_bool(bool check_trailing) noexcept {
  auto max_len = peek_start_length();
  auto json = peek_root_scalar("bool");
  uint8_t tmpbuf[5+1+1]; // +1 for null termination
  tmpbuf[5+1] = '\0'; // make sure that buffer is always null terminated.
  if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 5+1)) { return incorrect_type_error("Not a boolean"); }
  auto result = parse_bool(tmpbuf);
  if(result.error() == SUCCESS) {
    if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; }
    advance_root_scalar("bool");
  }
  return result;
}

/**
 * Tells whether the root scalar is null.
 *
 * Unlike the other root getters, this works directly on the token: it needs at
 * least four bytes spelling "null", and json[4] is only inspected when the
 * token is longer than four bytes. The scalar is consumed only when it is null
 * and the trailing-content check passes.
 *
 * @param check_trailing when true, TRAILING_CONTENT is returned if the null is
 *        not the only token of the document.
 */
simdjson_inline simdjson_result value_iterator::is_root_null(bool check_trailing) noexcept {
  auto max_len = peek_start_length();
  auto json = peek_root_scalar("null");
  bool result = (max_len >= 4 && !atomparsing::str4ncmp(json, "null") &&
         (max_len == 4 || jsoncharutils::is_structural_or_whitespace(json[4])));
  if(result) { // we have something that looks like a null.
if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; }
    advance_root_scalar("null");
  }
  return result;
}

/**
 * Delegates to json_iterator::skip_child(depth()). The cursor must already be
 * past this value's start position and at this value's depth or deeper.
 */
simdjson_warn_unused simdjson_inline error_code value_iterator::skip_child() noexcept {
  SIMDJSON_ASSUME( _json_iter->token._position > _start_position );
  SIMDJSON_ASSUME( _json_iter->_depth >= _depth );

  return _json_iter->skip_child(depth());
}

/**
 * Creates a value_iterator for the child value at the current cursor position,
 * one level deeper than this iterator.
 */
simdjson_inline value_iterator value_iterator::child() const noexcept {
  assert_at_child();
  return { _json_iter, depth()+1, _json_iter->token.position() };
}

// GCC 7 warns when the first line of this function is inlined away into oblivion due to the caller
// relating depth and iterator depth, which is a desired effect. It does not happen if is_open is
// marked non-inline.
SIMDJSON_PUSH_DISABLE_WARNINGS
SIMDJSON_DISABLE_STRICT_OVERFLOW_WARNING
/** True while the json_iterator's depth is at least this value's depth. */
simdjson_inline bool value_iterator::is_open() const noexcept {
  return _json_iter->depth() >= depth();
}
SIMDJSON_POP_DISABLE_WARNINGS

/** Forwards to json_iterator::at_end(). */
simdjson_inline bool value_iterator::at_end() const noexcept {
  return _json_iter->at_end();
}

/** True when the cursor is exactly at this value's start position. */
simdjson_inline bool value_iterator::at_start() const noexcept {
  return _json_iter->token.position() == start_position();
}

/** True when the cursor is exactly one token past this value's start position. */
simdjson_inline bool value_iterator::at_first_field() const noexcept {
  SIMDJSON_ASSUME( _json_iter->token._position > _start_position );
  return _json_iter->token.position() == start_position() + 1;
}

/** Forwards to json_iterator::abandon(). */
simdjson_inline void value_iterator::abandon() noexcept {
  _json_iter->abandon();
}

/** Depth of this value. */
simdjson_warn_unused simdjson_inline depth_t value_iterator::depth() const noexcept {
  return _depth;
}

/** Error currently stored in the underlying json_iterator. */
simdjson_warn_unused simdjson_inline error_code value_iterator::error() const noexcept {
  return _json_iter->error;
}

/** Forwards to json_iterator::string_buf_loc(). */
simdjson_warn_unused simdjson_inline uint8_t *&value_iterator::string_buf_loc() noexcept {
  return _json_iter->string_buf_loc();
}

/** Read-only access to the underlying json_iterator. */
simdjson_warn_unused simdjson_inline const json_iterator &value_iterator::json_iter() const noexcept {
  return *_json_iter;
}

simdjson_warn_unused simdjson_inline json_iterator
&value_iterator::json_iter() noexcept {
  // Mutable access to the underlying json_iterator.
  return *_json_iter;
}

/** Pointer to the JSON text at this value's start position. */
simdjson_inline const uint8_t *value_iterator::peek_start() const noexcept {
  return _json_iter->peek(start_position());
}

/** Result of json_iterator::peek_length() for this value's start position. */
simdjson_inline uint32_t value_iterator::peek_start_length() const noexcept {
  return _json_iter->peek_length(start_position());
}

/**
 * Returns a pointer to this scalar's JSON text without consuming it.
 *
 * @param type label passed to the logger only.
 */
simdjson_inline const uint8_t *value_iterator::peek_scalar(const char *type) noexcept {
  logger::log_value(*_json_iter, start_position(), depth(), type);
  // If the cursor has already moved past the value, read it again from its start position.
  if (!is_at_start()) { return peek_start(); }

  // Still at the start: return the current token. Nothing is consumed here;
  // advance_scalar() does that.
  assert_at_start();
  return _json_iter->peek();
}

/**
 * Consumes this scalar: advances the cursor by one token and ascends to
 * depth()-1. Does nothing (apart from logging) if the cursor is no longer at
 * the value's start.
 *
 * @param type label passed to the logger only.
 */
simdjson_inline void value_iterator::advance_scalar(const char *type) noexcept {
  logger::log_value(*_json_iter, start_position(), depth(), type);
  // If we're not at the position anymore, we don't want to advance the cursor.
  if (!is_at_start()) { return; }

  // Get the JSON and advance the cursor, decreasing depth to signify that we have retrieved the value.
  assert_at_start();
  _json_iter->return_current_and_advance();
  _json_iter->ascend_to(depth()-1);
}

/**
 * Checks that this value begins with `start_char` and, if the cursor is still
 * at the value's start, consumes that character.
 *
 * @param start_char             expected first character ('[' for arrays).
 * @param incorrect_type_message message logged with the INCORRECT_TYPE error.
 * @param type                   label passed to the logger only.
 * @return SUCCESS, INCORRECT_TYPE, or (development checks only)
 *         OUT_OF_ORDER_ITERATION.
 */
simdjson_inline error_code value_iterator::start_container(uint8_t start_char, const char *incorrect_type_message, const char *type) noexcept {
  logger::log_start_value(*_json_iter, start_position(), depth(), type);
  // If we're not at the position anymore, we don't want to advance the cursor.
  const uint8_t *json;
  if (!is_at_start()) {
#if SIMDJSON_DEVELOPMENT_CHECKS
    if (!is_at_iterator_start()) { return OUT_OF_ORDER_ITERATION; }
#endif
    json = peek_start();
    if (*json != start_char) { return incorrect_type_error(incorrect_type_message); }
  } else {
    assert_at_start();
    /**
     * We should be prudent. Let us peek. If it is not the right type, we
     * return an error. Only once we have determined that we have the right
     * type are we allowed to advance!
*/
    json = _json_iter->peek();
    if (*json != start_char) { return incorrect_type_error(incorrect_type_message); }
    _json_iter->return_current_and_advance();
  }

  return SUCCESS;
}

/**
 * Returns a pointer to the root scalar's JSON text without consuming it. If
 * the cursor already moved on, the text is read again from the start position.
 *
 * @param type label passed to the logger only.
 */
simdjson_inline const uint8_t *value_iterator::peek_root_scalar(const char *type) noexcept {
  logger::log_value(*_json_iter, start_position(), depth(), type);
  if (!is_at_start()) { return peek_start(); }

  assert_at_root();
  return _json_iter->peek();
}

/**
 * Same as peek_root_scalar(), for a scalar that is not the document root
 * (asserts depth > 1 instead of depth == 1).
 */
simdjson_inline const uint8_t *value_iterator::peek_non_root_scalar(const char *type) noexcept {
  logger::log_value(*_json_iter, start_position(), depth(), type);
  if (!is_at_start()) { return peek_start(); }

  assert_at_non_root_start();
  return _json_iter->peek();
}

/**
 * Consumes the root scalar: advances the cursor by one token and ascends to
 * depth()-1. Does nothing (apart from logging) if the cursor is no longer at
 * the value's start.
 *
 * @param type label passed to the logger only.
 */
simdjson_inline void value_iterator::advance_root_scalar(const char *type) noexcept {
  logger::log_value(*_json_iter, start_position(), depth(), type);
  if (!is_at_start()) { return; }

  assert_at_root();
  _json_iter->return_current_and_advance();
  _json_iter->ascend_to(depth()-1);
}

/** Same as advance_root_scalar(), for a scalar that is not the document root. */
simdjson_inline void value_iterator::advance_non_root_scalar(const char *type) noexcept {
  logger::log_value(*_json_iter, start_position(), depth(), type);
  if (!is_at_start()) { return; }

  assert_at_non_root_start();
  _json_iter->return_current_and_advance();
  _json_iter->ascend_to(depth()-1);
}

/** Logs `message` as an error at this value's start position and returns INCORRECT_TYPE. */
simdjson_inline error_code value_iterator::incorrect_type_error(const char *message) const noexcept {
  logger::log_error(*_json_iter, start_position(), depth(), message);
  return INCORRECT_TYPE;
}

/** True when the cursor is exactly at this value's start position. */
simdjson_inline bool value_iterator::is_at_start() const noexcept {
  return position() == start_position();
}

/**
 * True when the json_iterator is at this value's depth and the current token
 * starts with a double quote.
 */
simdjson_inline bool value_iterator::is_at_key() const noexcept {
  // Keys are at the same depth as the object.
  // Note here that we could be safer and check that we are within an object,
  // but we do not.
return _depth == _json_iter->_depth && *_json_iter->peek() == '"';
}

/** True when the cursor is one or two tokens past this value's start position. */
simdjson_inline bool value_iterator::is_at_iterator_start() const noexcept {
  // We can legitimately be either at the first value ([1]), or after the array if it's empty ([]).
  auto delta = position() - start_position();
  return delta == 1 || delta == 2;
}

/** Asserts: cursor at the start position, iterator depth equal to this depth, depth > 0. */
inline void value_iterator::assert_at_start() const noexcept {
  SIMDJSON_ASSUME( _json_iter->token._position == _start_position );
  SIMDJSON_ASSUME( _json_iter->_depth == _depth );
  SIMDJSON_ASSUME( _depth > 0 );
}

/** Asserts: cursor one token past the start position, iterator depth equal to this depth, depth > 0. */
inline void value_iterator::assert_at_container_start() const noexcept {
  SIMDJSON_ASSUME( _json_iter->token._position == _start_position + 1 );
  SIMDJSON_ASSUME( _json_iter->_depth == _depth );
  SIMDJSON_ASSUME( _depth > 0 );
}

/** Asserts: cursor somewhere past the start position, iterator depth equal to this depth, depth > 0. */
inline void value_iterator::assert_at_next() const noexcept {
  SIMDJSON_ASSUME( _json_iter->token._position > _start_position );
  SIMDJSON_ASSUME( _json_iter->_depth == _depth );
  SIMDJSON_ASSUME( _depth > 0 );
}

/** Rewinds the json_iterator to this value: its depth and its start position. */
simdjson_inline void value_iterator::move_at_start() noexcept {
  _json_iter->_depth = _depth;
  _json_iter->token.set_position(_start_position);
}

/** Rewinds the json_iterator to the token right after this value's start position. */
simdjson_inline void value_iterator::move_at_container_start() noexcept {
  _json_iter->_depth = _depth;
  _json_iter->token.set_position(_start_position + 1);
}

/**
 * Restarts iteration over this array: returns the stored error if there is
 * one, otherwise rewinds with move_at_container_start() and returns the
 * result of started_array().
 */
simdjson_inline simdjson_result value_iterator::reset_array() noexcept {
  if(error()) { return error(); }
  move_at_container_start();
  return started_array();
}

/** Same as reset_array(), for objects: finishes with started_object(). */
simdjson_inline simdjson_result value_iterator::reset_object() noexcept {
  if(error()) { return error(); }
  move_at_container_start();
  return started_object();
}

/** Asserts: cursor past the start position, iterator depth equal to this depth + 1, depth > 0. */
inline void value_iterator::assert_at_child() const noexcept {
  SIMDJSON_ASSUME( _json_iter->token._position > _start_position );
  SIMDJSON_ASSUME( _json_iter->_depth == _depth + 1 );
  SIMDJSON_ASSUME( _depth > 0 );
}

/** assert_at_start() plus: this value is at depth 1. */
inline void value_iterator::assert_at_root() const noexcept {
  assert_at_start();
  SIMDJSON_ASSUME( _depth == 1 );
}

/** assert_at_start() plus: this value is deeper than depth 1. */
inline void value_iterator::assert_at_non_root_start() const noexcept {
assert_at_start();
  SIMDJSON_ASSUME( _depth > 1 );
}

/** Asserts that this iterator is bound to a json_iterator. */
inline void value_iterator::assert_is_valid() const noexcept {
  SIMDJSON_ASSUME( _json_iter != nullptr );
}

/** True when this iterator is bound to a json_iterator. */
simdjson_inline bool value_iterator::is_valid() const noexcept {
  return _json_iter != nullptr;
}

/**
 * Determines the JSON type from the first character at this value's start
 * position. Does not move the cursor. A character that cannot start a JSON
 * value yields TAPE_ERROR.
 */
simdjson_inline simdjson_result value_iterator::type() const noexcept {
  switch (*peek_start()) {
    case '{':
      return json_type::object;
    case '[':
      return json_type::array;
    case '"':
      return json_type::string;
    case 'n':
      return json_type::null;
    case 't': case 'f':
      return json_type::boolean;
    case '-':
    case '0': case '1': case '2': case '3': case '4':
    case '5': case '6': case '7': case '8': case '9':
      return json_type::number;
    default:
      return TAPE_ERROR;
  }
}

/** Position of the first token of this value. */
simdjson_inline token_position value_iterator::start_position() const noexcept {
  return _start_position;
}

/** Forwards to json_iterator::position(). */
simdjson_inline token_position value_iterator::position() const noexcept {
  return _json_iter->position();
}

/** Forwards to json_iterator::end_position(). */
simdjson_inline token_position value_iterator::end_position() const noexcept {
  return _json_iter->end_position();
}

/** Forwards to json_iterator::last_position(). */
simdjson_inline token_position value_iterator::last_position() const noexcept {
  return _json_iter->last_position();
}

/** Forwards to json_iterator::report_error() and returns its result. */
simdjson_inline error_code value_iterator::report_error(error_code error, const char *message) noexcept {
  return _json_iter->report_error(error, message);
}

} // namespace ondemand
} // namespace icelake
} // namespace simdjson

namespace simdjson {

// NOTE(review): the template arguments of simdjson_result,
// implementation_simdjson_result_base and std::forward look stripped from this
// copy of the file - confirm.

/** Wraps a value_iterator, forwarding it to the base result class. */
simdjson_inline simdjson_result::simdjson_result(icelake::ondemand::value_iterator &&value) noexcept
    : implementation_simdjson_result_base(std::forward(value)) {}
/** Wraps an error code. */
simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept
    : implementation_simdjson_result_base(error) {}

} // namespace simdjson

#endif // SIMDJSON_GENERIC_ONDEMAND_VALUE_ITERATOR_INL_H
/* end file simdjson/generic/ondemand/value_iterator-inl.h for icelake */
/* end file simdjson/generic/ondemand/amalgamated.h for icelake */
/* including simdjson/icelake/end.h: #include "simdjson/icelake/end.h" */
/* begin
file simdjson/icelake/end.h */ /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #include "simdjson/icelake/base.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ #if !SIMDJSON_CAN_ALWAYS_RUN_ICELAKE SIMDJSON_UNTARGET_REGION #endif /* undefining SIMDJSON_IMPLEMENTATION from "icelake" */ #undef SIMDJSON_IMPLEMENTATION /* end file simdjson/icelake/end.h */ #endif // SIMDJSON_ICELAKE_ONDEMAND_H /* end file simdjson/icelake/ondemand.h */ #elif SIMDJSON_BUILTIN_IMPLEMENTATION_IS(ppc64) /* including simdjson/ppc64/ondemand.h: #include "simdjson/ppc64/ondemand.h" */ /* begin file simdjson/ppc64/ondemand.h */ #ifndef SIMDJSON_PPC64_ONDEMAND_H #define SIMDJSON_PPC64_ONDEMAND_H /* including simdjson/ppc64/begin.h: #include "simdjson/ppc64/begin.h" */ /* begin file simdjson/ppc64/begin.h */ /* defining SIMDJSON_IMPLEMENTATION to "ppc64" */ #define SIMDJSON_IMPLEMENTATION ppc64 /* including simdjson/ppc64/base.h: #include "simdjson/ppc64/base.h" */ /* begin file simdjson/ppc64/base.h */ #ifndef SIMDJSON_PPC64_BASE_H #define SIMDJSON_PPC64_BASE_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #include "simdjson/base.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { /** * Implementation for ALTIVEC (PPC64). 
*/
namespace ppc64 {

class implementation;

namespace {
namespace simd {
// NOTE(review): the template parameter lists of these two forward declarations
// look stripped from this copy of the file - confirm.
template struct simd8;
template struct simd8x64;
} // namespace simd
} // unnamed namespace

} // namespace ppc64
} // namespace simdjson

#endif // SIMDJSON_PPC64_BASE_H
/* end file simdjson/ppc64/base.h */
/* including simdjson/ppc64/intrinsics.h: #include "simdjson/ppc64/intrinsics.h" */
/* begin file simdjson/ppc64/intrinsics.h */
#ifndef SIMDJSON_PPC64_INTRINSICS_H
#define SIMDJSON_PPC64_INTRINSICS_H

/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */
/* amalgamation skipped (editor-only): #include "simdjson/ppc64/base.h" */
/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */

// This should be the correct header whether
// you use visual studio or other compilers.
// NOTE(review): the header name is missing after #include in this copy of the
// file; the comment below suggests altivec.h - confirm.
#include

// These are defined by altivec.h in GCC toolchain, it is safe to undef them.
#ifdef bool
#undef bool
#endif

#ifdef vector
#undef vector
#endif

// A vector register must fit inside the padding that simdjson requires.
static_assert(sizeof(__vector unsigned char) <= simdjson::SIMDJSON_PADDING, "insufficient padding for ppc64");

#endif // SIMDJSON_PPC64_INTRINSICS_H
/* end file simdjson/ppc64/intrinsics.h */
/* including simdjson/ppc64/bitmanipulation.h: #include "simdjson/ppc64/bitmanipulation.h" */
/* begin file simdjson/ppc64/bitmanipulation.h */
#ifndef SIMDJSON_PPC64_BITMANIPULATION_H
#define SIMDJSON_PPC64_BITMANIPULATION_H

/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */
/* amalgamation skipped (editor-only): #include "simdjson/ppc64/base.h" */
/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */

namespace simdjson {
namespace ppc64 {
namespace {

// We sometimes call trailing_zero on inputs that are zero,
// but the algorithms do not end up using the returned value.
// Sadly, sanitizers are not smart enough to figure it out.
SIMDJSON_NO_SANITIZE_UNDEFINED
// This function can be used safely even if not all bytes have been
// initialized.
// See issue https://github.com/simdjson/simdjson/issues/1965
SIMDJSON_NO_SANITIZE_MEMORY
/* Number of zero bits below the lowest set bit of input_num. */
simdjson_inline int trailing_zeroes(uint64_t input_num) {
#if SIMDJSON_REGULAR_VISUAL_STUDIO
  unsigned long ret;
  // Search the mask data from least significant bit (LSB)
  // to the most significant bit (MSB) for a set bit (1).
  _BitScanForward64(&ret, input_num);
  return (int)ret;
#else // SIMDJSON_REGULAR_VISUAL_STUDIO
  return __builtin_ctzll(input_num);
#endif // SIMDJSON_REGULAR_VISUAL_STUDIO
}

/* Clears the lowest set bit. The result is zero when input_num is zero. */
simdjson_inline uint64_t clear_lowest_bit(uint64_t input_num) {
  return input_num & (input_num - 1);
}

/*
 * Number of zero bits above the highest set bit of input_num.
 * For a zero input the Visual Studio branch returns 64; the result of the
 * builtin branch might be undefined.
 */
simdjson_inline int leading_zeroes(uint64_t input_num) {
#if SIMDJSON_REGULAR_VISUAL_STUDIO
  unsigned long leading_zero = 0;
  // Search the mask data from most significant bit (MSB)
  // to least significant bit (LSB) for a set bit (1).
  if (_BitScanReverse64(&leading_zero, input_num))
    return (int)(63 - leading_zero);
  else
    return 64;
#else
  return __builtin_clzll(input_num);
#endif // SIMDJSON_REGULAR_VISUAL_STUDIO
}

/* Number of set bits in input_num. */
#if SIMDJSON_REGULAR_VISUAL_STUDIO
simdjson_inline int count_ones(uint64_t input_num) {
  // note: we do not support legacy 32-bit Windows in this kernel
  return __popcnt64(input_num); // Visual Studio wants two underscores
}
#else
simdjson_inline int count_ones(uint64_t input_num) {
  return __builtin_popcountll(input_num);
}
#endif

/*
 * Stores value1 + value2 (wrapping) in *result and returns true when the
 * addition overflowed 64 bits.
 */
simdjson_inline bool add_overflow(uint64_t value1, uint64_t value2,
                                  uint64_t *result) {
#if SIMDJSON_REGULAR_VISUAL_STUDIO
  *result = value1 + value2;
  // An unsigned sum that wrapped around is smaller than either operand.
  return *result < value1;
#else
  // NOTE(review): the target type of this reinterpret_cast looks stripped from
  // this copy of the file - confirm.
  return __builtin_uaddll_overflow(value1, value2,
                                   reinterpret_cast(result));
#endif
}

} // unnamed namespace
} // namespace ppc64
} // namespace simdjson

#endif // SIMDJSON_PPC64_BITMANIPULATION_H
/* end file simdjson/ppc64/bitmanipulation.h */
/* including simdjson/ppc64/bitmask.h: #include "simdjson/ppc64/bitmask.h" */
/* begin file simdjson/ppc64/bitmask.h */
#ifndef SIMDJSON_PPC64_BITMASK_H
#define SIMDJSON_PPC64_BITMASK_H

/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */
/* amalgamation skipped (editor-only): #include "simdjson/ppc64/base.h" */
/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */

namespace simdjson {
namespace ppc64 {
namespace {

//
// Perform a "cumulative bitwise xor," flipping bits each time a 1 is
// encountered.
//
// For example, prefix_xor(00100100) == 00011100
//
simdjson_inline uint64_t prefix_xor(uint64_t bitmask) {
  // You can use the version below, however gcc sometimes miscompiles
  // vec_pmsum_be, it happens somewhere around between 8 and 9th version.
  // The performance boost was not noticeable, falling back to a usual
  // implementation.
  //   __vector unsigned long long all_ones = {~0ull, ~0ull};
  //   __vector unsigned long long mask = {bitmask, 0};
  //   // Clang and GCC return different values for pmsum for ull so cast it to one.
  //   // Generally it is not specified by ALTIVEC ISA what is returned by
  //   // vec_pmsum_be.
// #if defined(__LITTLE_ENDIAN__) // return (uint64_t)(((__vector unsigned long long)vec_pmsum_be(all_ones, mask))[0]); // #else // return (uint64_t)(((__vector unsigned long long)vec_pmsum_be(all_ones, mask))[1]); // #endif bitmask ^= bitmask << 1; bitmask ^= bitmask << 2; bitmask ^= bitmask << 4; bitmask ^= bitmask << 8; bitmask ^= bitmask << 16; bitmask ^= bitmask << 32; return bitmask; } } // unnamed namespace } // namespace ppc64 } // namespace simdjson #endif /* end file simdjson/ppc64/bitmask.h */ /* including simdjson/ppc64/numberparsing_defs.h: #include "simdjson/ppc64/numberparsing_defs.h" */ /* begin file simdjson/ppc64/numberparsing_defs.h */ #ifndef SIMDJSON_PPC64_NUMBERPARSING_DEFS_H #define SIMDJSON_PPC64_NUMBERPARSING_DEFS_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #include "simdjson/ppc64/base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/ppc64/intrinsics.h" */ /* amalgamation skipped (editor-only): #include "simdjson/internal/numberparsing_tables.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ #include #if defined(__linux__) #include #elif defined(__FreeBSD__) #include #endif namespace simdjson { namespace ppc64 { namespace numberparsing { // we don't have appropriate instructions, so let us use a scalar function // credit: https://johnnylee-sde.github.io/Fast-numeric-string-to-int/ /** @private */ static simdjson_inline uint32_t parse_eight_digits_unrolled(const uint8_t *chars) { uint64_t val; std::memcpy(&val, chars, sizeof(uint64_t)); #ifdef __BIG_ENDIAN__ #if defined(__linux__) val = bswap_64(val); #elif defined(__FreeBSD__) val = bswap64(val); #endif #endif val = (val & 0x0F0F0F0F0F0F0F0F) * 2561 >> 8; val = (val & 0x00FF00FF00FF00FF) * 6553601 >> 16; return uint32_t((val & 0x0000FFFF0000FFFF) * 42949672960001 >> 32); } /** @private */ simdjson_inline internal::value128 full_multiplication(uint64_t value1, 
uint64_t value2) { internal::value128 answer; #if SIMDJSON_REGULAR_VISUAL_STUDIO || SIMDJSON_IS_32BITS #ifdef _M_ARM64 // ARM64 has native support for 64-bit multiplications, no need to emultate answer.high = __umulh(value1, value2); answer.low = value1 * value2; #else answer.low = _umul128(value1, value2, &answer.high); // _umul128 not available on ARM64 #endif // _M_ARM64 #else // SIMDJSON_REGULAR_VISUAL_STUDIO || SIMDJSON_IS_32BITS __uint128_t r = (static_cast<__uint128_t>(value1)) * value2; answer.low = uint64_t(r); answer.high = uint64_t(r >> 64); #endif return answer; } } // namespace numberparsing } // namespace ppc64 } // namespace simdjson #define SIMDJSON_SWAR_NUMBER_PARSING 1 #endif // SIMDJSON_PPC64_NUMBERPARSING_DEFS_H /* end file simdjson/ppc64/numberparsing_defs.h */ /* including simdjson/ppc64/simd.h: #include "simdjson/ppc64/simd.h" */ /* begin file simdjson/ppc64/simd.h */ #ifndef SIMDJSON_PPC64_SIMD_H #define SIMDJSON_PPC64_SIMD_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #include "simdjson/ppc64/base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/ppc64/bitmanipulation.h" */ /* amalgamation skipped (editor-only): #include "simdjson/internal/simdprune_tables.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ #include namespace simdjson { namespace ppc64 { namespace { namespace simd { using __m128i = __vector unsigned char; template struct base { __m128i value; // Zero constructor simdjson_inline base() : value{__m128i()} {} // Conversion from SIMD register simdjson_inline base(const __m128i _value) : value(_value) {} // Conversion to SIMD register simdjson_inline operator const __m128i &() const { return this->value; } simdjson_inline operator __m128i &() { return this->value; } // Bit operations simdjson_inline Child operator|(const Child other) const { return vec_or(this->value, (__m128i)other); } simdjson_inline Child 
operator&(const Child other) const { return vec_and(this->value, (__m128i)other); } simdjson_inline Child operator^(const Child other) const { return vec_xor(this->value, (__m128i)other); } simdjson_inline Child bit_andnot(const Child other) const { return vec_andc(this->value, (__m128i)other); } simdjson_inline Child &operator|=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast | other; return *this_cast; } simdjson_inline Child &operator&=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast & other; return *this_cast; } simdjson_inline Child &operator^=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast ^ other; return *this_cast; } }; template > struct base8 : base> { typedef uint16_t bitmask_t; typedef uint32_t bitmask2_t; simdjson_inline base8() : base>() {} simdjson_inline base8(const __m128i _value) : base>(_value) {} friend simdjson_inline Mask operator==(const simd8 lhs, const simd8 rhs) { return (__m128i)vec_cmpeq(lhs.value, (__m128i)rhs); } static const int SIZE = sizeof(base>::value); template simdjson_inline simd8 prev(simd8 prev_chunk) const { __m128i chunk = this->value; #ifdef __LITTLE_ENDIAN__ chunk = (__m128i)vec_reve(this->value); prev_chunk = (__m128i)vec_reve((__m128i)prev_chunk); #endif chunk = (__m128i)vec_sld((__m128i)prev_chunk, (__m128i)chunk, 16 - N); #ifdef __LITTLE_ENDIAN__ chunk = (__m128i)vec_reve((__m128i)chunk); #endif return chunk; } }; // SIMD byte mask type (returned by things like eq and gt) template <> struct simd8 : base8 { static simdjson_inline simd8 splat(bool _value) { return (__m128i)vec_splats((unsigned char)(-(!!_value))); } simdjson_inline simd8() : base8() {} simdjson_inline simd8(const __m128i _value) : base8(_value) {} // Splat constructor simdjson_inline simd8(bool _value) : base8(splat(_value)) {} simdjson_inline int to_bitmask() const { __vector unsigned long long result; const __m128i perm_mask = {0x78, 0x70, 0x68, 0x60, 
0x58, 0x50, 0x48, 0x40, 0x38, 0x30, 0x28, 0x20, 0x18, 0x10, 0x08, 0x00}; result = ((__vector unsigned long long)vec_vbpermq((__m128i)this->value, (__m128i)perm_mask)); #ifdef __LITTLE_ENDIAN__ return static_cast(result[1]); #else return static_cast(result[0]); #endif } simdjson_inline bool any() const { return !vec_all_eq(this->value, (__m128i)vec_splats(0)); } simdjson_inline simd8 operator~() const { return this->value ^ (__m128i)splat(true); } }; template struct base8_numeric : base8 { static simdjson_inline simd8 splat(T value) { (void)value; return (__m128i)vec_splats(value); } static simdjson_inline simd8 zero() { return splat(0); } static simdjson_inline simd8 load(const T values[16]) { return (__m128i)(vec_vsx_ld(0, reinterpret_cast(values))); } // Repeat 16 values as many times as necessary (usually for lookup tables) static simdjson_inline simd8 repeat_16(T v0, T v1, T v2, T v3, T v4, T v5, T v6, T v7, T v8, T v9, T v10, T v11, T v12, T v13, T v14, T v15) { return simd8(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15); } simdjson_inline base8_numeric() : base8() {} simdjson_inline base8_numeric(const __m128i _value) : base8(_value) {} // Store to array simdjson_inline void store(T dst[16]) const { vec_vsx_st(this->value, 0, reinterpret_cast<__m128i *>(dst)); } // Override to distinguish from bool version simdjson_inline simd8 operator~() const { return *this ^ 0xFFu; } // Addition/subtraction are the same for signed and unsigned simdjson_inline simd8 operator+(const simd8 other) const { return (__m128i)((__m128i)this->value + (__m128i)other); } simdjson_inline simd8 operator-(const simd8 other) const { return (__m128i)((__m128i)this->value - (__m128i)other); } simdjson_inline simd8 &operator+=(const simd8 other) { *this = *this + other; return *static_cast *>(this); } simdjson_inline simd8 &operator-=(const simd8 other) { *this = *this - other; return *static_cast *>(this); } // Perform a lookup assuming the value is between 0 and 16 
(undefined behavior // for out of range values) template simdjson_inline simd8 lookup_16(simd8 lookup_table) const { return (__m128i)vec_perm((__m128i)lookup_table, (__m128i)lookup_table, this->value); } // Copies to 'output" all bytes corresponding to a 0 in the mask (interpreted // as a bitset). Passing a 0 value for mask would be equivalent to writing out // every byte to output. Only the first 16 - count_ones(mask) bytes of the // result are significant but 16 bytes get written. Design consideration: it // seems like a function with the signature simd8 compress(uint32_t mask) // would be sensible, but the AVX ISA makes this kind of approach difficult. template simdjson_inline void compress(uint16_t mask, L *output) const { using internal::BitsSetTable256mul2; using internal::pshufb_combine_table; using internal::thintable_epi8; // this particular implementation was inspired by work done by @animetosho // we do it in two steps, first 8 bytes and then second 8 bytes uint8_t mask1 = uint8_t(mask); // least significant 8 bits uint8_t mask2 = uint8_t(mask >> 8); // most significant 8 bits // next line just loads the 64-bit values thintable_epi8[mask1] and // thintable_epi8[mask2] into a 128-bit register, using only // two instructions on most compilers. #ifdef __LITTLE_ENDIAN__ __m128i shufmask = (__m128i)(__vector unsigned long long){ thintable_epi8[mask1], thintable_epi8[mask2]}; #else __m128i shufmask = (__m128i)(__vector unsigned long long){ thintable_epi8[mask2], thintable_epi8[mask1]}; shufmask = (__m128i)vec_reve((__m128i)shufmask); #endif // we increment by 0x08 the second half of the mask shufmask = ((__m128i)shufmask) + ((__m128i)(__vector int){0, 0, 0x08080808, 0x08080808}); // this is the version "nearly pruned" __m128i pruned = vec_perm(this->value, this->value, shufmask); // we still need to put the two halves together. 
// we compute the popcount of the first half: int pop1 = BitsSetTable256mul2[mask1]; // then load the corresponding mask, what it does is to write // only the first pop1 bytes from the first 8 bytes, and then // it fills in with the bytes from the second 8 bytes + some filling // at the end. __m128i compactmask = vec_vsx_ld(0, reinterpret_cast(pshufb_combine_table + pop1 * 8)); __m128i answer = vec_perm(pruned, (__m128i)vec_splats(0), compactmask); vec_vsx_st(answer, 0, reinterpret_cast<__m128i *>(output)); } template simdjson_inline simd8 lookup_16(L replace0, L replace1, L replace2, L replace3, L replace4, L replace5, L replace6, L replace7, L replace8, L replace9, L replace10, L replace11, L replace12, L replace13, L replace14, L replace15) const { return lookup_16(simd8::repeat_16( replace0, replace1, replace2, replace3, replace4, replace5, replace6, replace7, replace8, replace9, replace10, replace11, replace12, replace13, replace14, replace15)); } }; // Signed bytes template <> struct simd8 : base8_numeric { simdjson_inline simd8() : base8_numeric() {} simdjson_inline simd8(const __m128i _value) : base8_numeric(_value) {} // Splat constructor simdjson_inline simd8(int8_t _value) : simd8(splat(_value)) {} // Array constructor simdjson_inline simd8(const int8_t *values) : simd8(load(values)) {} // Member-by-member initialization simdjson_inline simd8(int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15) : simd8((__m128i)(__vector signed char){v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15}) {} // Repeat 16 values as many times as necessary (usually for lookup tables) simdjson_inline static simd8 repeat_16(int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15) { return simd8(v0, v1, v2, v3, 
v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15); } // Order-sensitive comparisons simdjson_inline simd8 max_val(const simd8 other) const { return (__m128i)vec_max((__vector signed char)this->value, (__vector signed char)(__m128i)other); } simdjson_inline simd8 min_val(const simd8 other) const { return (__m128i)vec_min((__vector signed char)this->value, (__vector signed char)(__m128i)other); } simdjson_inline simd8 operator>(const simd8 other) const { return (__m128i)vec_cmpgt((__vector signed char)this->value, (__vector signed char)(__m128i)other); } simdjson_inline simd8 operator<(const simd8 other) const { return (__m128i)vec_cmplt((__vector signed char)this->value, (__vector signed char)(__m128i)other); } }; // Unsigned bytes template <> struct simd8 : base8_numeric { simdjson_inline simd8() : base8_numeric() {} simdjson_inline simd8(const __m128i _value) : base8_numeric(_value) {} // Splat constructor simdjson_inline simd8(uint8_t _value) : simd8(splat(_value)) {} // Array constructor simdjson_inline simd8(const uint8_t *values) : simd8(load(values)) {} // Member-by-member initialization simdjson_inline simd8(uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15) : simd8((__m128i){v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15}) {} // Repeat 16 values as many times as necessary (usually for lookup tables) simdjson_inline static simd8 repeat_16(uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15) { return simd8(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15); } // Saturated math simdjson_inline simd8 saturating_add(const simd8 other) const { return (__m128i)vec_adds(this->value, (__m128i)other); } simdjson_inline simd8 
saturating_sub(const simd8 other) const { return (__m128i)vec_subs(this->value, (__m128i)other); } // Order-specific operations simdjson_inline simd8 max_val(const simd8 other) const { return (__m128i)vec_max(this->value, (__m128i)other); } simdjson_inline simd8 min_val(const simd8 other) const { return (__m128i)vec_min(this->value, (__m128i)other); } // Same as >, but only guarantees true is nonzero (< guarantees true = -1) simdjson_inline simd8 gt_bits(const simd8 other) const { return this->saturating_sub(other); } // Same as <, but only guarantees true is nonzero (< guarantees true = -1) simdjson_inline simd8 lt_bits(const simd8 other) const { return other.saturating_sub(*this); } simdjson_inline simd8 operator<=(const simd8 other) const { return other.max_val(*this) == other; } simdjson_inline simd8 operator>=(const simd8 other) const { return other.min_val(*this) == other; } simdjson_inline simd8 operator>(const simd8 other) const { return this->gt_bits(other).any_bits_set(); } simdjson_inline simd8 operator<(const simd8 other) const { return this->gt_bits(other).any_bits_set(); } // Bit-specific operations simdjson_inline simd8 bits_not_set() const { return (__m128i)vec_cmpeq(this->value, (__m128i)vec_splats(uint8_t(0))); } simdjson_inline simd8 bits_not_set(simd8 bits) const { return (*this & bits).bits_not_set(); } simdjson_inline simd8 any_bits_set() const { return ~this->bits_not_set(); } simdjson_inline simd8 any_bits_set(simd8 bits) const { return ~this->bits_not_set(bits); } simdjson_inline bool bits_not_set_anywhere() const { return vec_all_eq(this->value, (__m128i)vec_splats(0)); } simdjson_inline bool any_bits_set_anywhere() const { return !bits_not_set_anywhere(); } simdjson_inline bool bits_not_set_anywhere(simd8 bits) const { return vec_all_eq(vec_and(this->value, (__m128i)bits), (__m128i)vec_splats(0)); } simdjson_inline bool any_bits_set_anywhere(simd8 bits) const { return !bits_not_set_anywhere(bits); } template simdjson_inline simd8 shr() 
const { return simd8( (__m128i)vec_sr(this->value, (__m128i)vec_splat_u8(N))); } template simdjson_inline simd8 shl() const { return simd8( (__m128i)vec_sl(this->value, (__m128i)vec_splat_u8(N))); } }; template struct simd8x64 { static constexpr int NUM_CHUNKS = 64 / sizeof(simd8); static_assert(NUM_CHUNKS == 4, "PPC64 kernel should use four registers per 64-byte block."); const simd8 chunks[NUM_CHUNKS]; simd8x64(const simd8x64 &o) = delete; // no copy allowed simd8x64 & operator=(const simd8& other) = delete; // no assignment allowed simd8x64() = delete; // no default constructor allowed simdjson_inline simd8x64(const simd8 chunk0, const simd8 chunk1, const simd8 chunk2, const simd8 chunk3) : chunks{chunk0, chunk1, chunk2, chunk3} {} simdjson_inline simd8x64(const T ptr[64]) : chunks{simd8::load(ptr), simd8::load(ptr + 16), simd8::load(ptr + 32), simd8::load(ptr + 48)} {} simdjson_inline void store(T ptr[64]) const { this->chunks[0].store(ptr + sizeof(simd8) * 0); this->chunks[1].store(ptr + sizeof(simd8) * 1); this->chunks[2].store(ptr + sizeof(simd8) * 2); this->chunks[3].store(ptr + sizeof(simd8) * 3); } simdjson_inline simd8 reduce_or() const { return (this->chunks[0] | this->chunks[1]) | (this->chunks[2] | this->chunks[3]); } simdjson_inline uint64_t compress(uint64_t mask, T *output) const { this->chunks[0].compress(uint16_t(mask), output); this->chunks[1].compress(uint16_t(mask >> 16), output + 16 - count_ones(mask & 0xFFFF)); this->chunks[2].compress(uint16_t(mask >> 32), output + 32 - count_ones(mask & 0xFFFFFFFF)); this->chunks[3].compress(uint16_t(mask >> 48), output + 48 - count_ones(mask & 0xFFFFFFFFFFFF)); return 64 - count_ones(mask); } simdjson_inline uint64_t to_bitmask() const { uint64_t r0 = uint32_t(this->chunks[0].to_bitmask()); uint64_t r1 = this->chunks[1].to_bitmask(); uint64_t r2 = this->chunks[2].to_bitmask(); uint64_t r3 = this->chunks[3].to_bitmask(); return r0 | (r1 << 16) | (r2 << 32) | (r3 << 48); } simdjson_inline uint64_t eq(const 
T m) const { const simd8 mask = simd8::splat(m); return simd8x64(this->chunks[0] == mask, this->chunks[1] == mask, this->chunks[2] == mask, this->chunks[3] == mask) .to_bitmask(); } simdjson_inline uint64_t eq(const simd8x64 &other) const { return simd8x64(this->chunks[0] == other.chunks[0], this->chunks[1] == other.chunks[1], this->chunks[2] == other.chunks[2], this->chunks[3] == other.chunks[3]) .to_bitmask(); } simdjson_inline uint64_t lteq(const T m) const { const simd8 mask = simd8::splat(m); return simd8x64(this->chunks[0] <= mask, this->chunks[1] <= mask, this->chunks[2] <= mask, this->chunks[3] <= mask) .to_bitmask(); } }; // struct simd8x64 } // namespace simd } // unnamed namespace } // namespace ppc64 } // namespace simdjson #endif // SIMDJSON_PPC64_SIMD_INPUT_H /* end file simdjson/ppc64/simd.h */ /* including simdjson/ppc64/stringparsing_defs.h: #include "simdjson/ppc64/stringparsing_defs.h" */ /* begin file simdjson/ppc64/stringparsing_defs.h */ #ifndef SIMDJSON_PPC64_STRINGPARSING_DEFS_H #define SIMDJSON_PPC64_STRINGPARSING_DEFS_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #include "simdjson/ppc64/base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/ppc64/bitmanipulation.h" */ /* amalgamation skipped (editor-only): #include "simdjson/ppc64/simd.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { namespace ppc64 { namespace { using namespace simd; // Holds backslashes and quotes locations. 
struct backslash_and_quote {
public:
  // Number of input bytes examined (and copied) by one copy_and_find call.
  static constexpr uint32_t BYTES_PROCESSED = 32;
  simdjson_inline static backslash_and_quote
  copy_and_find(const uint8_t *src, uint8_t *dst);

  // True when a quote occurs before the first backslash, or when there is a
  // quote and no backslash at all. bs_bits - 1 has every bit below the lowest
  // backslash set (all bits when bs_bits is zero); the higher backslash bits
  // it retains cannot meet quote_bits because a byte is never both.
  simdjson_inline bool has_quote_first() {
    const uint32_t candidates = bs_bits - 1;
    return (candidates & quote_bits) != 0;
  }
  simdjson_inline bool has_backslash() { return bs_bits != 0; }
  // Offset of the first quote / backslash within the window.
  simdjson_inline int quote_index() { return trailing_zeroes(quote_bits); }
  simdjson_inline int backslash_index() { return trailing_zeroes(bs_bits); }

  uint32_t bs_bits;    // bit i set: byte i is a backslash
  uint32_t quote_bits; // bit i set: byte i is a double quote
}; // struct backslash_and_quote

// Copies BYTES_PROCESSED bytes from src to dst and reports where the
// backslashes and double quotes are inside those bytes.
simdjson_inline backslash_and_quote
backslash_and_quote::copy_and_find(const uint8_t *src, uint8_t *dst) {
  // this can read up to 31 bytes beyond the buffer size, but we require
  // SIMDJSON_PADDING of padding
  static_assert(SIMDJSON_PADDING >= (BYTES_PROCESSED - 1),
                "backslash and quote finder must process fewer than "
                "SIMDJSON_PADDING bytes");
  const simd8<uint8_t> first_half(src);
  const simd8<uint8_t> second_half(src + sizeof(first_half));
  first_half.store(dst);
  second_half.store(dst + sizeof(first_half));

  // Getting a 64-bit bitmask is much cheaper than multiple 16-bit bitmasks on
  // PPC; therefore, the four comparison results are packed into one 64-byte
  // mask: backslashes end up in the low 32 bits, quotes in the high 32 bits.
  const uint64_t combined =
      simd8x64<bool>(first_half == '\\', second_half == '\\',
                     first_half == '"', second_half == '"')
          .to_bitmask();

  backslash_and_quote found;
  found.bs_bits = uint32_t(combined);
  found.quote_bits = uint32_t(combined >> 32);
  return found;
}
namespace simdjson {
namespace ppc64 {
/**
 * A fast, simple, DOM-like interface that parses JSON as you use it.
 *
 * Designed for maximum speed and a lower memory profile.
 */
namespace ondemand {

/** Represents the depth of a JSON value (number of nested arrays/objects). */
using depth_t = int32_t;

/** @copydoc simdjson::ppc64::number_type */
using number_type = simdjson::ppc64::number_type;

/** @private Position in the JSON buffer indexes */
using token_position = const uint32_t *;

// Forward declarations only: they let the On Demand headers that follow name
// these types before their definitions have been seen.
class array;
class array_iterator;
class document;
class document_reference;
class document_stream;
class field;
class json_iterator;
enum class json_type;
struct number;
class object;
class object_iterator;
class parser;
class raw_json_string;
class token_iterator;
class value;
class value_iterator;

} // namespace ondemand
} // namespace ppc64
} // namespace simdjson
"simdjson/generic/ondemand/base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { namespace ppc64 { namespace ondemand { /** * Iterates through a single JSON value at a particular depth. * * Does not keep track of the type of value: provides methods for objects, arrays and scalars and expects * the caller to call the right ones. * * @private This is not intended for external use. */ class value_iterator { protected: /** The underlying JSON iterator */ json_iterator *_json_iter{}; /** The depth of this value */ depth_t _depth{}; /** * The starting token index for this value */ token_position _start_position{}; public: simdjson_inline value_iterator() noexcept = default; /** * Denote that we're starting a document. */ simdjson_inline void start_document() noexcept; /** * Skips a non-iterated or partially-iterated JSON value, whether it is a scalar, array or object. * * Optimized for scalars. */ simdjson_warn_unused simdjson_inline error_code skip_child() noexcept; /** * Tell whether the iterator is at the EOF mark */ simdjson_inline bool at_end() const noexcept; /** * Tell whether the iterator is at the start of the value */ simdjson_inline bool at_start() const noexcept; /** * Tell whether the value is open--if the value has not been used, or the array/object is still open. */ simdjson_inline bool is_open() const noexcept; /** * Tell whether the value is at an object's first field (just after the {). */ simdjson_inline bool at_first_field() const noexcept; /** * Abandon all iteration. */ simdjson_inline void abandon() noexcept; /** * Get the child value as a value_iterator. */ simdjson_inline value_iterator child_value() const noexcept; /** * Get the depth of this value. */ simdjson_inline int32_t depth() const noexcept; /** * Get the JSON type of this value. 
* * @error TAPE_ERROR when the JSON value is a bad token like "}" "," or "alse". */ simdjson_inline simdjson_result type() const noexcept; /** * @addtogroup object Object iteration * * Methods to iterate and find object fields. These methods generally *assume* the value is * actually an object; the caller is responsible for keeping track of that fact. * * @{ */ /** * Start an object iteration. * * @returns Whether the object had any fields (returns false for empty). * @error INCORRECT_TYPE if there is no opening { */ simdjson_warn_unused simdjson_inline simdjson_result start_object() noexcept; /** * Start an object iteration from the root. * * @returns Whether the object had any fields (returns false for empty). * @error INCORRECT_TYPE if there is no opening { * @error TAPE_ERROR if there is no matching } at end of document */ simdjson_warn_unused simdjson_inline simdjson_result start_root_object() noexcept; /** * Checks whether an object could be started from the root. May be called by start_root_object. * * @returns SUCCESS if it is possible to safely start an object from the root (document level). * @error INCORRECT_TYPE if there is no opening { * @error TAPE_ERROR if there is no matching } at end of document */ simdjson_warn_unused simdjson_inline error_code check_root_object() noexcept; /** * Start an object iteration after the user has already checked and moved past the {. * * Does not move the iterator unless the object is empty ({}). * * @returns Whether the object had any fields (returns false for empty). * @error INCOMPLETE_ARRAY_OR_OBJECT If there are no more tokens (implying the *parent* * array or object is incomplete). */ simdjson_warn_unused simdjson_inline simdjson_result started_object() noexcept; /** * Start an object iteration from the root, after the user has already checked and moved past the {. * * Does not move the iterator unless the object is empty ({}). * * @returns Whether the object had any fields (returns false for empty). 
* @error INCOMPLETE_ARRAY_OR_OBJECT If there are no more tokens (implying the *parent* * array or object is incomplete). */ simdjson_warn_unused simdjson_inline simdjson_result started_root_object() noexcept; /** * Moves to the next field in an object. * * Looks for , and }. If } is found, the object is finished and the iterator advances past it. * Otherwise, it advances to the next value. * * @return whether there is another field in the object. * @error TAPE_ERROR If there is a comma missing between fields. * @error TAPE_ERROR If there is a comma, but not enough tokens remaining to have a key, :, and value. */ simdjson_warn_unused simdjson_inline simdjson_result has_next_field() noexcept; /** * Get the current field's key. */ simdjson_warn_unused simdjson_inline simdjson_result field_key() noexcept; /** * Pass the : in the field and move to its value. */ simdjson_warn_unused simdjson_inline error_code field_value() noexcept; /** * Find the next field with the given key. * * Assumes you have called next_field() or otherwise matched the previous value. * * This means the iterator must be sitting at the next key: * * ``` * { "a": 1, "b": 2 } * ^ * ``` * * Key is *raw JSON,* meaning it will be matched against the verbatim JSON without attempting to * unescape it. This works well for typical ASCII and UTF-8 keys (almost all of them), but may * fail to match some keys with escapes (\u, \n, etc.). */ simdjson_warn_unused simdjson_inline error_code find_field(const std::string_view key) noexcept; /** * Find the next field with the given key, *without* unescaping. This assumes object order: it * will not find the field if it was already passed when looking for some *other* field. * * Assumes you have called next_field() or otherwise matched the previous value. * * This means the iterator must be sitting at the next key: * * ``` * { "a": 1, "b": 2 } * ^ * ``` * * Key is *raw JSON,* meaning it will be matched against the verbatim JSON without attempting to * unescape it. 
This works well for typical ASCII and UTF-8 keys (almost all of them), but may * fail to match some keys with escapes (\u, \n, etc.). */ simdjson_warn_unused simdjson_inline simdjson_result find_field_raw(const std::string_view key) noexcept; /** * Find the field with the given key without regard to order, and *without* unescaping. * * This is an unordered object lookup: if the field is not found initially, it will cycle around and scan from the beginning. * * Assumes you have called next_field() or otherwise matched the previous value. * * This means the iterator must be sitting at the next key: * * ``` * { "a": 1, "b": 2 } * ^ * ``` * * Key is *raw JSON,* meaning it will be matched against the verbatim JSON without attempting to * unescape it. This works well for typical ASCII and UTF-8 keys (almost all of them), but may * fail to match some keys with escapes (\u, \n, etc.). */ simdjson_warn_unused simdjson_inline simdjson_result find_field_unordered_raw(const std::string_view key) noexcept; /** @} */ /** * @addtogroup array Array iteration * Methods to iterate over array elements. These methods generally *assume* the value is actually * an object; the caller is responsible for keeping track of that fact. * @{ */ /** * Check for an opening [ and start an array iteration. * * @returns Whether the array had any elements (returns false for empty). * @error INCORRECT_TYPE If there is no [. */ simdjson_warn_unused simdjson_inline simdjson_result start_array() noexcept; /** * Check for an opening [ and start an array iteration while at the root. * * @returns Whether the array had any elements (returns false for empty). * @error INCORRECT_TYPE If there is no [. * @error TAPE_ERROR if there is no matching ] at end of document */ simdjson_warn_unused simdjson_inline simdjson_result start_root_array() noexcept; /** * Checks whether an array could be started from the root. May be called by start_root_array. 
* * @returns SUCCESS if it is possible to safely start an array from the root (document level). * @error INCORRECT_TYPE If there is no [. * @error TAPE_ERROR if there is no matching ] at end of document */ simdjson_warn_unused simdjson_inline error_code check_root_array() noexcept; /** * Start an array iteration, after the user has already checked and moved past the [. * * Does not move the iterator unless the array is empty ([]). * * @returns Whether the array had any elements (returns false for empty). * @error INCOMPLETE_ARRAY_OR_OBJECT If there are no more tokens (implying the *parent* * array or object is incomplete). */ simdjson_warn_unused simdjson_inline simdjson_result started_array() noexcept; /** * Start an array iteration from the root, after the user has already checked and moved past the [. * * Does not move the iterator unless the array is empty ([]). * * @returns Whether the array had any elements (returns false for empty). * @error INCOMPLETE_ARRAY_OR_OBJECT If there are no more tokens (implying the *parent* * array or object is incomplete). */ simdjson_warn_unused simdjson_inline simdjson_result started_root_array() noexcept; /** * Moves to the next element in an array. * * Looks for , and ]. If ] is found, the array is finished and the iterator advances past it. * Otherwise, it advances to the next value. * * @return Whether there is another element in the array. * @error TAPE_ERROR If there is a comma missing between elements. */ simdjson_warn_unused simdjson_inline simdjson_result has_next_element() noexcept; /** * Get a child value iterator. 
*/ simdjson_warn_unused simdjson_inline value_iterator child() const noexcept; /** @} */ /** * @defgroup scalar Scalar values * @addtogroup scalar * @{ */ simdjson_warn_unused simdjson_inline simdjson_result get_string(bool allow_replacement) noexcept; template simdjson_warn_unused simdjson_inline error_code get_string(string_type& receiver, bool allow_replacement) noexcept; simdjson_warn_unused simdjson_inline simdjson_result get_wobbly_string() noexcept; simdjson_warn_unused simdjson_inline simdjson_result get_raw_json_string() noexcept; simdjson_warn_unused simdjson_inline simdjson_result get_uint64() noexcept; simdjson_warn_unused simdjson_inline simdjson_result get_uint64_in_string() noexcept; simdjson_warn_unused simdjson_inline simdjson_result get_int64() noexcept; simdjson_warn_unused simdjson_inline simdjson_result get_int64_in_string() noexcept; simdjson_warn_unused simdjson_inline simdjson_result get_double() noexcept; simdjson_warn_unused simdjson_inline simdjson_result get_double_in_string() noexcept; simdjson_warn_unused simdjson_inline simdjson_result get_bool() noexcept; simdjson_warn_unused simdjson_inline simdjson_result is_null() noexcept; simdjson_warn_unused simdjson_inline bool is_negative() noexcept; simdjson_warn_unused simdjson_inline simdjson_result is_integer() noexcept; simdjson_warn_unused simdjson_inline simdjson_result get_number_type() noexcept; simdjson_warn_unused simdjson_inline simdjson_result get_number() noexcept; simdjson_warn_unused simdjson_inline simdjson_result get_root_string(bool check_trailing, bool allow_replacement) noexcept; template simdjson_warn_unused simdjson_inline error_code get_root_string(string_type& receiver, bool check_trailing, bool allow_replacement) noexcept; simdjson_warn_unused simdjson_inline simdjson_result get_root_wobbly_string(bool check_trailing) noexcept; simdjson_warn_unused simdjson_inline simdjson_result get_root_raw_json_string(bool check_trailing) noexcept; simdjson_warn_unused 
simdjson_inline simdjson_result get_root_uint64(bool check_trailing) noexcept; simdjson_warn_unused simdjson_inline simdjson_result get_root_uint64_in_string(bool check_trailing) noexcept; simdjson_warn_unused simdjson_inline simdjson_result get_root_int64(bool check_trailing) noexcept; simdjson_warn_unused simdjson_inline simdjson_result get_root_int64_in_string(bool check_trailing) noexcept; simdjson_warn_unused simdjson_inline simdjson_result get_root_double(bool check_trailing) noexcept; simdjson_warn_unused simdjson_inline simdjson_result get_root_double_in_string(bool check_trailing) noexcept; simdjson_warn_unused simdjson_inline simdjson_result get_root_bool(bool check_trailing) noexcept; simdjson_warn_unused simdjson_inline bool is_root_negative() noexcept; simdjson_warn_unused simdjson_inline simdjson_result is_root_integer(bool check_trailing) noexcept; simdjson_warn_unused simdjson_inline simdjson_result get_root_number_type(bool check_trailing) noexcept; simdjson_warn_unused simdjson_inline simdjson_result get_root_number(bool check_trailing) noexcept; simdjson_warn_unused simdjson_inline simdjson_result is_root_null(bool check_trailing) noexcept; simdjson_inline error_code error() const noexcept; simdjson_inline uint8_t *&string_buf_loc() noexcept; simdjson_inline const json_iterator &json_iter() const noexcept; simdjson_inline json_iterator &json_iter() noexcept; simdjson_inline void assert_is_valid() const noexcept; simdjson_inline bool is_valid() const noexcept; /** @} */ protected: /** * Restarts an array iteration. * @returns Whether the array has any elements (returns false for empty). */ simdjson_inline simdjson_result reset_array() noexcept; /** * Restarts an object iteration. * @returns Whether the object has any fields (returns false for empty). */ simdjson_inline simdjson_result reset_object() noexcept; /** * move_at_start(): moves us so that we are pointing at the beginning of * the container. 
It updates the index so that at_start() is true and it * syncs the depth. The user can then create a new container instance. * * Usage: used with value::count_elements(). **/ simdjson_inline void move_at_start() noexcept; /** * move_at_container_start(): moves us so that we are pointing at the beginning of * the container so that assert_at_container_start() passes. * * Usage: used with reset_array() and reset_object(). **/ simdjson_inline void move_at_container_start() noexcept; /* Useful for debugging and logging purposes. */ inline std::string to_string() const noexcept; simdjson_inline value_iterator(json_iterator *json_iter, depth_t depth, token_position start_index) noexcept; simdjson_inline simdjson_result parse_null(const uint8_t *json) const noexcept; simdjson_inline simdjson_result parse_bool(const uint8_t *json) const noexcept; simdjson_inline const uint8_t *peek_start() const noexcept; simdjson_inline uint32_t peek_start_length() const noexcept; /** * The general idea of the advance_... methods and the peek_* methods * is that you first peek and check that you have desired type. If you do, * and only if you do, then you advance. * * We used to unconditionally advance. But this made reasoning about our * current state difficult. * Suppose you always advance. Look at the 'value' matching the key * "shadowable" in the following example... * * ({"globals":{"a":{"shadowable":[}}}}) * * If the user thinks it is a Boolean and asks for it, then we check the '[', * decide it is not a Boolean, but still move into the next character ('}'). Now * we are left pointing at '}' right after a '['. And we have not yet reported * an error, only that we do not have a Boolean. * * If, instead, you just stand your ground until it is content that you know, then * you will only even move beyond the '[' if the user tells you that you have an * array. 
So you will be at the '}' character inside the array and, hopefully, you * will then catch the error because an array cannot start with '}', but the code * processing Boolean values does not know this. * * So the contract is: first call 'peek_...' and then call 'advance_...' only * if you have determined that it is a type you can handle. * * Unfortunately, it makes the code more verbose, longer and maybe more error prone. */ simdjson_inline void advance_scalar(const char *type) noexcept; simdjson_inline void advance_root_scalar(const char *type) noexcept; simdjson_inline void advance_non_root_scalar(const char *type) noexcept; simdjson_inline const uint8_t *peek_scalar(const char *type) noexcept; simdjson_inline const uint8_t *peek_root_scalar(const char *type) noexcept; simdjson_inline const uint8_t *peek_non_root_scalar(const char *type) noexcept; simdjson_inline error_code start_container(uint8_t start_char, const char *incorrect_type_message, const char *type) noexcept; simdjson_inline error_code end_container() noexcept; /** * Advance to a place expecting a value (increasing depth). * * @return The current token (the one left behind). * @error TAPE_ERROR If the document ended early. */ simdjson_inline simdjson_result advance_to_value() noexcept; simdjson_inline error_code incorrect_type_error(const char *message) const noexcept; simdjson_inline error_code error_unless_more_tokens(uint32_t tokens=1) const noexcept; simdjson_inline bool is_at_start() const noexcept; /** * is_at_iterator_start() returns true on an array or object after it has just been * created, whether the instance is empty or not. * * Usage: used by array::begin() in debug mode (SIMDJSON_DEVELOPMENT_CHECKS) */ simdjson_inline bool is_at_iterator_start() const noexcept; /** * Assuming that we are within an object, this returns true if we * are pointing at a key. * * Usage: the skip_child() method should never be used while we are pointing * at a key inside an object. 
*/ simdjson_inline bool is_at_key() const noexcept; inline void assert_at_start() const noexcept; inline void assert_at_container_start() const noexcept; inline void assert_at_root() const noexcept; inline void assert_at_child() const noexcept; inline void assert_at_next() const noexcept; inline void assert_at_non_root_start() const noexcept; /** Get the starting position of this value */ simdjson_inline token_position start_position() const noexcept; /** @copydoc error_code json_iterator::position() const noexcept; */ simdjson_inline token_position position() const noexcept; /** @copydoc error_code json_iterator::end_position() const noexcept; */ simdjson_inline token_position last_position() const noexcept; /* NOTE(review): the @copydoc above targets json_iterator::end_position() although this member is last_position() - confirm the intended target. */ /** @copydoc error_code json_iterator::end_position() const noexcept; */ simdjson_inline token_position end_position() const noexcept; /** @copydoc error_code json_iterator::report_error(error_code error, const char *message) noexcept; */ simdjson_inline error_code report_error(error_code error, const char *message) noexcept; friend class document; friend class object; friend class array; friend class value; }; // value_iterator } // namespace ondemand } // namespace ppc64 } // namespace simdjson namespace simdjson { /** * Result wrapper that can be constructed from a ppc64::ondemand::value_iterator rvalue, from an * error_code, or default-constructed; everything else is inherited from * ppc64::implementation_simdjson_result_base. * NOTE(review): the template argument lists of this specialization are not visible here - * presumably this is the simdjson_result specialization for ppc64::ondemand::value_iterator; confirm. */ template<> struct simdjson_result : public ppc64::implementation_simdjson_result_base { public: simdjson_inline simdjson_result(ppc64::ondemand::value_iterator &&value) noexcept; ///< @private simdjson_inline simdjson_result(error_code error) noexcept; ///< @private simdjson_inline simdjson_result() noexcept = default; }; } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_VALUE_ITERATOR_H /* end file simdjson/generic/ondemand/value_iterator.h for ppc64 */ /* including simdjson/generic/ondemand/value.h for ppc64: #include "simdjson/generic/ondemand/value.h" */ /* begin file simdjson/generic/ondemand/value.h for ppc64 */ #ifndef SIMDJSON_GENERIC_ONDEMAND_VALUE_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */
/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_VALUE_H */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { namespace ppc64 { namespace ondemand { /** * An ephemeral JSON value returned during iteration. It is only valid for as long as you do * not access more data in the JSON document. */ class value { public: /** * Create a new invalid value. * * Exists so you can declare a variable and later assign to it before use. */ simdjson_inline value() noexcept = default; /** * Get this value as the given type. * * Supported types: object, array, raw_json_string, string_view, uint64_t, int64_t, double, bool * * You may use get_double(), get_bool(), get_uint64(), get_int64(), * get_object(), get_array(), get_raw_json_string(), or get_string() instead. * * @returns A value of the given type, parsed from the JSON. * @returns INCORRECT_TYPE If the JSON value is not the given type. */ template simdjson_inline simdjson_result get() noexcept { // Unless the simdjson library provides an inline implementation, calling this method should // immediately fail. static_assert(!sizeof(T), "The get method with given type is not implemented by the simdjson library. " "The supported types are ondemand::object, ondemand::array, raw_json_string, std::string_view, uint64_t, " "int64_t, double, and bool. We recommend you use get_double(), get_bool(), get_uint64(), get_int64(), " " get_object(), get_array(), get_raw_json_string(), or get_string() instead of the get template."); } /** * Get this value as the given type. 
* * Supported types: object, array, raw_json_string, string_view, uint64_t, int64_t, double, bool * * @param out This is set to a value of the given type, parsed from the JSON. If there is an error, this may not be initialized. * @returns INCORRECT_TYPE If the JSON value is not an object. * @returns SUCCESS If the parse succeeded and the out parameter was set to the value. */ template simdjson_inline error_code get(T &out) noexcept; /** * Cast this JSON value to an array. * * @returns An object that can be used to iterate the array. * @returns INCORRECT_TYPE If the JSON value is not an array. */ simdjson_inline simdjson_result get_array() noexcept; /** * Cast this JSON value to an object. * * @returns An object that can be used to look up or iterate fields. * @returns INCORRECT_TYPE If the JSON value is not an object. */ simdjson_inline simdjson_result get_object() noexcept; /** * Cast this JSON value to an unsigned integer. * * @returns A unsigned 64-bit integer. * @returns INCORRECT_TYPE If the JSON value is not a 64-bit unsigned integer. */ simdjson_inline simdjson_result get_uint64() noexcept; /** * Cast this JSON value (inside string) to a unsigned integer. * * @returns A unsigned 64-bit integer. * @returns INCORRECT_TYPE If the JSON value is not a 64-bit unsigned integer. */ simdjson_inline simdjson_result get_uint64_in_string() noexcept; /** * Cast this JSON value to a signed integer. * * @returns A signed 64-bit integer. * @returns INCORRECT_TYPE If the JSON value is not a 64-bit integer. */ simdjson_inline simdjson_result get_int64() noexcept; /** * Cast this JSON value (inside string) to a signed integer. * * @returns A signed 64-bit integer. * @returns INCORRECT_TYPE If the JSON value is not a 64-bit integer. */ simdjson_inline simdjson_result get_int64_in_string() noexcept; /** * Cast this JSON value to a double. * * @returns A double. * @returns INCORRECT_TYPE If the JSON value is not a valid floating-point number. 
*/ simdjson_inline simdjson_result get_double() noexcept; /** * Cast this JSON value (inside string) to a double * * @returns A double. * @returns INCORRECT_TYPE If the JSON value is not a valid floating-point number. */ simdjson_inline simdjson_result get_double_in_string() noexcept; /** * Cast this JSON value to a string. * * The string is guaranteed to be valid UTF-8. * * Equivalent to get(). * * Important: a value should be consumed once. Calling get_string() twice on the same value * is an error. * * @returns An UTF-8 string. The string is stored in the parser and will be invalidated the next * time it parses a document or when it is destroyed. * @returns INCORRECT_TYPE if the JSON value is not a string. */ simdjson_inline simdjson_result get_string(bool allow_replacement = false) noexcept; /** * Attempts to fill the provided std::string reference with the parsed value of the current string. * * The string is guaranteed to be valid UTF-8. * * Important: a value should be consumed once. Calling get_string() twice on the same value * is an error. * * Performance: This method may be slower than get_string() or get_string(bool) because it may need to allocate memory. * We recommend you avoid allocating an std::string unless you need to. * * @returns INCORRECT_TYPE if the JSON value is not a string. Otherwise, we return SUCCESS. */ template simdjson_inline error_code get_string(string_type& receiver, bool allow_replacement = false) noexcept; /** * Cast this JSON value to a "wobbly" string. * * The string is may not be a valid UTF-8 string. * See https://simonsapin.github.io/wtf-8/ * * Important: a value should be consumed once. Calling get_wobbly_string() twice on the same value * is an error. * * @returns An UTF-8 string. The string is stored in the parser and will be invalidated the next * time it parses a document or when it is destroyed. * @returns INCORRECT_TYPE if the JSON value is not a string. 
*/ simdjson_inline simdjson_result get_wobbly_string() noexcept; /** * Cast this JSON value to a raw_json_string. * * The string is guaranteed to be valid UTF-8, and may have escapes in it (e.g. \\ or \n). * * @returns A pointer to the raw JSON for the given string. * @returns INCORRECT_TYPE if the JSON value is not a string. */ simdjson_inline simdjson_result get_raw_json_string() noexcept; /** * Cast this JSON value to a bool. * * @returns A bool value. * @returns INCORRECT_TYPE if the JSON value is not true or false. */ simdjson_inline simdjson_result get_bool() noexcept; /** * Checks if this JSON value is null. If and only if the value is * null, then it is consumed (we advance). If we find a token that * begins with 'n' but is not 'null', then an error is returned. * * @returns Whether the value is null. * @returns INCORRECT_TYPE If the JSON value begins with 'n' and is not 'null'. */ simdjson_inline simdjson_result is_null() noexcept; #if SIMDJSON_EXCEPTIONS /** * Cast this JSON value to an array. * * @returns An object that can be used to iterate the array. * @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not an array. */ simdjson_inline operator array() noexcept(false); /** * Cast this JSON value to an object. * * @returns An object that can be used to look up or iterate fields. * @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not an object. */ simdjson_inline operator object() noexcept(false); /** * Cast this JSON value to an unsigned integer. * * @returns A signed 64-bit integer. * @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not a 64-bit unsigned integer. */ simdjson_inline operator uint64_t() noexcept(false); /** * Cast this JSON value to a signed integer. * * @returns A signed 64-bit integer. * @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not a 64-bit integer. */ simdjson_inline operator int64_t() noexcept(false); /** * Cast this JSON value to a double. * * @returns A double. 
* @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not a valid floating-point number. */ simdjson_inline operator double() noexcept(false); /** * Cast this JSON value to a string. * * The string is guaranteed to be valid UTF-8. * * Equivalent to get(). * * @returns An UTF-8 string. The string is stored in the parser and will be invalidated the next * time it parses a document or when it is destroyed. * @exception simdjson_error(INCORRECT_TYPE) if the JSON value is not a string. */ simdjson_inline operator std::string_view() noexcept(false); /** * Cast this JSON value to a raw_json_string. * * The string is guaranteed to be valid UTF-8, and may have escapes in it (e.g. \\ or \n). * * @returns A pointer to the raw JSON for the given string. * @exception simdjson_error(INCORRECT_TYPE) if the JSON value is not a string. */ simdjson_inline operator raw_json_string() noexcept(false); /** * Cast this JSON value to a bool. * * @returns A bool value. * @exception simdjson_error(INCORRECT_TYPE) if the JSON value is not true or false. */ simdjson_inline operator bool() noexcept(false); #endif /** * Begin array iteration. * * Part of the std::iterable interface. * * @returns INCORRECT_TYPE If the JSON value is not an array. */ simdjson_inline simdjson_result begin() & noexcept; /** * Sentinel representing the end of the array. * * Part of the std::iterable interface. */ simdjson_inline simdjson_result end() & noexcept; /** * This method scans the array and counts the number of elements. * The count_elements method should always be called before you have begun * iterating through the array: it is expected that you are pointing at * the beginning of the array. * The runtime complexity is linear in the size of the array. After * calling this function, if successful, the array is 'rewinded' at its * beginning as if it had never been accessed. If the JSON is malformed (e.g., * there is a missing comma), then an error is returned and it is no longer * safe to continue. 
* * Performance hint: You should only call count_elements() as a last * resort as it may require scanning the document twice or more. */ simdjson_inline simdjson_result count_elements() & noexcept; /** * This method scans the object and counts the number of key-value pairs. * The count_fields method should always be called before you have begun * iterating through the object: it is expected that you are pointing at * the beginning of the object. * The runtime complexity is linear in the size of the object. After * calling this function, if successful, the object is 'rewinded' at its * beginning as if it had never been accessed. If the JSON is malformed (e.g., * there is a missing comma), then an error is returned and it is no longer * safe to continue. * * To check that an object is empty, it is more performant to use * the is_empty() method on the object instance. * * Performance hint: You should only call count_fields() as a last * resort as it may require scanning the document twice or more. */ simdjson_inline simdjson_result count_fields() & noexcept; /** * Get the value at the given index in the array. This function has linear-time complexity. * This function should only be called once on an array instance since the array iterator is not reset between each call. * * @return The value at the given index, or: * - INDEX_OUT_OF_BOUNDS if the array index is larger than an array length */ simdjson_inline simdjson_result at(size_t index) noexcept; /** * Look up a field by name on an object (order-sensitive). 
* * The following code reads z, then y, then x, and thus will not retrieve x or y if fed the * JSON `{ "x": 1, "y": 2, "z": 3 }`: * * ```c++ * simdjson::ondemand::parser parser; * auto obj = parser.parse(R"( { "x": 1, "y": 2, "z": 3 } )"_padded); * double z = obj.find_field("z"); * double y = obj.find_field("y"); * double x = obj.find_field("x"); * ``` * If you have multiple fields with a matching key ({"x": 1, "x": 1}) be mindful * that only one field is returned. * **Raw Keys:** The lookup will be done against the *raw* key, and will not unescape keys. * e.g. `object["a"]` will match `{ "a": 1 }`, but will *not* match `{ "\u0061": 1 }`. * * @param key The key to look up. * @returns The value of the field, or NO_SUCH_FIELD if the field is not in the object. */ simdjson_inline simdjson_result find_field(std::string_view key) noexcept; /** @overload simdjson_inline simdjson_result find_field(std::string_view key) noexcept; */ simdjson_inline simdjson_result find_field(const char *key) noexcept; /** * Look up a field by name on an object, without regard to key order. * * **Performance Notes:** This is a bit less performant than find_field(), though its effect varies * and often appears negligible. It starts out normally, starting out at the last field; but if * the field is not found, it scans from the beginning of the object to see if it missed it. That * missing case has a non-cache-friendly bump and lots of extra scanning, especially if the object * in question is large. The fact that the extra code is there also bumps the executable size. * * It is the default, however, because it would be highly surprising (and hard to debug) if the * default behavior failed to look up a field just because it was in the wrong order--and many * APIs assume this. Therefore, you must be explicit if you want to treat objects as out of order. * * If you have multiple fields with a matching key ({"x": 1, "x": 1}) be mindful * that only one field is returned. 
* * Use find_field() if you are sure fields will be in order (or are willing to treat it as if the * field wasn't there when they aren't). * * @param key The key to look up. * @returns The value of the field, or NO_SUCH_FIELD if the field is not in the object. */ simdjson_inline simdjson_result find_field_unordered(std::string_view key) noexcept; /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) noexcept; */ simdjson_inline simdjson_result find_field_unordered(const char *key) noexcept; /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) noexcept; */ simdjson_inline simdjson_result operator[](std::string_view key) noexcept; /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) noexcept; */ simdjson_inline simdjson_result operator[](const char *key) noexcept; /** * Get the type of this JSON value. It does not validate or consume the value. * E.g., you must still call "is_null()" to check that a value is null even if * "type()" returns json_type::null. * * NOTE: If you're only expecting a value to be one type (a typical case), it's generally * better to just call .get_double, .get_string, etc. and check for INCORRECT_TYPE (or just * let it throw an exception). * * @return The type of JSON value (json_type::array, json_type::object, json_type::string, * json_type::number, json_type::boolean, or json_type::null). * @error TAPE_ERROR when the JSON value is a bad token like "}" "," or "alse". */ simdjson_inline simdjson_result type() noexcept; /** * Checks whether the value is a scalar (string, number, null, Boolean). * Returns false when there it is an array or object. * * @returns true if the type is string, number, null, Boolean * @error TAPE_ERROR when the JSON value is a bad token like "}" "," or "alse". */ simdjson_inline simdjson_result is_scalar() noexcept; /** * Checks whether the value is a negative number. * * @returns true if the number if negative. 
*/ simdjson_inline bool is_negative() noexcept; /** * Checks whether the value is an integer number. Note that * this requires to partially parse the number string. If * the value is determined to be an integer, it may still * not parse properly as an integer in subsequent steps * (e.g., it might overflow). * * Performance note: if you call this function systematically * before parsing a number, you may have fallen for a performance * anti-pattern. * * @returns true if the number if negative. */ simdjson_inline simdjson_result is_integer() noexcept; /** * Determine the number type (integer or floating-point number) as quickly * as possible. This function does not fully validate the input. It is * useful when you only need to classify the numbers, without parsing them. * * If you are planning to retrieve the value or you need full validation, * consider using the get_number() method instead: it will fully parse * and validate the input, and give you access to the type: * get_number().get_number_type(). * * get_number_type() is number_type::unsigned_integer if we have * an integer greater or equal to 9223372036854775808 * get_number_type() is number_type::signed_integer if we have an * integer that is less than 9223372036854775808 * Otherwise, get_number_type() has value number_type::floating_point_number * * This function requires processing the number string, but it is expected * to be faster than get_number().get_number_type() because it is does not * parse the number value. * * @returns the type of the number */ simdjson_inline simdjson_result get_number_type() noexcept; /** * Attempt to parse an ondemand::number. An ondemand::number may * contain an integer value or a floating-point value, the simdjson * library will autodetect the type. Thus it is a dynamically typed * number. Before accessing the value, you must determine the detected * type. 
* * number.get_number_type() is number_type::signed_integer if we have * an integer in [-9223372036854775808,9223372036854775808) * You can recover the value by calling number.get_int64() and you * have that number.is_int64() is true. * * number.get_number_type() is number_type::unsigned_integer if we have * an integer in [9223372036854775808,18446744073709551616) * You can recover the value by calling number.get_uint64() and you * have that number.is_uint64() is true. * * Otherwise, number.get_number_type() has value number_type::floating_point_number * and we have a binary64 number. * You can recover the value by calling number.get_double() and you * have that number.is_double() is true. * * You must check the type before accessing the value: it is an error * to call "get_int64()" when number.get_number_type() is not * number_type::signed_integer and when number.is_int64() is false. * * Performance note: this is designed with performance in mind. When * calling 'get_number()', you scan the number string only once, determining * efficiently the type and storing it in an efficient manner. */ simdjson_warn_unused simdjson_inline simdjson_result get_number() noexcept; /** * Get the raw JSON for this token. * * The string_view will always point into the input buffer. * * The string_view will start at the beginning of the token, and include the entire token * *as well as all spaces until the next token (or EOF).* This means, for example, that a * string token always begins with a " and is always terminated by the final ", possibly * followed by a number of spaces. * * The string_view is *not* null-terminated. However, if this is a scalar (string, number, * boolean, or null), the character after the end of the string_view is guaranteed to be * a non-space token. * * Tokens include: * - { * - [ * - "a string (possibly with UTF-8 or backslashed characters like \\\")". * - -1.2e-100 * - true * - false * - null * * See also value::raw_json(). 
*/ simdjson_inline std::string_view raw_json_token() noexcept; /** * Get a string_view pointing at this value in the JSON document. * If this element is an array or an object, it consumes the array or the object * and returns a string_view instance corresponding to the * array as represented in JSON. It points inside the original document. * If this element is a scalar (string, number, Boolean, null), it returns what * raw_json_token() would return. */ simdjson_inline simdjson_result raw_json() noexcept; /** * Returns the current location in the document if in bounds. */ simdjson_inline simdjson_result current_location() noexcept; /** * Returns the current depth in the document if in bounds. * * E.g., * 0 = finished with document * 1 = document root value (could be [ or {, not yet known) * 2 = , or } inside root array/object * 3 = key or value inside root array/object. */ simdjson_inline int32_t current_depth() const noexcept; /** * Get the value associated with the given JSON pointer. We use the RFC 6901 * https://tools.ietf.org/html/rfc6901 standard. * * ondemand::parser parser; * auto json = R"({ "foo": { "a": [ 10, 20, 30 ] }})"_padded; * auto doc = parser.iterate(json); * doc.at_pointer("/foo/a/1") == 20 * * It is allowed for a key to be the empty string: * * ondemand::parser parser; * auto json = R"({ "": { "a": [ 10, 20, 30 ] }})"_padded; * auto doc = parser.iterate(json); * doc.at_pointer("//a/1") == 20 * * Note that at_pointer() called on the document automatically calls the document's rewind * method between each call. It invalidates all previously accessed arrays, objects and values * that have not been consumed. * * Calling at_pointer() on non-document instances (e.g., arrays and objects) is not * standardized (by RFC 6901). We provide some experimental support for JSON pointers * on non-document instances. Yet it is not the case when calling at_pointer on an array * or an object instance: there is no rewind and no invalidation. 
* * You may only call at_pointer on an array after it has been created, but before it has * been first accessed. When calling at_pointer on an array, the pointer is advanced to * the location indicated by the JSON pointer (in case of success). It is no longer possible * to call at_pointer on the same array. * * You may call at_pointer more than once on an object, but each time the pointer is advanced * to be within the value matched by the key indicated by the JSON pointer query. Thus any preceding * key (as well as the current key) can no longer be used with following JSON pointer calls. * * Also note that at_pointer() relies on find_field() which implies that we do not unescape keys when matching * * @return The value associated with the given JSON pointer, or: * - NO_SUCH_FIELD if a field does not exist in an object * - INDEX_OUT_OF_BOUNDS if an array index is larger than an array length * - INCORRECT_TYPE if a non-integer is used to access an array * - INVALID_JSON_POINTER if the JSON pointer is invalid and cannot be parsed */ simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; protected: /** * Create a value. */ simdjson_inline value(const value_iterator &iter) noexcept; /** * Skip this value, allowing iteration to continue. */ simdjson_inline void skip() noexcept; /** * Start a value at the current position. * * (It should already be started; this is just a self-documentation method.) */ static simdjson_inline value start(const value_iterator &iter) noexcept; /** * Resume a value. 
*/ static simdjson_inline value resume(const value_iterator &iter) noexcept; /** * Get the object, starting or resuming it as necessary */ simdjson_inline simdjson_result start_or_resume_object() noexcept; // simdjson_inline void log_value(const char *type) const noexcept; // simdjson_inline void log_error(const char *message) const noexcept; value_iterator iter{}; friend class document; friend class array_iterator; friend class field; friend class object; friend struct simdjson_result; friend struct simdjson_result; }; } // namespace ondemand } // namespace ppc64 } // namespace simdjson namespace simdjson { /** Specialization of simdjson_result that wraps a ppc64::ondemand::value: it is constructible from a value or from an error_code and declares value-style accessors (get_array(), get_string(), find_field(), at_pointer(), ...) that return a simdjson_result or an error_code. */ template<> struct simdjson_result : public ppc64::implementation_simdjson_result_base { public: simdjson_inline simdjson_result(ppc64::ondemand::value &&value) noexcept; ///< @private simdjson_inline simdjson_result(error_code error) noexcept; ///< @private simdjson_inline simdjson_result() noexcept = default; simdjson_inline simdjson_result get_array() noexcept; simdjson_inline simdjson_result get_object() noexcept; simdjson_inline simdjson_result get_uint64() noexcept; simdjson_inline simdjson_result get_uint64_in_string() noexcept; simdjson_inline simdjson_result get_int64() noexcept; simdjson_inline simdjson_result get_int64_in_string() noexcept; simdjson_inline simdjson_result get_double() noexcept; simdjson_inline simdjson_result get_double_in_string() noexcept; simdjson_inline simdjson_result get_string(bool allow_replacement = false) noexcept; template simdjson_inline error_code get_string(string_type& receiver, bool allow_replacement = false) noexcept; simdjson_inline simdjson_result get_wobbly_string() noexcept; simdjson_inline simdjson_result get_raw_json_string() noexcept; simdjson_inline simdjson_result get_bool() noexcept; simdjson_inline simdjson_result is_null() noexcept; template simdjson_inline simdjson_result get() noexcept; template simdjson_inline error_code get(T &out) noexcept; #if SIMDJSON_EXCEPTIONS simdjson_inline operator 
ppc64::ondemand::array() noexcept(false); simdjson_inline operator ppc64::ondemand::object() noexcept(false); simdjson_inline operator uint64_t() noexcept(false); simdjson_inline operator int64_t() noexcept(false); simdjson_inline operator double() noexcept(false); simdjson_inline operator std::string_view() noexcept(false); simdjson_inline operator ppc64::ondemand::raw_json_string() noexcept(false); simdjson_inline operator bool() noexcept(false); #endif simdjson_inline simdjson_result count_elements() & noexcept; simdjson_inline simdjson_result count_fields() & noexcept; simdjson_inline simdjson_result at(size_t index) noexcept; simdjson_inline simdjson_result begin() & noexcept; simdjson_inline simdjson_result end() & noexcept; /** * Look up a field by name on an object (order-sensitive). * * The following code reads z, then y, then x, and thus will not retrieve x or y if fed the * JSON `{ "x": 1, "y": 2, "z": 3 }`: * * ```c++ * simdjson::ondemand::parser parser; * auto obj = parser.parse(R"( { "x": 1, "y": 2, "z": 3 } )"_padded); * double z = obj.find_field("z"); * double y = obj.find_field("y"); * double x = obj.find_field("x"); * ``` * * **Raw Keys:** The lookup will be done against the *raw* key, and will not unescape keys. * e.g. `object["a"]` will match `{ "a": 1 }`, but will *not* match `{ "\u0061": 1 }`. * * @param key The key to look up. * @returns The value of the field, or NO_SUCH_FIELD if the field is not in the object. */ simdjson_inline simdjson_result find_field(std::string_view key) noexcept; /** @overload simdjson_inline simdjson_result find_field(std::string_view key) noexcept; */ simdjson_inline simdjson_result find_field(const char *key) noexcept; /** * Look up a field by name on an object, without regard to key order. * * **Performance Notes:** This is a bit less performant than find_field(), though its effect varies * and often appears negligible. 
It starts out normally, starting out at the last field; but if * the field is not found, it scans from the beginning of the object to see if it missed it. That * missing case has a non-cache-friendly bump and lots of extra scanning, especially if the object * in question is large. The fact that the extra code is there also bumps the executable size. * * It is the default, however, because it would be highly surprising (and hard to debug) if the * default behavior failed to look up a field just because it was in the wrong order--and many * APIs assume this. Therefore, you must be explicit (call find_field()) if you want lookups to * depend on key order. * * Use find_field() if you are sure fields will be in order (or are willing to treat it as if the * field wasn't there when they aren't). * * @param key The key to look up. * @returns The value of the field, or NO_SUCH_FIELD if the field is not in the object. */ simdjson_inline simdjson_result find_field_unordered(std::string_view key) noexcept; /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) noexcept; */ simdjson_inline simdjson_result find_field_unordered(const char *key) noexcept; /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) noexcept; */ simdjson_inline simdjson_result operator[](std::string_view key) noexcept; /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) noexcept; */ simdjson_inline simdjson_result operator[](const char *key) noexcept; /** * Get the type of this JSON value. * * NOTE: If you're only expecting a value to be one type (a typical case), it's generally * better to just call .get_double, .get_string, etc. and check for INCORRECT_TYPE (or just * let it throw an exception). 
*/ simdjson_inline simdjson_result type() noexcept; simdjson_inline simdjson_result is_scalar() noexcept; simdjson_inline simdjson_result is_negative() noexcept; simdjson_inline simdjson_result is_integer() noexcept; simdjson_inline simdjson_result get_number_type() noexcept; simdjson_inline simdjson_result get_number() noexcept; /** @copydoc simdjson_inline std::string_view value::raw_json_token() noexcept */ simdjson_inline simdjson_result raw_json_token() noexcept; simdjson_inline simdjson_result raw_json() noexcept; /** @copydoc simdjson_inline simdjson_result current_location() noexcept */ simdjson_inline simdjson_result current_location() noexcept; /** @copydoc simdjson_inline int32_t current_depth() const noexcept */ simdjson_inline simdjson_result current_depth() const noexcept; simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; }; } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_VALUE_H /* end file simdjson/generic/ondemand/value.h for ppc64 */ /* including simdjson/generic/ondemand/logger.h for ppc64: #include "simdjson/generic/ondemand/logger.h" */ /* begin file simdjson/generic/ondemand/logger.h for ppc64 */ #ifndef SIMDJSON_GENERIC_ONDEMAND_LOGGER_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_LOGGER_H */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { namespace ppc64 { namespace ondemand { // Logging should be free unless SIMDJSON_VERBOSE_LOGGING is set. Importantly, it is critical // that the call to the log functions be side-effect free. Thus, for example, you should not // create temporary std::string instances. 
namespace logger { /** log_level tags each log line as info (0) or error (1); LOG_ENABLED is true only when SIMDJSON_VERBOSE_LOGGING is set to a nonzero value. */ enum class log_level : int32_t { info = 0, error = 1 }; #if SIMDJSON_VERBOSE_LOGGING static constexpr const bool LOG_ENABLED = true; #else static constexpr const bool LOG_ENABLED = false; #endif // We do not want these functions to be 'really inlined' since real inlining is // for performance purposes and if you are using the loggers, you do not care about // performance (or should not). static inline void log_headers() noexcept; // If args are provided, title will be treated as format string template static inline void log_line(const json_iterator &iter, token_position index, depth_t depth, const char *title_prefix, const char *title, std::string_view detail, logger::log_level level, Args&&... args) noexcept; template static inline void log_line(const json_iterator &iter, const char *title_prefix, const char *title, std::string_view detail, int delta, int depth_delta, logger::log_level level, Args&&... args) noexcept; static inline void log_event(const json_iterator &iter, const char *type, std::string_view detail="", int delta=0, int depth_delta=0) noexcept; static inline void log_value(const json_iterator &iter, token_position index, depth_t depth, const char *type, std::string_view detail="") noexcept; static inline void log_value(const json_iterator &iter, const char *type, std::string_view detail="", int delta=-1, int depth_delta=0) noexcept; static inline void log_start_value(const json_iterator &iter, token_position index, depth_t depth, const char *type, std::string_view detail="") noexcept; static inline void log_start_value(const json_iterator &iter, const char *type, int delta=-1, int depth_delta=0) noexcept; static inline void log_end_value(const json_iterator &iter, const char *type, int delta=-1, int depth_delta=0) noexcept; static inline void log_error(const json_iterator &iter, token_position index, depth_t depth, const char *error, const char *detail="") noexcept; static inline void log_error(const json_iterator &iter, const 
char *error, const char *detail="", int delta=-1, int depth_delta=0) noexcept; /* The overloads below take a value_iterator and otherwise repeat the parameter lists (including defaults) of the relative-position json_iterator overloads above. */ static inline void log_event(const value_iterator &iter, const char *type, std::string_view detail="", int delta=0, int depth_delta=0) noexcept; static inline void log_value(const value_iterator &iter, const char *type, std::string_view detail="", int delta=-1, int depth_delta=0) noexcept; static inline void log_start_value(const value_iterator &iter, const char *type, int delta=-1, int depth_delta=0) noexcept; static inline void log_end_value(const value_iterator &iter, const char *type, int delta=-1, int depth_delta=0) noexcept; static inline void log_error(const value_iterator &iter, const char *error, const char *detail="", int delta=-1, int depth_delta=0) noexcept; } // namespace logger } // namespace ondemand } // namespace ppc64 } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_LOGGER_H /* end file simdjson/generic/ondemand/logger.h for ppc64 */ /* including simdjson/generic/ondemand/token_iterator.h for ppc64: #include "simdjson/generic/ondemand/token_iterator.h" */ /* begin file simdjson/generic/ondemand/token_iterator.h for ppc64 */ #ifndef SIMDJSON_GENERIC_ONDEMAND_TOKEN_ITERATOR_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_TOKEN_ITERATOR_H */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/logger.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { namespace ppc64 { namespace ondemand { /** * Iterates through JSON tokens (`{` `}` `[` `]` `,` `:` `""` `123` `true` `false` `null`) * detected by stage 1. * * @private This is not intended for external use. 
*/ class token_iterator { public: /** * Create a new invalid token_iterator. * * Exists so you can declare a variable and later assign to it before use. */ simdjson_inline token_iterator() noexcept = default; simdjson_inline token_iterator(token_iterator &&other) noexcept = default; simdjson_inline token_iterator &operator=(token_iterator &&other) noexcept = default; simdjson_inline token_iterator(const token_iterator &other) noexcept = default; simdjson_inline token_iterator &operator=(const token_iterator &other) noexcept = default; /** * Advance to the next token (returning the current one). */ simdjson_inline const uint8_t *return_current_and_advance() noexcept; /** * Reports the current offset in bytes from the start of the underlying buffer. */ simdjson_inline uint32_t current_offset() const noexcept; /** * Get the JSON text for a given token (relative). * * This is not null-terminated; it is a view into the JSON. * * @param delta The relative position of the token to retrieve. e.g. 0 = current token, * 1 = next token, -1 = prev token. * * TODO consider a string_view, assuming the length will get stripped out by the optimizer when * it isn't used ... */ simdjson_inline const uint8_t *peek(int32_t delta=0) const noexcept; /** * Get the maximum length of the JSON text for a given token. * * The length will include any whitespace at the end of the token. * * @param delta The relative position of the token to retrieve. e.g. 0 = current token, * 1 = next token, -1 = prev token. */ simdjson_inline uint32_t peek_length(int32_t delta=0) const noexcept; /** * Get the JSON text for a given token. * * This is not null-terminated; it is a view into the JSON. * * @param position The position of the token. * */ simdjson_inline const uint8_t *peek(token_position position) const noexcept; /** * Get the maximum length of the JSON text for a given token. * * The length will include any whitespace at the end of the token. * * @param position The position of the token. 
*/ simdjson_inline uint32_t peek_length(token_position position) const noexcept; /** * Return the current index. */ simdjson_inline token_position position() const noexcept; /** * Reset to a previously saved index. */ simdjson_inline void set_position(token_position target_position) noexcept; // NOTE: we don't support a full C++ iterator interface, because we expect people to make // different calls to advance the iterator based on *their own* state. simdjson_inline bool operator==(const token_iterator &other) const noexcept; simdjson_inline bool operator!=(const token_iterator &other) const noexcept; simdjson_inline bool operator>(const token_iterator &other) const noexcept; simdjson_inline bool operator>=(const token_iterator &other) const noexcept; simdjson_inline bool operator<(const token_iterator &other) const noexcept; simdjson_inline bool operator<=(const token_iterator &other) const noexcept; protected: simdjson_inline token_iterator(const uint8_t *buf, token_position position) noexcept; /** * Get the index of the JSON text for a given token (relative). * * Unlike peek(), this returns a uint32_t index rather than a pointer to the text. * * @param delta The relative position of the token to retrieve. e.g. 0 = current token, * 1 = next token, -1 = prev token. */ simdjson_inline uint32_t peek_index(int32_t delta=0) const noexcept; /** * Get the index of the JSON text for a given token. * * Unlike peek(), this returns a uint32_t index rather than a pointer to the text. * * @param position The position of the token. * */ simdjson_inline uint32_t peek_index(token_position position) const noexcept; const uint8_t *buf{}; token_position _position{}; friend class json_iterator; friend class value_iterator; friend class object; template friend simdjson_inline void logger::log_line(const json_iterator &iter, const char *title_prefix, const char *title, std::string_view detail, int delta, int depth_delta, logger::log_level level, Args&&... 
args) noexcept; template friend simdjson_inline void logger::log_line(const json_iterator &iter, token_position index, depth_t depth, const char *title_prefix, const char *title, std::string_view detail, logger::log_level level, Args&&... args) noexcept; }; } // namespace ondemand } // namespace ppc64 } // namespace simdjson namespace simdjson { template<> struct simdjson_result : public ppc64::implementation_simdjson_result_base { public: simdjson_inline simdjson_result(ppc64::ondemand::token_iterator &&value) noexcept; ///< @private simdjson_inline simdjson_result(error_code error) noexcept; ///< @private simdjson_inline simdjson_result() noexcept = default; simdjson_inline ~simdjson_result() noexcept = default; ///< @private }; } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_TOKEN_ITERATOR_H /* end file simdjson/generic/ondemand/token_iterator.h for ppc64 */ /* including simdjson/generic/ondemand/json_iterator.h for ppc64: #include "simdjson/generic/ondemand/json_iterator.h" */ /* begin file simdjson/generic/ondemand/json_iterator.h for ppc64 */ #ifndef SIMDJSON_GENERIC_ONDEMAND_JSON_ITERATOR_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_JSON_ITERATOR_H */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/token_iterator.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { namespace ppc64 { namespace ondemand { /** * Iterates through JSON tokens, keeping track of depth and string buffer. * * @private This is not intended for external use. */ class json_iterator { protected: token_iterator token{}; ondemand::parser *parser{}; /** * Next free location in the string buffer. 
* * Used by raw_json_string::unescape() to have a place to unescape strings to. */ uint8_t *_string_buf_loc{}; /** * JSON error, if there is one. * * INCORRECT_TYPE and NO_SUCH_FIELD are *not* stored here, ever. * * PERF NOTE: we *hope* this will be elided into control flow, as it is only used (a) in the first * iteration of the loop, or (b) for the final iteration after a missing comma is found in ++. If * this is not elided, we should make sure it's at least not using up a register. Failing that, * we should store it in document so there's only one of them. */ error_code error{SUCCESS}; /** * Depth of the current token in the JSON. * * - 0 = finished with document * - 1 = document root value (could be [ or {, not yet known) * - 2 = , or } inside root array/object * - 3 = key or value inside root array/object. */ depth_t _depth{}; /** * Beginning of the document indexes. * Normally we have root == parser->implementation->structural_indexes.get() * but this may differ, especially in streaming mode (where we have several * documents); */ token_position _root{}; /** * Normally, a json_iterator operates over a single document, but in * some cases, we may have a stream of documents. This attribute is meant * as meta-data: the json_iterator works the same irrespective of the * value of this attribute. */ bool _streaming{false}; public: simdjson_inline json_iterator() noexcept = default; simdjson_inline json_iterator(json_iterator &&other) noexcept; simdjson_inline json_iterator &operator=(json_iterator &&other) noexcept; simdjson_inline explicit json_iterator(const json_iterator &other) noexcept = default; simdjson_inline json_iterator &operator=(const json_iterator &other) noexcept = default; /** * Skips a JSON value, whether it is a scalar, array or object. 
*/ simdjson_warn_unused simdjson_inline error_code skip_child(depth_t parent_depth) noexcept; /** * Tell whether the iterator is still at the start */ simdjson_inline bool at_root() const noexcept; /** * Tell whether we should be expected to run in streaming * mode (iterating over many documents). It is pure metadata * that does not affect how the iterator works. It is used by * start_root_array() and start_root_object(). */ simdjson_inline bool streaming() const noexcept; /** * Get the root position (returned as a token_position, not an iterator) */ simdjson_inline token_position root_position() const noexcept; /** * Assert that we are at the document depth (== 1) */ simdjson_inline void assert_at_document_depth() const noexcept; /** * Assert that we are at the root of the document */ simdjson_inline void assert_at_root() const noexcept; /** * Tell whether the iterator is at the EOF mark */ simdjson_inline bool at_end() const noexcept; /** * Tell whether the iterator is live (has not been moved). */ simdjson_inline bool is_alive() const noexcept; /** * Abandon this iterator, setting depth to 0 (as if the document is finished). */ simdjson_inline void abandon() noexcept; /** * Advance the current token without modifying depth. */ simdjson_inline const uint8_t *return_current_and_advance() noexcept; /** * Returns true if there is a single token in the index (i.e., it is * a JSON with a scalar value such as a single number). * * @return whether there is a single token */ simdjson_inline bool is_single_token() const noexcept; /** * Assert that there are at least the given number of tokens left. * * Has no effect in release builds. */ simdjson_inline void assert_more_tokens(uint32_t required_tokens=1) const noexcept; /** * Assert that the given position addresses an actual token (is within bounds). * * Has no effect in release builds. */ simdjson_inline void assert_valid_position(token_position position) const noexcept; /** * Get the JSON text for a given token (relative). 
* * This is not null-terminated; it is a view into the JSON. * * @param delta The relative position of the token to retrieve. e.g. 0 = next token, -1 = prev token. * * TODO consider a string_view, assuming the length will get stripped out by the optimizer when * it isn't used ... */ simdjson_inline const uint8_t *peek(int32_t delta=0) const noexcept; /** * Get the maximum length of the JSON text for the current token (or relative). * * The length will include any whitespace at the end of the token. * * @param delta The relative position of the token to retrieve. e.g. 0 = next token, -1 = prev token. */ simdjson_inline uint32_t peek_length(int32_t delta=0) const noexcept; /** * Get a pointer to the current location in the input buffer. * * This is not null-terminated; it is a view into the JSON. * * You may be pointing outside of the input buffer: it is not generally * safe to dereference this pointer. */ simdjson_inline const uint8_t *unsafe_pointer() const noexcept; /** * Get the JSON text for a given token. * * This is not null-terminated; it is a view into the JSON. * * @param position The position of the token to retrieve. * * TODO consider a string_view, assuming the length will get stripped out by the optimizer when * it isn't used ... */ simdjson_inline const uint8_t *peek(token_position position) const noexcept; /** * Get the maximum length of the JSON text for a given token. * * The length will include any whitespace at the end of the token. * * @param position The position of the token to retrieve. */ simdjson_inline uint32_t peek_length(token_position position) const noexcept; /** * Get the JSON text for the last token in the document. * * This is not null-terminated; it is a view into the JSON. * * TODO consider a string_view, assuming the length will get stripped out by the optimizer when * it isn't used ... */ simdjson_inline const uint8_t *peek_last() const noexcept; /** * Ascend one level. 
* * Validates that the depth - 1 == parent_depth. * * @param parent_depth the expected parent depth. */ simdjson_inline void ascend_to(depth_t parent_depth) noexcept; /** * Descend one level. * * Validates that the new depth == child_depth. * * @param child_depth the expected child depth. */ simdjson_inline void descend_to(depth_t child_depth) noexcept; simdjson_inline void descend_to(depth_t child_depth, int32_t delta) noexcept; /** * Get current depth. */ simdjson_inline depth_t depth() const noexcept; /** * Get current (writeable) location in the string buffer. */ simdjson_inline uint8_t *&string_buf_loc() noexcept; /** * Report an unrecoverable error, preventing further iteration. * * @param error The error to report. Must not be SUCCESS, UNINITIALIZED, INCORRECT_TYPE, or NO_SUCH_FIELD. * @param message An error message to report with the error. */ simdjson_inline error_code report_error(error_code error, const char *message) noexcept; /** * Log error, but don't stop iteration. * @param error The error to report. Must be INCORRECT_TYPE, or NO_SUCH_FIELD. * @param message An error message to report with the error. */ simdjson_inline error_code optional_error(error_code error, const char *message) noexcept; /** * Take an input in json containing max_len characters and attempt to copy it over to tmpbuf, a buffer with * N bytes of capacity. It will return false if N is too small (smaller than max_len) or if it is zero. * The buffer (tmpbuf) is padded with space characters. */ simdjson_warn_unused simdjson_inline bool copy_to_buffer(const uint8_t *json, uint32_t max_len, uint8_t *tmpbuf, size_t N) noexcept; simdjson_inline token_position position() const noexcept; /** * Write the raw_json_string to the string buffer and return a string_view. * Each raw_json_string should be unescaped once, or else the string buffer might * overflow. 
*/ simdjson_inline simdjson_result unescape(raw_json_string in, bool allow_replacement) noexcept; simdjson_inline simdjson_result unescape_wobbly(raw_json_string in) noexcept; simdjson_inline void reenter_child(token_position position, depth_t child_depth) noexcept; simdjson_inline error_code consume_character(char c) noexcept; #if SIMDJSON_DEVELOPMENT_CHECKS simdjson_inline token_position start_position(depth_t depth) const noexcept; simdjson_inline void set_start_position(depth_t depth, token_position position) noexcept; #endif /* Useful for debugging and logging purposes. */ inline std::string to_string() const noexcept; /** * Returns the current location in the document if in bounds. */ inline simdjson_result current_location() const noexcept; /** * Updates this json iterator so that it is back at the beginning of the document, * as if it had just been created. */ inline void rewind() noexcept; /** * This checks whether the {,},[,] are balanced so that the document * ends with proper zero depth. This requires scanning the whole document * and it may be expensive. It is expected that it will be rarely called. * It does not attempt to match { with } and [ with ]. */ inline bool balanced() const noexcept; protected: simdjson_inline json_iterator(const uint8_t *buf, ondemand::parser *parser) noexcept; /// The last token before the end simdjson_inline token_position last_position() const noexcept; /// The token *at* the end. This points at gibberish and should only be used for comparison. simdjson_inline token_position end_position() const noexcept; /// The end of the buffer. 
simdjson_inline token_position end() const noexcept; friend class document; friend class document_stream; friend class object; friend class array; friend class value; friend class raw_json_string; friend class parser; friend class value_iterator; template friend simdjson_inline void logger::log_line(const json_iterator &iter, const char *title_prefix, const char *title, std::string_view detail, int delta, int depth_delta, logger::log_level level, Args&&... args) noexcept; template friend simdjson_inline void logger::log_line(const json_iterator &iter, token_position index, depth_t depth, const char *title_prefix, const char *title, std::string_view detail, logger::log_level level, Args&&... args) noexcept; }; // json_iterator } // namespace ondemand } // namespace ppc64 } // namespace simdjson namespace simdjson { template<> struct simdjson_result : public ppc64::implementation_simdjson_result_base { public: simdjson_inline simdjson_result(ppc64::ondemand::json_iterator &&value) noexcept; ///< @private simdjson_inline simdjson_result(error_code error) noexcept; ///< @private simdjson_inline simdjson_result() noexcept = default; }; } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_JSON_ITERATOR_H /* end file simdjson/generic/ondemand/json_iterator.h for ppc64 */ /* including simdjson/generic/ondemand/json_type.h for ppc64: #include "simdjson/generic/ondemand/json_type.h" */ /* begin file simdjson/generic/ondemand/json_type.h for ppc64 */ #ifndef SIMDJSON_GENERIC_ONDEMAND_JSON_TYPE_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_JSON_TYPE_H */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/numberparsing.h" */ /* amalgamation skipped 
(editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { namespace ppc64 { namespace ondemand { /** * The type of a JSON value. */ enum class json_type { // Start at 1 to catch uninitialized / default values more easily array=1, ///< A JSON array ( [ 1, 2, 3 ... ] ) object, ///< A JSON object ( { "a": 1, "b": 2, ... } ) number, ///< A JSON number ( 1 or -2.3 or 4.5e6 ...) string, ///< A JSON string ( "a" or "hello world\n" ...) boolean, ///< A JSON boolean (true or false) null ///< A JSON null (null) }; /** * A type representing a JSON number. * The design of the struct is deliberately straight-forward. All * functions return standard values with no error check. */ struct number { /** * return the automatically determined type of * the number: number_type::floating_point_number, * number_type::signed_integer or number_type::unsigned_integer. * * enum class number_type { * floating_point_number=1, /// a binary64 number * signed_integer, /// a signed integer that fits in a 64-bit word using two's complement * unsigned_integer /// a positive integer larger or equal to 1<<63 * }; */ simdjson_inline ondemand::number_type get_number_type() const noexcept; /** * return true if the automatically determined type of * the number is number_type::unsigned_integer. */ simdjson_inline bool is_uint64() const noexcept; /** * return the value as a uint64_t, only valid if is_uint64() is true. */ simdjson_inline uint64_t get_uint64() const noexcept; simdjson_inline operator uint64_t() const noexcept; /** * return true if the automatically determined type of * the number is number_type::signed_integer. */ simdjson_inline bool is_int64() const noexcept; /** * return the value as a int64_t, only valid if is_int64() is true. */ simdjson_inline int64_t get_int64() const noexcept; simdjson_inline operator int64_t() const noexcept; /** * return true if the automatically determined type of * the number is number_type::floating_point_number. 
*/ simdjson_inline bool is_double() const noexcept; /** * return the value as a double, only valid if is_double() is true. */ simdjson_inline double get_double() const noexcept; simdjson_inline operator double() const noexcept; /** * Convert the number to a double. Though it always succeeds, the conversion * may be lossy if the number cannot be represented exactly. */ simdjson_inline double as_double() const noexcept; protected: /** * The next block of declarations is designed so that we can call the number parsing * functions on a number type. They are protected and should never be used outside * of the core simdjson library. */ friend class value_iterator; template friend error_code numberparsing::slow_float_parsing(simdjson_unused const uint8_t * src, W writer); template friend error_code numberparsing::write_float(const uint8_t *const src, bool negative, uint64_t i, const uint8_t * start_digits, size_t digit_count, int64_t exponent, W &writer); template friend error_code numberparsing::parse_number(const uint8_t *const src, W &writer); /** Store a signed 64-bit value to the number. */ simdjson_inline void append_s64(int64_t value) noexcept; /** Store an unsigned 64-bit value to the number. */ simdjson_inline void append_u64(uint64_t value) noexcept; /** Store a double value to the number. */ simdjson_inline void append_double(double value) noexcept; /** Specifies that the value is a double, but leave it undefined. */ simdjson_inline void skip_double() noexcept; /** * End of friend declarations. */ /** * Our attributes are a union type (size = 64 bits) * followed by a type indicator. */ union { double floating_point_number; int64_t signed_integer; uint64_t unsigned_integer; } payload{0}; number_type type{number_type::signed_integer}; }; /** * Write the JSON type to the output stream * * @param out The output stream. * @param type The json_type. 
*/ inline std::ostream& operator<<(std::ostream& out, json_type type) noexcept; #if SIMDJSON_EXCEPTIONS /** * Send JSON type to an output stream. * * @param out The output stream. * @param type The json_type. * @throw simdjson_error if the result being printed has an error. If there is an error with the * underlying output stream, that error will be propagated (simdjson_error will not be * thrown). */ inline std::ostream& operator<<(std::ostream& out, simdjson_result &type) noexcept(false); #endif } // namespace ondemand } // namespace ppc64 } // namespace simdjson namespace simdjson { template<> struct simdjson_result : public ppc64::implementation_simdjson_result_base { public: simdjson_inline simdjson_result(ppc64::ondemand::json_type &&value) noexcept; ///< @private simdjson_inline simdjson_result(error_code error) noexcept; ///< @private simdjson_inline simdjson_result() noexcept = default; simdjson_inline ~simdjson_result() noexcept = default; ///< @private }; } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_JSON_TYPE_H /* end file simdjson/generic/ondemand/json_type.h for ppc64 */ /* including simdjson/generic/ondemand/raw_json_string.h for ppc64: #include "simdjson/generic/ondemand/raw_json_string.h" */ /* begin file simdjson/generic/ondemand/raw_json_string.h for ppc64 */ #ifndef SIMDJSON_GENERIC_ONDEMAND_RAW_JSON_STRING_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_RAW_JSON_STRING_H */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { namespace ppc64 { namespace ondemand { /** * A string escaped per JSON rules, terminated with quote ("). 
They are used to represent * unescaped keys inside JSON documents. * * (In other words, a pointer to the beginning of a string, just after the start quote, inside a * JSON file.) * * This class is deliberately simplistic and has little functionality. You can * compare a raw_json_string instance with an unescaped C string, but * that is nearly all you can do. * * The raw_json_string is unescaped. If you wish to write an unescaped version of it to your own * buffer, you may do so using the parser.unescape(string, buff) method, using an ondemand::parser * instance. Doing so requires you to have a sufficiently large buffer. * * The raw_json_string instances originate typically from field instance which in turn represent * key-value pairs from object instances. From a field instance, you get the raw_json_string * instance by calling key(). You can, if you want a more usable string_view instance, call * the unescaped_key() method on the field instance. You may also create a raw_json_string from * any other string value, with the value.get_raw_json_string() method. Again, you can get * a more usable string_view instance by calling get_string(). * */ class raw_json_string { public: /** * Create a new invalid raw_json_string. * * Exists so you can declare a variable and later assign to it before use. */ simdjson_inline raw_json_string() noexcept = default; /** * Create a new raw_json_string pointed at the given location in the JSON. * * The given location must be just *after* the beginning quote (") in the JSON file. * * It *must* be terminated by a ", and be a valid JSON string. */ simdjson_inline raw_json_string(const uint8_t * _buf) noexcept; /** * Get the raw pointer to the beginning of the string in the JSON (just after the "). * * It is possible for this function to return a null pointer if the instance * has outlived its existence.
*/ simdjson_inline const char * raw() const noexcept; /** * This compares the current instance to the std::string_view target: returns true if * they are byte-by-byte equal (no escaping is done) on target.size() characters, * and if the raw_json_string instance has a quote character at byte index target.size(). * We never read more than length + 1 bytes in the raw_json_string instance. * If length is smaller than target.size(), this will return false. * * The std::string_view instance may contain any characters. However, the caller * is responsible for setting length so that length bytes may be read in the * raw_json_string. * * Performance: the comparison may be done using memcmp which may be efficient * for long strings. */ simdjson_inline bool unsafe_is_equal(size_t length, std::string_view target) const noexcept; /** * This compares the current instance to the std::string_view target: returns true if * they are byte-by-byte equal (no escaping is done). * The std::string_view instance should not contain unescaped quote characters: * the caller is responsible for this check. See is_free_from_unescaped_quote. * * Performance: the comparison is done byte-by-byte which might be inefficient for * long strings. * * If target is a compile-time constant, and your compiler likes you, * you should be able to do the following without performance penalty... * * static_assert(raw_json_string::is_free_from_unescaped_quote(target), ""); * s.unsafe_is_equal(target); */ simdjson_inline bool unsafe_is_equal(std::string_view target) const noexcept; /** * This compares the current instance to the C string target: returns true if * they are byte-by-byte equal (no escaping is done). * The provided C string should not contain an unescaped quote character: * the caller is responsible for this check. See is_free_from_unescaped_quote. * * If target is a compile-time constant, and your compiler likes you, * you should be able to do the following without performance penalty... 
* * static_assert(raw_json_string::is_free_from_unescaped_quote(target), ""); * s.unsafe_is_equal(target); */ simdjson_inline bool unsafe_is_equal(const char* target) const noexcept; /** * This compares the current instance to the std::string_view target: returns true if * they are byte-by-byte equal (no escaping is done). */ simdjson_inline bool is_equal(std::string_view target) const noexcept; /** * This compares the current instance to the C string target: returns true if * they are byte-by-byte equal (no escaping is done). */ simdjson_inline bool is_equal(const char* target) const noexcept; /** * Returns true if target is free from unescaped quote. If target is known at * compile-time, we might expect the computation to happen at compile time with * many compilers (not all!). */ static simdjson_inline bool is_free_from_unescaped_quote(std::string_view target) noexcept; static simdjson_inline bool is_free_from_unescaped_quote(const char* target) noexcept; private: /** * This will set the inner pointer to zero, effectively making * this instance unusable. */ simdjson_inline void consume() noexcept { buf = nullptr; } /** * Checks whether the inner pointer is non-null and thus usable. */ simdjson_inline simdjson_warn_unused bool alive() const noexcept { return buf != nullptr; } /** * Unescape this JSON string, replacing \\ with \, \n with newline, etc. * The result will be a valid UTF-8. * * ## IMPORTANT: string_view lifetime * * The string_view is only valid until the next parse() call on the parser. * * @param iter A json_iterator, which contains a buffer where the string will be written. * @param allow_replacement Whether we allow replacement of invalid surrogate pairs. */ simdjson_inline simdjson_warn_unused simdjson_result unescape(json_iterator &iter, bool allow_replacement) const noexcept; /** * Unescape this JSON string, replacing \\ with \, \n with newline, etc. * The result may not be a valid UTF-8. 
https://simonsapin.github.io/wtf-8/ * * ## IMPORTANT: string_view lifetime * * The string_view is only valid until the next parse() call on the parser. * * @param iter A json_iterator, which contains a buffer where the string will be written. */ simdjson_inline simdjson_warn_unused simdjson_result unescape_wobbly(json_iterator &iter) const noexcept; const uint8_t * buf{}; friend class object; friend class field; friend class parser; friend struct simdjson_result; }; simdjson_unused simdjson_inline std::ostream &operator<<(std::ostream &, const raw_json_string &) noexcept; /** * Comparisons between raw_json_string and std::string_view instances are potentially unsafe: the user is responsible * for providing a string with no unescaped quote. Note that unescaped quotes cannot be present in valid JSON strings. */ simdjson_unused simdjson_inline bool operator==(const raw_json_string &a, std::string_view c) noexcept; simdjson_unused simdjson_inline bool operator==(std::string_view c, const raw_json_string &a) noexcept; simdjson_unused simdjson_inline bool operator!=(const raw_json_string &a, std::string_view c) noexcept; simdjson_unused simdjson_inline bool operator!=(std::string_view c, const raw_json_string &a) noexcept; } // namespace ondemand } // namespace ppc64 } // namespace simdjson namespace simdjson { template<> struct simdjson_result : public ppc64::implementation_simdjson_result_base { public: simdjson_inline simdjson_result(ppc64::ondemand::raw_json_string &&value) noexcept; ///< @private simdjson_inline simdjson_result(error_code error) noexcept; ///< @private simdjson_inline simdjson_result() noexcept = default; simdjson_inline ~simdjson_result() noexcept = default; ///< @private simdjson_inline simdjson_result raw() const noexcept; simdjson_inline simdjson_warn_unused simdjson_result unescape(ppc64::ondemand::json_iterator &iter, bool allow_replacement) const noexcept; simdjson_inline simdjson_warn_unused simdjson_result 
unescape_wobbly(ppc64::ondemand::json_iterator &iter) const noexcept; }; } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_RAW_JSON_STRING_H /* end file simdjson/generic/ondemand/raw_json_string.h for ppc64 */ /* including simdjson/generic/ondemand/parser.h for ppc64: #include "simdjson/generic/ondemand/parser.h" */ /* begin file simdjson/generic/ondemand/parser.h for ppc64 */ #ifndef SIMDJSON_GENERIC_ONDEMAND_PARSER_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_PARSER_H */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ #include namespace simdjson { namespace ppc64 { namespace ondemand { /** * The default batch size (1000000 bytes) for document_stream instances for this On Demand kernel. * Note that different On Demand kernels may use a different DEFAULT_BATCH_SIZE value * in the future. */ static constexpr size_t DEFAULT_BATCH_SIZE = 1000000; /** * Some adversary might try to set the batch size to 0 or 1, which might cause problems. * We set a minimum of 32B since anything else is highly likely to be an error. In practice, * most users will want a much larger batch size. * * All non-negative MINIMAL_BATCH_SIZE values should be 'safe' except that, obviously, no JSON * document can ever span 0 or 1 byte and that very large values would create memory allocation issues. */ static constexpr size_t MINIMAL_BATCH_SIZE = 32; /** * A JSON fragment iterator. * * This holds the actual iterator as well as the buffer for writing strings. */ class parser { public: /** * Create a JSON parser. * * The new parser will have zero capacity.
*/ inline explicit parser(size_t max_capacity = SIMDJSON_MAXSIZE_BYTES) noexcept; inline parser(parser &&other) noexcept = default; simdjson_inline parser(const parser &other) = delete; simdjson_inline parser &operator=(const parser &other) = delete; simdjson_inline parser &operator=(parser &&other) noexcept = default; /** Deallocate the JSON parser. */ inline ~parser() noexcept = default; /** * Start iterating an on-demand JSON document. * * ondemand::parser parser; * document doc = parser.iterate(json); * * It is expected that the content is a valid UTF-8 file, containing a valid JSON document. * Otherwise the iterate method may return an error. In particular, the whole input should be * valid: we do not attempt to tolerate incorrect content either before or after a JSON * document. If there is a UTF-8 BOM, the parser skips it. * * ### IMPORTANT: Validate what you use * * Calling iterate on an invalid JSON document may not immediately trigger an error. The call to * iterate does not parse and validate the whole document. * * ### IMPORTANT: Buffer Lifetime * * Because parsing is done while you iterate, you *must* keep the JSON buffer around at least as * long as the document iteration. * * ### IMPORTANT: Document Lifetime * * Only one iteration at a time can happen per parser, and the parser *must* be kept alive during * iteration to ensure intermediate buffers can be accessed. Any document must be destroyed before * you call parse() again or destroy the parser. * * ### REQUIRED: Buffer Padding * * The buffer must have at least SIMDJSON_PADDING extra allocated bytes. It does not matter what * those bytes are initialized to, as long as they are allocated. These bytes will be read: if you * are using a sanitizer that verifies that no uninitialized byte is read, then you should initialize the * SIMDJSON_PADDING bytes to avoid runtime warnings. * * @param json The JSON to parse. * @param len The length of the JSON.
* @param capacity The number of bytes allocated in the JSON (must be at least len+SIMDJSON_PADDING). * * @return The document, or an error: * - INSUFFICIENT_PADDING if the input has less than SIMDJSON_PADDING extra bytes. * - MEMALLOC if the parser does not have enough capacity, and memory * allocation fails. * - EMPTY if the document is all whitespace. * - UTF8_ERROR if the document is not valid UTF-8. * - UNESCAPED_CHARS if a string contains control characters that must be escaped * - UNCLOSED_STRING if there is an unclosed string in the document. */ simdjson_warn_unused simdjson_result iterate(padded_string_view json) & noexcept; /** @overload simdjson_result iterate(padded_string_view json) & noexcept */ simdjson_warn_unused simdjson_result iterate(const char *json, size_t len, size_t capacity) & noexcept; /** @overload simdjson_result iterate(padded_string_view json) & noexcept */ simdjson_warn_unused simdjson_result iterate(const uint8_t *json, size_t len, size_t capacity) & noexcept; /** @overload simdjson_result iterate(padded_string_view json) & noexcept */ simdjson_warn_unused simdjson_result iterate(std::string_view json, size_t capacity) & noexcept; /** @overload simdjson_result iterate(padded_string_view json) & noexcept */ simdjson_warn_unused simdjson_result iterate(const std::string &json) & noexcept; /** @overload simdjson_result iterate(padded_string_view json) & noexcept */ simdjson_warn_unused simdjson_result iterate(std::string &json) & noexcept; /** @overload simdjson_result iterate(padded_string_view json) & noexcept */ simdjson_warn_unused simdjson_result iterate(const simdjson_result &json) & noexcept; /** @overload simdjson_result iterate(padded_string_view json) & noexcept */ simdjson_warn_unused simdjson_result iterate(const simdjson_result &json) & noexcept; /** @overload simdjson_result iterate(padded_string_view json) & noexcept */ simdjson_warn_unused simdjson_result iterate(padded_string &&json) & noexcept =
delete; /** * @private * * Start iterating an on-demand JSON document. * * ondemand::parser parser; * json_iterator doc = parser.iterate_raw(json); * * ### IMPORTANT: Buffer Lifetime * * Because parsing is done while you iterate, you *must* keep the JSON buffer around at least as * long as the document iteration. * * ### IMPORTANT: Document Lifetime * * Only one iteration at a time can happen per parser, and the parser *must* be kept alive during * iteration to ensure intermediate buffers can be accessed. Any document must be destroyed before * you call parse() again or destroy the parser. * * The ondemand::document instance holds the iterator. The document must remain in scope * while you are accessing instances of ondemand::value, ondemand::object, ondemand::array. * * ### REQUIRED: Buffer Padding * * The buffer must have at least SIMDJSON_PADDING extra allocated bytes. It does not matter what * those bytes are initialized to, as long as they are allocated. These bytes will be read: if you * are using a sanitizer that verifies that no uninitialized byte is read, then you should initialize the * SIMDJSON_PADDING bytes to avoid runtime warnings. * * @param json The JSON to parse. * * @return The iterator, or an error: * - INSUFFICIENT_PADDING if the input has less than SIMDJSON_PADDING extra bytes. * - MEMALLOC if the parser does not have enough capacity, and memory * allocation fails. * - EMPTY if the document is all whitespace. * - UTF8_ERROR if the document is not valid UTF-8. * - UNESCAPED_CHARS if a string contains control characters that must be escaped * - UNCLOSED_STRING if there is an unclosed string in the document. */ simdjson_warn_unused simdjson_result iterate_raw(padded_string_view json) & noexcept; /** * Parse a buffer containing many JSON documents.
* * auto json = R"({ "foo": 1 } { "foo": 2 } { "foo": 3 } )"_padded; * ondemand::parser parser; * ondemand::document_stream docs = parser.iterate_many(json); * for (auto & doc : docs) { * std::cout << doc["foo"] << std::endl; * } * // Prints 1 2 3 * * No copy of the input buffer is made. * * The function is lazy: it may be that no more than one JSON document at a time is parsed. * * The caller is responsible for ensuring that the input string data remains unchanged and is * not deleted during the loop. * * ### Format * * The buffer must contain a series of one or more JSON documents, concatenated into a single * buffer, separated by ASCII whitespace. It effectively parses until it has a fully valid document, * then starts parsing the next document at that point. (It does this with more parallelism and * lookahead than you might think, though.) * * Documents that consist of an object or array may omit the whitespace between them, concatenating * with no separator. Documents that consist of a single primitive (i.e. documents that are not * arrays or objects) MUST be separated with ASCII whitespace. * * The characters inside a JSON document, and between JSON documents, must be valid Unicode (UTF-8). * If there is a UTF-8 BOM, the parser skips it. * * The documents must not exceed batch_size bytes (by default 1MB) or they will fail to parse. * Setting batch_size to excessively large or excessively small values may negatively impact * performance. * * ### REQUIRED: Buffer Padding * * The buffer must have at least SIMDJSON_PADDING extra allocated bytes. It does not matter what * those bytes are initialized to, as long as they are allocated. These bytes will be read: if you * are using a sanitizer that verifies that no uninitialized byte is read, then you should initialize the * SIMDJSON_PADDING bytes to avoid runtime warnings. * * ### Threads * * When compiled with SIMDJSON_THREADS_ENABLED, this method will use a single thread under the * hood to do some lookahead.
* * ### Parser Capacity * * If the parser's current capacity is less than batch_size, it will allocate enough capacity * to handle it (up to max_capacity). * * @param buf The concatenated JSON to parse. * @param len The length of the concatenated JSON. * @param batch_size The batch size to use. MUST be larger than the largest document. The sweet * spot is cache-related: small enough to fit in cache, yet big enough to * parse as many documents as possible in one tight loop. * Defaults to DEFAULT_BATCH_SIZE (1000000 bytes, about 1MB), which has been a reasonable sweet spot in our tests. * @param allow_comma_separated (defaults to false) This allows a mode where the documents are * separated by commas instead of whitespace. It comes with a performance * penalty because the entire document is indexed at once (and the document must be * less than 4 GB), and there is no multithreading. In this mode, the batch_size parameter * is effectively ignored, as it is set to at least the document size. * @return The stream, or an error. An empty input will yield 0 documents rather than an EMPTY error. Errors: * - MEMALLOC if the parser does not have enough capacity and memory allocation fails * - CAPACITY if the parser does not have enough capacity and batch_size > max_capacity. * - other json errors if parsing fails. You should not rely on these errors to always be the same for the * same document: they may vary under runtime dispatch (so they may vary depending on your system and hardware).
*/ inline simdjson_result iterate_many(const uint8_t *buf, size_t len, size_t batch_size = DEFAULT_BATCH_SIZE, bool allow_comma_separated = false) noexcept; /** @overload iterate_many(const uint8_t *buf, size_t len, size_t batch_size, bool allow_comma_separated) */ inline simdjson_result iterate_many(const char *buf, size_t len, size_t batch_size = DEFAULT_BATCH_SIZE, bool allow_comma_separated = false) noexcept; /** @overload iterate_many(const uint8_t *buf, size_t len, size_t batch_size, bool allow_comma_separated) */ inline simdjson_result iterate_many(const std::string &s, size_t batch_size = DEFAULT_BATCH_SIZE, bool allow_comma_separated = false) noexcept; inline simdjson_result iterate_many(const std::string &&s, size_t batch_size, bool allow_comma_separated = false) = delete;// unsafe /** @overload iterate_many(const uint8_t *buf, size_t len, size_t batch_size, bool allow_comma_separated) */ inline simdjson_result iterate_many(const padded_string &s, size_t batch_size = DEFAULT_BATCH_SIZE, bool allow_comma_separated = false) noexcept; inline simdjson_result iterate_many(const padded_string &&s, size_t batch_size, bool allow_comma_separated = false) = delete;// unsafe /** @private We do not want to allow implicit conversion from C string to std::string. */ simdjson_result iterate_many(const char *buf, size_t batch_size = DEFAULT_BATCH_SIZE) noexcept = delete; /** The capacity of this parser (the largest document it can process). */ simdjson_inline size_t capacity() const noexcept; /** The maximum capacity of this parser (the largest document it is allowed to process). */ simdjson_inline size_t max_capacity() const noexcept; simdjson_inline void set_max_capacity(size_t max_capacity) noexcept; /** * The maximum depth of this parser (the most deeply nested objects and arrays it can process). * This parameter is only relevant when the macro SIMDJSON_DEVELOPMENT_CHECKS is set to true. * The document's instance current_depth() method should be used to monitor the parsing * depth and limit it if desired.
*/ simdjson_inline size_t max_depth() const noexcept; /** * Ensure this parser has enough memory to process JSON documents up to `capacity` bytes in length * and `max_depth` depth. * * The max_depth parameter is only relevant when the macro SIMDJSON_DEVELOPMENT_CHECKS is set to true. * The document's instance current_depth() method should be used to monitor the parsing * depth and limit it if desired. * * @param capacity The new capacity. * @param max_depth The new max_depth. Defaults to DEFAULT_MAX_DEPTH. * @return The error, if there is one. */ simdjson_warn_unused error_code allocate(size_t capacity, size_t max_depth=DEFAULT_MAX_DEPTH) noexcept; #ifdef SIMDJSON_THREADS_ENABLED /** * The parser instance can use threads when they are available to speed up some * operations. It is enabled by default. Changing this attribute will change the * behavior of the parser for future operations. */ bool threaded{true}; #endif /** * Unescape this JSON string, replacing \\ with \, \n with newline, etc. to a user-provided buffer. * The result must be valid UTF-8. * The provided pointer is advanced to the end of the string by reference, and a string_view instance * is returned. You can ensure that your buffer is large enough by allocating a block of memory at least * as large as the input JSON plus SIMDJSON_PADDING and then unescape all strings to this one buffer. * * This unescape function is a low-level function. If you want a more user-friendly approach, you should * avoid raw_json_string instances (e.g., by calling unescaped_key() instead of key() or get_string() * instead of get_raw_json_string()). * * ## IMPORTANT: string_view lifetime * * The string_view is only valid as long as the bytes in dst. * * @param raw_json_string input * @param dst A pointer to a buffer at least large enough to write this string as well as * an additional SIMDJSON_PADDING bytes. * @param allow_replacement Whether we allow a replacement if the input string contains unmatched surrogate pairs. 
* @return A string_view pointing at the unescaped string in dst * @error STRING_ERROR if escapes are incorrect. */ simdjson_inline simdjson_result unescape(raw_json_string in, uint8_t *&dst, bool allow_replacement = false) const noexcept; /** * Unescape this JSON string, replacing \\ with \, \n with newline, etc. to a user-provided buffer. * The result may not be valid UTF-8. See https://simonsapin.github.io/wtf-8/ * The provided pointer is advanced to the end of the string by reference, and a string_view instance * is returned. You can ensure that your buffer is large enough by allocating a block of memory at least * as large as the input JSON plus SIMDJSON_PADDING and then unescape all strings to this one buffer. * * This unescape function is a low-level function. If you want a more user-friendly approach, you should * avoid raw_json_string instances (e.g., by calling unescaped_key() instead of key() or get_string() * instead of get_raw_json_string()). * * ## IMPORTANT: string_view lifetime * * The string_view is only valid as long as the bytes in dst. * * @param raw_json_string input * @param dst A pointer to a buffer at least large enough to write this string as well as * an additional SIMDJSON_PADDING bytes. * @return A string_view pointing at the unescaped string in dst * @error STRING_ERROR if escapes are incorrect. 
*/ simdjson_inline simdjson_result unescape_wobbly(raw_json_string in, uint8_t *&dst) const noexcept; private: /** @private [for benchmarking access] The implementation to use */ std::unique_ptr implementation{}; size_t _capacity{0}; size_t _max_capacity; size_t _max_depth{DEFAULT_MAX_DEPTH}; std::unique_ptr string_buf{}; #if SIMDJSON_DEVELOPMENT_CHECKS std::unique_ptr start_positions{}; #endif friend class json_iterator; friend class document_stream; }; } // namespace ondemand } // namespace ppc64 } // namespace simdjson namespace simdjson { template<> struct simdjson_result : public ppc64::implementation_simdjson_result_base { public: simdjson_inline simdjson_result(ppc64::ondemand::parser &&value) noexcept; ///< @private simdjson_inline simdjson_result(error_code error) noexcept; ///< @private simdjson_inline simdjson_result() noexcept = default; }; } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_PARSER_H /* end file simdjson/generic/ondemand/parser.h for ppc64 */ // All other declarations /* including simdjson/generic/ondemand/array.h for ppc64: #include "simdjson/generic/ondemand/array.h" */ /* begin file simdjson/generic/ondemand/array.h for ppc64 */ #ifndef SIMDJSON_GENERIC_ONDEMAND_ARRAY_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_ARRAY_H */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { namespace ppc64 { namespace ondemand { /** * A forward-only JSON array. */ class array { public: /** * Create a new invalid array. * * Exists so you can declare a variable and later assign to it before use. 
*/ simdjson_inline array() noexcept = default; /** * Begin array iteration. * * Part of the std::iterable interface. */ simdjson_inline simdjson_result begin() noexcept; /** * Sentinel representing the end of the array. * * Part of the std::iterable interface. */ simdjson_inline simdjson_result end() noexcept; /** * This method scans the array and counts the number of elements. * The count_elements method should always be called before you have begun * iterating through the array: it is expected that you are pointing at * the beginning of the array. * The runtime complexity is linear in the size of the array. After * calling this function, if successful, the array is rewound to its * beginning as if it had never been accessed. If the JSON is malformed (e.g., * there is a missing comma), then an error is returned and it is no longer * safe to continue. * * To check that an array is empty, it is more performant to use * the is_empty() method. */ simdjson_inline simdjson_result count_elements() & noexcept; /** * This method scans the beginning of the array and checks whether the * array is empty. * The runtime complexity is constant time. After * calling this function, if successful, the array is rewound to its * beginning as if it had never been accessed. If the JSON is malformed (e.g., * there is a missing comma), then an error is returned and it is no longer * safe to continue. */ simdjson_inline simdjson_result is_empty() & noexcept; /** * Reset the iterator so that we are pointing back at the * beginning of the array. You should still consume values only once even if you * can iterate through the array more than once. If you unescape a string * within the array more than once, you have unsafe code. Note that rewinding * an array means that you may need to reparse it anew: it is not a free * operation.
* * @returns true if the array contains some elements (not empty) */ inline simdjson_result reset() & noexcept; /** * Get the value associated with the given JSON pointer. We use the RFC 6901 * https://tools.ietf.org/html/rfc6901 standard, interpreting the current node * as the root of its own JSON document. * * ondemand::parser parser; * auto json = R"([ { "foo": { "a": [ 10, 20, 30 ] }} ])"_padded; * auto doc = parser.iterate(json); * doc.at_pointer("/0/foo/a/1") == 20 * * Note that at_pointer() called on the document automatically calls the document's rewind * method between each call. It invalidates all previously accessed arrays, objects and values * that have not been consumed. Yet it is not the case when calling at_pointer on an array * instance: there is no rewind and no invalidation. * * You may only call at_pointer on an array after it has been created, but before it has * been first accessed. When calling at_pointer on an array, the pointer is advanced to * the location indicated by the JSON pointer (in case of success). It is no longer possible * to call at_pointer on the same array. * * Also note that at_pointer() relies on find_field() which implies that we do not unescape keys when matching. * * @return The value associated with the given JSON pointer, or: * - NO_SUCH_FIELD if a field does not exist in an object * - INDEX_OUT_OF_BOUNDS if an array index is larger than an array length * - INCORRECT_TYPE if a non-integer is used to access an array * - INVALID_JSON_POINTER if the JSON pointer is invalid and cannot be parsed */ inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; /** * Consumes the array and returns a string_view instance corresponding to the * array as represented in JSON. It points inside the original document. */ simdjson_inline simdjson_result raw_json() noexcept; /** * Get the value at the given index. This function has linear-time complexity. 
* This function should only be called once on an array instance since the array iterator is not reset between each call. * * @return The value at the given index, or: * - INDEX_OUT_OF_BOUNDS if the array index is larger than an array length */ simdjson_inline simdjson_result at(size_t index) noexcept; protected: /** * Go to the end of the array, no matter where you are right now. */ simdjson_inline error_code consume() noexcept; /** * Begin array iteration. * * @param iter The iterator. Must be where the initial [ is expected. Will be *moved* into the * resulting array. * @error INCORRECT_TYPE if the iterator is not at [. */ static simdjson_inline simdjson_result start(value_iterator &iter) noexcept; /** * Begin array iteration from the root. * * @param iter The iterator. Must be where the initial [ is expected. Will be *moved* into the * resulting array. * @error INCORRECT_TYPE if the iterator is not at [. * @error TAPE_ERROR if there is no closing ] at the end of the document. */ static simdjson_inline simdjson_result start_root(value_iterator &iter) noexcept; /** * Begin array iteration. * * This version of the method should be called after the initial [ has been verified, and is * intended for use by switch statements that check the type of a value. * * @param iter The iterator. Must be after the initial [. Will be *moved* into the resulting array. */ static simdjson_inline simdjson_result started(value_iterator &iter) noexcept; /** * Internal array creation. Call array::start() or array::started() instead of this. * * @param iter The iterator. Must either be at the start of the first element with iter.is_alive() * == true, or past the [] with is_alive() == false if the array is empty. Will be *moved* * into the resulting array. NOTE(review): the parameter is a const reference, so this * looks like a copy rather than a move - confirm. */ simdjson_inline array(const value_iterator &iter) noexcept; /** * Iterator marking current position. * * iter.is_alive() == false indicates iteration is complete.
*/ value_iterator iter{}; friend class value; friend class document; friend struct simdjson_result; friend struct simdjson_result; friend class array_iterator; }; } // namespace ondemand } // namespace ppc64 } // namespace simdjson namespace simdjson { template<> struct simdjson_result : public ppc64::implementation_simdjson_result_base { public: simdjson_inline simdjson_result(ppc64::ondemand::array &&value) noexcept; ///< @private simdjson_inline simdjson_result(error_code error) noexcept; ///< @private simdjson_inline simdjson_result() noexcept = default; simdjson_inline simdjson_result begin() noexcept; simdjson_inline simdjson_result end() noexcept; inline simdjson_result count_elements() & noexcept; inline simdjson_result is_empty() & noexcept; inline simdjson_result reset() & noexcept; simdjson_inline simdjson_result at(size_t index) noexcept; simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; simdjson_inline simdjson_result raw_json() noexcept; }; } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_ARRAY_H /* end file simdjson/generic/ondemand/array.h for ppc64 */ /* including simdjson/generic/ondemand/array_iterator.h for ppc64: #include "simdjson/generic/ondemand/array_iterator.h" */ /* begin file simdjson/generic/ondemand/array_iterator.h for ppc64 */ #ifndef SIMDJSON_GENERIC_ONDEMAND_ARRAY_ITERATOR_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_ARRAY_ITERATOR_H */ /* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { namespace ppc64 { namespace ondemand { /** * A forward-only 
JSON array. * * This is an input_iterator, meaning: * - It is forward-only * - * must be called exactly once per element. * - ++ must be called exactly once in between each * (*, ++, *, ++, * ...) */ class array_iterator { public: /** Create a new, invalid array iterator. */ simdjson_inline array_iterator() noexcept = default; // // Iterator interface // /** * Get the current element. * * Part of the std::iterator interface. */ simdjson_inline simdjson_result operator*() noexcept; // MUST ONLY BE CALLED ONCE PER ITERATION. /** * Check if we are at the end of the JSON array. * * Part of the std::iterator interface. * * @return true if there are no more elements in the JSON array. */ simdjson_inline bool operator==(const array_iterator &) const noexcept; /** * Check if there are more elements in the JSON array. * * Part of the std::iterator interface. * * @return true if there are more elements in the JSON array. */ simdjson_inline bool operator!=(const array_iterator &) const noexcept; /** * Move to the next element. * * Part of the std::iterator interface. */ simdjson_inline array_iterator &operator++() noexcept; private: value_iterator iter{}; simdjson_inline array_iterator(const value_iterator &iter) noexcept; friend class array; friend class value; friend struct simdjson_result; }; } // namespace ondemand } // namespace ppc64 } // namespace simdjson namespace simdjson { template<> struct simdjson_result : public ppc64::implementation_simdjson_result_base { public: simdjson_inline simdjson_result(ppc64::ondemand::array_iterator &&value) noexcept; ///< @private simdjson_inline simdjson_result(error_code error) noexcept; ///< @private simdjson_inline simdjson_result() noexcept = default; // // Iterator interface // simdjson_inline simdjson_result operator*() noexcept; // MUST ONLY BE CALLED ONCE PER ITERATION. 
simdjson_inline bool operator==(const simdjson_result &) const noexcept; simdjson_inline bool operator!=(const simdjson_result &) const noexcept; simdjson_inline simdjson_result &operator++() noexcept; }; } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_ARRAY_ITERATOR_H /* end file simdjson/generic/ondemand/array_iterator.h for ppc64 */ /* including simdjson/generic/ondemand/document.h for ppc64: #include "simdjson/generic/ondemand/document.h" */ /* begin file simdjson/generic/ondemand/document.h for ppc64 */ #ifndef SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_H */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { namespace ppc64 { namespace ondemand { /** * A JSON document. It holds a json_iterator instance. * * Used by tokens to get text, and string buffer location. * * You must keep the document around during iteration. */ class document { public: /** * Create a new invalid document. * * Exists so you can declare a variable and later assign to it before use. */ simdjson_inline document() noexcept = default; simdjson_inline document(const document &other) noexcept = delete; // pass your documents by reference, not by copy simdjson_inline document(document &&other) noexcept = default; simdjson_inline document &operator=(const document &other) noexcept = delete; simdjson_inline document &operator=(document &&other) noexcept = default; /** * Cast this JSON value to an array. * * @returns An object that can be used to iterate the array. * @returns INCORRECT_TYPE If the JSON value is not an array. 
*/ simdjson_inline simdjson_result get_array() & noexcept; /** * Cast this JSON value to an object. * * @returns An object that can be used to look up or iterate fields. * @returns INCORRECT_TYPE If the JSON value is not an object. */ simdjson_inline simdjson_result get_object() & noexcept; /** * Cast this JSON value to an unsigned integer. * * @returns An unsigned 64-bit integer. * @returns INCORRECT_TYPE If the JSON value is not a 64-bit unsigned integer. */ simdjson_inline simdjson_result get_uint64() noexcept; /** * Cast this JSON value (inside string) to an unsigned integer. * * @returns An unsigned 64-bit integer. * @returns INCORRECT_TYPE If the JSON value is not a 64-bit unsigned integer. */ simdjson_inline simdjson_result get_uint64_in_string() noexcept; /** * Cast this JSON value to a signed integer. * * @returns A signed 64-bit integer. * @returns INCORRECT_TYPE If the JSON value is not a 64-bit integer. */ simdjson_inline simdjson_result get_int64() noexcept; /** * Cast this JSON value (inside string) to a signed integer. * * @returns A signed 64-bit integer. * @returns INCORRECT_TYPE If the JSON value is not a 64-bit integer. */ simdjson_inline simdjson_result get_int64_in_string() noexcept; /** * Cast this JSON value to a double. * * @returns A double. * @returns INCORRECT_TYPE If the JSON value is not a valid floating-point number. */ simdjson_inline simdjson_result get_double() noexcept; /** * Cast this JSON value (inside string) to a double. * * @returns A double. * @returns INCORRECT_TYPE If the JSON value is not a valid floating-point number. */ simdjson_inline simdjson_result get_double_in_string() noexcept; /** * Cast this JSON value to a string. * * The string is guaranteed to be valid UTF-8. * * Important: Calling get_string() twice on the same document is an error. * * @param allow_replacement Whether to allow a replacement character for unmatched surrogate pairs. * @returns A UTF-8 string. 
The string is stored in the parser and will be invalidated the next * time it parses a document or when it is destroyed. * @returns INCORRECT_TYPE if the JSON value is not a string. */ simdjson_inline simdjson_result get_string(bool allow_replacement = false) noexcept; /** * Attempts to fill the provided std::string reference with the parsed value of the current string. * * The string is guaranteed to be valid UTF-8. * * Important: a value should be consumed once. Calling get_string() twice on the same value * is an error. * * Performance: This method may be slower than get_string() or get_string(bool) because it may need to allocate memory. * We recommend you avoid allocating an std::string unless you need to. * * @param receiver The string that receives the parsed value. * @param allow_replacement Whether to allow a replacement character for unmatched surrogate pairs. * @returns INCORRECT_TYPE if the JSON value is not a string. Otherwise, we return SUCCESS. */ template simdjson_inline error_code get_string(string_type& receiver, bool allow_replacement = false) noexcept; /** * Cast this JSON value to a string. * * The string is not guaranteed to be valid UTF-8. See https://simonsapin.github.io/wtf-8/ * * Important: Calling get_wobbly_string() twice on the same document is an error. * * @returns A string that is not guaranteed to be valid UTF-8. The string is stored in the parser and will be invalidated the next * time it parses a document or when it is destroyed. * @returns INCORRECT_TYPE if the JSON value is not a string. */ simdjson_inline simdjson_result get_wobbly_string() noexcept; /** * Cast this JSON value to a raw_json_string. * * The string is guaranteed to be valid UTF-8, and may have escapes in it (e.g. \\ or \n). * * @returns A pointer to the raw JSON for the given string. * @returns INCORRECT_TYPE if the JSON value is not a string. */ simdjson_inline simdjson_result get_raw_json_string() noexcept; /** * Cast this JSON value to a bool. * * @returns A bool value. 
* @returns INCORRECT_TYPE if the JSON value is not true or false. 
*/ simdjson_inline simdjson_result get_bool() noexcept; /** * Cast this JSON value to a value when the document is an object or an array. * * You must not have begun iterating through the object or array. When * SIMDJSON_DEVELOPMENT_CHECKS is set to 1 (which is the case when building in Debug mode * by default), and you have already begun iterating, * you will get an OUT_OF_ORDER_ITERATION error. If you have begun iterating, you can use * rewind() to reset the document to its initial state before calling this method. * * @returns A value if the document is a JSON array or object. * @returns SCALAR_DOCUMENT_AS_VALUE error if the document is a scalar (see is_scalar() function). */ simdjson_inline simdjson_result get_value() noexcept; /** * Checks if this JSON value is null. If and only if the value is * null, then it is consumed (we advance). If we find a token that * begins with 'n' but is not 'null', then an error is returned. * * @returns Whether the value is null. * @returns INCORRECT_TYPE If the JSON value begins with 'n' and is not 'null'. */ simdjson_inline simdjson_result is_null() noexcept; /** * Get this value as the given type. * * Supported types: object, array, raw_json_string, string_view, uint64_t, int64_t, double, bool * * You may use get_double(), get_bool(), get_uint64(), get_int64(), * get_object(), get_array(), get_raw_json_string(), or get_string() instead. * * @returns A value of the given type, parsed from the JSON. * @returns INCORRECT_TYPE If the JSON value is not the given type. */ template simdjson_inline simdjson_result get() & noexcept { // Unless the simdjson library provides an inline implementation, calling this method should // immediately fail. static_assert(!sizeof(T), "The get method with given type is not implemented by the simdjson library. " "The supported types are ondemand::object, ondemand::array, raw_json_string, std::string_view, uint64_t, " "int64_t, double, and bool. 
We recommend you use get_double(), get_bool(), get_uint64(), get_int64(), " " get_object(), get_array(), get_raw_json_string(), or get_string() instead of the get template."); } /** @overload template simdjson_result get() & noexcept */ template simdjson_inline simdjson_result get() && noexcept { // Unless the simdjson library provides an inline implementation, calling this method should // immediately fail. static_assert(!sizeof(T), "The get method with given type is not implemented by the simdjson library. " "The supported types are ondemand::object, ondemand::array, raw_json_string, std::string_view, uint64_t, " "int64_t, double, and bool. We recommend you use get_double(), get_bool(), get_uint64(), get_int64(), " " get_object(), get_array(), get_raw_json_string(), or get_string() instead of the get template."); } /** * Get this value as the given type. * * Supported types: object, array, raw_json_string, string_view, uint64_t, int64_t, double, bool, value * * Be mindful that the document instance must remain in scope while you are accessing object, array and value instances. * * @param out This is set to a value of the given type, parsed from the JSON. If there is an error, this may not be initialized. * @returns INCORRECT_TYPE If the JSON value is not an object. * @returns SUCCESS If the parse succeeded and the out parameter was set to the value. */ template simdjson_inline error_code get(T &out) & noexcept; /** @overload template error_code get(T &out) & noexcept */ template simdjson_inline error_code get(T &out) && noexcept; #if SIMDJSON_EXCEPTIONS /** * Cast this JSON value to an array. * * @returns An object that can be used to iterate the array. * @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not an array. */ simdjson_inline operator array() & noexcept(false); /** * Cast this JSON value to an object. * * @returns An object that can be used to look up or iterate fields. 
* @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not an object. */ simdjson_inline operator object() & noexcept(false); /** * Cast this JSON value to an unsigned integer. * * @returns An unsigned 64-bit integer. * @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not a 64-bit unsigned integer. */ simdjson_inline operator uint64_t() noexcept(false); /** * Cast this JSON value to a signed integer. * * @returns A signed 64-bit integer. * @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not a 64-bit integer. */ simdjson_inline operator int64_t() noexcept(false); /** * Cast this JSON value to a double. * * @returns A double. * @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not a valid floating-point number. */ simdjson_inline operator double() noexcept(false); /** * Cast this JSON value to a string. * * The string is guaranteed to be valid UTF-8. * * @returns A UTF-8 string. The string is stored in the parser and will be invalidated the next * time it parses a document or when it is destroyed. * @exception simdjson_error(INCORRECT_TYPE) if the JSON value is not a string. */ simdjson_inline operator std::string_view() noexcept(false); /** * Cast this JSON value to a raw_json_string. * * The string is guaranteed to be valid UTF-8, and may have escapes in it (e.g. \\ or \n). * * @returns A pointer to the raw JSON for the given string. * @exception simdjson_error(INCORRECT_TYPE) if the JSON value is not a string. */ simdjson_inline operator raw_json_string() noexcept(false); /** * Cast this JSON value to a bool. * * @returns A bool value. * @exception simdjson_error(INCORRECT_TYPE) if the JSON value is not true or false. */ simdjson_inline operator bool() noexcept(false); /** * Cast this JSON value to a value when the document is an object or an array. * * You must not have begun iterating through the object or array. 
When * SIMDJSON_DEVELOPMENT_CHECKS is defined, and you have already begun iterating, * you will get an OUT_OF_ORDER_ITERATION error. If you have begun iterating, you can use * rewind() to reset the document to its initial state before calling this method. * * @returns A value if the document is a JSON array or object. * @exception SCALAR_DOCUMENT_AS_VALUE error if the document is a scalar (see is_scalar() function). */ simdjson_inline operator value() noexcept(false); #endif /** * This method scans the array and counts the number of elements. * The count_elements method should always be called before you have begun * iterating through the array: it is expected that you are pointing at * the beginning of the array. * The runtime complexity is linear in the size of the array. After * calling this function, if successful, the array is 'rewinded' at its * beginning as if it had never been accessed. If the JSON is malformed (e.g., * there is a missing comma), then an error is returned and it is no longer * safe to continue. */ simdjson_inline simdjson_result count_elements() & noexcept; /** * This method scans the object and counts the number of key-value pairs. * The count_fields method should always be called before you have begun * iterating through the object: it is expected that you are pointing at * the beginning of the object. * The runtime complexity is linear in the size of the object. After * calling this function, if successful, the object is 'rewinded' at its * beginning as if it had never been accessed. If the JSON is malformed (e.g., * there is a missing comma), then an error is returned and it is no longer * safe to continue. * * To check that an object is empty, it is more performant to use * the is_empty() method. */ simdjson_inline simdjson_result count_fields() & noexcept; /** * Get the value at the given index in the array. This function has linear-time complexity. 
* This function should only be called once on an array instance since the array iterator is not reset between each call. * * @return The value at the given index, or: * - INDEX_OUT_OF_BOUNDS if the array index is larger than an array length */ simdjson_inline simdjson_result at(size_t index) & noexcept; /** * Begin array iteration. * * Part of the std::iterable interface. */ simdjson_inline simdjson_result begin() & noexcept; /** * Sentinel representing the end of the array. * * Part of the std::iterable interface. */ simdjson_inline simdjson_result end() & noexcept; /** * Look up a field by name on an object (order-sensitive). * * The following code reads z, then y, then x, and thus will not retrieve x or y if fed the * JSON `{ "x": 1, "y": 2, "z": 3 }`: * * ```c++ * simdjson::ondemand::parser parser; * auto obj = parser.parse(R"( { "x": 1, "y": 2, "z": 3 } )"_padded); * double z = obj.find_field("z"); * double y = obj.find_field("y"); * double x = obj.find_field("x"); * ``` * * **Raw Keys:** The lookup will be done against the *raw* key, and will not unescape keys. * e.g. `object["a"]` will match `{ "a": 1 }`, but will *not* match `{ "\u0061": 1 }`. * * * You must consume the fields on an object one at a time. A request for a new key * invalidates previous field values: it makes them unsafe. E.g., the array * given by content["bids"].get_array() should not be accessed after you have called * content["asks"].get_array(). You can detect such mistakes by first compiling and running * the code in Debug mode (or with the macro `SIMDJSON_DEVELOPMENT_CHECKS` set to 1): an * OUT_OF_ORDER_ITERATION error is generated. * * You are expected to access keys only once. You should access the value corresponding to * a key a single time. Doing object["mykey"].to_string()and then again object["mykey"].to_string() * is an error. * * @param key The key to look up. * @returns The value of the field, or NO_SUCH_FIELD if the field is not in the object. 
*/ simdjson_inline simdjson_result find_field(std::string_view key) & noexcept; /** @overload simdjson_inline simdjson_result find_field(std::string_view key) & noexcept; */ simdjson_inline simdjson_result find_field(const char *key) & noexcept; /** * Look up a field by name on an object, without regard to key order. * * **Performance Notes:** This is a bit less performant than find_field(), though its effect varies * and often appears negligible. It starts out normally, starting out at the last field; but if * the field is not found, it scans from the beginning of the object to see if it missed it. That * missing case has a non-cache-friendly bump and lots of extra scanning, especially if the object * in question is large. The fact that the extra code is there also bumps the executable size. * * It is the default, however, because it would be highly surprising (and hard to debug) if the * default behavior failed to look up a field just because it was in the wrong order--and many * APIs assume this. Therefore, you must be explicit if you want to treat objects as out of order. * * Use find_field() if you are sure fields will be in order (or are willing to treat it as if the * field wasn't there when they aren't). * * You must consume the fields on an object one at a time. A request for a new key * invalidates previous field values: it makes them unsafe. E.g., the array * given by content["bids"].get_array() should not be accessed after you have called * content["asks"].get_array(). You can detect such mistakes by first compiling and running * the code in Debug mode (or with the macro `SIMDJSON_DEVELOPMENT_CHECKS` set to 1): an * OUT_OF_ORDER_ITERATION error is generated. * * You are expected to access keys only once. You should access the value corresponding to a key * a single time. Doing object["mykey"].to_string() and then again object["mykey"].to_string() * is an error. * * @param key The key to look up. 
* @returns The value of the field, or NO_SUCH_FIELD if the field is not in the object. */ simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; */ simdjson_inline simdjson_result find_field_unordered(const char *key) & noexcept; /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; */ simdjson_inline simdjson_result operator[](std::string_view key) & noexcept; /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; */ simdjson_inline simdjson_result operator[](const char *key) & noexcept; /** * Get the type of this JSON value. It does not validate or consume the value. * E.g., you must still call "is_null()" to check that a value is null even if * "type()" returns json_type::null. * * NOTE: If you're only expecting a value to be one type (a typical case), it's generally * better to just call .get_double, .get_string, etc. and check for INCORRECT_TYPE (or just * let it throw an exception). * * @error TAPE_ERROR when the JSON value is a bad token like "}" "," or "alse". */ simdjson_inline simdjson_result type() noexcept; /** * Checks whether the document is a scalar (string, number, null, Boolean). * Returns false when it is an array or object. * * @returns true if the type is string, number, null, Boolean * @error TAPE_ERROR when the JSON value is a bad token like "}" "," or "alse". */ simdjson_inline simdjson_result is_scalar() noexcept; /** * Checks whether the document is a negative number. * * @returns true if the number is negative. */ simdjson_inline bool is_negative() noexcept; /** * Checks whether the document is an integer number. Note that * this requires partially parsing the number string. 
If * the value is determined to be an integer, it may still * not parse properly as an integer in subsequent steps * (e.g., it might overflow). * * @returns true if the number is an integer. */ simdjson_inline simdjson_result is_integer() noexcept; /** * Determine the number type (integer or floating-point number) as quickly * as possible. This function does not fully validate the input. It is * useful when you only need to classify the numbers, without parsing them. * * If you are planning to retrieve the value or you need full validation, * consider using the get_number() method instead: it will fully parse * and validate the input, and give you access to the type: * get_number().get_number_type(). * * get_number_type() is number_type::unsigned_integer if we have * an integer greater or equal to 9223372036854775808 * get_number_type() is number_type::signed_integer if we have an * integer that is less than 9223372036854775808 * Otherwise, get_number_type() has value number_type::floating_point_number * * This function requires processing the number string, but it is expected * to be faster than get_number().get_number_type() because it does not * parse the number value. * * @returns the type of the number */ simdjson_inline simdjson_result get_number_type() noexcept; /** * Attempt to parse an ondemand::number. An ondemand::number may * contain an integer value or a floating-point value, the simdjson * library will autodetect the type. Thus it is a dynamically typed * number. Before accessing the value, you must determine the detected * type. * * number.get_number_type() is number_type::signed_integer if we have * an integer in [-9223372036854775808,9223372036854775808) * You can recover the value by calling number.get_int64() and you * have that number.is_int64() is true. 
* * number.get_number_type() is number_type::unsigned_integer if we have * an integer in [9223372036854775808,18446744073709551616) * You can recover the value by calling number.get_uint64() and you * have that number.is_uint64() is true. * * Otherwise, number.get_number_type() has value number_type::floating_point_number * and we have a binary64 number. * You can recover the value by calling number.get_double() and you * have that number.is_double() is true. * * You must check the type before accessing the value: it is an error * to call "get_int64()" when number.get_number_type() is not * number_type::signed_integer and when number.is_int64() is false. */ simdjson_warn_unused simdjson_inline simdjson_result get_number() noexcept; /** * Get the raw JSON for this token. * * The string_view will always point into the input buffer. * * The string_view will start at the beginning of the token, and include the entire token * *as well as all spaces until the next token (or EOF).* This means, for example, that a * string token always begins with a " and is always terminated by the final ", possibly * followed by a number of spaces. * * The string_view is *not* null-terminated. If this is a scalar (string, number, * boolean, or null), the character after the end of the string_view may be the padded buffer. * * Tokens include: * - { * - [ * - "a string (possibly with UTF-8 or backslashed characters like \\\")". * - -1.2e-100 * - true * - false * - null */ simdjson_inline simdjson_result raw_json_token() noexcept; /** * Reset the iterator inside the document instance so we are pointing back at the * beginning of the document, as if it had just been created. It invalidates all * values, objects and arrays that you have created so far (including unescaped strings). */ inline void rewind() noexcept; /** * Returns debugging information. */ inline std::string to_debug_string() noexcept; /** * Some unrecoverable error conditions may render the document instance unusable. 
* The is_alive() method returns true when the document is still suitable. */ inline bool is_alive() noexcept; /** * Returns the current location in the document if in bounds. */ inline simdjson_result current_location() const noexcept; /** * Returns true if this document has been fully parsed. * If you have consumed the whole document and at_end() returns * false, then there may be trailing content. */ inline bool at_end() const noexcept; /** * Returns the current depth in the document if in bounds. * * E.g., * 0 = finished with document * 1 = document root value (could be [ or {, not yet known) * 2 = , or } inside root array/object * 3 = key or value inside root array/object. */ simdjson_inline int32_t current_depth() const noexcept; /** * Get the value associated with the given JSON pointer. We use the RFC 6901 * https://tools.ietf.org/html/rfc6901 standard. * * ondemand::parser parser; * auto json = R"({ "foo": { "a": [ 10, 20, 30 ] }})"_padded; * auto doc = parser.iterate(json); * doc.at_pointer("/foo/a/1") == 20 * * It is allowed for a key to be the empty string: * * ondemand::parser parser; * auto json = R"({ "": { "a": [ 10, 20, 30 ] }})"_padded; * auto doc = parser.iterate(json); * doc.at_pointer("//a/1") == 20 * * Note that at_pointer() automatically calls rewind between each call. Thus * all values, objects and arrays that you have created so far (including unescaped strings) * are invalidated. After calling at_pointer, you need to consume the result: string values * should be stored in your own variables, arrays should be decoded and stored in your own array-like * structures and so forth. 
* * Also note that at_pointer() relies on find_field() which implies that we do not unescape keys when matching. * * @return The value associated with the given JSON pointer, or: * - NO_SUCH_FIELD if a field does not exist in an object * - INDEX_OUT_OF_BOUNDS if an array index is larger than an array length * - INCORRECT_TYPE if a non-integer is used to access an array * - INVALID_JSON_POINTER if the JSON pointer is invalid and cannot be parsed * - SCALAR_DOCUMENT_AS_VALUE if the json_pointer is empty and the document is a scalar (see is_scalar() function). */ simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; /** * Consumes the document and returns a string_view instance corresponding to the * document as represented in JSON. It points inside the original byte array containing * the JSON document. */ simdjson_inline simdjson_result raw_json() noexcept; protected: /** * Consumes the document. */ simdjson_inline error_code consume() noexcept; simdjson_inline document(ondemand::json_iterator &&iter) noexcept; simdjson_inline const uint8_t *text(uint32_t idx) const noexcept; simdjson_inline value_iterator resume_value_iterator() noexcept; simdjson_inline value_iterator get_root_value_iterator() noexcept; simdjson_inline simdjson_result start_or_resume_object() noexcept; static simdjson_inline document start(ondemand::json_iterator &&iter) noexcept; // // Fields // json_iterator iter{}; ///< Current position in the document static constexpr depth_t DOCUMENT_DEPTH = 0; ///< document depth is always 0 friend class array_iterator; friend class value; friend class ondemand::parser; friend class object; friend class array; friend class field; friend class token; friend class document_stream; friend class document_reference; }; /** * A document_reference is a thin wrapper around a document reference instance. 
*/ class document_reference { public: simdjson_inline document_reference() noexcept; simdjson_inline document_reference(document &d) noexcept; simdjson_inline document_reference(const document_reference &other) noexcept = default; simdjson_inline document_reference& operator=(const document_reference &other) noexcept = default; simdjson_inline void rewind() noexcept; simdjson_inline simdjson_result get_array() & noexcept; simdjson_inline simdjson_result get_object() & noexcept; simdjson_inline simdjson_result get_uint64() noexcept; simdjson_inline simdjson_result get_uint64_in_string() noexcept; simdjson_inline simdjson_result get_int64() noexcept; simdjson_inline simdjson_result get_int64_in_string() noexcept; simdjson_inline simdjson_result get_double() noexcept; simdjson_inline simdjson_result get_double_in_string() noexcept; simdjson_inline simdjson_result get_string(bool allow_replacement = false) noexcept; template simdjson_inline error_code get_string(string_type& receiver, bool allow_replacement = false) noexcept; simdjson_inline simdjson_result get_wobbly_string() noexcept; simdjson_inline simdjson_result get_raw_json_string() noexcept; simdjson_inline simdjson_result get_bool() noexcept; simdjson_inline simdjson_result get_value() noexcept; simdjson_inline simdjson_result is_null() noexcept; simdjson_inline simdjson_result raw_json() noexcept; simdjson_inline operator document&() const noexcept; #if SIMDJSON_EXCEPTIONS simdjson_inline operator array() & noexcept(false); simdjson_inline operator object() & noexcept(false); simdjson_inline operator uint64_t() noexcept(false); simdjson_inline operator int64_t() noexcept(false); simdjson_inline operator double() noexcept(false); simdjson_inline operator std::string_view() noexcept(false); simdjson_inline operator raw_json_string() noexcept(false); simdjson_inline operator bool() noexcept(false); simdjson_inline operator value() noexcept(false); #endif simdjson_inline simdjson_result count_elements() & 
noexcept; simdjson_inline simdjson_result count_fields() & noexcept; simdjson_inline simdjson_result at(size_t index) & noexcept; simdjson_inline simdjson_result begin() & noexcept; simdjson_inline simdjson_result end() & noexcept; simdjson_inline simdjson_result find_field(std::string_view key) & noexcept; simdjson_inline simdjson_result find_field(const char *key) & noexcept; simdjson_inline simdjson_result operator[](std::string_view key) & noexcept; simdjson_inline simdjson_result operator[](const char *key) & noexcept; simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; simdjson_inline simdjson_result find_field_unordered(const char *key) & noexcept; simdjson_inline simdjson_result type() noexcept; simdjson_inline simdjson_result is_scalar() noexcept; simdjson_inline simdjson_result current_location() noexcept; simdjson_inline int32_t current_depth() const noexcept; simdjson_inline bool is_negative() noexcept; simdjson_inline simdjson_result is_integer() noexcept; simdjson_inline simdjson_result get_number_type() noexcept; simdjson_inline simdjson_result get_number() noexcept; simdjson_inline simdjson_result raw_json_token() noexcept; simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; private: document *doc{nullptr}; }; } // namespace ondemand } // namespace ppc64 } // namespace simdjson namespace simdjson { template<> struct simdjson_result : public ppc64::implementation_simdjson_result_base { public: simdjson_inline simdjson_result(ppc64::ondemand::document &&value) noexcept; ///< @private simdjson_inline simdjson_result(error_code error) noexcept; ///< @private simdjson_inline simdjson_result() noexcept = default; simdjson_inline error_code rewind() noexcept; simdjson_inline simdjson_result get_array() & noexcept; simdjson_inline simdjson_result get_object() & noexcept; simdjson_inline simdjson_result get_uint64() noexcept; simdjson_inline simdjson_result get_uint64_in_string() noexcept; 
simdjson_inline simdjson_result get_int64() noexcept; simdjson_inline simdjson_result get_int64_in_string() noexcept; simdjson_inline simdjson_result get_double() noexcept; simdjson_inline simdjson_result get_double_in_string() noexcept; simdjson_inline simdjson_result get_string(bool allow_replacement = false) noexcept; template simdjson_inline error_code get_string(string_type& receiver, bool allow_replacement = false) noexcept; simdjson_inline simdjson_result get_wobbly_string() noexcept; simdjson_inline simdjson_result get_raw_json_string() noexcept; simdjson_inline simdjson_result get_bool() noexcept; simdjson_inline simdjson_result get_value() noexcept; simdjson_inline simdjson_result is_null() noexcept; template simdjson_inline simdjson_result get() & noexcept; template simdjson_inline simdjson_result get() && noexcept; template simdjson_inline error_code get(T &out) & noexcept; template simdjson_inline error_code get(T &out) && noexcept; #if SIMDJSON_EXCEPTIONS simdjson_inline operator ppc64::ondemand::array() & noexcept(false); simdjson_inline operator ppc64::ondemand::object() & noexcept(false); simdjson_inline operator uint64_t() noexcept(false); simdjson_inline operator int64_t() noexcept(false); simdjson_inline operator double() noexcept(false); simdjson_inline operator std::string_view() noexcept(false); simdjson_inline operator ppc64::ondemand::raw_json_string() noexcept(false); simdjson_inline operator bool() noexcept(false); simdjson_inline operator ppc64::ondemand::value() noexcept(false); #endif simdjson_inline simdjson_result count_elements() & noexcept; simdjson_inline simdjson_result count_fields() & noexcept; simdjson_inline simdjson_result at(size_t index) & noexcept; simdjson_inline simdjson_result begin() & noexcept; simdjson_inline simdjson_result end() & noexcept; simdjson_inline simdjson_result find_field(std::string_view key) & noexcept; simdjson_inline simdjson_result find_field(const char *key) & noexcept; simdjson_inline 
simdjson_result operator[](std::string_view key) & noexcept; simdjson_inline simdjson_result operator[](const char *key) & noexcept; simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; simdjson_inline simdjson_result find_field_unordered(const char *key) & noexcept; simdjson_inline simdjson_result type() noexcept; simdjson_inline simdjson_result is_scalar() noexcept; simdjson_inline simdjson_result current_location() noexcept; simdjson_inline int32_t current_depth() const noexcept; simdjson_inline bool at_end() const noexcept; simdjson_inline bool is_negative() noexcept; simdjson_inline simdjson_result is_integer() noexcept; simdjson_inline simdjson_result get_number_type() noexcept; simdjson_inline simdjson_result get_number() noexcept; /** @copydoc simdjson_inline std::string_view document::raw_json_token() const noexcept */ simdjson_inline simdjson_result raw_json_token() noexcept; simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; }; } // namespace simdjson namespace simdjson { template<> struct simdjson_result : public ppc64::implementation_simdjson_result_base { public: simdjson_inline simdjson_result(ppc64::ondemand::document_reference value, error_code error) noexcept; simdjson_inline simdjson_result() noexcept = default; simdjson_inline error_code rewind() noexcept; simdjson_inline simdjson_result get_array() & noexcept; simdjson_inline simdjson_result get_object() & noexcept; simdjson_inline simdjson_result get_uint64() noexcept; simdjson_inline simdjson_result get_uint64_in_string() noexcept; simdjson_inline simdjson_result get_int64() noexcept; simdjson_inline simdjson_result get_int64_in_string() noexcept; simdjson_inline simdjson_result get_double() noexcept; simdjson_inline simdjson_result get_double_in_string() noexcept; simdjson_inline simdjson_result get_string(bool allow_replacement = false) noexcept; template simdjson_inline error_code get_string(string_type& receiver, bool 
allow_replacement = false) noexcept; simdjson_inline simdjson_result get_wobbly_string() noexcept; simdjson_inline simdjson_result get_raw_json_string() noexcept; simdjson_inline simdjson_result get_bool() noexcept; simdjson_inline simdjson_result get_value() noexcept; simdjson_inline simdjson_result is_null() noexcept; #if SIMDJSON_EXCEPTIONS simdjson_inline operator ppc64::ondemand::array() & noexcept(false); simdjson_inline operator ppc64::ondemand::object() & noexcept(false); simdjson_inline operator uint64_t() noexcept(false); simdjson_inline operator int64_t() noexcept(false); simdjson_inline operator double() noexcept(false); simdjson_inline operator std::string_view() noexcept(false); simdjson_inline operator ppc64::ondemand::raw_json_string() noexcept(false); simdjson_inline operator bool() noexcept(false); simdjson_inline operator ppc64::ondemand::value() noexcept(false); #endif simdjson_inline simdjson_result count_elements() & noexcept; simdjson_inline simdjson_result count_fields() & noexcept; simdjson_inline simdjson_result at(size_t index) & noexcept; simdjson_inline simdjson_result begin() & noexcept; simdjson_inline simdjson_result end() & noexcept; simdjson_inline simdjson_result find_field(std::string_view key) & noexcept; simdjson_inline simdjson_result find_field(const char *key) & noexcept; simdjson_inline simdjson_result operator[](std::string_view key) & noexcept; simdjson_inline simdjson_result operator[](const char *key) & noexcept; simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; simdjson_inline simdjson_result find_field_unordered(const char *key) & noexcept; simdjson_inline simdjson_result type() noexcept; simdjson_inline simdjson_result is_scalar() noexcept; simdjson_inline simdjson_result current_location() noexcept; simdjson_inline simdjson_result current_depth() const noexcept; simdjson_inline simdjson_result is_negative() noexcept; simdjson_inline simdjson_result is_integer() noexcept; 
simdjson_inline simdjson_result get_number_type() noexcept; simdjson_inline simdjson_result get_number() noexcept; /** @copydoc simdjson_inline std::string_view document_reference::raw_json_token() const noexcept */ simdjson_inline simdjson_result raw_json_token() noexcept; simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; }; } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_H /* end file simdjson/generic/ondemand/document.h for ppc64 */ /* including simdjson/generic/ondemand/document_stream.h for ppc64: #include "simdjson/generic/ondemand/document_stream.h" */ /* begin file simdjson/generic/ondemand/document_stream.h for ppc64 */ #ifndef SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_STREAM_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_STREAM_H */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/document.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/parser.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ #ifdef SIMDJSON_THREADS_ENABLED #include <thread> #include <mutex> #include <condition_variable> #endif namespace simdjson { namespace ppc64 { namespace ondemand { #ifdef SIMDJSON_THREADS_ENABLED /** @private Custom worker class: owns the std::thread used to run stage 1 jobs (see run() and finish()). **/ struct stage1_worker { stage1_worker() noexcept = default; /* Copy construction, move construction and copy assignment are all deleted. 
*/ stage1_worker(const stage1_worker&) = delete; stage1_worker(stage1_worker&&) = delete; stage1_worker& operator=(const stage1_worker&) = delete; ~stage1_worker(); /** * We only start the thread when it is needed, not at object construction, this may throw. * You should only call this once. **/ void start_thread(); /** * Start a stage 1 job.
You should first call 'run', then 'finish'. * You must call start_thread once before. */ void run(document_stream * ds, parser * stage1, size_t next_batch_start); /** Wait for the run to finish (blocking). You should first call 'run', then 'finish'. **/ void finish(); private: /** * Normally, we would never stop the thread. But we do in the destructor. * This function is only safe assuming that you are not waiting for results. You * should have called run, then finish, and be done. **/ void stop_thread(); std::thread thread{}; /** These three variables define the work done by the thread. **/ ondemand::parser * stage1_thread_parser{}; size_t _next_batch_start{}; document_stream * owner{}; /** * We have two state variables. This could be streamlined to one variable in the future but * we use two for clarity. */ bool has_work{false}; bool can_work{true}; /** * We lock using a mutex. */ std::mutex locking_mutex{}; std::condition_variable cond_var{}; friend class document_stream; }; #endif // SIMDJSON_THREADS_ENABLED /** * A forward-only stream of documents. * * Produced by parser::iterate_many. * */ class document_stream { public: /** * Construct an uninitialized document_stream. * * ```c++ * document_stream docs; * auto error = parser.iterate_many(json).get(docs); * ``` */ simdjson_inline document_stream() noexcept; /** Move one document_stream to another. */ simdjson_inline document_stream(document_stream &&other) noexcept = default; /** Move one document_stream to another. */ simdjson_inline document_stream &operator=(document_stream &&other) noexcept = default; simdjson_inline ~document_stream() noexcept; /** * Returns the input size in bytes. */ inline size_t size_in_bytes() const noexcept; /** * After iterating through the stream, this method * returns the number of bytes that were not parsed at the end * of the stream.
If truncated_bytes() differs from zero, * then the input was truncated maybe because incomplete JSON * documents were found at the end of the stream. You * may need to process the bytes in the interval [size_in_bytes()-truncated_bytes(), size_in_bytes()). * * You should only call truncated_bytes() after streaming through all * documents, like so: * * document_stream stream = parser.iterate_many(json,window); * for(auto & doc : stream) { * // do something with doc * } * size_t truncated = stream.truncated_bytes(); * */ inline size_t truncated_bytes() const noexcept; class iterator { public: using value_type = simdjson_result; using reference = value_type; using difference_type = std::ptrdiff_t; using iterator_category = std::input_iterator_tag; /** * Default constructor. */ simdjson_inline iterator() noexcept; /** * Get the current document (or error). */ simdjson_inline simdjson_result operator*() noexcept; /** * Advance to the next document (prefix). */ inline iterator& operator++() noexcept; /** * Check if we're at the end yet. * @param other the end iterator to compare to. */ simdjson_inline bool operator!=(const iterator &other) const noexcept; /** * @private * * Gives the current index in the input document in bytes. * * document_stream stream = parser.iterate_many(json,window); * for(auto i = stream.begin(); i != stream.end(); ++i) { * auto doc = *i; * size_t index = i.current_index(); * } * * This function (current_index()) is experimental and the usage * may change in future versions of simdjson: we find the API somewhat * awkward and we would like to offer something friendlier. */ simdjson_inline size_t current_index() const noexcept; /** * @private * * Gives a view of the current document at the current position.
* * document_stream stream = parser.iterate_many(json,window); * for(auto i = stream.begin(); i != stream.end(); ++i) { * std::string_view v = i.source(); * } * * The returned string_view instance is simply a map to the (unparsed) * source string: it may thus include white-space characters and all manner * of padding. * * This function (source()) is experimental and the usage * may change in future versions of simdjson: we find the API somewhat * awkward and we would like to offer something friendlier. * */ simdjson_inline std::string_view source() const noexcept; /** * Returns error of the stream (if any). */ inline error_code error() const noexcept; private: simdjson_inline iterator(document_stream *s, bool finished) noexcept; /** The document_stream we're iterating through. */ document_stream* stream; /** Whether we're finished or not. */ bool finished; friend class document; friend class document_stream; friend class json_iterator; }; /** * Start iterating the documents in the stream. */ simdjson_inline iterator begin() noexcept; /** * The end of the stream, for iterator comparison purposes. */ simdjson_inline iterator end() noexcept; private: document_stream &operator=(const document_stream &) = delete; // Disallow copying document_stream(const document_stream &other) = delete; // Disallow copying /** * Construct a document_stream. Does not allocate or parse anything until the iterator is * used. * * @param parser is a reference to the parser instance used to generate this document_stream * @param buf is the raw byte buffer we need to process * @param len is the length of the raw byte buffer in bytes * @param batch_size is the size of the windows (must be strictly greater or equal to the largest JSON document) */ simdjson_inline document_stream( ondemand::parser &parser, const uint8_t *buf, size_t len, size_t batch_size, bool allow_comma_separated ) noexcept; /** * Parse the first document in the buffer. 
Used by begin(), to handle allocation and * initialization. */ inline void start() noexcept; /** * Parse the next document found in the buffer previously given to document_stream. * * The content should be a valid JSON document encoded as UTF-8. If there is a * UTF-8 BOM, the parser skips it. * * You do NOT need to pre-allocate a parser. This function takes care of * pre-allocating a capacity defined by the batch_size defined when creating the * document_stream object. * * The function returns simdjson::EMPTY if there is no more data to be parsed. * * The function returns simdjson::SUCCESS (as integer = 0) in case of success * and indicates that the buffer has successfully been parsed to the end. * Every document it contained has been parsed without error. * * The function returns an error code from simdjson/simdjson.h in case of failure * such as simdjson::CAPACITY, simdjson::MEMALLOC, simdjson::DEPTH_ERROR and so forth * (the simdjson::error_message function converts these error codes into a string). * * NOTE(review): next() is declared void, so the codes described above are not returned to the caller; * presumably they are stored in the stream's `error` member -- confirm against the implementation. * * You can also check validity by calling parser.is_valid(). The same parser can * and should be reused for the other documents in the buffer. */ inline void next() noexcept; /** Move the json_iterator of the document to the location of the next document in the stream. */ inline void next_document() noexcept; /** Get the next document index. */ inline size_t next_batch_start() const noexcept; /** Pass the next batch through stage 1 with the given parser. */ inline error_code run_stage1(ondemand::parser &p, size_t batch_start) noexcept; // Fields ondemand::parser *parser; const uint8_t *buf; size_t len; size_t batch_size; bool allow_comma_separated; /** * We are going to use just one document instance. The document owns * the json_iterator. 
It implies that we only ever pass a reference * to the document to the users. */ document doc{}; /** The error (or lack thereof) from the current document.
*/ error_code error; size_t batch_start{0}; size_t doc_index{}; #ifdef SIMDJSON_THREADS_ENABLED /** Indicates whether we use threads. Note that this needs to be a constant during the execution of the parsing. */ bool use_thread; inline void load_from_stage1_thread() noexcept; /** Start a thread to run stage 1 on the next batch. */ inline void start_stage1_thread() noexcept; /** Wait for the stage 1 thread to finish and capture the results. */ inline void finish_stage1_thread() noexcept; /** The error returned from the stage 1 thread. */ error_code stage1_thread_error{UNINITIALIZED}; /** The thread used to run stage 1 against the next batch in the background. */ std::unique_ptr worker{new(std::nothrow) stage1_worker()}; /** * The parser used to run stage 1 in the background. Will be swapped * with the regular parser when finished. */ ondemand::parser stage1_thread_parser{}; friend struct stage1_worker; #endif // SIMDJSON_THREADS_ENABLED friend class parser; friend class document; friend class json_iterator; friend struct simdjson_result; friend struct internal::simdjson_result_base; }; // document_stream } // namespace ondemand } // namespace ppc64 } // namespace simdjson namespace simdjson { template<> struct simdjson_result : public ppc64::implementation_simdjson_result_base { public: simdjson_inline simdjson_result(ppc64::ondemand::document_stream &&value) noexcept; ///< @private simdjson_inline simdjson_result(error_code error) noexcept; ///< @private simdjson_inline simdjson_result() noexcept = default; }; } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_STREAM_H /* end file simdjson/generic/ondemand/document_stream.h for ppc64 */ /* including simdjson/generic/ondemand/field.h for ppc64: #include "simdjson/generic/ondemand/field.h" */ /* begin file simdjson/generic/ondemand/field.h for ppc64 */ #ifndef SIMDJSON_GENERIC_ONDEMAND_FIELD_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped 
(editor-only): #define SIMDJSON_GENERIC_ONDEMAND_FIELD_H */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { namespace ppc64 { namespace ondemand { /** * A JSON field (key/value pair) in an object. * * Returned from object iteration. * * Extends from std::pair so you can use C++ algorithms that rely on pairs. */ class field : public std::pair { public: /** * Create a new invalid field. * * Exists so you can declare a variable and later assign to it before use. */ simdjson_inline field() noexcept; /** * Get the key as a string_view (for higher speed, consider raw_key). * We deliberately use a more cumbersome name (unescaped_key) to force users * to think twice about using it. * * This consumes the key: once you have called unescaped_key(), you cannot * call it again nor can you call key(). */ simdjson_inline simdjson_warn_unused simdjson_result unescaped_key(bool allow_replacement) noexcept; /** * Get the key as a raw_json_string. Can be used for direct comparison with * an unescaped C string: e.g., key() == "test". */ simdjson_inline raw_json_string key() const noexcept; /** * Get the field value. 
*/ simdjson_inline ondemand::value &value() & noexcept; /** * @overload ondemand::value &ondemand::value() & noexcept */ simdjson_inline ondemand::value value() && noexcept; protected: simdjson_inline field(raw_json_string key, ondemand::value &&value) noexcept; static simdjson_inline simdjson_result start(value_iterator &parent_iter) noexcept; static simdjson_inline simdjson_result start(const value_iterator &parent_iter, raw_json_string key) noexcept; friend struct simdjson_result; friend class object_iterator; }; } // namespace ondemand } // namespace ppc64 } // namespace simdjson namespace simdjson { template<> struct simdjson_result : public ppc64::implementation_simdjson_result_base { public: simdjson_inline simdjson_result(ppc64::ondemand::field &&value) noexcept; ///< @private simdjson_inline simdjson_result(error_code error) noexcept; ///< @private simdjson_inline simdjson_result() noexcept = default; simdjson_inline simdjson_result unescaped_key(bool allow_replacement = false) noexcept; simdjson_inline simdjson_result key() noexcept; simdjson_inline simdjson_result value() noexcept; }; } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_FIELD_H /* end file simdjson/generic/ondemand/field.h for ppc64 */ /* including simdjson/generic/ondemand/object.h for ppc64: #include "simdjson/generic/ondemand/object.h" */ /* begin file simdjson/generic/ondemand/object.h for ppc64 */ #ifndef SIMDJSON_GENERIC_ONDEMAND_OBJECT_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_OBJECT_H */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace 
simdjson { namespace ppc64 { namespace ondemand { /** * A forward-only JSON object field iterator. */ class object { public: /** * Create a new invalid object. * * Exists so you can declare a variable and later assign to it before use. */ simdjson_inline object() noexcept = default; simdjson_inline simdjson_result begin() noexcept; simdjson_inline simdjson_result end() noexcept; /** * Look up a field by name on an object (order-sensitive). * * The following code reads z, then y, then x, and thus will not retrieve x or y if fed the * JSON `{ "x": 1, "y": 2, "z": 3 }`: * * ```c++ * simdjson::ondemand::parser parser; * auto json = R"( { "x": 1, "y": 2, "z": 3 } )"_padded; * auto obj = parser.iterate(json); * double z = obj.find_field("z"); * double y = obj.find_field("y"); * double x = obj.find_field("x"); * ``` * If you have multiple fields with a matching key ({"x": 1, "x": 1}) be mindful * that only one field is returned. * * **Raw Keys:** The lookup will be done against the *raw* key, and will not unescape keys. * e.g. `object["a"]` will match `{ "a": 1 }`, but will *not* match `{ "\u0061": 1 }`. * * You must consume the fields on an object one at a time. A request for a new key * invalidates previous field values: it makes them unsafe. The value instance you get * from `content["bids"]` becomes invalid when you call `content["asks"]`. The array * given by content["bids"].get_array() should not be accessed after you have called * content["asks"].get_array(). You can detect such mistakes by first compiling and running * the code in Debug mode (or with the macro `SIMDJSON_DEVELOPMENT_CHECKS` set to 1): an * OUT_OF_ORDER_ITERATION error is generated. * * You are expected to access keys only once. You should access the value corresponding to a * key a single time. Doing object["mykey"].to_string() and then again object["mykey"].to_string() * is an error. * * @param key The key to look up. 
* @returns The value of the field, or NO_SUCH_FIELD if the field is not in the object.
*/ simdjson_inline simdjson_result find_field(std::string_view key) & noexcept; /** @overload simdjson_inline simdjson_result find_field(std::string_view key) & noexcept; */ simdjson_inline simdjson_result find_field(std::string_view key) && noexcept; /** * Look up a field by name on an object, without regard to key order. * * **Performance Notes:** This is a bit less performant than find_field(), though its effect varies * and often appears negligible. It starts out normally, starting out at the last field; but if * the field is not found, it scans from the beginning of the object to see if it missed it. That * missing case has a non-cache-friendly bump and lots of extra scanning, especially if the object * in question is large. The fact that the extra code is there also bumps the executable size. * * It is the default, however, because it would be highly surprising (and hard to debug) if the * default behavior failed to look up a field just because it was in the wrong order--and many * APIs assume this. Therefore, you must be explicit if you want to treat objects as out of order. * * Use find_field() if you are sure fields will be in order (or are willing to treat it as if the * field wasn't there when they aren't). * * If you have multiple fields with a matching key ({"x": 1, "x": 1}) be mindful * that only one field is returned. * * You must consume the fields on an object one at a time. A request for a new key * invalidates previous field values: it makes them unsafe. The value instance you get * from `content["bids"]` becomes invalid when you call `content["asks"]`. The array * given by content["bids"].get_array() should not be accessed after you have called * content["asks"].get_array(). You can detect such mistakes by first compiling and running * the code in Debug mode (or with the macro `SIMDJSON_DEVELOPMENT_CHECKS` set to 1): an * OUT_OF_ORDER_ITERATION error is generated. * * You are expected to access keys only once.
You should access the value corresponding to a key * a single time. Doing object["mykey"].to_string() and then again object["mykey"].to_string() is an error. * * @param key The key to look up. * @returns The value of the field, or NO_SUCH_FIELD if the field is not in the object. */ simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; */ simdjson_inline simdjson_result find_field_unordered(std::string_view key) && noexcept; /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; */ simdjson_inline simdjson_result operator[](std::string_view key) & noexcept; /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; */ simdjson_inline simdjson_result operator[](std::string_view key) && noexcept; /** * Get the value associated with the given JSON pointer. We use the RFC 6901 * https://tools.ietf.org/html/rfc6901 standard, interpreting the current node * as the root of its own JSON document. * * ondemand::parser parser; * auto json = R"({ "foo": { "a": [ 10, 20, 30 ] }})"_padded; * auto doc = parser.iterate(json); * doc.at_pointer("/foo/a/1") == 20 * * It is allowed for a key to be the empty string: * * ondemand::parser parser; * auto json = R"({ "": { "a": [ 10, 20, 30 ] }})"_padded; * auto doc = parser.iterate(json); * doc.at_pointer("//a/1") == 20 * * Note that at_pointer() called on the document automatically calls the document's rewind * method between each call. It invalidates all previously accessed arrays, objects and values * that have not been consumed. Yet it is not the case when calling at_pointer on an object * instance: there is no rewind and no invalidation. * * You may call at_pointer more than once on an object, but each time the pointer is advanced * to be within the value matched by the key indicated by the JSON pointer query. 
Thus any preceding * key (as well as the current key) can no longer be used with following JSON pointer calls. * * Also note that at_pointer() relies on find_field() which implies that we do not unescape keys when matching. * * @return The value associated with the given JSON pointer, or: * - NO_SUCH_FIELD if a field does not exist in an object * - INDEX_OUT_OF_BOUNDS if an array index is larger than an array length * - INCORRECT_TYPE if a non-integer is used to access an array * - INVALID_JSON_POINTER if the JSON pointer is invalid and cannot be parsed */ inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; /** * Reset the iterator so that we are pointing back at the * beginning of the object. You should still consume values only once even if you * can iterate through the object more than once. If you unescape a string within * the object more than once, you have unsafe code. Note that rewinding an object * means that you may need to reparse it anew: it is not a free operation. * * @returns true if the object contains some elements (not empty) */ inline simdjson_result reset() & noexcept; /** * This method scans the beginning of the object and checks whether the * object is empty. * The runtime complexity is constant time. After * calling this function, if successful, the object is rewound to its * beginning as if it had never been accessed. If the JSON is malformed (e.g., * there is a missing comma), then an error is returned and it is no longer * safe to continue. */ inline simdjson_result is_empty() & noexcept; /** * This method scans the object and counts the number of key-value pairs. * The count_fields method should always be called before you have begun * iterating through the object: it is expected that you are pointing at * the beginning of the object. * The runtime complexity is linear in the size of the object.
After * calling this function, if successful, the object is rewound to its * beginning as if it had never been accessed. If the JSON is malformed (e.g., * there is a missing comma), then an error is returned and it is no longer * safe to continue. * * To check that an object is empty, it is more performant to use * the is_empty() method. * * Performance hint: You should only call count_fields() as a last * resort as it may require scanning the document twice or more. */ simdjson_inline simdjson_result count_fields() & noexcept; /** * Consumes the object and returns a string_view instance corresponding to the * object as represented in JSON. It points inside the original byte array containing * the JSON document. */ simdjson_inline simdjson_result raw_json() noexcept; protected: /** * Go to the end of the object, no matter where you are right now. */ simdjson_inline error_code consume() noexcept; static simdjson_inline simdjson_result start(value_iterator &iter) noexcept; static simdjson_inline simdjson_result start_root(value_iterator &iter) noexcept; static simdjson_inline simdjson_result started(value_iterator &iter) noexcept; static simdjson_inline object resume(const value_iterator &iter) noexcept; simdjson_inline object(const value_iterator &iter) noexcept; simdjson_warn_unused simdjson_inline error_code find_field_raw(const std::string_view key) noexcept; value_iterator iter{}; friend class value; friend class document; friend struct simdjson_result; }; } // namespace ondemand } // namespace ppc64 } // namespace simdjson namespace simdjson { template<> struct simdjson_result : public ppc64::implementation_simdjson_result_base { public: simdjson_inline simdjson_result(ppc64::ondemand::object &&value) noexcept; ///< @private simdjson_inline simdjson_result(error_code error) noexcept; ///< @private simdjson_inline simdjson_result() noexcept = default; simdjson_inline simdjson_result begin() noexcept; simdjson_inline simdjson_result end() noexcept;
simdjson_inline simdjson_result find_field(std::string_view key) & noexcept; simdjson_inline simdjson_result find_field(std::string_view key) && noexcept; simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; simdjson_inline simdjson_result find_field_unordered(std::string_view key) && noexcept; simdjson_inline simdjson_result operator[](std::string_view key) & noexcept; simdjson_inline simdjson_result operator[](std::string_view key) && noexcept; simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; inline simdjson_result reset() noexcept; inline simdjson_result is_empty() noexcept; inline simdjson_result count_fields() & noexcept; inline simdjson_result raw_json() noexcept; }; } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_OBJECT_H /* end file simdjson/generic/ondemand/object.h for ppc64 */ /* including simdjson/generic/ondemand/object_iterator.h for ppc64: #include "simdjson/generic/ondemand/object_iterator.h" */ /* begin file simdjson/generic/ondemand/object_iterator.h for ppc64 */ #ifndef SIMDJSON_GENERIC_ONDEMAND_OBJECT_ITERATOR_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_OBJECT_ITERATOR_H */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { namespace ppc64 { namespace ondemand { class object_iterator { public: /** * Create a new invalid object_iterator. * * Exists so you can declare a variable and later assign to it before use. 
*/ simdjson_inline object_iterator() noexcept = default; // // Iterator interface // // Reads key and value, yielding them to the user. // MUST ONLY BE CALLED ONCE PER ITERATION. simdjson_inline simdjson_result operator*() noexcept; // Assumes it's being compared with the end. true if depth < iter->depth. simdjson_inline bool operator==(const object_iterator &) const noexcept; // Assumes it's being compared with the end. true if depth >= iter->depth. simdjson_inline bool operator!=(const object_iterator &) const noexcept; // Checks for '}' and ',' simdjson_inline object_iterator &operator++() noexcept; private: /** * The underlying JSON iterator. * * PERF NOTE: expected to be elided in favor of the parent document: this is set when the object * is first used, and never changes afterwards. */ value_iterator iter{}; simdjson_inline object_iterator(const value_iterator &iter) noexcept; friend struct simdjson_result; friend class object; }; } // namespace ondemand } // namespace ppc64 } // namespace simdjson namespace simdjson { template<> struct simdjson_result : public ppc64::implementation_simdjson_result_base { public: simdjson_inline simdjson_result(ppc64::ondemand::object_iterator &&value) noexcept; ///< @private simdjson_inline simdjson_result(error_code error) noexcept; ///< @private simdjson_inline simdjson_result() noexcept = default; // // Iterator interface // // Reads key and value, yielding them to the user. simdjson_inline simdjson_result operator*() noexcept; // MUST ONLY BE CALLED ONCE PER ITERATION. // Assumes it's being compared with the end. true if depth < iter->depth. simdjson_inline bool operator==(const simdjson_result &) const noexcept; // Assumes it's being compared with the end. true if depth >= iter->depth.
simdjson_inline bool operator!=(const simdjson_result &) const noexcept; // Checks for '}' and ',' simdjson_inline simdjson_result &operator++() noexcept; }; } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_OBJECT_ITERATOR_H /* end file simdjson/generic/ondemand/object_iterator.h for ppc64 */ /* including simdjson/generic/ondemand/serialization.h for ppc64: #include "simdjson/generic/ondemand/serialization.h" */ /* begin file simdjson/generic/ondemand/serialization.h for ppc64 */ #ifndef SIMDJSON_GENERIC_ONDEMAND_SERIALIZATION_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_SERIALIZATION_H */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { /** * Create a string-view instance out of a document instance. The string-view instance * contains JSON text that is suitable to be parsed as JSON again. It does not * validate the content. */ inline simdjson_result to_json_string(ppc64::ondemand::document& x) noexcept; /** * Create a string-view instance out of a value instance. The string-view instance * contains JSON text that is suitable to be parsed as JSON again. The value must * not have been accessed previously. It does not * validate the content. */ inline simdjson_result to_json_string(ppc64::ondemand::value& x) noexcept; /** * Create a string-view instance out of an object instance. The string-view instance * contains JSON text that is suitable to be parsed as JSON again. It does not * validate the content. */ inline simdjson_result to_json_string(ppc64::ondemand::object& x) noexcept; /** * Create a string-view instance out of an array instance. The string-view instance * contains JSON text that is suitable to be parsed as JSON again. It does not * validate the content.
*/ inline simdjson_result to_json_string(ppc64::ondemand::array& x) noexcept; inline simdjson_result to_json_string(simdjson_result x); inline simdjson_result to_json_string(simdjson_result x); inline simdjson_result to_json_string(simdjson_result x); inline simdjson_result to_json_string(simdjson_result x); } // namespace simdjson /** * We want to support argument-dependent lookup (ADL). * Hence we should define operator<< in the namespace * where the argument (here value, object, etc.) resides. * Credit: @madhur4127 * See https://github.com/simdjson/simdjson/issues/1768 */ namespace simdjson { namespace ppc64 { namespace ondemand { /** * Print JSON to an output stream. It does not * validate the content. * * @param out The output stream. * @param x The value. * @throw if there is an error with the underlying output stream. simdjson itself will not throw. */ inline std::ostream& operator<<(std::ostream& out, simdjson::ppc64::ondemand::value x); #if SIMDJSON_EXCEPTIONS inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x); #endif /** * Print JSON to an output stream. It does not * validate the content. * * @param out The output stream. * @param value The array. * @throw if there is an error with the underlying output stream. simdjson itself will not throw. */ inline std::ostream& operator<<(std::ostream& out, simdjson::ppc64::ondemand::array value); #if SIMDJSON_EXCEPTIONS inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x); #endif /** * Print JSON to an output stream. It does not * validate the content. * * @param out The output stream. * @param value The document. * @throw if there is an error with the underlying output stream. simdjson itself will not throw.
*/ inline std::ostream& operator<<(std::ostream& out, simdjson::ppc64::ondemand::document& value); #if SIMDJSON_EXCEPTIONS inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result&& x); #endif inline std::ostream& operator<<(std::ostream& out, simdjson::ppc64::ondemand::document_reference& value); #if SIMDJSON_EXCEPTIONS inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result&& x); #endif /** * Print JSON to an output stream. It does not * validate the content. * * @param out The output stream. * @param value The object. * @throw if there is an error with the underlying output stream. simdjson itself will not throw. */ inline std::ostream& operator<<(std::ostream& out, simdjson::ppc64::ondemand::object value); #if SIMDJSON_EXCEPTIONS inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x); #endif }}} // namespace simdjson::ppc64::ondemand #endif // SIMDJSON_GENERIC_ONDEMAND_SERIALIZATION_H /* end file simdjson/generic/ondemand/serialization.h for ppc64 */ // Inline definitions /* including simdjson/generic/ondemand/array-inl.h for ppc64: #include "simdjson/generic/ondemand/array-inl.h" */ /* begin file simdjson/generic/ondemand/array-inl.h for ppc64 */ #ifndef SIMDJSON_GENERIC_ONDEMAND_ARRAY_INL_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_ARRAY_INL_H */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array_iterator-inl.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value.h" */ /* amalgamation skipped (editor-only): #include
"simdjson/generic/ondemand/value_iterator-inl.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { namespace ppc64 { namespace ondemand { //
// ## Live States
//
// While iterating or looking up values, depth >= iter->depth. at_start may vary. Error is
// always SUCCESS:
//
// - Start: This is the state when the array is first found and the iterator is just past the `[`.
//   In this state, at_start == true.
// - Next: After we hand a scalar value to the user, or an array/object which they then fully
//   iterate over, the iterator is at the `,` before the next value (or `]`). In this state,
//   depth == iter->depth, at_start == false, and error == SUCCESS.
// - Unfinished Business: When we hand an array/object to the user which they do not fully
//   iterate over, we need to finish that iteration by skipping child values until we reach the
//   Next state. In this state, depth > iter->depth, at_start == false, and error == SUCCESS.
//
// ## Error States
//
// In error states, we will yield exactly one more value before stopping. iter->depth == depth
// and at_start is always false. We decrement after yielding the error, moving to the Finished
// state.
//
// - Chained Error: When the array iterator is part of an error chain--for example, in
//   `for (auto tweet : doc["tweets"])`, where the tweet element may be missing or not be an
//   array--we yield that error in the loop, exactly once. In this state, error != SUCCESS and
//   iter->depth == depth, and at_start == false. We decrement depth when we yield the error.
// - Missing Comma Error: When the iterator ++ method discovers there is no comma between elements,
//   we flag that as an error and treat it exactly the same as a Chained Error. In this state,
//   error == TAPE_ERROR, iter->depth == depth, and at_start == false.
//
// ## Terminal State
//
// The terminal state has iter->depth < depth. at_start is always false.
// // - Finished: When we have reached a `]` or have reported an error, we are finished. We signal this // by decrementing depth. In this state, iter->depth < depth, at_start == false, and // error == SUCCESS. // simdjson_inline array::array(const value_iterator &_iter) noexcept : iter{_iter} { } simdjson_inline simdjson_result array::start(value_iterator &iter) noexcept { // We don't need to know if the array is empty to start iteration, but we do want to know if there // is an error--thus `simdjson_unused`. simdjson_unused bool has_value; SIMDJSON_TRY( iter.start_array().get(has_value) ); return array(iter); } simdjson_inline simdjson_result array::start_root(value_iterator &iter) noexcept { simdjson_unused bool has_value; SIMDJSON_TRY( iter.start_root_array().get(has_value) ); return array(iter); } simdjson_inline simdjson_result array::started(value_iterator &iter) noexcept { bool has_value; SIMDJSON_TRY(iter.started_array().get(has_value)); return array(iter); } simdjson_inline simdjson_result array::begin() noexcept { #if SIMDJSON_DEVELOPMENT_CHECKS if (!iter.is_at_iterator_start()) { return OUT_OF_ORDER_ITERATION; } #endif return array_iterator(iter); } simdjson_inline simdjson_result array::end() noexcept { return array_iterator(iter); } simdjson_inline error_code array::consume() noexcept { auto error = iter.json_iter().skip_child(iter.depth()-1); if(error) { iter.abandon(); } return error; } simdjson_inline simdjson_result array::raw_json() noexcept { const uint8_t * starting_point{iter.peek_start()}; auto error = consume(); if(error) { return error; } // After 'consume()', we could be left pointing just beyond the document, but that // is ok because we are not going to dereference the final pointer position, we just // use it to compute the length in bytes. 
const uint8_t * final_point{iter._json_iter->unsafe_pointer()}; return std::string_view(reinterpret_cast(starting_point), size_t(final_point - starting_point)); } SIMDJSON_PUSH_DISABLE_WARNINGS SIMDJSON_DISABLE_STRICT_OVERFLOW_WARNING simdjson_inline simdjson_result array::count_elements() & noexcept { size_t count{0}; // Important: we do not consume any of the values. for(simdjson_unused auto v : *this) { count++; } // The above loop will always succeed, but we want to report errors. if(iter.error()) { return iter.error(); } // We need to move back at the start because we expect users to iterate through // the array after counting the number of elements. iter.reset_array(); return count; } SIMDJSON_POP_DISABLE_WARNINGS simdjson_inline simdjson_result array::is_empty() & noexcept { bool is_not_empty; auto error = iter.reset_array().get(is_not_empty); if(error) { return error; } return !is_not_empty; } inline simdjson_result array::reset() & noexcept { return iter.reset_array(); } inline simdjson_result array::at_pointer(std::string_view json_pointer) noexcept { if (json_pointer[0] != '/') { return INVALID_JSON_POINTER; } json_pointer = json_pointer.substr(1); // - means "the append position" or "the element after the end of the array" // We don't support this, because we're returning a real element, not a position. if (json_pointer == "-") { return INDEX_OUT_OF_BOUNDS; } // Read the array index size_t array_index = 0; size_t i; for (i = 0; i < json_pointer.length() && json_pointer[i] != '/'; i++) { uint8_t digit = uint8_t(json_pointer[i] - '0'); // Check for non-digit in array index. 
If it's there, we're trying to get a field in an object if (digit > 9) { return INCORRECT_TYPE; } array_index = array_index*10 + digit; } // 0 followed by other digits is invalid if (i > 1 && json_pointer[0] == '0') { return INVALID_JSON_POINTER; } // "JSON pointer array index has other characters after 0" // Empty string is invalid; so is a "/" with no digits before it if (i == 0) { return INVALID_JSON_POINTER; } // "Empty string in JSON pointer array index" // Get the child auto child = at(array_index); // If there is an error, it ends here if(child.error()) { return child; } // If there is a /, we're not done yet, call recursively. if (i < json_pointer.length()) { child = child.at_pointer(json_pointer.substr(i)); } return child; } simdjson_inline simdjson_result array::at(size_t index) noexcept { size_t i = 0; for (auto value : *this) { if (i == index) { return value; } i++; } return INDEX_OUT_OF_BOUNDS; } } // namespace ondemand } // namespace ppc64 } // namespace simdjson namespace simdjson { simdjson_inline simdjson_result::simdjson_result( ppc64::ondemand::array &&value ) noexcept : implementation_simdjson_result_base( std::forward(value) ) { } simdjson_inline simdjson_result::simdjson_result( error_code error ) noexcept : implementation_simdjson_result_base(error) { } simdjson_inline simdjson_result simdjson_result::begin() noexcept { if (error()) { return error(); } return first.begin(); } simdjson_inline simdjson_result simdjson_result::end() noexcept { if (error()) { return error(); } return first.end(); } simdjson_inline simdjson_result simdjson_result::count_elements() & noexcept { if (error()) { return error(); } return first.count_elements(); } simdjson_inline simdjson_result simdjson_result::is_empty() & noexcept { if (error()) { return error(); } return first.is_empty(); } simdjson_inline simdjson_result simdjson_result::at(size_t index) noexcept { if (error()) { return error(); } return first.at(index); } simdjson_inline simdjson_result 
simdjson_result::at_pointer(std::string_view json_pointer) noexcept { if (error()) { return error(); } return first.at_pointer(json_pointer); } simdjson_inline simdjson_result simdjson_result::raw_json() noexcept { if (error()) { return error(); } return first.raw_json(); } } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_ARRAY_INL_H /* end file simdjson/generic/ondemand/array-inl.h for ppc64 */ /* including simdjson/generic/ondemand/array_iterator-inl.h for ppc64: #include "simdjson/generic/ondemand/array_iterator-inl.h" */ /* begin file simdjson/generic/ondemand/array_iterator-inl.h for ppc64 */ #ifndef SIMDJSON_GENERIC_ONDEMAND_ARRAY_ITERATOR_INL_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_ARRAY_ITERATOR_INL_H */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array_iterator.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value-inl.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator-inl.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { namespace ppc64 { namespace ondemand { simdjson_inline array_iterator::array_iterator(const value_iterator &_iter) noexcept : iter{_iter} {} simdjson_inline simdjson_result array_iterator::operator*() noexcept { if (iter.error()) { iter.abandon(); return iter.error(); } return value(iter.child()); } simdjson_inline bool array_iterator::operator==(const array_iterator &other) const noexcept { return !(*this != other); } simdjson_inline bool array_iterator::operator!=(const array_iterator &) const noexcept { return iter.is_open(); } simdjson_inline array_iterator &array_iterator::operator++() noexcept { error_code error; // PERF NOTE this is a safety rail ... 
users should exit loops as soon as they receive an error, so we'll never get here. // However, it does not seem to make a perf difference, so we add it out of an abundance of caution. if (( error = iter.error() )) { return *this; } if (( error = iter.skip_child() )) { return *this; } if (( error = iter.has_next_element().error() )) { return *this; } return *this; } } // namespace ondemand } // namespace ppc64 } // namespace simdjson namespace simdjson { simdjson_inline simdjson_result::simdjson_result( ppc64::ondemand::array_iterator &&value ) noexcept : ppc64::implementation_simdjson_result_base(std::forward(value)) { first.iter.assert_is_valid(); } simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept : ppc64::implementation_simdjson_result_base({}, error) { } simdjson_inline simdjson_result simdjson_result::operator*() noexcept { if (error()) { return error(); } return *first; } simdjson_inline bool simdjson_result::operator==(const simdjson_result &other) const noexcept { if (!first.iter.is_valid()) { return !error(); } return first == other.first; } simdjson_inline bool simdjson_result::operator!=(const simdjson_result &other) const noexcept { if (!first.iter.is_valid()) { return error(); } return first != other.first; } simdjson_inline simdjson_result &simdjson_result::operator++() noexcept { // Clear the error if there is one, so we don't yield it twice if (error()) { second = SUCCESS; return *this; } ++(first); return *this; } } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_ARRAY_ITERATOR_INL_H /* end file simdjson/generic/ondemand/array_iterator-inl.h for ppc64 */ /* including simdjson/generic/ondemand/document-inl.h for ppc64: #include "simdjson/generic/ondemand/document-inl.h" */ /* begin file simdjson/generic/ondemand/document-inl.h for ppc64 */ #ifndef SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_INL_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #define 
SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_INL_H */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array-inl.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array_iterator.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/document.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator-inl.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_type.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/object-inl.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator-inl.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { namespace ppc64 { namespace ondemand { simdjson_inline document::document(ondemand::json_iterator &&_iter) noexcept : iter{std::forward(_iter)} { logger::log_start_value(iter, "document"); } simdjson_inline document document::start(json_iterator &&iter) noexcept { return document(std::forward(iter)); } inline void document::rewind() noexcept { iter.rewind(); } inline std::string document::to_debug_string() noexcept { return iter.to_string(); } inline simdjson_result document::current_location() const noexcept { return iter.current_location(); } inline int32_t document::current_depth() const noexcept { return iter.depth(); } inline bool document::at_end() const noexcept { return iter.at_end(); } inline bool document::is_alive() noexcept { return iter.is_alive(); } simdjson_inline value_iterator document::resume_value_iterator() noexcept { return value_iterator(&iter, 1, iter.root_position()); } simdjson_inline 
value_iterator document::get_root_value_iterator() noexcept { return resume_value_iterator(); } simdjson_inline simdjson_result document::start_or_resume_object() noexcept { if (iter.at_root()) { return get_object(); } else { return object::resume(resume_value_iterator()); } } simdjson_inline simdjson_result document::get_value() noexcept { // Make sure we start any arrays or objects before returning, so that start_root_() // gets called. // It is the convention throughout the code that the macro `SIMDJSON_DEVELOPMENT_CHECKS` determines whether // we check for OUT_OF_ORDER_ITERATION. Proper On Demand code should never trigger this error. #if SIMDJSON_DEVELOPMENT_CHECKS if (!iter.at_root()) { return OUT_OF_ORDER_ITERATION; } #endif // assert_at_root() serves two purposes: in Debug mode, whether or not // SIMDJSON_DEVELOPMENT_CHECKS is set, it checks that we are at the root of // the document (this will typically be redundant). In release mode, it generates // SIMDJSON_ASSUME statements to allow the compiler to make assumptions. iter.assert_at_root(); switch (*iter.peek()) { case '[': { // The following lines check that the document ends with ]. auto value_iterator = get_root_value_iterator(); auto error = value_iterator.check_root_array(); if(error) { return error; } return value(get_root_value_iterator()); } case '{': { // The following lines check that the document ends with }. auto value_iterator = get_root_value_iterator(); auto error = value_iterator.check_root_object(); if(error) { return error; } return value(get_root_value_iterator()); } default: // Unfortunately, scalar documents are a special case in simdjson and they cannot // be safely converted to value instances.
return SCALAR_DOCUMENT_AS_VALUE; } } simdjson_inline simdjson_result document::get_array() & noexcept { auto value = get_root_value_iterator(); return array::start_root(value); } simdjson_inline simdjson_result document::get_object() & noexcept { auto value = get_root_value_iterator(); return object::start_root(value); } /** * We decided that calling 'get_double()' on the JSON document '1.233 blabla' should * give an error, so we check for trailing content. * Thus, in several implementations below, we pass a 'true' argument to the * get_root_...() method called on the iterator returned by get_root_value_iterator(): * this indicates that we disallow trailing content. */ simdjson_inline simdjson_result document::get_uint64() noexcept { return get_root_value_iterator().get_root_uint64(true); } simdjson_inline simdjson_result document::get_uint64_in_string() noexcept { return get_root_value_iterator().get_root_uint64_in_string(true); } simdjson_inline simdjson_result document::get_int64() noexcept { return get_root_value_iterator().get_root_int64(true); } simdjson_inline simdjson_result document::get_int64_in_string() noexcept { return get_root_value_iterator().get_root_int64_in_string(true); } simdjson_inline simdjson_result document::get_double() noexcept { return get_root_value_iterator().get_root_double(true); } simdjson_inline simdjson_result document::get_double_in_string() noexcept { return get_root_value_iterator().get_root_double_in_string(true); } simdjson_inline simdjson_result document::get_string(bool allow_replacement) noexcept { return get_root_value_iterator().get_root_string(true, allow_replacement); } template simdjson_inline error_code document::get_string(string_type& receiver, bool allow_replacement) noexcept { return get_root_value_iterator().get_root_string(receiver, true, allow_replacement); } simdjson_inline simdjson_result document::get_wobbly_string() noexcept { return get_root_value_iterator().get_root_wobbly_string(true); } simdjson_inline
simdjson_result document::get_raw_json_string() noexcept { return get_root_value_iterator().get_root_raw_json_string(true); } simdjson_inline simdjson_result document::get_bool() noexcept { return get_root_value_iterator().get_root_bool(true); } simdjson_inline simdjson_result document::is_null() noexcept { return get_root_value_iterator().is_root_null(true); } template<> simdjson_inline simdjson_result document::get() & noexcept { return get_array(); } template<> simdjson_inline simdjson_result document::get() & noexcept { return get_object(); } template<> simdjson_inline simdjson_result document::get() & noexcept { return get_raw_json_string(); } template<> simdjson_inline simdjson_result document::get() & noexcept { return get_string(false); } template<> simdjson_inline simdjson_result document::get() & noexcept { return get_double(); } template<> simdjson_inline simdjson_result document::get() & noexcept { return get_uint64(); } template<> simdjson_inline simdjson_result document::get() & noexcept { return get_int64(); } template<> simdjson_inline simdjson_result document::get() & noexcept { return get_bool(); } template<> simdjson_inline simdjson_result document::get() & noexcept { return get_value(); } template<> simdjson_inline simdjson_result document::get() && noexcept { return get_raw_json_string(); } template<> simdjson_inline simdjson_result document::get() && noexcept { return get_string(false); } template<> simdjson_inline simdjson_result document::get() && noexcept { return std::forward(*this).get_double(); } template<> simdjson_inline simdjson_result document::get() && noexcept { return std::forward(*this).get_uint64(); } template<> simdjson_inline simdjson_result document::get() && noexcept { return std::forward(*this).get_int64(); } template<> simdjson_inline simdjson_result document::get() && noexcept { return std::forward(*this).get_bool(); } template<> simdjson_inline simdjson_result document::get() && noexcept { return get_value(); } template 
simdjson_inline error_code document::get(T &out) & noexcept { return get().get(out); } template simdjson_inline error_code document::get(T &out) && noexcept { return std::forward(*this).get().get(out); } /* Conversion operators (exception-enabled builds only): each one returns the result of the matching get_*() accessor. */ #if SIMDJSON_EXCEPTIONS simdjson_inline document::operator array() & noexcept(false) { return get_array(); } simdjson_inline document::operator object() & noexcept(false) { return get_object(); } simdjson_inline document::operator uint64_t() noexcept(false) { return get_uint64(); } simdjson_inline document::operator int64_t() noexcept(false) { return get_int64(); } simdjson_inline document::operator double() noexcept(false) { return get_double(); } simdjson_inline document::operator std::string_view() noexcept(false) { return get_string(false); } simdjson_inline document::operator raw_json_string() noexcept(false) { return get_raw_json_string(); } simdjson_inline document::operator bool() noexcept(false) { return get_bool(); } simdjson_inline document::operator value() noexcept(false) { return get_value(); } #endif /* Counts the elements of the root array (get_array().count_elements()); rewind() is called only when the count succeeded. */ simdjson_inline simdjson_result document::count_elements() & noexcept { auto a = get_array(); simdjson_result answer = a.count_elements(); /* If there was an array, we are now left pointing at its first element. */ if(answer.error() == SUCCESS) { rewind(); } return answer; } /* Counts the fields of the root object (get_object().count_fields()); rewind() is called only when the count succeeded. */ simdjson_inline simdjson_result document::count_fields() & noexcept { auto a = get_object(); simdjson_result answer = a.count_fields(); /* If there was an object, we are now left pointing at its first element. 
*/ if(answer.error() == SUCCESS) { rewind(); } return answer; } /* Returns element number `index` of the root array. */ simdjson_inline simdjson_result document::at(size_t index) & noexcept { auto a = get_array(); return a.at(index); } simdjson_inline simdjson_result document::begin() & noexcept { return get_array().begin(); } simdjson_inline simdjson_result document::end() & noexcept { return {}; } /* Field lookups: every overload below delegates to the object returned by start_or_resume_object(). */ simdjson_inline simdjson_result document::find_field(std::string_view key) & noexcept { return start_or_resume_object().find_field(key); } simdjson_inline simdjson_result document::find_field(const char *key) & noexcept { return start_or_resume_object().find_field(key); } simdjson_inline simdjson_result document::find_field_unordered(std::string_view key) & noexcept { return start_or_resume_object().find_field_unordered(key); } simdjson_inline simdjson_result document::find_field_unordered(const char *key) & noexcept { return start_or_resume_object().find_field_unordered(key); } simdjson_inline simdjson_result document::operator[](std::string_view key) & noexcept { return start_or_resume_object()[key]; } simdjson_inline simdjson_result document::operator[](const char *key) & noexcept { return start_or_resume_object()[key]; } /* Skips the document with iter.skip_child(0); if that fails the iterator is abandoned and the error is returned. */ simdjson_inline error_code document::consume() noexcept { auto error = iter.skip_child(0); if(error) { iter.abandon(); } return error; } /* Returns a view spanning from the start of the root value (peek_start()) to the iterator position reached after consume(). */ simdjson_inline simdjson_result document::raw_json() noexcept { auto _iter = get_root_value_iterator(); const uint8_t * starting_point{_iter.peek_start()}; auto error = consume(); if(error) { return error; } // After 'consume()', we could be left pointing just beyond the document, but that // is ok because we are not going to dereference the final pointer position, we just // use it to compute the length in bytes. 
const uint8_t * final_point{iter.unsafe_pointer()}; return std::string_view(reinterpret_cast(starting_point), size_t(final_point - starting_point)); } simdjson_inline simdjson_result document::type() noexcept { return get_root_value_iterator().type(); } /* Yields true when the root type is neither json_type::array nor json_type::object; an error from type() is returned as is. */ simdjson_inline simdjson_result document::is_scalar() noexcept { json_type this_type; auto error = type().get(this_type); if(error) { return error; } return ! ((this_type == json_type::array) || (this_type == json_type::object)); } simdjson_inline bool document::is_negative() noexcept { return get_root_value_iterator().is_root_negative(); } simdjson_inline simdjson_result document::is_integer() noexcept { return get_root_value_iterator().is_root_integer(true); } simdjson_inline simdjson_result document::get_number_type() noexcept { return get_root_value_iterator().get_root_number_type(true); } simdjson_inline simdjson_result document::get_number() noexcept { return get_root_value_iterator().get_root_number(true); } /* Returns a view of peek_start_length() bytes beginning at the root value's peek_start(). */ simdjson_inline simdjson_result document::raw_json_token() noexcept { auto _iter = get_root_value_iterator(); return std::string_view(reinterpret_cast(_iter.peek_start()), _iter.peek_start_length()); } /* Resolves a JSON pointer from the document root. The document is rewound first; an empty pointer yields get_value(); a root array or object handles the pointer itself; any other root type yields INVALID_JSON_POINTER. */ simdjson_inline simdjson_result document::at_pointer(std::string_view json_pointer) noexcept { rewind(); // Rewind the document each time at_pointer is called if (json_pointer.empty()) { return this->get_value(); } json_type t; SIMDJSON_TRY(type().get(t)); switch (t) { case json_type::array: return (*this).get_array().at_pointer(json_pointer); case json_type::object: return (*this).get_object().at_pointer(json_pointer); default: return INVALID_JSON_POINTER; } } } // namespace ondemand } // namespace ppc64 } // namespace simdjson namespace simdjson { simdjson_inline simdjson_result::simdjson_result( ppc64::ondemand::document &&value ) noexcept : implementation_simdjson_result_base( std::forward(value) ) { } simdjson_inline simdjson_result::simdjson_result( error_code error ) noexcept : 
implementation_simdjson_result_base( error ) { } /* Forwarding wrappers: unless noted otherwise, each method returns the stored error when error() is set and otherwise calls the same-named method on the wrapped value (first). */ simdjson_inline simdjson_result simdjson_result::count_elements() & noexcept { if (error()) { return error(); } return first.count_elements(); } simdjson_inline simdjson_result simdjson_result::count_fields() & noexcept { if (error()) { return error(); } return first.count_fields(); } simdjson_inline simdjson_result simdjson_result::at(size_t index) & noexcept { if (error()) { return error(); } return first.at(index); } /* Rewinds the wrapped value and returns SUCCESS, or returns the stored error without rewinding. */ simdjson_inline error_code simdjson_result::rewind() noexcept { if (error()) { return error(); } first.rewind(); return SUCCESS; } simdjson_inline simdjson_result simdjson_result::begin() & noexcept { if (error()) { return error(); } return first.begin(); } /* end() does not look at the error state; it always returns a default-constructed result. */ simdjson_inline simdjson_result simdjson_result::end() & noexcept { return {}; } simdjson_inline simdjson_result simdjson_result::find_field_unordered(std::string_view key) & noexcept { if (error()) { return error(); } return first.find_field_unordered(key); } simdjson_inline simdjson_result simdjson_result::find_field_unordered(const char *key) & noexcept { if (error()) { return error(); } return first.find_field_unordered(key); } simdjson_inline simdjson_result simdjson_result::operator[](std::string_view key) & noexcept { if (error()) { return error(); } return first[key]; } simdjson_inline simdjson_result simdjson_result::operator[](const char *key) & noexcept { if (error()) { return error(); } return first[key]; } simdjson_inline simdjson_result simdjson_result::find_field(std::string_view key) & noexcept { if (error()) { return error(); } return first.find_field(key); } simdjson_inline simdjson_result simdjson_result::find_field(const char *key) & noexcept { if (error()) { return error(); } return first.find_field(key); } simdjson_inline simdjson_result simdjson_result::get_array() & noexcept { if (error()) { return error(); } return first.get_array(); } simdjson_inline simdjson_result simdjson_result::get_object() & noexcept { if (error()) { 
return error(); } return first.get_object(); } simdjson_inline simdjson_result simdjson_result::get_uint64() noexcept { if (error()) { return error(); } return first.get_uint64(); } simdjson_inline simdjson_result simdjson_result::get_uint64_in_string() noexcept { if (error()) { return error(); } return first.get_uint64_in_string(); } simdjson_inline simdjson_result simdjson_result::get_int64() noexcept { if (error()) { return error(); } return first.get_int64(); } simdjson_inline simdjson_result simdjson_result::get_int64_in_string() noexcept { if (error()) { return error(); } return first.get_int64_in_string(); } simdjson_inline simdjson_result simdjson_result::get_double() noexcept { if (error()) { return error(); } return first.get_double(); } simdjson_inline simdjson_result simdjson_result::get_double_in_string() noexcept { if (error()) { return error(); } return first.get_double_in_string(); } simdjson_inline simdjson_result simdjson_result::get_string(bool allow_replacement) noexcept { if (error()) { return error(); } return first.get_string(allow_replacement); } /* Overload that writes the string into `receiver` and reports only an error_code. */ template simdjson_inline error_code simdjson_result::get_string(string_type& receiver, bool allow_replacement) noexcept { if (error()) { return error(); } return first.get_string(receiver, allow_replacement); } simdjson_inline simdjson_result simdjson_result::get_wobbly_string() noexcept { if (error()) { return error(); } return first.get_wobbly_string(); } simdjson_inline simdjson_result simdjson_result::get_raw_json_string() noexcept { if (error()) { return error(); } return first.get_raw_json_string(); } simdjson_inline simdjson_result simdjson_result::get_bool() noexcept { if (error()) { return error(); } return first.get_bool(); } simdjson_inline simdjson_result simdjson_result::get_value() noexcept { if (error()) { return error(); } return first.get_value(); } simdjson_inline simdjson_result simdjson_result::is_null() noexcept { if (error()) { return error(); } return first.is_null(); } 
/* Typed get() forwarding: the & overloads call first.get(...), the && overloads pass first on through std::forward; all of them return the stored error first when one is set. */ template simdjson_inline simdjson_result simdjson_result::get() & noexcept { if (error()) { return error(); } return first.get(); } template simdjson_inline simdjson_result simdjson_result::get() && noexcept { if (error()) { return error(); } return std::forward(first).get(); } template simdjson_inline error_code simdjson_result::get(T &out) & noexcept { if (error()) { return error(); } return first.get(out); } template simdjson_inline error_code simdjson_result::get(T &out) && noexcept { if (error()) { return error(); } return std::forward(first).get(out); } /* Explicit specializations: the lvalue (&) forms are deleted, while the rvalue (&&) forms hand over the wrapped value itself (std::forward of first) - see the overload taking ppc64::ondemand::document &out. */ template<> simdjson_inline simdjson_result simdjson_result::get() & noexcept = delete; template<> simdjson_inline simdjson_result simdjson_result::get() && noexcept { if (error()) { return error(); } return std::forward(first); } template<> simdjson_inline error_code simdjson_result::get(ppc64::ondemand::document &out) & noexcept = delete; template<> simdjson_inline error_code simdjson_result::get(ppc64::ondemand::document &out) && noexcept { if (error()) { return error(); } out = std::forward(first); return SUCCESS; } simdjson_inline simdjson_result simdjson_result::type() noexcept { if (error()) { return error(); } return first.type(); } simdjson_inline simdjson_result simdjson_result::is_scalar() noexcept { if (error()) { return error(); } return first.is_scalar(); } /* NOTE(review): the return type here is bool, so `return error()` converts the error code to bool; a stored error therefore appears to be reported as true - confirm this is intended. */ simdjson_inline bool simdjson_result::is_negative() noexcept { if (error()) { return error(); } return first.is_negative(); } simdjson_inline simdjson_result simdjson_result::is_integer() noexcept { if (error()) { return error(); } return first.is_integer(); } simdjson_inline simdjson_result simdjson_result::get_number_type() noexcept { if (error()) { return error(); } return first.get_number_type(); } simdjson_inline simdjson_result simdjson_result::get_number() noexcept { if (error()) { return error(); } return first.get_number(); } /* Conversion operators (exception-enabled builds only): throw simdjson_error when an error is stored, otherwise convert the wrapped value. */ #if SIMDJSON_EXCEPTIONS simdjson_inline simdjson_result::operator ppc64::ondemand::array() & noexcept(false) { 
if (error()) { throw simdjson_error(error()); } return first; } simdjson_inline simdjson_result::operator ppc64::ondemand::object() & noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } simdjson_inline simdjson_result::operator uint64_t() noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } simdjson_inline simdjson_result::operator int64_t() noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } simdjson_inline simdjson_result::operator double() noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } simdjson_inline simdjson_result::operator std::string_view() noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } simdjson_inline simdjson_result::operator ppc64::ondemand::raw_json_string() noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } simdjson_inline simdjson_result::operator bool() noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } simdjson_inline simdjson_result::operator ppc64::ondemand::value() noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } #endif simdjson_inline simdjson_result simdjson_result::current_location() noexcept { if (error()) { return error(); } return first.current_location(); } /* NOTE(review): at_end() and current_depth() return error() through their bool / int32_t return types when an error is stored, so the error code is converted rather than reported as an error - confirm callers expect that. */ simdjson_inline bool simdjson_result::at_end() const noexcept { if (error()) { return error(); } return first.at_end(); } simdjson_inline int32_t simdjson_result::current_depth() const noexcept { if (error()) { return error(); } return first.current_depth(); } simdjson_inline simdjson_result simdjson_result::raw_json_token() noexcept { if (error()) { return error(); } return first.raw_json_token(); } simdjson_inline simdjson_result simdjson_result::at_pointer(std::string_view json_pointer) noexcept { if (error()) { return error(); } return first.at_pointer(json_pointer); } } // namespace simdjson namespace simdjson { namespace ppc64 { 
namespace ondemand { simdjson_inline document_reference::document_reference() noexcept : doc{nullptr} {} simdjson_inline document_reference::document_reference(document &d) noexcept : doc(&d) {} simdjson_inline void document_reference::rewind() noexcept { doc->rewind(); } simdjson_inline simdjson_result document_reference::get_array() & noexcept { return doc->get_array(); } simdjson_inline simdjson_result document_reference::get_object() & noexcept { return doc->get_object(); } /** * The document_reference instances are used primarily/solely for streams of JSON * documents. * We decided that calling 'get_double()' on the JSON document '1.233 blabla' should * give an error, so we check for trailing content. * * However, for streams of JSON documents, we want to be able to start from * "321" "321" "321" * and parse it successfully as a stream of JSON documents, calling get_uint64_in_string() * successfully each time. * * To achieve this result, we pass a 'false' to a get_root_value_iterator() method: * this indicates that we allow trailing content. 
*/ simdjson_inline simdjson_result document_reference::get_uint64() noexcept { return doc->get_root_value_iterator().get_root_uint64(false); } simdjson_inline simdjson_result document_reference::get_uint64_in_string() noexcept { return doc->get_root_value_iterator().get_root_uint64_in_string(false); } simdjson_inline simdjson_result document_reference::get_int64() noexcept { return doc->get_root_value_iterator().get_root_int64(false); } simdjson_inline simdjson_result document_reference::get_int64_in_string() noexcept { return doc->get_root_value_iterator().get_root_int64_in_string(false); } simdjson_inline simdjson_result document_reference::get_double() noexcept { return doc->get_root_value_iterator().get_root_double(false); } simdjson_inline simdjson_result document_reference::get_double_in_string() noexcept { return doc->get_root_value_iterator().get_root_double(false); } simdjson_inline simdjson_result document_reference::get_string(bool allow_replacement) noexcept { return doc->get_root_value_iterator().get_root_string(false, allow_replacement); } template simdjson_inline error_code document_reference::get_string(string_type& receiver, bool allow_replacement) noexcept { return doc->get_root_value_iterator().get_root_string(receiver, false, allow_replacement); } simdjson_inline simdjson_result document_reference::get_wobbly_string() noexcept { return doc->get_root_value_iterator().get_root_wobbly_string(false); } simdjson_inline simdjson_result document_reference::get_raw_json_string() noexcept { return doc->get_root_value_iterator().get_root_raw_json_string(false); } simdjson_inline simdjson_result document_reference::get_bool() noexcept { return doc->get_root_value_iterator().get_root_bool(false); } simdjson_inline simdjson_result document_reference::get_value() noexcept { return doc->get_value(); } simdjson_inline simdjson_result document_reference::is_null() noexcept { return doc->get_root_value_iterator().is_root_null(false); } #if SIMDJSON_EXCEPTIONS 
/* Conversion operators (exception-enabled builds only). The uint64_t, int64_t, double and bool operators call this class's own get_*() methods; the array, object, std::string_view, raw_json_string and value operators convert the referenced document (*doc) instead. NOTE(review): whether the *doc conversions treat trailing content the same way as the document_reference getters cannot be confirmed from this section. */
simdjson_inline document_reference::operator array() & noexcept(false) { return array(*doc); } simdjson_inline document_reference::operator object() & noexcept(false) { return object(*doc); } simdjson_inline document_reference::operator uint64_t() noexcept(false) { return get_uint64(); } simdjson_inline document_reference::operator int64_t() noexcept(false) { return get_int64(); } simdjson_inline document_reference::operator double() noexcept(false) { return get_double(); } simdjson_inline document_reference::operator std::string_view() noexcept(false) { return std::string_view(*doc); } simdjson_inline document_reference::operator raw_json_string() noexcept(false) { return raw_json_string(*doc); } simdjson_inline document_reference::operator bool() noexcept(false) { return get_bool(); } simdjson_inline document_reference::operator value() noexcept(false) { return value(*doc); } #endif /* The members below forward to the referenced document, except is_integer(), get_number_type() and get_number(), which call the root value iterator directly and pass false. */ simdjson_inline simdjson_result document_reference::count_elements() & noexcept { return doc->count_elements(); } simdjson_inline simdjson_result document_reference::count_fields() & noexcept { return doc->count_fields(); } simdjson_inline simdjson_result document_reference::at(size_t index) & noexcept { return doc->at(index); } simdjson_inline simdjson_result document_reference::begin() & noexcept { return doc->begin(); } simdjson_inline simdjson_result document_reference::end() & noexcept { return doc->end(); } simdjson_inline simdjson_result document_reference::find_field(std::string_view key) & noexcept { return doc->find_field(key); } simdjson_inline simdjson_result document_reference::find_field(const char *key) & noexcept { return doc->find_field(key); } simdjson_inline simdjson_result document_reference::operator[](std::string_view key) & noexcept { return (*doc)[key]; } simdjson_inline simdjson_result document_reference::operator[](const char *key) & noexcept { return (*doc)[key]; } simdjson_inline simdjson_result document_reference::find_field_unordered(std::string_view key) 
& noexcept { return doc->find_field_unordered(key); } simdjson_inline simdjson_result document_reference::find_field_unordered(const char *key) & noexcept { return doc->find_field_unordered(key); } simdjson_inline simdjson_result document_reference::type() noexcept { return doc->type(); } simdjson_inline simdjson_result document_reference::is_scalar() noexcept { return doc->is_scalar(); } simdjson_inline simdjson_result document_reference::current_location() noexcept { return doc->current_location(); } simdjson_inline int32_t document_reference::current_depth() const noexcept { return doc->current_depth(); } simdjson_inline bool document_reference::is_negative() noexcept { return doc->is_negative(); } simdjson_inline simdjson_result document_reference::is_integer() noexcept { return doc->get_root_value_iterator().is_root_integer(false); } simdjson_inline simdjson_result document_reference::get_number_type() noexcept { return doc->get_root_value_iterator().get_root_number_type(false); } simdjson_inline simdjson_result document_reference::get_number() noexcept { return doc->get_root_value_iterator().get_root_number(false); } simdjson_inline simdjson_result document_reference::raw_json_token() noexcept { return doc->raw_json_token(); } simdjson_inline simdjson_result document_reference::at_pointer(std::string_view json_pointer) noexcept { return doc->at_pointer(json_pointer); } simdjson_inline simdjson_result document_reference::raw_json() noexcept { return doc->raw_json();} /* Gives access to the referenced document itself. */ simdjson_inline document_reference::operator document&() const noexcept { return *doc; } } // namespace ondemand } // namespace ppc64 } // namespace simdjson namespace simdjson { simdjson_inline simdjson_result::simdjson_result(ppc64::ondemand::document_reference value, error_code error) noexcept : implementation_simdjson_result_base(std::forward(value), error) {} simdjson_inline simdjson_result simdjson_result::count_elements() & noexcept { if (error()) { return error(); } return 
first.count_elements(); } /* Forwarding wrappers: unless noted otherwise, each method returns the stored error when error() is set and otherwise calls the same-named method on the wrapped value (first). */ simdjson_inline simdjson_result simdjson_result::count_fields() & noexcept { if (error()) { return error(); } return first.count_fields(); } simdjson_inline simdjson_result simdjson_result::at(size_t index) & noexcept { if (error()) { return error(); } return first.at(index); } /* Rewinds the wrapped value and returns SUCCESS, or returns the stored error without rewinding. */ simdjson_inline error_code simdjson_result::rewind() noexcept { if (error()) { return error(); } first.rewind(); return SUCCESS; } simdjson_inline simdjson_result simdjson_result::begin() & noexcept { if (error()) { return error(); } return first.begin(); } /* end() does not look at the error state; it always returns a default-constructed result. */ simdjson_inline simdjson_result simdjson_result::end() & noexcept { return {}; } simdjson_inline simdjson_result simdjson_result::find_field_unordered(std::string_view key) & noexcept { if (error()) { return error(); } return first.find_field_unordered(key); } simdjson_inline simdjson_result simdjson_result::find_field_unordered(const char *key) & noexcept { if (error()) { return error(); } return first.find_field_unordered(key); } simdjson_inline simdjson_result simdjson_result::operator[](std::string_view key) & noexcept { if (error()) { return error(); } return first[key]; } simdjson_inline simdjson_result simdjson_result::operator[](const char *key) & noexcept { if (error()) { return error(); } return first[key]; } simdjson_inline simdjson_result simdjson_result::find_field(std::string_view key) & noexcept { if (error()) { return error(); } return first.find_field(key); } simdjson_inline simdjson_result simdjson_result::find_field(const char *key) & noexcept { if (error()) { return error(); } return first.find_field(key); } simdjson_inline simdjson_result simdjson_result::get_array() & noexcept { if (error()) { return error(); } return first.get_array(); } simdjson_inline simdjson_result simdjson_result::get_object() & noexcept { if (error()) { return error(); } return first.get_object(); } simdjson_inline simdjson_result simdjson_result::get_uint64() noexcept { if (error()) { return error(); } return 
first.get_uint64(); } simdjson_inline simdjson_result simdjson_result::get_uint64_in_string() noexcept { if (error()) { return error(); } return first.get_uint64_in_string(); } simdjson_inline simdjson_result simdjson_result::get_int64() noexcept { if (error()) { return error(); } return first.get_int64(); } simdjson_inline simdjson_result simdjson_result::get_int64_in_string() noexcept { if (error()) { return error(); } return first.get_int64_in_string(); } simdjson_inline simdjson_result simdjson_result::get_double() noexcept { if (error()) { return error(); } return first.get_double(); } simdjson_inline simdjson_result simdjson_result::get_double_in_string() noexcept { if (error()) { return error(); } return first.get_double_in_string(); } simdjson_inline simdjson_result simdjson_result::get_string(bool allow_replacement) noexcept { if (error()) { return error(); } return first.get_string(allow_replacement); } /* Overload that writes the string into `receiver` and reports only an error_code. */ template simdjson_inline error_code simdjson_result::get_string(string_type& receiver, bool allow_replacement) noexcept { if (error()) { return error(); } return first.get_string(receiver, allow_replacement); } simdjson_inline simdjson_result simdjson_result::get_wobbly_string() noexcept { if (error()) { return error(); } return first.get_wobbly_string(); } simdjson_inline simdjson_result simdjson_result::get_raw_json_string() noexcept { if (error()) { return error(); } return first.get_raw_json_string(); } simdjson_inline simdjson_result simdjson_result::get_bool() noexcept { if (error()) { return error(); } return first.get_bool(); } simdjson_inline simdjson_result simdjson_result::get_value() noexcept { if (error()) { return error(); } return first.get_value(); } simdjson_inline simdjson_result simdjson_result::is_null() noexcept { if (error()) { return error(); } return first.is_null(); } simdjson_inline simdjson_result simdjson_result::type() noexcept { if (error()) { return error(); } return first.type(); } simdjson_inline simdjson_result 
simdjson_result::is_scalar() noexcept { if (error()) { return error(); } return first.is_scalar(); } simdjson_inline simdjson_result simdjson_result::is_negative() noexcept { if (error()) { return error(); } return first.is_negative(); } simdjson_inline simdjson_result simdjson_result::is_integer() noexcept { if (error()) { return error(); } return first.is_integer(); } simdjson_inline simdjson_result simdjson_result::get_number_type() noexcept { if (error()) { return error(); } return first.get_number_type(); } simdjson_inline simdjson_result simdjson_result::get_number() noexcept { if (error()) { return error(); } return first.get_number(); } /* Conversion operators (exception-enabled builds only): throw simdjson_error when an error is stored, otherwise convert the wrapped value. */ #if SIMDJSON_EXCEPTIONS simdjson_inline simdjson_result::operator ppc64::ondemand::array() & noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } simdjson_inline simdjson_result::operator ppc64::ondemand::object() & noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } simdjson_inline simdjson_result::operator uint64_t() noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } simdjson_inline simdjson_result::operator int64_t() noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } simdjson_inline simdjson_result::operator double() noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } simdjson_inline simdjson_result::operator std::string_view() noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } simdjson_inline simdjson_result::operator ppc64::ondemand::raw_json_string() noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } simdjson_inline simdjson_result::operator bool() noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } simdjson_inline simdjson_result::operator ppc64::ondemand::value() noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } #endif simdjson_inline 
simdjson_result simdjson_result::current_location() noexcept { if (error()) { return error(); } return first.current_location(); } simdjson_inline simdjson_result simdjson_result::raw_json_token() noexcept { if (error()) { return error(); } return first.raw_json_token(); } simdjson_inline simdjson_result simdjson_result::at_pointer(std::string_view json_pointer) noexcept { if (error()) { return error(); } return first.at_pointer(json_pointer); } } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_INL_H /* end file simdjson/generic/ondemand/document-inl.h for ppc64 */ /* including simdjson/generic/ondemand/document_stream-inl.h for ppc64: #include "simdjson/generic/ondemand/document_stream-inl.h" */ /* begin file simdjson/generic/ondemand/document_stream-inl.h for ppc64 */ #ifndef SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_STREAM_INL_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_STREAM_INL_H */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/document_stream.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/document-inl.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base-inl.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ #include #include namespace simdjson { namespace ppc64 { namespace ondemand { #ifdef SIMDJSON_THREADS_ENABLED inline void stage1_worker::finish() { // After calling "run" someone would call finish() to wait // for the end of the processing. // This function will wait until either the thread has done // the processing or, else, the destructor has been called. 
std::unique_lock lock(locking_mutex); cond_var.wait(lock, [this]{return has_work == false;}); } /* Destructor: calls stop_thread(), which clears can_work/has_work, wakes all waiters and joins the thread if it is joinable. */ inline stage1_worker::~stage1_worker() { // The thread may never outlive the stage1_worker instance // and will always be stopped/joined before the stage1_worker // instance is gone. stop_thread(); } /* Starts the worker thread unless one is already joinable. The thread loops: it waits until has_work is set or can_work is cleared, exits when can_work is false, otherwise runs stage 1 for the owner stream, stores the result in owner->stage1_thread_error, clears has_work and notifies the waiter. */ inline void stage1_worker::start_thread() { std::unique_lock lock(locking_mutex); if(thread.joinable()) { return; // This should never happen but we never want to create more than one thread. } thread = std::thread([this]{ while(true) { std::unique_lock thread_lock(locking_mutex); // We wait for either "run" or "stop_thread" to be called. cond_var.wait(thread_lock, [this]{return has_work || !can_work;}); // If, for some reason, the stop_thread() method was called (i.e., the // destructor of stage1_worker is called, then we want to immediately destroy // the thread (and not do any more processing). if(!can_work) { break; } this->owner->stage1_thread_error = this->owner->run_stage1(*this->stage1_thread_parser, this->_next_batch_start); this->has_work = false; // The condition variable call should be moved after thread_lock.unlock() for performance // reasons but thread sanitizers may report it as a data race if we do. // See https://stackoverflow.com/questions/35775501/c-should-condition-variable-be-notified-under-lock cond_var.notify_one(); // will notify "finish" thread_lock.unlock(); } } ); } /* Asks the worker thread to stop: the flags are changed and waiters notified under the lock; the join happens after the lock is released. */ inline void stage1_worker::stop_thread() { std::unique_lock lock(locking_mutex); // We have to make sure that all locks can be released. 
can_work = false; has_work = false; cond_var.notify_all(); lock.unlock(); if(thread.joinable()) { thread.join(); } } /* Hands a batch to the worker thread: under the lock it records the owner stream, the batch start and the parser to use, sets has_work and notifies the thread. */ inline void stage1_worker::run(document_stream * ds, parser * stage1, size_t next_batch_start) { std::unique_lock lock(locking_mutex); owner = ds; _next_batch_start = next_batch_start; stage1_thread_parser = stage1; has_work = true; // The condition variable call should be moved after lock.unlock() for performance // reasons but thread sanitizers may report it as a data race if we do. // See https://stackoverflow.com/questions/35775501/c-should-condition-variable-be-notified-under-lock cond_var.notify_one(); // will notify the thread lock that we have work lock.unlock(); } #endif // SIMDJSON_THREADS_ENABLED /* Builds a stream over the _len bytes at _buf. A _batch_size at or below MINIMAL_BATCH_SIZE is replaced by MINIMAL_BATCH_SIZE. With threads enabled, _parser.threaded is copied into use_thread and a null worker sets error to MEMALLOC. */ simdjson_inline document_stream::document_stream( ondemand::parser &_parser, const uint8_t *_buf, size_t _len, size_t _batch_size, bool _allow_comma_separated ) noexcept : parser{&_parser}, buf{_buf}, len{_len}, batch_size{_batch_size <= MINIMAL_BATCH_SIZE ? MINIMAL_BATCH_SIZE : _batch_size}, allow_comma_separated{_allow_comma_separated}, error{SUCCESS} #ifdef SIMDJSON_THREADS_ENABLED , use_thread(_parser.threaded) // we need to make a copy because _parser.threaded can change #endif { #ifdef SIMDJSON_THREADS_ENABLED if(worker.get() == nullptr) { error = MEMALLOC; } #endif } /* Default constructor: no parser, no buffer, error set to UNINITIALIZED. */ simdjson_inline document_stream::document_stream() noexcept : parser{nullptr}, buf{nullptr}, len{0}, batch_size{0}, allow_comma_separated{false}, error{UNINITIALIZED} #ifdef SIMDJSON_THREADS_ENABLED , use_thread(false) #endif { } simdjson_inline document_stream::~document_stream() noexcept { #ifdef SIMDJSON_THREADS_ENABLED worker.reset(); #endif } inline size_t document_stream::size_in_bytes() const noexcept { return len; } /* When error is CAPACITY this returns len - batch_start; otherwise it returns the difference between the structural index entries at n_structural_indexes and n_structural_indexes + 1 (presumably bookkeeping written by stage 1 - confirm against the stage-1 implementation). */ inline size_t document_stream::truncated_bytes() const noexcept { if(error == CAPACITY) { return len - batch_start; } return parser->implementation->structural_indexes[parser->implementation->n_structural_indexes] - 
parser->implementation->structural_indexes[parser->implementation->n_structural_indexes + 1]; } simdjson_inline document_stream::iterator::iterator() noexcept : stream{nullptr}, finished{true} { } simdjson_inline document_stream::iterator::iterator(document_stream* _stream, bool is_end) noexcept : stream{_stream}, finished{is_end} { } simdjson_inline simdjson_result document_stream::iterator::operator*() noexcept { //if(stream->error) { return stream->error; } return simdjson_result(stream->doc, stream->error); } simdjson_inline document_stream::iterator& document_stream::iterator::operator++() noexcept { // If there is an error, then we want the iterator // to be finished, no matter what. (E.g., we do not // keep generating documents with errors, or go beyond // a document with errors.) // // Users do not have to call "operator*()" when they use operator++, // so we need to end the stream in the operator++ function. // // Note that setting finished = true is essential otherwise // we would enter an infinite loop. if (stream->error) { finished = true; } // Note that stream->next() is guarded against error conditions // (it will immediately return if stream->error casts to true). // In effect, this next function does nothing when (stream->error) // is true (hence the risk of an infinite loop). stream->next(); // If that was the last document, we're finished. // It is the only type of error we do not want to appear // in operator*. if (stream->error == EMPTY) { finished = true; } // If we had any other kind of error (not EMPTY) then we want // to pass it along to the operator* and we cannot mark the result // as "finished" just yet. return *this; } /* Two iterators differ exactly when their finished flags differ; the stream pointer is not compared. */ simdjson_inline bool document_stream::iterator::operator!=(const document_stream::iterator &other) const noexcept { return finished != other.finished; } simdjson_inline document_stream::iterator document_stream::begin() noexcept { start(); // If there are no documents, we're finished. 
return iterator(this, error == EMPTY); } simdjson_inline document_stream::iterator document_stream::end() noexcept { return iterator(this, true); } inline void document_stream::start() noexcept { if (error) { return; } error = parser->allocate(batch_size); if (error) { return; } // Always run the first stage 1 parse immediately batch_start = 0; error = run_stage1(*parser, batch_start); while(error == EMPTY) { // In exceptional cases, we may start with an empty block batch_start = next_batch_start(); if (batch_start >= len) { return; } error = run_stage1(*parser, batch_start); } if (error) { return; } doc_index = batch_start; doc = document(json_iterator(&buf[batch_start], parser)); doc.iter._streaming = true; #ifdef SIMDJSON_THREADS_ENABLED if (use_thread && next_batch_start() < len) { // Kick off the first thread on next batch if needed error = stage1_thread_parser.allocate(batch_size); if (error) { return; } worker->start_thread(); start_stage1_thread(); if (error) { return; } } #endif // SIMDJSON_THREADS_ENABLED } inline void document_stream::next() noexcept { // We always exit at once, once in an error condition. if (error) { return; } next_document(); if (error) { return; } auto cur_struct_index = doc.iter._root - parser->implementation->structural_indexes.get(); doc_index = batch_start + parser->implementation->structural_indexes[cur_struct_index]; // Check if at end of structural indexes (i.e. 
at end of batch) if(cur_struct_index >= static_cast(parser->implementation->n_structural_indexes)) { error = EMPTY; // Load another batch (if available) while (error == EMPTY) { batch_start = next_batch_start(); if (batch_start >= len) { break; } #ifdef SIMDJSON_THREADS_ENABLED if(use_thread) { load_from_stage1_thread(); } else { error = run_stage1(*parser, batch_start); } #else error = run_stage1(*parser, batch_start); #endif /** * Whenever we move to another window, we need to update all pointers to make * it appear as if the input buffer started at the beginning of the window. * * Take this input: * * {"z":5} {"1":1,"2":2,"4":4} [7, 10, 9] [15, 11, 12, 13] [154, 110, 112, 1311] * * Say you process the following window... * * '{"z":5} {"1":1,"2":2,"4":4} [7, 10, 9]' * * When you do so, the json_iterator has a pointer at the beginning of the memory region * (pointing at the beginning of '{"z"...'. * * When you move to the window that starts at... * * '[7, 10, 9] [15, 11, 12, 13] ... * * then it is not sufficient to just run stage 1. You also need to re-anchor the * json_iterator so that it believes we are starting at '[7, 10, 9]...'. * * Under the DOM front-end, this gets done automatically because the parser owns * the pointer to the data, and when you call stage1 and then stage2 on the same * parser, then stage2 will run on the pointer acquired by stage1. * * That is, stage1 calls "this->buf = _buf" so the parser remembers the buffer that * we used. But json_iterator has no callback when stage1 is called on the parser. * In fact, I think that the parser is unaware of json_iterator. * * * So we need to re-anchor the json_iterator after each call to stage 1 so that * all of the pointers are in sync. */ doc.iter = json_iterator(&buf[batch_start], parser); doc.iter._streaming = true; /** * End of resync. */ if (error) { continue; } // If the error was EMPTY, we may want to load another batch. 
doc_index = batch_start; } } } inline void document_stream::next_document() noexcept { // Go to next place where depth=0 (document depth) error = doc.iter.skip_child(0); if (error) { return; } // Always set depth=1 at the start of document doc.iter._depth = 1; // consume comma if comma separated is allowed if (allow_comma_separated) { doc.iter.consume_character(','); } // Resets the string buffer at the beginning, thus invalidating the strings. doc.iter._string_buf_loc = parser->string_buf.get(); doc.iter._root = doc.iter.position(); } /* Start of the next batch: batch_start plus the structural index entry stored at position n_structural_indexes. */ inline size_t document_stream::next_batch_start() const noexcept { return batch_start + parser->implementation->structural_indexes[parser->implementation->n_structural_indexes]; } /* Runs stage 1 of parser p on the batch starting at _batch_start: when the remaining input fits in batch_size it is processed as streaming_final, otherwise batch_size bytes are processed as streaming_partial. */ inline error_code document_stream::run_stage1(ondemand::parser &p, size_t _batch_start) noexcept { // This code only updates the structural index in the parser, it does not update any json_iterator // instance. size_t remaining = len - _batch_start; if (remaining <= batch_size) { return p.implementation->stage1(&buf[_batch_start], remaining, stage1_mode::streaming_final); } else { return p.implementation->stage1(&buf[_batch_start], batch_size, stage1_mode::streaming_partial); } } simdjson_inline size_t document_stream::iterator::current_index() const noexcept { return stream->doc_index; } simdjson_inline std::string_view document_stream::iterator::source() const noexcept { auto depth = stream->doc.iter.depth(); auto cur_struct_index = stream->doc.iter._root - stream->parser->implementation->structural_indexes.get(); // If at root, process the first token to determine if scalar value if (stream->doc.iter.at_root()) { switch (stream->buf[stream->batch_start + stream->parser->implementation->structural_indexes[cur_struct_index]]) { case '{': case '[': // Depth=1 already at start of document break; case '}': case ']': depth--; break; default: // Scalar value document // TODO: Remove any trailing whitespaces // This returns a string spanning from start of value 
to the beginning of the next document (excluded) return std::string_view(reinterpret_cast(stream->buf) + current_index(), stream->parser->implementation->structural_indexes[++cur_struct_index] - current_index() - 1); } cur_struct_index++; } while (cur_struct_index <= static_cast(stream->parser->implementation->n_structural_indexes)) { switch (stream->buf[stream->batch_start + stream->parser->implementation->structural_indexes[cur_struct_index]]) { case '{': case '[': depth++; break; case '}': case ']': depth--; break; } if (depth == 0) { break; } cur_struct_index++; } return std::string_view(reinterpret_cast(stream->buf) + current_index(), stream->parser->implementation->structural_indexes[cur_struct_index] - current_index() + stream->batch_start + 1);; } inline error_code document_stream::iterator::error() const noexcept { return stream->error; } #ifdef SIMDJSON_THREADS_ENABLED inline void document_stream::load_from_stage1_thread() noexcept { worker->finish(); // Swap to the parser that was loaded up in the thread. Make sure the parser has // enough memory to swap to, as well. std::swap(stage1_thread_parser,*parser); error = stage1_thread_error; if (error) { return; } // If there's anything left, start the stage 1 thread! if (next_batch_start() < len) { start_stage1_thread(); } } inline void document_stream::start_stage1_thread() noexcept { // we call the thread on a lambda that will update // this->stage1_thread_error // there is only one thread that may write to this value // TODO this is NOT exception-safe. 
this->stage1_thread_error = UNINITIALIZED; // In case something goes wrong, make sure it's an error size_t _next_batch_start = this->next_batch_start(); worker->run(this, & this->stage1_thread_parser, _next_batch_start); } #endif // SIMDJSON_THREADS_ENABLED } // namespace ondemand } // namespace ppc64 } // namespace simdjson namespace simdjson { simdjson_inline simdjson_result::simdjson_result( error_code error ) noexcept : implementation_simdjson_result_base(error) { } simdjson_inline simdjson_result::simdjson_result( ppc64::ondemand::document_stream &&value ) noexcept : implementation_simdjson_result_base( std::forward(value) ) { } } #endif // SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_STREAM_INL_H /* end file simdjson/generic/ondemand/document_stream-inl.h for ppc64 */ /* including simdjson/generic/ondemand/field-inl.h for ppc64: #include "simdjson/generic/ondemand/field-inl.h" */ /* begin file simdjson/generic/ondemand/field-inl.h for ppc64 */ #ifndef SIMDJSON_GENERIC_ONDEMAND_FIELD_INL_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_FIELD_INL_H */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/field.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value-inl.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator-inl.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { namespace ppc64 { namespace ondemand { // clang 6 doesn't think the default constructor can be noexcept, so we make it explicit simdjson_inline field::field() noexcept : std::pair() {} simdjson_inline field::field(raw_json_string key, ondemand::value &&value) noexcept : std::pair(key, std::forward(value)) { } simdjson_inline simdjson_result 
field::start(value_iterator &parent_iter) noexcept { raw_json_string key; SIMDJSON_TRY( parent_iter.field_key().get(key) ); SIMDJSON_TRY( parent_iter.field_value() ); return field::start(parent_iter, key); } simdjson_inline simdjson_result field::start(const value_iterator &parent_iter, raw_json_string key) noexcept { return field(key, parent_iter.child()); } simdjson_inline simdjson_warn_unused simdjson_result field::unescaped_key(bool allow_replacement) noexcept { SIMDJSON_ASSUME(first.buf != nullptr); // We would like to call .alive() but Visual Studio won't let us. simdjson_result answer = first.unescape(second.iter.json_iter(), allow_replacement); first.consume(); return answer; } simdjson_inline raw_json_string field::key() const noexcept { SIMDJSON_ASSUME(first.buf != nullptr); // We would like to call .alive() by Visual Studio won't let us. return first; } simdjson_inline value &field::value() & noexcept { return second; } simdjson_inline value field::value() && noexcept { return std::forward(*this).second; } } // namespace ondemand } // namespace ppc64 } // namespace simdjson namespace simdjson { simdjson_inline simdjson_result::simdjson_result( ppc64::ondemand::field &&value ) noexcept : implementation_simdjson_result_base( std::forward(value) ) { } simdjson_inline simdjson_result::simdjson_result( error_code error ) noexcept : implementation_simdjson_result_base(error) { } simdjson_inline simdjson_result simdjson_result::key() noexcept { if (error()) { return error(); } return first.key(); } simdjson_inline simdjson_result simdjson_result::unescaped_key(bool allow_replacement) noexcept { if (error()) { return error(); } return first.unescaped_key(allow_replacement); } simdjson_inline simdjson_result simdjson_result::value() noexcept { if (error()) { return error(); } return std::move(first.value()); } } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_FIELD_INL_H /* end file simdjson/generic/ondemand/field-inl.h for ppc64 */ /* including 
simdjson/generic/ondemand/json_iterator-inl.h for ppc64: #include "simdjson/generic/ondemand/json_iterator-inl.h" */ /* begin file simdjson/generic/ondemand/json_iterator-inl.h for ppc64 */ #ifndef SIMDJSON_GENERIC_ONDEMAND_JSON_ITERATOR_INL_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_JSON_ITERATOR_INL_H */ /* amalgamation skipped (editor-only): #include "simdjson/internal/dom_parser_implementation.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/parser.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/logger-inl.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/parser-inl.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/token_iterator-inl.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { namespace ppc64 { namespace ondemand { simdjson_inline json_iterator::json_iterator(json_iterator &&other) noexcept : token(std::forward(other.token)), parser{other.parser}, _string_buf_loc{other._string_buf_loc}, error{other.error}, _depth{other._depth}, _root{other._root}, _streaming{other._streaming} { other.parser = nullptr; } simdjson_inline json_iterator &json_iterator::operator=(json_iterator &&other) noexcept { token = other.token; parser = other.parser; _string_buf_loc = other._string_buf_loc; error = other.error; _depth = other._depth; _root = other._root; _streaming = other._streaming; other.parser = nullptr; return *this; } simdjson_inline json_iterator::json_iterator(const uint8_t *buf, ondemand::parser 
*_parser) noexcept : token(buf, &_parser->implementation->structural_indexes[0]), parser{_parser}, _string_buf_loc{parser->string_buf.get()}, _depth{1}, _root{parser->implementation->structural_indexes.get()}, _streaming{false} { logger::log_headers(); #if SIMDJSON_CHECK_EOF assert_more_tokens(); #endif } inline void json_iterator::rewind() noexcept { token.set_position( root_position() ); logger::log_headers(); // We start again _string_buf_loc = parser->string_buf.get(); _depth = 1; } inline bool json_iterator::balanced() const noexcept { token_iterator ti(token); int32_t count{0}; ti.set_position( root_position() ); while(ti.peek() <= peek_last()) { switch (*ti.return_current_and_advance()) { case '[': case '{': count++; break; case ']': case '}': count--; break; default: break; } } return count == 0; } // GCC 7 warns when the first line of this function is inlined away into oblivion due to the caller // relating depth and parent_depth, which is a desired effect. The warning does not show up if the // skip_child() function is not marked inline). SIMDJSON_PUSH_DISABLE_WARNINGS SIMDJSON_DISABLE_STRICT_OVERFLOW_WARNING simdjson_warn_unused simdjson_inline error_code json_iterator::skip_child(depth_t parent_depth) noexcept { if (depth() <= parent_depth) { return SUCCESS; } switch (*return_current_and_advance()) { // TODO consider whether matching braces is a requirement: if non-matching braces indicates // *missing* braces, then future lookups are not in the object/arrays they think they are, // violating the rule "validate enough structure that the user can be confident they are // looking at the right values." 
// PERF TODO we can eliminate the switch here with a lookup of how much to add to depth // For the first open array/object in a value, we've already incremented depth, so keep it the same // We never stop at colon, but if we did, it wouldn't affect depth case '[': case '{': case ':': logger::log_start_value(*this, "skip"); break; // If there is a comma, we have just finished a value in an array/object, and need to get back in case ',': logger::log_value(*this, "skip"); break; // ] or } means we just finished a value and need to jump out of the array/object case ']': case '}': logger::log_end_value(*this, "skip"); _depth--; if (depth() <= parent_depth) { return SUCCESS; } #if SIMDJSON_CHECK_EOF // If there are no more tokens, the parent is incomplete. if (at_end()) { return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "Missing [ or { at start"); } #endif // SIMDJSON_CHECK_EOF break; case '"': if(*peek() == ':') { // We are at a key!!! // This might happen if you just started an object and you skip it immediately. // Performance note: it would be nice to get rid of this check as it is somewhat // expensive. // https://github.com/simdjson/simdjson/issues/1742 logger::log_value(*this, "key"); return_current_and_advance(); // eat up the ':' break; // important!!! } simdjson_fallthrough; // Anything else must be a scalar value default: // For the first scalar, we will have incremented depth already, so we decrement it here. 
logger::log_value(*this, "skip"); _depth--; if (depth() <= parent_depth) { return SUCCESS; } break; } // Now that we've considered the first value, we only increment/decrement for arrays/objects while (position() < end_position()) { switch (*return_current_and_advance()) { case '[': case '{': logger::log_start_value(*this, "skip"); _depth++; break; // TODO consider whether matching braces is a requirement: if non-matching braces indicates // *missing* braces, then future lookups are not in the object/arrays they think they are, // violating the rule "validate enough structure that the user can be confident they are // looking at the right values." // PERF TODO we can eliminate the switch here with a lookup of how much to add to depth case ']': case '}': logger::log_end_value(*this, "skip"); _depth--; if (depth() <= parent_depth) { return SUCCESS; } break; default: logger::log_value(*this, "skip", ""); break; } } return report_error(TAPE_ERROR, "not enough close braces"); } SIMDJSON_POP_DISABLE_WARNINGS simdjson_inline bool json_iterator::at_root() const noexcept { return position() == root_position(); } simdjson_inline bool json_iterator::is_single_token() const noexcept { return parser->implementation->n_structural_indexes == 1; } simdjson_inline bool json_iterator::streaming() const noexcept { return _streaming; } simdjson_inline token_position json_iterator::root_position() const noexcept { return _root; } simdjson_inline void json_iterator::assert_at_document_depth() const noexcept { SIMDJSON_ASSUME( _depth == 1 ); } simdjson_inline void json_iterator::assert_at_root() const noexcept { SIMDJSON_ASSUME( _depth == 1 ); #ifndef SIMDJSON_CLANG_VISUAL_STUDIO // Under Visual Studio, the next SIMDJSON_ASSUME fails with: the argument // has side effects that will be discarded. 
SIMDJSON_ASSUME( token.position() == _root ); #endif } simdjson_inline void json_iterator::assert_more_tokens(uint32_t required_tokens) const noexcept { assert_valid_position(token._position + required_tokens - 1); } simdjson_inline void json_iterator::assert_valid_position(token_position position) const noexcept { #ifndef SIMDJSON_CLANG_VISUAL_STUDIO SIMDJSON_ASSUME( position >= &parser->implementation->structural_indexes[0] ); SIMDJSON_ASSUME( position < &parser->implementation->structural_indexes[parser->implementation->n_structural_indexes] ); #endif } simdjson_inline bool json_iterator::at_end() const noexcept { return position() == end_position(); } simdjson_inline token_position json_iterator::end_position() const noexcept { uint32_t n_structural_indexes{parser->implementation->n_structural_indexes}; return &parser->implementation->structural_indexes[n_structural_indexes]; } inline std::string json_iterator::to_string() const noexcept { if( !is_alive() ) { return "dead json_iterator instance"; } const char * current_structural = reinterpret_cast(token.peek()); return std::string("json_iterator [ depth : ") + std::to_string(_depth) + std::string(", structural : '") + std::string(current_structural,1) + std::string("', offset : ") + std::to_string(token.current_offset()) + std::string("', error : ") + error_message(error) + std::string(" ]"); } inline simdjson_result json_iterator::current_location() const noexcept { if (!is_alive()) { // Unrecoverable error if (!at_root()) { return reinterpret_cast(token.peek(-1)); } else { return reinterpret_cast(token.peek()); } } if (at_end()) { return OUT_OF_BOUNDS; } return reinterpret_cast(token.peek()); } simdjson_inline bool json_iterator::is_alive() const noexcept { return parser; } simdjson_inline void json_iterator::abandon() noexcept { parser = nullptr; _depth = 0; } simdjson_inline const uint8_t *json_iterator::return_current_and_advance() noexcept { #if SIMDJSON_CHECK_EOF assert_more_tokens(); #endif // 
SIMDJSON_CHECK_EOF return token.return_current_and_advance(); } simdjson_inline const uint8_t *json_iterator::unsafe_pointer() const noexcept { // deliberately done without safety guard: return token.peek(); } simdjson_inline const uint8_t *json_iterator::peek(int32_t delta) const noexcept { #if SIMDJSON_CHECK_EOF assert_more_tokens(delta+1); #endif // SIMDJSON_CHECK_EOF return token.peek(delta); } simdjson_inline uint32_t json_iterator::peek_length(int32_t delta) const noexcept { #if SIMDJSON_CHECK_EOF assert_more_tokens(delta+1); #endif // #if SIMDJSON_CHECK_EOF return token.peek_length(delta); } simdjson_inline const uint8_t *json_iterator::peek(token_position position) const noexcept { // todo: currently we require end-of-string buffering, but the following // assert_valid_position should be turned on if/when we lift that condition. // assert_valid_position(position); // This is almost surely related to SIMDJSON_CHECK_EOF but given that SIMDJSON_CHECK_EOF // is ON by default, we have no choice but to disable it for real with a comment. return token.peek(position); } simdjson_inline uint32_t json_iterator::peek_length(token_position position) const noexcept { #if SIMDJSON_CHECK_EOF assert_valid_position(position); #endif // SIMDJSON_CHECK_EOF return token.peek_length(position); } simdjson_inline token_position json_iterator::last_position() const noexcept { // The following line fails under some compilers... // SIMDJSON_ASSUME(parser->implementation->n_structural_indexes > 0); // since it has side-effects. 
uint32_t n_structural_indexes{parser->implementation->n_structural_indexes}; SIMDJSON_ASSUME(n_structural_indexes > 0); return &parser->implementation->structural_indexes[n_structural_indexes - 1]; } simdjson_inline const uint8_t *json_iterator::peek_last() const noexcept { return token.peek(last_position()); } simdjson_inline void json_iterator::ascend_to(depth_t parent_depth) noexcept { SIMDJSON_ASSUME(parent_depth >= 0 && parent_depth < INT32_MAX - 1); SIMDJSON_ASSUME(_depth == parent_depth + 1); _depth = parent_depth; } simdjson_inline void json_iterator::descend_to(depth_t child_depth) noexcept { SIMDJSON_ASSUME(child_depth >= 1 && child_depth < INT32_MAX); SIMDJSON_ASSUME(_depth == child_depth - 1); _depth = child_depth; } simdjson_inline depth_t json_iterator::depth() const noexcept { return _depth; } simdjson_inline uint8_t *&json_iterator::string_buf_loc() noexcept { return _string_buf_loc; } simdjson_inline error_code json_iterator::report_error(error_code _error, const char *message) noexcept { SIMDJSON_ASSUME(_error != SUCCESS && _error != UNINITIALIZED && _error != INCORRECT_TYPE && _error != NO_SUCH_FIELD); logger::log_error(*this, message); error = _error; return error; } simdjson_inline token_position json_iterator::position() const noexcept { return token.position(); } simdjson_inline simdjson_result json_iterator::unescape(raw_json_string in, bool allow_replacement) noexcept { return parser->unescape(in, _string_buf_loc, allow_replacement); } simdjson_inline simdjson_result json_iterator::unescape_wobbly(raw_json_string in) noexcept { return parser->unescape_wobbly(in, _string_buf_loc); } simdjson_inline void json_iterator::reenter_child(token_position position, depth_t child_depth) noexcept { SIMDJSON_ASSUME(child_depth >= 1 && child_depth < INT32_MAX); SIMDJSON_ASSUME(_depth == child_depth - 1); #if SIMDJSON_DEVELOPMENT_CHECKS #ifndef SIMDJSON_CLANG_VISUAL_STUDIO SIMDJSON_ASSUME(size_t(child_depth) < parser->max_depth()); 
SIMDJSON_ASSUME(position >= parser->start_positions[child_depth]); #endif #endif token.set_position(position); _depth = child_depth; } simdjson_inline error_code json_iterator::consume_character(char c) noexcept { if (*peek() == c) { return_current_and_advance(); return SUCCESS; } return TAPE_ERROR; } #if SIMDJSON_DEVELOPMENT_CHECKS simdjson_inline token_position json_iterator::start_position(depth_t depth) const noexcept { SIMDJSON_ASSUME(size_t(depth) < parser->max_depth()); return size_t(depth) < parser->max_depth() ? parser->start_positions[depth] : 0; } simdjson_inline void json_iterator::set_start_position(depth_t depth, token_position position) noexcept { SIMDJSON_ASSUME(size_t(depth) < parser->max_depth()); if(size_t(depth) < parser->max_depth()) { parser->start_positions[depth] = position; } } #endif simdjson_inline error_code json_iterator::optional_error(error_code _error, const char *message) noexcept { SIMDJSON_ASSUME(_error == INCORRECT_TYPE || _error == NO_SUCH_FIELD); logger::log_error(*this, message); return _error; } simdjson_warn_unused simdjson_inline bool json_iterator::copy_to_buffer(const uint8_t *json, uint32_t max_len, uint8_t *tmpbuf, size_t N) noexcept { // This function is not expected to be called in performance-sensitive settings. // Let us guard against silly cases: if((N < max_len) || (N == 0)) { return false; } // Copy to the buffer. std::memcpy(tmpbuf, json, max_len); if(N > max_len) { // We pad whatever remains with ' '. 
std::memset(tmpbuf + max_len, ' ', N - max_len); } return true; } } // namespace ondemand } // namespace ppc64 } // namespace simdjson namespace simdjson { simdjson_inline simdjson_result::simdjson_result(ppc64::ondemand::json_iterator &&value) noexcept : implementation_simdjson_result_base(std::forward(value)) {} simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept : implementation_simdjson_result_base(error) {} } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_JSON_ITERATOR_INL_H /* end file simdjson/generic/ondemand/json_iterator-inl.h for ppc64 */ /* including simdjson/generic/ondemand/json_type-inl.h for ppc64: #include "simdjson/generic/ondemand/json_type-inl.h" */ /* begin file simdjson/generic/ondemand/json_type-inl.h for ppc64 */ #ifndef SIMDJSON_GENERIC_ONDEMAND_JSON_TYPE_INL_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_JSON_TYPE_INL_H */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_type.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base-inl.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { namespace ppc64 { namespace ondemand { inline std::ostream& operator<<(std::ostream& out, json_type type) noexcept { switch (type) { case json_type::array: out << "array"; break; case json_type::object: out << "object"; break; case json_type::number: out << "number"; break; case json_type::string: out << "string"; break; case json_type::boolean: out << "boolean"; break; case json_type::null: out << "null"; break; default: SIMDJSON_UNREACHABLE(); } return out; } #if SIMDJSON_EXCEPTIONS inline std::ostream& operator<<(std::ostream& out, simdjson_result &type) noexcept(false) { return out << 
type.value(); } #endif simdjson_inline number_type number::get_number_type() const noexcept { return type; } simdjson_inline bool number::is_uint64() const noexcept { return get_number_type() == number_type::unsigned_integer; } simdjson_inline uint64_t number::get_uint64() const noexcept { return payload.unsigned_integer; } simdjson_inline number::operator uint64_t() const noexcept { return get_uint64(); } simdjson_inline bool number::is_int64() const noexcept { return get_number_type() == number_type::signed_integer; } simdjson_inline int64_t number::get_int64() const noexcept { return payload.signed_integer; } simdjson_inline number::operator int64_t() const noexcept { return get_int64(); } simdjson_inline bool number::is_double() const noexcept { return get_number_type() == number_type::floating_point_number; } simdjson_inline double number::get_double() const noexcept { return payload.floating_point_number; } simdjson_inline number::operator double() const noexcept { return get_double(); } simdjson_inline double number::as_double() const noexcept { if(is_double()) { return payload.floating_point_number; } if(is_int64()) { return double(payload.signed_integer); } return double(payload.unsigned_integer); } simdjson_inline void number::append_s64(int64_t value) noexcept { payload.signed_integer = value; type = number_type::signed_integer; } simdjson_inline void number::append_u64(uint64_t value) noexcept { payload.unsigned_integer = value; type = number_type::unsigned_integer; } simdjson_inline void number::append_double(double value) noexcept { payload.floating_point_number = value; type = number_type::floating_point_number; } simdjson_inline void number::skip_double() noexcept { type = number_type::floating_point_number; } } // namespace ondemand } // namespace ppc64 } // namespace simdjson namespace simdjson { simdjson_inline simdjson_result::simdjson_result(ppc64::ondemand::json_type &&value) noexcept : implementation_simdjson_result_base(std::forward(value)) 
{} simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept : implementation_simdjson_result_base(error) {} } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_JSON_TYPE_INL_H /* end file simdjson/generic/ondemand/json_type-inl.h for ppc64 */ /* including simdjson/generic/ondemand/logger-inl.h for ppc64: #include "simdjson/generic/ondemand/logger-inl.h" */ /* begin file simdjson/generic/ondemand/logger-inl.h for ppc64 */ #ifndef SIMDJSON_GENERIC_ONDEMAND_LOGGER_INL_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_LOGGER_INL_H */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/logger.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ #include #include namespace simdjson { namespace ppc64 { namespace ondemand { namespace logger { static constexpr const char * DASHES = "----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------"; static constexpr const int LOG_EVENT_LEN = 20; static constexpr const int LOG_BUFFER_LEN = 30; static constexpr const int LOG_SMALL_BUFFER_LEN = 10; static int log_depth = 0; // Not threadsafe. Log only. // Helper to turn unprintable or newline characters into spaces static inline char printable_char(char c) { if (c >= 0x20) { return c; } else { return ' '; } } template static inline std::string string_format(const std::string& format, const Args&... 
args) { SIMDJSON_PUSH_DISABLE_ALL_WARNINGS int size_s = std::snprintf(nullptr, 0, format.c_str(), args...) + 1; auto size = static_cast(size_s); if (size <= 0) return std::string(); std::unique_ptr buf(new char[size]); std::snprintf(buf.get(), size, format.c_str(), args...); SIMDJSON_POP_DISABLE_WARNINGS return std::string(buf.get(), buf.get() + size - 1); } static inline log_level get_log_level_from_env() { SIMDJSON_PUSH_DISABLE_WARNINGS SIMDJSON_DISABLE_DEPRECATED_WARNING // Disable CRT_SECURE warning on MSVC: manually verified this is safe char *lvl = getenv("SIMDJSON_LOG_LEVEL"); SIMDJSON_POP_DISABLE_WARNINGS if (lvl && simdjson_strcasecmp(lvl, "ERROR") == 0) { return log_level::error; } return log_level::info; } static inline log_level log_threshold() { static log_level threshold = get_log_level_from_env(); return threshold; } static inline bool should_log(log_level level) { return level >= log_threshold(); } inline void log_event(const json_iterator &iter, const char *type, std::string_view detail, int delta, int depth_delta) noexcept { log_line(iter, "", type, detail, delta, depth_delta, log_level::info); } inline void log_value(const json_iterator &iter, token_position index, depth_t depth, const char *type, std::string_view detail) noexcept { log_line(iter, index, depth, "", type, detail, log_level::info); } inline void log_value(const json_iterator &iter, const char *type, std::string_view detail, int delta, int depth_delta) noexcept { log_line(iter, "", type, detail, delta, depth_delta, log_level::info); } inline void log_start_value(const json_iterator &iter, token_position index, depth_t depth, const char *type, std::string_view detail) noexcept { log_line(iter, index, depth, "+", type, detail, log_level::info); if (LOG_ENABLED) { log_depth++; } } inline void log_start_value(const json_iterator &iter, const char *type, int delta, int depth_delta) noexcept { log_line(iter, "+", type, "", delta, depth_delta, log_level::info); if (LOG_ENABLED) { 
log_depth++; } } inline void log_end_value(const json_iterator &iter, const char *type, int delta, int depth_delta) noexcept { if (LOG_ENABLED) { log_depth--; } log_line(iter, "-", type, "", delta, depth_delta, log_level::info); } inline void log_error(const json_iterator &iter, const char *error, const char *detail, int delta, int depth_delta) noexcept { log_line(iter, "ERROR: ", error, detail, delta, depth_delta, log_level::error); } inline void log_error(const json_iterator &iter, token_position index, depth_t depth, const char *error, const char *detail) noexcept { log_line(iter, index, depth, "ERROR: ", error, detail, log_level::error); } inline void log_event(const value_iterator &iter, const char *type, std::string_view detail, int delta, int depth_delta) noexcept { log_event(iter.json_iter(), type, detail, delta, depth_delta); } inline void log_value(const value_iterator &iter, const char *type, std::string_view detail, int delta, int depth_delta) noexcept { log_value(iter.json_iter(), type, detail, delta, depth_delta); } inline void log_start_value(const value_iterator &iter, const char *type, int delta, int depth_delta) noexcept { log_start_value(iter.json_iter(), type, delta, depth_delta); } inline void log_end_value(const value_iterator &iter, const char *type, int delta, int depth_delta) noexcept { log_end_value(iter.json_iter(), type, delta, depth_delta); } inline void log_error(const value_iterator &iter, const char *error, const char *detail, int delta, int depth_delta) noexcept { log_error(iter.json_iter(), error, detail, delta, depth_delta); } inline void log_headers() noexcept { if (LOG_ENABLED) { if (simdjson_unlikely(should_log(log_level::info))) { // Technically a static variable is not thread-safe, but if you are using threads and logging... well... static bool displayed_hint{false}; log_depth = 0; printf("\n"); if (!displayed_hint) { // We only print this helpful header once. 
printf("# Logging provides the depth and position of the iterator user-visible steps:\n"); printf("# +array says 'this is where we were when we discovered the start array'\n"); printf( "# -array says 'this is where we were when we ended the array'\n"); printf("# skip says 'this is a structural or value I am skipping'\n"); printf("# +/-skip says 'this is a start/end array or object I am skipping'\n"); printf("#\n"); printf("# The indentation of the terms (array, string,...) indicates the depth,\n"); printf("# in addition to the depth being displayed.\n"); printf("#\n"); printf("# Every token in the document has a single depth determined by the tokens before it,\n"); printf("# and is not affected by what the token actually is.\n"); printf("#\n"); printf("# Not all structural elements are presented as tokens in the logs.\n"); printf("#\n"); printf("# We never give control to the user within an empty array or an empty object.\n"); printf("#\n"); printf("# Inside an array, having a depth greater than the array's depth means that\n"); printf("# we are pointing inside a value.\n"); printf("# Having a depth equal to the array means that we are pointing right before a value.\n"); printf("# Having a depth smaller than the array means that we have moved beyond the array.\n"); displayed_hint = true; } printf("\n"); printf("| %-*s ", LOG_EVENT_LEN, "Event"); printf("| %-*s ", LOG_BUFFER_LEN, "Buffer"); printf("| %-*s ", LOG_SMALL_BUFFER_LEN, "Next"); // printf("| %-*s ", 5, "Next#"); printf("| %-*s ", 5, "Depth"); printf("| Detail "); printf("|\n"); printf("|%.*s", LOG_EVENT_LEN + 2, DASHES); printf("|%.*s", LOG_BUFFER_LEN + 2, DASHES); printf("|%.*s", LOG_SMALL_BUFFER_LEN + 2, DASHES); // printf("|%.*s", 5+2, DASHES); printf("|%.*s", 5 + 2, DASHES); printf("|--------"); printf("|\n"); fflush(stdout); } } } template inline void log_line(const json_iterator &iter, const char *title_prefix, const char *title, std::string_view detail, int delta, int depth_delta, log_level level, 
Args&&... args) noexcept { log_line(iter, iter.position()+delta, depth_t(iter.depth()+depth_delta), title_prefix, title, detail, level, std::forward(args)...); } template inline void log_line(const json_iterator &iter, token_position index, depth_t depth, const char *title_prefix, const char *title, std::string_view detail, log_level level, Args&&... args) noexcept { if (LOG_ENABLED) { if (simdjson_unlikely(should_log(level))) { const int indent = depth * 2; const auto buf = iter.token.buf; auto msg = string_format(title, std::forward(args)...); printf("| %*s%s%-*s ", indent, "", title_prefix, LOG_EVENT_LEN - indent - int(strlen(title_prefix)), msg.c_str()); { // Print the current structural. printf("| "); // Before we begin, the index might point right before the document. // This could be unsafe, see https://github.com/simdjson/simdjson/discussions/1938 if (index < iter._root) { printf("%*s", LOG_BUFFER_LEN, ""); } else { auto current_structural = &buf[*index]; for (int i = 0; i < LOG_BUFFER_LEN; i++) { printf("%c", printable_char(current_structural[i])); } } printf(" "); } { // Print the next structural. 
printf("| "); auto next_structural = &buf[*(index + 1)]; for (int i = 0; i < LOG_SMALL_BUFFER_LEN; i++) { printf("%c", printable_char(next_structural[i])); } printf(" "); } // printf("| %5u ", *(index+1)); printf("| %5i ", depth); printf("| %6.*s ", int(detail.size()), detail.data()); printf("|\n"); fflush(stdout); } } } } // namespace logger } // namespace ondemand } // namespace ppc64 } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_LOGGER_INL_H /* end file simdjson/generic/ondemand/logger-inl.h for ppc64 */ /* including simdjson/generic/ondemand/object-inl.h for ppc64: #include "simdjson/generic/ondemand/object-inl.h" */ /* begin file simdjson/generic/ondemand/object-inl.h for ppc64 */ #ifndef SIMDJSON_GENERIC_ONDEMAND_OBJECT_INL_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_OBJECT_INL_H */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/field.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/object.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/object_iterator.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value-inl.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { namespace ppc64 { namespace ondemand { simdjson_inline simdjson_result object::find_field_unordered(const std::string_view key) & noexcept { bool has_value; SIMDJSON_TRY( iter.find_field_unordered_raw(key).get(has_value) ); if (!has_value) { logger::log_line(iter.json_iter(), "ERROR: ", "Cannot find key %.*s", "", -1, 0, 
logger::log_level::error, static_cast(key.size()), key.data()); return NO_SUCH_FIELD; } return value(iter.child()); } simdjson_inline simdjson_result object::find_field_unordered(const std::string_view key) && noexcept { bool has_value; SIMDJSON_TRY( iter.find_field_unordered_raw(key).get(has_value) ); if (!has_value) { logger::log_line(iter.json_iter(), "ERROR: ", "Cannot find key %.*s", "", -1, 0, logger::log_level::error, static_cast(key.size()), key.data()); return NO_SUCH_FIELD; } return value(iter.child()); } simdjson_inline simdjson_result object::operator[](const std::string_view key) & noexcept { return find_field_unordered(key); } simdjson_inline simdjson_result object::operator[](const std::string_view key) && noexcept { return std::forward(*this).find_field_unordered(key); } simdjson_inline simdjson_result object::find_field(const std::string_view key) & noexcept { bool has_value; SIMDJSON_TRY( iter.find_field_raw(key).get(has_value) ); if (!has_value) { logger::log_line(iter.json_iter(), "ERROR: ", "Cannot find key %.*s", "", -1, 0, logger::log_level::error, static_cast(key.size()), key.data()); return NO_SUCH_FIELD; } return value(iter.child()); } simdjson_inline simdjson_result object::find_field(const std::string_view key) && noexcept { bool has_value; SIMDJSON_TRY( iter.find_field_raw(key).get(has_value) ); if (!has_value) { logger::log_line(iter.json_iter(), "ERROR: ", "Cannot find key %.*s", "", -1, 0, logger::log_level::error, static_cast(key.size()), key.data()); return NO_SUCH_FIELD; } return value(iter.child()); } simdjson_inline simdjson_result object::start(value_iterator &iter) noexcept { SIMDJSON_TRY( iter.start_object().error() ); return object(iter); } simdjson_inline simdjson_result object::start_root(value_iterator &iter) noexcept { SIMDJSON_TRY( iter.start_root_object().error() ); return object(iter); } simdjson_inline error_code object::consume() noexcept { if(iter.is_at_key()) { /** * whenever you are pointing at a key, calling 
skip_child() is * unsafe because you will hit a string and you will assume that * it is string value, and this mistake will lead you to make bad * depth computation. */ /** * We want to 'consume' the key. We could really * just do _json_iter->return_current_and_advance(); at this * point, but, for clarity, we will use the high-level API to * eat the key. We assume that the compiler optimizes away * most of the work. */ simdjson_unused raw_json_string actual_key; auto error = iter.field_key().get(actual_key); if (error) { iter.abandon(); return error; }; // Let us move to the value while we are at it. if ((error = iter.field_value())) { iter.abandon(); return error; } } auto error_skip = iter.json_iter().skip_child(iter.depth()-1); if(error_skip) { iter.abandon(); } return error_skip; } simdjson_inline simdjson_result object::raw_json() noexcept { const uint8_t * starting_point{iter.peek_start()}; auto error = consume(); if(error) { return error; } const uint8_t * final_point{iter._json_iter->peek()}; return std::string_view(reinterpret_cast(starting_point), size_t(final_point - starting_point)); } simdjson_inline simdjson_result object::started(value_iterator &iter) noexcept { SIMDJSON_TRY( iter.started_object().error() ); return object(iter); } simdjson_inline object object::resume(const value_iterator &iter) noexcept { return iter; } simdjson_inline object::object(const value_iterator &_iter) noexcept : iter{_iter} { } simdjson_inline simdjson_result object::begin() noexcept { #if SIMDJSON_DEVELOPMENT_CHECKS if (!iter.is_at_iterator_start()) { return OUT_OF_ORDER_ITERATION; } #endif return object_iterator(iter); } simdjson_inline simdjson_result object::end() noexcept { return object_iterator(iter); } inline simdjson_result object::at_pointer(std::string_view json_pointer) noexcept { if (json_pointer[0] != '/') { return INVALID_JSON_POINTER; } json_pointer = json_pointer.substr(1); size_t slash = json_pointer.find('/'); std::string_view key = 
json_pointer.substr(0, slash); // Grab the child with the given key simdjson_result child; // If there is an escape character in the key, unescape it and then get the child. size_t escape = key.find('~'); if (escape != std::string_view::npos) { // Unescape the key std::string unescaped(key); do { switch (unescaped[escape+1]) { case '0': unescaped.replace(escape, 2, "~"); break; case '1': unescaped.replace(escape, 2, "/"); break; default: return INVALID_JSON_POINTER; // "Unexpected ~ escape character in JSON pointer"); } escape = unescaped.find('~', escape+1); } while (escape != std::string::npos); child = find_field(unescaped); // Take note find_field does not unescape keys when matching } else { child = find_field(key); } if(child.error()) { return child; // we do not continue if there was an error } // If there is a /, we have to recurse and look up more of the path if (slash != std::string_view::npos) { child = child.at_pointer(json_pointer.substr(slash)); } return child; } simdjson_inline simdjson_result object::count_fields() & noexcept { size_t count{0}; // Important: we do not consume any of the values. for(simdjson_unused auto v : *this) { count++; } // The above loop will always succeed, but we want to report errors. if(iter.error()) { return iter.error(); } // We need to move back at the start because we expect users to iterate through // the object after counting the number of elements. 
iter.reset_object(); return count; } simdjson_inline simdjson_result object::is_empty() & noexcept { bool is_not_empty; auto error = iter.reset_object().get(is_not_empty); if(error) { return error; } return !is_not_empty; } simdjson_inline simdjson_result object::reset() & noexcept { return iter.reset_object(); } } // namespace ondemand } // namespace ppc64 } // namespace simdjson namespace simdjson { simdjson_inline simdjson_result::simdjson_result(ppc64::ondemand::object &&value) noexcept : implementation_simdjson_result_base(std::forward(value)) {} simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept : implementation_simdjson_result_base(error) {} simdjson_inline simdjson_result simdjson_result::begin() noexcept { if (error()) { return error(); } return first.begin(); } simdjson_inline simdjson_result simdjson_result::end() noexcept { if (error()) { return error(); } return first.end(); } simdjson_inline simdjson_result simdjson_result::find_field_unordered(std::string_view key) & noexcept { if (error()) { return error(); } return first.find_field_unordered(key); } simdjson_inline simdjson_result simdjson_result::find_field_unordered(std::string_view key) && noexcept { if (error()) { return error(); } return std::forward(first).find_field_unordered(key); } simdjson_inline simdjson_result simdjson_result::operator[](std::string_view key) & noexcept { if (error()) { return error(); } return first[key]; } simdjson_inline simdjson_result simdjson_result::operator[](std::string_view key) && noexcept { if (error()) { return error(); } return std::forward(first)[key]; } simdjson_inline simdjson_result simdjson_result::find_field(std::string_view key) & noexcept { if (error()) { return error(); } return first.find_field(key); } simdjson_inline simdjson_result simdjson_result::find_field(std::string_view key) && noexcept { if (error()) { return error(); } return std::forward(first).find_field(key); } simdjson_inline simdjson_result 
simdjson_result::at_pointer(std::string_view json_pointer) noexcept { if (error()) { return error(); } return first.at_pointer(json_pointer); } inline simdjson_result simdjson_result::reset() noexcept { if (error()) { return error(); } return first.reset(); } inline simdjson_result simdjson_result::is_empty() noexcept { if (error()) { return error(); } return first.is_empty(); } simdjson_inline simdjson_result simdjson_result::count_fields() & noexcept { if (error()) { return error(); } return first.count_fields(); } simdjson_inline simdjson_result simdjson_result::raw_json() noexcept { if (error()) { return error(); } return first.raw_json(); } } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_OBJECT_INL_H /* end file simdjson/generic/ondemand/object-inl.h for ppc64 */ /* including simdjson/generic/ondemand/object_iterator-inl.h for ppc64: #include "simdjson/generic/ondemand/object_iterator-inl.h" */ /* begin file simdjson/generic/ondemand/object_iterator-inl.h for ppc64 */ #ifndef SIMDJSON_GENERIC_ONDEMAND_OBJECT_ITERATOR_INL_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_OBJECT_ITERATOR_INL_H */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/object_iterator.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/field-inl.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator-inl.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { namespace ppc64 { namespace ondemand { // // object_iterator // simdjson_inline object_iterator::object_iterator(const value_iterator &_iter) noexcept : iter{_iter} {} simdjson_inline simdjson_result object_iterator::operator*() noexcept { error_code error = iter.error(); if (error) { 
iter.abandon(); return error; } auto result = field::start(iter); // TODO this is a safety rail ... users should exit loops as soon as they receive an error. // Nonetheless, let's see if performance is OK with this if statement--the compiler may give it to us for free. if (result.error()) { iter.abandon(); } return result; } simdjson_inline bool object_iterator::operator==(const object_iterator &other) const noexcept { return !(*this != other); } simdjson_inline bool object_iterator::operator!=(const object_iterator &) const noexcept { return iter.is_open(); } SIMDJSON_PUSH_DISABLE_WARNINGS SIMDJSON_DISABLE_STRICT_OVERFLOW_WARNING simdjson_inline object_iterator &object_iterator::operator++() noexcept { // TODO this is a safety rail ... users should exit loops as soon as they receive an error. // Nonetheless, let's see if performance is OK with this if statement--the compiler may give it to us for free. if (!iter.is_open()) { return *this; } // Iterator will be released if there is an error simdjson_unused error_code error; if ((error = iter.skip_child() )) { return *this; } simdjson_unused bool has_value; if ((error = iter.has_next_field().get(has_value) )) { return *this; }; return *this; } SIMDJSON_POP_DISABLE_WARNINGS // // ### Live States // // While iterating or looking up values, depth >= iter.depth. at_start may vary. Error is // always SUCCESS: // // - Start: This is the state when the object is first found and the iterator is just past the {. // In this state, at_start == true. // - Next: After we hand a scalar value to the user, or an array/object which they then fully // iterate over, the iterator is at the , or } before the next value. In this state, // depth == iter.depth, at_start == false, and error == SUCCESS. // - Unfinished Business: When we hand an array/object to the user which they do not fully // iterate over, we need to finish that iteration by skipping child values until we reach the // Next state. 
//   In this state, depth > iter.depth, at_start == false, and error == SUCCESS.
//
// ## Error States
//
// In error states, we will yield exactly one more value before stopping. iter.depth == depth
// and at_start is always false. We decrement after yielding the error, moving to the Finished
// state.
//
// - Chained Error: When the object iterator is part of an error chain--for example, in
//   `for (auto tweet : doc["tweets"])`, where the tweet field may be missing or not be an
//   object--we yield that error in the loop, exactly once. In this state, error != SUCCESS and
//   iter.depth == depth, and at_start == false. We decrement depth when we yield the error.
// - Missing Comma Error: When the iterator ++ method discovers there is no comma between fields,
//   we flag that as an error and treat it exactly the same as a Chained Error. In this state,
//   error == TAPE_ERROR, iter.depth == depth, and at_start == false.
//
// Errors that occur while reading a field to give to the user (such as when the key is not a
// string or the field is missing a colon) are yielded immediately. Depth is then decremented,
// moving to the Finished state without transitioning through an Error state at all.
//
// ## Terminal State
//
// The terminal state has depth < iter.depth. at_start is always false.
//
// - Finished: When we have reached a }, we are finished. We signal this by decrementing depth.
//   In this state, depth < iter.depth, at_start == false, and error == SUCCESS.
// } // namespace ondemand } // namespace ppc64 } // namespace simdjson namespace simdjson { simdjson_inline simdjson_result::simdjson_result( ppc64::ondemand::object_iterator &&value ) noexcept : implementation_simdjson_result_base(std::forward(value)) { first.iter.assert_is_valid(); } simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept : implementation_simdjson_result_base({}, error) { } simdjson_inline simdjson_result simdjson_result::operator*() noexcept { if (error()) { return error(); } return *first; } // If we're iterating and there is an error, return the error once. simdjson_inline bool simdjson_result::operator==(const simdjson_result &other) const noexcept { if (!first.iter.is_valid()) { return !error(); } return first == other.first; } // If we're iterating and there is an error, return the error once. simdjson_inline bool simdjson_result::operator!=(const simdjson_result &other) const noexcept { if (!first.iter.is_valid()) { return error(); } return first != other.first; } // Checks for ']' and ',' simdjson_inline simdjson_result &simdjson_result::operator++() noexcept { // Clear the error if there is one, so we don't yield it twice if (error()) { second = SUCCESS; return *this; } ++first; return *this; } } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_OBJECT_ITERATOR_INL_H /* end file simdjson/generic/ondemand/object_iterator-inl.h for ppc64 */ /* including simdjson/generic/ondemand/parser-inl.h for ppc64: #include "simdjson/generic/ondemand/parser-inl.h" */ /* begin file simdjson/generic/ondemand/parser-inl.h for ppc64 */ #ifndef SIMDJSON_GENERIC_ONDEMAND_PARSER_INL_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_PARSER_INL_H */ /* amalgamation skipped (editor-only): #include "simdjson/padded_string.h" */ /* amalgamation skipped (editor-only): #include "simdjson/padded_string_view.h" */ /* amalgamation skipped 
(editor-only): #include "simdjson/implementation.h" */
/* amalgamation skipped (editor-only): #include "simdjson/internal/dom_parser_implementation.h" */
/* amalgamation skipped (editor-only): #include "simdjson/dom/base.h" // for MINIMAL_DOCUMENT_CAPACITY */
/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */
/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/document_stream.h" */
/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/parser.h" */
/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string.h" */
/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */

namespace simdjson {
namespace ppc64 {
namespace ondemand {

// Creates a parser that will refuse documents larger than max_capacity bytes.
simdjson_inline parser::parser(size_t max_capacity) noexcept
  : _max_capacity{max_capacity} {
}

// (Re)allocates the parser buffers for documents of up to new_capacity bytes and new_max_depth
// nesting levels.
// Returns CAPACITY when new_capacity exceeds max_capacity(), MEMALLOC when a buffer cannot be
// allocated, or any error reported by the underlying implementation. On failure capacity() is 0.
simdjson_warn_unused simdjson_inline error_code parser::allocate(size_t new_capacity, size_t new_max_depth) noexcept {
  if (new_capacity > max_capacity()) { return CAPACITY; }
  if (string_buf && new_capacity == capacity() && new_max_depth == max_depth()) { return SUCCESS; }

  // string_capacity copied from document::allocate
  _capacity = 0;
  size_t string_capacity = SIMDJSON_ROUNDUP_N(5 * new_capacity / 3 + SIMDJSON_PADDING, 64);
  string_buf.reset(new (std::nothrow) uint8_t[string_capacity]);
  // new (std::nothrow) reports failure with a null pointer: do not carry on without a buffer.
  if (!string_buf) { return MEMALLOC; }
#if SIMDJSON_DEVELOPMENT_CHECKS
  start_positions.reset(new (std::nothrow) token_position[new_max_depth]);
  if (!start_positions) { return MEMALLOC; }
#endif
  if (implementation) {
    SIMDJSON_TRY( implementation->set_capacity(new_capacity) );
    SIMDJSON_TRY( implementation->set_max_depth(new_max_depth) );
  } else {
    SIMDJSON_TRY( simdjson::get_active_implementation()->create_dom_parser_implementation(new_capacity, new_max_depth, implementation) );
  }
  _capacity = new_capacity;
  _max_depth = new_max_depth;
  return SUCCESS;
}

// Runs stage 1 on the padded input and returns a document positioned at its start.
// Returns INSUFFICIENT_PADDING when fewer than SIMDJSON_PADDING padding bytes are available.
simdjson_warn_unused simdjson_inline simdjson_result<document> parser::iterate(padded_string_view json) & noexcept {
  if (json.padding() < SIMDJSON_PADDING) { return INSUFFICIENT_PADDING; }

  json.remove_utf8_bom();

  // Allocate if needed
  if (capacity() < json.length() || !string_buf) {
    SIMDJSON_TRY( allocate(json.length(), max_depth()) );
  }

  // Run stage 1.
  SIMDJSON_TRY( implementation->stage1(reinterpret_cast<const uint8_t *>(json.data()), json.length(), stage1_mode::regular) );
  return document::start({ reinterpret_cast<const uint8_t *>(json.data()), this });
}

// The overloads below wrap their arguments in a padded_string_view and call iterate() above.
simdjson_warn_unused simdjson_inline simdjson_result<document> parser::iterate(const char *json, size_t len, size_t allocated) & noexcept {
  return iterate(padded_string_view(json, len, allocated));
}

simdjson_warn_unused simdjson_inline simdjson_result<document> parser::iterate(const uint8_t *json, size_t len, size_t allocated) & noexcept {
  return iterate(padded_string_view(json, len, allocated));
}

simdjson_warn_unused simdjson_inline simdjson_result<document> parser::iterate(std::string_view json, size_t allocated) & noexcept {
  return iterate(padded_string_view(json, allocated));
}

// Grows the string's capacity when it has less than SIMDJSON_PADDING spare bytes, then iterates.
simdjson_warn_unused simdjson_inline simdjson_result<document> parser::iterate(std::string &json) & noexcept {
  if(json.capacity() - json.size() < SIMDJSON_PADDING) {
    json.reserve(json.size() + SIMDJSON_PADDING);
  }
  return iterate(padded_string_view(json));
}

simdjson_warn_unused simdjson_inline simdjson_result<document> parser::iterate(const std::string &json) & noexcept {
  return iterate(padded_string_view(json));
}

simdjson_warn_unused simdjson_inline simdjson_result<document> parser::iterate(const simdjson_result<padded_string_view> &result) & noexcept {
  // We don't presently have a way to temporarily get a const T& from a simdjson_result<T> without throwing an exception
  SIMDJSON_TRY( result.error() );
  padded_string_view json = result.value_unsafe();
  return iterate(json);
}

simdjson_warn_unused simdjson_inline simdjson_result<document> parser::iterate(const simdjson_result<padded_string> &result) & noexcept {
  // We don't presently have a way to temporarily get a const T& from a simdjson_result<T> without throwing an exception
  SIMDJSON_TRY( result.error() );
  const padded_string &json = result.value_unsafe();
  return iterate(json);
}
simdjson_warn_unused simdjson_inline simdjson_result parser::iterate_raw(padded_string_view json) & noexcept { if (json.padding() < SIMDJSON_PADDING) { return INSUFFICIENT_PADDING; } json.remove_utf8_bom(); // Allocate if needed if (capacity() < json.length()) { SIMDJSON_TRY( allocate(json.length(), max_depth()) ); } // Run stage 1. SIMDJSON_TRY( implementation->stage1(reinterpret_cast(json.data()), json.length(), stage1_mode::regular) ); return json_iterator(reinterpret_cast(json.data()), this); } inline simdjson_result parser::iterate_many(const uint8_t *buf, size_t len, size_t batch_size, bool allow_comma_separated) noexcept { if(batch_size < MINIMAL_BATCH_SIZE) { batch_size = MINIMAL_BATCH_SIZE; } if((len >= 3) && (std::memcmp(buf, "\xEF\xBB\xBF", 3) == 0)) { buf += 3; len -= 3; } if(allow_comma_separated && batch_size < len) { batch_size = len; } return document_stream(*this, buf, len, batch_size, allow_comma_separated); } inline simdjson_result parser::iterate_many(const char *buf, size_t len, size_t batch_size, bool allow_comma_separated) noexcept { return iterate_many(reinterpret_cast(buf), len, batch_size, allow_comma_separated); } inline simdjson_result parser::iterate_many(const std::string &s, size_t batch_size, bool allow_comma_separated) noexcept { return iterate_many(s.data(), s.length(), batch_size, allow_comma_separated); } inline simdjson_result parser::iterate_many(const padded_string &s, size_t batch_size, bool allow_comma_separated) noexcept { return iterate_many(s.data(), s.length(), batch_size, allow_comma_separated); } simdjson_inline size_t parser::capacity() const noexcept { return _capacity; } simdjson_inline size_t parser::max_capacity() const noexcept { return _max_capacity; } simdjson_inline size_t parser::max_depth() const noexcept { return _max_depth; } simdjson_inline void parser::set_max_capacity(size_t max_capacity) noexcept { if(max_capacity < dom::MINIMAL_DOCUMENT_CAPACITY) { _max_capacity = max_capacity; } else { _max_capacity 
= dom::MINIMAL_DOCUMENT_CAPACITY; } } simdjson_inline simdjson_warn_unused simdjson_result parser::unescape(raw_json_string in, uint8_t *&dst, bool allow_replacement) const noexcept { uint8_t *end = implementation->parse_string(in.buf, dst, allow_replacement); if (!end) { return STRING_ERROR; } std::string_view result(reinterpret_cast(dst), end-dst); dst = end; return result; } simdjson_inline simdjson_warn_unused simdjson_result parser::unescape_wobbly(raw_json_string in, uint8_t *&dst) const noexcept { uint8_t *end = implementation->parse_wobbly_string(in.buf, dst); if (!end) { return STRING_ERROR; } std::string_view result(reinterpret_cast(dst), end-dst); dst = end; return result; } } // namespace ondemand } // namespace ppc64 } // namespace simdjson namespace simdjson { simdjson_inline simdjson_result::simdjson_result(ppc64::ondemand::parser &&value) noexcept : implementation_simdjson_result_base(std::forward(value)) {} simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept : implementation_simdjson_result_base(error) {} } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_PARSER_INL_H /* end file simdjson/generic/ondemand/parser-inl.h for ppc64 */ /* including simdjson/generic/ondemand/raw_json_string-inl.h for ppc64: #include "simdjson/generic/ondemand/raw_json_string-inl.h" */ /* begin file simdjson/generic/ondemand/raw_json_string-inl.h for ppc64 */ #ifndef SIMDJSON_GENERIC_ONDEMAND_RAW_JSON_STRING_INL_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_RAW_JSON_STRING_INL_H */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator-inl.h" */ /* amalgamation skipped (editor-only): #include 
"simdjson/generic/implementation_simdjson_result_base-inl.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { namespace ppc64 { namespace ondemand { simdjson_inline raw_json_string::raw_json_string(const uint8_t * _buf) noexcept : buf{_buf} {} simdjson_inline const char * raw_json_string::raw() const noexcept { return reinterpret_cast(buf); } simdjson_inline bool raw_json_string::is_free_from_unescaped_quote(std::string_view target) noexcept { size_t pos{0}; // if the content has no escape character, just scan through it quickly! for(;pos < target.size() && target[pos] != '\\';pos++) {} // slow path may begin. bool escaping{false}; for(;pos < target.size();pos++) { if((target[pos] == '"') && !escaping) { return false; } else if(target[pos] == '\\') { escaping = !escaping; } else { escaping = false; } } return true; } simdjson_inline bool raw_json_string::is_free_from_unescaped_quote(const char* target) noexcept { size_t pos{0}; // if the content has no escape character, just scan through it quickly! for(;target[pos] && target[pos] != '\\';pos++) {} // slow path may begin. bool escaping{false}; for(;target[pos];pos++) { if((target[pos] == '"') && !escaping) { return false; } else if(target[pos] == '\\') { escaping = !escaping; } else { escaping = false; } } return true; } simdjson_inline bool raw_json_string::unsafe_is_equal(size_t length, std::string_view target) const noexcept { // If we are going to call memcmp, then we must know something about the length of the raw_json_string. return (length >= target.size()) && (raw()[target.size()] == '"') && !memcmp(raw(), target.data(), target.size()); } simdjson_inline bool raw_json_string::unsafe_is_equal(std::string_view target) const noexcept { // Assumptions: does not contain unescaped quote characters, and // the raw content is quote terminated within a valid JSON string. 
if(target.size() <= SIMDJSON_PADDING) { return (raw()[target.size()] == '"') && !memcmp(raw(), target.data(), target.size()); } const char * r{raw()}; size_t pos{0}; for(;pos < target.size();pos++) { if(r[pos] != target[pos]) { return false; } } if(r[pos] != '"') { return false; } return true; } simdjson_inline bool raw_json_string::is_equal(std::string_view target) const noexcept { const char * r{raw()}; size_t pos{0}; bool escaping{false}; for(;pos < target.size();pos++) { if(r[pos] != target[pos]) { return false; } // if target is a compile-time constant and it is free from // quotes, then the next part could get optimized away through // inlining. if((target[pos] == '"') && !escaping) { // We have reached the end of the raw_json_string but // the target is not done. return false; } else if(target[pos] == '\\') { escaping = !escaping; } else { escaping = false; } } if(r[pos] != '"') { return false; } return true; } simdjson_inline bool raw_json_string::unsafe_is_equal(const char * target) const noexcept { // Assumptions: 'target' does not contain unescaped quote characters, is null terminated and // the raw content is quote terminated within a valid JSON string. const char * r{raw()}; size_t pos{0}; for(;target[pos];pos++) { if(r[pos] != target[pos]) { return false; } } if(r[pos] != '"') { return false; } return true; } simdjson_inline bool raw_json_string::is_equal(const char* target) const noexcept { // Assumptions: does not contain unescaped quote characters, and // the raw content is quote terminated within a valid JSON string. const char * r{raw()}; size_t pos{0}; bool escaping{false}; for(;target[pos];pos++) { if(r[pos] != target[pos]) { return false; } // if target is a compile-time constant and it is free from // quotes, then the next part could get optimized away through // inlining. if((target[pos] == '"') && !escaping) { // We have reached the end of the raw_json_string but // the target is not done. 
return false; } else if(target[pos] == '\\') { escaping = !escaping; } else { escaping = false; } } if(r[pos] != '"') { return false; } return true; } simdjson_unused simdjson_inline bool operator==(const raw_json_string &a, std::string_view c) noexcept { return a.unsafe_is_equal(c); } simdjson_unused simdjson_inline bool operator==(std::string_view c, const raw_json_string &a) noexcept { return a == c; } simdjson_unused simdjson_inline bool operator!=(const raw_json_string &a, std::string_view c) noexcept { return !(a == c); } simdjson_unused simdjson_inline bool operator!=(std::string_view c, const raw_json_string &a) noexcept { return !(a == c); } simdjson_inline simdjson_warn_unused simdjson_result raw_json_string::unescape(json_iterator &iter, bool allow_replacement) const noexcept { return iter.unescape(*this, allow_replacement); } simdjson_inline simdjson_warn_unused simdjson_result raw_json_string::unescape_wobbly(json_iterator &iter) const noexcept { return iter.unescape_wobbly(*this); } simdjson_unused simdjson_inline std::ostream &operator<<(std::ostream &out, const raw_json_string &str) noexcept { bool in_escape = false; const char *s = str.raw(); while (true) { switch (*s) { case '\\': in_escape = !in_escape; break; case '"': if (in_escape) { in_escape = false; } else { return out; } break; default: if (in_escape) { in_escape = false; } } out << *s; s++; } } } // namespace ondemand } // namespace ppc64 } // namespace simdjson namespace simdjson { simdjson_inline simdjson_result::simdjson_result(ppc64::ondemand::raw_json_string &&value) noexcept : implementation_simdjson_result_base(std::forward(value)) {} simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept : implementation_simdjson_result_base(error) {} simdjson_inline simdjson_result simdjson_result::raw() const noexcept { if (error()) { return error(); } return first.raw(); } simdjson_inline simdjson_warn_unused simdjson_result 
simdjson_result::unescape(ppc64::ondemand::json_iterator &iter, bool allow_replacement) const noexcept { if (error()) { return error(); } return first.unescape(iter, allow_replacement); } simdjson_inline simdjson_warn_unused simdjson_result simdjson_result::unescape_wobbly(ppc64::ondemand::json_iterator &iter) const noexcept { if (error()) { return error(); } return first.unescape_wobbly(iter); } } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_RAW_JSON_STRING_INL_H /* end file simdjson/generic/ondemand/raw_json_string-inl.h for ppc64 */ /* including simdjson/generic/ondemand/serialization-inl.h for ppc64: #include "simdjson/generic/ondemand/serialization-inl.h" */ /* begin file simdjson/generic/ondemand/serialization-inl.h for ppc64 */ #ifndef SIMDJSON_GENERIC_ONDEMAND_SERIALIZATION_INL_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_SERIALIZATION_INL_H */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/document-inl.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_type.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/object.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/serialization.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { inline std::string_view trim(const std::string_view str) noexcept { // We can almost surely do better by rolling our own find_first_not_of function. 
size_t first = str.find_first_not_of(" \t\n\r"); // If we have the empty string (just white space), then no trimming is possible, and // we return the empty string_view. if (std::string_view::npos == first) { return std::string_view(); } size_t last = str.find_last_not_of(" \t\n\r"); return str.substr(first, (last - first + 1)); } inline simdjson_result to_json_string(ppc64::ondemand::document& x) noexcept { std::string_view v; auto error = x.raw_json().get(v); if(error) {return error; } return trim(v); } inline simdjson_result to_json_string(ppc64::ondemand::document_reference& x) noexcept { std::string_view v; auto error = x.raw_json().get(v); if(error) {return error; } return trim(v); } inline simdjson_result to_json_string(ppc64::ondemand::value& x) noexcept { /** * If we somehow receive a value that has already been consumed, * then the following code could be in trouble. E.g., we create * an array as needed, but if an array was already created, then * it could be bad. */ using namespace ppc64::ondemand; ppc64::ondemand::json_type t; auto error = x.type().get(t); if(error != SUCCESS) { return error; } switch (t) { case json_type::array: { ppc64::ondemand::array array; error = x.get_array().get(array); if(error) { return error; } return to_json_string(array); } case json_type::object: { ppc64::ondemand::object object; error = x.get_object().get(object); if(error) { return error; } return to_json_string(object); } default: return trim(x.raw_json_token()); } } inline simdjson_result to_json_string(ppc64::ondemand::object& x) noexcept { std::string_view v; auto error = x.raw_json().get(v); if(error) {return error; } return trim(v); } inline simdjson_result to_json_string(ppc64::ondemand::array& x) noexcept { std::string_view v; auto error = x.raw_json().get(v); if(error) {return error; } return trim(v); } inline simdjson_result to_json_string(simdjson_result x) { if (x.error()) { return x.error(); } return to_json_string(x.value_unsafe()); } inline simdjson_result 
to_json_string(simdjson_result x) { if (x.error()) { return x.error(); } return to_json_string(x.value_unsafe()); } inline simdjson_result to_json_string(simdjson_result x) { if (x.error()) { return x.error(); } return to_json_string(x.value_unsafe()); } inline simdjson_result to_json_string(simdjson_result x) { if (x.error()) { return x.error(); } return to_json_string(x.value_unsafe()); } inline simdjson_result to_json_string(simdjson_result x) { if (x.error()) { return x.error(); } return to_json_string(x.value_unsafe()); } } // namespace simdjson namespace simdjson { namespace ppc64 { namespace ondemand { #if SIMDJSON_EXCEPTIONS inline std::ostream& operator<<(std::ostream& out, simdjson::ppc64::ondemand::value x) { std::string_view v; auto error = simdjson::to_json_string(x).get(v); if(error == simdjson::SUCCESS) { return (out << v); } else { throw simdjson::simdjson_error(error); } } inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x) { if (x.error()) { throw simdjson::simdjson_error(x.error()); } return (out << x.value()); } #else inline std::ostream& operator<<(std::ostream& out, simdjson::ppc64::ondemand::value x) { std::string_view v; auto error = simdjson::to_json_string(x).get(v); if(error == simdjson::SUCCESS) { return (out << v); } else { return (out << error); } } #endif #if SIMDJSON_EXCEPTIONS inline std::ostream& operator<<(std::ostream& out, simdjson::ppc64::ondemand::array value) { std::string_view v; auto error = simdjson::to_json_string(value).get(v); if(error == simdjson::SUCCESS) { return (out << v); } else { throw simdjson::simdjson_error(error); } } inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x) { if (x.error()) { throw simdjson::simdjson_error(x.error()); } return (out << x.value()); } #else inline std::ostream& operator<<(std::ostream& out, simdjson::ppc64::ondemand::array value) { std::string_view v; auto error = simdjson::to_json_string(value).get(v); if(error == 
simdjson::SUCCESS) { return (out << v); } else { return (out << error); } } #endif #if SIMDJSON_EXCEPTIONS inline std::ostream& operator<<(std::ostream& out, simdjson::ppc64::ondemand::document& value) { std::string_view v; auto error = simdjson::to_json_string(value).get(v); if(error == simdjson::SUCCESS) { return (out << v); } else { throw simdjson::simdjson_error(error); } } inline std::ostream& operator<<(std::ostream& out, simdjson::ppc64::ondemand::document_reference& value) { std::string_view v; auto error = simdjson::to_json_string(value).get(v); if(error == simdjson::SUCCESS) { return (out << v); } else { throw simdjson::simdjson_error(error); } } inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result&& x) { if (x.error()) { throw simdjson::simdjson_error(x.error()); } return (out << x.value()); } inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result&& x) { if (x.error()) { throw simdjson::simdjson_error(x.error()); } return (out << x.value()); } #else inline std::ostream& operator<<(std::ostream& out, simdjson::ppc64::ondemand::document& value) { std::string_view v; auto error = simdjson::to_json_string(value).get(v); if(error == simdjson::SUCCESS) { return (out << v); } else { return (out << error); } } #endif #if SIMDJSON_EXCEPTIONS inline std::ostream& operator<<(std::ostream& out, simdjson::ppc64::ondemand::object value) { std::string_view v; auto error = simdjson::to_json_string(value).get(v); if(error == simdjson::SUCCESS) { return (out << v); } else { throw simdjson::simdjson_error(error); } } inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x) { if (x.error()) { throw simdjson::simdjson_error(x.error()); } return (out << x.value()); } #else inline std::ostream& operator<<(std::ostream& out, simdjson::ppc64::ondemand::object value) { std::string_view v; auto error = simdjson::to_json_string(value).get(v); if(error == simdjson::SUCCESS) { return (out << v); } else { return (out 
<< error); } } #endif }}} // namespace simdjson::ppc64::ondemand #endif // SIMDJSON_GENERIC_ONDEMAND_SERIALIZATION_INL_H /* end file simdjson/generic/ondemand/serialization-inl.h for ppc64 */ /* including simdjson/generic/ondemand/token_iterator-inl.h for ppc64: #include "simdjson/generic/ondemand/token_iterator-inl.h" */ /* begin file simdjson/generic/ondemand/token_iterator-inl.h for ppc64 */ #ifndef SIMDJSON_GENERIC_ONDEMAND_TOKEN_ITERATOR_INL_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_TOKEN_ITERATOR_INL_H */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/token_iterator.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base-inl.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { namespace ppc64 { namespace ondemand { simdjson_inline token_iterator::token_iterator( const uint8_t *_buf, token_position position ) noexcept : buf{_buf}, _position{position} { } simdjson_inline uint32_t token_iterator::current_offset() const noexcept { return *(_position); } simdjson_inline const uint8_t *token_iterator::return_current_and_advance() noexcept { return &buf[*(_position++)]; } simdjson_inline const uint8_t *token_iterator::peek(token_position position) const noexcept { return &buf[*position]; } simdjson_inline uint32_t token_iterator::peek_index(token_position position) const noexcept { return *position; } simdjson_inline uint32_t token_iterator::peek_length(token_position position) const noexcept { return *(position+1) - *position; } simdjson_inline const uint8_t *token_iterator::peek(int32_t delta) const noexcept { return &buf[*(_position+delta)]; } simdjson_inline uint32_t token_iterator::peek_index(int32_t delta) const noexcept { return 
*(_position+delta); } simdjson_inline uint32_t token_iterator::peek_length(int32_t delta) const noexcept { return *(_position+delta+1) - *(_position+delta); } simdjson_inline token_position token_iterator::position() const noexcept { return _position; } simdjson_inline void token_iterator::set_position(token_position target_position) noexcept { _position = target_position; } simdjson_inline bool token_iterator::operator==(const token_iterator &other) const noexcept { return _position == other._position; } simdjson_inline bool token_iterator::operator!=(const token_iterator &other) const noexcept { return _position != other._position; } simdjson_inline bool token_iterator::operator>(const token_iterator &other) const noexcept { return _position > other._position; } simdjson_inline bool token_iterator::operator>=(const token_iterator &other) const noexcept { return _position >= other._position; } simdjson_inline bool token_iterator::operator<(const token_iterator &other) const noexcept { return _position < other._position; } simdjson_inline bool token_iterator::operator<=(const token_iterator &other) const noexcept { return _position <= other._position; } } // namespace ondemand } // namespace ppc64 } // namespace simdjson namespace simdjson { simdjson_inline simdjson_result::simdjson_result(ppc64::ondemand::token_iterator &&value) noexcept : implementation_simdjson_result_base(std::forward(value)) {} simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept : implementation_simdjson_result_base(error) {} } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_TOKEN_ITERATOR_INL_H /* end file simdjson/generic/ondemand/token_iterator-inl.h for ppc64 */ /* including simdjson/generic/ondemand/value-inl.h for ppc64: #include "simdjson/generic/ondemand/value-inl.h" */ /* begin file simdjson/generic/ondemand/value-inl.h for ppc64 */ #ifndef SIMDJSON_GENERIC_ONDEMAND_VALUE_INL_H /* amalgamation skipped (editor-only): #ifndef 
SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_VALUE_INL_H */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array_iterator.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_type.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/object.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { namespace ppc64 { namespace ondemand { simdjson_inline value::value(const value_iterator &_iter) noexcept : iter{_iter} { } simdjson_inline value value::start(const value_iterator &iter) noexcept { return iter; } simdjson_inline value value::resume(const value_iterator &iter) noexcept { return iter; } simdjson_inline simdjson_result value::get_array() noexcept { return array::start(iter); } simdjson_inline simdjson_result value::get_object() noexcept { return object::start(iter); } simdjson_inline simdjson_result value::start_or_resume_object() noexcept { if (iter.at_start()) { return get_object(); } else { return object::resume(iter); } } simdjson_inline simdjson_result value::get_raw_json_string() noexcept { return iter.get_raw_json_string(); } simdjson_inline simdjson_result value::get_string(bool allow_replacement) noexcept { return iter.get_string(allow_replacement); } template simdjson_inline error_code value::get_string(string_type& receiver, bool allow_replacement) noexcept { return iter.get_string(receiver, 
allow_replacement); } simdjson_inline simdjson_result value::get_wobbly_string() noexcept { return iter.get_wobbly_string(); } simdjson_inline simdjson_result value::get_double() noexcept { return iter.get_double(); } simdjson_inline simdjson_result value::get_double_in_string() noexcept { return iter.get_double_in_string(); } simdjson_inline simdjson_result value::get_uint64() noexcept { return iter.get_uint64(); } simdjson_inline simdjson_result value::get_uint64_in_string() noexcept { return iter.get_uint64_in_string(); } simdjson_inline simdjson_result value::get_int64() noexcept { return iter.get_int64(); } simdjson_inline simdjson_result value::get_int64_in_string() noexcept { return iter.get_int64_in_string(); } simdjson_inline simdjson_result value::get_bool() noexcept { return iter.get_bool(); } simdjson_inline simdjson_result value::is_null() noexcept { return iter.is_null(); } template<> simdjson_inline simdjson_result value::get() noexcept { return get_array(); } template<> simdjson_inline simdjson_result value::get() noexcept { return get_object(); } template<> simdjson_inline simdjson_result value::get() noexcept { return get_raw_json_string(); } template<> simdjson_inline simdjson_result value::get() noexcept { return get_string(false); } template<> simdjson_inline simdjson_result value::get() noexcept { return get_number(); } template<> simdjson_inline simdjson_result value::get() noexcept { return get_double(); } template<> simdjson_inline simdjson_result value::get() noexcept { return get_uint64(); } template<> simdjson_inline simdjson_result value::get() noexcept { return get_int64(); } template<> simdjson_inline simdjson_result value::get() noexcept { return get_bool(); } template simdjson_inline error_code value::get(T &out) noexcept { return get().get(out); } #if SIMDJSON_EXCEPTIONS simdjson_inline value::operator array() noexcept(false) { return get_array(); } simdjson_inline value::operator object() noexcept(false) { return get_object(); } 
simdjson_inline value::operator uint64_t() noexcept(false) { return get_uint64(); } simdjson_inline value::operator int64_t() noexcept(false) { return get_int64(); } simdjson_inline value::operator double() noexcept(false) { return get_double(); } simdjson_inline value::operator std::string_view() noexcept(false) { return get_string(false); } simdjson_inline value::operator raw_json_string() noexcept(false) { return get_raw_json_string(); } simdjson_inline value::operator bool() noexcept(false) { return get_bool(); } #endif simdjson_inline simdjson_result value::begin() & noexcept { return get_array().begin(); } simdjson_inline simdjson_result value::end() & noexcept { return {}; } simdjson_inline simdjson_result value::count_elements() & noexcept { simdjson_result answer; auto a = get_array(); answer = a.count_elements(); // count_elements leaves you pointing inside the array, at the first element. // We need to move back so that the user can create a new array (which requires that // we point at '['). 
iter.move_at_start(); return answer; } simdjson_inline simdjson_result value::count_fields() & noexcept { simdjson_result answer; auto a = get_object(); answer = a.count_fields(); iter.move_at_start(); return answer; } simdjson_inline simdjson_result value::at(size_t index) noexcept { auto a = get_array(); return a.at(index); } simdjson_inline simdjson_result value::find_field(std::string_view key) noexcept { return start_or_resume_object().find_field(key); } simdjson_inline simdjson_result value::find_field(const char *key) noexcept { return start_or_resume_object().find_field(key); } simdjson_inline simdjson_result value::find_field_unordered(std::string_view key) noexcept { return start_or_resume_object().find_field_unordered(key); } simdjson_inline simdjson_result value::find_field_unordered(const char *key) noexcept { return start_or_resume_object().find_field_unordered(key); } simdjson_inline simdjson_result value::operator[](std::string_view key) noexcept { return start_or_resume_object()[key]; } simdjson_inline simdjson_result value::operator[](const char *key) noexcept { return start_or_resume_object()[key]; } simdjson_inline simdjson_result value::type() noexcept { return iter.type(); } simdjson_inline simdjson_result value::is_scalar() noexcept { json_type this_type; auto error = type().get(this_type); if(error) { return error; } return ! 
((this_type == json_type::array) || (this_type == json_type::object)); } simdjson_inline bool value::is_negative() noexcept { return iter.is_negative(); } simdjson_inline simdjson_result value::is_integer() noexcept { return iter.is_integer(); } simdjson_warn_unused simdjson_inline simdjson_result value::get_number_type() noexcept { return iter.get_number_type(); } simdjson_warn_unused simdjson_inline simdjson_result value::get_number() noexcept { return iter.get_number(); } simdjson_inline std::string_view value::raw_json_token() noexcept { return std::string_view(reinterpret_cast(iter.peek_start()), iter.peek_start_length()); } simdjson_inline simdjson_result value::raw_json() noexcept { json_type t; SIMDJSON_TRY(type().get(t)); switch (t) { case json_type::array: { ondemand::array array; SIMDJSON_TRY(get_array().get(array)); return array.raw_json(); } case json_type::object: { ondemand::object object; SIMDJSON_TRY(get_object().get(object)); return object.raw_json(); } default: return raw_json_token(); } } simdjson_inline simdjson_result value::current_location() noexcept { return iter.json_iter().current_location(); } simdjson_inline int32_t value::current_depth() const noexcept{ return iter.json_iter().depth(); } simdjson_inline simdjson_result value::at_pointer(std::string_view json_pointer) noexcept { json_type t; SIMDJSON_TRY(type().get(t)); switch (t) { case json_type::array: return (*this).get_array().at_pointer(json_pointer); case json_type::object: return (*this).get_object().at_pointer(json_pointer); default: return INVALID_JSON_POINTER; } } } // namespace ondemand } // namespace ppc64 } // namespace simdjson namespace simdjson { simdjson_inline simdjson_result::simdjson_result( ppc64::ondemand::value &&value ) noexcept : implementation_simdjson_result_base( std::forward(value) ) { } simdjson_inline simdjson_result::simdjson_result( error_code error ) noexcept : implementation_simdjson_result_base(error) { } simdjson_inline simdjson_result 
simdjson_result::count_elements() & noexcept { if (error()) { return error(); } return first.count_elements(); } simdjson_inline simdjson_result simdjson_result::count_fields() & noexcept { if (error()) { return error(); } return first.count_fields(); } simdjson_inline simdjson_result simdjson_result::at(size_t index) noexcept { if (error()) { return error(); } return first.at(index); } simdjson_inline simdjson_result simdjson_result::begin() & noexcept { if (error()) { return error(); } return first.begin(); } simdjson_inline simdjson_result simdjson_result::end() & noexcept { if (error()) { return error(); } return {}; } simdjson_inline simdjson_result simdjson_result::find_field(std::string_view key) noexcept { if (error()) { return error(); } return first.find_field(key); } simdjson_inline simdjson_result simdjson_result::find_field(const char *key) noexcept { if (error()) { return error(); } return first.find_field(key); } simdjson_inline simdjson_result simdjson_result::find_field_unordered(std::string_view key) noexcept { if (error()) { return error(); } return first.find_field_unordered(key); } simdjson_inline simdjson_result simdjson_result::find_field_unordered(const char *key) noexcept { if (error()) { return error(); } return first.find_field_unordered(key); } simdjson_inline simdjson_result simdjson_result::operator[](std::string_view key) noexcept { if (error()) { return error(); } return first[key]; } simdjson_inline simdjson_result simdjson_result::operator[](const char *key) noexcept { if (error()) { return error(); } return first[key]; } simdjson_inline simdjson_result simdjson_result::get_array() noexcept { if (error()) { return error(); } return first.get_array(); } simdjson_inline simdjson_result simdjson_result::get_object() noexcept { if (error()) { return error(); } return first.get_object(); } simdjson_inline simdjson_result simdjson_result::get_uint64() noexcept { if (error()) { return error(); } return first.get_uint64(); } 
simdjson_inline simdjson_result simdjson_result::get_uint64_in_string() noexcept { if (error()) { return error(); } return first.get_uint64_in_string(); } simdjson_inline simdjson_result simdjson_result::get_int64() noexcept { if (error()) { return error(); } return first.get_int64(); } simdjson_inline simdjson_result simdjson_result::get_int64_in_string() noexcept { if (error()) { return error(); } return first.get_int64_in_string(); } simdjson_inline simdjson_result simdjson_result::get_double() noexcept { if (error()) { return error(); } return first.get_double(); } simdjson_inline simdjson_result simdjson_result::get_double_in_string() noexcept { if (error()) { return error(); } return first.get_double_in_string(); } simdjson_inline simdjson_result simdjson_result::get_string(bool allow_replacement) noexcept { if (error()) { return error(); } return first.get_string(allow_replacement); } template simdjson_inline error_code simdjson_result::get_string(string_type& receiver, bool allow_replacement) noexcept { if (error()) { return error(); } return first.get_string(receiver, allow_replacement); } simdjson_inline simdjson_result simdjson_result::get_wobbly_string() noexcept { if (error()) { return error(); } return first.get_wobbly_string(); } simdjson_inline simdjson_result simdjson_result::get_raw_json_string() noexcept { if (error()) { return error(); } return first.get_raw_json_string(); } simdjson_inline simdjson_result simdjson_result::get_bool() noexcept { if (error()) { return error(); } return first.get_bool(); } simdjson_inline simdjson_result simdjson_result::is_null() noexcept { if (error()) { return error(); } return first.is_null(); } template simdjson_inline simdjson_result simdjson_result::get() noexcept { if (error()) { return error(); } return first.get(); } template simdjson_inline error_code simdjson_result::get(T &out) noexcept { if (error()) { return error(); } return first.get(out); } template<> simdjson_inline simdjson_result 
simdjson_result::get() noexcept { if (error()) { return error(); } return std::move(first); } template<> simdjson_inline error_code simdjson_result::get(ppc64::ondemand::value &out) noexcept { if (error()) { return error(); } out = first; return SUCCESS; } simdjson_inline simdjson_result simdjson_result::type() noexcept { if (error()) { return error(); } return first.type(); } simdjson_inline simdjson_result simdjson_result::is_scalar() noexcept { if (error()) { return error(); } return first.is_scalar(); } simdjson_inline simdjson_result simdjson_result::is_negative() noexcept { if (error()) { return error(); } return first.is_negative(); } simdjson_inline simdjson_result simdjson_result::is_integer() noexcept { if (error()) { return error(); } return first.is_integer(); } simdjson_inline simdjson_result simdjson_result::get_number_type() noexcept { if (error()) { return error(); } return first.get_number_type(); } simdjson_inline simdjson_result simdjson_result::get_number() noexcept { if (error()) { return error(); } return first.get_number(); } #if SIMDJSON_EXCEPTIONS simdjson_inline simdjson_result::operator ppc64::ondemand::array() noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } simdjson_inline simdjson_result::operator ppc64::ondemand::object() noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } simdjson_inline simdjson_result::operator uint64_t() noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } simdjson_inline simdjson_result::operator int64_t() noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } simdjson_inline simdjson_result::operator double() noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } simdjson_inline simdjson_result::operator std::string_view() noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } simdjson_inline simdjson_result::operator 
ppc64::ondemand::raw_json_string() noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } simdjson_inline simdjson_result::operator bool() noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } #endif simdjson_inline simdjson_result simdjson_result::raw_json_token() noexcept { if (error()) { return error(); } return first.raw_json_token(); } simdjson_inline simdjson_result simdjson_result::raw_json() noexcept { if (error()) { return error(); } return first.raw_json(); } simdjson_inline simdjson_result simdjson_result::current_location() noexcept { if (error()) { return error(); } return first.current_location(); } simdjson_inline simdjson_result simdjson_result::current_depth() const noexcept { if (error()) { return error(); } return first.current_depth(); } simdjson_inline simdjson_result simdjson_result::at_pointer(std::string_view json_pointer) noexcept { if (error()) { return error(); } return first.at_pointer(json_pointer); } } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_VALUE_INL_H /* end file simdjson/generic/ondemand/value-inl.h for ppc64 */ /* including simdjson/generic/ondemand/value_iterator-inl.h for ppc64: #include "simdjson/generic/ondemand/value_iterator-inl.h" */ /* begin file simdjson/generic/ondemand/value_iterator-inl.h for ppc64 */ #ifndef SIMDJSON_GENERIC_ONDEMAND_VALUE_ITERATOR_INL_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_VALUE_ITERATOR_INL_H */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/atomparsing.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/numberparsing.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator.h" */ /* amalgamation skipped (editor-only): #include 
"simdjson/generic/ondemand/json_type-inl.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string-inl.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { namespace ppc64 { namespace ondemand { simdjson_inline value_iterator::value_iterator( json_iterator *json_iter, depth_t depth, token_position start_position ) noexcept : _json_iter{json_iter}, _depth{depth}, _start_position{start_position} { } simdjson_warn_unused simdjson_inline simdjson_result value_iterator::start_object() noexcept { SIMDJSON_TRY( start_container('{', "Not an object", "object") ); return started_object(); } simdjson_warn_unused simdjson_inline simdjson_result value_iterator::start_root_object() noexcept { SIMDJSON_TRY( start_container('{', "Not an object", "object") ); return started_root_object(); } simdjson_warn_unused simdjson_inline simdjson_result value_iterator::started_object() noexcept { assert_at_container_start(); #if SIMDJSON_DEVELOPMENT_CHECKS _json_iter->set_start_position(_depth, start_position()); #endif if (*_json_iter->peek() == '}') { logger::log_value(*_json_iter, "empty object"); _json_iter->return_current_and_advance(); end_container(); return false; } return true; } simdjson_warn_unused simdjson_inline error_code value_iterator::check_root_object() noexcept { // When in streaming mode, we cannot expect peek_last() to be the last structural element of the // current document. It only works in the normal mode where we have indexed a single document. // Note that adding a check for 'streaming' is not expensive since we only have at most // one root element. if ( ! _json_iter->streaming() ) { // The following lines do not fully protect against garbage content within the // object: e.g., `{"a":2} foo }`. 
Users concerned with garbage content should // call `at_end()` on the document instance at the end of the processing to // ensure that the processing has finished at the end. // if (*_json_iter->peek_last() != '}') { _json_iter->abandon(); return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "missing } at end"); } // If the last character is } *and* the first gibberish character is also '}' // then on-demand could accidentally go over. So we need additional checks. // https://github.com/simdjson/simdjson/issues/1834 // Checking that the document is balanced requires a full scan which is potentially // expensive, but it only happens in edge cases where the first padding character is // a closing bracket. if ((*_json_iter->peek(_json_iter->end_position()) == '}') && (!_json_iter->balanced())) { _json_iter->abandon(); // The exact error would require more work. It will typically be an unclosed object. return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "the document is unbalanced"); } } return SUCCESS; } simdjson_warn_unused simdjson_inline simdjson_result value_iterator::started_root_object() noexcept { auto error = check_root_object(); if(error) { return error; } return started_object(); } simdjson_warn_unused simdjson_inline error_code value_iterator::end_container() noexcept { #if SIMDJSON_CHECK_EOF if (depth() > 1 && at_end()) { return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "missing parent ] or }"); } // if (depth() <= 1 && !at_end()) { return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "missing [ or { at start"); } #endif // SIMDJSON_CHECK_EOF _json_iter->ascend_to(depth()-1); return SUCCESS; } simdjson_warn_unused simdjson_inline simdjson_result value_iterator::has_next_field() noexcept { assert_at_next(); // It's illegal to call this unless there are more tokens: anything that ends in } or ] is // obligated to verify there are more tokens if they are not the top level. 
switch (*_json_iter->return_current_and_advance()) { case '}': logger::log_end_value(*_json_iter, "object"); SIMDJSON_TRY( end_container() ); return false; case ',': return true; default: return report_error(TAPE_ERROR, "Missing comma between object fields"); } } simdjson_warn_unused simdjson_inline simdjson_result value_iterator::find_field_raw(const std::string_view key) noexcept { error_code error; bool has_value; // // Initially, the object can be in one of a few different places: // // 1. The start of the object, at the first field: // // ``` // { "a": [ 1, 2 ], "b": [ 3, 4 ] } // ^ (depth 2, index 1) // ``` if (at_first_field()) { has_value = true; // // 2. When a previous search did not yield a value or the object is empty: // // ``` // { "a": [ 1, 2 ], "b": [ 3, 4 ] } // ^ (depth 0) // { } // ^ (depth 0, index 2) // ``` // } else if (!is_open()) { #if SIMDJSON_DEVELOPMENT_CHECKS // If we're past the end of the object, we're being iterated out of order. // Note: this isn't perfect detection. It's possible the user is inside some other object; if so, // this object iterator will blithely scan that object for fields. if (_json_iter->depth() < depth() - 1) { return OUT_OF_ORDER_ITERATION; } #endif return false; // 3. When a previous search found a field or an iterator yielded a value: // // ``` // // When a field was not fully consumed (or not even touched at all) // { "a": [ 1, 2 ], "b": [ 3, 4 ] } // ^ (depth 2) // // When a field was fully consumed // { "a": [ 1, 2 ], "b": [ 3, 4 ] } // ^ (depth 1) // // When the last field was fully consumed // { "a": [ 1, 2 ], "b": [ 3, 4 ] } // ^ (depth 1) // ``` // } else { if ((error = skip_child() )) { abandon(); return error; } if ((error = has_next_field().get(has_value) )) { abandon(); return error; } #if SIMDJSON_DEVELOPMENT_CHECKS if (_json_iter->start_position(_depth) != start_position()) { return OUT_OF_ORDER_ITERATION; } #endif } while (has_value) { // Get the key and colon, stopping at the value. 
raw_json_string actual_key;
    // size_t max_key_length = _json_iter->peek_length() - 2; // -2 for the two quotes
    // Note: _json_iter->peek_length() - 2 might overflow if _json_iter->peek_length() < 2.
    // field_key() advances the pointer and checks that '"' is found (corresponding to a key).
    // The depth is left unchanged by field_key().
    if ((error = field_key().get(actual_key) )) { abandon(); return error; };
    // field_value() will advance and check that we find a ':' separating the
    // key and the value. It will also increment the depth by one.
    if ((error = field_value() )) { abandon(); return error; }
    // If it matches, stop and return
    // We could do it this way if we wanted to allow arbitrary
    // key content (including escaped quotes).
    //if (actual_key.unsafe_is_equal(max_key_length, key)) {
    // Instead we do the following which may trigger buffer overruns if the
    // user provides an adversarial key (containing a well placed unescaped quote
    // character and being longer than the number of bytes remaining in the JSON
    // input).
    if (actual_key.unsafe_is_equal(key)) {
      logger::log_event(*this, "match", key, -2);
      // If we return here, then we return while pointing at the ':' that we just checked.
      return true;
    }

    // No match: skip the value and see if , or } is next
    logger::log_event(*this, "no match", key, -2);
    // The call to skip_child is meant to skip over the value corresponding to the key.
    // After skip_child(), we are right before the next comma (',') or the final brace ('}').
    // Note that, unlike the other error paths in this loop, a failure here is
    // returned by SIMDJSON_TRY without calling abandon().
    SIMDJSON_TRY( skip_child() ); // Skip the value entirely
    // The has_next_field() advances the pointer and check that either ',' or '}' is found.
    // It returns true if ',' is found, false otherwise. If anything other than ',' or '}' is found,
    // then we are in error and we abort.
    if ((error = has_next_field().get(has_value) )) { abandon(); return error; }
  }

  // If the loop ended, we're out of fields to look at.
return false;
}

SIMDJSON_PUSH_DISABLE_WARNINGS
SIMDJSON_DISABLE_STRICT_OVERFLOW_WARNING
// Order-insensitive key lookup: scans from the current position to the end of
// the object and, if nothing matched and the scan did not start at the first
// field, restarts from the beginning of the object and scans up to the
// position where the search began.
simdjson_warn_unused simdjson_inline simdjson_result value_iterator::find_field_unordered_raw(const std::string_view key) noexcept {
  /**
   * When find_field_unordered_raw is called, we can either be pointing at the
   * first key, pointing outside (at the closing brace) or if a key was matched
   * we can be either pointing right after the ':' right before the value (that we need to skip),
   * or we may have consumed the value and we might be at a comma or at the
   * final brace (ready for a call to has_next_field()).
   */
  error_code error;
  bool has_value;

  // First, we scan from that point to the end.
  // If we don't find a match, we may loop back around, and scan from the beginning to that point.
  token_position search_start = _json_iter->position();

  // We want to know whether we need to go back to the beginning.
  bool at_first = at_first_field();
  ///////////////
  // Initially, the object can be in one of a few different places:
  //
  // 1. At the first key:
  //
  //    ```
  //    { "a": [ 1, 2 ], "b": [ 3, 4 ] }
  //      ^ (depth 2, index 1)
  //    ```
  //
  if (at_first) {
    has_value = true;

  // 2. When a previous search did not yield a value or the object is empty:
  //
  //    ```
  //    { "a": [ 1, 2 ], "b": [ 3, 4 ] }
  //                                     ^ (depth 0)
  //    { }
  //        ^ (depth 0, index 2)
  //    ```
  //
  } else if (!is_open()) {
#if SIMDJSON_DEVELOPMENT_CHECKS
    // If we're past the end of the object, we're being iterated out of order.
    // Note: this isn't perfect detection. It's possible the user is inside some other object; if so,
    // this object iterator will blithely scan that object for fields.
    if (_json_iter->depth() < depth() - 1) { return OUT_OF_ORDER_ITERATION; }
#endif
    // The object was already closed: rewind to its first field, and remember
    // that this scan now starts from the beginning so no second pass is needed.
    SIMDJSON_TRY(reset_object().get(has_value));
    at_first = true;
  // 3.
// When a previous search found a field or an iterator yielded a value:
  //
  //    ```
  //    // When a field was not fully consumed (or not even touched at all)
  //    { "a": [ 1, 2 ], "b": [ 3, 4 ] }
  //           ^ (depth 2)
  //    // When a field was fully consumed
  //    { "a": [ 1, 2 ], "b": [ 3, 4 ] }
  //                   ^ (depth 1)
  //    // When the last field was fully consumed
  //    { "a": [ 1, 2 ], "b": [ 3, 4 ] }
  //                                   ^ (depth 1)
  //    ```
  //
  } else {
    // If someone queried a key but they did not access the value, then we are left pointing
    // at the ':' and we need to move forward through the value... If the value was
    // processed then skip_child() does not move the iterator (but may adjust the depth).
    if ((error = skip_child() )) { abandon(); return error; }
    // Remember where this search really starts (just past the previous value):
    // the wrap-around pass stops when it gets back to this position.
    search_start = _json_iter->position();
    if ((error = has_next_field().get(has_value) )) { abandon(); return error; }
#if SIMDJSON_DEVELOPMENT_CHECKS
    if (_json_iter->start_position(_depth) != start_position()) { return OUT_OF_ORDER_ITERATION; }
#endif
  }

  // After initial processing, we will be in one of two states:
  //
  // ```
  // // At the beginning of a field
  // { "a": [ 1, 2 ], "b": [ 3, 4 ] }
  //   ^ (depth 1)
  // { "a": [ 1, 2 ], "b": [ 3, 4 ] }
  //                  ^ (depth 1)
  // // At the end of the object
  // { "a": [ 1, 2 ], "b": [ 3, 4 ] }
  //                                  ^ (depth 0)
  // ```
  //
  // Next, we find a match starting from the current position.
  while (has_value) {
    SIMDJSON_ASSUME( _json_iter->_depth == _depth ); // We must be at the start of a field

    // Get the key and colon, stopping at the value.
    raw_json_string actual_key;
    // size_t max_key_length = _json_iter->peek_length() - 2; // -2 for the two quotes
    // Note: _json_iter->peek_length() - 2 might overflow if _json_iter->peek_length() < 2.
    // field_key() advances the pointer and checks that '"' is found (corresponding to a key).
    // The depth is left unchanged by field_key().
    if ((error = field_key().get(actual_key) )) { abandon(); return error; };
    // field_value() will advance and check that we find a ':' separating the
    // key and the value.
// It will also increment the depth by one.
    if ((error = field_value() )) { abandon(); return error; }

    // If it matches, stop and return
    // We could do it this way if we wanted to allow arbitrary
    // key content (including escaped quotes).
    // if (actual_key.unsafe_is_equal(max_key_length, key)) {
    // Instead we do the following which may trigger buffer overruns if the
    // user provides an adversarial key (containing a well placed unescaped quote
    // character and being longer than the number of bytes remaining in the JSON
    // input).
    if (actual_key.unsafe_is_equal(key)) {
      logger::log_event(*this, "match", key, -2);
      // If we return here, then we return while pointing at the ':' that we just checked.
      return true;
    }

    // No match: skip the value and see if , or } is next
    logger::log_event(*this, "no match", key, -2);
    // The call to skip_child is meant to skip over the value corresponding to the key.
    // After skip_child(), we are right before the next comma (',') or the final brace ('}').
    SIMDJSON_TRY( skip_child() );
    // The has_next_field() advances the pointer and check that either ',' or '}' is found.
    // It returns true if ',' is found, false otherwise. If anything other than ',' or '}' is found,
    // then we are in error and we abort.
    if ((error = has_next_field().get(has_value) )) { abandon(); return error; }
  }
  // Performance note: it maybe wasteful to rewind to the beginning when there might be
  // no other query following. Indeed, it would require reskipping the whole object.
  // Instead, you can just stay where you are. If there is a new query, there is always time
  // to rewind.
  // When the scan began at the first field, the whole object has now been
  // examined, so there is nothing to wrap around to.
  if(at_first) { return false; }

  // If we reach the end without finding a match, search the rest of the fields starting at the
  // beginning of the object.
  // (We have already run through the object before, so we've already validated its structure. We
  // don't check errors in this bit.)
SIMDJSON_TRY(reset_object().get(has_value));
  // Second pass: from the first field up to the position where the search began.
  // Errors from field_key()/field_value()/has_next_field() are only ASSUMEd
  // away here rather than returned.
  while (true) {
    SIMDJSON_ASSUME(has_value); // we should reach search_start before ever reaching the end of the object
    SIMDJSON_ASSUME( _json_iter->_depth == _depth ); // We must be at the start of a field

    // Get the key and colon, stopping at the value.
    raw_json_string actual_key;
    // size_t max_key_length = _json_iter->peek_length() - 2; // -2 for the two quotes
    // Note: _json_iter->peek_length() - 2 might overflow if _json_iter->peek_length() < 2.
    // field_key() advances the pointer and checks that '"' is found (corresponding to a key).
    // The depth is left unchanged by field_key().
    error = field_key().get(actual_key); SIMDJSON_ASSUME(!error);
    // field_value() will advance and check that we find a ':' separating the
    // key and the value. It will also increment the depth by one.
    error = field_value(); SIMDJSON_ASSUME(!error);

    // If it matches, stop and return
    // We could do it this way if we wanted to allow arbitrary
    // key content (including escaped quotes).
    // if (actual_key.unsafe_is_equal(max_key_length, key)) {
    // Instead we do the following which may trigger buffer overruns if the
    // user provides an adversarial key (containing a well placed unescaped quote
    // character and being longer than the number of bytes remaining in the JSON
    // input).
    if (actual_key.unsafe_is_equal(key)) {
      logger::log_event(*this, "match", key, -2);
      // If we return here, then we return while pointing at the ':' that we just checked.
      return true;
    }

    // No match: skip the value and see if , or } is next
    logger::log_event(*this, "no match", key, -2);
    // The call to skip_child is meant to skip over the value corresponding to the key.
    // After skip_child(), we are right before the next comma (',') or the final brace ('}').
    SIMDJSON_TRY( skip_child() );
    // If we reached the end of the key-value pair we started from, then we know
    // that the key is not there so we return false. We are either right before
    // the next comma or the final brace.
if(_json_iter->position() == search_start) { return false; } // The has_next_field() advances the pointer and check that either ',' or '}' is found. // It returns true if ',' is found, false otherwise. If anything other than ',' or '}' is found, // then we are in error and we abort. error = has_next_field().get(has_value); SIMDJSON_ASSUME(!error); // If we make the mistake of exiting here, then we could be left pointing at a key // in the middle of an object. That's not an allowable state. } // If the loop ended, we're out of fields to look at. The program should // never reach this point. return false; } SIMDJSON_POP_DISABLE_WARNINGS simdjson_warn_unused simdjson_inline simdjson_result value_iterator::field_key() noexcept { assert_at_next(); const uint8_t *key = _json_iter->return_current_and_advance(); if (*(key++) != '"') { return report_error(TAPE_ERROR, "Object key is not a string"); } return raw_json_string(key); } simdjson_warn_unused simdjson_inline error_code value_iterator::field_value() noexcept { assert_at_next(); if (*_json_iter->return_current_and_advance() != ':') { return report_error(TAPE_ERROR, "Missing colon in object field"); } _json_iter->descend_to(depth()+1); return SUCCESS; } simdjson_warn_unused simdjson_inline simdjson_result value_iterator::start_array() noexcept { SIMDJSON_TRY( start_container('[', "Not an array", "array") ); return started_array(); } simdjson_warn_unused simdjson_inline simdjson_result value_iterator::start_root_array() noexcept { SIMDJSON_TRY( start_container('[', "Not an array", "array") ); return started_root_array(); } inline std::string value_iterator::to_string() const noexcept { auto answer = std::string("value_iterator [ depth : ") + std::to_string(_depth) + std::string(", "); if(_json_iter != nullptr) { answer += _json_iter->to_string(); } answer += std::string(" ]"); return answer; } simdjson_warn_unused simdjson_inline simdjson_result value_iterator::started_array() noexcept { assert_at_container_start(); if 
(*_json_iter->peek() == ']') {
    // Empty array: consume the ']' and close the container right away.
    logger::log_value(*_json_iter, "empty array");
    _json_iter->return_current_and_advance();
    SIMDJSON_TRY( end_container() );
    return false;
  }
  // Non-empty: the first element lives one level deeper.
  _json_iter->descend_to(depth()+1);
#if SIMDJSON_DEVELOPMENT_CHECKS
  _json_iter->set_start_position(_depth, start_position());
#endif
  return true;
}

// Sanity checks for an array that is the document root (skipped in streaming
// mode): the last structural character must be ']', and if the first character
// past the end is also ']' the document must be balanced. On failure the
// iterator is abandoned and INCOMPLETE_ARRAY_OR_OBJECT is reported.
simdjson_warn_unused simdjson_inline error_code value_iterator::check_root_array() noexcept {
  // When in streaming mode, we cannot expect peek_last() to be the last structural element of the
  // current document. It only works in the normal mode where we have indexed a single document.
  // Note that adding a check for 'streaming' is not expensive since we only have at most
  // one root element.
  if ( ! _json_iter->streaming() ) {
    // The following lines do not fully protect against garbage content within the
    // array: e.g., `[1, 2] foo]`. Users concerned with garbage content should
    // also call `at_end()` on the document instance at the end of the processing to
    // ensure that the processing has finished at the end.
    //
    if (*_json_iter->peek_last() != ']') {
      _json_iter->abandon();
      return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "missing ] at end");
    }
    // If the last character is ] *and* the first gibberish character is also ']'
    // then on-demand could accidentally go over. So we need additional checks.
    // https://github.com/simdjson/simdjson/issues/1834
    // Checking that the document is balanced requires a full scan which is potentially
    // expensive, but it only happens in edge cases where the first padding character is
    // a closing bracket.
    if ((*_json_iter->peek(_json_iter->end_position()) == ']') && (!_json_iter->balanced())) {
      _json_iter->abandon();
      // The exact error would require more work. It will typically be an unclosed array.
return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "the document is unbalanced");
    }
  }
  return SUCCESS;
}

// Root-level variant of started_array(): runs check_root_array() first and
// forwards its error, otherwise behaves exactly like started_array().
// NOTE(review): the template argument list after `simdjson_result` looks
// stripped in this copy of the file -- confirm against upstream.
simdjson_warn_unused simdjson_inline simdjson_result value_iterator::started_root_array() noexcept {
  if (auto root_error = check_root_array()) { return root_error; }
  return started_array();
}

// Consumes the token after an array element: ']' closes the array (false),
// ',' descends to the next element (true), anything else is a TAPE_ERROR.
simdjson_warn_unused simdjson_inline simdjson_result value_iterator::has_next_element() noexcept {
  assert_at_next();

  logger::log_event(*this, "has_next_element");
  const uint8_t separator = *_json_iter->return_current_and_advance();
  if (separator == ']') {
    logger::log_end_value(*_json_iter, "array");
    SIMDJSON_TRY( end_container() );
    return false;
  }
  if (separator == ',') {
    _json_iter->descend_to(depth()+1);
    return true;
  }
  return report_error(TAPE_ERROR, "Missing comma between array elements");
}

simdjson_warn_unused simdjson_inline simdjson_result value_iterator::parse_bool(const uint8_t *json) const noexcept {
  auto not_true = atomparsing::str4ncmp(json, "true");
  auto not_false = atomparsing::str4ncmp(json, "fals") | (json[4] ^ 'e');
  bool error = (not_true && not_false) || jsoncharutils::is_not_structural_or_whitespace(json[not_true ?
5 : 4]); if (error) { return incorrect_type_error("Not a boolean"); } return simdjson_result(!not_true); } simdjson_warn_unused simdjson_inline simdjson_result value_iterator::parse_null(const uint8_t *json) const noexcept { bool is_null_string = !atomparsing::str4ncmp(json, "null") && jsoncharutils::is_structural_or_whitespace(json[4]); // if we start with 'n', we must be a null if(!is_null_string && json[0]=='n') { return incorrect_type_error("Not a null but starts with n"); } return is_null_string; } simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_string(bool allow_replacement) noexcept { return get_raw_json_string().unescape(json_iter(), allow_replacement); } template simdjson_warn_unused simdjson_inline error_code value_iterator::get_string(string_type& receiver, bool allow_replacement) noexcept { std::string_view content; auto err = get_string(allow_replacement).get(content); if (err) { return err; } receiver = content; return SUCCESS; } simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_wobbly_string() noexcept { return get_raw_json_string().unescape_wobbly(json_iter()); } simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_raw_json_string() noexcept { auto json = peek_scalar("string"); if (*json != '"') { return incorrect_type_error("Not a string"); } advance_scalar("string"); return raw_json_string(json+1); } simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_uint64() noexcept { auto result = numberparsing::parse_unsigned(peek_non_root_scalar("uint64")); if(result.error() == SUCCESS) { advance_non_root_scalar("uint64"); } return result; } simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_uint64_in_string() noexcept { auto result = numberparsing::parse_unsigned_in_string(peek_non_root_scalar("uint64")); if(result.error() == SUCCESS) { advance_non_root_scalar("uint64"); } return result; } simdjson_warn_unused simdjson_inline simdjson_result 
value_iterator::get_int64() noexcept { auto result = numberparsing::parse_integer(peek_non_root_scalar("int64")); if(result.error() == SUCCESS) { advance_non_root_scalar("int64"); } return result; } simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_int64_in_string() noexcept { auto result = numberparsing::parse_integer_in_string(peek_non_root_scalar("int64")); if(result.error() == SUCCESS) { advance_non_root_scalar("int64"); } return result; } simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_double() noexcept { auto result = numberparsing::parse_double(peek_non_root_scalar("double")); if(result.error() == SUCCESS) { advance_non_root_scalar("double"); } return result; } simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_double_in_string() noexcept { auto result = numberparsing::parse_double_in_string(peek_non_root_scalar("double")); if(result.error() == SUCCESS) { advance_non_root_scalar("double"); } return result; } simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_bool() noexcept { auto result = parse_bool(peek_non_root_scalar("bool")); if(result.error() == SUCCESS) { advance_non_root_scalar("bool"); } return result; } simdjson_inline simdjson_result value_iterator::is_null() noexcept { bool is_null_value; SIMDJSON_TRY(parse_null(peek_non_root_scalar("null")).get(is_null_value)); if(is_null_value) { advance_non_root_scalar("null"); } return is_null_value; } simdjson_inline bool value_iterator::is_negative() noexcept { return numberparsing::is_negative(peek_non_root_scalar("numbersign")); } simdjson_inline bool value_iterator::is_root_negative() noexcept { return numberparsing::is_negative(peek_root_scalar("numbersign")); } simdjson_inline simdjson_result value_iterator::is_integer() noexcept { return numberparsing::is_integer(peek_non_root_scalar("integer")); } simdjson_inline simdjson_result value_iterator::get_number_type() noexcept { return 
numberparsing::get_number_type(peek_non_root_scalar("integer"));
}

// Parses the value as a `number`; this function only peeks and does not
// advance the cursor.
// NOTE(review): the template argument list after `simdjson_result` looks
// stripped in this copy of the file -- confirm against upstream.
simdjson_inline simdjson_result value_iterator::get_number() noexcept {
  number parsed;
  const error_code parse_error = numberparsing::parse_number(peek_non_root_scalar("number"), parsed);
  if (parse_error) { return parse_error; }
  return parsed;
}

// Root-level integer test. The token is first copied into a small
// NUL-terminated scratch buffer so that parsing cannot run past the document.
simdjson_inline simdjson_result value_iterator::is_root_integer(bool check_trailing) noexcept {
  auto token_length = peek_start_length();
  auto token = peek_root_scalar("is_root_integer");
  uint8_t scratch[20+1+1]{}; // <20 digits> is the longest possible unsigned integer
  scratch[20+1] = '\0'; // make sure that buffer is always null terminated.
  const bool fits = _json_iter->copy_to_buffer(token, token_length, scratch, 20+1);
  if (!fits) {
    return false; // if there are more than 20 characters, it cannot be represented as an integer.
  }
  auto verdict = numberparsing::is_integer(scratch);
  // If the parsing was a success, we must still check that it is
  // a single scalar. Note that we parse first because of cases like '[]' where
  // getting TRAILING_CONTENT is wrong.
  if (check_trailing && (verdict.error() == SUCCESS) && (!_json_iter->is_single_token())) {
    return TRAILING_CONTENT;
  }
  return verdict;
}

simdjson_inline simdjson_result value_iterator::get_root_number_type(bool check_trailing) noexcept {
  auto max_len = peek_start_length();
  auto json = peek_root_scalar("number");
  // Per https://www.exploringbinary.com/maximum-number-of-decimal-digits-in-binary-floating-point-numbers/,
  // 1074 is the maximum number of significant fractional digits. Add 8 more digits for the biggest
  // number: -0.e-308.
  uint8_t tmpbuf[1074+8+1+1];
  tmpbuf[1074+8+1] = '\0'; // make sure that buffer is always null terminated.
if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 1074+8+1)) { logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 1082 characters"); return NUMBER_ERROR; } auto answer = numberparsing::get_number_type(tmpbuf); if (check_trailing && (answer.error() == SUCCESS) && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } return answer; } simdjson_inline simdjson_result value_iterator::get_root_number(bool check_trailing) noexcept { auto max_len = peek_start_length(); auto json = peek_root_scalar("number"); // Per https://www.exploringbinary.com/maximum-number-of-decimal-digits-in-binary-floating-point-numbers/, // 1074 is the maximum number of significant fractional digits. Add 8 more digits for the biggest // number: -0.e-308. uint8_t tmpbuf[1074+8+1+1]; tmpbuf[1074+8+1] = '\0'; // make sure that buffer is always null terminated. if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 1074+8+1)) { logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 1082 characters"); return NUMBER_ERROR; } number num; error_code error = numberparsing::parse_number(tmpbuf, num); if(error) { return error; } if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } advance_root_scalar("number"); return num; } simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_string(bool check_trailing, bool allow_replacement) noexcept { return get_root_raw_json_string(check_trailing).unescape(json_iter(), allow_replacement); } template simdjson_warn_unused simdjson_inline error_code value_iterator::get_root_string(string_type& receiver, bool check_trailing, bool allow_replacement) noexcept { std::string_view content; auto err = get_root_string(check_trailing, allow_replacement).get(content); if (err) { return err; } receiver = content; return SUCCESS; } simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_wobbly_string(bool check_trailing) noexcept { return 
get_root_raw_json_string(check_trailing).unescape_wobbly(json_iter());
}

// Root-level raw string: verifies the opening quote, optionally rejects
// trailing content, then consumes the scalar.
// NOTE(review): the template argument list after `simdjson_result` looks
// stripped in this copy of the file -- confirm against upstream.
simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_raw_json_string(bool check_trailing) noexcept {
  auto token = peek_scalar("string");
  if (*token != '"') { return incorrect_type_error("Not a string"); }
  if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; }
  advance_scalar("string");
  return raw_json_string(token + 1);
}

// Root-level unsigned integer, parsed from a NUL-terminated scratch copy.
simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_uint64(bool check_trailing) noexcept {
  auto token_length = peek_start_length();
  auto token = peek_root_scalar("uint64");
  uint8_t scratch[20+1+1]{}; // <20 digits> is the longest possible unsigned integer
  scratch[20+1] = '\0'; // make sure that buffer is always null terminated.
  const bool fits = _json_iter->copy_to_buffer(token, token_length, scratch, 20+1);
  if (!fits) {
    logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 20 characters");
    return NUMBER_ERROR;
  }
  auto parsed = numberparsing::parse_unsigned(scratch);
  if (parsed.error() != SUCCESS) { return parsed; }
  if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; }
  advance_root_scalar("uint64");
  return parsed;
}

simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_uint64_in_string(bool check_trailing) noexcept {
  auto max_len = peek_start_length();
  auto json = peek_root_scalar("uint64");
  uint8_t tmpbuf[20+1+1]{}; // <20 digits> is the longest possible unsigned integer
  tmpbuf[20+1] = '\0'; // make sure that buffer is always null terminated.
if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 20+1)) { logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 20 characters"); return NUMBER_ERROR; } auto result = numberparsing::parse_unsigned_in_string(tmpbuf); if(result.error() == SUCCESS) { if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } advance_root_scalar("uint64"); } return result; } simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_int64(bool check_trailing) noexcept { auto max_len = peek_start_length(); auto json = peek_root_scalar("int64"); uint8_t tmpbuf[20+1+1]; // -<19 digits> is the longest possible integer tmpbuf[20+1] = '\0'; // make sure that buffer is always null terminated. if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 20+1)) { logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 20 characters"); return NUMBER_ERROR; } auto result = numberparsing::parse_integer(tmpbuf); if(result.error() == SUCCESS) { if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } advance_root_scalar("int64"); } return result; } simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_int64_in_string(bool check_trailing) noexcept { auto max_len = peek_start_length(); auto json = peek_root_scalar("int64"); uint8_t tmpbuf[20+1+1]; // -<19 digits> is the longest possible integer tmpbuf[20+1] = '\0'; // make sure that buffer is always null terminated. 
if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 20+1)) { logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 20 characters"); return NUMBER_ERROR; } auto result = numberparsing::parse_integer_in_string(tmpbuf); if(result.error() == SUCCESS) { if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } advance_root_scalar("int64"); } return result; } simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_double(bool check_trailing) noexcept { auto max_len = peek_start_length(); auto json = peek_root_scalar("double"); // Per https://www.exploringbinary.com/maximum-number-of-decimal-digits-in-binary-floating-point-numbers/, // 1074 is the maximum number of significant fractional digits. Add 8 more digits for the biggest // number: -0.e-308. uint8_t tmpbuf[1074+8+1+1]; // +1 for null termination. tmpbuf[1074+8+1] = '\0'; // make sure that buffer is always null terminated. if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 1074+8+1)) { logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 1082 characters"); return NUMBER_ERROR; } auto result = numberparsing::parse_double(tmpbuf); if(result.error() == SUCCESS) { if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } advance_root_scalar("double"); } return result; } simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_double_in_string(bool check_trailing) noexcept { auto max_len = peek_start_length(); auto json = peek_root_scalar("double"); // Per https://www.exploringbinary.com/maximum-number-of-decimal-digits-in-binary-floating-point-numbers/, // 1074 is the maximum number of significant fractional digits. Add 8 more digits for the biggest // number: -0.e-308. uint8_t tmpbuf[1074+8+1+1]; // +1 for null termination. tmpbuf[1074+8+1] = '\0'; // make sure that buffer is always null terminated. 
if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 1074+8+1)) { logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 1082 characters"); return NUMBER_ERROR; } auto result = numberparsing::parse_double_in_string(tmpbuf); if(result.error() == SUCCESS) { if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } advance_root_scalar("double"); } return result; } simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_bool(bool check_trailing) noexcept { auto max_len = peek_start_length(); auto json = peek_root_scalar("bool"); uint8_t tmpbuf[5+1+1]; // +1 for null termination tmpbuf[5+1] = '\0'; // make sure that buffer is always null terminated. if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 5+1)) { return incorrect_type_error("Not a boolean"); } auto result = parse_bool(tmpbuf); if(result.error() == SUCCESS) { if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } advance_root_scalar("bool"); } return result; } simdjson_inline simdjson_result value_iterator::is_root_null(bool check_trailing) noexcept { auto max_len = peek_start_length(); auto json = peek_root_scalar("null"); bool result = (max_len >= 4 && !atomparsing::str4ncmp(json, "null") && (max_len == 4 || jsoncharutils::is_structural_or_whitespace(json[4]))); if(result) { // we have something that looks like a null. 
if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } advance_root_scalar("null"); } return result; } simdjson_warn_unused simdjson_inline error_code value_iterator::skip_child() noexcept { SIMDJSON_ASSUME( _json_iter->token._position > _start_position ); SIMDJSON_ASSUME( _json_iter->_depth >= _depth ); return _json_iter->skip_child(depth()); } simdjson_inline value_iterator value_iterator::child() const noexcept { assert_at_child(); return { _json_iter, depth()+1, _json_iter->token.position() }; } // GCC 7 warns when the first line of this function is inlined away into oblivion due to the caller // relating depth and iterator depth, which is a desired effect. It does not happen if is_open is // marked non-inline. SIMDJSON_PUSH_DISABLE_WARNINGS SIMDJSON_DISABLE_STRICT_OVERFLOW_WARNING simdjson_inline bool value_iterator::is_open() const noexcept { return _json_iter->depth() >= depth(); } SIMDJSON_POP_DISABLE_WARNINGS simdjson_inline bool value_iterator::at_end() const noexcept { return _json_iter->at_end(); } simdjson_inline bool value_iterator::at_start() const noexcept { return _json_iter->token.position() == start_position(); } simdjson_inline bool value_iterator::at_first_field() const noexcept { SIMDJSON_ASSUME( _json_iter->token._position > _start_position ); return _json_iter->token.position() == start_position() + 1; } simdjson_inline void value_iterator::abandon() noexcept { _json_iter->abandon(); } simdjson_warn_unused simdjson_inline depth_t value_iterator::depth() const noexcept { return _depth; } simdjson_warn_unused simdjson_inline error_code value_iterator::error() const noexcept { return _json_iter->error; } simdjson_warn_unused simdjson_inline uint8_t *&value_iterator::string_buf_loc() noexcept { return _json_iter->string_buf_loc(); } simdjson_warn_unused simdjson_inline const json_iterator &value_iterator::json_iter() const noexcept { return *_json_iter; } simdjson_warn_unused simdjson_inline json_iterator 
&value_iterator::json_iter() noexcept { return *_json_iter; } simdjson_inline const uint8_t *value_iterator::peek_start() const noexcept { return _json_iter->peek(start_position()); } simdjson_inline uint32_t value_iterator::peek_start_length() const noexcept { return _json_iter->peek_length(start_position()); } simdjson_inline const uint8_t *value_iterator::peek_scalar(const char *type) noexcept { logger::log_value(*_json_iter, start_position(), depth(), type); // If we're not at the position anymore, we don't want to advance the cursor. if (!is_at_start()) { return peek_start(); } // Get the JSON and advance the cursor, decreasing depth to signify that we have retrieved the value. assert_at_start(); return _json_iter->peek(); } simdjson_inline void value_iterator::advance_scalar(const char *type) noexcept { logger::log_value(*_json_iter, start_position(), depth(), type); // If we're not at the position anymore, we don't want to advance the cursor. if (!is_at_start()) { return; } // Get the JSON and advance the cursor, decreasing depth to signify that we have retrieved the value. assert_at_start(); _json_iter->return_current_and_advance(); _json_iter->ascend_to(depth()-1); } simdjson_inline error_code value_iterator::start_container(uint8_t start_char, const char *incorrect_type_message, const char *type) noexcept { logger::log_start_value(*_json_iter, start_position(), depth(), type); // If we're not at the position anymore, we don't want to advance the cursor. const uint8_t *json; if (!is_at_start()) { #if SIMDJSON_DEVELOPMENT_CHECKS if (!is_at_iterator_start()) { return OUT_OF_ORDER_ITERATION; } #endif json = peek_start(); if (*json != start_char) { return incorrect_type_error(incorrect_type_message); } } else { assert_at_start(); /** * We should be prudent. Let us peek. If it is not the right type, we * return an error. Only once we have determined that we have the right * type are we allowed to advance! 
*/
    json = _json_iter->peek();
    if (*json != start_char) { return incorrect_type_error(incorrect_type_message); }
    _json_iter->return_current_and_advance();
  }

  return SUCCESS;
}

// Root-level variant of peek_scalar(): returns the scalar's JSON without moving
// the cursor; asserts depth 1 when still at the start. `type` is for logging.
simdjson_inline const uint8_t *value_iterator::peek_root_scalar(const char *type) noexcept {
  logger::log_value(*_json_iter, start_position(), depth(), type);
  if (!is_at_start()) { return peek_start(); }

  assert_at_root();
  return _json_iter->peek();
}
// Non-root variant of peek_scalar(): same as above but asserts depth > 1.
simdjson_inline const uint8_t *value_iterator::peek_non_root_scalar(const char *type) noexcept {
  logger::log_value(*_json_iter, start_position(), depth(), type);
  if (!is_at_start()) { return peek_start(); }

  assert_at_non_root_start();
  return _json_iter->peek();
}

// Root-level variant of advance_scalar(): consumes the start token and ascends
// to depth()-1, but only if the cursor is still on the start token.
simdjson_inline void value_iterator::advance_root_scalar(const char *type) noexcept {
  logger::log_value(*_json_iter, start_position(), depth(), type);
  if (!is_at_start()) { return; }

  assert_at_root();
  _json_iter->return_current_and_advance();
  _json_iter->ascend_to(depth()-1);
}
// Non-root variant of advance_scalar(): same as above but asserts depth > 1.
simdjson_inline void value_iterator::advance_non_root_scalar(const char *type) noexcept {
  logger::log_value(*_json_iter, start_position(), depth(), type);
  if (!is_at_start()) { return; }

  assert_at_non_root_start();
  _json_iter->return_current_and_advance();
  _json_iter->ascend_to(depth()-1);
}

// Logs `message` at this value's start position and returns INCORRECT_TYPE.
simdjson_inline error_code value_iterator::incorrect_type_error(const char *message) const noexcept {
  logger::log_error(*_json_iter, start_position(), depth(), message);
  return INCORRECT_TYPE;
}

// True when the json iterator's position equals this value's start position.
simdjson_inline bool value_iterator::is_at_start() const noexcept {
  return position() == start_position();
}

// True when the iterator is at this value's depth and the current token begins
// with a double quote.
simdjson_inline bool value_iterator::is_at_key() const noexcept {
  // Keys are at the same depth as the object.
  // Note here that we could be safer and check that we are within an object,
  // but we do not.
return _depth == _json_iter->_depth && *_json_iter->peek() == '"'; }

// True when the cursor is one or two tokens past this value's start token.
simdjson_inline bool value_iterator::is_at_iterator_start() const noexcept {
  // We can legitimately be either at the first value ([1]), or after the array if it's empty ([]).
  auto delta = position() - start_position();
  return delta == 1 || delta == 2;
}

// Assumes: cursor on the start token, iterator at this value's depth, depth > 0.
inline void value_iterator::assert_at_start() const noexcept {
  SIMDJSON_ASSUME( _json_iter->token._position == _start_position );
  SIMDJSON_ASSUME( _json_iter->_depth == _depth );
  SIMDJSON_ASSUME( _depth > 0 );
}

// Assumes: cursor one token past the start token, iterator at this value's depth, depth > 0.
inline void value_iterator::assert_at_container_start() const noexcept {
  SIMDJSON_ASSUME( _json_iter->token._position == _start_position + 1 );
  SIMDJSON_ASSUME( _json_iter->_depth == _depth );
  SIMDJSON_ASSUME( _depth > 0 );
}

// Assumes: cursor somewhere past the start token, iterator at this value's depth, depth > 0.
inline void value_iterator::assert_at_next() const noexcept {
  SIMDJSON_ASSUME( _json_iter->token._position > _start_position );
  SIMDJSON_ASSUME( _json_iter->_depth == _depth );
  SIMDJSON_ASSUME( _depth > 0 );
}

// Rewinds the json iterator to this value: restores depth and puts the cursor
// on the start token.
simdjson_inline void value_iterator::move_at_start() noexcept {
  _json_iter->_depth = _depth;
  _json_iter->token.set_position(_start_position);
}

// Rewinds the json iterator to just inside this container: restores depth and
// puts the cursor one token past the start token.
simdjson_inline void value_iterator::move_at_container_start() noexcept {
  _json_iter->_depth = _depth;
  _json_iter->token.set_position(_start_position + 1);
}

// Rewinds to the container start and re-runs started_array(); returns the
// stored error instead if one is already set.
simdjson_inline simdjson_result value_iterator::reset_array() noexcept {
  if(error()) { return error(); }
  move_at_container_start();
  return started_array();
}

// Rewinds to the container start and re-runs started_object(); returns the
// stored error instead if one is already set.
simdjson_inline simdjson_result value_iterator::reset_object() noexcept {
  if(error()) { return error(); }
  move_at_container_start();
  return started_object();
}

// Assumes: cursor past the start token, iterator exactly one level deeper than
// this value, depth > 0.
inline void value_iterator::assert_at_child() const noexcept {
  SIMDJSON_ASSUME( _json_iter->token._position > _start_position );
  SIMDJSON_ASSUME( _json_iter->_depth == _depth + 1 );
  SIMDJSON_ASSUME( _depth > 0 );
}

// assert_at_start() plus: this value is at depth 1.
inline void value_iterator::assert_at_root() const noexcept {
  assert_at_start();
  SIMDJSON_ASSUME( _depth == 1 );
}

// assert_at_start() plus: this value is deeper than depth 1.
inline void value_iterator::assert_at_non_root_start() const noexcept {
assert_at_start(); SIMDJSON_ASSUME( _depth > 1 ); }

// Assumes the iterator holds a non-null json_iterator pointer.
inline void value_iterator::assert_is_valid() const noexcept {
  SIMDJSON_ASSUME( _json_iter != nullptr );
}

// True when the iterator holds a non-null json_iterator pointer.
simdjson_inline bool value_iterator::is_valid() const noexcept {
  return _json_iter != nullptr;
}

// Classifies the value from the first byte of its start token:
// '{' object, '[' array, '"' string, 'n' null, 't'/'f' boolean,
// '-' or a digit number. Any other byte yields TAPE_ERROR.
// Only the first byte is inspected; the rest of the token is not validated here.
simdjson_inline simdjson_result value_iterator::type() const noexcept {
  switch (*peek_start()) {
    case '{':
      return json_type::object;
    case '[':
      return json_type::array;
    case '"':
      return json_type::string;
    case 'n':
      return json_type::null;
    case 't': case 'f':
      return json_type::boolean;
    case '-':
    case '0': case '1': case '2': case '3': case '4':
    case '5': case '6': case '7': case '8': case '9':
      return json_type::number;
    default:
      return TAPE_ERROR;
  }
}

// Token position recorded as the start of this value.
simdjson_inline token_position value_iterator::start_position() const noexcept {
  return _start_position;
}

// Current position of the underlying json iterator.
simdjson_inline token_position value_iterator::position() const noexcept {
  return _json_iter->position();
}

// Forwards to json_iterator::end_position().
simdjson_inline token_position value_iterator::end_position() const noexcept {
  return _json_iter->end_position();
}

// Forwards to json_iterator::last_position().
simdjson_inline token_position value_iterator::last_position() const noexcept {
  return _json_iter->last_position();
}

// Forwards the error and message to json_iterator::report_error() and returns its result.
simdjson_inline error_code value_iterator::report_error(error_code error, const char *message) noexcept {
  return _json_iter->report_error(error, message);
}

} // namespace ondemand
} // namespace ppc64
} // namespace simdjson

namespace simdjson {

// NOTE(review): the template argument lists on simdjson_result and std::forward
// are missing in this copy of the file; restore them from the upstream source.
// Success constructor: wraps a value_iterator.
simdjson_inline simdjson_result::simdjson_result(ppc64::ondemand::value_iterator &&value) noexcept
    : implementation_simdjson_result_base(std::forward(value)) {}
// Error constructor: wraps an error code.
simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept
    : implementation_simdjson_result_base(error) {}

} // namespace simdjson

#endif // SIMDJSON_GENERIC_ONDEMAND_VALUE_ITERATOR_INL_H
/* end file simdjson/generic/ondemand/value_iterator-inl.h for ppc64 */
/* end file simdjson/generic/ondemand/amalgamated.h for ppc64 */
/* including simdjson/ppc64/end.h: #include "simdjson/ppc64/end.h" */
/* begin file
simdjson/ppc64/end.h */ /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #include "simdjson/ppc64/base.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ #undef SIMDJSON_SKIP_BACKSLASH_SHORT_CIRCUIT /* undefining SIMDJSON_IMPLEMENTATION from "ppc64" */ #undef SIMDJSON_IMPLEMENTATION /* end file simdjson/ppc64/end.h */ #endif // SIMDJSON_PPC64_ONDEMAND_H /* end file simdjson/ppc64/ondemand.h */ #elif SIMDJSON_BUILTIN_IMPLEMENTATION_IS(westmere) /* including simdjson/westmere/ondemand.h: #include "simdjson/westmere/ondemand.h" */ /* begin file simdjson/westmere/ondemand.h */ #ifndef SIMDJSON_WESTMERE_ONDEMAND_H #define SIMDJSON_WESTMERE_ONDEMAND_H /* including simdjson/westmere/begin.h: #include "simdjson/westmere/begin.h" */ /* begin file simdjson/westmere/begin.h */ /* defining SIMDJSON_IMPLEMENTATION to "westmere" */ #define SIMDJSON_IMPLEMENTATION westmere /* including simdjson/westmere/base.h: #include "simdjson/westmere/base.h" */ /* begin file simdjson/westmere/base.h */ #ifndef SIMDJSON_WESTMERE_BASE_H #define SIMDJSON_WESTMERE_BASE_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #include "simdjson/base.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ // The constructor may be executed on any host, so we take care not to use SIMDJSON_TARGET_WESTMERE namespace simdjson { /** * Implementation for Westmere (Intel SSE4.2). 
*/ namespace westmere { class implementation; namespace { namespace simd { template struct simd8; template struct simd8x64; } // namespace simd } // unnamed namespace } // namespace westmere } // namespace simdjson #endif // SIMDJSON_WESTMERE_BASE_H /* end file simdjson/westmere/base.h */ /* including simdjson/westmere/intrinsics.h: #include "simdjson/westmere/intrinsics.h" */ /* begin file simdjson/westmere/intrinsics.h */ #ifndef SIMDJSON_WESTMERE_INTRINSICS_H #define SIMDJSON_WESTMERE_INTRINSICS_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #include "simdjson/westmere/base.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ #if SIMDJSON_VISUAL_STUDIO // under clang within visual studio, this will include #include // visual studio or clang #else #include // elsewhere #endif // SIMDJSON_VISUAL_STUDIO #if SIMDJSON_CLANG_VISUAL_STUDIO /** * You are not supposed, normally, to include these * headers directly. Instead you should either include intrin.h * or x86intrin.h. 
However, when compiling with clang
 * under Windows (i.e., when _MSC_VER is set), these headers
 * only get included *if* the corresponding features are detected
 * from macros:
 */
/* NOTE(review): the header names after the two #include directives below are
   missing in this copy of the file; restore them from the upstream source. */
#include // for _mm_alignr_epi8
#include // for _mm_clmulepi64_si128
#endif

static_assert(sizeof(__m128i) <= simdjson::SIMDJSON_PADDING, "insufficient padding for westmere");

#endif // SIMDJSON_WESTMERE_INTRINSICS_H
/* end file simdjson/westmere/intrinsics.h */

#if !SIMDJSON_CAN_ALWAYS_RUN_WESTMERE
SIMDJSON_TARGET_REGION("sse4.2,pclmul,popcnt")
#endif

/* including simdjson/westmere/bitmanipulation.h: #include "simdjson/westmere/bitmanipulation.h" */
/* begin file simdjson/westmere/bitmanipulation.h */
#ifndef SIMDJSON_WESTMERE_BITMANIPULATION_H
#define SIMDJSON_WESTMERE_BITMANIPULATION_H

/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */
/* amalgamation skipped (editor-only): #include "simdjson/westmere/base.h" */
/* amalgamation skipped (editor-only): #include "simdjson/westmere/intrinsics.h" */
/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */

namespace simdjson {
namespace westmere {
namespace {

// Returns the number of trailing zero bits of input_num.
// We sometimes call trailing_zeroes on inputs that are zero,
// but the algorithms do not end up using the returned value.
// Sadly, sanitizers are not smart enough to figure it out.
SIMDJSON_NO_SANITIZE_UNDEFINED
// This function can be used safely even if not all bytes have been
// initialized.
// See issue https://github.com/simdjson/simdjson/issues/1965
SIMDJSON_NO_SANITIZE_MEMORY
simdjson_inline int trailing_zeroes(uint64_t input_num) {
#if SIMDJSON_REGULAR_VISUAL_STUDIO
  unsigned long ret;
  // Search the mask data from least significant bit (LSB)
  // to the most significant bit (MSB) for a set bit (1).
  _BitScanForward64(&ret, input_num);
  return (int)ret;
#else // SIMDJSON_REGULAR_VISUAL_STUDIO
  return __builtin_ctzll(input_num);
#endif // SIMDJSON_REGULAR_VISUAL_STUDIO
}

/* Clears the lowest set bit. For input_num == 0 the unsigned subtraction wraps
   to all ones, so the result is 0. */
simdjson_inline uint64_t clear_lowest_bit(uint64_t input_num) {
  return input_num & (input_num-1);
}

/* Returns the number of leading zero bits of input_num.
   result might be undefined when input_num is zero: the Visual Studio branch
   returns 64, the other branch passes zero straight to __builtin_clzll. */
simdjson_inline int leading_zeroes(uint64_t input_num) {
#if SIMDJSON_REGULAR_VISUAL_STUDIO
  unsigned long leading_zero = 0;
  // Search the mask data from most significant bit (MSB)
  // to least significant bit (LSB) for a set bit (1).
  if (_BitScanReverse64(&leading_zero, input_num))
    return (int)(63 - leading_zero);
  else
    return 64;
#else
  return __builtin_clzll(input_num);
#endif// SIMDJSON_REGULAR_VISUAL_STUDIO
}

// Population count of input_num. The return type differs between the two
// toolchain branches.
#if SIMDJSON_REGULAR_VISUAL_STUDIO
simdjson_inline unsigned __int64 count_ones(uint64_t input_num) {
  // note: we do not support legacy 32-bit Windows in this kernel
  return __popcnt64(input_num);// Visual Studio wants two underscores
}
#else
simdjson_inline long long int count_ones(uint64_t input_num) {
  return _popcnt64(input_num);
}
#endif

// Stores value1 + value2 in *result and returns true when the 64-bit addition
// overflowed (carry out).
// NOTE(review): the reinterpret_cast target type is missing in this copy of the file.
simdjson_inline bool add_overflow(uint64_t value1, uint64_t value2,
                                uint64_t *result) {
#if SIMDJSON_REGULAR_VISUAL_STUDIO
  return _addcarry_u64(0, value1, value2,
                       reinterpret_cast(result));
#else
  return __builtin_uaddll_overflow(value1, value2,
                                   reinterpret_cast(result));
#endif
}

} // unnamed namespace
} // namespace westmere
} // namespace simdjson

#endif // SIMDJSON_WESTMERE_BITMANIPULATION_H
/* end file simdjson/westmere/bitmanipulation.h */
/* including simdjson/westmere/bitmask.h: #include "simdjson/westmere/bitmask.h" */
/* begin file simdjson/westmere/bitmask.h */
#ifndef SIMDJSON_WESTMERE_BITMASK_H
#define SIMDJSON_WESTMERE_BITMASK_H

/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */
/* amalgamation skipped (editor-only): #include "simdjson/westmere/base.h" */
/* amalgamation skipped (editor-only): #include "simdjson/westmere/intrinsics.h" */
/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */

namespace simdjson {
namespace westmere {
namespace {

//
// Perform a "cumulative bitwise xor," flipping bits each time a 1 is encountered.
//
// For example, prefix_xor(00100100) == 00011100
//
// Implemented as a carry-less multiplication of the bitmask by an all-ones
// operand, keeping the low 64 bits of the product.
//
simdjson_inline uint64_t prefix_xor(const uint64_t bitmask) {
  // There should be no such thing as a processor supporting avx2
  // but not clmul.
  // NOTE(review): the avx2 wording looks inherited from another kernel; this
  // kernel's target region above enables pclmul explicitly.
  __m128i all_ones = _mm_set1_epi8('\xFF');
  __m128i result = _mm_clmulepi64_si128(_mm_set_epi64x(0ULL, bitmask), all_ones, 0);
  return _mm_cvtsi128_si64(result);
}

} // unnamed namespace
} // namespace westmere
} // namespace simdjson

#endif // SIMDJSON_WESTMERE_BITMASK_H
/* end file simdjson/westmere/bitmask.h */
/* including simdjson/westmere/numberparsing_defs.h: #include "simdjson/westmere/numberparsing_defs.h" */
/* begin file simdjson/westmere/numberparsing_defs.h */
#ifndef SIMDJSON_WESTMERE_NUMBERPARSING_DEFS_H
#define SIMDJSON_WESTMERE_NUMBERPARSING_DEFS_H

/* including simdjson/westmere/base.h: #include "simdjson/westmere/base.h" */
/* begin file simdjson/westmere/base.h */
#ifndef SIMDJSON_WESTMERE_BASE_H
#define SIMDJSON_WESTMERE_BASE_H

/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */
/* amalgamation skipped (editor-only): #include "simdjson/base.h" */
/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */

// The constructor may be executed on any host, so we take care not to use SIMDJSON_TARGET_WESTMERE
namespace simdjson {
/**
 * Implementation for Westmere (Intel SSE4.2).
*/ namespace westmere { class implementation; namespace { namespace simd { template struct simd8; template struct simd8x64; } // namespace simd } // unnamed namespace } // namespace westmere } // namespace simdjson #endif // SIMDJSON_WESTMERE_BASE_H /* end file simdjson/westmere/base.h */ /* including simdjson/westmere/intrinsics.h: #include "simdjson/westmere/intrinsics.h" */ /* begin file simdjson/westmere/intrinsics.h */ #ifndef SIMDJSON_WESTMERE_INTRINSICS_H #define SIMDJSON_WESTMERE_INTRINSICS_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #include "simdjson/westmere/base.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ #if SIMDJSON_VISUAL_STUDIO // under clang within visual studio, this will include #include // visual studio or clang #else #include // elsewhere #endif // SIMDJSON_VISUAL_STUDIO #if SIMDJSON_CLANG_VISUAL_STUDIO /** * You are not supposed, normally, to include these * headers directly. Instead you should either include intrin.h * or x86intrin.h. 
However, when compiling with clang
 * under Windows (i.e., when _MSC_VER is set), these headers
 * only get included *if* the corresponding features are detected
 * from macros:
 */
/* NOTE(review): the header names after the two #include directives below are
   missing in this copy of the file; restore them from the upstream source. */
#include // for _mm_alignr_epi8
#include // for _mm_clmulepi64_si128
#endif

static_assert(sizeof(__m128i) <= simdjson::SIMDJSON_PADDING, "insufficient padding for westmere");

#endif // SIMDJSON_WESTMERE_INTRINSICS_H
/* end file simdjson/westmere/intrinsics.h */

/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */
/* amalgamation skipped (editor-only): #include "simdjson/internal/numberparsing_tables.h" */
/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */

namespace simdjson {
namespace westmere {
namespace numberparsing {

/**
 * @private
 * Converts the eight ASCII digits at `chars` to their integer value.
 * Performs an unaligned 128-bit load, so 16 bytes starting at `chars` are read.
 * The input is not validated here; presumably the caller has already checked
 * that the first eight bytes are digits — confirm at the call sites.
 */
static simdjson_inline uint32_t parse_eight_digits_unrolled(const uint8_t *chars) {
  // this actually computes *16* values so we are being wasteful.
  const __m128i ascii0 = _mm_set1_epi8('0');
  const __m128i mul_1_10 =
      _mm_setr_epi8(10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1);
  const __m128i mul_1_100 = _mm_setr_epi16(100, 1, 100, 1, 100, 1, 100, 1);
  const __m128i mul_1_10000 =
      _mm_setr_epi16(10000, 1, 10000, 1, 10000, 1, 10000, 1);
  // NOTE(review): the reinterpret_cast target type is missing in this copy of the file.
  const __m128i input = _mm_sub_epi8(
      _mm_loadu_si128(reinterpret_cast(chars)), ascii0);
  // Each multiply-add step merges adjacent lanes: digits -> 2-digit values ->
  // 4-digit values -> 8-digit values.
  const __m128i t1 = _mm_maddubs_epi16(input, mul_1_10);
  const __m128i t2 = _mm_madd_epi16(t1, mul_1_100);
  const __m128i t3 = _mm_packus_epi32(t2, t2);
  const __m128i t4 = _mm_madd_epi16(t3, mul_1_10000);
  return _mm_cvtsi128_si32(
      t4); // only captures the sum of the first 8 digits, drop the rest
}

/**
 * @private
 * Full 64x64 -> 128-bit unsigned multiplication; the product is returned as
 * the low and high 64-bit halves of an internal::value128.
 */
simdjson_inline internal::value128 full_multiplication(uint64_t value1, uint64_t value2) {
  internal::value128 answer;
#if SIMDJSON_REGULAR_VISUAL_STUDIO || SIMDJSON_IS_32BITS
#ifdef _M_ARM64
  // ARM64 has native support for 64-bit multiplications, no need to emulate
  answer.high = __umulh(value1, value2);
  answer.low = value1 * value2;
#else
  answer.low = _umul128(value1,
value2, &answer.high); // _umul128 not available on ARM64
#endif // _M_ARM64
#else // SIMDJSON_REGULAR_VISUAL_STUDIO || SIMDJSON_IS_32BITS
  // Compiler-provided 128-bit integer: multiply, then split into halves.
  __uint128_t r = (static_cast<__uint128_t>(value1)) * value2;
  answer.low = uint64_t(r);
  answer.high = uint64_t(r >> 64);
#endif
  return answer;
}

} // namespace numberparsing
} // namespace westmere
} // namespace simdjson

#define SIMDJSON_SWAR_NUMBER_PARSING 1

#endif // SIMDJSON_WESTMERE_NUMBERPARSING_DEFS_H
/* end file simdjson/westmere/numberparsing_defs.h */
/* including simdjson/westmere/simd.h: #include "simdjson/westmere/simd.h" */
/* begin file simdjson/westmere/simd.h */
#ifndef SIMDJSON_WESTMERE_SIMD_H
#define SIMDJSON_WESTMERE_SIMD_H

/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */
/* amalgamation skipped (editor-only): #include "simdjson/westmere/base.h" */
/* amalgamation skipped (editor-only): #include "simdjson/westmere/bitmanipulation.h" */
/* amalgamation skipped (editor-only): #include "simdjson/internal/simdprune_tables.h" */
/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */

namespace simdjson {
namespace westmere {
namespace {
namespace simd {

  // Thin wrapper around one __m128i register providing the bitwise operators.
  // Operators return `Child`, the derived vector type.
  // NOTE(review): the template parameter list (presumably declaring Child) is
  // missing in this copy of the file; restore it from the upstream source.
  template
  struct base {
    __m128i value;

    // Zero constructor
    simdjson_inline base() : value{__m128i()} {}

    // Conversion from SIMD register
    simdjson_inline base(const __m128i _value) : value(_value) {}

    // Conversion to SIMD register
    simdjson_inline operator const __m128i&() const { return this->value; }
    simdjson_inline operator __m128i&() { return this->value; }

    // Bit operations
    simdjson_inline Child operator|(const Child other) const { return _mm_or_si128(*this, other); }
    simdjson_inline Child operator&(const Child other) const { return _mm_and_si128(*this, other); }
    simdjson_inline Child operator^(const Child other) const { return _mm_xor_si128(*this, other); }
    // Computes (*this) & ~other: _mm_andnot_si128 negates its FIRST operand.
    simdjson_inline Child bit_andnot(const Child other) const { return _mm_andnot_si128(other, *this); }
    simdjson_inline Child& operator|=(const Child other)
{ auto this_cast = static_cast(this); *this_cast = *this_cast | other; return *this_cast; }
    simdjson_inline Child& operator&=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast & other; return *this_cast; }
    simdjson_inline Child& operator^=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast ^ other; return *this_cast; }
  };

  // Common base for the 16-lane byte vectors: bitmask typedefs, lane-wise
  // equality and access to the previous chunk.
  // NOTE(review): template parameter/argument lists are missing throughout this
  // copy of the file; restore them from the upstream source.
  template>
  struct base8: base> {
    typedef uint16_t bitmask_t;
    typedef uint32_t bitmask2_t;

    simdjson_inline base8() : base>() {}
    simdjson_inline base8(const __m128i _value) : base>(_value) {}

    // Lane-wise byte equality; each equal lane becomes 0xFF.
    friend simdjson_inline Mask operator==(const simd8 lhs, const simd8 rhs) { return _mm_cmpeq_epi8(lhs, rhs); }

    // Size of the wrapped register in bytes.
    static const int SIZE = sizeof(base>::value);

    // Returns this vector shifted so that the last N bytes of prev_chunk come
    // first, followed by the first 16-N bytes of this vector.
    template
    simdjson_inline simd8 prev(const simd8 prev_chunk) const {
      return _mm_alignr_epi8(*this, prev_chunk, 16 - N);
    }
  };

  // SIMD byte mask type (returned by things like eq and gt)
  template<>
  struct simd8: base8 {
    // All lanes 0xFF when _value is true, all lanes 0x00 otherwise.
    static simdjson_inline simd8 splat(bool _value) { return _mm_set1_epi8(uint8_t(-(!!_value))); }

    simdjson_inline simd8() : base8() {}
    simdjson_inline simd8(const __m128i _value) : base8(_value) {}
    // Splat constructor
    simdjson_inline simd8(bool _value) : base8(splat(_value)) {}

    // One bit per lane, taken from each byte's most significant bit.
    simdjson_inline int to_bitmask() const { return _mm_movemask_epi8(*this); }
    // True when at least one bit of the register is set.
    simdjson_inline bool any() const { return !_mm_testz_si128(*this, *this); }
    simdjson_inline simd8 operator~() const { return *this ^ true; }
  };

  // Shared implementation for the signed and unsigned byte vectors.
  template
  struct base8_numeric: base8 {
    static simdjson_inline simd8 splat(T _value) { return _mm_set1_epi8(_value); }
    static simdjson_inline simd8 zero() { return _mm_setzero_si128(); }
    // Unaligned load of 16 values.
    static simdjson_inline simd8 load(const T values[16]) {
      return _mm_loadu_si128(reinterpret_cast(values));
    }
    // Repeat 16 values as many times as necessary (usually for lookup tables)
    static simdjson_inline simd8 repeat_16(
      T v0,  T v1,  T v2,  T v3,  T v4,  T v5,  T v6,  T v7,
      T v8,  T v9,  T v10, T v11, T v12, T v13, T v14, T v15
    ) {
      return simd8(
        v0, v1, v2, v3, v4, v5, v6,
v7, v8, v9, v10,v11,v12,v13,v14,v15 ); }

    simdjson_inline base8_numeric() : base8() {}
    simdjson_inline base8_numeric(const __m128i _value) : base8(_value) {}

    // Store to array (unaligned store of all 16 bytes)
    simdjson_inline void store(T dst[16]) const { return _mm_storeu_si128(reinterpret_cast<__m128i *>(dst), *this); }

    // Override to distinguish from bool version
    simdjson_inline simd8 operator~() const { return *this ^ 0xFFu; }

    // Addition/subtraction are the same for signed and unsigned
    simdjson_inline simd8 operator+(const simd8 other) const { return _mm_add_epi8(*this, other); }
    simdjson_inline simd8 operator-(const simd8 other) const { return _mm_sub_epi8(*this, other); }
    simdjson_inline simd8& operator+=(const simd8 other) { *this = *this + other; return *static_cast*>(this); }
    simdjson_inline simd8& operator-=(const simd8 other) { *this = *this - other; return *static_cast*>(this); }

    // Perform a lookup assuming the value is between 0 and 16 (undefined behavior for out of range values)
    // Each byte of *this is used as an index into lookup_table.
    template
    simdjson_inline simd8 lookup_16(simd8 lookup_table) const {
      return _mm_shuffle_epi8(lookup_table, *this);
    }

    // Copies to 'output' all bytes corresponding to a 0 in the mask (interpreted as a bitset).
    // Passing a 0 value for mask would be equivalent to writing out every byte to output.
    // Only the first 16 - count_ones(mask) bytes of the result are significant but 16 bytes
    // get written.
    // Design consideration: it seems like a function with the
    // signature simd8 compress(uint32_t mask) would be
    // sensible, but the AVX ISA makes this kind of approach difficult.
template
    simdjson_inline void compress(uint16_t mask, L * output) const {
      using internal::thintable_epi8;
      using internal::BitsSetTable256mul2;
      using internal::pshufb_combine_table;
      // this particular implementation was inspired by work done by @animetosho
      // we do it in two steps, first 8 bytes and then second 8 bytes
      uint8_t mask1 = uint8_t(mask); // least significant 8 bits
      uint8_t mask2 = uint8_t(mask >> 8); // most significant 8 bits
      // next line just loads the 64-bit values thintable_epi8[mask1] and
      // thintable_epi8[mask2] into a 128-bit register, using only
      // two instructions on most compilers.
      __m128i shufmask =  _mm_set_epi64x(thintable_epi8[mask2], thintable_epi8[mask1]);
      // we increment by 0x08 the second half of the mask
      // (so that its indices address bytes 8..15 of the source register)
      shufmask =
      _mm_add_epi8(shufmask, _mm_set_epi32(0x08080808, 0x08080808, 0, 0));
      // this is the version "nearly pruned"
      __m128i pruned = _mm_shuffle_epi8(*this, shufmask);
      // we still need to put the two halves together.
      // we compute the popcount of the first half:
      // (the table name suggests the stored value is twice the popcount — TODO confirm)
      int pop1 = BitsSetTable256mul2[mask1];
      // then load the corresponding mask, what it does is to write
      // only the first pop1 bytes from the first 8 bytes, and then
      // it fills in with the bytes from the second 8 bytes + some filling
      // at the end.
__m128i compactmask = _mm_loadu_si128(reinterpret_cast(pshufb_combine_table + pop1 * 8));
      __m128i answer = _mm_shuffle_epi8(pruned, compactmask);
      // Always writes a full 16 bytes to output.
      _mm_storeu_si128(reinterpret_cast<__m128i *>(output), answer);
    }

    // Convenience overload: builds the 16-entry table from individual values
    // and forwards to the single-argument lookup_16().
    template
    simdjson_inline simd8 lookup_16(
        L replace0,  L replace1,  L replace2,  L replace3,
        L replace4,  L replace5,  L replace6,  L replace7,
        L replace8,  L replace9,  L replace10, L replace11,
        L replace12, L replace13, L replace14, L replace15) const {
      return lookup_16(simd8::repeat_16(
        replace0,  replace1,  replace2,  replace3,
        replace4,  replace5,  replace6,  replace7,
        replace8,  replace9,  replace10, replace11,
        replace12, replace13, replace14, replace15
      ));
    }
  };

  // Signed bytes
  // NOTE(review): the template argument lists on this specialization are
  // missing in this copy of the file; restore them from the upstream source.
  template<>
  struct simd8 : base8_numeric {
    simdjson_inline simd8() : base8_numeric() {}
    simdjson_inline simd8(const __m128i _value) : base8_numeric(_value) {}
    // Splat constructor
    simdjson_inline simd8(int8_t _value) : simd8(splat(_value)) {}
    // Array constructor
    simdjson_inline simd8(const int8_t* values) : simd8(load(values)) {}
    // Member-by-member initialization
    simdjson_inline simd8(
      int8_t v0,  int8_t v1,  int8_t v2,  int8_t v3,  int8_t v4,  int8_t v5,  int8_t v6,  int8_t v7,
      int8_t v8,  int8_t v9,  int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15
    ) : simd8(_mm_setr_epi8(
      v0, v1, v2, v3, v4, v5, v6, v7,
      v8, v9, v10,v11,v12,v13,v14,v15
    )) {}
    // Repeat 16 values as many times as necessary (usually for lookup tables)
    simdjson_inline static simd8 repeat_16(
      int8_t v0,  int8_t v1,  int8_t v2,  int8_t v3,  int8_t v4,  int8_t v5,  int8_t v6,  int8_t v7,
      int8_t v8,  int8_t v9,  int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15
    ) {
      return simd8(
        v0, v1, v2, v3, v4, v5, v6, v7,
        v8, v9, v10,v11,v12,v13,v14,v15
      );
    }

    // Order-sensitive comparisons (signed)
    simdjson_inline simd8 max_val(const simd8 other) const { return _mm_max_epi8(*this, other); }
    simdjson_inline simd8 min_val(const simd8 other) const { return _mm_min_epi8(*this, other); }
    simdjson_inline simd8 operator>(const simd8
other) const { return _mm_cmpgt_epi8(*this, other); }
    // Signed less-than, expressed as greater-than with the operands swapped.
    simdjson_inline simd8 operator<(const simd8 other) const { return _mm_cmpgt_epi8(other, *this); }
  };

  // Unsigned bytes
  // NOTE(review): the template argument lists on this specialization are
  // missing in this copy of the file; restore them from the upstream source.
  template<>
  struct simd8: base8_numeric {
    simdjson_inline simd8() : base8_numeric() {}
    simdjson_inline simd8(const __m128i _value) : base8_numeric(_value) {}
    // Splat constructor
    simdjson_inline simd8(uint8_t _value) : simd8(splat(_value)) {}
    // Array constructor
    simdjson_inline simd8(const uint8_t* values) : simd8(load(values)) {}
    // Member-by-member initialization
    simdjson_inline simd8(
      uint8_t v0,  uint8_t v1,  uint8_t v2,  uint8_t v3,  uint8_t v4,  uint8_t v5,  uint8_t v6,  uint8_t v7,
      uint8_t v8,  uint8_t v9,  uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15
    ) : simd8(_mm_setr_epi8(
      v0, v1, v2, v3, v4, v5, v6, v7,
      v8, v9, v10,v11,v12,v13,v14,v15
    )) {}
    // Repeat 16 values as many times as necessary (usually for lookup tables)
    simdjson_inline static simd8 repeat_16(
      uint8_t v0,  uint8_t v1,  uint8_t v2,  uint8_t v3,  uint8_t v4,  uint8_t v5,  uint8_t v6,  uint8_t v7,
      uint8_t v8,  uint8_t v9,  uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15
    ) {
      return simd8(
        v0, v1, v2, v3, v4, v5, v6, v7,
        v8, v9, v10,v11,v12,v13,v14,v15
      );
    }

    // Saturated math
    simdjson_inline simd8 saturating_add(const simd8 other) const { return _mm_adds_epu8(*this, other); }
    simdjson_inline simd8 saturating_sub(const simd8 other) const { return _mm_subs_epu8(*this, other); }

    // Order-specific operations
    simdjson_inline simd8 max_val(const simd8 other) const { return _mm_max_epu8(*this, other); }
    simdjson_inline simd8 min_val(const simd8 other) const { return _mm_min_epu8(*this, other); }
    // Same as >, but only guarantees true is nonzero (> guarantees true = -1)
    // (*this - other with unsigned saturation is nonzero exactly where *this > other)
    simdjson_inline simd8 gt_bits(const simd8 other) const { return this->saturating_sub(other); }
    // Same as <, but only guarantees true is nonzero (< guarantees true = -1)
    simdjson_inline simd8 lt_bits(const simd8 other) const {
return other.saturating_sub(*this); } simdjson_inline simd8 operator<=(const simd8 other) const { return other.max_val(*this) == other; } simdjson_inline simd8 operator>=(const simd8 other) const { return other.min_val(*this) == other; } simdjson_inline simd8 operator>(const simd8 other) const { return this->gt_bits(other).any_bits_set(); } simdjson_inline simd8 operator<(const simd8 other) const { return this->gt_bits(other).any_bits_set(); } // Bit-specific operations simdjson_inline simd8 bits_not_set() const { return *this == uint8_t(0); } simdjson_inline simd8 bits_not_set(simd8 bits) const { return (*this & bits).bits_not_set(); } simdjson_inline simd8 any_bits_set() const { return ~this->bits_not_set(); } simdjson_inline simd8 any_bits_set(simd8 bits) const { return ~this->bits_not_set(bits); } simdjson_inline bool is_ascii() const { return _mm_movemask_epi8(*this) == 0; } simdjson_inline bool bits_not_set_anywhere() const { return _mm_testz_si128(*this, *this); } simdjson_inline bool any_bits_set_anywhere() const { return !bits_not_set_anywhere(); } simdjson_inline bool bits_not_set_anywhere(simd8 bits) const { return _mm_testz_si128(*this, bits); } simdjson_inline bool any_bits_set_anywhere(simd8 bits) const { return !bits_not_set_anywhere(bits); } template simdjson_inline simd8 shr() const { return simd8(_mm_srli_epi16(*this, N)) & uint8_t(0xFFu >> N); } template simdjson_inline simd8 shl() const { return simd8(_mm_slli_epi16(*this, N)) & uint8_t(0xFFu << N); } // Get one of the bits and make a bitmask out of it. // e.g. 
// value.get_bit<7>() gets the high bit
    // Shifts bit N of every byte into the byte's most significant bit, then
    // collects one bit per lane.
    template
    simdjson_inline int get_bit() const { return _mm_movemask_epi8(_mm_slli_epi16(*this, 7-N)); }
  };

  // A 64-byte block held as four 16-byte chunks.
  // NOTE(review): template parameter/argument lists are missing in this copy of
  // the file; restore them from the upstream source.
  template
  struct simd8x64 {
    static constexpr int NUM_CHUNKS = 64 / sizeof(simd8);
    static_assert(NUM_CHUNKS == 4, "Westmere kernel should use four registers per 64-byte block.");
    const simd8 chunks[NUM_CHUNKS];

    simd8x64(const simd8x64& o) = delete; // no copy allowed
    // NOTE(review): this deletes assignment from a simd8, not from a simd8x64 —
    // confirm that is intended.
    simd8x64& operator=(const simd8& other) = delete; // no assignment allowed
    simd8x64() = delete; // no default constructor allowed

    simdjson_inline simd8x64(const simd8 chunk0, const simd8 chunk1, const simd8 chunk2, const simd8 chunk3) : chunks{chunk0, chunk1, chunk2, chunk3} {}
    // Loads 64 values from ptr, 16 per chunk.
    simdjson_inline simd8x64(const T ptr[64]) : chunks{simd8::load(ptr), simd8::load(ptr+16), simd8::load(ptr+32), simd8::load(ptr+48)} {}

    // Stores the four chunks back to back starting at ptr.
    simdjson_inline void store(T ptr[64]) const {
      this->chunks[0].store(ptr+sizeof(simd8)*0);
      this->chunks[1].store(ptr+sizeof(simd8)*1);
      this->chunks[2].store(ptr+sizeof(simd8)*2);
      this->chunks[3].store(ptr+sizeof(simd8)*3);
    }

    // Bitwise OR of the four chunks.
    simdjson_inline simd8 reduce_or() const {
      return (this->chunks[0] | this->chunks[1]) | (this->chunks[2] | this->chunks[3]);
    }

    // Compresses each chunk with its 16-bit slice of `mask`. Each chunk's output
    // offset is reduced by the number of mask bits set in the chunks before it,
    // so the kept bytes end up contiguous. Returns 64 - count_ones(mask).
    simdjson_inline uint64_t compress(uint64_t mask, T * output) const {
      this->chunks[0].compress(uint16_t(mask), output);
      this->chunks[1].compress(uint16_t(mask >> 16), output + 16 - count_ones(mask & 0xFFFF));
      this->chunks[2].compress(uint16_t(mask >> 32), output + 32 - count_ones(mask & 0xFFFFFFFF));
      this->chunks[3].compress(uint16_t(mask >> 48), output + 48 - count_ones(mask & 0xFFFFFFFFFFFF));
      return 64 - count_ones(mask);
    }

    // Concatenates the four 16-bit chunk bitmasks into one 64-bit mask, chunk 0
    // in the lowest bits.
    simdjson_inline uint64_t to_bitmask() const {
      uint64_t r0 = uint32_t(this->chunks[0].to_bitmask() );
      uint64_t r1 =          this->chunks[1].to_bitmask() ;
      uint64_t r2 =          this->chunks[2].to_bitmask() ;
      uint64_t r3 =          this->chunks[3].to_bitmask() ;
      return r0 | (r1 << 16) | (r2 << 32) | (r3 << 48);
    }

    // 64-bit mask of the positions whose byte equals m.
    simdjson_inline uint64_t eq(const T m) const {
      const simd8 mask = simd8::splat(m);
      return  simd8x64(
this->chunks[0] == mask, this->chunks[1] == mask, this->chunks[2] == mask, this->chunks[3] == mask ).to_bitmask(); }

    // 64-bit mask of the positions where this block and `other` hold equal bytes.
    simdjson_inline uint64_t eq(const simd8x64 &other) const {
      return  simd8x64(
        this->chunks[0] == other.chunks[0],
        this->chunks[1] == other.chunks[1],
        this->chunks[2] == other.chunks[2],
        this->chunks[3] == other.chunks[3]
      ).to_bitmask();
    }

    // 64-bit mask of the positions whose byte is less than or equal to m.
    simdjson_inline uint64_t lteq(const T m) const {
      const simd8 mask = simd8::splat(m);
      return  simd8x64(
        this->chunks[0] <= mask,
        this->chunks[1] <= mask,
        this->chunks[2] <= mask,
        this->chunks[3] <= mask
      ).to_bitmask();
    }
  }; // struct simd8x64

} // namespace simd
} // unnamed namespace
} // namespace westmere
} // namespace simdjson

#endif // SIMDJSON_WESTMERE_SIMD_H
/* end file simdjson/westmere/simd.h */
/* including simdjson/westmere/stringparsing_defs.h: #include "simdjson/westmere/stringparsing_defs.h" */
/* begin file simdjson/westmere/stringparsing_defs.h */
#ifndef SIMDJSON_WESTMERE_STRINGPARSING_DEFS_H
#define SIMDJSON_WESTMERE_STRINGPARSING_DEFS_H

/* including simdjson/westmere/bitmanipulation.h: #include "simdjson/westmere/bitmanipulation.h" */
/* begin file simdjson/westmere/bitmanipulation.h */
#ifndef SIMDJSON_WESTMERE_BITMANIPULATION_H
#define SIMDJSON_WESTMERE_BITMANIPULATION_H

/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */
/* amalgamation skipped (editor-only): #include "simdjson/westmere/base.h" */
/* amalgamation skipped (editor-only): #include "simdjson/westmere/intrinsics.h" */
/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */

namespace simdjson {
namespace westmere {
namespace {

// Returns the number of trailing zero bits of input_num.
// We sometimes call trailing_zeroes on inputs that are zero,
// but the algorithms do not end up using the returned value.
// Sadly, sanitizers are not smart enough to figure it out.
SIMDJSON_NO_SANITIZE_UNDEFINED
// This function can be used safely even if not all bytes have been
// initialized.
// See issue https://github.com/simdjson/simdjson/issues/1965 SIMDJSON_NO_SANITIZE_MEMORY simdjson_inline int trailing_zeroes(uint64_t input_num) { #if SIMDJSON_REGULAR_VISUAL_STUDIO unsigned long ret; // Search the mask data from least significant bit (LSB) // to the most significant bit (MSB) for a set bit (1). _BitScanForward64(&ret, input_num); return (int)ret; #else // SIMDJSON_REGULAR_VISUAL_STUDIO return __builtin_ctzll(input_num); #endif // SIMDJSON_REGULAR_VISUAL_STUDIO } /* result might be undefined when input_num is zero */ simdjson_inline uint64_t clear_lowest_bit(uint64_t input_num) { return input_num & (input_num-1); } /* result might be undefined when input_num is zero */ simdjson_inline int leading_zeroes(uint64_t input_num) { #if SIMDJSON_REGULAR_VISUAL_STUDIO unsigned long leading_zero = 0; // Search the mask data from most significant bit (MSB) // to least significant bit (LSB) for a set bit (1). if (_BitScanReverse64(&leading_zero, input_num)) return (int)(63 - leading_zero); else return 64; #else return __builtin_clzll(input_num); #endif// SIMDJSON_REGULAR_VISUAL_STUDIO } #if SIMDJSON_REGULAR_VISUAL_STUDIO simdjson_inline unsigned __int64 count_ones(uint64_t input_num) { // note: we do not support legacy 32-bit Windows in this kernel return __popcnt64(input_num);// Visual Studio wants two underscores } #else simdjson_inline long long int count_ones(uint64_t input_num) { return _popcnt64(input_num); } #endif simdjson_inline bool add_overflow(uint64_t value1, uint64_t value2, uint64_t *result) { #if SIMDJSON_REGULAR_VISUAL_STUDIO return _addcarry_u64(0, value1, value2, reinterpret_cast(result)); #else return __builtin_uaddll_overflow(value1, value2, reinterpret_cast(result)); #endif } } // unnamed namespace } // namespace westmere } // namespace simdjson #endif // SIMDJSON_WESTMERE_BITMANIPULATION_H /* end file simdjson/westmere/bitmanipulation.h */ /* including simdjson/westmere/simd.h: #include "simdjson/westmere/simd.h" */ /* begin file 
simdjson/westmere/simd.h */ #ifndef SIMDJSON_WESTMERE_SIMD_H #define SIMDJSON_WESTMERE_SIMD_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #include "simdjson/westmere/base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/westmere/bitmanipulation.h" */ /* amalgamation skipped (editor-only): #include "simdjson/internal/simdprune_tables.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { namespace westmere { namespace { namespace simd { template struct base { __m128i value; // Zero constructor simdjson_inline base() : value{__m128i()} {} // Conversion from SIMD register simdjson_inline base(const __m128i _value) : value(_value) {} // Conversion to SIMD register simdjson_inline operator const __m128i&() const { return this->value; } simdjson_inline operator __m128i&() { return this->value; } // Bit operations simdjson_inline Child operator|(const Child other) const { return _mm_or_si128(*this, other); } simdjson_inline Child operator&(const Child other) const { return _mm_and_si128(*this, other); } simdjson_inline Child operator^(const Child other) const { return _mm_xor_si128(*this, other); } simdjson_inline Child bit_andnot(const Child other) const { return _mm_andnot_si128(other, *this); } simdjson_inline Child& operator|=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast | other; return *this_cast; } simdjson_inline Child& operator&=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast & other; return *this_cast; } simdjson_inline Child& operator^=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast ^ other; return *this_cast; } }; template> struct base8: base> { typedef uint16_t bitmask_t; typedef uint32_t bitmask2_t; simdjson_inline base8() : base>() {} simdjson_inline base8(const __m128i _value) : base>(_value) {} friend 
simdjson_inline Mask operator==(const simd8 lhs, const simd8 rhs) { return _mm_cmpeq_epi8(lhs, rhs); } static const int SIZE = sizeof(base>::value); template simdjson_inline simd8 prev(const simd8 prev_chunk) const { return _mm_alignr_epi8(*this, prev_chunk, 16 - N); } }; // SIMD byte mask type (returned by things like eq and gt) template<> struct simd8: base8 { static simdjson_inline simd8 splat(bool _value) { return _mm_set1_epi8(uint8_t(-(!!_value))); } simdjson_inline simd8() : base8() {} simdjson_inline simd8(const __m128i _value) : base8(_value) {} // Splat constructor simdjson_inline simd8(bool _value) : base8(splat(_value)) {} simdjson_inline int to_bitmask() const { return _mm_movemask_epi8(*this); } simdjson_inline bool any() const { return !_mm_testz_si128(*this, *this); } simdjson_inline simd8 operator~() const { return *this ^ true; } }; template struct base8_numeric: base8 { static simdjson_inline simd8 splat(T _value) { return _mm_set1_epi8(_value); } static simdjson_inline simd8 zero() { return _mm_setzero_si128(); } static simdjson_inline simd8 load(const T values[16]) { return _mm_loadu_si128(reinterpret_cast(values)); } // Repeat 16 values as many times as necessary (usually for lookup tables) static simdjson_inline simd8 repeat_16( T v0, T v1, T v2, T v3, T v4, T v5, T v6, T v7, T v8, T v9, T v10, T v11, T v12, T v13, T v14, T v15 ) { return simd8( v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10,v11,v12,v13,v14,v15 ); } simdjson_inline base8_numeric() : base8() {} simdjson_inline base8_numeric(const __m128i _value) : base8(_value) {} // Store to array simdjson_inline void store(T dst[16]) const { return _mm_storeu_si128(reinterpret_cast<__m128i *>(dst), *this); } // Override to distinguish from bool version simdjson_inline simd8 operator~() const { return *this ^ 0xFFu; } // Addition/subtraction are the same for signed and unsigned simdjson_inline simd8 operator+(const simd8 other) const { return _mm_add_epi8(*this, other); } simdjson_inline simd8 
operator-(const simd8 other) const { return _mm_sub_epi8(*this, other); } simdjson_inline simd8& operator+=(const simd8 other) { *this = *this + other; return *static_cast*>(this); } simdjson_inline simd8& operator-=(const simd8 other) { *this = *this - other; return *static_cast*>(this); } // Perform a lookup assuming the value is between 0 and 16 (undefined behavior for out of range values) template simdjson_inline simd8 lookup_16(simd8 lookup_table) const { return _mm_shuffle_epi8(lookup_table, *this); } // Copies to 'output" all bytes corresponding to a 0 in the mask (interpreted as a bitset). // Passing a 0 value for mask would be equivalent to writing out every byte to output. // Only the first 16 - count_ones(mask) bytes of the result are significant but 16 bytes // get written. // Design consideration: it seems like a function with the // signature simd8 compress(uint32_t mask) would be // sensible, but the AVX ISA makes this kind of approach difficult. template simdjson_inline void compress(uint16_t mask, L * output) const { using internal::thintable_epi8; using internal::BitsSetTable256mul2; using internal::pshufb_combine_table; // this particular implementation was inspired by work done by @animetosho // we do it in two steps, first 8 bytes and then second 8 bytes uint8_t mask1 = uint8_t(mask); // least significant 8 bits uint8_t mask2 = uint8_t(mask >> 8); // most significant 8 bits // next line just loads the 64-bit values thintable_epi8[mask1] and // thintable_epi8[mask2] into a 128-bit register, using only // two instructions on most compilers. __m128i shufmask = _mm_set_epi64x(thintable_epi8[mask2], thintable_epi8[mask1]); // we increment by 0x08 the second half of the mask shufmask = _mm_add_epi8(shufmask, _mm_set_epi32(0x08080808, 0x08080808, 0, 0)); // this is the version "nearly pruned" __m128i pruned = _mm_shuffle_epi8(*this, shufmask); // we still need to put the two halves together. 
// we compute the popcount of the first half: int pop1 = BitsSetTable256mul2[mask1]; // then load the corresponding mask, what it does is to write // only the first pop1 bytes from the first 8 bytes, and then // it fills in with the bytes from the second 8 bytes + some filling // at the end. __m128i compactmask = _mm_loadu_si128(reinterpret_cast(pshufb_combine_table + pop1 * 8)); __m128i answer = _mm_shuffle_epi8(pruned, compactmask); _mm_storeu_si128(reinterpret_cast<__m128i *>(output), answer); } template simdjson_inline simd8 lookup_16( L replace0, L replace1, L replace2, L replace3, L replace4, L replace5, L replace6, L replace7, L replace8, L replace9, L replace10, L replace11, L replace12, L replace13, L replace14, L replace15) const { return lookup_16(simd8::repeat_16( replace0, replace1, replace2, replace3, replace4, replace5, replace6, replace7, replace8, replace9, replace10, replace11, replace12, replace13, replace14, replace15 )); } }; // Signed bytes template<> struct simd8 : base8_numeric { simdjson_inline simd8() : base8_numeric() {} simdjson_inline simd8(const __m128i _value) : base8_numeric(_value) {} // Splat constructor simdjson_inline simd8(int8_t _value) : simd8(splat(_value)) {} // Array constructor simdjson_inline simd8(const int8_t* values) : simd8(load(values)) {} // Member-by-member initialization simdjson_inline simd8( int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15 ) : simd8(_mm_setr_epi8( v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10,v11,v12,v13,v14,v15 )) {} // Repeat 16 values as many times as necessary (usually for lookup tables) simdjson_inline static simd8 repeat_16( int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15 ) { return simd8( v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, 
v10,v11,v12,v13,v14,v15 ); } // Order-sensitive comparisons simdjson_inline simd8 max_val(const simd8 other) const { return _mm_max_epi8(*this, other); } simdjson_inline simd8 min_val(const simd8 other) const { return _mm_min_epi8(*this, other); } simdjson_inline simd8 operator>(const simd8 other) const { return _mm_cmpgt_epi8(*this, other); } simdjson_inline simd8 operator<(const simd8 other) const { return _mm_cmpgt_epi8(other, *this); } }; // Unsigned bytes template<> struct simd8: base8_numeric { simdjson_inline simd8() : base8_numeric() {} simdjson_inline simd8(const __m128i _value) : base8_numeric(_value) {} // Splat constructor simdjson_inline simd8(uint8_t _value) : simd8(splat(_value)) {} // Array constructor simdjson_inline simd8(const uint8_t* values) : simd8(load(values)) {} // Member-by-member initialization simdjson_inline simd8( uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15 ) : simd8(_mm_setr_epi8( v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10,v11,v12,v13,v14,v15 )) {} // Repeat 16 values as many times as necessary (usually for lookup tables) simdjson_inline static simd8 repeat_16( uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15 ) { return simd8( v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10,v11,v12,v13,v14,v15 ); } // Saturated math simdjson_inline simd8 saturating_add(const simd8 other) const { return _mm_adds_epu8(*this, other); } simdjson_inline simd8 saturating_sub(const simd8 other) const { return _mm_subs_epu8(*this, other); } // Order-specific operations simdjson_inline simd8 max_val(const simd8 other) const { return _mm_max_epu8(*this, other); } simdjson_inline simd8 min_val(const simd8 other) const { return _mm_min_epu8(*this, other); } // Same as >, 
but only guarantees true is nonzero (< guarantees true = -1) simdjson_inline simd8 gt_bits(const simd8 other) const { return this->saturating_sub(other); } // Same as <, but only guarantees true is nonzero (< guarantees true = -1) simdjson_inline simd8 lt_bits(const simd8 other) const { return other.saturating_sub(*this); } simdjson_inline simd8 operator<=(const simd8 other) const { return other.max_val(*this) == other; } simdjson_inline simd8 operator>=(const simd8 other) const { return other.min_val(*this) == other; } simdjson_inline simd8 operator>(const simd8 other) const { return this->gt_bits(other).any_bits_set(); } simdjson_inline simd8 operator<(const simd8 other) const { return this->gt_bits(other).any_bits_set(); } // Bit-specific operations simdjson_inline simd8 bits_not_set() const { return *this == uint8_t(0); } simdjson_inline simd8 bits_not_set(simd8 bits) const { return (*this & bits).bits_not_set(); } simdjson_inline simd8 any_bits_set() const { return ~this->bits_not_set(); } simdjson_inline simd8 any_bits_set(simd8 bits) const { return ~this->bits_not_set(bits); } simdjson_inline bool is_ascii() const { return _mm_movemask_epi8(*this) == 0; } simdjson_inline bool bits_not_set_anywhere() const { return _mm_testz_si128(*this, *this); } simdjson_inline bool any_bits_set_anywhere() const { return !bits_not_set_anywhere(); } simdjson_inline bool bits_not_set_anywhere(simd8 bits) const { return _mm_testz_si128(*this, bits); } simdjson_inline bool any_bits_set_anywhere(simd8 bits) const { return !bits_not_set_anywhere(bits); } template simdjson_inline simd8 shr() const { return simd8(_mm_srli_epi16(*this, N)) & uint8_t(0xFFu >> N); } template simdjson_inline simd8 shl() const { return simd8(_mm_slli_epi16(*this, N)) & uint8_t(0xFFu << N); } // Get one of the bits and make a bitmask out of it. // e.g. 
value.get_bit<7>() gets the high bit template simdjson_inline int get_bit() const { return _mm_movemask_epi8(_mm_slli_epi16(*this, 7-N)); } }; template struct simd8x64 { static constexpr int NUM_CHUNKS = 64 / sizeof(simd8); static_assert(NUM_CHUNKS == 4, "Westmere kernel should use four registers per 64-byte block."); const simd8 chunks[NUM_CHUNKS]; simd8x64(const simd8x64& o) = delete; // no copy allowed simd8x64& operator=(const simd8& other) = delete; // no assignment allowed simd8x64() = delete; // no default constructor allowed simdjson_inline simd8x64(const simd8 chunk0, const simd8 chunk1, const simd8 chunk2, const simd8 chunk3) : chunks{chunk0, chunk1, chunk2, chunk3} {} simdjson_inline simd8x64(const T ptr[64]) : chunks{simd8::load(ptr), simd8::load(ptr+16), simd8::load(ptr+32), simd8::load(ptr+48)} {} simdjson_inline void store(T ptr[64]) const { this->chunks[0].store(ptr+sizeof(simd8)*0); this->chunks[1].store(ptr+sizeof(simd8)*1); this->chunks[2].store(ptr+sizeof(simd8)*2); this->chunks[3].store(ptr+sizeof(simd8)*3); } simdjson_inline simd8 reduce_or() const { return (this->chunks[0] | this->chunks[1]) | (this->chunks[2] | this->chunks[3]); } simdjson_inline uint64_t compress(uint64_t mask, T * output) const { this->chunks[0].compress(uint16_t(mask), output); this->chunks[1].compress(uint16_t(mask >> 16), output + 16 - count_ones(mask & 0xFFFF)); this->chunks[2].compress(uint16_t(mask >> 32), output + 32 - count_ones(mask & 0xFFFFFFFF)); this->chunks[3].compress(uint16_t(mask >> 48), output + 48 - count_ones(mask & 0xFFFFFFFFFFFF)); return 64 - count_ones(mask); } simdjson_inline uint64_t to_bitmask() const { uint64_t r0 = uint32_t(this->chunks[0].to_bitmask() ); uint64_t r1 = this->chunks[1].to_bitmask() ; uint64_t r2 = this->chunks[2].to_bitmask() ; uint64_t r3 = this->chunks[3].to_bitmask() ; return r0 | (r1 << 16) | (r2 << 32) | (r3 << 48); } simdjson_inline uint64_t eq(const T m) const { const simd8 mask = simd8::splat(m); return simd8x64( 
this->chunks[0] == mask, this->chunks[1] == mask, this->chunks[2] == mask, this->chunks[3] == mask ).to_bitmask(); } simdjson_inline uint64_t eq(const simd8x64 &other) const { return simd8x64( this->chunks[0] == other.chunks[0], this->chunks[1] == other.chunks[1], this->chunks[2] == other.chunks[2], this->chunks[3] == other.chunks[3] ).to_bitmask(); } simdjson_inline uint64_t lteq(const T m) const { const simd8 mask = simd8::splat(m); return simd8x64( this->chunks[0] <= mask, this->chunks[1] <= mask, this->chunks[2] <= mask, this->chunks[3] <= mask ).to_bitmask(); } }; // struct simd8x64 } // namespace simd } // unnamed namespace } // namespace westmere } // namespace simdjson #endif // SIMDJSON_WESTMERE_SIMD_INPUT_H /* end file simdjson/westmere/simd.h */ namespace simdjson { namespace westmere { namespace { using namespace simd; // Holds backslashes and quotes locations. struct backslash_and_quote { public: static constexpr uint32_t BYTES_PROCESSED = 32; simdjson_inline static backslash_and_quote copy_and_find(const uint8_t *src, uint8_t *dst); simdjson_inline bool has_quote_first() { return ((bs_bits - 1) & quote_bits) != 0; } simdjson_inline bool has_backslash() { return bs_bits != 0; } simdjson_inline int quote_index() { return trailing_zeroes(quote_bits); } simdjson_inline int backslash_index() { return trailing_zeroes(bs_bits); } uint32_t bs_bits; uint32_t quote_bits; }; // struct backslash_and_quote simdjson_inline backslash_and_quote backslash_and_quote::copy_and_find(const uint8_t *src, uint8_t *dst) { // this can read up to 31 bytes beyond the buffer size, but we require // SIMDJSON_PADDING of padding static_assert(SIMDJSON_PADDING >= (BYTES_PROCESSED - 1), "backslash and quote finder must process fewer than SIMDJSON_PADDING bytes"); simd8 v0(src); simd8 v1(src + 16); v0.store(dst); v1.store(dst + 16); uint64_t bs_and_quote = simd8x64(v0 == '\\', v1 == '\\', v0 == '"', v1 == '"').to_bitmask(); return { uint32_t(bs_and_quote), // bs_bits 
uint32_t(bs_and_quote >> 32) // quote_bits }; } } // unnamed namespace } // namespace westmere } // namespace simdjson #endif // SIMDJSON_WESTMERE_STRINGPARSING_DEFS_H /* end file simdjson/westmere/stringparsing_defs.h */ /* end file simdjson/westmere/begin.h */ /* including simdjson/generic/ondemand/amalgamated.h for westmere: #include "simdjson/generic/ondemand/amalgamated.h" */ /* begin file simdjson/generic/ondemand/amalgamated.h for westmere */ #if defined(SIMDJSON_CONDITIONAL_INCLUDE) && !defined(SIMDJSON_GENERIC_ONDEMAND_DEPENDENCIES_H) #error simdjson/generic/ondemand/dependencies.h must be included before simdjson/generic/ondemand/amalgamated.h! #endif // Stuff other things depend on /* including simdjson/generic/ondemand/base.h for westmere: #include "simdjson/generic/ondemand/base.h" */ /* begin file simdjson/generic/ondemand/base.h for westmere */ #ifndef SIMDJSON_GENERIC_ONDEMAND_BASE_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_BASE_H */ /* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { namespace westmere { /** * A fast, simple, DOM-like interface that parses JSON as you use it. * * Designed for maximum speed and a lower memory profile. */ namespace ondemand { /** Represents the depth of a JSON value (number of nested arrays/objects). 
*/ using depth_t = int32_t; /** @copydoc simdjson::westmere::number_type */ using number_type = simdjson::westmere::number_type; /** @private Position in the JSON buffer indexes */ using token_position = const uint32_t *; class array; class array_iterator; class document; class document_reference; class document_stream; class field; class json_iterator; enum class json_type; struct number; class object; class object_iterator; class parser; class raw_json_string; class token_iterator; class value; class value_iterator; } // namespace ondemand } // namespace westmere } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_BASE_H /* end file simdjson/generic/ondemand/base.h for westmere */ /* including simdjson/generic/ondemand/value_iterator.h for westmere: #include "simdjson/generic/ondemand/value_iterator.h" */ /* begin file simdjson/generic/ondemand/value_iterator.h for westmere */ #ifndef SIMDJSON_GENERIC_ONDEMAND_VALUE_ITERATOR_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_VALUE_ITERATOR_H */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { namespace westmere { namespace ondemand { /** * Iterates through a single JSON value at a particular depth. * * Does not keep track of the type of value: provides methods for objects, arrays and scalars and expects * the caller to call the right ones. * * @private This is not intended for external use. 
*/ class value_iterator { protected: /** The underlying JSON iterator */ json_iterator *_json_iter{}; /** The depth of this value */ depth_t _depth{}; /** * The starting token index for this value */ token_position _start_position{}; public: simdjson_inline value_iterator() noexcept = default; /** * Denote that we're starting a document. */ simdjson_inline void start_document() noexcept; /** * Skips a non-iterated or partially-iterated JSON value, whether it is a scalar, array or object. * * Optimized for scalars. */ simdjson_warn_unused simdjson_inline error_code skip_child() noexcept; /** * Tell whether the iterator is at the EOF mark */ simdjson_inline bool at_end() const noexcept; /** * Tell whether the iterator is at the start of the value */ simdjson_inline bool at_start() const noexcept; /** * Tell whether the value is open--if the value has not been used, or the array/object is still open. */ simdjson_inline bool is_open() const noexcept; /** * Tell whether the value is at an object's first field (just after the {). */ simdjson_inline bool at_first_field() const noexcept; /** * Abandon all iteration. */ simdjson_inline void abandon() noexcept; /** * Get the child value as a value_iterator. */ simdjson_inline value_iterator child_value() const noexcept; /** * Get the depth of this value. */ simdjson_inline int32_t depth() const noexcept; /** * Get the JSON type of this value. * * @error TAPE_ERROR when the JSON value is a bad token like "}" "," or "alse". */ simdjson_inline simdjson_result type() const noexcept; /** * @addtogroup object Object iteration * * Methods to iterate and find object fields. These methods generally *assume* the value is * actually an object; the caller is responsible for keeping track of that fact. * * @{ */ /** * Start an object iteration. * * @returns Whether the object had any fields (returns false for empty). 
* @error INCORRECT_TYPE if there is no opening { */ simdjson_warn_unused simdjson_inline simdjson_result start_object() noexcept; /** * Start an object iteration from the root. * * @returns Whether the object had any fields (returns false for empty). * @error INCORRECT_TYPE if there is no opening { * @error TAPE_ERROR if there is no matching } at end of document */ simdjson_warn_unused simdjson_inline simdjson_result start_root_object() noexcept; /** * Checks whether an object could be started from the root. May be called by start_root_object. * * @returns SUCCESS if it is possible to safely start an object from the root (document level). * @error INCORRECT_TYPE if there is no opening { * @error TAPE_ERROR if there is no matching } at end of document */ simdjson_warn_unused simdjson_inline error_code check_root_object() noexcept; /** * Start an object iteration after the user has already checked and moved past the {. * * Does not move the iterator unless the object is empty ({}). * * @returns Whether the object had any fields (returns false for empty). * @error INCOMPLETE_ARRAY_OR_OBJECT If there are no more tokens (implying the *parent* * array or object is incomplete). */ simdjson_warn_unused simdjson_inline simdjson_result started_object() noexcept; /** * Start an object iteration from the root, after the user has already checked and moved past the {. * * Does not move the iterator unless the object is empty ({}). * * @returns Whether the object had any fields (returns false for empty). * @error INCOMPLETE_ARRAY_OR_OBJECT If there are no more tokens (implying the *parent* * array or object is incomplete). */ simdjson_warn_unused simdjson_inline simdjson_result started_root_object() noexcept; /** * Moves to the next field in an object. * * Looks for , and }. If } is found, the object is finished and the iterator advances past it. * Otherwise, it advances to the next value. * * @return whether there is another field in the object. 
* @error TAPE_ERROR If there is a comma missing between fields. * @error TAPE_ERROR If there is a comma, but not enough tokens remaining to have a key, :, and value. */ simdjson_warn_unused simdjson_inline simdjson_result has_next_field() noexcept; /** * Get the current field's key. */ simdjson_warn_unused simdjson_inline simdjson_result field_key() noexcept; /** * Pass the : in the field and move to its value. */ simdjson_warn_unused simdjson_inline error_code field_value() noexcept; /** * Find the next field with the given key. * * Assumes you have called next_field() or otherwise matched the previous value. * * This means the iterator must be sitting at the next key: * * ``` * { "a": 1, "b": 2 } * ^ * ``` * * Key is *raw JSON,* meaning it will be matched against the verbatim JSON without attempting to * unescape it. This works well for typical ASCII and UTF-8 keys (almost all of them), but may * fail to match some keys with escapes (\u, \n, etc.). */ simdjson_warn_unused simdjson_inline error_code find_field(const std::string_view key) noexcept; /** * Find the next field with the given key, *without* unescaping. This assumes object order: it * will not find the field if it was already passed when looking for some *other* field. * * Assumes you have called next_field() or otherwise matched the previous value. * * This means the iterator must be sitting at the next key: * * ``` * { "a": 1, "b": 2 } * ^ * ``` * * Key is *raw JSON,* meaning it will be matched against the verbatim JSON without attempting to * unescape it. This works well for typical ASCII and UTF-8 keys (almost all of them), but may * fail to match some keys with escapes (\u, \n, etc.). */ simdjson_warn_unused simdjson_inline simdjson_result find_field_raw(const std::string_view key) noexcept; /** * Find the field with the given key without regard to order, and *without* unescaping. 
* * This is an unordered object lookup: if the field is not found initially, it will cycle around and scan from the beginning. * * Assumes you have called next_field() or otherwise matched the previous value. * * This means the iterator must be sitting at the next key: * * ``` * { "a": 1, "b": 2 } * ^ * ``` * * Key is *raw JSON,* meaning it will be matched against the verbatim JSON without attempting to * unescape it. This works well for typical ASCII and UTF-8 keys (almost all of them), but may * fail to match some keys with escapes (\u, \n, etc.). */ simdjson_warn_unused simdjson_inline simdjson_result find_field_unordered_raw(const std::string_view key) noexcept; /** @} */ /** * @addtogroup array Array iteration * Methods to iterate over array elements. These methods generally *assume* the value is actually * an array; the caller is responsible for keeping track of that fact. * @{ */ /** * Check for an opening [ and start an array iteration. * * @returns Whether the array had any elements (returns false for empty). * @error INCORRECT_TYPE If there is no [. */ simdjson_warn_unused simdjson_inline simdjson_result start_array() noexcept; /** * Check for an opening [ and start an array iteration while at the root. * * @returns Whether the array had any elements (returns false for empty). * @error INCORRECT_TYPE If there is no [. * @error TAPE_ERROR if there is no matching ] at end of document */ simdjson_warn_unused simdjson_inline simdjson_result start_root_array() noexcept; /** * Checks whether an array could be started from the root. May be called by start_root_array. * * @returns SUCCESS if it is possible to safely start an array from the root (document level). * @error INCORRECT_TYPE If there is no [. * @error TAPE_ERROR if there is no matching ] at end of document */ simdjson_warn_unused simdjson_inline error_code check_root_array() noexcept; /** * Start an array iteration, after the user has already checked and moved past the [.
* * Does not move the iterator unless the array is empty ([]). * * @returns Whether the array had any elements (returns false for empty). * @error INCOMPLETE_ARRAY_OR_OBJECT If there are no more tokens (implying the *parent* * array or object is incomplete). */ simdjson_warn_unused simdjson_inline simdjson_result started_array() noexcept; /** * Start an array iteration from the root, after the user has already checked and moved past the [. * * Does not move the iterator unless the array is empty ([]). * * @returns Whether the array had any elements (returns false for empty). * @error INCOMPLETE_ARRAY_OR_OBJECT If there are no more tokens (implying the *parent* * array or object is incomplete). */ simdjson_warn_unused simdjson_inline simdjson_result started_root_array() noexcept; /** * Moves to the next element in an array. * * Looks for , and ]. If ] is found, the array is finished and the iterator advances past it. * Otherwise, it advances to the next value. * * @return Whether there is another element in the array. * @error TAPE_ERROR If there is a comma missing between elements. */ simdjson_warn_unused simdjson_inline simdjson_result has_next_element() noexcept; /** * Get a child value iterator. 
*/ simdjson_warn_unused simdjson_inline value_iterator child() const noexcept; /** @} */ /** * @defgroup scalar Scalar values * @addtogroup scalar * @{ */ simdjson_warn_unused simdjson_inline simdjson_result get_string(bool allow_replacement) noexcept; template simdjson_warn_unused simdjson_inline error_code get_string(string_type& receiver, bool allow_replacement) noexcept; simdjson_warn_unused simdjson_inline simdjson_result get_wobbly_string() noexcept; simdjson_warn_unused simdjson_inline simdjson_result get_raw_json_string() noexcept; simdjson_warn_unused simdjson_inline simdjson_result get_uint64() noexcept; simdjson_warn_unused simdjson_inline simdjson_result get_uint64_in_string() noexcept; simdjson_warn_unused simdjson_inline simdjson_result get_int64() noexcept; simdjson_warn_unused simdjson_inline simdjson_result get_int64_in_string() noexcept; simdjson_warn_unused simdjson_inline simdjson_result get_double() noexcept; simdjson_warn_unused simdjson_inline simdjson_result get_double_in_string() noexcept; simdjson_warn_unused simdjson_inline simdjson_result get_bool() noexcept; simdjson_warn_unused simdjson_inline simdjson_result is_null() noexcept; simdjson_warn_unused simdjson_inline bool is_negative() noexcept; simdjson_warn_unused simdjson_inline simdjson_result is_integer() noexcept; simdjson_warn_unused simdjson_inline simdjson_result get_number_type() noexcept; simdjson_warn_unused simdjson_inline simdjson_result get_number() noexcept; simdjson_warn_unused simdjson_inline simdjson_result get_root_string(bool check_trailing, bool allow_replacement) noexcept; template simdjson_warn_unused simdjson_inline error_code get_root_string(string_type& receiver, bool check_trailing, bool allow_replacement) noexcept; simdjson_warn_unused simdjson_inline simdjson_result get_root_wobbly_string(bool check_trailing) noexcept; simdjson_warn_unused simdjson_inline simdjson_result get_root_raw_json_string(bool check_trailing) noexcept; simdjson_warn_unused 
simdjson_inline simdjson_result get_root_uint64(bool check_trailing) noexcept; simdjson_warn_unused simdjson_inline simdjson_result get_root_uint64_in_string(bool check_trailing) noexcept; simdjson_warn_unused simdjson_inline simdjson_result get_root_int64(bool check_trailing) noexcept; simdjson_warn_unused simdjson_inline simdjson_result get_root_int64_in_string(bool check_trailing) noexcept; simdjson_warn_unused simdjson_inline simdjson_result get_root_double(bool check_trailing) noexcept; simdjson_warn_unused simdjson_inline simdjson_result get_root_double_in_string(bool check_trailing) noexcept; simdjson_warn_unused simdjson_inline simdjson_result get_root_bool(bool check_trailing) noexcept; simdjson_warn_unused simdjson_inline bool is_root_negative() noexcept; simdjson_warn_unused simdjson_inline simdjson_result is_root_integer(bool check_trailing) noexcept; simdjson_warn_unused simdjson_inline simdjson_result get_root_number_type(bool check_trailing) noexcept; simdjson_warn_unused simdjson_inline simdjson_result get_root_number(bool check_trailing) noexcept; simdjson_warn_unused simdjson_inline simdjson_result is_root_null(bool check_trailing) noexcept; simdjson_inline error_code error() const noexcept; simdjson_inline uint8_t *&string_buf_loc() noexcept; simdjson_inline const json_iterator &json_iter() const noexcept; simdjson_inline json_iterator &json_iter() noexcept; simdjson_inline void assert_is_valid() const noexcept; simdjson_inline bool is_valid() const noexcept; /** @} */ protected: /** * Restarts an array iteration. * @returns Whether the array has any elements (returns false for empty). */ simdjson_inline simdjson_result reset_array() noexcept; /** * Restarts an object iteration. * @returns Whether the object has any fields (returns false for empty). */ simdjson_inline simdjson_result reset_object() noexcept; /** * move_at_start(): moves us so that we are pointing at the beginning of * the container. 
It updates the index so that at_start() is true and it * syncs the depth. The user can then create a new container instance. * * Usage: used with value::count_elements(). **/ simdjson_inline void move_at_start() noexcept; /** * move_at_container_start(): moves us so that we are pointing at the beginning of * the container so that assert_at_container_start() passes. * * Usage: used with reset_array() and reset_object(). **/ simdjson_inline void move_at_container_start() noexcept; /* Useful for debugging and logging purposes. */ inline std::string to_string() const noexcept; simdjson_inline value_iterator(json_iterator *json_iter, depth_t depth, token_position start_index) noexcept; simdjson_inline simdjson_result parse_null(const uint8_t *json) const noexcept; simdjson_inline simdjson_result parse_bool(const uint8_t *json) const noexcept; simdjson_inline const uint8_t *peek_start() const noexcept; simdjson_inline uint32_t peek_start_length() const noexcept; /** * The general idea of the advance_... methods and the peek_* methods * is that you first peek and check that you have desired type. If you do, * and only if you do, then you advance. * * We used to unconditionally advance. But this made reasoning about our * current state difficult. * Suppose you always advance. Look at the 'value' matching the key * "shadowable" in the following example... * * ({"globals":{"a":{"shadowable":[}}}}) * * If the user thinks it is a Boolean and asks for it, then we check the '[', * decide it is not a Boolean, but still move into the next character ('}'). Now * we are left pointing at '}' right after a '['. And we have not yet reported * an error, only that we do not have a Boolean. * * If, instead, you just stand your ground until it is content that you know, then * you will only even move beyond the '[' if the user tells you that you have an * array. 
So you will be at the '}' character inside the array and, hopefully, you * will then catch the error because an array cannot start with '}', but the code * processing Boolean values does not know this. * * So the contract is: first call 'peek_...' and then call 'advance_...' only * if you have determined that it is a type you can handle. * * Unfortunately, it makes the code more verbose, longer and maybe more error prone. */ simdjson_inline void advance_scalar(const char *type) noexcept; simdjson_inline void advance_root_scalar(const char *type) noexcept; simdjson_inline void advance_non_root_scalar(const char *type) noexcept; simdjson_inline const uint8_t *peek_scalar(const char *type) noexcept; simdjson_inline const uint8_t *peek_root_scalar(const char *type) noexcept; simdjson_inline const uint8_t *peek_non_root_scalar(const char *type) noexcept; simdjson_inline error_code start_container(uint8_t start_char, const char *incorrect_type_message, const char *type) noexcept; simdjson_inline error_code end_container() noexcept; /** * Advance to a place expecting a value (increasing depth). * * @return The current token (the one left behind). * @error TAPE_ERROR If the document ended early. */ simdjson_inline simdjson_result advance_to_value() noexcept; simdjson_inline error_code incorrect_type_error(const char *message) const noexcept; simdjson_inline error_code error_unless_more_tokens(uint32_t tokens=1) const noexcept; simdjson_inline bool is_at_start() const noexcept; /** * is_at_iterator_start() returns true on an array or object after it has just been * created, whether the instance is empty or not. * * Usage: used by array::begin() in debug mode (SIMDJSON_DEVELOPMENT_CHECKS) */ simdjson_inline bool is_at_iterator_start() const noexcept; /** * Assuming that we are within an object, this returns true if we * are pointing at a key. * * Usage: the skip_child() method should never be used while we are pointing * at a key inside an object. 
*/ simdjson_inline bool is_at_key() const noexcept; inline void assert_at_start() const noexcept; inline void assert_at_container_start() const noexcept; inline void assert_at_root() const noexcept; inline void assert_at_child() const noexcept; inline void assert_at_next() const noexcept; inline void assert_at_non_root_start() const noexcept; /** Get the starting position of this value */ simdjson_inline token_position start_position() const noexcept; /** @copydoc error_code json_iterator::position() const noexcept; */ simdjson_inline token_position position() const noexcept; /** @copydoc error_code json_iterator::end_position() const noexcept; */ simdjson_inline token_position last_position() const noexcept; /** @copydoc error_code json_iterator::end_position() const noexcept; */ simdjson_inline token_position end_position() const noexcept; /** @copydoc error_code json_iterator::report_error(error_code error, const char *message) noexcept; */ simdjson_inline error_code report_error(error_code error, const char *message) noexcept; friend class document; friend class object; friend class array; friend class value; }; // value_iterator } // namespace ondemand } // namespace westmere } // namespace simdjson namespace simdjson { template<> struct simdjson_result : public westmere::implementation_simdjson_result_base { public: simdjson_inline simdjson_result(westmere::ondemand::value_iterator &&value) noexcept; ///< @private simdjson_inline simdjson_result(error_code error) noexcept; ///< @private simdjson_inline simdjson_result() noexcept = default; }; } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_VALUE_ITERATOR_H /* end file simdjson/generic/ondemand/value_iterator.h for westmere */ /* including simdjson/generic/ondemand/value.h for westmere: #include "simdjson/generic/ondemand/value.h" */ /* begin file simdjson/generic/ondemand/value.h for westmere */ #ifndef SIMDJSON_GENERIC_ONDEMAND_VALUE_H /* amalgamation skipped (editor-only): #ifndef 
SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_VALUE_H */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { namespace westmere { namespace ondemand { /** * An ephemeral JSON value returned during iteration. It is only valid for as long as you do * not access more data in the JSON document. */ class value { public: /** * Create a new invalid value. * * Exists so you can declare a variable and later assign to it before use. */ simdjson_inline value() noexcept = default; /** * Get this value as the given type. * * Supported types: object, array, raw_json_string, string_view, uint64_t, int64_t, double, bool * * You may use get_double(), get_bool(), get_uint64(), get_int64(), * get_object(), get_array(), get_raw_json_string(), or get_string() instead. * * @returns A value of the given type, parsed from the JSON. * @returns INCORRECT_TYPE If the JSON value is not the given type. */ template simdjson_inline simdjson_result get() noexcept { // Unless the simdjson library provides an inline implementation, calling this method should // immediately fail. static_assert(!sizeof(T), "The get method with given type is not implemented by the simdjson library. " "The supported types are ondemand::object, ondemand::array, raw_json_string, std::string_view, uint64_t, " "int64_t, double, and bool. We recommend you use get_double(), get_bool(), get_uint64(), get_int64(), " " get_object(), get_array(), get_raw_json_string(), or get_string() instead of the get template."); } /** * Get this value as the given type. 
* * Supported types: object, array, raw_json_string, string_view, uint64_t, int64_t, double, bool * * @param out This is set to a value of the given type, parsed from the JSON. If there is an error, this may not be initialized. * @returns INCORRECT_TYPE If the JSON value is not the given type. * @returns SUCCESS If the parse succeeded and the out parameter was set to the value. */ template simdjson_inline error_code get(T &out) noexcept; /** * Cast this JSON value to an array. * * @returns An object that can be used to iterate the array. * @returns INCORRECT_TYPE If the JSON value is not an array. */ simdjson_inline simdjson_result get_array() noexcept; /** * Cast this JSON value to an object. * * @returns An object that can be used to look up or iterate fields. * @returns INCORRECT_TYPE If the JSON value is not an object. */ simdjson_inline simdjson_result get_object() noexcept; /** * Cast this JSON value to an unsigned integer. * * @returns An unsigned 64-bit integer. * @returns INCORRECT_TYPE If the JSON value is not a 64-bit unsigned integer. */ simdjson_inline simdjson_result get_uint64() noexcept; /** * Cast this JSON value (inside string) to an unsigned integer. * * @returns An unsigned 64-bit integer. * @returns INCORRECT_TYPE If the JSON value is not a 64-bit unsigned integer. */ simdjson_inline simdjson_result get_uint64_in_string() noexcept; /** * Cast this JSON value to a signed integer. * * @returns A signed 64-bit integer. * @returns INCORRECT_TYPE If the JSON value is not a 64-bit integer. */ simdjson_inline simdjson_result get_int64() noexcept; /** * Cast this JSON value (inside string) to a signed integer. * * @returns A signed 64-bit integer. * @returns INCORRECT_TYPE If the JSON value is not a 64-bit integer. */ simdjson_inline simdjson_result get_int64_in_string() noexcept; /** * Cast this JSON value to a double. * * @returns A double. * @returns INCORRECT_TYPE If the JSON value is not a valid floating-point number.
*/ simdjson_inline simdjson_result get_double() noexcept; /** * Cast this JSON value (inside string) to a double. * * @returns A double. * @returns INCORRECT_TYPE If the JSON value is not a valid floating-point number. */ simdjson_inline simdjson_result get_double_in_string() noexcept; /** * Cast this JSON value to a string. * * The string is guaranteed to be valid UTF-8. * * Equivalent to get(). * * Important: a value should be consumed once. Calling get_string() twice on the same value * is an error. * * @returns An UTF-8 string. The string is stored in the parser and will be invalidated the next * time it parses a document or when it is destroyed. * @returns INCORRECT_TYPE if the JSON value is not a string. */ simdjson_inline simdjson_result get_string(bool allow_replacement = false) noexcept; /** * Attempts to fill the provided std::string reference with the parsed value of the current string. * * The string is guaranteed to be valid UTF-8. * * Important: a value should be consumed once. Calling get_string() twice on the same value * is an error. * * Performance: This method may be slower than get_string() or get_string(bool) because it may need to allocate memory. * We recommend you avoid allocating an std::string unless you need to. * * @returns INCORRECT_TYPE if the JSON value is not a string. Otherwise, we return SUCCESS. */ template simdjson_inline error_code get_string(string_type& receiver, bool allow_replacement = false) noexcept; /** * Cast this JSON value to a "wobbly" string. * * The string may not be a valid UTF-8 string. * See https://simonsapin.github.io/wtf-8/ * * Important: a value should be consumed once. Calling get_wobbly_string() twice on the same value * is an error. * * @returns An UTF-8 string. The string is stored in the parser and will be invalidated the next * time it parses a document or when it is destroyed. * @returns INCORRECT_TYPE if the JSON value is not a string.
*/ simdjson_inline simdjson_result get_wobbly_string() noexcept; /** * Cast this JSON value to a raw_json_string. * * The string is guaranteed to be valid UTF-8, and may have escapes in it (e.g. \\ or \n). * * @returns A pointer to the raw JSON for the given string. * @returns INCORRECT_TYPE if the JSON value is not a string. */ simdjson_inline simdjson_result get_raw_json_string() noexcept; /** * Cast this JSON value to a bool. * * @returns A bool value. * @returns INCORRECT_TYPE if the JSON value is not true or false. */ simdjson_inline simdjson_result get_bool() noexcept; /** * Checks if this JSON value is null. If and only if the value is * null, then it is consumed (we advance). If we find a token that * begins with 'n' but is not 'null', then an error is returned. * * @returns Whether the value is null. * @returns INCORRECT_TYPE If the JSON value begins with 'n' and is not 'null'. */ simdjson_inline simdjson_result is_null() noexcept; #if SIMDJSON_EXCEPTIONS /** * Cast this JSON value to an array. * * @returns An object that can be used to iterate the array. * @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not an array. */ simdjson_inline operator array() noexcept(false); /** * Cast this JSON value to an object. * * @returns An object that can be used to look up or iterate fields. * @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not an object. */ simdjson_inline operator object() noexcept(false); /** * Cast this JSON value to an unsigned integer. * * @returns An unsigned 64-bit integer. * @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not a 64-bit unsigned integer. */ simdjson_inline operator uint64_t() noexcept(false); /** * Cast this JSON value to a signed integer. * * @returns A signed 64-bit integer. * @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not a 64-bit integer. */ simdjson_inline operator int64_t() noexcept(false); /** * Cast this JSON value to a double. * * @returns A double.
* @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not a valid floating-point number. */ simdjson_inline operator double() noexcept(false); /** * Cast this JSON value to a string. * * The string is guaranteed to be valid UTF-8. * * Equivalent to get(). * * @returns An UTF-8 string. The string is stored in the parser and will be invalidated the next * time it parses a document or when it is destroyed. * @exception simdjson_error(INCORRECT_TYPE) if the JSON value is not a string. */ simdjson_inline operator std::string_view() noexcept(false); /** * Cast this JSON value to a raw_json_string. * * The string is guaranteed to be valid UTF-8, and may have escapes in it (e.g. \\ or \n). * * @returns A pointer to the raw JSON for the given string. * @exception simdjson_error(INCORRECT_TYPE) if the JSON value is not a string. */ simdjson_inline operator raw_json_string() noexcept(false); /** * Cast this JSON value to a bool. * * @returns A bool value. * @exception simdjson_error(INCORRECT_TYPE) if the JSON value is not true or false. */ simdjson_inline operator bool() noexcept(false); #endif /** * Begin array iteration. * * Part of the std::iterable interface. * * @returns INCORRECT_TYPE If the JSON value is not an array. */ simdjson_inline simdjson_result begin() & noexcept; /** * Sentinel representing the end of the array. * * Part of the std::iterable interface. */ simdjson_inline simdjson_result end() & noexcept; /** * This method scans the array and counts the number of elements. * The count_elements method should always be called before you have begun * iterating through the array: it is expected that you are pointing at * the beginning of the array. * The runtime complexity is linear in the size of the array. After * calling this function, if successful, the array is 'rewinded' at its * beginning as if it had never been accessed. If the JSON is malformed (e.g., * there is a missing comma), then an error is returned and it is no longer * safe to continue. 
* * Performance hint: You should only call count_elements() as a last * resort as it may require scanning the document twice or more. */ simdjson_inline simdjson_result count_elements() & noexcept; /** * This method scans the object and counts the number of key-value pairs. * The count_fields method should always be called before you have begun * iterating through the object: it is expected that you are pointing at * the beginning of the object. * The runtime complexity is linear in the size of the object. After * calling this function, if successful, the object is 'rewinded' at its * beginning as if it had never been accessed. If the JSON is malformed (e.g., * there is a missing comma), then an error is returned and it is no longer * safe to continue. * * To check that an object is empty, it is more performant to use * the is_empty() method on the object instance. * * Performance hint: You should only call count_fields() as a last * resort as it may require scanning the document twice or more. */ simdjson_inline simdjson_result count_fields() & noexcept; /** * Get the value at the given index in the array. This function has linear-time complexity. * This function should only be called once on an array instance since the array iterator is not reset between each call. * * @return The value at the given index, or: * - INDEX_OUT_OF_BOUNDS if the array index is larger than an array length */ simdjson_inline simdjson_result at(size_t index) noexcept; /** * Look up a field by name on an object (order-sensitive). 
* * The following code reads z, then y, then x, and thus will not retrieve x or y if fed the * JSON `{ "x": 1, "y": 2, "z": 3 }`: * * ```c++ * simdjson::ondemand::parser parser; * auto obj = parser.parse(R"( { "x": 1, "y": 2, "z": 3 } )"_padded); * double z = obj.find_field("z"); * double y = obj.find_field("y"); * double x = obj.find_field("x"); * ``` * If you have multiple fields with a matching key ({"x": 1, "x": 1}) be mindful * that only one field is returned. * **Raw Keys:** The lookup will be done against the *raw* key, and will not unescape keys. * e.g. `object["a"]` will match `{ "a": 1 }`, but will *not* match `{ "\u0061": 1 }`. * * @param key The key to look up. * @returns The value of the field, or NO_SUCH_FIELD if the field is not in the object. */ simdjson_inline simdjson_result find_field(std::string_view key) noexcept; /** @overload simdjson_inline simdjson_result find_field(std::string_view key) noexcept; */ simdjson_inline simdjson_result find_field(const char *key) noexcept; /** * Look up a field by name on an object, without regard to key order. * * **Performance Notes:** This is a bit less performant than find_field(), though its effect varies * and often appears negligible. It starts out normally, starting out at the last field; but if * the field is not found, it scans from the beginning of the object to see if it missed it. That * missing case has a non-cache-friendly bump and lots of extra scanning, especially if the object * in question is large. The fact that the extra code is there also bumps the executable size. * * It is the default, however, because it would be highly surprising (and hard to debug) if the * default behavior failed to look up a field just because it was in the wrong order--and many * APIs assume this. Therefore, you must be explicit if you want to treat objects as out of order. * * If you have multiple fields with a matching key ({"x": 1, "x": 1}) be mindful * that only one field is returned. 
* * Use find_field() if you are sure fields will be in order (or are willing to treat it as if the * field wasn't there when they aren't). * * @param key The key to look up. * @returns The value of the field, or NO_SUCH_FIELD if the field is not in the object. */ simdjson_inline simdjson_result find_field_unordered(std::string_view key) noexcept; /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) noexcept; */ simdjson_inline simdjson_result find_field_unordered(const char *key) noexcept; /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) noexcept; */ simdjson_inline simdjson_result operator[](std::string_view key) noexcept; /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) noexcept; */ simdjson_inline simdjson_result operator[](const char *key) noexcept; /** * Get the type of this JSON value. It does not validate or consume the value. * E.g., you must still call "is_null()" to check that a value is null even if * "type()" returns json_type::null. * * NOTE: If you're only expecting a value to be one type (a typical case), it's generally * better to just call .get_double, .get_string, etc. and check for INCORRECT_TYPE (or just * let it throw an exception). * * @return The type of JSON value (json_type::array, json_type::object, json_type::string, * json_type::number, json_type::boolean, or json_type::null). * @error TAPE_ERROR when the JSON value is a bad token like "}" "," or "alse". */ simdjson_inline simdjson_result type() noexcept; /** * Checks whether the value is a scalar (string, number, null, Boolean). * Returns false when it is an array or object. * * @returns true if the type is string, number, null, Boolean * @error TAPE_ERROR when the JSON value is a bad token like "}" "," or "alse". */ simdjson_inline simdjson_result is_scalar() noexcept; /** * Checks whether the value is a negative number. * * @returns true if the number is negative.
*/ simdjson_inline bool is_negative() noexcept; /** * Checks whether the value is an integer number. Note that * this requires partially parsing the number string. If * the value is determined to be an integer, it may still * not parse properly as an integer in subsequent steps * (e.g., it might overflow). * * Performance note: if you call this function systematically * before parsing a number, you may have fallen for a performance * anti-pattern. * * @returns true if the number is an integer. */ simdjson_inline simdjson_result is_integer() noexcept; /** * Determine the number type (integer or floating-point number) as quickly * as possible. This function does not fully validate the input. It is * useful when you only need to classify the numbers, without parsing them. * * If you are planning to retrieve the value or you need full validation, * consider using the get_number() method instead: it will fully parse * and validate the input, and give you access to the type: * get_number().get_number_type(). * * get_number_type() is number_type::unsigned_integer if we have * an integer greater than or equal to 9223372036854775808 * get_number_type() is number_type::signed_integer if we have an * integer that is less than 9223372036854775808 * Otherwise, get_number_type() has value number_type::floating_point_number * * This function requires processing the number string, but it is expected * to be faster than get_number().get_number_type() because it does not * parse the number value. * * @returns the type of the number */ simdjson_inline simdjson_result get_number_type() noexcept; /** * Attempt to parse an ondemand::number. An ondemand::number may * contain an integer value or a floating-point value, the simdjson * library will autodetect the type. Thus it is a dynamically typed * number. Before accessing the value, you must determine the detected * type.
* * number.get_number_type() is number_type::signed_integer if we have * an integer in [-9223372036854775808,9223372036854775808) * You can recover the value by calling number.get_int64() and you * have that number.is_int64() is true. * * number.get_number_type() is number_type::unsigned_integer if we have * an integer in [9223372036854775808,18446744073709551616) * You can recover the value by calling number.get_uint64() and you * have that number.is_uint64() is true. * * Otherwise, number.get_number_type() has value number_type::floating_point_number * and we have a binary64 number. * You can recover the value by calling number.get_double() and you * have that number.is_double() is true. * * You must check the type before accessing the value: it is an error * to call "get_int64()" when number.get_number_type() is not * number_type::signed_integer and when number.is_int64() is false. * * Performance note: this is designed with performance in mind. When * calling 'get_number()', you scan the number string only once, determining * efficiently the type and storing it in an efficient manner. */ simdjson_warn_unused simdjson_inline simdjson_result get_number() noexcept; /** * Get the raw JSON for this token. * * The string_view will always point into the input buffer. * * The string_view will start at the beginning of the token, and include the entire token * *as well as all spaces until the next token (or EOF).* This means, for example, that a * string token always begins with a " and is always terminated by the final ", possibly * followed by a number of spaces. * * The string_view is *not* null-terminated. However, if this is a scalar (string, number, * boolean, or null), the character after the end of the string_view is guaranteed to be * a non-space token. * * Tokens include: * - { * - [ * - "a string (possibly with UTF-8 or backslashed characters like \\\")". * - -1.2e-100 * - true * - false * - null * * See also value::raw_json(). 
*/ simdjson_inline std::string_view raw_json_token() noexcept; /** * Get a string_view pointing at this value in the JSON document. * If this element is an array or an object, it consumes the array or the object * and returns a string_view instance corresponding to the * array as represented in JSON. It points inside the original document. * If this element is a scalar (string, number, Boolean, null), it returns what * raw_json_token() would return. */ simdjson_inline simdjson_result raw_json() noexcept; /** * Returns the current location in the document if in bounds. */ simdjson_inline simdjson_result current_location() noexcept; /** * Returns the current depth in the document if in bounds. * * E.g., * 0 = finished with document * 1 = document root value (could be [ or {, not yet known) * 2 = , or } inside root array/object * 3 = key or value inside root array/object. */ simdjson_inline int32_t current_depth() const noexcept; /** * Get the value associated with the given JSON pointer. We use the RFC 6901 * https://tools.ietf.org/html/rfc6901 standard. * * ondemand::parser parser; * auto json = R"({ "foo": { "a": [ 10, 20, 30 ] }})"_padded; * auto doc = parser.iterate(json); * doc.at_pointer("/foo/a/1") == 20 * * It is allowed for a key to be the empty string: * * ondemand::parser parser; * auto json = R"({ "": { "a": [ 10, 20, 30 ] }})"_padded; * auto doc = parser.iterate(json); * doc.at_pointer("//a/1") == 20 * * Note that at_pointer() called on the document automatically calls the document's rewind * method between each call. It invalidates all previously accessed arrays, objects and values * that have not been consumed. * * Calling at_pointer() on non-document instances (e.g., arrays and objects) is not * standardized (by RFC 6901). We provide some experimental support for JSON pointers * on non-document instances. Yet it is not the case when calling at_pointer on an array * or an object instance: there is no rewind and no invalidation. 
* * You may only call at_pointer on an array after it has been created, but before it has * been first accessed. When calling at_pointer on an array, the pointer is advanced to * the location indicated by the JSON pointer (in case of success). It is no longer possible * to call at_pointer on the same array. * * You may call at_pointer more than once on an object, but each time the pointer is advanced * to be within the value matched by the key indicated by the JSON pointer query. Thus any preceding * key (as well as the current key) can no longer be used with following JSON pointer calls. * * Also note that at_pointer() relies on find_field() which implies that we do not unescape keys when matching * * @return The value associated with the given JSON pointer, or: * - NO_SUCH_FIELD if a field does not exist in an object * - INDEX_OUT_OF_BOUNDS if an array index is larger than an array length * - INCORRECT_TYPE if a non-integer is used to access an array * - INVALID_JSON_POINTER if the JSON pointer is invalid and cannot be parsed */ simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; protected: /** * Create a value. */ simdjson_inline value(const value_iterator &iter) noexcept; /** * Skip this value, allowing iteration to continue. */ simdjson_inline void skip() noexcept; /** * Start a value at the current position. * * (It should already be started; this is just a self-documentation method.) */ static simdjson_inline value start(const value_iterator &iter) noexcept; /** * Resume a value. 
*/ static simdjson_inline value resume(const value_iterator &iter) noexcept; /** * Get the object, starting or resuming it as necessary */ simdjson_inline simdjson_result start_or_resume_object() noexcept; // simdjson_inline void log_value(const char *type) const noexcept; // simdjson_inline void log_error(const char *message) const noexcept; value_iterator iter{}; friend class document; friend class array_iterator; friend class field; friend class object; friend struct simdjson_result; friend struct simdjson_result; }; } // namespace ondemand } // namespace westmere } // namespace simdjson namespace simdjson { /** * simdjson_result specialization that wraps a westmere::ondemand::value. It is constructible from * either a value or an error_code, as the two constructors below show. */ template<> struct simdjson_result : public westmere::implementation_simdjson_result_base { public: simdjson_inline simdjson_result(westmere::ondemand::value &&value) noexcept; ///< @private simdjson_inline simdjson_result(error_code error) noexcept; ///< @private simdjson_inline simdjson_result() noexcept = default; simdjson_inline simdjson_result get_array() noexcept; simdjson_inline simdjson_result get_object() noexcept; simdjson_inline simdjson_result get_uint64() noexcept; simdjson_inline simdjson_result get_uint64_in_string() noexcept; simdjson_inline simdjson_result get_int64() noexcept; simdjson_inline simdjson_result get_int64_in_string() noexcept; simdjson_inline simdjson_result get_double() noexcept; simdjson_inline simdjson_result get_double_in_string() noexcept; simdjson_inline simdjson_result get_string(bool allow_replacement = false) noexcept; template simdjson_inline error_code get_string(string_type& receiver, bool allow_replacement = false) noexcept; simdjson_inline simdjson_result get_wobbly_string() noexcept; simdjson_inline simdjson_result get_raw_json_string() noexcept; simdjson_inline simdjson_result get_bool() noexcept; simdjson_inline simdjson_result is_null() noexcept; template simdjson_inline simdjson_result get() noexcept; template simdjson_inline error_code get(T &out) noexcept; /* The conversion operators below are declared only under SIMDJSON_EXCEPTIONS and are noexcept(false). */ #if SIMDJSON_EXCEPTIONS simdjson_inline operator 
westmere::ondemand::array() noexcept(false); simdjson_inline operator westmere::ondemand::object() noexcept(false); simdjson_inline operator uint64_t() noexcept(false); simdjson_inline operator int64_t() noexcept(false); simdjson_inline operator double() noexcept(false); simdjson_inline operator std::string_view() noexcept(false); simdjson_inline operator westmere::ondemand::raw_json_string() noexcept(false); simdjson_inline operator bool() noexcept(false); #endif simdjson_inline simdjson_result count_elements() & noexcept; simdjson_inline simdjson_result count_fields() & noexcept; simdjson_inline simdjson_result at(size_t index) noexcept; simdjson_inline simdjson_result begin() & noexcept; simdjson_inline simdjson_result end() & noexcept; /** * Look up a field by name on an object (order-sensitive). * * The following code reads z, then y, then x, and thus will not retrieve x or y if fed the * JSON `{ "x": 1, "y": 2, "z": 3 }`: * * ```c++ * simdjson::ondemand::parser parser; * auto obj = parser.parse(R"( { "x": 1, "y": 2, "z": 3 } )"_padded); * double z = obj.find_field("z"); * double y = obj.find_field("y"); * double x = obj.find_field("x"); * ``` * * **Raw Keys:** The lookup will be done against the *raw* key, and will not unescape keys. * e.g. `object["a"]` will match `{ "a": 1 }`, but will *not* match `{ "\u0061": 1 }`. * * @param key The key to look up. * @returns The value of the field, or NO_SUCH_FIELD if the field is not in the object. */ simdjson_inline simdjson_result find_field(std::string_view key) noexcept; /** @overload simdjson_inline simdjson_result find_field(std::string_view key) noexcept; */ simdjson_inline simdjson_result find_field(const char *key) noexcept; /** * Look up a field by name on an object, without regard to key order. * * **Performance Notes:** This is a bit less performant than find_field(), though its effect varies * and often appears negligible. 
It starts out normally, starting out at the last field; but if * the field is not found, it scans from the beginning of the object to see if it missed it. That * missing case has a non-cache-friendly bump and lots of extra scanning, especially if the object * in question is large. The fact that the extra code is there also bumps the executable size. * * It is the default, however, because it would be highly surprising (and hard to debug) if the * default behavior failed to look up a field just because it was in the wrong order--and many * APIs assume this. Therefore, you must be explicit (by calling find_field()) if you want * order-sensitive lookup. * * Use find_field() if you are sure fields will be in order (or are willing to treat it as if the * field wasn't there when they aren't). * * @param key The key to look up. * @returns The value of the field, or NO_SUCH_FIELD if the field is not in the object. */ simdjson_inline simdjson_result find_field_unordered(std::string_view key) noexcept; /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) noexcept; */ simdjson_inline simdjson_result find_field_unordered(const char *key) noexcept; /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) noexcept; */ simdjson_inline simdjson_result operator[](std::string_view key) noexcept; /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) noexcept; */ simdjson_inline simdjson_result operator[](const char *key) noexcept; /** * Get the type of this JSON value. * * NOTE: If you're only expecting a value to be one type (a typical case), it's generally * better to just call .get_double, .get_string, etc. and check for INCORRECT_TYPE (or just * let it throw an exception). 
*/ simdjson_inline simdjson_result type() noexcept; simdjson_inline simdjson_result is_scalar() noexcept; simdjson_inline simdjson_result is_negative() noexcept; simdjson_inline simdjson_result is_integer() noexcept; simdjson_inline simdjson_result get_number_type() noexcept; simdjson_inline simdjson_result get_number() noexcept; /** @copydoc simdjson_inline std::string_view value::raw_json_token() const noexcept */ simdjson_inline simdjson_result raw_json_token() noexcept; simdjson_inline simdjson_result raw_json() noexcept; /** @copydoc simdjson_inline simdjson_result current_location() noexcept */ simdjson_inline simdjson_result current_location() noexcept; /** @copydoc simdjson_inline int32_t current_depth() const noexcept */ simdjson_inline simdjson_result current_depth() const noexcept; simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; }; } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_VALUE_H /* end file simdjson/generic/ondemand/value.h for westmere */ /* including simdjson/generic/ondemand/logger.h for westmere: #include "simdjson/generic/ondemand/logger.h" */ /* begin file simdjson/generic/ondemand/logger.h for westmere */ #ifndef SIMDJSON_GENERIC_ONDEMAND_LOGGER_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_LOGGER_H */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { namespace westmere { namespace ondemand { // Logging should be free unless SIMDJSON_VERBOSE_LOGGING is set. Importantly, it is critical // that the call to the log functions be side-effect free. Thus, for example, you should not // create temporary std::string instances. 
namespace logger { /** Severity tag passed to log_line() as its level argument. */ enum class log_level : int32_t { info = 0, error = 1 }; /** LOG_ENABLED is true only when SIMDJSON_VERBOSE_LOGGING evaluates to non-zero. */ #if SIMDJSON_VERBOSE_LOGGING static constexpr const bool LOG_ENABLED = true; #else static constexpr const bool LOG_ENABLED = false; #endif // We do not want these functions to be 'really inlined' since real inlining is // for performance purposes and if you are using the loggers, you do not care about // performance (or should not). static inline void log_headers() noexcept; // If args are provided, title will be treated as format string template static inline void log_line(const json_iterator &iter, token_position index, depth_t depth, const char *title_prefix, const char *title, std::string_view detail, logger::log_level level, Args&&... args) noexcept; template static inline void log_line(const json_iterator &iter, const char *title_prefix, const char *title, std::string_view detail, int delta, int depth_delta, logger::log_level level, Args&&... args) noexcept; static inline void log_event(const json_iterator &iter, const char *type, std::string_view detail="", int delta=0, int depth_delta=0) noexcept; static inline void log_value(const json_iterator &iter, token_position index, depth_t depth, const char *type, std::string_view detail="") noexcept; static inline void log_value(const json_iterator &iter, const char *type, std::string_view detail="", int delta=-1, int depth_delta=0) noexcept; static inline void log_start_value(const json_iterator &iter, token_position index, depth_t depth, const char *type, std::string_view detail="") noexcept; static inline void log_start_value(const json_iterator &iter, const char *type, int delta=-1, int depth_delta=0) noexcept; static inline void log_end_value(const json_iterator &iter, const char *type, int delta=-1, int depth_delta=0) noexcept; static inline void log_error(const json_iterator &iter, token_position index, depth_t depth, const char *error, const char *detail="") noexcept; static inline void log_error(const json_iterator &iter, const 
char *error, const char *detail="", int delta=-1, int depth_delta=0) noexcept; /* The overloads below take a value_iterator instead of a json_iterator. */ static inline void log_event(const value_iterator &iter, const char *type, std::string_view detail="", int delta=0, int depth_delta=0) noexcept; static inline void log_value(const value_iterator &iter, const char *type, std::string_view detail="", int delta=-1, int depth_delta=0) noexcept; static inline void log_start_value(const value_iterator &iter, const char *type, int delta=-1, int depth_delta=0) noexcept; static inline void log_end_value(const value_iterator &iter, const char *type, int delta=-1, int depth_delta=0) noexcept; static inline void log_error(const value_iterator &iter, const char *error, const char *detail="", int delta=-1, int depth_delta=0) noexcept; } // namespace logger } // namespace ondemand } // namespace westmere } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_LOGGER_H /* end file simdjson/generic/ondemand/logger.h for westmere */ /* including simdjson/generic/ondemand/token_iterator.h for westmere: #include "simdjson/generic/ondemand/token_iterator.h" */ /* begin file simdjson/generic/ondemand/token_iterator.h for westmere */ #ifndef SIMDJSON_GENERIC_ONDEMAND_TOKEN_ITERATOR_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_TOKEN_ITERATOR_H */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/logger.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { namespace westmere { namespace ondemand { /** * Iterates through JSON tokens (`{` `}` `[` `]` `,` `:` `""` `123` `true` `false` `null`) * detected by stage 1. * * @private This is not intended for external use. 
*/ class token_iterator { public: /** * Create a new invalid token_iterator. * * Exists so you can declare a variable and later assign to it before use. */ simdjson_inline token_iterator() noexcept = default; simdjson_inline token_iterator(token_iterator &&other) noexcept = default; simdjson_inline token_iterator &operator=(token_iterator &&other) noexcept = default; simdjson_inline token_iterator(const token_iterator &other) noexcept = default; simdjson_inline token_iterator &operator=(const token_iterator &other) noexcept = default; /** * Advance to the next token (returning the current one). */ simdjson_inline const uint8_t *return_current_and_advance() noexcept; /** * Reports the current offset in bytes from the start of the underlying buffer. */ simdjson_inline uint32_t current_offset() const noexcept; /** * Get the JSON text for a given token (relative). * * This is not null-terminated; it is a view into the JSON. * * @param delta The relative position of the token to retrieve. e.g. 0 = current token, * 1 = next token, -1 = prev token. * * TODO consider a string_view, assuming the length will get stripped out by the optimizer when * it isn't used ... */ simdjson_inline const uint8_t *peek(int32_t delta=0) const noexcept; /** * Get the maximum length of the JSON text for a given token. * * The length will include any whitespace at the end of the token. * * @param delta The relative position of the token to retrieve. e.g. 0 = current token, * 1 = next token, -1 = prev token. */ simdjson_inline uint32_t peek_length(int32_t delta=0) const noexcept; /** * Get the JSON text for a given token. * * This is not null-terminated; it is a view into the JSON. * * @param position The position of the token. * */ simdjson_inline const uint8_t *peek(token_position position) const noexcept; /** * Get the maximum length of the JSON text for a given token. * * The length will include any whitespace at the end of the token. * * @param position The position of the token. 
*/ simdjson_inline uint32_t peek_length(token_position position) const noexcept; /** * Return the current index. */ simdjson_inline token_position position() const noexcept; /** * Reset to a previously saved index. */ simdjson_inline void set_position(token_position target_position) noexcept; // NOTE: we don't support a full C++ iterator interface, because we expect people to make // different calls to advance the iterator based on *their own* state. simdjson_inline bool operator==(const token_iterator &other) const noexcept; simdjson_inline bool operator!=(const token_iterator &other) const noexcept; simdjson_inline bool operator>(const token_iterator &other) const noexcept; simdjson_inline bool operator>=(const token_iterator &other) const noexcept; simdjson_inline bool operator<(const token_iterator &other) const noexcept; simdjson_inline bool operator<=(const token_iterator &other) const noexcept; protected: simdjson_inline token_iterator(const uint8_t *buf, token_position position) noexcept; /** * Get the index of the JSON text for a given token (relative). * * Unlike peek(), this returns a uint32_t index rather than a pointer to the text. * * @param delta The relative position of the token to retrieve. e.g. 0 = current token, * 1 = next token, -1 = prev token. */ simdjson_inline uint32_t peek_index(int32_t delta=0) const noexcept; /** * Get the index of the JSON text for a given token. * * Unlike peek(), this returns a uint32_t index rather than a pointer to the text. * * @param position The position of the token. * */ simdjson_inline uint32_t peek_index(token_position position) const noexcept; const uint8_t *buf{}; token_position _position{}; friend class json_iterator; friend class value_iterator; friend class object; template friend simdjson_inline void logger::log_line(const json_iterator &iter, const char *title_prefix, const char *title, std::string_view detail, int delta, int depth_delta, logger::log_level level, Args&&... 
args) noexcept; template friend simdjson_inline void logger::log_line(const json_iterator &iter, token_position index, depth_t depth, const char *title_prefix, const char *title, std::string_view detail, logger::log_level level, Args&&... args) noexcept; }; } // namespace ondemand } // namespace westmere } // namespace simdjson namespace simdjson { /** simdjson_result specialization constructible from a westmere::ondemand::token_iterator or an error_code. */ template<> struct simdjson_result : public westmere::implementation_simdjson_result_base { public: simdjson_inline simdjson_result(westmere::ondemand::token_iterator &&value) noexcept; ///< @private simdjson_inline simdjson_result(error_code error) noexcept; ///< @private simdjson_inline simdjson_result() noexcept = default; simdjson_inline ~simdjson_result() noexcept = default; ///< @private }; } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_TOKEN_ITERATOR_H /* end file simdjson/generic/ondemand/token_iterator.h for westmere */ /* including simdjson/generic/ondemand/json_iterator.h for westmere: #include "simdjson/generic/ondemand/json_iterator.h" */ /* begin file simdjson/generic/ondemand/json_iterator.h for westmere */ #ifndef SIMDJSON_GENERIC_ONDEMAND_JSON_ITERATOR_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_JSON_ITERATOR_H */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/token_iterator.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { namespace westmere { namespace ondemand { /** * Iterates through JSON tokens, keeping track of depth and string buffer. * * @private This is not intended for external use. 
*/ class json_iterator { protected: token_iterator token{}; ondemand::parser *parser{}; /** * Next free location in the string buffer. * * Used by raw_json_string::unescape() to have a place to unescape strings to. */ uint8_t *_string_buf_loc{}; /** * JSON error, if there is one. * * INCORRECT_TYPE and NO_SUCH_FIELD are *not* stored here, ever. * * PERF NOTE: we *hope* this will be elided into control flow, as it is only used (a) in the first * iteration of the loop, or (b) for the final iteration after a missing comma is found in ++. If * this is not elided, we should make sure it's at least not using up a register. Failing that, * we should store it in document so there's only one of them. */ error_code error{SUCCESS}; /** * Depth of the current token in the JSON. * * - 0 = finished with document * - 1 = document root value (could be [ or {, not yet known) * - 2 = , or } inside root array/object * - 3 = key or value inside root array/object. */ depth_t _depth{}; /** * Beginning of the document indexes. * Normally we have root == parser->implementation->structural_indexes.get() * but this may differ, especially in streaming mode (where we have several * documents). */ token_position _root{}; /** * Normally, a json_iterator operates over a single document, but in * some cases, we may have a stream of documents. This attribute is meant * as meta-data: the json_iterator works the same irrespective of the * value of this attribute. */ bool _streaming{false}; public: simdjson_inline json_iterator() noexcept = default; simdjson_inline json_iterator(json_iterator &&other) noexcept; simdjson_inline json_iterator &operator=(json_iterator &&other) noexcept; simdjson_inline explicit json_iterator(const json_iterator &other) noexcept = default; simdjson_inline json_iterator &operator=(const json_iterator &other) noexcept = default; /** * Skips a JSON value, whether it is a scalar, array or object. 
*/ simdjson_warn_unused simdjson_inline error_code skip_child(depth_t parent_depth) noexcept; /** * Tell whether the iterator is still at the start */ simdjson_inline bool at_root() const noexcept; /** * Tell whether we should be expected to run in streaming * mode (iterating over many documents). It is pure metadata * that does not affect how the iterator works. It is used by * start_root_array() and start_root_object(). */ simdjson_inline bool streaming() const noexcept; /** * Get the token position of the document root */ simdjson_inline token_position root_position() const noexcept; /** * Assert that we are at the document depth (== 1) */ simdjson_inline void assert_at_document_depth() const noexcept; /** * Assert that we are at the root of the document */ simdjson_inline void assert_at_root() const noexcept; /** * Tell whether the iterator is at the EOF mark */ simdjson_inline bool at_end() const noexcept; /** * Tell whether the iterator is live (has not been moved). */ simdjson_inline bool is_alive() const noexcept; /** * Abandon this iterator, setting depth to 0 (as if the document is finished). */ simdjson_inline void abandon() noexcept; /** * Advance the current token without modifying depth. */ simdjson_inline const uint8_t *return_current_and_advance() noexcept; /** * Returns true if there is a single token in the index (i.e., it is * a JSON with a scalar value such as a single number). * * @return whether there is a single token */ simdjson_inline bool is_single_token() const noexcept; /** * Assert that there are at least the given number of tokens left. * * Has no effect in release builds. */ simdjson_inline void assert_more_tokens(uint32_t required_tokens=1) const noexcept; /** * Assert that the given position addresses an actual token (is within bounds). * * Has no effect in release builds. */ simdjson_inline void assert_valid_position(token_position position) const noexcept; /** * Get the JSON text for a given token (relative). 
* * This is not null-terminated; it is a view into the JSON. * * @param delta The relative position of the token to retrieve. e.g. 0 = next token, -1 = prev token. * * TODO consider a string_view, assuming the length will get stripped out by the optimizer when * it isn't used ... */ simdjson_inline const uint8_t *peek(int32_t delta=0) const noexcept; /** * Get the maximum length of the JSON text for the current token (or relative). * * The length will include any whitespace at the end of the token. * * @param delta The relative position of the token to retrieve. e.g. 0 = next token, -1 = prev token. */ simdjson_inline uint32_t peek_length(int32_t delta=0) const noexcept; /** * Get a pointer to the current location in the input buffer. * * This is not null-terminated; it is a view into the JSON. * * You may be pointing outside of the input buffer: it is not generally * safe to dereference this pointer. */ simdjson_inline const uint8_t *unsafe_pointer() const noexcept; /** * Get the JSON text for a given token. * * This is not null-terminated; it is a view into the JSON. * * @param position The position of the token to retrieve. * * TODO consider a string_view, assuming the length will get stripped out by the optimizer when * it isn't used ... */ simdjson_inline const uint8_t *peek(token_position position) const noexcept; /** * Get the maximum length of the JSON text for the token at the given position. * * The length will include any whitespace at the end of the token. * * @param position The position of the token to retrieve. */ simdjson_inline uint32_t peek_length(token_position position) const noexcept; /** * Get the JSON text for the last token in the document. * * This is not null-terminated; it is a view into the JSON. * * TODO consider a string_view, assuming the length will get stripped out by the optimizer when * it isn't used ... */ simdjson_inline const uint8_t *peek_last() const noexcept; /** * Ascend one level. 
* * Validates that the depth - 1 == parent_depth. * * @param parent_depth the expected parent depth. */ simdjson_inline void ascend_to(depth_t parent_depth) noexcept; /** * Descend one level. * * Validates that the new depth == child_depth. * * @param child_depth the expected child depth. */ simdjson_inline void descend_to(depth_t child_depth) noexcept; simdjson_inline void descend_to(depth_t child_depth, int32_t delta) noexcept; /** * Get current depth. */ simdjson_inline depth_t depth() const noexcept; /** * Get current (writeable) location in the string buffer. */ simdjson_inline uint8_t *&string_buf_loc() noexcept; /** * Report an unrecoverable error, preventing further iteration. * * @param error The error to report. Must not be SUCCESS, UNINITIALIZED, INCORRECT_TYPE, or NO_SUCH_FIELD. * @param message An error message to report with the error. */ simdjson_inline error_code report_error(error_code error, const char *message) noexcept; /** * Log error, but don't stop iteration. * @param error The error to report. Must be INCORRECT_TYPE or NO_SUCH_FIELD. * @param message An error message to report with the error. */ simdjson_inline error_code optional_error(error_code error, const char *message) noexcept; /** * Take an input in json containing max_len characters and attempt to copy it over to tmpbuf, a buffer with * N bytes of capacity. It will return false if N is too small (smaller than max_len) or if it is zero. * The buffer (tmpbuf) is padded with space characters. */ simdjson_warn_unused simdjson_inline bool copy_to_buffer(const uint8_t *json, uint32_t max_len, uint8_t *tmpbuf, size_t N) noexcept; simdjson_inline token_position position() const noexcept; /** * Write the raw_json_string to the string buffer and return a string_view. * Each raw_json_string should be unescaped once, or else the string buffer might * overflow. 
*/ simdjson_inline simdjson_result unescape(raw_json_string in, bool allow_replacement) noexcept; simdjson_inline simdjson_result unescape_wobbly(raw_json_string in) noexcept; simdjson_inline void reenter_child(token_position position, depth_t child_depth) noexcept; simdjson_inline error_code consume_character(char c) noexcept; /* The two accessors below are declared only when SIMDJSON_DEVELOPMENT_CHECKS is set. */ #if SIMDJSON_DEVELOPMENT_CHECKS simdjson_inline token_position start_position(depth_t depth) const noexcept; simdjson_inline void set_start_position(depth_t depth, token_position position) noexcept; #endif /* Useful for debugging and logging purposes. */ inline std::string to_string() const noexcept; /** * Returns the current location in the document if in bounds. */ inline simdjson_result current_location() const noexcept; /** * Updates this json iterator so that it is back at the beginning of the document, * as if it had just been created. */ inline void rewind() noexcept; /** * This checks whether the {,},[,] are balanced so that the document * ends with proper zero depth. This requires scanning the whole document * and it may be expensive. It is expected that it will be rarely called. * It does not attempt to match { with } and [ with ]. */ inline bool balanced() const noexcept; protected: simdjson_inline json_iterator(const uint8_t *buf, ondemand::parser *parser) noexcept; /// The last token before the end simdjson_inline token_position last_position() const noexcept; /// The token *at* the end. This points at gibberish and should only be used for comparison. simdjson_inline token_position end_position() const noexcept; /// The end of the buffer. 
simdjson_inline token_position end() const noexcept; friend class document; friend class document_stream; friend class object; friend class array; friend class value; friend class raw_json_string; friend class parser; friend class value_iterator; template friend simdjson_inline void logger::log_line(const json_iterator &iter, const char *title_prefix, const char *title, std::string_view detail, int delta, int depth_delta, logger::log_level level, Args&&... args) noexcept; template friend simdjson_inline void logger::log_line(const json_iterator &iter, token_position index, depth_t depth, const char *title_prefix, const char *title, std::string_view detail, logger::log_level level, Args&&... args) noexcept; }; // json_iterator } // namespace ondemand } // namespace westmere } // namespace simdjson namespace simdjson { template<> struct simdjson_result : public westmere::implementation_simdjson_result_base { public: simdjson_inline simdjson_result(westmere::ondemand::json_iterator &&value) noexcept; ///< @private simdjson_inline simdjson_result(error_code error) noexcept; ///< @private simdjson_inline simdjson_result() noexcept = default; }; } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_JSON_ITERATOR_H /* end file simdjson/generic/ondemand/json_iterator.h for westmere */ /* including simdjson/generic/ondemand/json_type.h for westmere: #include "simdjson/generic/ondemand/json_type.h" */ /* begin file simdjson/generic/ondemand/json_type.h for westmere */ #ifndef SIMDJSON_GENERIC_ONDEMAND_JSON_TYPE_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_JSON_TYPE_H */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/numberparsing.h" */ /* amalgamation 
skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { namespace westmere { namespace ondemand { /** * The type of a JSON value. */ enum class json_type { // Start at 1 to catch uninitialized / default values more easily array=1, ///< A JSON array ( [ 1, 2, 3 ... ] ) object, ///< A JSON object ( { "a": 1, "b": 2, ... } ) number, ///< A JSON number ( 1 or -2.3 or 4.5e6 ...) string, ///< A JSON string ( "a" or "hello world\n" ...) boolean, ///< A JSON boolean (true or false) null ///< A JSON null (null) }; /** * A type representing a JSON number. * The design of the struct is deliberately straight-forward. All * functions return standard values with no error check. */ struct number { /** * return the automatically determined type of * the number: number_type::floating_point_number, * number_type::signed_integer or number_type::unsigned_integer. * * enum class number_type { * floating_point_number=1, /// a binary64 number * signed_integer, /// a signed integer that fits in a 64-bit word using two's complement * unsigned_integer /// a positive integer larger or equal to 1<<63 * }; */ simdjson_inline ondemand::number_type get_number_type() const noexcept; /** * return true if the automatically determined type of * the number is number_type::unsigned_integer. */ simdjson_inline bool is_uint64() const noexcept; /** * return the value as a uint64_t, only valid if is_uint64() is true. */ simdjson_inline uint64_t get_uint64() const noexcept; simdjson_inline operator uint64_t() const noexcept; /** * return true if the automatically determined type of * the number is number_type::signed_integer. */ simdjson_inline bool is_int64() const noexcept; /** * return the value as an int64_t, only valid if is_int64() is true. */ simdjson_inline int64_t get_int64() const noexcept; simdjson_inline operator int64_t() const noexcept; /** * return true if the automatically determined type of * the number is number_type::floating_point_number. 
*/ simdjson_inline bool is_double() const noexcept; /** * return the value as a double, only valid if is_double() is true. */ simdjson_inline double get_double() const noexcept; simdjson_inline operator double() const noexcept; /** * Convert the number to a double. Though it always succeeds, the conversion * may be lossy if the number cannot be represented exactly. */ simdjson_inline double as_double() const noexcept; protected: /** * The next block of declarations is designed so that we can call the number parsing * functions on a number type. They are protected and should never be used outside * of the core simdjson library. */ friend class value_iterator; template friend error_code numberparsing::slow_float_parsing(simdjson_unused const uint8_t * src, W writer); template friend error_code numberparsing::write_float(const uint8_t *const src, bool negative, uint64_t i, const uint8_t * start_digits, size_t digit_count, int64_t exponent, W &writer); template friend error_code numberparsing::parse_number(const uint8_t *const src, W &writer); /** Store a signed 64-bit value to the number. */ simdjson_inline void append_s64(int64_t value) noexcept; /** Store an unsigned 64-bit value to the number. */ simdjson_inline void append_u64(uint64_t value) noexcept; /** Store a double value to the number. */ simdjson_inline void append_double(double value) noexcept; /** Specifies that the value is a double, but leave it undefined. */ simdjson_inline void skip_double() noexcept; /** * End of friend declarations. */ /** * Our attributes are a union type (size = 64 bits) * followed by a type indicator. */ union { double floating_point_number; int64_t signed_integer; uint64_t unsigned_integer; } payload{0}; number_type type{number_type::signed_integer}; }; /** * Write the JSON type to the output stream * * @param out The output stream. * @param type The json_type. 
*/ inline std::ostream& operator<<(std::ostream& out, json_type type) noexcept; #if SIMDJSON_EXCEPTIONS /** * Send JSON type to an output stream. * * @param out The output stream. * @param type The simdjson_result holding the json_type. * @throw simdjson_error if the result being printed has an error. If there is an error with the * underlying output stream, that error will be propagated (simdjson_error will not be * thrown). */ inline std::ostream& operator<<(std::ostream& out, simdjson_result &type) noexcept(false); #endif } // namespace ondemand } // namespace westmere } // namespace simdjson namespace simdjson { template<> struct simdjson_result : public westmere::implementation_simdjson_result_base { public: simdjson_inline simdjson_result(westmere::ondemand::json_type &&value) noexcept; ///< @private simdjson_inline simdjson_result(error_code error) noexcept; ///< @private simdjson_inline simdjson_result() noexcept = default; simdjson_inline ~simdjson_result() noexcept = default; ///< @private }; } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_JSON_TYPE_H /* end file simdjson/generic/ondemand/json_type.h for westmere */ /* including simdjson/generic/ondemand/raw_json_string.h for westmere: #include "simdjson/generic/ondemand/raw_json_string.h" */ /* begin file simdjson/generic/ondemand/raw_json_string.h for westmere */ #ifndef SIMDJSON_GENERIC_ONDEMAND_RAW_JSON_STRING_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_RAW_JSON_STRING_H */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { namespace westmere { namespace ondemand { /** * A string escaped per JSON rules, terminated with quote ("). 
They are used to represent * unescaped keys inside JSON documents. * * (In other words, a pointer to the beginning of a string, just after the start quote, inside a * JSON file.) * * This class is deliberately simplistic and has little functionality. You can * compare a raw_json_string instance with an unescaped C string, but * that is nearly all you can do. * * The raw_json_string is unescaped. If you wish to write an unescaped version of it to your own * buffer, you may do so using the parser.unescape(string, buff) method, using an ondemand::parser * instance. Doing so requires you to have a sufficiently large buffer. * * The raw_json_string instances originate typically from field instance which in turn represent * key-value pairs from object instances. From a field instance, you get the raw_json_string * instance by calling key(). You can, if you want a more usable string_view instance, call * the unescaped_key() method on the field instance. You may also create a raw_json_string from * any other string value, with the value.get_raw_json_string() method. Again, you can get * a more usable string_view instance by calling get_string(). * */ class raw_json_string { public: /** * Create a new invalid raw_json_string. * * Exists so you can declare a variable and later assign to it before use. */ simdjson_inline raw_json_string() noexcept = default; /** * Create a new invalid raw_json_string pointed at the given location in the JSON. * * The given location must be just *after* the beginning quote (") in the JSON file. * * It *must* be terminated by a ", and be a valid JSON string. */ simdjson_inline raw_json_string(const uint8_t * _buf) noexcept; /** * Get the raw pointer to the beginning of the string in the JSON (just after the "). * * It is possible for this function to return a null pointer if the instance * has outlived its existence. 
*/ simdjson_inline const char * raw() const noexcept; /** * This compares the current instance to the std::string_view target: returns true if * they are byte-by-byte equal (no escaping is done) on target.size() characters, * and if the raw_json_string instance has a quote character at byte index target.size(). * We never read more than length + 1 bytes in the raw_json_string instance. * If length is smaller than target.size(), this will return false. * * The std::string_view instance may contain any characters. However, the caller * is responsible for setting length so that length bytes may be read in the * raw_json_string. * * Performance: the comparison may be done using memcmp which may be efficient * for long strings. */ simdjson_inline bool unsafe_is_equal(size_t length, std::string_view target) const noexcept; /** * This compares the current instance to the std::string_view target: returns true if * they are byte-by-byte equal (no escaping is done). * The std::string_view instance should not contain unescaped quote characters: * the caller is responsible for this check. See is_free_from_unescaped_quote. * * Performance: the comparison is done byte-by-byte which might be inefficient for * long strings. * * If target is a compile-time constant, and your compiler likes you, * you should be able to do the following without performance penalty... * * static_assert(raw_json_string::is_free_from_unescaped_quote(target), ""); * s.unsafe_is_equal(target); */ simdjson_inline bool unsafe_is_equal(std::string_view target) const noexcept; /** * This compares the current instance to the C string target: returns true if * they are byte-by-byte equal (no escaping is done). * The provided C string should not contain an unescaped quote character: * the caller is responsible for this check. See is_free_from_unescaped_quote. * * If target is a compile-time constant, and your compiler likes you, * you should be able to do the following without performance penalty... 
* * static_assert(raw_json_string::is_free_from_unescaped_quote(target), ""); * s.unsafe_is_equal(target); */ simdjson_inline bool unsafe_is_equal(const char* target) const noexcept; /** * This compares the current instance to the std::string_view target: returns true if * they are byte-by-byte equal (no escaping is done). */ simdjson_inline bool is_equal(std::string_view target) const noexcept; /** * This compares the current instance to the C string target: returns true if * they are byte-by-byte equal (no escaping is done). */ simdjson_inline bool is_equal(const char* target) const noexcept; /** * Returns true if target is free from unescaped quote. If target is known at * compile-time, we might expect the computation to happen at compile time with * many compilers (not all!). */ static simdjson_inline bool is_free_from_unescaped_quote(std::string_view target) noexcept; static simdjson_inline bool is_free_from_unescaped_quote(const char* target) noexcept; private: /** * This will set the inner pointer to zero, effectively making * this instance unusable. */ simdjson_inline void consume() noexcept { buf = nullptr; } /** * Checks whether the inner pointer is non-null and thus usable. */ simdjson_inline simdjson_warn_unused bool alive() const noexcept { return buf != nullptr; } /** * Unescape this JSON string, replacing \\ with \, \n with newline, etc. * The result will be a valid UTF-8. * * ## IMPORTANT: string_view lifetime * * The string_view is only valid until the next parse() call on the parser. * * @param iter A json_iterator, which contains a buffer where the string will be written. * @param allow_replacement Whether we allow replacement of invalid surrogate pairs. */ simdjson_inline simdjson_warn_unused simdjson_result unescape(json_iterator &iter, bool allow_replacement) const noexcept; /** * Unescape this JSON string, replacing \\ with \, \n with newline, etc. * The result may not be a valid UTF-8. 
https://simonsapin.github.io/wtf-8/ * * ## IMPORTANT: string_view lifetime * * The string_view is only valid until the next parse() call on the parser. * * @param iter A json_iterator, which contains a buffer where the string will be written. */ simdjson_inline simdjson_warn_unused simdjson_result unescape_wobbly(json_iterator &iter) const noexcept; const uint8_t * buf{}; friend class object; friend class field; friend class parser; friend struct simdjson_result; }; simdjson_unused simdjson_inline std::ostream &operator<<(std::ostream &, const raw_json_string &) noexcept; /** * Comparisons between raw_json_string and std::string_view instances are potentially unsafe: the user is responsible * for providing a string with no unescaped quote. Note that unescaped quotes cannot be present in valid JSON strings. */ simdjson_unused simdjson_inline bool operator==(const raw_json_string &a, std::string_view c) noexcept; simdjson_unused simdjson_inline bool operator==(std::string_view c, const raw_json_string &a) noexcept; simdjson_unused simdjson_inline bool operator!=(const raw_json_string &a, std::string_view c) noexcept; simdjson_unused simdjson_inline bool operator!=(std::string_view c, const raw_json_string &a) noexcept; } // namespace ondemand } // namespace westmere } // namespace simdjson namespace simdjson { template<> struct simdjson_result : public westmere::implementation_simdjson_result_base { public: simdjson_inline simdjson_result(westmere::ondemand::raw_json_string &&value) noexcept; ///< @private simdjson_inline simdjson_result(error_code error) noexcept; ///< @private simdjson_inline simdjson_result() noexcept = default; simdjson_inline ~simdjson_result() noexcept = default; ///< @private simdjson_inline simdjson_result raw() const noexcept; simdjson_inline simdjson_warn_unused simdjson_result unescape(westmere::ondemand::json_iterator &iter, bool allow_replacement) const noexcept; simdjson_inline simdjson_warn_unused simdjson_result 
unescape_wobbly(westmere::ondemand::json_iterator &iter) const noexcept; }; } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_RAW_JSON_STRING_H /* end file simdjson/generic/ondemand/raw_json_string.h for westmere */ /* including simdjson/generic/ondemand/parser.h for westmere: #include "simdjson/generic/ondemand/parser.h" */ /* begin file simdjson/generic/ondemand/parser.h for westmere */ #ifndef SIMDJSON_GENERIC_ONDEMAND_PARSER_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_PARSER_H */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ #include namespace simdjson { namespace westmere { namespace ondemand { /** * The default batch size for document_stream instances for this On Demand kernel. * Note that different On Demand kernel may use a different DEFAULT_BATCH_SIZE value * in the future. */ static constexpr size_t DEFAULT_BATCH_SIZE = 1000000; /** * Some adversary might try to set the batch size to 0 or 1, which might cause problems. * We set a minimum of 32B since anything else is highly likely to be an error. In practice, * most users will want a much larger batch size. * * All non-negative MINIMAL_BATCH_SIZE values should be 'safe' except that, obviously, no JSON * document can ever span 0 or 1 byte and that very large values would create memory allocation issues. */ static constexpr size_t MINIMAL_BATCH_SIZE = 32; /** * A JSON fragment iterator. * * This holds the actual iterator as well as the buffer for writing strings. */ class parser { public: /** * Create a JSON parser. * * The new parser will have zero capacity. 
*/ inline explicit parser(size_t max_capacity = SIMDJSON_MAXSIZE_BYTES) noexcept; /** Parsers can be moved but not copied (copy construction and copy assignment are deleted). */ inline parser(parser &&other) noexcept = default; simdjson_inline parser(const parser &other) = delete; simdjson_inline parser &operator=(const parser &other) = delete; simdjson_inline parser &operator=(parser &&other) noexcept = default; /** Deallocate the JSON parser. */ inline ~parser() noexcept = default; /** * Start iterating an on-demand JSON document. * * ondemand::parser parser; * document doc = parser.iterate(json); * * It is expected that the content is a valid UTF-8 file, containing a valid JSON document. * Otherwise the iterate method may return an error. In particular, the whole input should be * valid: we do not attempt to tolerate incorrect content either before or after a JSON * document. If there is a UTF-8 BOM, the parser skips it. * * ### IMPORTANT: Validate what you use * * Calling iterate on an invalid JSON document may not immediately trigger an error. The call to * iterate does not parse and validate the whole document. * * ### IMPORTANT: Buffer Lifetime * * Because parsing is done while you iterate, you *must* keep the JSON buffer around at least as * long as the document iteration. * * ### IMPORTANT: Document Lifetime * * Only one iteration at a time can happen per parser, and the parser *must* be kept alive during * iteration to ensure intermediate buffers can be accessed. Any document must be destroyed before * you call iterate() again or destroy the parser. * * ### REQUIRED: Buffer Padding * * The buffer must have at least SIMDJSON_PADDING extra allocated bytes. It does not matter what * those bytes are initialized to, as long as they are allocated. These bytes will be read: if you * are using a sanitizer that verifies that no uninitialized byte is read, then you should initialize the * SIMDJSON_PADDING bytes to avoid runtime warnings. * * @param json The JSON to parse. * @param len The length of the JSON.
* @param capacity The number of bytes allocated in the JSON (must be at least len+SIMDJSON_PADDING). * * @return The document, or an error: * - INSUFFICIENT_PADDING if the input has fewer than SIMDJSON_PADDING extra bytes. * - MEMALLOC if the parser does not have enough capacity, and memory * allocation fails. * - EMPTY if the document is all whitespace. * - UTF8_ERROR if the document is not valid UTF-8. * - UNESCAPED_CHARS if a string contains control characters that must be escaped. * - UNCLOSED_STRING if there is an unclosed string in the document. */ simdjson_warn_unused simdjson_result iterate(padded_string_view json) & noexcept; /** @overload simdjson_result iterate(padded_string_view json) & noexcept */ simdjson_warn_unused simdjson_result iterate(const char *json, size_t len, size_t capacity) & noexcept; /** @overload simdjson_result iterate(padded_string_view json) & noexcept */ simdjson_warn_unused simdjson_result iterate(const uint8_t *json, size_t len, size_t capacity) & noexcept; /** @overload simdjson_result iterate(padded_string_view json) & noexcept */ simdjson_warn_unused simdjson_result iterate(std::string_view json, size_t capacity) & noexcept; /** @overload simdjson_result iterate(padded_string_view json) & noexcept */ simdjson_warn_unused simdjson_result iterate(const std::string &json) & noexcept; /** @overload simdjson_result iterate(padded_string_view json) & noexcept */ simdjson_warn_unused simdjson_result iterate(std::string &json) & noexcept; /** @overload simdjson_result iterate(padded_string_view json) & noexcept */ simdjson_warn_unused simdjson_result iterate(const simdjson_result &json) & noexcept; /** @overload simdjson_result iterate(padded_string_view json) & noexcept */ simdjson_warn_unused simdjson_result iterate(const simdjson_result &json) & noexcept; /** @overload simdjson_result iterate(padded_string_view json) & noexcept */ simdjson_warn_unused simdjson_result iterate(padded_string &&json) & noexcept =
delete; /** * @private * * Start iterating an on-demand JSON document. * * ondemand::parser parser; * json_iterator doc = parser.iterate_raw(json); * * ### IMPORTANT: Buffer Lifetime * * Because parsing is done while you iterate, you *must* keep the JSON buffer around at least as * long as the document iteration. * * ### IMPORTANT: Document Lifetime * * Only one iteration at a time can happen per parser, and the parser *must* be kept alive during * iteration to ensure intermediate buffers can be accessed. Any document must be destroyed before * you call iterate() again or destroy the parser. * * The ondemand::document instance holds the iterator. The document must remain in scope * while you are accessing instances of ondemand::value, ondemand::object, ondemand::array. * * ### REQUIRED: Buffer Padding * * The buffer must have at least SIMDJSON_PADDING extra allocated bytes. It does not matter what * those bytes are initialized to, as long as they are allocated. These bytes will be read: if you * are using a sanitizer that verifies that no uninitialized byte is read, then you should initialize the * SIMDJSON_PADDING bytes to avoid runtime warnings. * * @param json The JSON to parse. * * @return The iterator, or an error: * - INSUFFICIENT_PADDING if the input has fewer than SIMDJSON_PADDING extra bytes. * - MEMALLOC if the parser does not have enough capacity, and memory * allocation fails. * - EMPTY if the document is all whitespace. * - UTF8_ERROR if the document is not valid UTF-8. * - UNESCAPED_CHARS if a string contains control characters that must be escaped. * - UNCLOSED_STRING if there is an unclosed string in the document. */ simdjson_warn_unused simdjson_result iterate_raw(padded_string_view json) & noexcept; /** * Parse a buffer containing many JSON documents.
* * auto json = R"({ "foo": 1 } { "foo": 2 } { "foo": 3 } )"_padded; * ondemand::parser parser; * ondemand::document_stream docs = parser.iterate_many(json); * for (auto & doc : docs) { * std::cout << doc["foo"] << std::endl; * } * // Prints 1 2 3 * * No copy of the input buffer is made. * * The function is lazy: it may be that no more than one JSON document at a time is parsed. * * The caller is responsible for ensuring that the input string data remains unchanged and is * not deleted during the loop. * * ### Format * * The buffer must contain a series of one or more JSON documents, concatenated into a single * buffer, separated by ASCII whitespace. It effectively parses until it has a fully valid document, * then starts parsing the next document at that point. (It does this with more parallelism and * lookahead than you might think, though.) * * Documents that consist of an object or array may omit the whitespace between them, concatenating * with no separator. Documents that consist of a single primitive (i.e. documents that are not * arrays or objects) MUST be separated with ASCII whitespace. * * The characters inside a JSON document, and between JSON documents, must be valid Unicode (UTF-8). * If there is a UTF-8 BOM, the parser skips it. * * The documents must not exceed batch_size bytes (by default 1MB) or they will fail to parse. * Setting batch_size to excessively large or excessively small values may negatively impact * performance. * * ### REQUIRED: Buffer Padding * * The buffer must have at least SIMDJSON_PADDING extra allocated bytes. It does not matter what * those bytes are initialized to, as long as they are allocated. These bytes will be read: if you * are using a sanitizer that verifies that no uninitialized byte is read, then you should initialize the * SIMDJSON_PADDING bytes to avoid runtime warnings. * * ### Threads * * When compiled with SIMDJSON_THREADS_ENABLED, this method will use a single thread under the * hood to do some lookahead.
* * ### Parser Capacity * * If the parser's current capacity is less than batch_size, it will allocate enough capacity * to handle it (up to max_capacity). * * @param buf The concatenated JSON to parse. * @param len The length of the concatenated JSON. * @param batch_size The batch size to use. MUST be larger than the largest document. The sweet * spot is cache-related: small enough to fit in cache, yet big enough to * parse as many documents as possible in one tight loop. * Defaults to DEFAULT_BATCH_SIZE (1MB), which has been a reasonable sweet spot in our tests. * @param allow_comma_separated (defaults to false) This allows a mode where the documents are * separated by commas instead of whitespace. It comes with a performance * penalty because the entire document is indexed at once (and the document must be * less than 4 GB), and there is no multithreading. In this mode, the batch_size parameter * is effectively ignored, as it is set to at least the document size. * @return The stream, or an error. An empty input will yield 0 documents rather than an EMPTY error. Errors: * - MEMALLOC if the parser does not have enough capacity and memory allocation fails. * - CAPACITY if the parser does not have enough capacity and batch_size > max_capacity. * - other json errors if parsing fails. You should not rely on these errors to always be the same for the * same document: they may vary under runtime dispatch (so they may vary depending on your system and hardware).
*/ inline simdjson_result iterate_many(const uint8_t *buf, size_t len, size_t batch_size = DEFAULT_BATCH_SIZE, bool allow_comma_separated = false) noexcept; /** @overload iterate_many(const uint8_t *buf, size_t len, size_t batch_size, bool allow_comma_separated) */ inline simdjson_result iterate_many(const char *buf, size_t len, size_t batch_size = DEFAULT_BATCH_SIZE, bool allow_comma_separated = false) noexcept; /** @overload iterate_many(const uint8_t *buf, size_t len, size_t batch_size, bool allow_comma_separated) */ inline simdjson_result iterate_many(const std::string &s, size_t batch_size = DEFAULT_BATCH_SIZE, bool allow_comma_separated = false) noexcept; inline simdjson_result iterate_many(const std::string &&s, size_t batch_size, bool allow_comma_separated = false) = delete;// unsafe /** @overload iterate_many(const uint8_t *buf, size_t len, size_t batch_size, bool allow_comma_separated) */ inline simdjson_result iterate_many(const padded_string &s, size_t batch_size = DEFAULT_BATCH_SIZE, bool allow_comma_separated = false) noexcept; inline simdjson_result iterate_many(const padded_string &&s, size_t batch_size, bool allow_comma_separated = false) = delete;// unsafe /** @private We do not want to allow implicit conversion from C string to std::string. */ simdjson_result iterate_many(const char *buf, size_t batch_size = DEFAULT_BATCH_SIZE) noexcept = delete; /** The capacity of this parser (the largest document it can process). */ simdjson_inline size_t capacity() const noexcept; /** The maximum capacity of this parser (the largest document it is allowed to process). */ simdjson_inline size_t max_capacity() const noexcept; /** Set the maximum capacity of this parser (see max_capacity()). */ simdjson_inline void set_max_capacity(size_t max_capacity) noexcept; /** * The maximum depth of this parser (the most deeply nested objects and arrays it can process). * This parameter is only relevant when the macro SIMDJSON_DEVELOPMENT_CHECKS is set to true. * The document instance's current_depth() method should be used to monitor the parsing * depth and limit it if desired.
*/ simdjson_inline size_t max_depth() const noexcept; /** * Ensure this parser has enough memory to process JSON documents up to `capacity` bytes in length * and `max_depth` depth. * * The max_depth parameter is only relevant when the macro SIMDJSON_DEVELOPMENT_CHECKS is set to true. * The document instance's current_depth() method should be used to monitor the parsing * depth and limit it if desired. * * @param capacity The new capacity. * @param max_depth The new max_depth. Defaults to DEFAULT_MAX_DEPTH. * @return The error, if there is one. */ simdjson_warn_unused error_code allocate(size_t capacity, size_t max_depth=DEFAULT_MAX_DEPTH) noexcept; #ifdef SIMDJSON_THREADS_ENABLED /** * The parser instance can use threads when they are available to speed up some * operations. It is enabled by default. Changing this attribute will change the * behavior of the parser for future operations. */ bool threaded{true}; #endif /** * Unescape this JSON string, replacing \\ with \, \n with newline, etc. to a user-provided buffer. * The result must be valid UTF-8. * The provided pointer is advanced to the end of the string by reference, and a string_view instance * is returned. You can ensure that your buffer is large enough by allocating a block of memory at least * as large as the input JSON plus SIMDJSON_PADDING and then unescaping all strings to this one buffer. * * This unescape function is a low-level function. If you want a more user-friendly approach, you should * avoid raw_json_string instances (e.g., by calling unescaped_key() instead of key() or get_string() * instead of get_raw_json_string()). * * ## IMPORTANT: string_view lifetime * * The string_view is only valid as long as the bytes in dst. * * @param in The raw_json_string to unescape. * @param dst A pointer to a buffer at least large enough to write this string as well as * an additional SIMDJSON_PADDING bytes. * @param allow_replacement Whether we allow a replacement if the input string contains unmatched surrogate pairs.
* @return A string_view pointing at the unescaped string in dst * @error STRING_ERROR if escapes are incorrect. */ simdjson_inline simdjson_result unescape(raw_json_string in, uint8_t *&dst, bool allow_replacement = false) const noexcept; /** * Unescape this JSON string, replacing \\ with \, \n with newline, etc. to a user-provided buffer. * The result may not be valid UTF-8. See https://simonsapin.github.io/wtf-8/ * The provided pointer is advanced to the end of the string by reference, and a string_view instance * is returned. You can ensure that your buffer is large enough by allocating a block of memory at least * as large as the input JSON plus SIMDJSON_PADDING and then unescaping all strings to this one buffer. * * This unescape function is a low-level function. If you want a more user-friendly approach, you should * avoid raw_json_string instances (e.g., by calling unescaped_key() instead of key() or get_string() * instead of get_raw_json_string()). * * ## IMPORTANT: string_view lifetime * * The string_view is only valid as long as the bytes in dst. * * @param in The raw_json_string to unescape. * @param dst A pointer to a buffer at least large enough to write this string as well as * an additional SIMDJSON_PADDING bytes. * @return A string_view pointing at the unescaped string in dst * @error STRING_ERROR if escapes are incorrect.
*/ simdjson_inline simdjson_result unescape_wobbly(raw_json_string in, uint8_t *&dst) const noexcept; private: /** @private [for benchmarking access] The implementation to use */ std::unique_ptr implementation{}; size_t _capacity{0}; size_t _max_capacity; size_t _max_depth{DEFAULT_MAX_DEPTH}; std::unique_ptr string_buf{}; #if SIMDJSON_DEVELOPMENT_CHECKS std::unique_ptr start_positions{}; #endif friend class json_iterator; friend class document_stream; }; } // namespace ondemand } // namespace westmere } // namespace simdjson namespace simdjson { template<> struct simdjson_result : public westmere::implementation_simdjson_result_base { public: simdjson_inline simdjson_result(westmere::ondemand::parser &&value) noexcept; ///< @private simdjson_inline simdjson_result(error_code error) noexcept; ///< @private simdjson_inline simdjson_result() noexcept = default; }; } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_PARSER_H /* end file simdjson/generic/ondemand/parser.h for westmere */ // All other declarations /* including simdjson/generic/ondemand/array.h for westmere: #include "simdjson/generic/ondemand/array.h" */ /* begin file simdjson/generic/ondemand/array.h for westmere */ #ifndef SIMDJSON_GENERIC_ONDEMAND_ARRAY_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_ARRAY_H */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { namespace westmere { namespace ondemand { /** * A forward-only JSON array. */ class array { public: /** * Create a new invalid array. 
* * Exists so you can declare a variable and later assign to it before use. */ simdjson_inline array() noexcept = default; /** * Begin array iteration. * * Part of the std::iterable interface. */ simdjson_inline simdjson_result begin() noexcept; /** * Sentinel representing the end of the array. * * Part of the std::iterable interface. */ simdjson_inline simdjson_result end() noexcept; /** * This method scans the array and counts the number of elements. * The count_elements method should always be called before you have begun * iterating through the array: it is expected that you are pointing at * the beginning of the array. * The runtime complexity is linear in the size of the array. After * calling this function, if successful, the array is 'rewound' to its * beginning as if it had never been accessed. If the JSON is malformed (e.g., * there is a missing comma), then an error is returned and it is no longer * safe to continue. * * To check that an array is empty, it is more performant to use * the is_empty() method. */ simdjson_inline simdjson_result count_elements() & noexcept; /** * This method scans the beginning of the array and checks whether the * array is empty. * The runtime complexity is constant time. After * calling this function, if successful, the array is 'rewound' to its * beginning as if it had never been accessed. If the JSON is malformed (e.g., * there is a missing comma), then an error is returned and it is no longer * safe to continue. */ simdjson_inline simdjson_result is_empty() & noexcept; /** * Reset the iterator so that we are pointing back at the * beginning of the array. You should still consume values only once even if you * can iterate through the array more than once. If you unescape a string * within the array more than once, you have unsafe code. Note that rewinding * an array means that you may need to reparse it anew: it is not a free * operation.
* * @returns true if the array contains some elements (not empty) */ inline simdjson_result reset() & noexcept; /** * Get the value associated with the given JSON pointer. We use the RFC 6901 * https://tools.ietf.org/html/rfc6901 standard, interpreting the current node * as the root of its own JSON document. * * ondemand::parser parser; * auto json = R"([ { "foo": { "a": [ 10, 20, 30 ] }} ])"_padded; * auto doc = parser.iterate(json); * doc.at_pointer("/0/foo/a/1") == 20 * * Note that at_pointer() called on the document automatically calls the document's rewind * method between each call. It invalidates all previously accessed arrays, objects and values * that have not been consumed. Yet it is not the case when calling at_pointer on an array * instance: there is no rewind and no invalidation. * * You may only call at_pointer on an array after it has been created, but before it has * been first accessed. When calling at_pointer on an array, the pointer is advanced to * the location indicated by the JSON pointer (in case of success). It is no longer possible * to call at_pointer on the same array. * * Also note that at_pointer() relies on find_field() which implies that we do not unescape keys when matching. * * @return The value associated with the given JSON pointer, or: * - NO_SUCH_FIELD if a field does not exist in an object * - INDEX_OUT_OF_BOUNDS if an array index is larger than an array length * - INCORRECT_TYPE if a non-integer is used to access an array * - INVALID_JSON_POINTER if the JSON pointer is invalid and cannot be parsed */ inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; /** * Consumes the array and returns a string_view instance corresponding to the * array as represented in JSON. It points inside the original document. */ simdjson_inline simdjson_result raw_json() noexcept; /** * Get the value at the given index. This function has linear-time complexity. 
* This function should only be called once on an array instance since the array iterator is not reset between calls. * * @return The value at the given index, or: * - INDEX_OUT_OF_BOUNDS if the array index is larger than the array length */ simdjson_inline simdjson_result at(size_t index) noexcept; protected: /** * Go to the end of the array, no matter where you are right now. */ simdjson_inline error_code consume() noexcept; /** * Begin array iteration. * * @param iter The iterator. Must be where the initial [ is expected. Will be *moved* into the * resulting array. * @error INCORRECT_TYPE if the iterator is not at [. */ static simdjson_inline simdjson_result start(value_iterator &iter) noexcept; /** * Begin array iteration from the root. * * @param iter The iterator. Must be where the initial [ is expected. Will be *moved* into the * resulting array. * @error INCORRECT_TYPE if the iterator is not at [. * @error TAPE_ERROR if there is no closing ] at the end of the document. */ static simdjson_inline simdjson_result start_root(value_iterator &iter) noexcept; /** * Begin array iteration. * * This version of the method should be called after the initial [ has been verified, and is * intended for use by switch statements that check the type of a value. * * @param iter The iterator. Must be after the initial [. Will be *moved* into the resulting array. */ static simdjson_inline simdjson_result started(value_iterator &iter) noexcept; /** * Internal array creation: create an array at the position of the given iterator. * Call array::start() or array::started() instead of this. * * @param iter The iterator. Must either be at the start of the first element with iter.is_alive() * == true, or past the [] with is_alive() == false if the array is empty. Will be *moved* * into the resulting array. */ simdjson_inline array(const value_iterator &iter) noexcept; /** * Iterator marking current position. * * iter.is_alive() == false indicates iteration is complete.
*/ value_iterator iter{}; friend class value; friend class document; friend struct simdjson_result; friend struct simdjson_result; friend class array_iterator; }; } // namespace ondemand } // namespace westmere } // namespace simdjson namespace simdjson { template<> struct simdjson_result : public westmere::implementation_simdjson_result_base { public: simdjson_inline simdjson_result(westmere::ondemand::array &&value) noexcept; ///< @private simdjson_inline simdjson_result(error_code error) noexcept; ///< @private simdjson_inline simdjson_result() noexcept = default; simdjson_inline simdjson_result begin() noexcept; simdjson_inline simdjson_result end() noexcept; inline simdjson_result count_elements() & noexcept; inline simdjson_result is_empty() & noexcept; inline simdjson_result reset() & noexcept; simdjson_inline simdjson_result at(size_t index) noexcept; simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; simdjson_inline simdjson_result raw_json() noexcept; }; } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_ARRAY_H /* end file simdjson/generic/ondemand/array.h for westmere */ /* including simdjson/generic/ondemand/array_iterator.h for westmere: #include "simdjson/generic/ondemand/array_iterator.h" */ /* begin file simdjson/generic/ondemand/array_iterator.h for westmere */ #ifndef SIMDJSON_GENERIC_ONDEMAND_ARRAY_ITERATOR_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_ARRAY_ITERATOR_H */ /* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { namespace westmere { namespace ondemand { 
/** * A forward-only iterator over the elements of a JSON array. * * This is an input_iterator, meaning: * - It is forward-only * - * must be called exactly once per element. * - ++ must be called exactly once in between each * (*, ++, *, ++, * ...) */ class array_iterator { public: /** Create a new, invalid array iterator. */ simdjson_inline array_iterator() noexcept = default; // // Iterator interface // /** * Get the current element. * * Part of the std::iterator interface. */ simdjson_inline simdjson_result operator*() noexcept; // MUST ONLY BE CALLED ONCE PER ITERATION. /** * Check if we are at the end of the JSON array. * * Part of the std::iterator interface. * * @return true if there are no more elements in the JSON array. */ simdjson_inline bool operator==(const array_iterator &) const noexcept; /** * Check if there are more elements in the JSON array. * * Part of the std::iterator interface. * * @return true if there are more elements in the JSON array. */ simdjson_inline bool operator!=(const array_iterator &) const noexcept; /** * Move to the next element. * * Part of the std::iterator interface. */ simdjson_inline array_iterator &operator++() noexcept; private: value_iterator iter{}; simdjson_inline array_iterator(const value_iterator &iter) noexcept; friend class array; friend class value; friend struct simdjson_result; }; } // namespace ondemand } // namespace westmere } // namespace simdjson namespace simdjson { template<> struct simdjson_result : public westmere::implementation_simdjson_result_base { public: simdjson_inline simdjson_result(westmere::ondemand::array_iterator &&value) noexcept; ///< @private simdjson_inline simdjson_result(error_code error) noexcept; ///< @private simdjson_inline simdjson_result() noexcept = default; // // Iterator interface // simdjson_inline simdjson_result operator*() noexcept; // MUST ONLY BE CALLED ONCE PER ITERATION. 
simdjson_inline bool operator==(const simdjson_result &) const noexcept; simdjson_inline bool operator!=(const simdjson_result &) const noexcept; simdjson_inline simdjson_result &operator++() noexcept; }; } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_ARRAY_ITERATOR_H /* end file simdjson/generic/ondemand/array_iterator.h for westmere */ /* including simdjson/generic/ondemand/document.h for westmere: #include "simdjson/generic/ondemand/document.h" */ /* begin file simdjson/generic/ondemand/document.h for westmere */ #ifndef SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_H */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { namespace westmere { namespace ondemand { /** * A JSON document. It holds a json_iterator instance. * * Used by tokens to get text, and string buffer location. * * You must keep the document around during iteration. */ class document { public: /** * Create a new invalid document. * * Exists so you can declare a variable and later assign to it before use. */ simdjson_inline document() noexcept = default; simdjson_inline document(const document &other) noexcept = delete; // pass your documents by reference, not by copy simdjson_inline document(document &&other) noexcept = default; simdjson_inline document &operator=(const document &other) noexcept = delete; simdjson_inline document &operator=(document &&other) noexcept = default; /** * Cast this JSON value to an array. * * @returns An object that can be used to iterate the array. * @returns INCORRECT_TYPE If the JSON value is not an array. 
*/ simdjson_inline simdjson_result get_array() & noexcept; /** * Cast this JSON value to an object. * * @returns An object that can be used to look up or iterate fields. * @returns INCORRECT_TYPE If the JSON value is not an object. */ simdjson_inline simdjson_result get_object() & noexcept; /** * Cast this JSON value to an unsigned integer. * * @returns An unsigned 64-bit integer. * @returns INCORRECT_TYPE If the JSON value is not a 64-bit unsigned integer. */ simdjson_inline simdjson_result get_uint64() noexcept; /** * Cast this JSON value (inside string) to an unsigned integer. * * @returns An unsigned 64-bit integer. * @returns INCORRECT_TYPE If the JSON value is not a 64-bit unsigned integer. */ simdjson_inline simdjson_result get_uint64_in_string() noexcept; /** * Cast this JSON value to a signed integer. * * @returns A signed 64-bit integer. * @returns INCORRECT_TYPE If the JSON value is not a 64-bit integer. */ simdjson_inline simdjson_result get_int64() noexcept; /** * Cast this JSON value (inside string) to a signed integer. * * @returns A signed 64-bit integer. * @returns INCORRECT_TYPE If the JSON value is not a 64-bit integer. */ simdjson_inline simdjson_result get_int64_in_string() noexcept; /** * Cast this JSON value to a double. * * @returns A double. * @returns INCORRECT_TYPE If the JSON value is not a valid floating-point number. */ simdjson_inline simdjson_result get_double() noexcept; /** * Cast this JSON value (inside string) to a double. * * @returns A double. * @returns INCORRECT_TYPE If the JSON value is not a valid floating-point number. */ simdjson_inline simdjson_result get_double_in_string() noexcept; /** * Cast this JSON value to a string. * * The string is guaranteed to be valid UTF-8. * * Important: Calling get_string() twice on the same document is an error. * * @param allow_replacement Whether to allow a replacement character for unmatched surrogate pairs. * @returns A UTF-8 string. 
The string is stored in the parser and will be invalidated the next * time it parses a document or when it is destroyed. * @returns INCORRECT_TYPE if the JSON value is not a string. */ simdjson_inline simdjson_result get_string(bool allow_replacement = false) noexcept; /** * Attempts to fill the provided std::string reference with the parsed value of the current string. * * The string is guaranteed to be valid UTF-8. * * Important: a value should be consumed once. Calling get_string() twice on the same value * is an error. * * Performance: This method may be slower than get_string() or get_string(bool) because it may need to allocate memory. * We recommend you avoid allocating a std::string unless you need to. * * @param receiver The string that is filled with the parsed value. * @param allow_replacement Whether to allow a replacement character for unmatched surrogate pairs. * @returns INCORRECT_TYPE if the JSON value is not a string. Otherwise, we return SUCCESS. */ template simdjson_inline error_code get_string(string_type& receiver, bool allow_replacement = false) noexcept; /** * Cast this JSON value to a string. * * The string is not guaranteed to be valid UTF-8. See https://simonsapin.github.io/wtf-8/ * * Important: Calling get_wobbly_string() twice on the same document is an error. * * @returns A string that is not guaranteed to be valid UTF-8. The string is stored in the parser and will be invalidated the next * time it parses a document or when it is destroyed. * @returns INCORRECT_TYPE if the JSON value is not a string. */ simdjson_inline simdjson_result get_wobbly_string() noexcept; /** * Cast this JSON value to a raw_json_string. * * The string is guaranteed to be valid UTF-8, and may have escapes in it (e.g. \\ or \n). * * @returns A pointer to the raw JSON for the given string. * @returns INCORRECT_TYPE if the JSON value is not a string. */ simdjson_inline simdjson_result get_raw_json_string() noexcept; /** * Cast this JSON value to a bool. * * @returns A bool value. * @returns INCORRECT_TYPE if the JSON value is not true or false. 
*/ simdjson_inline simdjson_result get_bool() noexcept; /** * Cast this JSON value to a value when the document is an object or an array. * * You must not have begun iterating through the object or array. When * SIMDJSON_DEVELOPMENT_CHECKS is set to 1 (which is the case when building in Debug mode * by default), and you have already begun iterating, * you will get an OUT_OF_ORDER_ITERATION error. If you have begun iterating, you can use * rewind() to reset the document to its initial state before calling this method. * * @returns A value if the document is a JSON array or object. * @returns SCALAR_DOCUMENT_AS_VALUE error if the document is a scalar (see is_scalar() function). */ simdjson_inline simdjson_result get_value() noexcept; /** * Checks if this JSON value is null. If and only if the value is * null, then it is consumed (we advance). If we find a token that * begins with 'n' but is not 'null', then an error is returned. * * @returns Whether the value is null. * @returns INCORRECT_TYPE If the JSON value begins with 'n' and is not 'null'. */ simdjson_inline simdjson_result is_null() noexcept; /** * Get this value as the given type. * * Supported types: object, array, raw_json_string, string_view, uint64_t, int64_t, double, bool * * You may use get_double(), get_bool(), get_uint64(), get_int64(), * get_object(), get_array(), get_raw_json_string(), or get_string() instead. * * @returns A value of the given type, parsed from the JSON. * @returns INCORRECT_TYPE If the JSON value is not the given type. */ template simdjson_inline simdjson_result get() & noexcept { // Unless the simdjson library provides an inline implementation, calling this method should // immediately fail. static_assert(!sizeof(T), "The get method with given type is not implemented by the simdjson library. " "The supported types are ondemand::object, ondemand::array, raw_json_string, std::string_view, uint64_t, " "int64_t, double, and bool. 
We recommend you use get_double(), get_bool(), get_uint64(), get_int64(), " " get_object(), get_array(), get_raw_json_string(), or get_string() instead of the get template."); } /** @overload template simdjson_result get() & noexcept */ template simdjson_inline simdjson_result get() && noexcept { // Unless the simdjson library provides an inline implementation, calling this method should // immediately fail. static_assert(!sizeof(T), "The get method with given type is not implemented by the simdjson library. " "The supported types are ondemand::object, ondemand::array, raw_json_string, std::string_view, uint64_t, " "int64_t, double, and bool. We recommend you use get_double(), get_bool(), get_uint64(), get_int64(), " " get_object(), get_array(), get_raw_json_string(), or get_string() instead of the get template."); } /** * Get this value as the given type. * * Supported types: object, array, raw_json_string, string_view, uint64_t, int64_t, double, bool, value * * Be mindful that the document instance must remain in scope while you are accessing object, array and value instances. * * @param out This is set to a value of the given type, parsed from the JSON. If there is an error, this may not be initialized. * @returns INCORRECT_TYPE If the JSON value is not an object. * @returns SUCCESS If the parse succeeded and the out parameter was set to the value. */ template simdjson_inline error_code get(T &out) & noexcept; /** @overload template error_code get(T &out) & noexcept */ template simdjson_inline error_code get(T &out) && noexcept; #if SIMDJSON_EXCEPTIONS /** * Cast this JSON value to an array. * * @returns An object that can be used to iterate the array. * @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not an array. */ simdjson_inline operator array() & noexcept(false); /** * Cast this JSON value to an object. * * @returns An object that can be used to look up or iterate fields. 
* @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not an object. */ simdjson_inline operator object() & noexcept(false); /** * Cast this JSON value to an unsigned integer. * * @returns An unsigned 64-bit integer. * @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not a 64-bit unsigned integer. */ simdjson_inline operator uint64_t() noexcept(false); /** * Cast this JSON value to a signed integer. * * @returns A signed 64-bit integer. * @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not a 64-bit integer. */ simdjson_inline operator int64_t() noexcept(false); /** * Cast this JSON value to a double. * * @returns A double. * @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not a valid floating-point number. */ simdjson_inline operator double() noexcept(false); /** * Cast this JSON value to a string. * * The string is guaranteed to be valid UTF-8. * * @returns A UTF-8 string. The string is stored in the parser and will be invalidated the next * time it parses a document or when it is destroyed. * @exception simdjson_error(INCORRECT_TYPE) if the JSON value is not a string. */ simdjson_inline operator std::string_view() noexcept(false); /** * Cast this JSON value to a raw_json_string. * * The string is guaranteed to be valid UTF-8, and may have escapes in it (e.g. \\ or \n). * * @returns A pointer to the raw JSON for the given string. * @exception simdjson_error(INCORRECT_TYPE) if the JSON value is not a string. */ simdjson_inline operator raw_json_string() noexcept(false); /** * Cast this JSON value to a bool. * * @returns A bool value. * @exception simdjson_error(INCORRECT_TYPE) if the JSON value is not true or false. */ simdjson_inline operator bool() noexcept(false); /** * Cast this JSON value to a value when the document is an object or an array. * * You must not have begun iterating through the object or array. 
When * SIMDJSON_DEVELOPMENT_CHECKS is defined, and you have already begun iterating, * you will get an OUT_OF_ORDER_ITERATION error. If you have begun iterating, you can use * rewind() to reset the document to its initial state before calling this method. * * @returns A value if the document is a JSON array or object. * @exception SCALAR_DOCUMENT_AS_VALUE error if the document is a scalar (see is_scalar() function). */ simdjson_inline operator value() noexcept(false); #endif /** * This method scans the array and counts the number of elements. * The count_elements method should always be called before you have begun * iterating through the array: it is expected that you are pointing at * the beginning of the array. * The runtime complexity is linear in the size of the array. After * calling this function, if successful, the array is rewound to its * beginning as if it had never been accessed. If the JSON is malformed (e.g., * there is a missing comma), then an error is returned and it is no longer * safe to continue. */ simdjson_inline simdjson_result count_elements() & noexcept; /** * This method scans the object and counts the number of key-value pairs. * The count_fields method should always be called before you have begun * iterating through the object: it is expected that you are pointing at * the beginning of the object. * The runtime complexity is linear in the size of the object. After * calling this function, if successful, the object is rewound to its * beginning as if it had never been accessed. If the JSON is malformed (e.g., * there is a missing comma), then an error is returned and it is no longer * safe to continue. * * To check that an object is empty, it is more performant to use * the is_empty() method. */ simdjson_inline simdjson_result count_fields() & noexcept; /** * Get the value at the given index in the array. This function has linear-time complexity. 
* This function should only be called once on an array instance since the array iterator is not reset between each call. * * @return The value at the given index, or: * - INDEX_OUT_OF_BOUNDS if the array index is larger than an array length */ simdjson_inline simdjson_result at(size_t index) & noexcept; /** * Begin array iteration. * * Part of the std::iterable interface. */ simdjson_inline simdjson_result begin() & noexcept; /** * Sentinel representing the end of the array. * * Part of the std::iterable interface. */ simdjson_inline simdjson_result end() & noexcept; /** * Look up a field by name on an object (order-sensitive). * * The following code reads z, then y, then x, and thus will not retrieve x or y if fed the * JSON `{ "x": 1, "y": 2, "z": 3 }`: * * ```c++ * simdjson::ondemand::parser parser; * auto obj = parser.parse(R"( { "x": 1, "y": 2, "z": 3 } )"_padded); * double z = obj.find_field("z"); * double y = obj.find_field("y"); * double x = obj.find_field("x"); * ``` * * **Raw Keys:** The lookup will be done against the *raw* key, and will not unescape keys. * e.g. `object["a"]` will match `{ "a": 1 }`, but will *not* match `{ "\u0061": 1 }`. * * * You must consume the fields on an object one at a time. A request for a new key * invalidates previous field values: it makes them unsafe. E.g., the array * given by content["bids"].get_array() should not be accessed after you have called * content["asks"].get_array(). You can detect such mistakes by first compiling and running * the code in Debug mode (or with the macro `SIMDJSON_DEVELOPMENT_CHECKS` set to 1): an * OUT_OF_ORDER_ITERATION error is generated. * * You are expected to access keys only once. You should access the value corresponding to * a key a single time. Doing object["mykey"].to_string()and then again object["mykey"].to_string() * is an error. * * @param key The key to look up. * @returns The value of the field, or NO_SUCH_FIELD if the field is not in the object. 
*/ simdjson_inline simdjson_result find_field(std::string_view key) & noexcept; /** @overload simdjson_inline simdjson_result find_field(std::string_view key) & noexcept; */ simdjson_inline simdjson_result find_field(const char *key) & noexcept; /** * Look up a field by name on an object, without regard to key order. * * **Performance Notes:** This is a bit less performant than find_field(), though its effect varies * and often appears negligible. It starts out normally, starting out at the last field; but if * the field is not found, it scans from the beginning of the object to see if it missed it. That * missing case has a non-cache-friendly bump and lots of extra scanning, especially if the object * in question is large. The fact that the extra code is there also bumps the executable size. * * It is the default, however, because it would be highly surprising (and hard to debug) if the * default behavior failed to look up a field just because it was in the wrong order--and many * APIs assume this. Therefore, you must be explicit if you want to treat objects as out of order. * * Use find_field() if you are sure fields will be in order (or are willing to treat it as if the * field wasn't there when they aren't). * * You must consume the fields on an object one at a time. A request for a new key * invalidates previous field values: it makes them unsafe. E.g., the array * given by content["bids"].get_array() should not be accessed after you have called * content["asks"].get_array(). You can detect such mistakes by first compiling and running * the code in Debug mode (or with the macro `SIMDJSON_DEVELOPMENT_CHECKS` set to 1): an * OUT_OF_ORDER_ITERATION error is generated. * * You are expected to access keys only once. You should access the value corresponding to a key * a single time. Doing object["mykey"].to_string() and then again object["mykey"].to_string() * is an error. * * @param key The key to look up. 
* @returns The value of the field, or NO_SUCH_FIELD if the field is not in the object. */ simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; */ simdjson_inline simdjson_result find_field_unordered(const char *key) & noexcept; /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; */ simdjson_inline simdjson_result operator[](std::string_view key) & noexcept; /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; */ simdjson_inline simdjson_result operator[](const char *key) & noexcept; /** * Get the type of this JSON value. It does not validate or consume the value. * E.g., you must still call "is_null()" to check that a value is null even if * "type()" returns json_type::null. * * NOTE: If you're only expecting a value to be one type (a typical case), it's generally * better to just call .get_double, .get_string, etc. and check for INCORRECT_TYPE (or just * let it throw an exception). * * @error TAPE_ERROR when the JSON value is a bad token like "}" "," or "alse". */ simdjson_inline simdjson_result type() noexcept; /** * Checks whether the document is a scalar (string, number, null, Boolean). * Returns false when it is an array or object. * * @returns true if the type is string, number, null, or Boolean. * @error TAPE_ERROR when the JSON value is a bad token like "}" "," or "alse". */ simdjson_inline simdjson_result is_scalar() noexcept; /** * Checks whether the document is a negative number. * * @returns true if the number is negative. */ simdjson_inline bool is_negative() noexcept; /** * Checks whether the document is an integer number. Note that * this requires partially parsing the number string. 
If * the value is determined to be an integer, it may still * not parse properly as an integer in subsequent steps * (e.g., it might overflow). * * @returns true if the number is an integer. */ simdjson_inline simdjson_result is_integer() noexcept; /** * Determine the number type (integer or floating-point number) as quickly * as possible. This function does not fully validate the input. It is * useful when you only need to classify the numbers, without parsing them. * * If you are planning to retrieve the value or you need full validation, * consider using the get_number() method instead: it will fully parse * and validate the input, and give you access to the type: * get_number().get_number_type(). * * get_number_type() is number_type::unsigned_integer if we have * an integer greater than or equal to 9223372036854775808 * get_number_type() is number_type::signed_integer if we have an * integer that is less than 9223372036854775808 * Otherwise, get_number_type() has value number_type::floating_point_number * * This function requires processing the number string, but it is expected * to be faster than get_number().get_number_type() because it does not * parse the number value. * * @returns the type of the number */ simdjson_inline simdjson_result get_number_type() noexcept; /** * Attempt to parse an ondemand::number. An ondemand::number may * contain an integer value or a floating-point value, the simdjson * library will autodetect the type. Thus it is a dynamically typed * number. Before accessing the value, you must determine the detected * type. * * number.get_number_type() is number_type::signed_integer if we have * an integer in [-9223372036854775808,9223372036854775808) * You can recover the value by calling number.get_int64() and you * have that number.is_int64() is true. 
* * number.get_number_type() is number_type::unsigned_integer if we have * an integer in [9223372036854775808,18446744073709551616) * You can recover the value by calling number.get_uint64() and you * have that number.is_uint64() is true. * * Otherwise, number.get_number_type() has value number_type::floating_point_number * and we have a binary64 number. * You can recover the value by calling number.get_double() and you * have that number.is_double() is true. * * You must check the type before accessing the value: it is an error * to call "get_int64()" when number.get_number_type() is not * number_type::signed_integer and when number.is_int64() is false. */ simdjson_warn_unused simdjson_inline simdjson_result get_number() noexcept; /** * Get the raw JSON for this token. * * The string_view will always point into the input buffer. * * The string_view will start at the beginning of the token, and include the entire token * *as well as all spaces until the next token (or EOF).* This means, for example, that a * string token always begins with a " and is always terminated by the final ", possibly * followed by a number of spaces. * * The string_view is *not* null-terminated. If this is a scalar (string, number, * boolean, or null), the character after the end of the string_view may be the padded buffer. * * Tokens include: * - { * - [ * - "a string (possibly with UTF-8 or backslashed characters like \\\")". * - -1.2e-100 * - true * - false * - null */ simdjson_inline simdjson_result raw_json_token() noexcept; /** * Reset the iterator inside the document instance so we are pointing back at the * beginning of the document, as if it had just been created. It invalidates all * values, objects and arrays that you have created so far (including unescaped strings). */ inline void rewind() noexcept; /** * Returns debugging information. */ inline std::string to_debug_string() noexcept; /** * Some unrecoverable error conditions may render the document instance unusable. 
* The is_alive() method returns true when the document is still suitable. */ inline bool is_alive() noexcept; /** * Returns the current location in the document if in bounds. */ inline simdjson_result current_location() const noexcept; /** * Returns true if this document has been fully parsed. * If you have consumed the whole document and at_end() returns * false, then there may be trailing content. */ inline bool at_end() const noexcept; /** * Returns the current depth in the document if in bounds. * * E.g., * 0 = finished with document * 1 = document root value (could be [ or {, not yet known) * 2 = , or } inside root array/object * 3 = key or value inside root array/object. */ simdjson_inline int32_t current_depth() const noexcept; /** * Get the value associated with the given JSON pointer. We use the RFC 6901 * https://tools.ietf.org/html/rfc6901 standard. * * ondemand::parser parser; * auto json = R"({ "foo": { "a": [ 10, 20, 30 ] }})"_padded; * auto doc = parser.iterate(json); * doc.at_pointer("/foo/a/1") == 20 * * It is allowed for a key to be the empty string: * * ondemand::parser parser; * auto json = R"({ "": { "a": [ 10, 20, 30 ] }})"_padded; * auto doc = parser.iterate(json); * doc.at_pointer("//a/1") == 20 * * Note that at_pointer() automatically calls rewind between each call. Thus * all values, objects and arrays that you have created so far (including unescaped strings) * are invalidated. After calling at_pointer, you need to consume the result: string values * should be stored in your own variables, arrays should be decoded and stored in your own array-like * structures and so forth. 
* * Also note that at_pointer() relies on find_field() which implies that we do not unescape keys when matching * * @return The value associated with the given JSON pointer, or: * - NO_SUCH_FIELD if a field does not exist in an object * - INDEX_OUT_OF_BOUNDS if an array index is larger than an array length * - INCORRECT_TYPE if a non-integer is used to access an array * - INVALID_JSON_POINTER if the JSON pointer is invalid and cannot be parsed * - SCALAR_DOCUMENT_AS_VALUE if the json_pointer is empty and the document is not a scalar (see is_scalar() function). */ simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; /** * Consumes the document and returns a string_view instance corresponding to the * document as represented in JSON. It points inside the original byte array containing * the JSON document. */ simdjson_inline simdjson_result raw_json() noexcept; protected: /** * Consumes the document. */ simdjson_inline error_code consume() noexcept; simdjson_inline document(ondemand::json_iterator &&iter) noexcept; simdjson_inline const uint8_t *text(uint32_t idx) const noexcept; simdjson_inline value_iterator resume_value_iterator() noexcept; simdjson_inline value_iterator get_root_value_iterator() noexcept; simdjson_inline simdjson_result start_or_resume_object() noexcept; static simdjson_inline document start(ondemand::json_iterator &&iter) noexcept; // // Fields // json_iterator iter{}; ///< Current position in the document static constexpr depth_t DOCUMENT_DEPTH = 0; ///< document depth is always 0 friend class array_iterator; friend class value; friend class ondemand::parser; friend class object; friend class array; friend class field; friend class token; friend class document_stream; friend class document_reference; }; /** * A document_reference is a thin wrapper around a document reference instance. 
*/ class document_reference { public: simdjson_inline document_reference() noexcept; simdjson_inline document_reference(document &d) noexcept; simdjson_inline document_reference(const document_reference &other) noexcept = default; simdjson_inline document_reference& operator=(const document_reference &other) noexcept = default; simdjson_inline void rewind() noexcept; simdjson_inline simdjson_result get_array() & noexcept; simdjson_inline simdjson_result get_object() & noexcept; simdjson_inline simdjson_result get_uint64() noexcept; simdjson_inline simdjson_result get_uint64_in_string() noexcept; simdjson_inline simdjson_result get_int64() noexcept; simdjson_inline simdjson_result get_int64_in_string() noexcept; simdjson_inline simdjson_result get_double() noexcept; simdjson_inline simdjson_result get_double_in_string() noexcept; simdjson_inline simdjson_result get_string(bool allow_replacement = false) noexcept; template simdjson_inline error_code get_string(string_type& receiver, bool allow_replacement = false) noexcept; simdjson_inline simdjson_result get_wobbly_string() noexcept; simdjson_inline simdjson_result get_raw_json_string() noexcept; simdjson_inline simdjson_result get_bool() noexcept; simdjson_inline simdjson_result get_value() noexcept; simdjson_inline simdjson_result is_null() noexcept; simdjson_inline simdjson_result raw_json() noexcept; simdjson_inline operator document&() const noexcept; #if SIMDJSON_EXCEPTIONS simdjson_inline operator array() & noexcept(false); simdjson_inline operator object() & noexcept(false); simdjson_inline operator uint64_t() noexcept(false); simdjson_inline operator int64_t() noexcept(false); simdjson_inline operator double() noexcept(false); simdjson_inline operator std::string_view() noexcept(false); simdjson_inline operator raw_json_string() noexcept(false); simdjson_inline operator bool() noexcept(false); simdjson_inline operator value() noexcept(false); #endif simdjson_inline simdjson_result count_elements() & 
noexcept; simdjson_inline simdjson_result count_fields() & noexcept; simdjson_inline simdjson_result at(size_t index) & noexcept; simdjson_inline simdjson_result begin() & noexcept; simdjson_inline simdjson_result end() & noexcept; simdjson_inline simdjson_result find_field(std::string_view key) & noexcept; simdjson_inline simdjson_result find_field(const char *key) & noexcept; simdjson_inline simdjson_result operator[](std::string_view key) & noexcept; simdjson_inline simdjson_result operator[](const char *key) & noexcept; simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; simdjson_inline simdjson_result find_field_unordered(const char *key) & noexcept; simdjson_inline simdjson_result type() noexcept; simdjson_inline simdjson_result is_scalar() noexcept; simdjson_inline simdjson_result current_location() noexcept; simdjson_inline int32_t current_depth() const noexcept; simdjson_inline bool is_negative() noexcept; simdjson_inline simdjson_result is_integer() noexcept; simdjson_inline simdjson_result get_number_type() noexcept; simdjson_inline simdjson_result get_number() noexcept; simdjson_inline simdjson_result raw_json_token() noexcept; simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; private: /** Raw pointer to the referenced document; nullptr when default-initialized. No destructor is declared in this class, so the pointee is presumably not owned here. */ document *doc{nullptr}; }; } // namespace ondemand } // namespace westmere } // namespace simdjson namespace simdjson { template<> struct simdjson_result : public westmere::implementation_simdjson_result_base { public: simdjson_inline simdjson_result(westmere::ondemand::document &&value) noexcept; ///< @private simdjson_inline simdjson_result(error_code error) noexcept; ///< @private simdjson_inline simdjson_result() noexcept = default; simdjson_inline error_code rewind() noexcept; simdjson_inline simdjson_result get_array() & noexcept; simdjson_inline simdjson_result get_object() & noexcept; simdjson_inline simdjson_result get_uint64() noexcept; simdjson_inline simdjson_result get_uint64_in_string() 
noexcept; simdjson_inline simdjson_result get_int64() noexcept; simdjson_inline simdjson_result get_int64_in_string() noexcept; simdjson_inline simdjson_result get_double() noexcept; simdjson_inline simdjson_result get_double_in_string() noexcept; simdjson_inline simdjson_result get_string(bool allow_replacement = false) noexcept; template simdjson_inline error_code get_string(string_type& receiver, bool allow_replacement = false) noexcept; simdjson_inline simdjson_result get_wobbly_string() noexcept; simdjson_inline simdjson_result get_raw_json_string() noexcept; simdjson_inline simdjson_result get_bool() noexcept; simdjson_inline simdjson_result get_value() noexcept; simdjson_inline simdjson_result is_null() noexcept; template simdjson_inline simdjson_result get() & noexcept; template simdjson_inline simdjson_result get() && noexcept; template simdjson_inline error_code get(T &out) & noexcept; template simdjson_inline error_code get(T &out) && noexcept; #if SIMDJSON_EXCEPTIONS simdjson_inline operator westmere::ondemand::array() & noexcept(false); simdjson_inline operator westmere::ondemand::object() & noexcept(false); simdjson_inline operator uint64_t() noexcept(false); simdjson_inline operator int64_t() noexcept(false); simdjson_inline operator double() noexcept(false); simdjson_inline operator std::string_view() noexcept(false); simdjson_inline operator westmere::ondemand::raw_json_string() noexcept(false); simdjson_inline operator bool() noexcept(false); simdjson_inline operator westmere::ondemand::value() noexcept(false); #endif simdjson_inline simdjson_result count_elements() & noexcept; simdjson_inline simdjson_result count_fields() & noexcept; simdjson_inline simdjson_result at(size_t index) & noexcept; simdjson_inline simdjson_result begin() & noexcept; simdjson_inline simdjson_result end() & noexcept; simdjson_inline simdjson_result find_field(std::string_view key) & noexcept; simdjson_inline simdjson_result find_field(const char *key) & noexcept; 
simdjson_inline simdjson_result operator[](std::string_view key) & noexcept; simdjson_inline simdjson_result operator[](const char *key) & noexcept; simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; simdjson_inline simdjson_result find_field_unordered(const char *key) & noexcept; simdjson_inline simdjson_result type() noexcept; simdjson_inline simdjson_result is_scalar() noexcept; simdjson_inline simdjson_result current_location() noexcept; simdjson_inline int32_t current_depth() const noexcept; simdjson_inline bool at_end() const noexcept; simdjson_inline bool is_negative() noexcept; simdjson_inline simdjson_result is_integer() noexcept; simdjson_inline simdjson_result get_number_type() noexcept; simdjson_inline simdjson_result get_number() noexcept; /** @copydoc simdjson_inline std::string_view document::raw_json_token() const noexcept */ simdjson_inline simdjson_result raw_json_token() noexcept; simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; }; } // namespace simdjson namespace simdjson { template<> struct simdjson_result : public westmere::implementation_simdjson_result_base { public: simdjson_inline simdjson_result(westmere::ondemand::document_reference value, error_code error) noexcept; simdjson_inline simdjson_result() noexcept = default; simdjson_inline error_code rewind() noexcept; simdjson_inline simdjson_result get_array() & noexcept; simdjson_inline simdjson_result get_object() & noexcept; simdjson_inline simdjson_result get_uint64() noexcept; simdjson_inline simdjson_result get_uint64_in_string() noexcept; simdjson_inline simdjson_result get_int64() noexcept; simdjson_inline simdjson_result get_int64_in_string() noexcept; simdjson_inline simdjson_result get_double() noexcept; simdjson_inline simdjson_result get_double_in_string() noexcept; simdjson_inline simdjson_result get_string(bool allow_replacement = false) noexcept; template simdjson_inline error_code 
get_string(string_type& receiver, bool allow_replacement = false) noexcept; simdjson_inline simdjson_result get_wobbly_string() noexcept; simdjson_inline simdjson_result get_raw_json_string() noexcept; simdjson_inline simdjson_result get_bool() noexcept; simdjson_inline simdjson_result get_value() noexcept; simdjson_inline simdjson_result is_null() noexcept; #if SIMDJSON_EXCEPTIONS simdjson_inline operator westmere::ondemand::array() & noexcept(false); simdjson_inline operator westmere::ondemand::object() & noexcept(false); simdjson_inline operator uint64_t() noexcept(false); simdjson_inline operator int64_t() noexcept(false); simdjson_inline operator double() noexcept(false); simdjson_inline operator std::string_view() noexcept(false); simdjson_inline operator westmere::ondemand::raw_json_string() noexcept(false); simdjson_inline operator bool() noexcept(false); simdjson_inline operator westmere::ondemand::value() noexcept(false); #endif simdjson_inline simdjson_result count_elements() & noexcept; simdjson_inline simdjson_result count_fields() & noexcept; simdjson_inline simdjson_result at(size_t index) & noexcept; simdjson_inline simdjson_result begin() & noexcept; simdjson_inline simdjson_result end() & noexcept; simdjson_inline simdjson_result find_field(std::string_view key) & noexcept; simdjson_inline simdjson_result find_field(const char *key) & noexcept; simdjson_inline simdjson_result operator[](std::string_view key) & noexcept; simdjson_inline simdjson_result operator[](const char *key) & noexcept; simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; simdjson_inline simdjson_result find_field_unordered(const char *key) & noexcept; simdjson_inline simdjson_result type() noexcept; simdjson_inline simdjson_result is_scalar() noexcept; simdjson_inline simdjson_result current_location() noexcept; simdjson_inline simdjson_result current_depth() const noexcept; simdjson_inline simdjson_result is_negative() noexcept; 
simdjson_inline simdjson_result is_integer() noexcept; simdjson_inline simdjson_result get_number_type() noexcept; simdjson_inline simdjson_result get_number() noexcept; /** @copydoc simdjson_inline std::string_view document_reference::raw_json_token() const noexcept */ simdjson_inline simdjson_result raw_json_token() noexcept; simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; }; } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_H /* end file simdjson/generic/ondemand/document.h for westmere */ /* including simdjson/generic/ondemand/document_stream.h for westmere: #include "simdjson/generic/ondemand/document_stream.h" */ /* begin file simdjson/generic/ondemand/document_stream.h for westmere */ #ifndef SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_STREAM_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_STREAM_H */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/document.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/parser.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ #ifdef SIMDJSON_THREADS_ENABLED #include #include #include #endif namespace simdjson { namespace westmere { namespace ondemand { #ifdef SIMDJSON_THREADS_ENABLED /** @private Custom worker class **/ struct stage1_worker { stage1_worker() noexcept = default; stage1_worker(const stage1_worker&) = delete; stage1_worker(stage1_worker&&) = delete; /** NOTE(review): this deleted copy-assignment is declared as returning by value; the conventional signature returns stage1_worker&. It has no effect on callers because the operator is deleted. */ stage1_worker operator=(const stage1_worker&) = delete; ~stage1_worker(); /** * We only start the thread when it is needed, not at object construction, this may throw. * You should only call this once. 
**/ void start_thread(); /** * Start a stage 1 job. You should first call 'run', then 'finish'. * You must call start_thread once before. */ void run(document_stream * ds, parser * stage1, size_t next_batch_start); /** Wait for the run to finish (blocking). You should first call 'run', then 'finish'. **/ void finish(); private: /** * Normally, we would never stop the thread. But we do in the destructor. * This function is only safe assuming that you are not waiting for results. You * should have called run, then finish, and be done. **/ void stop_thread(); std::thread thread{}; /** These three variables define the work done by the thread. **/ ondemand::parser * stage1_thread_parser{}; size_t _next_batch_start{}; document_stream * owner{}; /** * We have two state variables. This could be streamlined to one variable in the future but * we use two for clarity. */ bool has_work{false}; bool can_work{true}; /** * We lock using a mutex. */ std::mutex locking_mutex{}; std::condition_variable cond_var{}; friend class document_stream; }; #endif // SIMDJSON_THREADS_ENABLED /** * A forward-only stream of documents. * * Produced by parser::iterate_many. * */ class document_stream { public: /** * Construct an uninitialized document_stream. * * ```c++ * document_stream docs; * auto error = parser.iterate_many(json).get(docs); * ``` */ simdjson_inline document_stream() noexcept; /** Move one document_stream to another. */ simdjson_inline document_stream(document_stream &&other) noexcept = default; /** Move one document_stream to another. */ simdjson_inline document_stream &operator=(document_stream &&other) noexcept = default; simdjson_inline ~document_stream() noexcept; /** * Returns the input size in bytes. */ inline size_t size_in_bytes() const noexcept; /** * After iterating through the stream, this method * returns the number of bytes that were not parsed at the end * of the stream. 
If truncated_bytes() differs from zero, * then the input was truncated maybe because incomplete JSON * documents were found at the end of the stream. You * may need to process the bytes in the interval [size_in_bytes()-truncated_bytes(), size_in_bytes()). * * You should only call truncated_bytes() after streaming through all * documents, like so: * * document_stream stream = parser.iterate_many(json,window); * for(auto & doc : stream) { * // do something with doc * } * size_t truncated = stream.truncated_bytes(); * */ inline size_t truncated_bytes() const noexcept; class iterator { public: using value_type = simdjson_result; using reference = value_type; using difference_type = std::ptrdiff_t; using iterator_category = std::input_iterator_tag; /** * Default constructor. */ simdjson_inline iterator() noexcept; /** * Get the current document (or error). */ simdjson_inline simdjson_result operator*() noexcept; /** * Advance to the next document (prefix). */ inline iterator& operator++() noexcept; /** * Check if we're at the end yet. * @param other the end iterator to compare to. */ simdjson_inline bool operator!=(const iterator &other) const noexcept; /** * @private * * Gives the current index in the input document in bytes. * * document_stream stream = parser.iterate_many(json,window); * for(auto i = stream.begin(); i != stream.end(); ++i) { * auto doc = *i; * size_t index = i.current_index(); * } * * This function (current_index()) is experimental and the usage * may change in future versions of simdjson: we find the API somewhat * awkward and we would like to offer something friendlier. */ simdjson_inline size_t current_index() const noexcept; /** * @private * * Gives a view of the current document at the current position. 
* * document_stream stream = parser.iterate_many(json,window); * for(auto i = stream.begin(); i != stream.end(); ++i) { * std::string_view v = i.source(); * } * * The returned string_view instance is simply a map to the (unparsed) * source string: it may thus include white-space characters and all manner * of padding. * * This function (source()) is experimental and the usage * may change in future versions of simdjson: we find the API somewhat * awkward and we would like to offer something friendlier. * */ simdjson_inline std::string_view source() const noexcept; /** * Returns error of the stream (if any). */ inline error_code error() const noexcept; private: simdjson_inline iterator(document_stream *s, bool finished) noexcept; /** The document_stream we're iterating through. */ document_stream* stream; /** Whether we're finished or not. */ bool finished; friend class document; friend class document_stream; friend class json_iterator; }; /** * Start iterating the documents in the stream. */ simdjson_inline iterator begin() noexcept; /** * The end of the stream, for iterator comparison purposes. */ simdjson_inline iterator end() noexcept; private: document_stream &operator=(const document_stream &) = delete; // Disallow copying document_stream(const document_stream &other) = delete; // Disallow copying /** * Construct a document_stream. Does not allocate or parse anything until the iterator is * used. * * @param parser is a reference to the parser instance used to generate this document_stream * @param buf is the raw byte buffer we need to process * @param len is the length of the raw byte buffer in bytes * @param batch_size is the size of the windows (must be strictly greater or equal to the largest JSON document) */ simdjson_inline document_stream( ondemand::parser &parser, const uint8_t *buf, size_t len, size_t batch_size, bool allow_comma_separated ) noexcept; /** * Parse the first document in the buffer. 
Used by begin(), to handle allocation and * initialization. */ inline void start() noexcept; /** * Parse the next document found in the buffer previously given to document_stream. * * The content should be a valid JSON document encoded as UTF-8. If there is a * UTF-8 BOM, the parser skips it. * * You do NOT need to pre-allocate a parser. This function takes care of * pre-allocating a capacity defined by the batch_size defined when creating the * document_stream object. * * The function returns simdjson::EMPTY if there is no more data to be parsed. * * The function returns simdjson::SUCCESS (as integer = 0) in case of success * and indicates that the buffer has successfully been parsed to the end. * Every document it contained has been parsed without error. * * The function returns an error code from simdjson/simdjson.h in case of failure * such as simdjson::CAPACITY, simdjson::MEMALLOC, simdjson::DEPTH_ERROR and so forth * (the simdjson::error_message function converts these error codes into a string). * * NOTE(review): next() is declared void, so the codes described above are presumably * recorded in the stream's `error` member rather than returned -- confirm against the definition. * * You can also check validity by calling parser.is_valid(). The same parser can * and should be reused for the other documents in the buffer. */ inline void next() noexcept; /** Move the json_iterator of the document to the location of the next document in the stream. */ inline void next_document() noexcept; /** Get the next document index. */ inline size_t next_batch_start() const noexcept; /** Pass the next batch through stage 1 with the given parser. */ inline error_code run_stage1(ondemand::parser &p, size_t batch_start) noexcept; // Fields ondemand::parser *parser; const uint8_t *buf; size_t len; size_t batch_size; bool allow_comma_separated; /** * We are going to use just one document instance. The document owns * the json_iterator. It implies that we only ever pass a reference * to the document to the users. */ document doc{}; /** The error (or lack thereof) from the current document. 
*/ error_code error; size_t batch_start{0}; size_t doc_index{}; #ifdef SIMDJSON_THREADS_ENABLED /** Indicates whether we use threads. Note that this needs to be a constant during the execution of the parsing. */ bool use_thread; inline void load_from_stage1_thread() noexcept; /** Start a thread to run stage 1 on the next batch. */ inline void start_stage1_thread() noexcept; /** Wait for the stage 1 thread to finish and capture the results. */ inline void finish_stage1_thread() noexcept; /** The error returned from the stage 1 thread. */ error_code stage1_thread_error{UNINITIALIZED}; /** The thread used to run stage 1 against the next batch in the background. Allocated with new(std::nothrow), so this pointer is null if the allocation failed. */ std::unique_ptr worker{new(std::nothrow) stage1_worker()}; /** * The parser used to run stage 1 in the background. Will be swapped * with the regular parser when finished. */ ondemand::parser stage1_thread_parser{}; friend struct stage1_worker; #endif // SIMDJSON_THREADS_ENABLED friend class parser; friend class document; friend class json_iterator; friend struct simdjson_result; friend struct internal::simdjson_result_base; }; // document_stream } // namespace ondemand } // namespace westmere } // namespace simdjson namespace simdjson { template<> struct simdjson_result : public westmere::implementation_simdjson_result_base { public: simdjson_inline simdjson_result(westmere::ondemand::document_stream &&value) noexcept; ///< @private simdjson_inline simdjson_result(error_code error) noexcept; ///< @private simdjson_inline simdjson_result() noexcept = default; }; } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_STREAM_H /* end file simdjson/generic/ondemand/document_stream.h for westmere */ /* including simdjson/generic/ondemand/field.h for westmere: #include "simdjson/generic/ondemand/field.h" */ /* begin file simdjson/generic/ondemand/field.h for westmere */ #ifndef SIMDJSON_GENERIC_ONDEMAND_FIELD_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* 
amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_FIELD_H */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { namespace westmere { namespace ondemand { /** * A JSON field (key/value pair) in an object. * * Returned from object iteration. * * Derives from std::pair so you can use C++ algorithms that rely on pairs. */ class field : public std::pair { public: /** * Create a new invalid field. * * Exists so you can declare a variable and later assign to it before use. */ simdjson_inline field() noexcept; /** * Get the key as a string_view (for higher speed, consider key(), which returns a raw_json_string). * We deliberately use a more cumbersome name (unescaped_key) to force users * to think twice about using it. * * This consumes the key: once you have called unescaped_key(), you cannot * call it again nor can you call key(). * * Note that allow_replacement has no default value in this declaration, whereas the * simdjson_result wrapper declared further down in this header defaults it to false. */ simdjson_inline simdjson_warn_unused simdjson_result unescaped_key(bool allow_replacement) noexcept; /** * Get the key as a raw_json_string. Can be used for direct comparison with * an unescaped C string: e.g., key() == "test". */ simdjson_inline raw_json_string key() const noexcept; /** * Get the field value. 
*/ simdjson_inline ondemand::value &value() & noexcept; /** * @overload ondemand::value &ondemand::value() & noexcept */ simdjson_inline ondemand::value value() && noexcept; protected: simdjson_inline field(raw_json_string key, ondemand::value &&value) noexcept; static simdjson_inline simdjson_result start(value_iterator &parent_iter) noexcept; static simdjson_inline simdjson_result start(const value_iterator &parent_iter, raw_json_string key) noexcept; friend struct simdjson_result; friend class object_iterator; }; } // namespace ondemand } // namespace westmere } // namespace simdjson namespace simdjson { template<> struct simdjson_result : public westmere::implementation_simdjson_result_base { public: simdjson_inline simdjson_result(westmere::ondemand::field &&value) noexcept; ///< @private simdjson_inline simdjson_result(error_code error) noexcept; ///< @private simdjson_inline simdjson_result() noexcept = default; /** Same names as the accessors of westmere::ondemand::field; presumably each forwards to the wrapped field or propagates the stored error -- confirm against the definitions. */ simdjson_inline simdjson_result unescaped_key(bool allow_replacement = false) noexcept; simdjson_inline simdjson_result key() noexcept; simdjson_inline simdjson_result value() noexcept; }; } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_FIELD_H /* end file simdjson/generic/ondemand/field.h for westmere */ /* including simdjson/generic/ondemand/object.h for westmere: #include "simdjson/generic/ondemand/object.h" */ /* begin file simdjson/generic/ondemand/object.h for westmere */ #ifndef SIMDJSON_GENERIC_ONDEMAND_OBJECT_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_OBJECT_H */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator.h" */ /* amalgamation skipped (editor-only): #endif // 
SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { namespace westmere { namespace ondemand { /** * A forward-only JSON object field iterator. */ class object { public: /** * Create a new invalid object. * * Exists so you can declare a variable and later assign to it before use. */ simdjson_inline object() noexcept = default; simdjson_inline simdjson_result begin() noexcept; simdjson_inline simdjson_result end() noexcept; /** * Look up a field by name on an object (order-sensitive). * * The following code reads z, then y, then x, and thus will not retrieve x or y if fed the * JSON `{ "x": 1, "y": 2, "z": 3 }`: * * ```c++ * simdjson::ondemand::parser parser; * auto obj = parser.iterate(R"( { "x": 1, "y": 2, "z": 3 } )"_padded); * double z = obj.find_field("z"); * double y = obj.find_field("y"); * double x = obj.find_field("x"); * ``` * If you have multiple fields with a matching key ({"x": 1, "x": 1}) be mindful * that only one field is returned. * * **Raw Keys:** The lookup will be done against the *raw* key, and will not unescape keys. * e.g. `object["a"]` will match `{ "a": 1 }`, but will *not* match `{ "\u0061": 1 }`. * * You must consume the fields on an object one at a time. A request for a new key * invalidates previous field values: it makes them unsafe. The value instance you get * from `content["bids"]` becomes invalid when you call `content["asks"]`. The array * given by content["bids"].get_array() should not be accessed after you have called * content["asks"].get_array(). You can detect such mistakes by first compiling and running * the code in Debug mode (or with the macro `SIMDJSON_DEVELOPMENT_CHECKS` set to 1): an * OUT_OF_ORDER_ITERATION error is generated. * * You are expected to access keys only once. You should access the value corresponding to a * key a single time. Doing object["mykey"].to_string() and then again object["mykey"].to_string() * is an error. * * @param key The key to look up. 
* @returns The value of the field, or NO_SUCH_FIELD if the field is not in the object. */ simdjson_inline simdjson_result find_field(std::string_view key) & noexcept; /** @overload simdjson_inline simdjson_result find_field(std::string_view key) & noexcept; */ simdjson_inline simdjson_result find_field(std::string_view key) && noexcept; /** * Look up a field by name on an object, without regard to key order. * * **Performance Notes:** This is a bit less performant than find_field(), though its effect varies * and often appears negligible. It starts out normally, starting out at the last field; but if * the field is not found, it scans from the beginning of the object to see if it missed it. That * missing case has a non-cache-friendly bump and lots of extra scanning, especially if the object * in question is large. The fact that the extra code is there also bumps the executable size. * * It is the default, however, because it would be highly surprising (and hard to debug) if the * default behavior failed to look up a field just because it was in the wrong order--and many * APIs assume this. Therefore, you must be explicit if you want to treat objects as out of order. * * Use find_field() if you are sure fields will be in order (or are willing to treat it as if the * field wasn't there when they aren't). * * If you have multiple fields with a matching key ({"x": 1, "x": 1}) be mindful * that only one field is returned. * * You must consume the fields on an object one at a time. A request for a new key * invalidates previous field values: it makes them unsafe. The value instance you get * from `content["bids"]` becomes invalid when you call `content["asks"]`. The array * given by content["bids"].get_array() should not be accessed after you have called * content["asks"].get_array(). 
You can detect such mistakes by first compiling and running * the code in Debug mode (or with the macro `SIMDJSON_DEVELOPMENT_CHECKS` set to 1): an * OUT_OF_ORDER_ITERATION error is generated. * * You are expected to access keys only once. You should access the value corresponding to a key * a single time. Doing object["mykey"].to_string() and then again object["mykey"].to_string() is an error. * * @param key The key to look up. * @returns The value of the field, or NO_SUCH_FIELD if the field is not in the object. */ simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; */ simdjson_inline simdjson_result find_field_unordered(std::string_view key) && noexcept; /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; */ simdjson_inline simdjson_result operator[](std::string_view key) & noexcept; /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; */ simdjson_inline simdjson_result operator[](std::string_view key) && noexcept; /** * Get the value associated with the given JSON pointer. We use the RFC 6901 * https://tools.ietf.org/html/rfc6901 standard, interpreting the current node * as the root of its own JSON document. * * ondemand::parser parser; * auto json = R"({ "foo": { "a": [ 10, 20, 30 ] }})"_padded; * auto doc = parser.iterate(json); * doc.at_pointer("/foo/a/1") == 20 * * It is allowed for a key to be the empty string: * * ondemand::parser parser; * auto json = R"({ "": { "a": [ 10, 20, 30 ] }})"_padded; * auto doc = parser.iterate(json); * doc.at_pointer("//a/1") == 20 * * Note that at_pointer() called on the document automatically calls the document's rewind * method between each call. It invalidates all previously accessed arrays, objects and values * that have not been consumed. 
Yet it is not the case when calling at_pointer on an object * instance: there is no rewind and no invalidation. * * You may call at_pointer more than once on an object, but each time the pointer is advanced * to be within the value matched by the key indicated by the JSON pointer query. Thus any preceding * key (as well as the current key) can no longer be used with following JSON pointer calls. * * Also note that at_pointer() relies on find_field() which implies that we do not unescape keys when matching. * * @return The value associated with the given JSON pointer, or: * - NO_SUCH_FIELD if a field does not exist in an object * - INDEX_OUT_OF_BOUNDS if an array index is larger than an array length * - INCORRECT_TYPE if a non-integer is used to access an array * - INVALID_JSON_POINTER if the JSON pointer is invalid and cannot be parsed */ inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; /** * Reset the iterator so that we are pointing back at the * beginning of the object. You should still consume values only once even if you * can iterate through the object more than once. If you unescape a string within * the object more than once, you have unsafe code. Note that rewinding an object * means that you may need to reparse it anew: it is not a free operation. * * @returns true if the object contains some elements (not empty) */ inline simdjson_result reset() & noexcept; /** * This method scans the beginning of the object and checks whether the * object is empty. * The runtime complexity is constant time. After * calling this function, if successful, the object is 'rewound' to its * beginning as if it had never been accessed. If the JSON is malformed (e.g., * there is a missing comma), then an error is returned and it is no longer * safe to continue. */ inline simdjson_result is_empty() & noexcept; /** * This method scans the object and counts the number of key-value pairs. 
* The count_fields method should always be called before you have begun * iterating through the object: it is expected that you are pointing at * the beginning of the object. * The runtime complexity is linear in the size of the object. After * calling this function, if successful, the object is 'rewound' to its * beginning as if it had never been accessed. If the JSON is malformed (e.g., * there is a missing comma), then an error is returned and it is no longer * safe to continue. * * To check that an object is empty, it is more performant to use * the is_empty() method. * * Performance hint: You should only call count_fields() as a last * resort as it may require scanning the document twice or more. */ simdjson_inline simdjson_result count_fields() & noexcept; /** * Consumes the object and returns a string_view instance corresponding to the * object as represented in JSON. It points inside the original byte array containing * the JSON document. */ simdjson_inline simdjson_result raw_json() noexcept; protected: /** * Go to the end of the object, no matter where you are right now. 
*/ simdjson_inline error_code consume() noexcept; static simdjson_inline simdjson_result start(value_iterator &iter) noexcept; static simdjson_inline simdjson_result start_root(value_iterator &iter) noexcept; static simdjson_inline simdjson_result started(value_iterator &iter) noexcept; static simdjson_inline object resume(const value_iterator &iter) noexcept; simdjson_inline object(const value_iterator &iter) noexcept; simdjson_warn_unused simdjson_inline error_code find_field_raw(const std::string_view key) noexcept; value_iterator iter{}; friend class value; friend class document; friend struct simdjson_result; }; } // namespace ondemand } // namespace westmere } // namespace simdjson namespace simdjson { template<> struct simdjson_result : public westmere::implementation_simdjson_result_base { public: simdjson_inline simdjson_result(westmere::ondemand::object &&value) noexcept; ///< @private simdjson_inline simdjson_result(error_code error) noexcept; ///< @private simdjson_inline simdjson_result() noexcept = default; simdjson_inline simdjson_result begin() noexcept; simdjson_inline simdjson_result end() noexcept; simdjson_inline simdjson_result find_field(std::string_view key) & noexcept; simdjson_inline simdjson_result find_field(std::string_view key) && noexcept; simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; simdjson_inline simdjson_result find_field_unordered(std::string_view key) && noexcept; simdjson_inline simdjson_result operator[](std::string_view key) & noexcept; simdjson_inline simdjson_result operator[](std::string_view key) && noexcept; simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; inline simdjson_result reset() noexcept; inline simdjson_result is_empty() noexcept; inline simdjson_result count_fields() & noexcept; inline simdjson_result raw_json() noexcept; }; } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_OBJECT_H /* end file 
simdjson/generic/ondemand/object.h for westmere */ /* including simdjson/generic/ondemand/object_iterator.h for westmere: #include "simdjson/generic/ondemand/object_iterator.h" */ /* begin file simdjson/generic/ondemand/object_iterator.h for westmere */ #ifndef SIMDJSON_GENERIC_ONDEMAND_OBJECT_ITERATOR_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_OBJECT_ITERATOR_H */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { namespace westmere { namespace ondemand { /** Iterator over the fields (key and value) of an object; declares only operator*, ==, != and prefix ++. */ class object_iterator { public: /** * Create a new invalid object_iterator. * * Exists so you can declare a variable and later assign to it before use. */ simdjson_inline object_iterator() noexcept = default; // // Iterator interface // // Reads key and value, yielding them to the user. // MUST ONLY BE CALLED ONCE PER ITERATION. simdjson_inline simdjson_result operator*() noexcept; // Assumes it's being compared with the end. true if depth < iter->depth. simdjson_inline bool operator==(const object_iterator &) const noexcept; // Assumes it's being compared with the end. true if depth >= iter->depth. simdjson_inline bool operator!=(const object_iterator &) const noexcept; // Checks for '}' and ',' simdjson_inline object_iterator &operator++() noexcept; private: /** * The underlying JSON iterator. * * PERF NOTE: expected to be elided in favor of the parent document: this is set when the object * is first used, and never changes afterwards. 
*/ value_iterator iter{}; simdjson_inline object_iterator(const value_iterator &iter) noexcept; friend struct simdjson_result; friend class object; }; } // namespace ondemand } // namespace westmere } // namespace simdjson namespace simdjson { template<> struct simdjson_result : public westmere::implementation_simdjson_result_base { public: simdjson_inline simdjson_result(westmere::ondemand::object_iterator &&value) noexcept; ///< @private simdjson_inline simdjson_result(error_code error) noexcept; ///< @private simdjson_inline simdjson_result() noexcept = default; // // Iterator interface // // Reads key and value, yielding them to the user. simdjson_inline simdjson_result operator*() noexcept; // MUST ONLY BE CALLED ONCE PER ITERATION. // Assumes it's being compared with the end. true if depth < iter->depth. simdjson_inline bool operator==(const simdjson_result &) const noexcept; // Assumes it's being compared with the end. true if depth >= iter->depth. simdjson_inline bool operator!=(const simdjson_result &) const noexcept; // Checks for '}' and ',' simdjson_inline simdjson_result &operator++() noexcept; }; } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_OBJECT_ITERATOR_H /* end file simdjson/generic/ondemand/object_iterator.h for westmere */ /* including simdjson/generic/ondemand/serialization.h for westmere: #include "simdjson/generic/ondemand/serialization.h" */ /* begin file simdjson/generic/ondemand/serialization.h for westmere */ #ifndef SIMDJSON_GENERIC_ONDEMAND_SERIALIZATION_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_SERIALIZATION_H */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { /** * Create a string-view instance out of a document instance. 
The string-view instance
 * contains JSON text that is suitable to be parsed as JSON again. It does not
 * validate the content.
 */
inline simdjson_result to_json_string(westmere::ondemand::document& x) noexcept;
/**
 * Create a string-view instance out of a value instance. The string-view instance
 * contains JSON text that is suitable to be parsed as JSON again. The value must
 * not have been accessed previously. It does not
 * validate the content.
 */
inline simdjson_result to_json_string(westmere::ondemand::value& x) noexcept;
/**
 * Create a string-view instance out of an object instance. The string-view instance
 * contains JSON text that is suitable to be parsed as JSON again. It does not
 * validate the content.
 */
inline simdjson_result to_json_string(westmere::ondemand::object& x) noexcept;
/**
 * Create a string-view instance out of an array instance. The string-view instance
 * contains JSON text that is suitable to be parsed as JSON again. It does not
 * validate the content.
 */
inline simdjson_result to_json_string(westmere::ondemand::array& x) noexcept;
// NOTE(review): the four declarations below are textually identical in this text because
// the template arguments of `simdjson_result` are missing; presumably there is one overload
// per wrapped type (document, value, object, array) -- confirm against the generated header.
inline simdjson_result to_json_string(simdjson_result x);
inline simdjson_result to_json_string(simdjson_result x);
inline simdjson_result to_json_string(simdjson_result x);
inline simdjson_result to_json_string(simdjson_result x);
} // namespace simdjson

/**
 * We want to support argument-dependent lookup (ADL).
 * Hence we should define operator<< in the namespace
 * where the argument (here value, object, etc.) resides.
 * Credit: @madhur4127
 * See https://github.com/simdjson/simdjson/issues/1768
 */
namespace simdjson { namespace westmere { namespace ondemand {

/**
 * Print JSON to an output stream. It does not
 * validate the content.
 *
 * @param out The output stream.
 * @param x The value.
 * @throw if there is an error with the underlying output stream. simdjson itself will not throw.
 */
inline std::ostream& operator<<(std::ostream& out, simdjson::westmere::ondemand::value x);
#if SIMDJSON_EXCEPTIONS
inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x);
#endif
/**
 * Print JSON to an output stream. It does not
 * validate the content.
 *
 * @param out The output stream.
 * @param value The array.
 * @throw if there is an error with the underlying output stream. simdjson itself will not throw.
 */
inline std::ostream& operator<<(std::ostream& out, simdjson::westmere::ondemand::array value);
#if SIMDJSON_EXCEPTIONS
inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x);
#endif
/**
 * Print JSON to an output stream. It does not
 * validate the content.
 *
 * @param out The output stream.
 * @param value The document.
 * @throw if there is an error with the underlying output stream. simdjson itself will not throw.
 */
inline std::ostream& operator<<(std::ostream& out, simdjson::westmere::ondemand::document& value);
#if SIMDJSON_EXCEPTIONS
inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result&& x);
#endif
// Same as the document overload above, for a document_reference.
inline std::ostream& operator<<(std::ostream& out, simdjson::westmere::ondemand::document_reference& value);
#if SIMDJSON_EXCEPTIONS
inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result&& x);
#endif
/**
 * Print JSON to an output stream. It does not
 * validate the content.
 *
 * @param out The output stream.
 * @param value The object.
 * @throw if there is an error with the underlying output stream. simdjson itself will not throw.
 */
inline std::ostream& operator<<(std::ostream& out, simdjson::westmere::ondemand::object value);
#if SIMDJSON_EXCEPTIONS
inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x);
#endif
}}} // namespace simdjson::westmere::ondemand

#endif // SIMDJSON_GENERIC_ONDEMAND_SERIALIZATION_H
/* end file simdjson/generic/ondemand/serialization.h for westmere */

// Inline definitions
/* including simdjson/generic/ondemand/array-inl.h for westmere: #include "simdjson/generic/ondemand/array-inl.h" */
/* begin file simdjson/generic/ondemand/array-inl.h for westmere */
#ifndef SIMDJSON_GENERIC_ONDEMAND_ARRAY_INL_H

/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */
/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_ARRAY_INL_H */
/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */
/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array.h" */
/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array_iterator-inl.h" */
/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator.h" */
/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value.h" */
/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator-inl.h" */
/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */

namespace simdjson {
namespace westmere {
namespace ondemand {

//
// ### Live States
//
// While iterating or looking up values, depth >= iter->depth. at_start may vary. Error is
// always SUCCESS:
//
// - Start: This is the state when the array is first found and the iterator is just past the `[`.
//   In this state, at_start == true.
// - Next: After we hand a scalar value to the user, or an array/object which they then fully
//   iterate over, the iterator is at the `,` before the next value (or `]`).
In this state, // depth == iter->depth, at_start == false, and error == SUCCESS. // - Unfinished Business: When we hand an array/object to the user which they do not fully // iterate over, we need to finish that iteration by skipping child values until we reach the // Next state. In this state, depth > iter->depth, at_start == false, and error == SUCCESS. // // ## Error States // // In error states, we will yield exactly one more value before stopping. iter->depth == depth // and at_start is always false. We decrement after yielding the error, moving to the Finished // state. // // - Chained Error: When the array iterator is part of an error chain--for example, in // `for (auto tweet : doc["tweets"])`, where the tweet element may be missing or not be an // array--we yield that error in the loop, exactly once. In this state, error != SUCCESS and // iter->depth == depth, and at_start == false. We decrement depth when we yield the error. // - Missing Comma Error: When the iterator ++ method discovers there is no comma between elements, // we flag that as an error and treat it exactly the same as a Chained Error. In this state, // error == TAPE_ERROR, iter->depth == depth, and at_start == false. // // ## Terminal State // // The terminal state has iter->depth < depth. at_start is always false. // // - Finished: When we have reached a `]` or have reported an error, we are finished. We signal this // by decrementing depth. In this state, iter->depth < depth, at_start == false, and // error == SUCCESS. // simdjson_inline array::array(const value_iterator &_iter) noexcept : iter{_iter} { } simdjson_inline simdjson_result array::start(value_iterator &iter) noexcept { // We don't need to know if the array is empty to start iteration, but we do want to know if there // is an error--thus `simdjson_unused`. 
simdjson_unused bool has_value; SIMDJSON_TRY( iter.start_array().get(has_value) ); return array(iter); } simdjson_inline simdjson_result array::start_root(value_iterator &iter) noexcept { simdjson_unused bool has_value; SIMDJSON_TRY( iter.start_root_array().get(has_value) ); return array(iter); } simdjson_inline simdjson_result array::started(value_iterator &iter) noexcept { bool has_value; SIMDJSON_TRY(iter.started_array().get(has_value)); return array(iter); } simdjson_inline simdjson_result array::begin() noexcept { #if SIMDJSON_DEVELOPMENT_CHECKS if (!iter.is_at_iterator_start()) { return OUT_OF_ORDER_ITERATION; } #endif return array_iterator(iter); } simdjson_inline simdjson_result array::end() noexcept { return array_iterator(iter); } simdjson_inline error_code array::consume() noexcept { auto error = iter.json_iter().skip_child(iter.depth()-1); if(error) { iter.abandon(); } return error; } simdjson_inline simdjson_result array::raw_json() noexcept { const uint8_t * starting_point{iter.peek_start()}; auto error = consume(); if(error) { return error; } // After 'consume()', we could be left pointing just beyond the document, but that // is ok because we are not going to dereference the final pointer position, we just // use it to compute the length in bytes. const uint8_t * final_point{iter._json_iter->unsafe_pointer()}; return std::string_view(reinterpret_cast(starting_point), size_t(final_point - starting_point)); } SIMDJSON_PUSH_DISABLE_WARNINGS SIMDJSON_DISABLE_STRICT_OVERFLOW_WARNING simdjson_inline simdjson_result array::count_elements() & noexcept { size_t count{0}; // Important: we do not consume any of the values. for(simdjson_unused auto v : *this) { count++; } // The above loop will always succeed, but we want to report errors. if(iter.error()) { return iter.error(); } // We need to move back at the start because we expect users to iterate through // the array after counting the number of elements. 
iter.reset_array(); return count; } SIMDJSON_POP_DISABLE_WARNINGS simdjson_inline simdjson_result array::is_empty() & noexcept { bool is_not_empty; auto error = iter.reset_array().get(is_not_empty); if(error) { return error; } return !is_not_empty; } inline simdjson_result array::reset() & noexcept { return iter.reset_array(); } inline simdjson_result array::at_pointer(std::string_view json_pointer) noexcept { if (json_pointer[0] != '/') { return INVALID_JSON_POINTER; } json_pointer = json_pointer.substr(1); // - means "the append position" or "the element after the end of the array" // We don't support this, because we're returning a real element, not a position. if (json_pointer == "-") { return INDEX_OUT_OF_BOUNDS; } // Read the array index size_t array_index = 0; size_t i; for (i = 0; i < json_pointer.length() && json_pointer[i] != '/'; i++) { uint8_t digit = uint8_t(json_pointer[i] - '0'); // Check for non-digit in array index. If it's there, we're trying to get a field in an object if (digit > 9) { return INCORRECT_TYPE; } array_index = array_index*10 + digit; } // 0 followed by other digits is invalid if (i > 1 && json_pointer[0] == '0') { return INVALID_JSON_POINTER; } // "JSON pointer array index has other characters after 0" // Empty string is invalid; so is a "/" with no digits before it if (i == 0) { return INVALID_JSON_POINTER; } // "Empty string in JSON pointer array index" // Get the child auto child = at(array_index); // If there is an error, it ends here if(child.error()) { return child; } // If there is a /, we're not done yet, call recursively. 
if (i < json_pointer.length()) { child = child.at_pointer(json_pointer.substr(i)); } return child; } simdjson_inline simdjson_result array::at(size_t index) noexcept { size_t i = 0; for (auto value : *this) { if (i == index) { return value; } i++; } return INDEX_OUT_OF_BOUNDS; } } // namespace ondemand } // namespace westmere } // namespace simdjson namespace simdjson { simdjson_inline simdjson_result::simdjson_result( westmere::ondemand::array &&value ) noexcept : implementation_simdjson_result_base( std::forward(value) ) { } simdjson_inline simdjson_result::simdjson_result( error_code error ) noexcept : implementation_simdjson_result_base(error) { } simdjson_inline simdjson_result simdjson_result::begin() noexcept { if (error()) { return error(); } return first.begin(); } simdjson_inline simdjson_result simdjson_result::end() noexcept { if (error()) { return error(); } return first.end(); } simdjson_inline simdjson_result simdjson_result::count_elements() & noexcept { if (error()) { return error(); } return first.count_elements(); } simdjson_inline simdjson_result simdjson_result::is_empty() & noexcept { if (error()) { return error(); } return first.is_empty(); } simdjson_inline simdjson_result simdjson_result::at(size_t index) noexcept { if (error()) { return error(); } return first.at(index); } simdjson_inline simdjson_result simdjson_result::at_pointer(std::string_view json_pointer) noexcept { if (error()) { return error(); } return first.at_pointer(json_pointer); } simdjson_inline simdjson_result simdjson_result::raw_json() noexcept { if (error()) { return error(); } return first.raw_json(); } } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_ARRAY_INL_H /* end file simdjson/generic/ondemand/array-inl.h for westmere */ /* including simdjson/generic/ondemand/array_iterator-inl.h for westmere: #include "simdjson/generic/ondemand/array_iterator-inl.h" */ /* begin file simdjson/generic/ondemand/array_iterator-inl.h for westmere */ #ifndef 
SIMDJSON_GENERIC_ONDEMAND_ARRAY_ITERATOR_INL_H

/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */
/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_ARRAY_ITERATOR_INL_H */
/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */
/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array_iterator.h" */
/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value-inl.h" */
/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator-inl.h" */
/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */

namespace simdjson {
namespace westmere {
namespace ondemand {

// NOTE(review): `simdjson_result`, `std::forward` and `implementation_simdjson_result_base`
// appear in this section without `<...>` argument lists; they look like they were lost from
// this text -- confirm against the generated header.

// Wrap an already-positioned value_iterator.
simdjson_inline array_iterator::array_iterator(const value_iterator &_iter) noexcept
  : iter{_iter}
{}

// Yield the current element. A pending iterator error is returned instead, after
// abandoning the iterator.
simdjson_inline simdjson_result array_iterator::operator*() noexcept {
  if (iter.error()) { iter.abandon(); return iter.error(); }
  return value(iter.child());
}
// Defined as the negation of operator!=.
simdjson_inline bool array_iterator::operator==(const array_iterator &other) const noexcept {
  return !(*this != other);
}
// The argument is ignored: the result only depends on whether this iterator is still open.
simdjson_inline bool array_iterator::operator!=(const array_iterator &) const noexcept {
  return iter.is_open();
}
// Advance to the next element: skip what is left of the current child, then look for a
// following element. Every failure path simply returns *this.
simdjson_inline array_iterator &array_iterator::operator++() noexcept {
  error_code error;
  // PERF NOTE this is a safety rail ... users should exit loops as soon as they receive an error, so we'll never get here.
  // However, it does not seem to make a perf difference, so we add it out of an abundance of caution.
  if (( error = iter.error() )) { return *this; }
  if (( error = iter.skip_child() )) { return *this; }
  if (( error = iter.has_next_element().error() )) { return *this; }
  return *this;
}

} // namespace ondemand
} // namespace westmere
} // namespace simdjson

namespace simdjson {

simdjson_inline simdjson_result::simdjson_result(
  westmere::ondemand::array_iterator &&value
) noexcept
  : westmere::implementation_simdjson_result_base(std::forward(value))
{
  first.iter.assert_is_valid();
}
// Error form: the wrapped iterator is default-constructed.
simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept
  : westmere::implementation_simdjson_result_base({}, error)
{
}

// Yield the stored error, if any; otherwise dereference the wrapped iterator.
simdjson_inline simdjson_result simdjson_result::operator*() noexcept {
  if (error()) { return error(); }
  return *first;
}
// When the wrapped iterator is not valid, the comparison is decided by the stored error
// alone: "equal" exactly when there is no error.
simdjson_inline bool simdjson_result::operator==(const simdjson_result &other) const noexcept {
  if (!first.iter.is_valid()) { return !error(); }
  return first == other.first;
}
// Counterpart of operator==: with an invalid wrapped iterator, "not equal" exactly when
// an error is stored.
simdjson_inline bool simdjson_result::operator!=(const simdjson_result &other) const noexcept {
  if (!first.iter.is_valid()) { return error(); }
  return first != other.first;
}
simdjson_inline simdjson_result &simdjson_result::operator++() noexcept {
  // Clear the error if there is one, so we don't yield it twice
  if (error()) { second = SUCCESS; return *this; }
  ++(first);
  return *this;
}

} // namespace simdjson

#endif // SIMDJSON_GENERIC_ONDEMAND_ARRAY_ITERATOR_INL_H
/* end file simdjson/generic/ondemand/array_iterator-inl.h for westmere */
/* including simdjson/generic/ondemand/document-inl.h for westmere: #include "simdjson/generic/ondemand/document-inl.h" */
/* begin file simdjson/generic/ondemand/document-inl.h for westmere */
#ifndef SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_INL_H

/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */
/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_INL_H */
/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */
/* amalgamation skipped
(editor-only): #include "simdjson/generic/ondemand/array-inl.h" */
/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array_iterator.h" */
/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/document.h" */
/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator-inl.h" */
/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_type.h" */
/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/object-inl.h" */
/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string.h" */
/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value.h" */
/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator-inl.h" */
/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */

namespace simdjson {
namespace westmere {
namespace ondemand {

// NOTE(review): throughout this section `simdjson_result`, `std::forward`, `reinterpret_cast`,
// `template` and `get()` appear without `<...>` argument lists; they look like they were lost
// from this text -- confirm against the generated header.

// Take ownership of the JSON iterator and log the start of the document.
simdjson_inline document::document(ondemand::json_iterator &&_iter) noexcept
  : iter{std::forward(_iter)}
{
  logger::log_start_value(iter, "document");
}

simdjson_inline document document::start(json_iterator &&iter) noexcept {
  return document(std::forward(iter));
}

// The accessors below forward directly to the underlying json_iterator.
inline void document::rewind() noexcept {
  iter.rewind();
}

inline std::string document::to_debug_string() noexcept {
  return iter.to_string();
}

inline simdjson_result document::current_location() const noexcept {
  return iter.current_location();
}

inline int32_t document::current_depth() const noexcept {
  return iter.depth();
}

inline bool document::at_end() const noexcept {
  return iter.at_end();
}

inline bool document::is_alive() noexcept {
  return iter.is_alive();
}
// Build a value_iterator over this document's iterator at depth 1 and the root position.
simdjson_inline value_iterator document::resume_value_iterator() noexcept {
  return value_iterator(&iter, 1, iter.root_position());
}
simdjson_inline value_iterator document::get_root_value_iterator() noexcept {
  return resume_value_iterator();
}
// At the root, start the root object; otherwise resume the object already in progress.
simdjson_inline simdjson_result document::start_or_resume_object() noexcept {
  if (iter.at_root()) {
    return get_object();
  } else {
    return object::resume(resume_value_iterator());
  }
}
// Return the root as a value. Only array and object roots are supported: the closing
// bracket/brace check is run first, and any other root yields SCALAR_DOCUMENT_AS_VALUE.
simdjson_inline simdjson_result document::get_value() noexcept {
  // Make sure we start any arrays or objects before returning, so that start_root_()
  // gets called.

  // It is the convention throughout the code that the macro `SIMDJSON_DEVELOPMENT_CHECKS` determines whether
  // we check for OUT_OF_ORDER_ITERATION. Proper on::demand code should never trigger this error.
#if SIMDJSON_DEVELOPMENT_CHECKS
  if (!iter.at_root()) { return OUT_OF_ORDER_ITERATION; }
#endif
  // assert_at_root() serves two purposes: in Debug mode, whether or not
  // SIMDJSON_DEVELOPMENT_CHECKS is set or not, it checks that we are at the root of
  // the document (this will typically be redundant). In release mode, it generates
  // SIMDJSON_ASSUME statements to allow the compiler to make assumptions.
  iter.assert_at_root();
  switch (*iter.peek()) {
    case '[': {
      // The following lines check that the document ends with ].
      auto value_iterator = get_root_value_iterator();
      auto error = value_iterator.check_root_array();
      if(error) { return error; }
      return value(get_root_value_iterator());
    }
    case '{': {
      // The following lines would check that the document ends with }.
      auto value_iterator = get_root_value_iterator();
      auto error = value_iterator.check_root_object();
      if(error) { return error; }
      return value(get_root_value_iterator());
    }
    default:
      // Unfortunately, scalar documents are a special case in simdjson and they cannot
      // be safely converted to value instances.
      return SCALAR_DOCUMENT_AS_VALUE;
  }
}
simdjson_inline simdjson_result document::get_array() & noexcept {
  auto value = get_root_value_iterator();
  return array::start_root(value);
}
simdjson_inline simdjson_result document::get_object() & noexcept {
  auto value = get_root_value_iterator();
  return object::start_root(value);
}

/**
 * We decided that calling 'get_double()' on the JSON document '1.233 blabla' should
 * give an error, so we check for trailing content. We want to disallow trailing
 * content.
 * Thus, in several implementations below, we pass a 'true' argument to the
 * get_root_*() / is_root_*() call made on the iterator returned by
 * get_root_value_iterator(): this indicates that we disallow trailing content.
 */
simdjson_inline simdjson_result document::get_uint64() noexcept {
  return get_root_value_iterator().get_root_uint64(true);
}
simdjson_inline simdjson_result document::get_uint64_in_string() noexcept {
  return get_root_value_iterator().get_root_uint64_in_string(true);
}
simdjson_inline simdjson_result document::get_int64() noexcept {
  return get_root_value_iterator().get_root_int64(true);
}
simdjson_inline simdjson_result document::get_int64_in_string() noexcept {
  return get_root_value_iterator().get_root_int64_in_string(true);
}
simdjson_inline simdjson_result document::get_double() noexcept {
  return get_root_value_iterator().get_root_double(true);
}
simdjson_inline simdjson_result document::get_double_in_string() noexcept {
  return get_root_value_iterator().get_root_double_in_string(true);
}
simdjson_inline simdjson_result document::get_string(bool allow_replacement) noexcept {
  return get_root_value_iterator().get_root_string(true, allow_replacement);
}
template
simdjson_inline error_code document::get_string(string_type& receiver, bool allow_replacement) noexcept {
  return get_root_value_iterator().get_root_string(receiver, true, allow_replacement);
}
simdjson_inline simdjson_result document::get_wobbly_string() noexcept {
  return get_root_value_iterator().get_root_wobbly_string(true);
}
simdjson_inline simdjson_result document::get_raw_json_string() noexcept {
  return get_root_value_iterator().get_root_raw_json_string(true);
}
simdjson_inline simdjson_result document::get_bool() noexcept {
  return get_root_value_iterator().get_root_bool(true);
}
simdjson_inline simdjson_result document::is_null() noexcept {
  return get_root_value_iterator().is_root_null(true);
}

// Explicit specializations of get() for lvalue documents: each one forwards to the
// matching typed getter.
template<> simdjson_inline simdjson_result document::get() & noexcept { return get_array(); }
template<> simdjson_inline simdjson_result document::get() & noexcept { return get_object(); }
template<> simdjson_inline simdjson_result document::get() & noexcept { return get_raw_json_string(); }
template<> simdjson_inline simdjson_result document::get() & noexcept { return get_string(false); }
template<> simdjson_inline simdjson_result document::get() & noexcept { return get_double(); }
template<> simdjson_inline simdjson_result document::get() & noexcept { return get_uint64(); }
template<> simdjson_inline simdjson_result document::get() & noexcept { return get_int64(); }
template<> simdjson_inline simdjson_result document::get() & noexcept { return get_bool(); }
template<> simdjson_inline simdjson_result document::get() & noexcept { return get_value(); }

// Explicit specializations of get() for rvalue documents (no array/object form here).
template<> simdjson_inline simdjson_result document::get() && noexcept { return get_raw_json_string(); }
template<> simdjson_inline simdjson_result document::get() && noexcept { return get_string(false); }
template<> simdjson_inline simdjson_result document::get() && noexcept { return std::forward(*this).get_double(); }
template<> simdjson_inline simdjson_result document::get() && noexcept { return std::forward(*this).get_uint64(); }
template<> simdjson_inline simdjson_result document::get() && noexcept { return std::forward(*this).get_int64(); }
template<> simdjson_inline simdjson_result document::get() && noexcept { return std::forward(*this).get_bool(); }
template<> simdjson_inline simdjson_result document::get() && noexcept { return get_value(); }

// Out-parameter forms: run get() and store the result in `out`, returning only the error code.
template simdjson_inline error_code document::get(T &out) & noexcept {
  return get().get(out);
}
template simdjson_inline error_code document::get(T &out) && noexcept {
  return std::forward(*this).get().get(out);
}

#if SIMDJSON_EXCEPTIONS
// Conversion operators, declared noexcept(false): each returns the result of the matching
// typed getter.
simdjson_inline document::operator array() & noexcept(false) { return get_array(); }
simdjson_inline document::operator object() & noexcept(false) { return get_object(); }
simdjson_inline document::operator uint64_t() noexcept(false) { return get_uint64(); }
simdjson_inline document::operator int64_t() noexcept(false) { return get_int64(); }
simdjson_inline document::operator double() noexcept(false) { return get_double(); }
simdjson_inline document::operator std::string_view() noexcept(false) { return get_string(false); }
simdjson_inline document::operator raw_json_string() noexcept(false) { return get_raw_json_string(); }
simdjson_inline document::operator bool() noexcept(false) { return get_bool(); }
simdjson_inline document::operator value() noexcept(false) { return get_value(); }

#endif
// Count the elements of the root array; the document is rewound on success.
simdjson_inline simdjson_result document::count_elements() & noexcept {
  auto a = get_array();
  simdjson_result answer = a.count_elements();
  /* If there was an array, we are now left pointing at its first element. */
  if(answer.error() == SUCCESS) { rewind(); }
  return answer;
}
// Count the fields of the root object; the document is rewound on success.
simdjson_inline simdjson_result document::count_fields() & noexcept {
  auto a = get_object();
  simdjson_result answer = a.count_fields();
  /* If there was an object, we are now left pointing at its first element. */
  if(answer.error() == SUCCESS) { rewind(); }
  return answer;
}
simdjson_inline simdjson_result document::at(size_t index) & noexcept {
  auto a = get_array();
  return a.at(index);
}
simdjson_inline simdjson_result document::begin() & noexcept {
  return get_array().begin();
}
// End sentinel: a default-constructed result.
simdjson_inline simdjson_result document::end() & noexcept {
  return {};
}

// Field lookups go through start_or_resume_object(), so they can be called repeatedly.
simdjson_inline simdjson_result document::find_field(std::string_view key) & noexcept {
  return start_or_resume_object().find_field(key);
}
simdjson_inline simdjson_result document::find_field(const char *key) & noexcept {
  return start_or_resume_object().find_field(key);
}
simdjson_inline simdjson_result document::find_field_unordered(std::string_view key) & noexcept {
  return start_or_resume_object().find_field_unordered(key);
}
simdjson_inline simdjson_result document::find_field_unordered(const char *key) & noexcept {
  return start_or_resume_object().find_field_unordered(key);
}
simdjson_inline simdjson_result document::operator[](std::string_view key) & noexcept {
  return start_or_resume_object()[key];
}
simdjson_inline simdjson_result document::operator[](const char *key) & noexcept {
  return start_or_resume_object()[key];
}

// Skip the whole document; on failure the iterator is abandoned.
simdjson_inline error_code document::consume() noexcept {
  auto error = iter.skip_child(0);
  if(error) { iter.abandon(); }
  return error;
}

// Consume the document and return the raw JSON text it spans.
simdjson_inline simdjson_result document::raw_json() noexcept {
  auto _iter = get_root_value_iterator();
  const uint8_t * starting_point{_iter.peek_start()};
  auto error = consume();
  if(error) { return error; }
  // After 'consume()', we could be left pointing just beyond the document, but that
  // is ok because we are not going to dereference the final pointer position, we just
  // use it to compute the length in bytes.
  const uint8_t * final_point{iter.unsafe_pointer()};
  return std::string_view(reinterpret_cast(starting_point), size_t(final_point - starting_point));
}

simdjson_inline simdjson_result document::type() noexcept {
  return get_root_value_iterator().type();
}

// A document is scalar when its type is neither array nor object.
simdjson_inline simdjson_result document::is_scalar() noexcept {
  json_type this_type;
  auto error = type().get(this_type);
  if(error) { return error; }
  return ! ((this_type == json_type::array) || (this_type == json_type::object));
}

simdjson_inline bool document::is_negative() noexcept {
  return get_root_value_iterator().is_root_negative();
}

simdjson_inline simdjson_result document::is_integer() noexcept {
  return get_root_value_iterator().is_root_integer(true);
}

simdjson_inline simdjson_result document::get_number_type() noexcept {
  return get_root_value_iterator().get_root_number_type(true);
}

simdjson_inline simdjson_result document::get_number() noexcept {
  return get_root_value_iterator().get_root_number(true);
}


simdjson_inline simdjson_result document::raw_json_token() noexcept {
  auto _iter = get_root_value_iterator();
  return std::string_view(reinterpret_cast(_iter.peek_start()), _iter.peek_start_length());
}

// Resolve a JSON pointer from the root. An empty pointer designates the root value itself;
// otherwise the lookup is delegated to the root array or object, and any other root type
// yields INVALID_JSON_POINTER.
simdjson_inline simdjson_result document::at_pointer(std::string_view json_pointer) noexcept {
  rewind(); // Rewind the document each time at_pointer is called
  if (json_pointer.empty()) {
    return this->get_value();
  }
  json_type t;
  SIMDJSON_TRY(type().get(t));
  switch (t)
  {
    case json_type::array:
      return (*this).get_array().at_pointer(json_pointer);
    case json_type::object:
      return (*this).get_object().at_pointer(json_pointer);
    default:
      return INVALID_JSON_POINTER;
  }
}

} // namespace ondemand
} // namespace westmere
} // namespace simdjson

namespace simdjson {

simdjson_inline simdjson_result::simdjson_result(
  westmere::ondemand::document &&value
) noexcept :
    implementation_simdjson_result_base(
      std::forward(value)
    )
{
}
simdjson_inline simdjson_result::simdjson_result(
  error_code error
) noexcept :
    implementation_simdjson_result_base(
      error
    )
{
}
// Unless noted otherwise, each wrapper below returns the stored error, if any, and
// otherwise forwards the call to the wrapped document.
simdjson_inline simdjson_result simdjson_result::count_elements() & noexcept {
  if (error()) { return error(); }
  return first.count_elements();
}
simdjson_inline simdjson_result simdjson_result::count_fields() & noexcept {
  if (error()) { return error(); }
  return first.count_fields();
}
simdjson_inline simdjson_result simdjson_result::at(size_t index) & noexcept {
  if (error()) { return error(); }
  return first.at(index);
}
simdjson_inline error_code simdjson_result::rewind() noexcept {
  if (error()) { return error(); }
  first.rewind();
  return SUCCESS;
}
simdjson_inline simdjson_result simdjson_result::begin() & noexcept {
  if (error()) { return error(); }
  return first.begin();
}
// End sentinel: a default-constructed result, regardless of any stored error.
simdjson_inline simdjson_result simdjson_result::end() & noexcept {
  return {};
}
simdjson_inline simdjson_result simdjson_result::find_field_unordered(std::string_view key) & noexcept {
  if (error()) { return error(); }
  return first.find_field_unordered(key);
}
simdjson_inline simdjson_result simdjson_result::find_field_unordered(const char *key) & noexcept {
  if (error()) { return error(); }
  return first.find_field_unordered(key);
}
simdjson_inline simdjson_result simdjson_result::operator[](std::string_view key) & noexcept {
  if (error()) { return error(); }
  return first[key];
}
simdjson_inline simdjson_result simdjson_result::operator[](const char *key) & noexcept {
  if (error()) { return error(); }
  return first[key];
}
simdjson_inline simdjson_result simdjson_result::find_field(std::string_view key) & noexcept {
  if (error()) { return error(); }
  return first.find_field(key);
}
simdjson_inline simdjson_result simdjson_result::find_field(const char *key) & noexcept {
  if (error()) { return error(); }
  return first.find_field(key);
}
simdjson_inline simdjson_result simdjson_result::get_array() & noexcept {
  if (error()) { return error(); }
  return first.get_array();
}
simdjson_inline simdjson_result simdjson_result::get_object() & noexcept {
  if (error()) { return error(); }
  return first.get_object();
}
simdjson_inline simdjson_result simdjson_result::get_uint64() noexcept {
  if (error()) { return error(); }
  return first.get_uint64();
}
simdjson_inline simdjson_result simdjson_result::get_uint64_in_string() noexcept {
  if (error()) { return error(); }
  return first.get_uint64_in_string();
}
simdjson_inline simdjson_result simdjson_result::get_int64() noexcept {
  if (error()) { return error(); }
  return first.get_int64();
}
simdjson_inline simdjson_result simdjson_result::get_int64_in_string() noexcept {
  if (error()) { return error(); }
  return first.get_int64_in_string();
}
simdjson_inline simdjson_result simdjson_result::get_double() noexcept {
  if (error()) { return error(); }
  return first.get_double();
}
simdjson_inline simdjson_result simdjson_result::get_double_in_string() noexcept {
  if (error()) { return error(); }
  return first.get_double_in_string();
}
simdjson_inline simdjson_result simdjson_result::get_string(bool allow_replacement) noexcept {
  if (error()) { return error(); }
  return first.get_string(allow_replacement);
}
template
simdjson_inline error_code simdjson_result::get_string(string_type& receiver, bool allow_replacement) noexcept {
  if (error()) { return error(); }
  return first.get_string(receiver, allow_replacement);
}
simdjson_inline simdjson_result simdjson_result::get_wobbly_string() noexcept {
  if (error()) { return error(); }
  return first.get_wobbly_string();
}
simdjson_inline simdjson_result simdjson_result::get_raw_json_string() noexcept {
  if (error()) { return error(); }
  return first.get_raw_json_string();
}
simdjson_inline simdjson_result simdjson_result::get_bool() noexcept {
  if (error()) { return error(); }
  return first.get_bool();
}
simdjson_inline simdjson_result simdjson_result::get_value() noexcept {
  if (error()) { return error(); }
  return first.get_value();
}
simdjson_inline simdjson_result simdjson_result::is_null() noexcept {
  if (error()) { return error(); }
  return first.is_null();
}
template simdjson_inline simdjson_result simdjson_result::get() & noexcept { if (error()) { return error(); } return first.get(); } template simdjson_inline simdjson_result simdjson_result::get() && noexcept { if (error()) { return error(); } return std::forward(first).get(); } template simdjson_inline error_code simdjson_result::get(T &out) & noexcept { if (error()) { return error(); } return first.get(out); } template simdjson_inline error_code simdjson_result::get(T &out) && noexcept { if (error()) { return error(); } return std::forward(first).get(out); } template<> simdjson_inline simdjson_result simdjson_result::get() & noexcept = delete; template<> simdjson_inline simdjson_result simdjson_result::get() && noexcept { if (error()) { return error(); } return std::forward(first); } template<> simdjson_inline error_code simdjson_result::get(westmere::ondemand::document &out) & noexcept = delete; template<> simdjson_inline error_code simdjson_result::get(westmere::ondemand::document &out) && noexcept { if (error()) { return error(); } out = std::forward(first); return SUCCESS; } simdjson_inline simdjson_result simdjson_result::type() noexcept { if (error()) { return error(); } return first.type(); } simdjson_inline simdjson_result simdjson_result::is_scalar() noexcept { if (error()) { return error(); } return first.is_scalar(); } simdjson_inline bool simdjson_result::is_negative() noexcept { if (error()) { return error(); } return first.is_negative(); } simdjson_inline simdjson_result simdjson_result::is_integer() noexcept { if (error()) { return error(); } return first.is_integer(); } simdjson_inline simdjson_result simdjson_result::get_number_type() noexcept { if (error()) { return error(); } return first.get_number_type(); } simdjson_inline simdjson_result simdjson_result::get_number() noexcept { if (error()) { return error(); } return first.get_number(); } #if SIMDJSON_EXCEPTIONS simdjson_inline simdjson_result::operator westmere::ondemand::array() & 
noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } simdjson_inline simdjson_result::operator westmere::ondemand::object() & noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } simdjson_inline simdjson_result::operator uint64_t() noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } simdjson_inline simdjson_result::operator int64_t() noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } simdjson_inline simdjson_result::operator double() noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } simdjson_inline simdjson_result::operator std::string_view() noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } simdjson_inline simdjson_result::operator westmere::ondemand::raw_json_string() noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } simdjson_inline simdjson_result::operator bool() noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } simdjson_inline simdjson_result::operator westmere::ondemand::value() noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } #endif simdjson_inline simdjson_result simdjson_result::current_location() noexcept { if (error()) { return error(); } return first.current_location(); } simdjson_inline bool simdjson_result::at_end() const noexcept { if (error()) { return error(); } return first.at_end(); } simdjson_inline int32_t simdjson_result::current_depth() const noexcept { if (error()) { return error(); } return first.current_depth(); } simdjson_inline simdjson_result simdjson_result::raw_json_token() noexcept { if (error()) { return error(); } return first.raw_json_token(); } simdjson_inline simdjson_result simdjson_result::at_pointer(std::string_view json_pointer) noexcept { if (error()) { return error(); } return first.at_pointer(json_pointer); } } // namespace simdjson namespace 
simdjson { namespace westmere { namespace ondemand { simdjson_inline document_reference::document_reference() noexcept : doc{nullptr} {} simdjson_inline document_reference::document_reference(document &d) noexcept : doc(&d) {} simdjson_inline void document_reference::rewind() noexcept { doc->rewind(); } simdjson_inline simdjson_result document_reference::get_array() & noexcept { return doc->get_array(); } simdjson_inline simdjson_result document_reference::get_object() & noexcept { return doc->get_object(); } /** * The document_reference instances are used primarily/solely for streams of JSON * documents. * We decided that calling 'get_double()' on the JSON document '1.233 blabla' should * give an error, so we check for trailing content. * * However, for streams of JSON documents, we want to be able to start from * "321" "321" "321" * and parse it successfully as a stream of JSON documents, calling get_uint64_in_string() * successfully each time. * * To achieve this result, we pass a 'false' to a get_root_value_iterator() method: * this indicates that we allow trailing content. 
*/ simdjson_inline simdjson_result document_reference::get_uint64() noexcept { return doc->get_root_value_iterator().get_root_uint64(false); } simdjson_inline simdjson_result document_reference::get_uint64_in_string() noexcept { return doc->get_root_value_iterator().get_root_uint64_in_string(false); } simdjson_inline simdjson_result document_reference::get_int64() noexcept { return doc->get_root_value_iterator().get_root_int64(false); } simdjson_inline simdjson_result document_reference::get_int64_in_string() noexcept { return doc->get_root_value_iterator().get_root_int64_in_string(false); } simdjson_inline simdjson_result document_reference::get_double() noexcept { return doc->get_root_value_iterator().get_root_double(false); } simdjson_inline simdjson_result document_reference::get_double_in_string() noexcept { return doc->get_root_value_iterator().get_root_double(false); } simdjson_inline simdjson_result document_reference::get_string(bool allow_replacement) noexcept { return doc->get_root_value_iterator().get_root_string(false, allow_replacement); } template simdjson_inline error_code document_reference::get_string(string_type& receiver, bool allow_replacement) noexcept { return doc->get_root_value_iterator().get_root_string(receiver, false, allow_replacement); } simdjson_inline simdjson_result document_reference::get_wobbly_string() noexcept { return doc->get_root_value_iterator().get_root_wobbly_string(false); } simdjson_inline simdjson_result document_reference::get_raw_json_string() noexcept { return doc->get_root_value_iterator().get_root_raw_json_string(false); } simdjson_inline simdjson_result document_reference::get_bool() noexcept { return doc->get_root_value_iterator().get_root_bool(false); } simdjson_inline simdjson_result document_reference::get_value() noexcept { return doc->get_value(); } simdjson_inline simdjson_result document_reference::is_null() noexcept { return doc->get_root_value_iterator().is_root_null(false); } #if SIMDJSON_EXCEPTIONS 
simdjson_inline document_reference::operator array() & noexcept(false) { return array(*doc); } simdjson_inline document_reference::operator object() & noexcept(false) { return object(*doc); } simdjson_inline document_reference::operator uint64_t() noexcept(false) { return get_uint64(); } simdjson_inline document_reference::operator int64_t() noexcept(false) { return get_int64(); } simdjson_inline document_reference::operator double() noexcept(false) { return get_double(); } simdjson_inline document_reference::operator std::string_view() noexcept(false) { return std::string_view(*doc); } simdjson_inline document_reference::operator raw_json_string() noexcept(false) { return raw_json_string(*doc); } simdjson_inline document_reference::operator bool() noexcept(false) { return get_bool(); } simdjson_inline document_reference::operator value() noexcept(false) { return value(*doc); } #endif simdjson_inline simdjson_result document_reference::count_elements() & noexcept { return doc->count_elements(); } simdjson_inline simdjson_result document_reference::count_fields() & noexcept { return doc->count_fields(); } simdjson_inline simdjson_result document_reference::at(size_t index) & noexcept { return doc->at(index); } simdjson_inline simdjson_result document_reference::begin() & noexcept { return doc->begin(); } simdjson_inline simdjson_result document_reference::end() & noexcept { return doc->end(); } simdjson_inline simdjson_result document_reference::find_field(std::string_view key) & noexcept { return doc->find_field(key); } simdjson_inline simdjson_result document_reference::find_field(const char *key) & noexcept { return doc->find_field(key); } simdjson_inline simdjson_result document_reference::operator[](std::string_view key) & noexcept { return (*doc)[key]; } simdjson_inline simdjson_result document_reference::operator[](const char *key) & noexcept { return (*doc)[key]; } simdjson_inline simdjson_result document_reference::find_field_unordered(std::string_view key) 
& noexcept { return doc->find_field_unordered(key); } simdjson_inline simdjson_result document_reference::find_field_unordered(const char *key) & noexcept { return doc->find_field_unordered(key); } simdjson_inline simdjson_result document_reference::type() noexcept { return doc->type(); } simdjson_inline simdjson_result document_reference::is_scalar() noexcept { return doc->is_scalar(); } simdjson_inline simdjson_result document_reference::current_location() noexcept { return doc->current_location(); } simdjson_inline int32_t document_reference::current_depth() const noexcept { return doc->current_depth(); } simdjson_inline bool document_reference::is_negative() noexcept { return doc->is_negative(); } simdjson_inline simdjson_result document_reference::is_integer() noexcept { return doc->get_root_value_iterator().is_root_integer(false); } simdjson_inline simdjson_result document_reference::get_number_type() noexcept { return doc->get_root_value_iterator().get_root_number_type(false); } simdjson_inline simdjson_result document_reference::get_number() noexcept { return doc->get_root_value_iterator().get_root_number(false); } simdjson_inline simdjson_result document_reference::raw_json_token() noexcept { return doc->raw_json_token(); } simdjson_inline simdjson_result document_reference::at_pointer(std::string_view json_pointer) noexcept { return doc->at_pointer(json_pointer); } simdjson_inline simdjson_result document_reference::raw_json() noexcept { return doc->raw_json();} simdjson_inline document_reference::operator document&() const noexcept { return *doc; } } // namespace ondemand } // namespace westmere } // namespace simdjson namespace simdjson { simdjson_inline simdjson_result::simdjson_result(westmere::ondemand::document_reference value, error_code error) noexcept : implementation_simdjson_result_base(std::forward(value), error) {} simdjson_inline simdjson_result simdjson_result::count_elements() & noexcept { if (error()) { return error(); } return 
first.count_elements(); } simdjson_inline simdjson_result simdjson_result::count_fields() & noexcept { if (error()) { return error(); } return first.count_fields(); } simdjson_inline simdjson_result simdjson_result::at(size_t index) & noexcept { if (error()) { return error(); } return first.at(index); } simdjson_inline error_code simdjson_result::rewind() noexcept { if (error()) { return error(); } first.rewind(); return SUCCESS; } simdjson_inline simdjson_result simdjson_result::begin() & noexcept { if (error()) { return error(); } return first.begin(); } simdjson_inline simdjson_result simdjson_result::end() & noexcept { return {}; } simdjson_inline simdjson_result simdjson_result::find_field_unordered(std::string_view key) & noexcept { if (error()) { return error(); } return first.find_field_unordered(key); } simdjson_inline simdjson_result simdjson_result::find_field_unordered(const char *key) & noexcept { if (error()) { return error(); } return first.find_field_unordered(key); } simdjson_inline simdjson_result simdjson_result::operator[](std::string_view key) & noexcept { if (error()) { return error(); } return first[key]; } simdjson_inline simdjson_result simdjson_result::operator[](const char *key) & noexcept { if (error()) { return error(); } return first[key]; } simdjson_inline simdjson_result simdjson_result::find_field(std::string_view key) & noexcept { if (error()) { return error(); } return first.find_field(key); } simdjson_inline simdjson_result simdjson_result::find_field(const char *key) & noexcept { if (error()) { return error(); } return first.find_field(key); } simdjson_inline simdjson_result simdjson_result::get_array() & noexcept { if (error()) { return error(); } return first.get_array(); } simdjson_inline simdjson_result simdjson_result::get_object() & noexcept { if (error()) { return error(); } return first.get_object(); } simdjson_inline simdjson_result simdjson_result::get_uint64() noexcept { if (error()) { return error(); } return 
first.get_uint64(); } simdjson_inline simdjson_result simdjson_result::get_uint64_in_string() noexcept { if (error()) { return error(); } return first.get_uint64_in_string(); } simdjson_inline simdjson_result simdjson_result::get_int64() noexcept { if (error()) { return error(); } return first.get_int64(); } simdjson_inline simdjson_result simdjson_result::get_int64_in_string() noexcept { if (error()) { return error(); } return first.get_int64_in_string(); } simdjson_inline simdjson_result simdjson_result::get_double() noexcept { if (error()) { return error(); } return first.get_double(); } simdjson_inline simdjson_result simdjson_result::get_double_in_string() noexcept { if (error()) { return error(); } return first.get_double_in_string(); } simdjson_inline simdjson_result simdjson_result::get_string(bool allow_replacement) noexcept { if (error()) { return error(); } return first.get_string(allow_replacement); } template simdjson_inline error_code simdjson_result::get_string(string_type& receiver, bool allow_replacement) noexcept { if (error()) { return error(); } return first.get_string(receiver, allow_replacement); } simdjson_inline simdjson_result simdjson_result::get_wobbly_string() noexcept { if (error()) { return error(); } return first.get_wobbly_string(); } simdjson_inline simdjson_result simdjson_result::get_raw_json_string() noexcept { if (error()) { return error(); } return first.get_raw_json_string(); } simdjson_inline simdjson_result simdjson_result::get_bool() noexcept { if (error()) { return error(); } return first.get_bool(); } simdjson_inline simdjson_result simdjson_result::get_value() noexcept { if (error()) { return error(); } return first.get_value(); } simdjson_inline simdjson_result simdjson_result::is_null() noexcept { if (error()) { return error(); } return first.is_null(); } simdjson_inline simdjson_result simdjson_result::type() noexcept { if (error()) { return error(); } return first.type(); } simdjson_inline simdjson_result 
simdjson_result::is_scalar() noexcept { if (error()) { return error(); } return first.is_scalar(); } simdjson_inline simdjson_result simdjson_result::is_negative() noexcept { if (error()) { return error(); } return first.is_negative(); } simdjson_inline simdjson_result simdjson_result::is_integer() noexcept { if (error()) { return error(); } return first.is_integer(); } simdjson_inline simdjson_result simdjson_result::get_number_type() noexcept { if (error()) { return error(); } return first.get_number_type(); } simdjson_inline simdjson_result simdjson_result::get_number() noexcept { if (error()) { return error(); } return first.get_number(); } #if SIMDJSON_EXCEPTIONS simdjson_inline simdjson_result::operator westmere::ondemand::array() & noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } simdjson_inline simdjson_result::operator westmere::ondemand::object() & noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } simdjson_inline simdjson_result::operator uint64_t() noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } simdjson_inline simdjson_result::operator int64_t() noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } simdjson_inline simdjson_result::operator double() noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } simdjson_inline simdjson_result::operator std::string_view() noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } simdjson_inline simdjson_result::operator westmere::ondemand::raw_json_string() noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } simdjson_inline simdjson_result::operator bool() noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } simdjson_inline simdjson_result::operator westmere::ondemand::value() noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } #endif simdjson_inline 
simdjson_result simdjson_result::current_location() noexcept { if (error()) { return error(); } return first.current_location(); } simdjson_inline simdjson_result simdjson_result::raw_json_token() noexcept { if (error()) { return error(); } return first.raw_json_token(); } simdjson_inline simdjson_result simdjson_result::at_pointer(std::string_view json_pointer) noexcept { if (error()) { return error(); } return first.at_pointer(json_pointer); } } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_INL_H /* end file simdjson/generic/ondemand/document-inl.h for westmere */ /* including simdjson/generic/ondemand/document_stream-inl.h for westmere: #include "simdjson/generic/ondemand/document_stream-inl.h" */ /* begin file simdjson/generic/ondemand/document_stream-inl.h for westmere */ #ifndef SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_STREAM_INL_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_STREAM_INL_H */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/document_stream.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/document-inl.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base-inl.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ #include #include namespace simdjson { namespace westmere { namespace ondemand { #ifdef SIMDJSON_THREADS_ENABLED inline void stage1_worker::finish() { // After calling "run" someone would call finish() to wait // for the end of the processing. // This function will wait until either the thread has done // the processing or, else, the destructor has been called. 
std::unique_lock lock(locking_mutex); cond_var.wait(lock, [this]{return has_work == false;}); } inline stage1_worker::~stage1_worker() { // The thread may never outlive the stage1_worker instance // and will always be stopped/joined before the stage1_worker // instance is gone. stop_thread(); } inline void stage1_worker::start_thread() { std::unique_lock lock(locking_mutex); if(thread.joinable()) { return; // This should never happen but we never want to create more than one thread. } thread = std::thread([this]{ while(true) { std::unique_lock thread_lock(locking_mutex); // We wait for either "run" or "stop_thread" to be called. cond_var.wait(thread_lock, [this]{return has_work || !can_work;}); // If, for some reason, the stop_thread() method was called (i.e., the // destructor of stage1_worker is called, then we want to immediately destroy // the thread (and not do any more processing). if(!can_work) { break; } this->owner->stage1_thread_error = this->owner->run_stage1(*this->stage1_thread_parser, this->_next_batch_start); this->has_work = false; // The condition variable call should be moved after thread_lock.unlock() for performance // reasons but thread sanitizers may report it as a data race if we do. // See https://stackoverflow.com/questions/35775501/c-should-condition-variable-be-notified-under-lock cond_var.notify_one(); // will notify "finish" thread_lock.unlock(); } } ); } inline void stage1_worker::stop_thread() { std::unique_lock lock(locking_mutex); // We have to make sure that all locks can be released. 
can_work = false; has_work = false; cond_var.notify_all(); lock.unlock(); if(thread.joinable()) { thread.join(); } } inline void stage1_worker::run(document_stream * ds, parser * stage1, size_t next_batch_start) { std::unique_lock lock(locking_mutex); owner = ds; _next_batch_start = next_batch_start; stage1_thread_parser = stage1; has_work = true; // The condition variable call should be moved after thread_lock.unlock() for performance // reasons but thread sanitizers may report it as a data race if we do. // See https://stackoverflow.com/questions/35775501/c-should-condition-variable-be-notified-under-lock cond_var.notify_one(); // will notify the thread lock that we have work lock.unlock(); } #endif // SIMDJSON_THREADS_ENABLED simdjson_inline document_stream::document_stream( ondemand::parser &_parser, const uint8_t *_buf, size_t _len, size_t _batch_size, bool _allow_comma_separated ) noexcept : parser{&_parser}, buf{_buf}, len{_len}, batch_size{_batch_size <= MINIMAL_BATCH_SIZE ? MINIMAL_BATCH_SIZE : _batch_size}, allow_comma_separated{_allow_comma_separated}, error{SUCCESS} #ifdef SIMDJSON_THREADS_ENABLED , use_thread(_parser.threaded) // we need to make a copy because _parser.threaded can change #endif { #ifdef SIMDJSON_THREADS_ENABLED if(worker.get() == nullptr) { error = MEMALLOC; } #endif } simdjson_inline document_stream::document_stream() noexcept : parser{nullptr}, buf{nullptr}, len{0}, batch_size{0}, allow_comma_separated{false}, error{UNINITIALIZED} #ifdef SIMDJSON_THREADS_ENABLED , use_thread(false) #endif { } simdjson_inline document_stream::~document_stream() noexcept { #ifdef SIMDJSON_THREADS_ENABLED worker.reset(); #endif } inline size_t document_stream::size_in_bytes() const noexcept { return len; } inline size_t document_stream::truncated_bytes() const noexcept { if(error == CAPACITY) { return len - batch_start; } return parser->implementation->structural_indexes[parser->implementation->n_structural_indexes] - 
parser->implementation->structural_indexes[parser->implementation->n_structural_indexes + 1]; } simdjson_inline document_stream::iterator::iterator() noexcept : stream{nullptr}, finished{true} { } simdjson_inline document_stream::iterator::iterator(document_stream* _stream, bool is_end) noexcept : stream{_stream}, finished{is_end} { } simdjson_inline simdjson_result document_stream::iterator::operator*() noexcept { //if(stream->error) { return stream->error; } return simdjson_result(stream->doc, stream->error); } simdjson_inline document_stream::iterator& document_stream::iterator::operator++() noexcept { // If there is an error, then we want the iterator // to be finished, no matter what. (E.g., we do not // keep generating documents with errors, or go beyond // a document with errors.) // // Users do not have to call "operator*()" when they use operator++, // so we need to end the stream in the operator++ function. // // Note that setting finished = true is essential otherwise // we would enter an infinite loop. if (stream->error) { finished = true; } // Note that stream->error() is guarded against error conditions // (it will immediately return if stream->error casts to false). // In effect, this next function does nothing when (stream->error) // is true (hence the risk of an infinite loop). stream->next(); // If that was the last document, we're finished. // It is the only type of error we do not want to appear // in operator*. if (stream->error == EMPTY) { finished = true; } // If we had any other kind of error (not EMPTY) then we want // to pass it along to the operator* and we cannot mark the result // as "finished" just yet. return *this; } simdjson_inline bool document_stream::iterator::operator!=(const document_stream::iterator &other) const noexcept { return finished != other.finished; } simdjson_inline document_stream::iterator document_stream::begin() noexcept { start(); // If there are no documents, we're finished. 
return iterator(this, error == EMPTY); } simdjson_inline document_stream::iterator document_stream::end() noexcept { return iterator(this, true); } inline void document_stream::start() noexcept { if (error) { return; } error = parser->allocate(batch_size); if (error) { return; } // Always run the first stage 1 parse immediately batch_start = 0; error = run_stage1(*parser, batch_start); while(error == EMPTY) { // In exceptional cases, we may start with an empty block batch_start = next_batch_start(); if (batch_start >= len) { return; } error = run_stage1(*parser, batch_start); } if (error) { return; } doc_index = batch_start; doc = document(json_iterator(&buf[batch_start], parser)); doc.iter._streaming = true; #ifdef SIMDJSON_THREADS_ENABLED if (use_thread && next_batch_start() < len) { // Kick off the first thread on next batch if needed error = stage1_thread_parser.allocate(batch_size); if (error) { return; } worker->start_thread(); start_stage1_thread(); if (error) { return; } } #endif // SIMDJSON_THREADS_ENABLED } inline void document_stream::next() noexcept { // We always enter at once once in an error condition. if (error) { return; } next_document(); if (error) { return; } auto cur_struct_index = doc.iter._root - parser->implementation->structural_indexes.get(); doc_index = batch_start + parser->implementation->structural_indexes[cur_struct_index]; // Check if at end of structural indexes (i.e. 
at end of batch) if(cur_struct_index >= static_cast(parser->implementation->n_structural_indexes)) { error = EMPTY; // Load another batch (if available) while (error == EMPTY) { batch_start = next_batch_start(); if (batch_start >= len) { break; } #ifdef SIMDJSON_THREADS_ENABLED if(use_thread) { load_from_stage1_thread(); } else { error = run_stage1(*parser, batch_start); } #else error = run_stage1(*parser, batch_start); #endif /** * Whenever we move to another window, we need to update all pointers to make * it appear as if the input buffer started at the beginning of the window. * * Take this input: * * {"z":5} {"1":1,"2":2,"4":4} [7, 10, 9] [15, 11, 12, 13] [154, 110, 112, 1311] * * Say you process the following window... * * '{"z":5} {"1":1,"2":2,"4":4} [7, 10, 9]' * * When you do so, the json_iterator has a pointer at the beginning of the memory region * (pointing at the beginning of '{"z"...'. * * When you move to the window that starts at... * * '[7, 10, 9] [15, 11, 12, 13] ... * * then it is not sufficient to just run stage 1. You also need to re-anchor the * json_iterator so that it believes we are starting at '[7, 10, 9]...'. * * Under the DOM front-end, this gets done automatically because the parser owns * the pointer the data, and when you call stage1 and then stage2 on the same * parser, then stage2 will run on the pointer acquired by stage1. * * That is, stage1 calls "this->buf = _buf" so the parser remembers the buffer that * we used. But json_iterator has no callback when stage1 is called on the parser. * In fact, I think that the parser is unaware of json_iterator. * * * So we need to re-anchor the json_iterator after each call to stage 1 so that * all of the pointers are in sync. */ doc.iter = json_iterator(&buf[batch_start], parser); doc.iter._streaming = true; /** * End of resync. */ if (error) { continue; } // If the error was EMPTY, we may want to load another batch. 
doc_index = batch_start; } } } inline void document_stream::next_document() noexcept { // Go to next place where depth=0 (document depth) error = doc.iter.skip_child(0); if (error) { return; } // Always set depth=1 at the start of document doc.iter._depth = 1; // consume comma if comma separated is allowed if (allow_comma_separated) { doc.iter.consume_character(','); } // Resets the string buffer at the beginning, thus invalidating the strings. doc.iter._string_buf_loc = parser->string_buf.get(); doc.iter._root = doc.iter.position(); } inline size_t document_stream::next_batch_start() const noexcept { return batch_start + parser->implementation->structural_indexes[parser->implementation->n_structural_indexes]; } inline error_code document_stream::run_stage1(ondemand::parser &p, size_t _batch_start) noexcept { // This code only updates the structural index in the parser, it does not update any json_iterator // instance. size_t remaining = len - _batch_start; if (remaining <= batch_size) { return p.implementation->stage1(&buf[_batch_start], remaining, stage1_mode::streaming_final); } else { return p.implementation->stage1(&buf[_batch_start], batch_size, stage1_mode::streaming_partial); } } simdjson_inline size_t document_stream::iterator::current_index() const noexcept { return stream->doc_index; } simdjson_inline std::string_view document_stream::iterator::source() const noexcept { auto depth = stream->doc.iter.depth(); auto cur_struct_index = stream->doc.iter._root - stream->parser->implementation->structural_indexes.get(); // If at root, process the first token to determine if scalar value if (stream->doc.iter.at_root()) { switch (stream->buf[stream->batch_start + stream->parser->implementation->structural_indexes[cur_struct_index]]) { case '{': case '[': // Depth=1 already at start of document break; case '}': case ']': depth--; break; default: // Scalar value document // TODO: Remove any trailing whitespaces // This returns a string spanning from start of value 
to the beginning of the next document (excluded) return std::string_view(reinterpret_cast(stream->buf) + current_index(), stream->parser->implementation->structural_indexes[++cur_struct_index] - current_index() - 1); } cur_struct_index++; } while (cur_struct_index <= static_cast(stream->parser->implementation->n_structural_indexes)) { switch (stream->buf[stream->batch_start + stream->parser->implementation->structural_indexes[cur_struct_index]]) { case '{': case '[': depth++; break; case '}': case ']': depth--; break; } if (depth == 0) { break; } cur_struct_index++; } return std::string_view(reinterpret_cast(stream->buf) + current_index(), stream->parser->implementation->structural_indexes[cur_struct_index] - current_index() + stream->batch_start + 1);; } inline error_code document_stream::iterator::error() const noexcept { return stream->error; } #ifdef SIMDJSON_THREADS_ENABLED inline void document_stream::load_from_stage1_thread() noexcept { worker->finish(); // Swap to the parser that was loaded up in the thread. Make sure the parser has // enough memory to swap to, as well. std::swap(stage1_thread_parser,*parser); error = stage1_thread_error; if (error) { return; } // If there's anything left, start the stage 1 thread! if (next_batch_start() < len) { start_stage1_thread(); } } inline void document_stream::start_stage1_thread() noexcept { // we call the thread on a lambda that will update // this->stage1_thread_error // there is only one thread that may write to this value // TODO this is NOT exception-safe. 
this->stage1_thread_error = UNINITIALIZED; // In case something goes wrong, make sure it's an error size_t _next_batch_start = this->next_batch_start(); worker->run(this, & this->stage1_thread_parser, _next_batch_start); } #endif // SIMDJSON_THREADS_ENABLED } // namespace ondemand } // namespace westmere } // namespace simdjson namespace simdjson { simdjson_inline simdjson_result::simdjson_result( error_code error ) noexcept : implementation_simdjson_result_base(error) { } simdjson_inline simdjson_result::simdjson_result( westmere::ondemand::document_stream &&value ) noexcept : implementation_simdjson_result_base( std::forward(value) ) { } } #endif // SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_STREAM_INL_H /* end file simdjson/generic/ondemand/document_stream-inl.h for westmere */ /* including simdjson/generic/ondemand/field-inl.h for westmere: #include "simdjson/generic/ondemand/field-inl.h" */ /* begin file simdjson/generic/ondemand/field-inl.h for westmere */ #ifndef SIMDJSON_GENERIC_ONDEMAND_FIELD_INL_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_FIELD_INL_H */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/field.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value-inl.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator-inl.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { namespace westmere { namespace ondemand { // clang 6 doesn't think the default constructor can be noexcept, so we make it explicit simdjson_inline field::field() noexcept : std::pair() {} simdjson_inline field::field(raw_json_string key, ondemand::value &&value) noexcept : std::pair(key, std::forward(value)) { } simdjson_inline simdjson_result 
field::start(value_iterator &parent_iter) noexcept {
  // Read the key, step over to the value, then build the field from both.
  raw_json_string key;
  SIMDJSON_TRY( parent_iter.field_key().get(key) );
  SIMDJSON_TRY( parent_iter.field_value() );
  return field::start(parent_iter, key);
}

/** Builds a field from an already-read key and the child of parent_iter. */
simdjson_inline simdjson_result field::start(const value_iterator &parent_iter, raw_json_string key) noexcept {
    return field(key, parent_iter.child());
}

/**
 * Unescapes the key through the json iterator of the value, then calls
 * first.consume() and returns the unescaped result.
 */
simdjson_inline simdjson_warn_unused simdjson_result field::unescaped_key(bool allow_replacement) noexcept {
  SIMDJSON_ASSUME(first.buf != nullptr); // We would like to call .alive() but Visual Studio won't let us.
  simdjson_result answer = first.unescape(second.iter.json_iter(), allow_replacement);
  first.consume();
  return answer;
}

/** Returns the raw (still escaped) key. */
simdjson_inline raw_json_string field::key() const noexcept {
  SIMDJSON_ASSUME(first.buf != nullptr); // We would like to call .alive() but Visual Studio won't let us.
  return first;
}

/** Returns a reference to the value of this field (lvalue overload). */
simdjson_inline value &field::value() & noexcept {
  return second;
}

/** Returns the value of this field by value (rvalue overload). */
simdjson_inline value field::value() && noexcept {
  return std::forward(*this).second;
}

} // namespace ondemand
} // namespace westmere
} // namespace simdjson

namespace simdjson {

/** Builds a successful result by forwarding the field into the base. */
simdjson_inline simdjson_result::simdjson_result(
  westmere::ondemand::field &&value
) noexcept :
    implementation_simdjson_result_base(
      std::forward(value)
    )
{
}
/** Builds a result that carries only an error code. */
simdjson_inline simdjson_result::simdjson_result(
  error_code error
) noexcept :
    implementation_simdjson_result_base(error)
{
}

// The accessors below return the stored error, if any, before touching the field.
simdjson_inline simdjson_result simdjson_result::key() noexcept {
  if (error()) { return error(); }
  return first.key();
}
simdjson_inline simdjson_result simdjson_result::unescaped_key(bool allow_replacement) noexcept {
  if (error()) { return error(); }
  return first.unescaped_key(allow_replacement);
}
simdjson_inline simdjson_result simdjson_result::value() noexcept {
  if (error()) { return error(); }
  return std::move(first.value());
}

} // namespace simdjson

#endif // SIMDJSON_GENERIC_ONDEMAND_FIELD_INL_H
/* end file simdjson/generic/ondemand/field-inl.h for westmere */
/*
including simdjson/generic/ondemand/json_iterator-inl.h for westmere: #include "simdjson/generic/ondemand/json_iterator-inl.h" */ /* begin file simdjson/generic/ondemand/json_iterator-inl.h for westmere */ #ifndef SIMDJSON_GENERIC_ONDEMAND_JSON_ITERATOR_INL_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_JSON_ITERATOR_INL_H */ /* amalgamation skipped (editor-only): #include "simdjson/internal/dom_parser_implementation.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/parser.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/logger-inl.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/parser-inl.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/token_iterator-inl.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { namespace westmere { namespace ondemand { simdjson_inline json_iterator::json_iterator(json_iterator &&other) noexcept : token(std::forward(other.token)), parser{other.parser}, _string_buf_loc{other._string_buf_loc}, error{other.error}, _depth{other._depth}, _root{other._root}, _streaming{other._streaming} { other.parser = nullptr; } simdjson_inline json_iterator &json_iterator::operator=(json_iterator &&other) noexcept { token = other.token; parser = other.parser; _string_buf_loc = other._string_buf_loc; error = other.error; _depth = other._depth; _root = other._root; _streaming = other._streaming; other.parser = nullptr; return *this; } simdjson_inline json_iterator::json_iterator(const uint8_t *buf, 
ondemand::parser *_parser) noexcept
  : token(buf, &_parser->implementation->structural_indexes[0]),
    parser{_parser},
    _string_buf_loc{parser->string_buf.get()},
    _depth{1},
    _root{parser->implementation->structural_indexes.get()},
    _streaming{false}

{
  logger::log_headers();
#if SIMDJSON_CHECK_EOF
  assert_more_tokens();
#endif
}

/**
 * Moves the token back to the root position, resets the string buffer
 * location to the start of the parser's string buffer and sets depth to 1.
 */
inline void json_iterator::rewind() noexcept {
  token.set_position( root_position() );
  logger::log_headers(); // We start again
  _string_buf_loc = parser->string_buf.get();
  _depth = 1;
}

/**
 * Scans a copy of the token iterator from the root up to and including the
 * last structural, counting '[' / '{' as +1 and ']' / '}' as -1.
 * Returns true when the final count is zero.
 */
inline bool json_iterator::balanced() const noexcept {
  token_iterator ti(token);
  int32_t count{0};
  ti.set_position( root_position() );
  while(ti.peek() <= peek_last()) {
    switch (*ti.return_current_and_advance())
    {
    case '[': case '{':
      count++;
      break;
    case ']': case '}':
      count--;
      break;
    default:
      break;
    }
  }
  return count == 0;
}


// GCC 7 warns when the first line of this function is inlined away into oblivion due to the caller
// relating depth and parent_depth, which is a desired effect. The warning does not show up if the
// skip_child() function is not marked inline).
SIMDJSON_PUSH_DISABLE_WARNINGS
SIMDJSON_DISABLE_STRICT_OVERFLOW_WARNING
/**
 * Advances the iterator until depth() <= parent_depth.
 *
 * The first token is handled separately because depth was already incremented
 * for it by the caller; subsequent tokens only adjust depth on brackets/braces.
 *
 * @param parent_depth depth at which skipping stops.
 * @return SUCCESS once depth() <= parent_depth; TAPE_ERROR if the tokens run out
 *         first; INCOMPLETE_ARRAY_OR_OBJECT from the first-token end check when
 *         SIMDJSON_CHECK_EOF is enabled.
 */
simdjson_warn_unused simdjson_inline error_code json_iterator::skip_child(depth_t parent_depth) noexcept {
  if (depth() <= parent_depth) { return SUCCESS; }
  switch (*return_current_and_advance()) {
    // TODO consider whether matching braces is a requirement: if non-matching braces indicates
    // *missing* braces, then future lookups are not in the object/arrays they think they are,
    // violating the rule "validate enough structure that the user can be confident they are
    // looking at the right values."
    // PERF TODO we can eliminate the switch here with a lookup of how much to add to depth

    // For the first open array/object in a value, we've already incremented depth, so keep it the same
    // We never stop at colon, but if we did, it wouldn't affect depth
    case '[': case '{': case ':':
      logger::log_start_value(*this, "skip");
      break;
    // If there is a comma, we have just finished a value in an array/object, and need to get back in
    case ',':
      logger::log_value(*this, "skip");
      break;
    // ] or } means we just finished a value and need to jump out of the array/object
    case ']': case '}':
      logger::log_end_value(*this, "skip");
      _depth--;
      if (depth() <= parent_depth) { return SUCCESS; }
#if SIMDJSON_CHECK_EOF
      // If there are no more tokens, the parent is incomplete.
      if (at_end()) { return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "Missing [ or { at start"); }
#endif // SIMDJSON_CHECK_EOF
      break;
    case '"':
      if(*peek() == ':') {
        // We are at a key!!!
        // This might happen if you just started an object and you skip it immediately.
        // Performance note: it would be nice to get rid of this check as it is somewhat
        // expensive.
        // https://github.com/simdjson/simdjson/issues/1742
        logger::log_value(*this, "key");
        return_current_and_advance(); // eat up the ':'
        break; // important!!!
      }
      simdjson_fallthrough;
    // Anything else must be a scalar value
    default:
      // For the first scalar, we will have incremented depth already, so we decrement it here.
      logger::log_value(*this, "skip");
      _depth--;
      if (depth() <= parent_depth) { return SUCCESS; }
      break;
  }

  // Now that we've considered the first value, we only increment/decrement for arrays/objects
  while (position() < end_position()) {
    switch (*return_current_and_advance()) {
      case '[': case '{':
        logger::log_start_value(*this, "skip");
        _depth++;
        break;
      // TODO consider whether matching braces is a requirement: if non-matching braces indicates
      // *missing* braces, then future lookups are not in the object/arrays they think they are,
      // violating the rule "validate enough structure that the user can be confident they are
      // looking at the right values."
      // PERF TODO we can eliminate the switch here with a lookup of how much to add to depth
      case ']': case '}':
        logger::log_end_value(*this, "skip");
        _depth--;
        if (depth() <= parent_depth) { return SUCCESS; }
        break;
      default:
        logger::log_value(*this, "skip", "");
        break;
    }
  }

  return report_error(TAPE_ERROR, "not enough close braces");
}

SIMDJSON_POP_DISABLE_WARNINGS

/** True when the current position equals the root position. */
simdjson_inline bool json_iterator::at_root() const noexcept {
  return position() == root_position();
}

/** True when the parser reports exactly one structural index. */
simdjson_inline bool json_iterator::is_single_token() const noexcept {
  return parser->implementation->n_structural_indexes == 1;
}

/** Returns the stored streaming flag. */
simdjson_inline bool json_iterator::streaming() const noexcept {
  return _streaming;
}

/** Returns the stored root token position. */
simdjson_inline token_position json_iterator::root_position() const noexcept {
  return _root;
}

/** Asserts (via SIMDJSON_ASSUME) that the iterator depth is 1. */
simdjson_inline void json_iterator::assert_at_document_depth() const noexcept {
  SIMDJSON_ASSUME( _depth == 1 );
}

/** Asserts that depth is 1 and, outside clang-under-Visual-Studio, that the token sits at the root. */
simdjson_inline void json_iterator::assert_at_root() const noexcept {
  SIMDJSON_ASSUME( _depth == 1 );
#ifndef SIMDJSON_CLANG_VISUAL_STUDIO
  // Under Visual Studio, the next SIMDJSON_ASSUME fails with: the argument
  // has side effects that will be discarded.
  SIMDJSON_ASSUME( token.position() == _root );
#endif
}

/** Asserts that the position `required_tokens - 1` past the current token is valid. */
simdjson_inline void json_iterator::assert_more_tokens(uint32_t required_tokens) const noexcept {
  assert_valid_position(token._position + required_tokens - 1);
}

/**
 * Asserts that `position` lies within [&structural_indexes[0],
 * &structural_indexes[n_structural_indexes]). Compiled out under
 * SIMDJSON_CLANG_VISUAL_STUDIO.
 */
simdjson_inline void json_iterator::assert_valid_position(token_position position) const noexcept {
#ifndef SIMDJSON_CLANG_VISUAL_STUDIO
  SIMDJSON_ASSUME( position >= &parser->implementation->structural_indexes[0] );
  SIMDJSON_ASSUME( position < &parser->implementation->structural_indexes[parser->implementation->n_structural_indexes] );
#endif
}

/** True when the current position equals end_position(). */
simdjson_inline bool json_iterator::at_end() const noexcept {
  return position() == end_position();
}
/** Returns the address one past the last structural index. */
simdjson_inline token_position json_iterator::end_position() const noexcept {
  uint32_t n_structural_indexes{parser->implementation->n_structural_indexes};
  return &parser->implementation->structural_indexes[n_structural_indexes];
}

/**
 * Builds a human-readable description holding the depth, the byte at the
 * current token, the current offset and the error message. Returns a fixed
 * "dead" message when the iterator has no parser.
 */
inline std::string json_iterator::to_string() const noexcept {
  if( !is_alive() ) { return "dead json_iterator instance"; }
  const char * current_structural = reinterpret_cast(token.peek());
  return std::string("json_iterator [ depth : ") + std::to_string(_depth)
          + std::string(", structural : '") + std::string(current_structural,1)
          + std::string("', offset : ") + std::to_string(token.current_offset())
          + std::string("', error : ") + error_message(error)
          + std::string(" ]");
}

/**
 * Returns a pointer to the current token. When the iterator is not alive it
 * returns the previous token (or the current one if at the root); when alive
 * and at the end it returns OUT_OF_BOUNDS.
 */
inline simdjson_result json_iterator::current_location() const noexcept {
  if (!is_alive()) {    // Unrecoverable error
    if (!at_root()) {
      return reinterpret_cast(token.peek(-1));
    } else {
      return reinterpret_cast(token.peek());
    }
  }
  if (at_end()) {
    return OUT_OF_BOUNDS;
  }
  return reinterpret_cast(token.peek());
}

/** True while the iterator still holds a parser pointer. */
simdjson_inline bool json_iterator::is_alive() const noexcept {
  return parser;
}

/** Drops the parser pointer and resets depth to 0; is_alive() is false afterwards. */
simdjson_inline void json_iterator::abandon() noexcept {
  parser = nullptr;
  _depth = 0;
}

/** Returns the current token pointer and advances; checks for remaining tokens when SIMDJSON_CHECK_EOF is set. */
simdjson_inline const uint8_t *json_iterator::return_current_and_advance() noexcept {
#if SIMDJSON_CHECK_EOF
  assert_more_tokens();
#endif // SIMDJSON_CHECK_EOF
  return token.return_current_and_advance();
}

/** Returns token.peek() with no bounds assertion. */
simdjson_inline const uint8_t *json_iterator::unsafe_pointer() const noexcept {
  // deliberately done without safety guard:
  return token.peek();
}

/** Peeks `delta` tokens ahead of the current one. */
simdjson_inline const uint8_t *json_iterator::peek(int32_t delta) const noexcept {
#if SIMDJSON_CHECK_EOF
  assert_more_tokens(delta+1);
#endif // SIMDJSON_CHECK_EOF
  return token.peek(delta);
}

/** Returns token.peek_length(delta) for the token `delta` ahead. */
simdjson_inline uint32_t json_iterator::peek_length(int32_t delta) const noexcept {
#if SIMDJSON_CHECK_EOF
  assert_more_tokens(delta+1);
#endif // #if SIMDJSON_CHECK_EOF
  return token.peek_length(delta);
}

/** Peeks the token at an absolute position; the position is deliberately not asserted here. */
simdjson_inline const uint8_t *json_iterator::peek(token_position position) const noexcept {
  // todo: currently we require end-of-string buffering, but the following
  // assert_valid_position should be turned on if/when we lift that condition.
  // assert_valid_position(position);
  // This is almost surely related to SIMDJSON_CHECK_EOF but given that SIMDJSON_CHECK_EOF
  // is ON by default, we have no choice but to disable it for real with a comment.
  return token.peek(position);
}

/** Returns token.peek_length(position) for the token at an absolute position. */
simdjson_inline uint32_t json_iterator::peek_length(token_position position) const noexcept {
#if SIMDJSON_CHECK_EOF
  assert_valid_position(position);
#endif // SIMDJSON_CHECK_EOF
  return token.peek_length(position);
}

/** Returns the address of the last structural index. */
simdjson_inline token_position json_iterator::last_position() const noexcept {
  // The following line fails under some compilers...
  // SIMDJSON_ASSUME(parser->implementation->n_structural_indexes > 0);
  // since it has side-effects.
  uint32_t n_structural_indexes{parser->implementation->n_structural_indexes};
  SIMDJSON_ASSUME(n_structural_indexes > 0);
  return &parser->implementation->structural_indexes[n_structural_indexes - 1];
}
/** Peeks the token at last_position(). */
simdjson_inline const uint8_t *json_iterator::peek_last() const noexcept {
  return token.peek(last_position());
}

/** Sets depth to parent_depth; the current depth is assumed to be exactly parent_depth + 1. */
simdjson_inline void json_iterator::ascend_to(depth_t parent_depth) noexcept {
  SIMDJSON_ASSUME(parent_depth >= 0 && parent_depth < INT32_MAX - 1);
  SIMDJSON_ASSUME(_depth == parent_depth + 1);
  _depth = parent_depth;
}

/** Sets depth to child_depth; the current depth is assumed to be exactly child_depth - 1. */
simdjson_inline void json_iterator::descend_to(depth_t child_depth) noexcept {
  SIMDJSON_ASSUME(child_depth >= 1 && child_depth < INT32_MAX);
  SIMDJSON_ASSUME(_depth == child_depth - 1);
  _depth = child_depth;
}

/** Returns the current depth. */
simdjson_inline depth_t json_iterator::depth() const noexcept {
  return _depth;
}

/** Returns a mutable reference to the string buffer write location. */
simdjson_inline uint8_t *&json_iterator::string_buf_loc() noexcept {
  return _string_buf_loc;
}

/**
 * Logs `message`, stores `_error` on the iterator and returns it.
 * `_error` must not be SUCCESS, UNINITIALIZED, INCORRECT_TYPE or NO_SUCH_FIELD
 * (the latter two are handled by optional_error, which does not store them).
 */
simdjson_inline error_code json_iterator::report_error(error_code _error, const char *message) noexcept {
  SIMDJSON_ASSUME(_error != SUCCESS && _error != UNINITIALIZED && _error != INCORRECT_TYPE && _error != NO_SUCH_FIELD);
  logger::log_error(*this, message);
  error = _error;
  return error;
}

/** Returns the current token position. */
simdjson_inline token_position json_iterator::position() const noexcept {
  return token.position();
}

/** Forwards to parser->unescape using this iterator's string buffer location. */
simdjson_inline simdjson_result json_iterator::unescape(raw_json_string in, bool allow_replacement) noexcept {
  return parser->unescape(in, _string_buf_loc, allow_replacement);
}

/** Forwards to parser->unescape_wobbly using this iterator's string buffer location. */
simdjson_inline simdjson_result json_iterator::unescape_wobbly(raw_json_string in) noexcept {
  return parser->unescape_wobbly(in, _string_buf_loc);
}

/** Moves the token to `position` and sets depth to child_depth; the current depth is assumed to be child_depth - 1. */
simdjson_inline void json_iterator::reenter_child(token_position position, depth_t child_depth) noexcept {
  SIMDJSON_ASSUME(child_depth >= 1 && child_depth < INT32_MAX);
  SIMDJSON_ASSUME(_depth == child_depth - 1);
#if SIMDJSON_DEVELOPMENT_CHECKS
#ifndef SIMDJSON_CLANG_VISUAL_STUDIO
  SIMDJSON_ASSUME(size_t(child_depth) < parser->max_depth());
SIMDJSON_ASSUME(position >= parser->start_positions[child_depth]); #endif #endif token.set_position(position); _depth = child_depth; } simdjson_inline error_code json_iterator::consume_character(char c) noexcept { if (*peek() == c) { return_current_and_advance(); return SUCCESS; } return TAPE_ERROR; } #if SIMDJSON_DEVELOPMENT_CHECKS simdjson_inline token_position json_iterator::start_position(depth_t depth) const noexcept { SIMDJSON_ASSUME(size_t(depth) < parser->max_depth()); return size_t(depth) < parser->max_depth() ? parser->start_positions[depth] : 0; } simdjson_inline void json_iterator::set_start_position(depth_t depth, token_position position) noexcept { SIMDJSON_ASSUME(size_t(depth) < parser->max_depth()); if(size_t(depth) < parser->max_depth()) { parser->start_positions[depth] = position; } } #endif simdjson_inline error_code json_iterator::optional_error(error_code _error, const char *message) noexcept { SIMDJSON_ASSUME(_error == INCORRECT_TYPE || _error == NO_SUCH_FIELD); logger::log_error(*this, message); return _error; } simdjson_warn_unused simdjson_inline bool json_iterator::copy_to_buffer(const uint8_t *json, uint32_t max_len, uint8_t *tmpbuf, size_t N) noexcept { // This function is not expected to be called in performance-sensitive settings. // Let us guard against silly cases: if((N < max_len) || (N == 0)) { return false; } // Copy to the buffer. std::memcpy(tmpbuf, json, max_len); if(N > max_len) { // We pad whatever remains with ' '. 
    std::memset(tmpbuf + max_len, ' ', N - max_len);
  }
  return true;
}

} // namespace ondemand
} // namespace westmere
} // namespace simdjson

namespace simdjson {

/** Builds a successful result by forwarding the json_iterator into the base. */
simdjson_inline simdjson_result::simdjson_result(westmere::ondemand::json_iterator &&value) noexcept
    : implementation_simdjson_result_base(std::forward(value)) {}
/** Builds a result that carries only an error code. */
simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept
    : implementation_simdjson_result_base(error) {}

} // namespace simdjson

#endif // SIMDJSON_GENERIC_ONDEMAND_JSON_ITERATOR_INL_H
/* end file simdjson/generic/ondemand/json_iterator-inl.h for westmere */
/* including simdjson/generic/ondemand/json_type-inl.h for westmere: #include "simdjson/generic/ondemand/json_type-inl.h" */
/* begin file simdjson/generic/ondemand/json_type-inl.h for westmere */
#ifndef SIMDJSON_GENERIC_ONDEMAND_JSON_TYPE_INL_H

/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */
/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_JSON_TYPE_INL_H */
/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */
/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_type.h" */
/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base-inl.h" */
/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */

namespace simdjson {
namespace westmere {
namespace ondemand {

/**
 * Writes the lowercase name of `type` ("array", "object", "number", "string",
 * "boolean" or "null") to `out`. Any other enumerator value hits
 * SIMDJSON_UNREACHABLE().
 */
inline std::ostream& operator<<(std::ostream& out, json_type type) noexcept {
    switch (type) {
        case json_type::array: out << "array"; break;
        case json_type::object: out << "object"; break;
        case json_type::number: out << "number"; break;
        case json_type::string: out << "string"; break;
        case json_type::boolean: out << "boolean"; break;
        case json_type::null: out << "null"; break;
        default: SIMDJSON_UNREACHABLE();
    }
    return out;
}

#if SIMDJSON_EXCEPTIONS
inline std::ostream& operator<<(std::ostream& out, simdjson_result &type) noexcept(false) {
return out << type.value(); } #endif simdjson_inline number_type number::get_number_type() const noexcept { return type; } simdjson_inline bool number::is_uint64() const noexcept { return get_number_type() == number_type::unsigned_integer; } simdjson_inline uint64_t number::get_uint64() const noexcept { return payload.unsigned_integer; } simdjson_inline number::operator uint64_t() const noexcept { return get_uint64(); } simdjson_inline bool number::is_int64() const noexcept { return get_number_type() == number_type::signed_integer; } simdjson_inline int64_t number::get_int64() const noexcept { return payload.signed_integer; } simdjson_inline number::operator int64_t() const noexcept { return get_int64(); } simdjson_inline bool number::is_double() const noexcept { return get_number_type() == number_type::floating_point_number; } simdjson_inline double number::get_double() const noexcept { return payload.floating_point_number; } simdjson_inline number::operator double() const noexcept { return get_double(); } simdjson_inline double number::as_double() const noexcept { if(is_double()) { return payload.floating_point_number; } if(is_int64()) { return double(payload.signed_integer); } return double(payload.unsigned_integer); } simdjson_inline void number::append_s64(int64_t value) noexcept { payload.signed_integer = value; type = number_type::signed_integer; } simdjson_inline void number::append_u64(uint64_t value) noexcept { payload.unsigned_integer = value; type = number_type::unsigned_integer; } simdjson_inline void number::append_double(double value) noexcept { payload.floating_point_number = value; type = number_type::floating_point_number; } simdjson_inline void number::skip_double() noexcept { type = number_type::floating_point_number; } } // namespace ondemand } // namespace westmere } // namespace simdjson namespace simdjson { simdjson_inline simdjson_result::simdjson_result(westmere::ondemand::json_type &&value) noexcept : 
implementation_simdjson_result_base(std::forward(value)) {} simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept : implementation_simdjson_result_base(error) {} } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_JSON_TYPE_INL_H /* end file simdjson/generic/ondemand/json_type-inl.h for westmere */ /* including simdjson/generic/ondemand/logger-inl.h for westmere: #include "simdjson/generic/ondemand/logger-inl.h" */ /* begin file simdjson/generic/ondemand/logger-inl.h for westmere */ #ifndef SIMDJSON_GENERIC_ONDEMAND_LOGGER_INL_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_LOGGER_INL_H */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/logger.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ #include #include namespace simdjson { namespace westmere { namespace ondemand { namespace logger { static constexpr const char * DASHES = "----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------"; static constexpr const int LOG_EVENT_LEN = 20; static constexpr const int LOG_BUFFER_LEN = 30; static constexpr const int LOG_SMALL_BUFFER_LEN = 10; static int log_depth = 0; // Not threadsafe. Log only. 
// Helper to turn unprintable or newline characters into spaces static inline char printable_char(char c) { if (c >= 0x20) { return c; } else { return ' '; } } template static inline std::string string_format(const std::string& format, const Args&... args) { SIMDJSON_PUSH_DISABLE_ALL_WARNINGS int size_s = std::snprintf(nullptr, 0, format.c_str(), args...) + 1; auto size = static_cast(size_s); if (size <= 0) return std::string(); std::unique_ptr buf(new char[size]); std::snprintf(buf.get(), size, format.c_str(), args...); SIMDJSON_POP_DISABLE_WARNINGS return std::string(buf.get(), buf.get() + size - 1); } static inline log_level get_log_level_from_env() { SIMDJSON_PUSH_DISABLE_WARNINGS SIMDJSON_DISABLE_DEPRECATED_WARNING // Disable CRT_SECURE warning on MSVC: manually verified this is safe char *lvl = getenv("SIMDJSON_LOG_LEVEL"); SIMDJSON_POP_DISABLE_WARNINGS if (lvl && simdjson_strcasecmp(lvl, "ERROR") == 0) { return log_level::error; } return log_level::info; } static inline log_level log_threshold() { static log_level threshold = get_log_level_from_env(); return threshold; } static inline bool should_log(log_level level) { return level >= log_threshold(); } inline void log_event(const json_iterator &iter, const char *type, std::string_view detail, int delta, int depth_delta) noexcept { log_line(iter, "", type, detail, delta, depth_delta, log_level::info); } inline void log_value(const json_iterator &iter, token_position index, depth_t depth, const char *type, std::string_view detail) noexcept { log_line(iter, index, depth, "", type, detail, log_level::info); } inline void log_value(const json_iterator &iter, const char *type, std::string_view detail, int delta, int depth_delta) noexcept { log_line(iter, "", type, detail, delta, depth_delta, log_level::info); } inline void log_start_value(const json_iterator &iter, token_position index, depth_t depth, const char *type, std::string_view detail) noexcept { log_line(iter, index, depth, "+", type, detail, 
           log_level::info);
  if (LOG_ENABLED) { log_depth++; }
}
/** Logs a "+" line at info level, then increments log_depth when logging is enabled. */
inline void log_start_value(const json_iterator &iter, const char *type, int delta, int depth_delta) noexcept {
  log_line(iter, "+", type, "", delta, depth_delta, log_level::info);
  if (LOG_ENABLED) { log_depth++; }
}

/** Decrements log_depth when logging is enabled, then logs a "-" line at info level. */
inline void log_end_value(const json_iterator &iter, const char *type, int delta, int depth_delta) noexcept {
  if (LOG_ENABLED) { log_depth--; }
  log_line(iter, "-", type, "", delta, depth_delta, log_level::info);
}

/** Logs an "ERROR: " line at error level, positioned relative to the iterator. */
inline void log_error(const json_iterator &iter, const char *error, const char *detail, int delta, int depth_delta) noexcept {
  log_line(iter, "ERROR: ", error, detail, delta, depth_delta, log_level::error);
}
/** Logs an "ERROR: " line at error level for an explicit token index and depth. */
inline void log_error(const json_iterator &iter, token_position index, depth_t depth, const char *error, const char *detail) noexcept {
  log_line(iter, index, depth, "ERROR: ", error, detail, log_level::error);
}

// value_iterator overloads: each forwards to the json_iterator overload via iter.json_iter().
inline void log_event(const value_iterator &iter, const char *type, std::string_view detail, int delta, int depth_delta) noexcept {
  log_event(iter.json_iter(), type, detail, delta, depth_delta);
}

inline void log_value(const value_iterator &iter, const char *type, std::string_view detail, int delta, int depth_delta) noexcept {
  log_value(iter.json_iter(), type, detail, delta, depth_delta);
}

inline void log_start_value(const value_iterator &iter, const char *type, int delta, int depth_delta) noexcept {
  log_start_value(iter.json_iter(), type, delta, depth_delta);
}

inline void log_end_value(const value_iterator &iter, const char *type, int delta, int depth_delta) noexcept {
  log_end_value(iter.json_iter(), type, delta, depth_delta);
}

inline void log_error(const value_iterator &iter, const char *error, const char *detail, int delta, int depth_delta) noexcept {
  log_error(iter.json_iter(), error, detail, delta, depth_delta);
}

/**
 * Prints the log table header to stdout when logging is enabled and the info
 * level passes the threshold. Resets log_depth to 0 on every call; the
 * explanatory hint block is printed only on the first call.
 */
inline void log_headers() noexcept {
  if (LOG_ENABLED) {
    if (simdjson_unlikely(should_log(log_level::info))) {
      // Technically a static variable is not thread-safe, but if you are using threads and logging... well...
      static bool displayed_hint{false};
      log_depth = 0;
      printf("\n");
      if (!displayed_hint) {
        // We only print this helpful header once.
        printf("# Logging provides the depth and position of the iterator user-visible steps:\n");
        printf("# +array says 'this is where we were when we discovered the start array'\n");
        printf(
            "# -array says 'this is where we were when we ended the array'\n");
        printf("# skip says 'this is a structural or value I am skipping'\n");
        printf("# +/-skip says 'this is a start/end array or object I am skipping'\n");
        printf("#\n");
        printf("# The indentation of the terms (array, string,...) indicates the depth,\n");
        printf("# in addition to the depth being displayed.\n");
        printf("#\n");
        printf("# Every token in the document has a single depth determined by the tokens before it,\n");
        printf("# and is not affected by what the token actually is.\n");
        printf("#\n");
        printf("# Not all structural elements are presented as tokens in the logs.\n");
        printf("#\n");
        printf("# We never give control to the user within an empty array or an empty object.\n");
        printf("#\n");
        printf("# Inside an array, having a depth greater than the array's depth means that\n");
        printf("# we are pointing inside a value.\n");
        printf("# Having a depth equal to the array means that we are pointing right before a value.\n");
        printf("# Having a depth smaller than the array means that we have moved beyond the array.\n");
        displayed_hint = true;
      }
      printf("\n");
      // Column titles, padded to the configured column widths.
      printf("| %-*s ", LOG_EVENT_LEN, "Event");
      printf("| %-*s ", LOG_BUFFER_LEN, "Buffer");
      printf("| %-*s ", LOG_SMALL_BUFFER_LEN, "Next");
      // printf("| %-*s ", 5, "Next#");
      printf("| %-*s ", 5, "Depth");
      printf("| Detail ");
      printf("|\n");

      // Separator row: a prefix of DASHES cut to each column width plus padding.
      printf("|%.*s", LOG_EVENT_LEN + 2, DASHES);
      printf("|%.*s", LOG_BUFFER_LEN + 2, DASHES);
      printf("|%.*s", LOG_SMALL_BUFFER_LEN + 2, DASHES);
      // printf("|%.*s", 5+2, DASHES);
      printf("|%.*s", 5 + 2, DASHES);
      printf("|--------");
      printf("|\n");
      fflush(stdout);
    }
  }
}

/**
 * Relative overload: resolves the token index as iter.position() + delta and
 * the depth as iter.depth() + depth_delta, then forwards to the absolute overload.
 */
template
inline void log_line(const json_iterator &iter, const char *title_prefix, const char *title, std::string_view detail, int delta, int depth_delta, log_level level, Args&&... args) noexcept {
  log_line(iter, iter.position()+delta, depth_t(iter.depth()+depth_delta), title_prefix, title, detail, level, std::forward(args)...);
}

/**
 * Prints one table row to stdout when logging is enabled and `level` passes
 * the threshold: the formatted title (indented by two spaces per depth level),
 * LOG_BUFFER_LEN bytes starting at the token at `index`, LOG_SMALL_BUFFER_LEN
 * bytes starting at the following token, the depth, and `detail`.
 *
 * `title` is used as a printf-style format string with `args`.
 * NOTE(review): the byte loops read a fixed number of bytes past the token
 * start and the "next" column dereferences index + 1 unconditionally; this
 * presumably relies on buffer padding - confirm against the parser's guarantees.
 */
template
inline void log_line(const json_iterator &iter, token_position index, depth_t depth, const char *title_prefix, const char *title, std::string_view detail, log_level level, Args&&... args) noexcept {
  if (LOG_ENABLED) {
    if (simdjson_unlikely(should_log(level))) {
      const int indent = depth * 2;
      const auto buf = iter.token.buf;
      auto msg = string_format(title, std::forward(args)...);
      printf("| %*s%s%-*s ", indent, "", title_prefix,
             LOG_EVENT_LEN - indent - int(strlen(title_prefix)), msg.c_str());
      {
        // Print the current structural.
        printf("| ");
        // Before we begin, the index might point right before the document.
        // This could be unsafe, see https://github.com/simdjson/simdjson/discussions/1938
        if (index < iter._root) {
          printf("%*s", LOG_BUFFER_LEN, "");
        } else {
          auto current_structural = &buf[*index];
          for (int i = 0; i < LOG_BUFFER_LEN; i++) {
            printf("%c", printable_char(current_structural[i]));
          }
        }
        printf(" ");
      }
      {
        // Print the next structural.
        printf("| ");
        auto next_structural = &buf[*(index + 1)];
        for (int i = 0; i < LOG_SMALL_BUFFER_LEN; i++) {
          printf("%c", printable_char(next_structural[i]));
        }
        printf(" ");
      }
      // printf("| %5u ", *(index+1));
      printf("| %5i ", depth);
      printf("| %6.*s ", int(detail.size()), detail.data());
      printf("|\n");
      // Flush so each row is emitted as soon as it is written.
      fflush(stdout);
    }
  }
}

} // namespace logger
} // namespace ondemand
} // namespace westmere
} // namespace simdjson

#endif // SIMDJSON_GENERIC_ONDEMAND_LOGGER_INL_H
/* end file simdjson/generic/ondemand/logger-inl.h for westmere */
/* including simdjson/generic/ondemand/object-inl.h for westmere: #include "simdjson/generic/ondemand/object-inl.h" */
/* begin file simdjson/generic/ondemand/object-inl.h for westmere */
#ifndef SIMDJSON_GENERIC_ONDEMAND_OBJECT_INL_H

/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */
/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_OBJECT_INL_H */
/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */
/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/field.h" */
/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/object.h" */
/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/object_iterator.h" */
/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string.h" */
/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator.h" */
/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value-inl.h" */
/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */

namespace simdjson {
namespace westmere {
namespace ondemand {

/**
 * Looks up `key` through iter.find_field_unordered_raw. Returns the child
 * value when found; otherwise logs the miss at error level and returns
 * NO_SUCH_FIELD. Errors from the lookup itself are propagated by SIMDJSON_TRY.
 */
simdjson_inline simdjson_result object::find_field_unordered(const std::string_view key) & noexcept {
  bool has_value;
  SIMDJSON_TRY( iter.find_field_unordered_raw(key).get(has_value) );
  if (!has_value) {
    logger::log_line(iter.json_iter(), "ERROR: ", "Cannot find key %.*s", "",
                     -1, 0, logger::log_level::error, static_cast(key.size()), key.data());
    return NO_SUCH_FIELD;
  }
  return value(iter.child());
}
/** Rvalue overload; same lookup, logging and return values as the lvalue overload. */
simdjson_inline simdjson_result object::find_field_unordered(const std::string_view key) && noexcept {
  bool has_value;
  SIMDJSON_TRY( iter.find_field_unordered_raw(key).get(has_value) );
  if (!has_value) {
    logger::log_line(iter.json_iter(), "ERROR: ", "Cannot find key %.*s", "", -1, 0, logger::log_level::error, static_cast(key.size()), key.data());
    return NO_SUCH_FIELD;
  }
  return value(iter.child());
}
/** Subscript is an alias for find_field_unordered. */
simdjson_inline simdjson_result object::operator[](const std::string_view key) & noexcept {
  return find_field_unordered(key);
}
simdjson_inline simdjson_result object::operator[](const std::string_view key) && noexcept {
  return std::forward(*this).find_field_unordered(key);
}
/**
 * Looks up `key` through iter.find_field_raw. Returns the child value when
 * found; otherwise logs the miss at error level and returns NO_SUCH_FIELD.
 */
simdjson_inline simdjson_result object::find_field(const std::string_view key) & noexcept {
  bool has_value;
  SIMDJSON_TRY( iter.find_field_raw(key).get(has_value) );
  if (!has_value) {
    logger::log_line(iter.json_iter(), "ERROR: ", "Cannot find key %.*s", "", -1, 0, logger::log_level::error, static_cast(key.size()), key.data());
    return NO_SUCH_FIELD;
  }
  return value(iter.child());
}
/** Rvalue overload; same lookup, logging and return values as the lvalue overload. */
simdjson_inline simdjson_result object::find_field(const std::string_view key) && noexcept {
  bool has_value;
  SIMDJSON_TRY( iter.find_field_raw(key).get(has_value) );
  if (!has_value) {
    logger::log_line(iter.json_iter(), "ERROR: ", "Cannot find key %.*s", "", -1, 0, logger::log_level::error, static_cast(key.size()), key.data());
    return NO_SUCH_FIELD;
  }
  return value(iter.child());
}

/** Calls iter.start_object() and wraps the iterator in an object on success. */
simdjson_inline simdjson_result object::start(value_iterator &iter) noexcept {
  SIMDJSON_TRY( iter.start_object().error() );
  return object(iter);
}
/** Calls iter.start_root_object() and wraps the iterator in an object on success. */
simdjson_inline simdjson_result object::start_root(value_iterator &iter) noexcept {
  SIMDJSON_TRY( iter.start_root_object().error() );
  return object(iter);
}
/**
 * Skips the rest of this object. If the iterator currently sits on a key, the
 * key and the move to its value are consumed first. The iterator is abandoned
 * on any error, and that error is returned.
 */
simdjson_inline error_code object::consume() noexcept {
  if(iter.is_at_key()) {
    /**
     * whenever you are pointing at a key,
calling skip_child() is * unsafe because you will hit a string and you will assume that * it is string value, and this mistake will lead you to make bad * depth computation. */ /** * We want to 'consume' the key. We could really * just do _json_iter->return_current_and_advance(); at this * point, but, for clarity, we will use the high-level API to * eat the key. We assume that the compiler optimizes away * most of the work. */ simdjson_unused raw_json_string actual_key; auto error = iter.field_key().get(actual_key); if (error) { iter.abandon(); return error; }; // Let us move to the value while we are at it. if ((error = iter.field_value())) { iter.abandon(); return error; } } auto error_skip = iter.json_iter().skip_child(iter.depth()-1); if(error_skip) { iter.abandon(); } return error_skip; } simdjson_inline simdjson_result object::raw_json() noexcept { const uint8_t * starting_point{iter.peek_start()}; auto error = consume(); if(error) { return error; } const uint8_t * final_point{iter._json_iter->peek()}; return std::string_view(reinterpret_cast(starting_point), size_t(final_point - starting_point)); } simdjson_inline simdjson_result object::started(value_iterator &iter) noexcept { SIMDJSON_TRY( iter.started_object().error() ); return object(iter); } simdjson_inline object object::resume(const value_iterator &iter) noexcept { return iter; } simdjson_inline object::object(const value_iterator &_iter) noexcept : iter{_iter} { } simdjson_inline simdjson_result object::begin() noexcept { #if SIMDJSON_DEVELOPMENT_CHECKS if (!iter.is_at_iterator_start()) { return OUT_OF_ORDER_ITERATION; } #endif return object_iterator(iter); } simdjson_inline simdjson_result object::end() noexcept { return object_iterator(iter); } inline simdjson_result object::at_pointer(std::string_view json_pointer) noexcept { if (json_pointer[0] != '/') { return INVALID_JSON_POINTER; } json_pointer = json_pointer.substr(1); size_t slash = json_pointer.find('/'); std::string_view key = 
json_pointer.substr(0, slash); // Grab the child with the given key simdjson_result child; // If there is an escape character in the key, unescape it and then get the child. size_t escape = key.find('~'); if (escape != std::string_view::npos) { // Unescape the key std::string unescaped(key); do { switch (unescaped[escape+1]) { case '0': unescaped.replace(escape, 2, "~"); break; case '1': unescaped.replace(escape, 2, "/"); break; default: return INVALID_JSON_POINTER; // "Unexpected ~ escape character in JSON pointer"); } escape = unescaped.find('~', escape+1); } while (escape != std::string::npos); child = find_field(unescaped); // Take note find_field does not unescape keys when matching } else { child = find_field(key); } if(child.error()) { return child; // we do not continue if there was an error } // If there is a /, we have to recurse and look up more of the path if (slash != std::string_view::npos) { child = child.at_pointer(json_pointer.substr(slash)); } return child; } simdjson_inline simdjson_result object::count_fields() & noexcept { size_t count{0}; // Important: we do not consume any of the values. for(simdjson_unused auto v : *this) { count++; } // The above loop will always succeed, but we want to report errors. if(iter.error()) { return iter.error(); } // We need to move back at the start because we expect users to iterate through // the object after counting the number of elements. 
iter.reset_object(); return count; } simdjson_inline simdjson_result object::is_empty() & noexcept { bool is_not_empty; auto error = iter.reset_object().get(is_not_empty); if(error) { return error; } return !is_not_empty; } simdjson_inline simdjson_result object::reset() & noexcept { return iter.reset_object(); } } // namespace ondemand } // namespace westmere } // namespace simdjson namespace simdjson { simdjson_inline simdjson_result::simdjson_result(westmere::ondemand::object &&value) noexcept : implementation_simdjson_result_base(std::forward(value)) {} simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept : implementation_simdjson_result_base(error) {} simdjson_inline simdjson_result simdjson_result::begin() noexcept { if (error()) { return error(); } return first.begin(); } simdjson_inline simdjson_result simdjson_result::end() noexcept { if (error()) { return error(); } return first.end(); } simdjson_inline simdjson_result simdjson_result::find_field_unordered(std::string_view key) & noexcept { if (error()) { return error(); } return first.find_field_unordered(key); } simdjson_inline simdjson_result simdjson_result::find_field_unordered(std::string_view key) && noexcept { if (error()) { return error(); } return std::forward(first).find_field_unordered(key); } simdjson_inline simdjson_result simdjson_result::operator[](std::string_view key) & noexcept { if (error()) { return error(); } return first[key]; } simdjson_inline simdjson_result simdjson_result::operator[](std::string_view key) && noexcept { if (error()) { return error(); } return std::forward(first)[key]; } simdjson_inline simdjson_result simdjson_result::find_field(std::string_view key) & noexcept { if (error()) { return error(); } return first.find_field(key); } simdjson_inline simdjson_result simdjson_result::find_field(std::string_view key) && noexcept { if (error()) { return error(); } return std::forward(first).find_field(key); } simdjson_inline simdjson_result 
simdjson_result::at_pointer(std::string_view json_pointer) noexcept { if (error()) { return error(); } return first.at_pointer(json_pointer); } inline simdjson_result simdjson_result::reset() noexcept { if (error()) { return error(); } return first.reset(); } inline simdjson_result simdjson_result::is_empty() noexcept { if (error()) { return error(); } return first.is_empty(); } simdjson_inline simdjson_result simdjson_result::count_fields() & noexcept { if (error()) { return error(); } return first.count_fields(); } simdjson_inline simdjson_result simdjson_result::raw_json() noexcept { if (error()) { return error(); } return first.raw_json(); } } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_OBJECT_INL_H /* end file simdjson/generic/ondemand/object-inl.h for westmere */ /* including simdjson/generic/ondemand/object_iterator-inl.h for westmere: #include "simdjson/generic/ondemand/object_iterator-inl.h" */ /* begin file simdjson/generic/ondemand/object_iterator-inl.h for westmere */ #ifndef SIMDJSON_GENERIC_ONDEMAND_OBJECT_ITERATOR_INL_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_OBJECT_ITERATOR_INL_H */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/object_iterator.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/field-inl.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator-inl.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { namespace westmere { namespace ondemand { // // object_iterator // simdjson_inline object_iterator::object_iterator(const value_iterator &_iter) noexcept : iter{_iter} {} simdjson_inline simdjson_result object_iterator::operator*() noexcept { error_code error = iter.error(); if 
(error) { iter.abandon(); return error; } auto result = field::start(iter); // TODO this is a safety rail ... users should exit loops as soon as they receive an error. // Nonetheless, let's see if performance is OK with this if statement--the compiler may give it to us for free. if (result.error()) { iter.abandon(); } return result; } simdjson_inline bool object_iterator::operator==(const object_iterator &other) const noexcept { return !(*this != other); } simdjson_inline bool object_iterator::operator!=(const object_iterator &) const noexcept { return iter.is_open(); } SIMDJSON_PUSH_DISABLE_WARNINGS SIMDJSON_DISABLE_STRICT_OVERFLOW_WARNING simdjson_inline object_iterator &object_iterator::operator++() noexcept { // TODO this is a safety rail ... users should exit loops as soon as they receive an error. // Nonetheless, let's see if performance is OK with this if statement--the compiler may give it to us for free. if (!iter.is_open()) { return *this; } // Iterator will be released if there is an error simdjson_unused error_code error; if ((error = iter.skip_child() )) { return *this; } simdjson_unused bool has_value; if ((error = iter.has_next_field().get(has_value) )) { return *this; }; return *this; } SIMDJSON_POP_DISABLE_WARNINGS // // ### Live States // // While iterating or looking up values, depth >= iter.depth. at_start may vary. Error is // always SUCCESS: // // - Start: This is the state when the object is first found and the iterator is just past the {. // In this state, at_start == true. // - Next: After we hand a scalar value to the user, or an array/object which they then fully // iterate over, the iterator is at the , or } before the next value. In this state, // depth == iter.depth, at_start == false, and error == SUCCESS. // - Unfinished Business: When we hand an array/object to the user which they do not fully // iterate over, we need to finish that iteration by skipping child values until we reach the // Next state. 
In this state, depth > iter.depth, at_start == false, and error == SUCCESS. // // ## Error States // // In error states, we will yield exactly one more value before stopping. iter.depth == depth // and at_start is always false. We decrement after yielding the error, moving to the Finished // state. // // - Chained Error: When the object iterator is part of an error chain--for example, in // `for (auto tweet : doc["tweets"])`, where the tweet field may be missing or not be an // object--we yield that error in the loop, exactly once. In this state, error != SUCCESS and // iter.depth == depth, and at_start == false. We decrement depth when we yield the error. // - Missing Comma Error: When the iterator ++ method discovers there is no comma between fields, // we flag that as an error and treat it exactly the same as a Chained Error. In this state, // error == TAPE_ERROR, iter.depth == depth, and at_start == false. // // Errors that occur while reading a field to give to the user (such as when the key is not a // string or the field is missing a colon) are yielded immediately. Depth is then decremented, // moving to the Finished state without transitioning through an Error state at all. // // ## Terminal State // // The terminal state has iter.depth < depth. at_start is always false. // // - Finished: When we have reached a }, we are finished. We signal this by decrementing depth. // In this state, iter.depth < depth, at_start == false, and error == SUCCESS. 
// } // namespace ondemand } // namespace westmere } // namespace simdjson namespace simdjson { simdjson_inline simdjson_result::simdjson_result( westmere::ondemand::object_iterator &&value ) noexcept : implementation_simdjson_result_base(std::forward(value)) { first.iter.assert_is_valid(); } simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept : implementation_simdjson_result_base({}, error) { } simdjson_inline simdjson_result simdjson_result::operator*() noexcept { if (error()) { return error(); } return *first; } // If we're iterating and there is an error, return the error once. simdjson_inline bool simdjson_result::operator==(const simdjson_result &other) const noexcept { if (!first.iter.is_valid()) { return !error(); } return first == other.first; } // If we're iterating and there is an error, return the error once. simdjson_inline bool simdjson_result::operator!=(const simdjson_result &other) const noexcept { if (!first.iter.is_valid()) { return error(); } return first != other.first; } // Checks for ']' and ',' simdjson_inline simdjson_result &simdjson_result::operator++() noexcept { // Clear the error if there is one, so we don't yield it twice if (error()) { second = SUCCESS; return *this; } ++first; return *this; } } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_OBJECT_ITERATOR_INL_H /* end file simdjson/generic/ondemand/object_iterator-inl.h for westmere */ /* including simdjson/generic/ondemand/parser-inl.h for westmere: #include "simdjson/generic/ondemand/parser-inl.h" */ /* begin file simdjson/generic/ondemand/parser-inl.h for westmere */ #ifndef SIMDJSON_GENERIC_ONDEMAND_PARSER_INL_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_PARSER_INL_H */ /* amalgamation skipped (editor-only): #include "simdjson/padded_string.h" */ /* amalgamation skipped (editor-only): #include "simdjson/padded_string_view.h" */ /* 
amalgamation skipped (editor-only): #include "simdjson/implementation.h" */
/* amalgamation skipped (editor-only): #include "simdjson/internal/dom_parser_implementation.h" */
/* amalgamation skipped (editor-only): #include "simdjson/dom/base.h" // for MINIMAL_DOCUMENT_CAPACITY */
/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */
/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/document_stream.h" */
/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/parser.h" */
/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string.h" */
/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */

namespace simdjson {
namespace westmere {
namespace ondemand {

/** Create a parser that records max_capacity as its capacity limit; nothing is allocated here. */
simdjson_inline parser::parser(size_t max_capacity) noexcept
  : _max_capacity{max_capacity} {
}

/**
 * Make sure the parser can handle documents of new_capacity bytes nested up to
 * new_max_depth levels.
 *
 * @param new_capacity  requested document capacity in bytes.
 * @param new_max_depth requested maximum nesting depth.
 * @return CAPACITY if new_capacity exceeds max_capacity(); MEMALLOC if a
 *         buffer cannot be allocated; the error reported by the implementation
 *         if resizing or creating it fails; SUCCESS otherwise.
 *
 * _capacity is set to 0 before any buffer is touched and only restored on
 * success, so every failure path leaves the parser reporting zero capacity.
 */
simdjson_warn_unused simdjson_inline error_code parser::allocate(size_t new_capacity, size_t new_max_depth) noexcept {
  if (new_capacity > max_capacity()) { return CAPACITY; }
  // Nothing to do when a string buffer exists and both settings are unchanged.
  if (string_buf && new_capacity == capacity() && new_max_depth == max_depth()) { return SUCCESS; }

  // string_capacity copied from document::allocate
  _capacity = 0;
  size_t string_capacity = SIMDJSON_ROUNDUP_N(5 * new_capacity / 3 + SIMDJSON_PADDING, 64);
  string_buf.reset(new (std::nothrow) uint8_t[string_capacity]);
  // new (std::nothrow) signals failure with a null pointer instead of throwing;
  // without this check the function would report SUCCESS with no string buffer.
  if (!string_buf) { return MEMALLOC; }
#if SIMDJSON_DEVELOPMENT_CHECKS
  start_positions.reset(new (std::nothrow) token_position[new_max_depth]);
  if (!start_positions) { return MEMALLOC; }
#endif
  if (implementation) {
    SIMDJSON_TRY( implementation->set_capacity(new_capacity) );
    SIMDJSON_TRY( implementation->set_max_depth(new_max_depth) );
  } else {
    SIMDJSON_TRY( simdjson::get_active_implementation()->create_dom_parser_implementation(new_capacity, new_max_depth, implementation) );
  }
  _capacity = new_capacity;
  _max_depth = new_max_depth;
  return SUCCESS;
}

simdjson_warn_unused simdjson_inline simdjson_result<document> parser::iterate(padded_string_view json) & noexcept {
  if (json.padding() < SIMDJSON_PADDING) {
return INSUFFICIENT_PADDING; } json.remove_utf8_bom(); // Allocate if needed if (capacity() < json.length() || !string_buf) { SIMDJSON_TRY( allocate(json.length(), max_depth()) ); } // Run stage 1. SIMDJSON_TRY( implementation->stage1(reinterpret_cast(json.data()), json.length(), stage1_mode::regular) ); return document::start({ reinterpret_cast(json.data()), this }); } simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(const char *json, size_t len, size_t allocated) & noexcept { return iterate(padded_string_view(json, len, allocated)); } simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(const uint8_t *json, size_t len, size_t allocated) & noexcept { return iterate(padded_string_view(json, len, allocated)); } simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(std::string_view json, size_t allocated) & noexcept { return iterate(padded_string_view(json, allocated)); } simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(std::string &json) & noexcept { if(json.capacity() - json.size() < SIMDJSON_PADDING) { json.reserve(json.size() + SIMDJSON_PADDING); } return iterate(padded_string_view(json)); } simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(const std::string &json) & noexcept { return iterate(padded_string_view(json)); } simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(const simdjson_result &result) & noexcept { // We don't presently have a way to temporarily get a const T& from a simdjson_result without throwing an exception SIMDJSON_TRY( result.error() ); padded_string_view json = result.value_unsafe(); return iterate(json); } simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(const simdjson_result &result) & noexcept { // We don't presently have a way to temporarily get a const T& from a simdjson_result without throwing an exception SIMDJSON_TRY( result.error() ); const padded_string &json = result.value_unsafe(); return 
iterate(json); } simdjson_warn_unused simdjson_inline simdjson_result parser::iterate_raw(padded_string_view json) & noexcept { if (json.padding() < SIMDJSON_PADDING) { return INSUFFICIENT_PADDING; } json.remove_utf8_bom(); // Allocate if needed if (capacity() < json.length()) { SIMDJSON_TRY( allocate(json.length(), max_depth()) ); } // Run stage 1. SIMDJSON_TRY( implementation->stage1(reinterpret_cast(json.data()), json.length(), stage1_mode::regular) ); return json_iterator(reinterpret_cast(json.data()), this); } inline simdjson_result parser::iterate_many(const uint8_t *buf, size_t len, size_t batch_size, bool allow_comma_separated) noexcept { if(batch_size < MINIMAL_BATCH_SIZE) { batch_size = MINIMAL_BATCH_SIZE; } if((len >= 3) && (std::memcmp(buf, "\xEF\xBB\xBF", 3) == 0)) { buf += 3; len -= 3; } if(allow_comma_separated && batch_size < len) { batch_size = len; } return document_stream(*this, buf, len, batch_size, allow_comma_separated); } inline simdjson_result parser::iterate_many(const char *buf, size_t len, size_t batch_size, bool allow_comma_separated) noexcept { return iterate_many(reinterpret_cast(buf), len, batch_size, allow_comma_separated); } inline simdjson_result parser::iterate_many(const std::string &s, size_t batch_size, bool allow_comma_separated) noexcept { return iterate_many(s.data(), s.length(), batch_size, allow_comma_separated); } inline simdjson_result parser::iterate_many(const padded_string &s, size_t batch_size, bool allow_comma_separated) noexcept { return iterate_many(s.data(), s.length(), batch_size, allow_comma_separated); } simdjson_inline size_t parser::capacity() const noexcept { return _capacity; } simdjson_inline size_t parser::max_capacity() const noexcept { return _max_capacity; } simdjson_inline size_t parser::max_depth() const noexcept { return _max_depth; } simdjson_inline void parser::set_max_capacity(size_t max_capacity) noexcept { if(max_capacity < dom::MINIMAL_DOCUMENT_CAPACITY) { _max_capacity = max_capacity; } else 
{ _max_capacity = dom::MINIMAL_DOCUMENT_CAPACITY; } } simdjson_inline simdjson_warn_unused simdjson_result parser::unescape(raw_json_string in, uint8_t *&dst, bool allow_replacement) const noexcept { uint8_t *end = implementation->parse_string(in.buf, dst, allow_replacement); if (!end) { return STRING_ERROR; } std::string_view result(reinterpret_cast(dst), end-dst); dst = end; return result; } simdjson_inline simdjson_warn_unused simdjson_result parser::unescape_wobbly(raw_json_string in, uint8_t *&dst) const noexcept { uint8_t *end = implementation->parse_wobbly_string(in.buf, dst); if (!end) { return STRING_ERROR; } std::string_view result(reinterpret_cast(dst), end-dst); dst = end; return result; } } // namespace ondemand } // namespace westmere } // namespace simdjson namespace simdjson { simdjson_inline simdjson_result::simdjson_result(westmere::ondemand::parser &&value) noexcept : implementation_simdjson_result_base(std::forward(value)) {} simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept : implementation_simdjson_result_base(error) {} } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_PARSER_INL_H /* end file simdjson/generic/ondemand/parser-inl.h for westmere */ /* including simdjson/generic/ondemand/raw_json_string-inl.h for westmere: #include "simdjson/generic/ondemand/raw_json_string-inl.h" */ /* begin file simdjson/generic/ondemand/raw_json_string-inl.h for westmere */ #ifndef SIMDJSON_GENERIC_ONDEMAND_RAW_JSON_STRING_INL_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_RAW_JSON_STRING_INL_H */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator-inl.h" */ /* amalgamation skipped (editor-only): 
#include "simdjson/generic/implementation_simdjson_result_base-inl.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { namespace westmere { namespace ondemand { simdjson_inline raw_json_string::raw_json_string(const uint8_t * _buf) noexcept : buf{_buf} {} simdjson_inline const char * raw_json_string::raw() const noexcept { return reinterpret_cast(buf); } simdjson_inline bool raw_json_string::is_free_from_unescaped_quote(std::string_view target) noexcept { size_t pos{0}; // if the content has no escape character, just scan through it quickly! for(;pos < target.size() && target[pos] != '\\';pos++) {} // slow path may begin. bool escaping{false}; for(;pos < target.size();pos++) { if((target[pos] == '"') && !escaping) { return false; } else if(target[pos] == '\\') { escaping = !escaping; } else { escaping = false; } } return true; } simdjson_inline bool raw_json_string::is_free_from_unescaped_quote(const char* target) noexcept { size_t pos{0}; // if the content has no escape character, just scan through it quickly! for(;target[pos] && target[pos] != '\\';pos++) {} // slow path may begin. bool escaping{false}; for(;target[pos];pos++) { if((target[pos] == '"') && !escaping) { return false; } else if(target[pos] == '\\') { escaping = !escaping; } else { escaping = false; } } return true; } simdjson_inline bool raw_json_string::unsafe_is_equal(size_t length, std::string_view target) const noexcept { // If we are going to call memcmp, then we must know something about the length of the raw_json_string. return (length >= target.size()) && (raw()[target.size()] == '"') && !memcmp(raw(), target.data(), target.size()); } simdjson_inline bool raw_json_string::unsafe_is_equal(std::string_view target) const noexcept { // Assumptions: does not contain unescaped quote characters, and // the raw content is quote terminated within a valid JSON string. 
if(target.size() <= SIMDJSON_PADDING) { return (raw()[target.size()] == '"') && !memcmp(raw(), target.data(), target.size()); } const char * r{raw()}; size_t pos{0}; for(;pos < target.size();pos++) { if(r[pos] != target[pos]) { return false; } } if(r[pos] != '"') { return false; } return true; } simdjson_inline bool raw_json_string::is_equal(std::string_view target) const noexcept { const char * r{raw()}; size_t pos{0}; bool escaping{false}; for(;pos < target.size();pos++) { if(r[pos] != target[pos]) { return false; } // if target is a compile-time constant and it is free from // quotes, then the next part could get optimized away through // inlining. if((target[pos] == '"') && !escaping) { // We have reached the end of the raw_json_string but // the target is not done. return false; } else if(target[pos] == '\\') { escaping = !escaping; } else { escaping = false; } } if(r[pos] != '"') { return false; } return true; } simdjson_inline bool raw_json_string::unsafe_is_equal(const char * target) const noexcept { // Assumptions: 'target' does not contain unescaped quote characters, is null terminated and // the raw content is quote terminated within a valid JSON string. const char * r{raw()}; size_t pos{0}; for(;target[pos];pos++) { if(r[pos] != target[pos]) { return false; } } if(r[pos] != '"') { return false; } return true; } simdjson_inline bool raw_json_string::is_equal(const char* target) const noexcept { // Assumptions: does not contain unescaped quote characters, and // the raw content is quote terminated within a valid JSON string. const char * r{raw()}; size_t pos{0}; bool escaping{false}; for(;target[pos];pos++) { if(r[pos] != target[pos]) { return false; } // if target is a compile-time constant and it is free from // quotes, then the next part could get optimized away through // inlining. if((target[pos] == '"') && !escaping) { // We have reached the end of the raw_json_string but // the target is not done. 
return false; } else if(target[pos] == '\\') { escaping = !escaping; } else { escaping = false; } } if(r[pos] != '"') { return false; } return true; } simdjson_unused simdjson_inline bool operator==(const raw_json_string &a, std::string_view c) noexcept { return a.unsafe_is_equal(c); } simdjson_unused simdjson_inline bool operator==(std::string_view c, const raw_json_string &a) noexcept { return a == c; } simdjson_unused simdjson_inline bool operator!=(const raw_json_string &a, std::string_view c) noexcept { return !(a == c); } simdjson_unused simdjson_inline bool operator!=(std::string_view c, const raw_json_string &a) noexcept { return !(a == c); } simdjson_inline simdjson_warn_unused simdjson_result raw_json_string::unescape(json_iterator &iter, bool allow_replacement) const noexcept { return iter.unescape(*this, allow_replacement); } simdjson_inline simdjson_warn_unused simdjson_result raw_json_string::unescape_wobbly(json_iterator &iter) const noexcept { return iter.unescape_wobbly(*this); } simdjson_unused simdjson_inline std::ostream &operator<<(std::ostream &out, const raw_json_string &str) noexcept { bool in_escape = false; const char *s = str.raw(); while (true) { switch (*s) { case '\\': in_escape = !in_escape; break; case '"': if (in_escape) { in_escape = false; } else { return out; } break; default: if (in_escape) { in_escape = false; } } out << *s; s++; } } } // namespace ondemand } // namespace westmere } // namespace simdjson namespace simdjson { simdjson_inline simdjson_result::simdjson_result(westmere::ondemand::raw_json_string &&value) noexcept : implementation_simdjson_result_base(std::forward(value)) {} simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept : implementation_simdjson_result_base(error) {} simdjson_inline simdjson_result simdjson_result::raw() const noexcept { if (error()) { return error(); } return first.raw(); } simdjson_inline simdjson_warn_unused simdjson_result 
simdjson_result::unescape(westmere::ondemand::json_iterator &iter, bool allow_replacement) const noexcept { if (error()) { return error(); } return first.unescape(iter, allow_replacement); } simdjson_inline simdjson_warn_unused simdjson_result simdjson_result::unescape_wobbly(westmere::ondemand::json_iterator &iter) const noexcept { if (error()) { return error(); } return first.unescape_wobbly(iter); } } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_RAW_JSON_STRING_INL_H /* end file simdjson/generic/ondemand/raw_json_string-inl.h for westmere */ /* including simdjson/generic/ondemand/serialization-inl.h for westmere: #include "simdjson/generic/ondemand/serialization-inl.h" */ /* begin file simdjson/generic/ondemand/serialization-inl.h for westmere */ #ifndef SIMDJSON_GENERIC_ONDEMAND_SERIALIZATION_INL_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_SERIALIZATION_INL_H */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/document-inl.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_type.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/object.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/serialization.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { inline std::string_view trim(const std::string_view str) noexcept { // We can almost surely do better by rolling our own find_first_not_of function. 
size_t first = str.find_first_not_of(" \t\n\r"); // If we have the empty string (just white space), then no trimming is possible, and // we return the empty string_view. if (std::string_view::npos == first) { return std::string_view(); } size_t last = str.find_last_not_of(" \t\n\r"); return str.substr(first, (last - first + 1)); } inline simdjson_result to_json_string(westmere::ondemand::document& x) noexcept { std::string_view v; auto error = x.raw_json().get(v); if(error) {return error; } return trim(v); } inline simdjson_result to_json_string(westmere::ondemand::document_reference& x) noexcept { std::string_view v; auto error = x.raw_json().get(v); if(error) {return error; } return trim(v); } inline simdjson_result to_json_string(westmere::ondemand::value& x) noexcept { /** * If we somehow receive a value that has already been consumed, * then the following code could be in trouble. E.g., we create * an array as needed, but if an array was already created, then * it could be bad. */ using namespace westmere::ondemand; westmere::ondemand::json_type t; auto error = x.type().get(t); if(error != SUCCESS) { return error; } switch (t) { case json_type::array: { westmere::ondemand::array array; error = x.get_array().get(array); if(error) { return error; } return to_json_string(array); } case json_type::object: { westmere::ondemand::object object; error = x.get_object().get(object); if(error) { return error; } return to_json_string(object); } default: return trim(x.raw_json_token()); } } inline simdjson_result to_json_string(westmere::ondemand::object& x) noexcept { std::string_view v; auto error = x.raw_json().get(v); if(error) {return error; } return trim(v); } inline simdjson_result to_json_string(westmere::ondemand::array& x) noexcept { std::string_view v; auto error = x.raw_json().get(v); if(error) {return error; } return trim(v); } inline simdjson_result to_json_string(simdjson_result x) { if (x.error()) { return x.error(); } return 
to_json_string(x.value_unsafe()); } inline simdjson_result to_json_string(simdjson_result x) { if (x.error()) { return x.error(); } return to_json_string(x.value_unsafe()); } inline simdjson_result to_json_string(simdjson_result x) { if (x.error()) { return x.error(); } return to_json_string(x.value_unsafe()); } inline simdjson_result to_json_string(simdjson_result x) { if (x.error()) { return x.error(); } return to_json_string(x.value_unsafe()); } inline simdjson_result to_json_string(simdjson_result x) { if (x.error()) { return x.error(); } return to_json_string(x.value_unsafe()); } } // namespace simdjson namespace simdjson { namespace westmere { namespace ondemand { #if SIMDJSON_EXCEPTIONS inline std::ostream& operator<<(std::ostream& out, simdjson::westmere::ondemand::value x) { std::string_view v; auto error = simdjson::to_json_string(x).get(v); if(error == simdjson::SUCCESS) { return (out << v); } else { throw simdjson::simdjson_error(error); } } inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x) { if (x.error()) { throw simdjson::simdjson_error(x.error()); } return (out << x.value()); } #else inline std::ostream& operator<<(std::ostream& out, simdjson::westmere::ondemand::value x) { std::string_view v; auto error = simdjson::to_json_string(x).get(v); if(error == simdjson::SUCCESS) { return (out << v); } else { return (out << error); } } #endif #if SIMDJSON_EXCEPTIONS inline std::ostream& operator<<(std::ostream& out, simdjson::westmere::ondemand::array value) { std::string_view v; auto error = simdjson::to_json_string(value).get(v); if(error == simdjson::SUCCESS) { return (out << v); } else { throw simdjson::simdjson_error(error); } } inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x) { if (x.error()) { throw simdjson::simdjson_error(x.error()); } return (out << x.value()); } #else inline std::ostream& operator<<(std::ostream& out, simdjson::westmere::ondemand::array value) { std::string_view v; auto 
error = simdjson::to_json_string(value).get(v); if(error == simdjson::SUCCESS) { return (out << v); } else { return (out << error); } } #endif #if SIMDJSON_EXCEPTIONS inline std::ostream& operator<<(std::ostream& out, simdjson::westmere::ondemand::document& value) { std::string_view v; auto error = simdjson::to_json_string(value).get(v); if(error == simdjson::SUCCESS) { return (out << v); } else { throw simdjson::simdjson_error(error); } } inline std::ostream& operator<<(std::ostream& out, simdjson::westmere::ondemand::document_reference& value) { std::string_view v; auto error = simdjson::to_json_string(value).get(v); if(error == simdjson::SUCCESS) { return (out << v); } else { throw simdjson::simdjson_error(error); } } inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result&& x) { if (x.error()) { throw simdjson::simdjson_error(x.error()); } return (out << x.value()); } inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result&& x) { if (x.error()) { throw simdjson::simdjson_error(x.error()); } return (out << x.value()); } #else inline std::ostream& operator<<(std::ostream& out, simdjson::westmere::ondemand::document& value) { std::string_view v; auto error = simdjson::to_json_string(value).get(v); if(error == simdjson::SUCCESS) { return (out << v); } else { return (out << error); } } #endif #if SIMDJSON_EXCEPTIONS inline std::ostream& operator<<(std::ostream& out, simdjson::westmere::ondemand::object value) { std::string_view v; auto error = simdjson::to_json_string(value).get(v); if(error == simdjson::SUCCESS) { return (out << v); } else { throw simdjson::simdjson_error(error); } } inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x) { if (x.error()) { throw simdjson::simdjson_error(x.error()); } return (out << x.value()); } #else inline std::ostream& operator<<(std::ostream& out, simdjson::westmere::ondemand::object value) { std::string_view v; auto error = 
simdjson::to_json_string(value).get(v); if(error == simdjson::SUCCESS) { return (out << v); } else { return (out << error); } } #endif }}} // namespace simdjson::westmere::ondemand #endif // SIMDJSON_GENERIC_ONDEMAND_SERIALIZATION_INL_H /* end file simdjson/generic/ondemand/serialization-inl.h for westmere */ /* including simdjson/generic/ondemand/token_iterator-inl.h for westmere: #include "simdjson/generic/ondemand/token_iterator-inl.h" */ /* begin file simdjson/generic/ondemand/token_iterator-inl.h for westmere */ #ifndef SIMDJSON_GENERIC_ONDEMAND_TOKEN_ITERATOR_INL_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_TOKEN_ITERATOR_INL_H */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/token_iterator.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base-inl.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { namespace westmere { namespace ondemand { simdjson_inline token_iterator::token_iterator( const uint8_t *_buf, token_position position ) noexcept : buf{_buf}, _position{position} { } simdjson_inline uint32_t token_iterator::current_offset() const noexcept { return *(_position); } simdjson_inline const uint8_t *token_iterator::return_current_and_advance() noexcept { return &buf[*(_position++)]; } simdjson_inline const uint8_t *token_iterator::peek(token_position position) const noexcept { return &buf[*position]; } simdjson_inline uint32_t token_iterator::peek_index(token_position position) const noexcept { return *position; } simdjson_inline uint32_t token_iterator::peek_length(token_position position) const noexcept { return *(position+1) - *position; } simdjson_inline const uint8_t *token_iterator::peek(int32_t delta) const noexcept { 
return &buf[*(_position+delta)]; } simdjson_inline uint32_t token_iterator::peek_index(int32_t delta) const noexcept { return *(_position+delta); } simdjson_inline uint32_t token_iterator::peek_length(int32_t delta) const noexcept { return *(_position+delta+1) - *(_position+delta); } simdjson_inline token_position token_iterator::position() const noexcept { return _position; } simdjson_inline void token_iterator::set_position(token_position target_position) noexcept { _position = target_position; } simdjson_inline bool token_iterator::operator==(const token_iterator &other) const noexcept { return _position == other._position; } simdjson_inline bool token_iterator::operator!=(const token_iterator &other) const noexcept { return _position != other._position; } simdjson_inline bool token_iterator::operator>(const token_iterator &other) const noexcept { return _position > other._position; } simdjson_inline bool token_iterator::operator>=(const token_iterator &other) const noexcept { return _position >= other._position; } simdjson_inline bool token_iterator::operator<(const token_iterator &other) const noexcept { return _position < other._position; } simdjson_inline bool token_iterator::operator<=(const token_iterator &other) const noexcept { return _position <= other._position; } } // namespace ondemand } // namespace westmere } // namespace simdjson namespace simdjson { simdjson_inline simdjson_result::simdjson_result(westmere::ondemand::token_iterator &&value) noexcept : implementation_simdjson_result_base(std::forward(value)) {} simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept : implementation_simdjson_result_base(error) {} } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_TOKEN_ITERATOR_INL_H /* end file simdjson/generic/ondemand/token_iterator-inl.h for westmere */ /* including simdjson/generic/ondemand/value-inl.h for westmere: #include "simdjson/generic/ondemand/value-inl.h" */ /* begin file 
simdjson/generic/ondemand/value-inl.h for westmere */ #ifndef SIMDJSON_GENERIC_ONDEMAND_VALUE_INL_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_VALUE_INL_H */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array_iterator.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_type.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/object.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { namespace westmere { namespace ondemand { simdjson_inline value::value(const value_iterator &_iter) noexcept : iter{_iter} { } simdjson_inline value value::start(const value_iterator &iter) noexcept { return iter; } simdjson_inline value value::resume(const value_iterator &iter) noexcept { return iter; } simdjson_inline simdjson_result value::get_array() noexcept { return array::start(iter); } simdjson_inline simdjson_result value::get_object() noexcept { return object::start(iter); } simdjson_inline simdjson_result value::start_or_resume_object() noexcept { if (iter.at_start()) { return get_object(); } else { return object::resume(iter); } } simdjson_inline simdjson_result value::get_raw_json_string() noexcept { return iter.get_raw_json_string(); } simdjson_inline simdjson_result value::get_string(bool allow_replacement) noexcept { return iter.get_string(allow_replacement); } template 
simdjson_inline error_code value::get_string(string_type& receiver, bool allow_replacement) noexcept { return iter.get_string(receiver, allow_replacement); } simdjson_inline simdjson_result value::get_wobbly_string() noexcept { return iter.get_wobbly_string(); } simdjson_inline simdjson_result value::get_double() noexcept { return iter.get_double(); } simdjson_inline simdjson_result value::get_double_in_string() noexcept { return iter.get_double_in_string(); } simdjson_inline simdjson_result value::get_uint64() noexcept { return iter.get_uint64(); } simdjson_inline simdjson_result value::get_uint64_in_string() noexcept { return iter.get_uint64_in_string(); } simdjson_inline simdjson_result value::get_int64() noexcept { return iter.get_int64(); } simdjson_inline simdjson_result value::get_int64_in_string() noexcept { return iter.get_int64_in_string(); } simdjson_inline simdjson_result value::get_bool() noexcept { return iter.get_bool(); } simdjson_inline simdjson_result value::is_null() noexcept { return iter.is_null(); } template<> simdjson_inline simdjson_result value::get() noexcept { return get_array(); } template<> simdjson_inline simdjson_result value::get() noexcept { return get_object(); } template<> simdjson_inline simdjson_result value::get() noexcept { return get_raw_json_string(); } template<> simdjson_inline simdjson_result value::get() noexcept { return get_string(false); } template<> simdjson_inline simdjson_result value::get() noexcept { return get_number(); } template<> simdjson_inline simdjson_result value::get() noexcept { return get_double(); } template<> simdjson_inline simdjson_result value::get() noexcept { return get_uint64(); } template<> simdjson_inline simdjson_result value::get() noexcept { return get_int64(); } template<> simdjson_inline simdjson_result value::get() noexcept { return get_bool(); } template simdjson_inline error_code value::get(T &out) noexcept { return get().get(out); } #if SIMDJSON_EXCEPTIONS simdjson_inline 
value::operator array() noexcept(false) { return get_array(); } simdjson_inline value::operator object() noexcept(false) { return get_object(); } simdjson_inline value::operator uint64_t() noexcept(false) { return get_uint64(); } simdjson_inline value::operator int64_t() noexcept(false) { return get_int64(); } simdjson_inline value::operator double() noexcept(false) { return get_double(); } simdjson_inline value::operator std::string_view() noexcept(false) { return get_string(false); } simdjson_inline value::operator raw_json_string() noexcept(false) { return get_raw_json_string(); } simdjson_inline value::operator bool() noexcept(false) { return get_bool(); } #endif simdjson_inline simdjson_result value::begin() & noexcept { return get_array().begin(); } simdjson_inline simdjson_result value::end() & noexcept { return {}; } simdjson_inline simdjson_result value::count_elements() & noexcept { simdjson_result answer; auto a = get_array(); answer = a.count_elements(); // count_elements leaves you pointing inside the array, at the first element. // We need to move back so that the user can create a new array (which requires that // we point at '['). 
iter.move_at_start(); return answer; } simdjson_inline simdjson_result value::count_fields() & noexcept { simdjson_result answer; auto a = get_object(); answer = a.count_fields(); iter.move_at_start(); return answer; } simdjson_inline simdjson_result value::at(size_t index) noexcept { auto a = get_array(); return a.at(index); } simdjson_inline simdjson_result value::find_field(std::string_view key) noexcept { return start_or_resume_object().find_field(key); } simdjson_inline simdjson_result value::find_field(const char *key) noexcept { return start_or_resume_object().find_field(key); } simdjson_inline simdjson_result value::find_field_unordered(std::string_view key) noexcept { return start_or_resume_object().find_field_unordered(key); } simdjson_inline simdjson_result value::find_field_unordered(const char *key) noexcept { return start_or_resume_object().find_field_unordered(key); } simdjson_inline simdjson_result value::operator[](std::string_view key) noexcept { return start_or_resume_object()[key]; } simdjson_inline simdjson_result value::operator[](const char *key) noexcept { return start_or_resume_object()[key]; } simdjson_inline simdjson_result value::type() noexcept { return iter.type(); } simdjson_inline simdjson_result value::is_scalar() noexcept { json_type this_type; auto error = type().get(this_type); if(error) { return error; } return ! 
((this_type == json_type::array) || (this_type == json_type::object)); } simdjson_inline bool value::is_negative() noexcept { return iter.is_negative(); } simdjson_inline simdjson_result value::is_integer() noexcept { return iter.is_integer(); } simdjson_warn_unused simdjson_inline simdjson_result value::get_number_type() noexcept { return iter.get_number_type(); } simdjson_warn_unused simdjson_inline simdjson_result value::get_number() noexcept { return iter.get_number(); } simdjson_inline std::string_view value::raw_json_token() noexcept { return std::string_view(reinterpret_cast(iter.peek_start()), iter.peek_start_length()); } simdjson_inline simdjson_result value::raw_json() noexcept { json_type t; SIMDJSON_TRY(type().get(t)); switch (t) { case json_type::array: { ondemand::array array; SIMDJSON_TRY(get_array().get(array)); return array.raw_json(); } case json_type::object: { ondemand::object object; SIMDJSON_TRY(get_object().get(object)); return object.raw_json(); } default: return raw_json_token(); } } simdjson_inline simdjson_result value::current_location() noexcept { return iter.json_iter().current_location(); } simdjson_inline int32_t value::current_depth() const noexcept{ return iter.json_iter().depth(); } simdjson_inline simdjson_result value::at_pointer(std::string_view json_pointer) noexcept { json_type t; SIMDJSON_TRY(type().get(t)); switch (t) { case json_type::array: return (*this).get_array().at_pointer(json_pointer); case json_type::object: return (*this).get_object().at_pointer(json_pointer); default: return INVALID_JSON_POINTER; } } } // namespace ondemand } // namespace westmere } // namespace simdjson namespace simdjson { simdjson_inline simdjson_result::simdjson_result( westmere::ondemand::value &&value ) noexcept : implementation_simdjson_result_base( std::forward(value) ) { } simdjson_inline simdjson_result::simdjson_result( error_code error ) noexcept : implementation_simdjson_result_base(error) { } simdjson_inline simdjson_result 
simdjson_result::count_elements() & noexcept { if (error()) { return error(); } return first.count_elements(); } simdjson_inline simdjson_result simdjson_result::count_fields() & noexcept { if (error()) { return error(); } return first.count_fields(); } simdjson_inline simdjson_result simdjson_result::at(size_t index) noexcept { if (error()) { return error(); } return first.at(index); } simdjson_inline simdjson_result simdjson_result::begin() & noexcept { if (error()) { return error(); } return first.begin(); } simdjson_inline simdjson_result simdjson_result::end() & noexcept { if (error()) { return error(); } return {}; } simdjson_inline simdjson_result simdjson_result::find_field(std::string_view key) noexcept { if (error()) { return error(); } return first.find_field(key); } simdjson_inline simdjson_result simdjson_result::find_field(const char *key) noexcept { if (error()) { return error(); } return first.find_field(key); } simdjson_inline simdjson_result simdjson_result::find_field_unordered(std::string_view key) noexcept { if (error()) { return error(); } return first.find_field_unordered(key); } simdjson_inline simdjson_result simdjson_result::find_field_unordered(const char *key) noexcept { if (error()) { return error(); } return first.find_field_unordered(key); } simdjson_inline simdjson_result simdjson_result::operator[](std::string_view key) noexcept { if (error()) { return error(); } return first[key]; } simdjson_inline simdjson_result simdjson_result::operator[](const char *key) noexcept { if (error()) { return error(); } return first[key]; } simdjson_inline simdjson_result simdjson_result::get_array() noexcept { if (error()) { return error(); } return first.get_array(); } simdjson_inline simdjson_result simdjson_result::get_object() noexcept { if (error()) { return error(); } return first.get_object(); } simdjson_inline simdjson_result simdjson_result::get_uint64() noexcept { if (error()) { return error(); } return first.get_uint64(); } 
simdjson_inline simdjson_result simdjson_result::get_uint64_in_string() noexcept { if (error()) { return error(); } return first.get_uint64_in_string(); } simdjson_inline simdjson_result simdjson_result::get_int64() noexcept { if (error()) { return error(); } return first.get_int64(); } simdjson_inline simdjson_result simdjson_result::get_int64_in_string() noexcept { if (error()) { return error(); } return first.get_int64_in_string(); } simdjson_inline simdjson_result simdjson_result::get_double() noexcept { if (error()) { return error(); } return first.get_double(); } simdjson_inline simdjson_result simdjson_result::get_double_in_string() noexcept { if (error()) { return error(); } return first.get_double_in_string(); } simdjson_inline simdjson_result simdjson_result::get_string(bool allow_replacement) noexcept { if (error()) { return error(); } return first.get_string(allow_replacement); } template simdjson_inline error_code simdjson_result::get_string(string_type& receiver, bool allow_replacement) noexcept { if (error()) { return error(); } return first.get_string(receiver, allow_replacement); } simdjson_inline simdjson_result simdjson_result::get_wobbly_string() noexcept { if (error()) { return error(); } return first.get_wobbly_string(); } simdjson_inline simdjson_result simdjson_result::get_raw_json_string() noexcept { if (error()) { return error(); } return first.get_raw_json_string(); } simdjson_inline simdjson_result simdjson_result::get_bool() noexcept { if (error()) { return error(); } return first.get_bool(); } simdjson_inline simdjson_result simdjson_result::is_null() noexcept { if (error()) { return error(); } return first.is_null(); } template simdjson_inline simdjson_result simdjson_result::get() noexcept { if (error()) { return error(); } return first.get(); } template simdjson_inline error_code simdjson_result::get(T &out) noexcept { if (error()) { return error(); } return first.get(out); } template<> simdjson_inline simdjson_result 
simdjson_result::get() noexcept { if (error()) { return error(); } return std::move(first); } template<> simdjson_inline error_code simdjson_result::get(westmere::ondemand::value &out) noexcept { if (error()) { return error(); } out = first; return SUCCESS; } simdjson_inline simdjson_result simdjson_result::type() noexcept { if (error()) { return error(); } return first.type(); } simdjson_inline simdjson_result simdjson_result::is_scalar() noexcept { if (error()) { return error(); } return first.is_scalar(); } simdjson_inline simdjson_result simdjson_result::is_negative() noexcept { if (error()) { return error(); } return first.is_negative(); } simdjson_inline simdjson_result simdjson_result::is_integer() noexcept { if (error()) { return error(); } return first.is_integer(); } simdjson_inline simdjson_result simdjson_result::get_number_type() noexcept { if (error()) { return error(); } return first.get_number_type(); } simdjson_inline simdjson_result simdjson_result::get_number() noexcept { if (error()) { return error(); } return first.get_number(); } #if SIMDJSON_EXCEPTIONS simdjson_inline simdjson_result::operator westmere::ondemand::array() noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } simdjson_inline simdjson_result::operator westmere::ondemand::object() noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } simdjson_inline simdjson_result::operator uint64_t() noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } simdjson_inline simdjson_result::operator int64_t() noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } simdjson_inline simdjson_result::operator double() noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } simdjson_inline simdjson_result::operator std::string_view() noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } simdjson_inline simdjson_result::operator 
westmere::ondemand::raw_json_string() noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } simdjson_inline simdjson_result::operator bool() noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } #endif simdjson_inline simdjson_result simdjson_result::raw_json_token() noexcept { if (error()) { return error(); } return first.raw_json_token(); } simdjson_inline simdjson_result simdjson_result::raw_json() noexcept { if (error()) { return error(); } return first.raw_json(); } simdjson_inline simdjson_result simdjson_result::current_location() noexcept { if (error()) { return error(); } return first.current_location(); } simdjson_inline simdjson_result simdjson_result::current_depth() const noexcept { if (error()) { return error(); } return first.current_depth(); } simdjson_inline simdjson_result simdjson_result::at_pointer(std::string_view json_pointer) noexcept { if (error()) { return error(); } return first.at_pointer(json_pointer); } } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_VALUE_INL_H /* end file simdjson/generic/ondemand/value-inl.h for westmere */ /* including simdjson/generic/ondemand/value_iterator-inl.h for westmere: #include "simdjson/generic/ondemand/value_iterator-inl.h" */ /* begin file simdjson/generic/ondemand/value_iterator-inl.h for westmere */ #ifndef SIMDJSON_GENERIC_ONDEMAND_VALUE_ITERATOR_INL_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_VALUE_ITERATOR_INL_H */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/atomparsing.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/numberparsing.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator.h" */ /* amalgamation skipped (editor-only): #include 
"simdjson/generic/ondemand/json_type-inl.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string-inl.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { namespace westmere { namespace ondemand { simdjson_inline value_iterator::value_iterator( json_iterator *json_iter, depth_t depth, token_position start_position ) noexcept : _json_iter{json_iter}, _depth{depth}, _start_position{start_position} { } simdjson_warn_unused simdjson_inline simdjson_result value_iterator::start_object() noexcept { SIMDJSON_TRY( start_container('{', "Not an object", "object") ); return started_object(); } simdjson_warn_unused simdjson_inline simdjson_result value_iterator::start_root_object() noexcept { SIMDJSON_TRY( start_container('{', "Not an object", "object") ); return started_root_object(); } simdjson_warn_unused simdjson_inline simdjson_result value_iterator::started_object() noexcept { assert_at_container_start(); #if SIMDJSON_DEVELOPMENT_CHECKS _json_iter->set_start_position(_depth, start_position()); #endif if (*_json_iter->peek() == '}') { logger::log_value(*_json_iter, "empty object"); _json_iter->return_current_and_advance(); end_container(); return false; } return true; } simdjson_warn_unused simdjson_inline error_code value_iterator::check_root_object() noexcept { // When in streaming mode, we cannot expect peek_last() to be the last structural element of the // current document. It only works in the normal mode where we have indexed a single document. // Note that adding a check for 'streaming' is not expensive since we only have at most // one root element. if ( ! _json_iter->streaming() ) { // The following lines do not fully protect against garbage content within the // object: e.g., `{"a":2} foo }`. 
Users concerned with garbage content should // call `at_end()` on the document instance at the end of the processing to // ensure that the processing has finished at the end. // if (*_json_iter->peek_last() != '}') { _json_iter->abandon(); return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "missing } at end"); } // If the last character is } *and* the first gibberish character is also '}' // then on-demand could accidentally go over. So we need additional checks. // https://github.com/simdjson/simdjson/issues/1834 // Checking that the document is balanced requires a full scan which is potentially // expensive, but it only happens in edge cases where the first padding character is // a closing bracket. if ((*_json_iter->peek(_json_iter->end_position()) == '}') && (!_json_iter->balanced())) { _json_iter->abandon(); // The exact error would require more work. It will typically be an unclosed object. return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "the document is unbalanced"); } } return SUCCESS; } simdjson_warn_unused simdjson_inline simdjson_result value_iterator::started_root_object() noexcept { auto error = check_root_object(); if(error) { return error; } return started_object(); } simdjson_warn_unused simdjson_inline error_code value_iterator::end_container() noexcept { #if SIMDJSON_CHECK_EOF if (depth() > 1 && at_end()) { return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "missing parent ] or }"); } // if (depth() <= 1 && !at_end()) { return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "missing [ or { at start"); } #endif // SIMDJSON_CHECK_EOF _json_iter->ascend_to(depth()-1); return SUCCESS; } simdjson_warn_unused simdjson_inline simdjson_result value_iterator::has_next_field() noexcept { assert_at_next(); // It's illegal to call this unless there are more tokens: anything that ends in } or ] is // obligated to verify there are more tokens if they are not the top level. 
switch (*_json_iter->return_current_and_advance()) { case '}': logger::log_end_value(*_json_iter, "object"); SIMDJSON_TRY( end_container() ); return false; case ',': return true; default: return report_error(TAPE_ERROR, "Missing comma between object fields"); } } simdjson_warn_unused simdjson_inline simdjson_result value_iterator::find_field_raw(const std::string_view key) noexcept { error_code error; bool has_value; // // Initially, the object can be in one of a few different places: // // 1. The start of the object, at the first field: // // ``` // { "a": [ 1, 2 ], "b": [ 3, 4 ] } // ^ (depth 2, index 1) // ``` if (at_first_field()) { has_value = true; // // 2. When a previous search did not yield a value or the object is empty: // // ``` // { "a": [ 1, 2 ], "b": [ 3, 4 ] } // ^ (depth 0) // { } // ^ (depth 0, index 2) // ``` // } else if (!is_open()) { #if SIMDJSON_DEVELOPMENT_CHECKS // If we're past the end of the object, we're being iterated out of order. // Note: this isn't perfect detection. It's possible the user is inside some other object; if so, // this object iterator will blithely scan that object for fields. if (_json_iter->depth() < depth() - 1) { return OUT_OF_ORDER_ITERATION; } #endif return false; // 3. When a previous search found a field or an iterator yielded a value: // // ``` // // When a field was not fully consumed (or not even touched at all) // { "a": [ 1, 2 ], "b": [ 3, 4 ] } // ^ (depth 2) // // When a field was fully consumed // { "a": [ 1, 2 ], "b": [ 3, 4 ] } // ^ (depth 1) // // When the last field was fully consumed // { "a": [ 1, 2 ], "b": [ 3, 4 ] } // ^ (depth 1) // ``` // } else { if ((error = skip_child() )) { abandon(); return error; } if ((error = has_next_field().get(has_value) )) { abandon(); return error; } #if SIMDJSON_DEVELOPMENT_CHECKS if (_json_iter->start_position(_depth) != start_position()) { return OUT_OF_ORDER_ITERATION; } #endif } while (has_value) { // Get the key and colon, stopping at the value. 
raw_json_string actual_key; // size_t max_key_length = _json_iter->peek_length() - 2; // -2 for the two quotes // Note: _json_iter->peek_length() - 2 might overflow if _json_iter->peek_length() < 2. // field_key() advances the pointer and checks that '"' is found (corresponding to a key). // The depth is left unchanged by field_key(). if ((error = field_key().get(actual_key) )) { abandon(); return error; }; // field_value() will advance and check that we find a ':' separating the // key and the value. It will also increment the depth by one. if ((error = field_value() )) { abandon(); return error; } // If it matches, stop and return // We could do it this way if we wanted to allow arbitrary // key content (including escaped quotes). //if (actual_key.unsafe_is_equal(max_key_length, key)) { // Instead we do the following which may trigger buffer overruns if the // user provides an adversarial key (containing a well placed unescaped quote // character and being longer than the number of bytes remaining in the JSON // input). if (actual_key.unsafe_is_equal(key)) { logger::log_event(*this, "match", key, -2); // If we return here, then we return while pointing at the ':' that we just checked. return true; } // No match: skip the value and see if , or } is next logger::log_event(*this, "no match", key, -2); // The call to skip_child is meant to skip over the value corresponding to the key. // After skip_child(), we are right before the next comma (',') or the final brace ('}'). SIMDJSON_TRY( skip_child() ); // Skip the value entirely // The has_next_field() advances the pointer and check that either ',' or '}' is found. // It returns true if ',' is found, false otherwise. If anything other than ',' or '}' is found, // then we are in error and we abort. if ((error = has_next_field().get(has_value) )) { abandon(); return error; } } // If the loop ended, we're out of fields to look at. 
return false; } SIMDJSON_PUSH_DISABLE_WARNINGS SIMDJSON_DISABLE_STRICT_OVERFLOW_WARNING simdjson_warn_unused simdjson_inline simdjson_result value_iterator::find_field_unordered_raw(const std::string_view key) noexcept { /** * When find_field_unordered_raw is called, we can either be pointing at the * first key, pointing outside (at the closing brace) or if a key was matched * we can be either pointing right after the ':' right before the value (that we need to skip), * or we may have consumed the value and we might be at a comma or at the * final brace (ready for a call to has_next_field()). */ error_code error; bool has_value; // First, we scan from that point to the end. // If we don't find a match, we may loop back around, and scan from the beginning to that point. token_position search_start = _json_iter->position(); // We want to know whether we need to go back to the beginning. bool at_first = at_first_field(); /////////////// // Initially, the object can be in one of a few different places: // // 1. At the first key: // // ``` // { "a": [ 1, 2 ], "b": [ 3, 4 ] } // ^ (depth 2, index 1) // ``` // if (at_first) { has_value = true; // 2. When a previous search did not yield a value or the object is empty: // // ``` // { "a": [ 1, 2 ], "b": [ 3, 4 ] } // ^ (depth 0) // { } // ^ (depth 0, index 2) // ``` // } else if (!is_open()) { #if SIMDJSON_DEVELOPMENT_CHECKS // If we're past the end of the object, we're being iterated out of order. // Note: this isn't perfect detection. It's possible the user is inside some other object; if so, // this object iterator will blithely scan that object for fields. if (_json_iter->depth() < depth() - 1) { return OUT_OF_ORDER_ITERATION; } #endif SIMDJSON_TRY(reset_object().get(has_value)); at_first = true; // 3. 
When a previous search found a field or an iterator yielded a value: // // ``` // // When a field was not fully consumed (or not even touched at all) // { "a": [ 1, 2 ], "b": [ 3, 4 ] } // ^ (depth 2) // // When a field was fully consumed // { "a": [ 1, 2 ], "b": [ 3, 4 ] } // ^ (depth 1) // // When the last field was fully consumed // { "a": [ 1, 2 ], "b": [ 3, 4 ] } // ^ (depth 1) // ``` // } else { // If someone queried a key but they did not access the value, then we are left pointing // at the ':' and we need to move forward through the value... If the value was // processed then skip_child() does not move the iterator (but may adjust the depth). if ((error = skip_child() )) { abandon(); return error; } search_start = _json_iter->position(); if ((error = has_next_field().get(has_value) )) { abandon(); return error; } #if SIMDJSON_DEVELOPMENT_CHECKS if (_json_iter->start_position(_depth) != start_position()) { return OUT_OF_ORDER_ITERATION; } #endif } // After initial processing, we will be in one of two states: // // ``` // // At the beginning of a field // { "a": [ 1, 2 ], "b": [ 3, 4 ] } // ^ (depth 1) // { "a": [ 1, 2 ], "b": [ 3, 4 ] } // ^ (depth 1) // // At the end of the object // { "a": [ 1, 2 ], "b": [ 3, 4 ] } // ^ (depth 0) // ``` // // Next, we find a match starting from the current position. while (has_value) { SIMDJSON_ASSUME( _json_iter->_depth == _depth ); // We must be at the start of a field // Get the key and colon, stopping at the value. raw_json_string actual_key; // size_t max_key_length = _json_iter->peek_length() - 2; // -2 for the two quotes // Note: _json_iter->peek_length() - 2 might overflow if _json_iter->peek_length() < 2. // field_key() advances the pointer and checks that '"' is found (corresponding to a key). // The depth is left unchanged by field_key(). if ((error = field_key().get(actual_key) )) { abandon(); return error; }; // field_value() will advance and check that we find a ':' separating the // key and the value. 
It will also increment the depth by one. if ((error = field_value() )) { abandon(); return error; } // If it matches, stop and return // We could do it this way if we wanted to allow arbitrary // key content (including escaped quotes). // if (actual_key.unsafe_is_equal(max_key_length, key)) { // Instead we do the following which may trigger buffer overruns if the // user provides an adversarial key (containing a well placed unescaped quote // character and being longer than the number of bytes remaining in the JSON // input). if (actual_key.unsafe_is_equal(key)) { logger::log_event(*this, "match", key, -2); // If we return here, then we return while pointing at the ':' that we just checked. return true; } // No match: skip the value and see if , or } is next logger::log_event(*this, "no match", key, -2); // The call to skip_child is meant to skip over the value corresponding to the key. // After skip_child(), we are right before the next comma (',') or the final brace ('}'). SIMDJSON_TRY( skip_child() ); // The has_next_field() advances the pointer and check that either ',' or '}' is found. // It returns true if ',' is found, false otherwise. If anything other than ',' or '}' is found, // then we are in error and we abort. if ((error = has_next_field().get(has_value) )) { abandon(); return error; } } // Performance note: it maybe wasteful to rewind to the beginning when there might be // no other query following. Indeed, it would require reskipping the whole object. // Instead, you can just stay where you are. If there is a new query, there is always time // to rewind. if(at_first) { return false; } // If we reach the end without finding a match, search the rest of the fields starting at the // beginning of the object. // (We have already run through the object before, so we've already validated its structure. We // don't check errors in this bit.) 
SIMDJSON_TRY(reset_object().get(has_value)); while (true) { SIMDJSON_ASSUME(has_value); // we should reach search_start before ever reaching the end of the object SIMDJSON_ASSUME( _json_iter->_depth == _depth ); // We must be at the start of a field // Get the key and colon, stopping at the value. raw_json_string actual_key; // size_t max_key_length = _json_iter->peek_length() - 2; // -2 for the two quotes // Note: _json_iter->peek_length() - 2 might overflow if _json_iter->peek_length() < 2. // field_key() advances the pointer and checks that '"' is found (corresponding to a key). // The depth is left unchanged by field_key(). error = field_key().get(actual_key); SIMDJSON_ASSUME(!error); // field_value() will advance and check that we find a ':' separating the // key and the value. It will also increment the depth by one. error = field_value(); SIMDJSON_ASSUME(!error); // If it matches, stop and return // We could do it this way if we wanted to allow arbitrary // key content (including escaped quotes). // if (actual_key.unsafe_is_equal(max_key_length, key)) { // Instead we do the following which may trigger buffer overruns if the // user provides an adversarial key (containing a well placed unescaped quote // character and being longer than the number of bytes remaining in the JSON // input). if (actual_key.unsafe_is_equal(key)) { logger::log_event(*this, "match", key, -2); // If we return here, then we return while pointing at the ':' that we just checked. return true; } // No match: skip the value and see if , or } is next logger::log_event(*this, "no match", key, -2); // The call to skip_child is meant to skip over the value corresponding to the key. // After skip_child(), we are right before the next comma (',') or the final brace ('}'). SIMDJSON_TRY( skip_child() ); // If we reached the end of the key-value pair we started from, then we know // that the key is not there so we return false. We are either right before // the next comma or the final brace. 
if(_json_iter->position() == search_start) { return false; }
    // The has_next_field() advances the pointer and checks that either ',' or '}' is found.
    // It returns true if ',' is found, false otherwise. If anything other than ',' or '}' is found,
    // then we are in error and we abort.
    error = has_next_field().get(has_value); SIMDJSON_ASSUME(!error);
    // If we make the mistake of exiting here, then we could be left pointing at a key
    // in the middle of an object. That's not an allowable state.
  }
  // If the loop ended, we're out of fields to look at. The program should
  // never reach this point.
  return false;
}
SIMDJSON_POP_DISABLE_WARNINGS

/**
 * Consume the current token as an object key.
 *
 * @return A raw_json_string starting just past the opening quote, or the error reported
 *         through report_error(TAPE_ERROR, ...) when the token does not begin with '"'.
 */
simdjson_warn_unused simdjson_inline simdjson_result value_iterator::field_key() noexcept {
  assert_at_next();

  const uint8_t *key = _json_iter->return_current_and_advance();
  // The post-increment both tests the opening quote and moves `key` past it.
  if (*(key++) != '"') { return report_error(TAPE_ERROR, "Object key is not a string"); }
  return raw_json_string(key);
}

/**
 * Consume the ':' that separates a key from its value, then descend to depth()+1.
 *
 * @return SUCCESS, or the error reported through report_error(TAPE_ERROR, ...) when the
 *         current token is not ':'.
 */
simdjson_warn_unused simdjson_inline error_code value_iterator::field_value() noexcept {
  assert_at_next();

  if (*_json_iter->return_current_and_advance() != ':') { return report_error(TAPE_ERROR, "Missing colon in object field"); }
  _json_iter->descend_to(depth()+1);
  return SUCCESS;
}

/**
 * Check (via start_container) that the value begins with '[' and then enter the array.
 *
 * @return The result of started_array(), or the error from start_container().
 */
simdjson_warn_unused simdjson_inline simdjson_result value_iterator::start_array() noexcept {
  SIMDJSON_TRY( start_container('[', "Not an array", "array") );
  return started_array();
}

/**
 * Same as start_array(), but continues with started_root_array(), which first runs
 * check_root_array().
 */
simdjson_warn_unused simdjson_inline simdjson_result value_iterator::start_root_array() noexcept {
  SIMDJSON_TRY( start_container('[', "Not an array", "array") );
  return started_root_array();
}

/**
 * Build a human-readable description of this iterator: its depth, followed by the
 * json_iterator's own to_string() when a json_iterator is attached.
 */
inline std::string value_iterator::to_string() const noexcept {
  auto answer = std::string("value_iterator [ depth : ") + std::to_string(_depth) + std::string(", ");
  if(_json_iter != nullptr) { answer += _json_iter->to_string(); }
  answer += std::string(" ]");
  return answer;
}

/**
 * Called once the cursor is right after the opening '[' (see assert_at_container_start()).
 *
 * @return false for an empty array (the ']' is consumed and end_container() is called),
 *         true otherwise (after descending to depth()+1), or the error from end_container().
 */
simdjson_warn_unused simdjson_inline simdjson_result value_iterator::started_array() noexcept {
  assert_at_container_start();
  if (*_json_iter->peek() == ']') {
    logger::log_value(*_json_iter, "empty array");
    _json_iter->return_current_and_advance();
    SIMDJSON_TRY( end_container() );
    return false;
  }
  _json_iter->descend_to(depth()+1);
#if SIMDJSON_DEVELOPMENT_CHECKS
  _json_iter->set_start_position(_depth, start_position());
#endif
  return true;
}

/**
 * Sanity checks applied to a root-level array when the iterator is not in streaming mode.
 *
 * @return SUCCESS, or the error reported through
 *         report_error(INCOMPLETE_ARRAY_OR_OBJECT, ...) after abandoning the iterator.
 */
simdjson_warn_unused simdjson_inline error_code value_iterator::check_root_array() noexcept {
  // When in streaming mode, we cannot expect peek_last() to be the last structural element of the
  // current document. It only works in the normal mode where we have indexed a single document.
  // Note that adding a check for 'streaming' is not expensive since we only have at most
  // one root element.
  if ( ! _json_iter->streaming() ) {
    // The following lines do not fully protect against garbage content within the
    // array: e.g., `[1, 2] foo]`. Users concerned with garbage content should
    // also call `at_end()` on the document instance at the end of the processing to
    // ensure that the processing has finished at the end.
    //
    if (*_json_iter->peek_last() != ']') {
      _json_iter->abandon();
      return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "missing ] at end");
    }
    // If the last character is ] *and* the first gibberish character is also ']'
    // then on-demand could accidentally go over. So we need additional checks.
    // https://github.com/simdjson/simdjson/issues/1834
    // Checking that the document is balanced requires a full scan which is potentially
    // expensive, but it only happens in edge cases where the first padding character is
    // a closing bracket.
    if ((*_json_iter->peek(_json_iter->end_position()) == ']') && (!_json_iter->balanced())) {
      _json_iter->abandon();
      // The exact error would require more work. It will typically be an unclosed array.
return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "the document is unbalanced");
    }
  }
  return SUCCESS;
}

/**
 * Root-array variant of started_array(): runs check_root_array() first and returns its
 * error, if any, before delegating to started_array().
 */
simdjson_warn_unused simdjson_inline simdjson_result value_iterator::started_root_array() noexcept {
  auto error = check_root_array();
  if (error) { return error; }
  return started_array();
}

/**
 * Consume the token that follows an array element.
 *
 * @return false on ']' (after end_container()), true on ',' (after descending to depth()+1),
 *         or the error reported through report_error(TAPE_ERROR, ...) for any other token.
 */
simdjson_warn_unused simdjson_inline simdjson_result value_iterator::has_next_element() noexcept {
  assert_at_next();

  logger::log_event(*this, "has_next_element");
  switch (*_json_iter->return_current_and_advance()) {
    case ']':
      logger::log_end_value(*_json_iter, "array");
      SIMDJSON_TRY( end_container() );
      return false;
    case ',':
      _json_iter->descend_to(depth()+1);
      return true;
    default:
      return report_error(TAPE_ERROR, "Missing comma between array elements");
  }
}

/**
 * Parse a `true` or `false` literal starting at `json`.
 *
 * @param json Pointer to the first byte of the candidate literal.
 * @return The boolean value, or the result of incorrect_type_error("Not a boolean") when the
 *         bytes are neither literal or the literal is not followed by a structural or
 *         whitespace character.
 */
simdjson_warn_unused simdjson_inline simdjson_result value_iterator::parse_bool(const uint8_t *json) const noexcept {
  auto not_true = atomparsing::str4ncmp(json, "true");
  // "false" has five characters: compare the first four, then fold in the fifth ('e').
  auto not_false = atomparsing::str4ncmp(json, "fals") | (json[4] ^ 'e');
  // The byte right after the literal is at index 4 for "true" and at index 5 for "false".
  bool error = (not_true && not_false) || jsoncharutils::is_not_structural_or_whitespace(json[not_true ? 5 : 4]);
  if (error) { return incorrect_type_error("Not a boolean"); }
  return simdjson_result(!not_true);
}

/**
 * Check whether the bytes at `json` are a `null` literal followed by a structural or
 * whitespace character.
 *
 * @return true for null, false for a value that does not start with 'n', or the result of
 *         incorrect_type_error(...) for a non-null value that starts with 'n'.
 */
simdjson_warn_unused simdjson_inline simdjson_result value_iterator::parse_null(const uint8_t *json) const noexcept {
  bool is_null_string = !atomparsing::str4ncmp(json, "null") && jsoncharutils::is_structural_or_whitespace(json[4]);
  // if we start with 'n', we must be a null
  if(!is_null_string && json[0]=='n') { return incorrect_type_error("Not a null but starts with n"); }
  return is_null_string;
}

/** Get the string value by unescaping the result of get_raw_json_string(). */
simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_string(bool allow_replacement) noexcept {
  return get_raw_json_string().unescape(json_iter(), allow_replacement);
}

/**
 * Get the string value and assign it to `receiver`.
 *
 * @return SUCCESS, or the error from get_string(allow_replacement); `receiver` is left
 *         untouched on error.
 *
 * NOTE(review): the template parameter list (presumably declaring `string_type`) appears to
 * be missing in this copy of the file -- confirm against the upstream amalgamation.
 */
template simdjson_warn_unused simdjson_inline error_code value_iterator::get_string(string_type& receiver, bool allow_replacement) noexcept {
  std::string_view content;
  auto err = get_string(allow_replacement).get(content);
  if (err) { return err; }
  receiver = content;
  return SUCCESS;
}

/** Like get_string(), but unescapes with raw_json_string's unescape_wobbly(). */
simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_wobbly_string() noexcept {
  return get_raw_json_string().unescape_wobbly(json_iter());
}

/**
 * Get the string as a raw_json_string starting just past the opening quote.
 * The cursor is only advanced when the value does start with '"'.
 */
simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_raw_json_string() noexcept {
  auto json = peek_scalar("string");
  if (*json != '"') { return incorrect_type_error("Not a string"); }
  advance_scalar("string");
  return raw_json_string(json+1);
}

// The non-root scalar getters below share one shape: peek at the value, parse it, and
// advance the cursor only when parsing succeeded.

/** Parse the value with numberparsing::parse_unsigned(). */
simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_uint64() noexcept {
  auto result = numberparsing::parse_unsigned(peek_non_root_scalar("uint64"));
  if(result.error() == SUCCESS) { advance_non_root_scalar("uint64"); }
  return result;
}

/** Parse the value with numberparsing::parse_unsigned_in_string(). */
simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_uint64_in_string() noexcept {
  auto result = numberparsing::parse_unsigned_in_string(peek_non_root_scalar("uint64"));
  if(result.error() == SUCCESS) { advance_non_root_scalar("uint64"); }
  return result;
}

/** Parse the value with numberparsing::parse_integer(). */
simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_int64() noexcept {
  auto result = numberparsing::parse_integer(peek_non_root_scalar("int64"));
  if(result.error() == SUCCESS) { advance_non_root_scalar("int64"); }
  return result;
}

/** Parse the value with numberparsing::parse_integer_in_string(). */
simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_int64_in_string() noexcept {
  auto result = numberparsing::parse_integer_in_string(peek_non_root_scalar("int64"));
  if(result.error() == SUCCESS) { advance_non_root_scalar("int64"); }
  return result;
}

/** Parse the value with numberparsing::parse_double(). */
simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_double() noexcept {
  auto result = numberparsing::parse_double(peek_non_root_scalar("double"));
  if(result.error() == SUCCESS) { advance_non_root_scalar("double"); }
  return result;
}

/** Parse the value with numberparsing::parse_double_in_string(). */
simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_double_in_string() noexcept {
  auto result = numberparsing::parse_double_in_string(peek_non_root_scalar("double"));
  if(result.error() == SUCCESS) { advance_non_root_scalar("double"); }
  return result;
}

/** Parse the value with parse_bool(). */
simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_bool() noexcept {
  auto result = parse_bool(peek_non_root_scalar("bool"));
  if(result.error() == SUCCESS) { advance_non_root_scalar("bool"); }
  return result;
}

/**
 * Test whether the value is null; the cursor is advanced only when it is.
 *
 * @return true/false, or the error from parse_null().
 */
simdjson_inline simdjson_result value_iterator::is_null() noexcept {
  bool is_null_value;
  SIMDJSON_TRY(parse_null(peek_non_root_scalar("null")).get(is_null_value));
  if(is_null_value) { advance_non_root_scalar("null"); }
  return is_null_value;
}

/** Apply numberparsing::is_negative() to the (non-root) value; the cursor is not advanced here. */
simdjson_inline bool value_iterator::is_negative() noexcept {
  return numberparsing::is_negative(peek_non_root_scalar("numbersign"));
}

/** Root-scalar variant of is_negative(). */
simdjson_inline bool value_iterator::is_root_negative() noexcept {
  return numberparsing::is_negative(peek_root_scalar("numbersign"));
}

/** Apply numberparsing::is_integer() to the (non-root) value; the cursor is not advanced here. */
simdjson_inline simdjson_result value_iterator::is_integer() noexcept {
  return numberparsing::is_integer(peek_non_root_scalar("integer"));
}

/** Apply numberparsing::get_number_type() to the (non-root) value; the cursor is not advanced here. */
simdjson_inline simdjson_result value_iterator::get_number_type() noexcept {
  return numberparsing::get_number_type(peek_non_root_scalar("integer"));
}

/**
 * Parse the (non-root) value with numberparsing::parse_number().
 * Unlike the getters above, this function does not call advance_non_root_scalar().
 */
simdjson_inline simdjson_result value_iterator::get_number() noexcept {
  number num;
  error_code error =  numberparsing::parse_number(peek_non_root_scalar("number"), num);
  if(error) { return error; }
  return num;
}

/**
 * Root-scalar variant of is_integer(). The token is first copied into a null-terminated
 * temporary buffer, and numberparsing::is_integer() runs on that copy.
 *
 * @param check_trailing When true, a successful parse that is not a single token yields
 *                       TRAILING_CONTENT.
 */
simdjson_inline simdjson_result value_iterator::is_root_integer(bool check_trailing) noexcept {
  auto max_len = peek_start_length();
  auto json = peek_root_scalar("is_root_integer");
  uint8_t tmpbuf[20+1+1]{}; // <20 digits> is the longest possible unsigned integer
  tmpbuf[20+1] = '\0'; // make sure that buffer is always null terminated.
  if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 20+1)) {
    return false; // if there are more than 20 characters, it cannot be represented as an integer.
  }
  auto answer = numberparsing::is_integer(tmpbuf);
  // If the parsing was a success, we must still check that it is
  // a single scalar. Note that we parse first because of cases like '[]' where
  // getting TRAILING_CONTENT is wrong.
  if(check_trailing && (answer.error() == SUCCESS) && (!_json_iter->is_single_token())) { return TRAILING_CONTENT; }
  return answer;
}

/**
 * Root-scalar variant of get_number_type(). The token is first copied into a
 * null-terminated temporary buffer; NUMBER_ERROR is returned when the copy fails.
 *
 * @param check_trailing When true, a successful parse that is not a single token yields
 *                       TRAILING_CONTENT.
 */
simdjson_inline simdjson_result value_iterator::get_root_number_type(bool check_trailing) noexcept {
  auto max_len = peek_start_length();
  auto json = peek_root_scalar("number");
  // Per https://www.exploringbinary.com/maximum-number-of-decimal-digits-in-binary-floating-point-numbers/,
  // 1074 is the maximum number of significant fractional digits. Add 8 more digits for the biggest
  // number: -0.<fraction>e-308.
  uint8_t tmpbuf[1074+8+1+1];
  tmpbuf[1074+8+1] = '\0'; // make sure that buffer is always null terminated.
if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 1074+8+1)) {
    logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 1082 characters");
    return NUMBER_ERROR;
  }
  auto answer = numberparsing::get_number_type(tmpbuf);
  if (check_trailing && (answer.error() == SUCCESS)  && !_json_iter->is_single_token()) { return TRAILING_CONTENT; }
  return answer;
}

// The root-scalar getters below share one shape: copy the token into a null-terminated
// temporary buffer, parse the copy, optionally reject trailing content, and advance the
// cursor only on success.

/**
 * Root-scalar variant of get_number(); parses with numberparsing::parse_number().
 *
 * @param check_trailing When true, a document that is not a single token yields
 *                       TRAILING_CONTENT.
 * @return The parsed number, NUMBER_ERROR when the token does not fit in the buffer, the
 *         parse error, or TRAILING_CONTENT.
 */
simdjson_inline simdjson_result value_iterator::get_root_number(bool check_trailing) noexcept {
  auto max_len = peek_start_length();
  auto json = peek_root_scalar("number");
  // Per https://www.exploringbinary.com/maximum-number-of-decimal-digits-in-binary-floating-point-numbers/,
  // 1074 is the maximum number of significant fractional digits. Add 8 more digits for the biggest
  // number: -0.<fraction>e-308.
  uint8_t tmpbuf[1074+8+1+1];
  tmpbuf[1074+8+1] = '\0'; // make sure that buffer is always null terminated.
  if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 1074+8+1)) {
    logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 1082 characters");
    return NUMBER_ERROR;
  }
  number num;
  error_code error =  numberparsing::parse_number(tmpbuf, num);
  if(error) { return error; }
  if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; }
  advance_root_scalar("number");
  return num;
}

/** Get the root string value by unescaping the result of get_root_raw_json_string(). */
simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_string(bool check_trailing, bool allow_replacement) noexcept {
  return get_root_raw_json_string(check_trailing).unescape(json_iter(), allow_replacement);
}

/**
 * Get the root string value and assign it to `receiver`.
 *
 * @return SUCCESS, or the error from get_root_string(check_trailing, allow_replacement);
 *         `receiver` is left untouched on error.
 *
 * NOTE(review): the template parameter list (presumably declaring `string_type`) appears to
 * be missing in this copy of the file -- confirm against the upstream amalgamation.
 */
template simdjson_warn_unused simdjson_inline error_code value_iterator::get_root_string(string_type& receiver, bool check_trailing, bool allow_replacement) noexcept {
  std::string_view content;
  auto err = get_root_string(check_trailing, allow_replacement).get(content);
  if (err) { return err; }
  receiver = content;
  return SUCCESS;
}

/** Like get_root_string(), but unescapes with raw_json_string's unescape_wobbly(). */
simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_wobbly_string(bool check_trailing) noexcept {
  return get_root_raw_json_string(check_trailing).unescape_wobbly(json_iter());
}

/**
 * Get the root string as a raw_json_string starting just past the opening quote.
 *
 * @param check_trailing When true, a document that is not a single token yields
 *                       TRAILING_CONTENT (checked after the type check, before advancing).
 */
simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_raw_json_string(bool check_trailing) noexcept {
  auto json = peek_scalar("string");
  if (*json != '"') { return incorrect_type_error("Not a string"); }
  if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; }
  advance_scalar("string");
  return raw_json_string(json+1);
}

/** Root-scalar variant of get_uint64(); parses with numberparsing::parse_unsigned(). */
simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_uint64(bool check_trailing) noexcept {
  auto max_len = peek_start_length();
  auto json = peek_root_scalar("uint64");
  uint8_t tmpbuf[20+1+1]{}; // <20 digits> is the longest possible unsigned integer
  tmpbuf[20+1] = '\0'; // make sure that buffer is always null terminated.
  if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 20+1)) {
    logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 20 characters");
    return NUMBER_ERROR;
  }
  auto result = numberparsing::parse_unsigned(tmpbuf);
  if(result.error() == SUCCESS) {
    if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; }
    advance_root_scalar("uint64");
  }
  return result;
}

/** Root-scalar variant of get_uint64_in_string(); parses with numberparsing::parse_unsigned_in_string(). */
simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_uint64_in_string(bool check_trailing) noexcept {
  auto max_len = peek_start_length();
  auto json = peek_root_scalar("uint64");
  uint8_t tmpbuf[20+1+1]{}; // <20 digits> is the longest possible unsigned integer
  tmpbuf[20+1] = '\0'; // make sure that buffer is always null terminated.
  if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 20+1)) {
    logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 20 characters");
    return NUMBER_ERROR;
  }
  auto result = numberparsing::parse_unsigned_in_string(tmpbuf);
  if(result.error() == SUCCESS) {
    if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; }
    advance_root_scalar("uint64");
  }
  return result;
}

/** Root-scalar variant of get_int64(); parses with numberparsing::parse_integer(). */
simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_int64(bool check_trailing) noexcept {
  auto max_len = peek_start_length();
  auto json = peek_root_scalar("int64");
  uint8_t tmpbuf[20+1+1]; // -<19 digits> is the longest possible integer
  tmpbuf[20+1] = '\0'; // make sure that buffer is always null terminated.
  if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 20+1)) {
    logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 20 characters");
    return NUMBER_ERROR;
  }

  auto result = numberparsing::parse_integer(tmpbuf);
  if(result.error() == SUCCESS) {
    if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; }
    advance_root_scalar("int64");
  }
  return result;
}

/** Root-scalar variant of get_int64_in_string(); parses with numberparsing::parse_integer_in_string(). */
simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_int64_in_string(bool check_trailing) noexcept {
  auto max_len = peek_start_length();
  auto json = peek_root_scalar("int64");
  uint8_t tmpbuf[20+1+1]; // -<19 digits> is the longest possible integer
  tmpbuf[20+1] = '\0'; // make sure that buffer is always null terminated.
  if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 20+1)) {
    logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 20 characters");
    return NUMBER_ERROR;
  }

  auto result = numberparsing::parse_integer_in_string(tmpbuf);
  if(result.error() == SUCCESS) {
    if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; }
    advance_root_scalar("int64");
  }
  return result;
}

/** Root-scalar variant of get_double(); parses with numberparsing::parse_double(). */
simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_double(bool check_trailing) noexcept {
  auto max_len = peek_start_length();
  auto json = peek_root_scalar("double");
  // Per https://www.exploringbinary.com/maximum-number-of-decimal-digits-in-binary-floating-point-numbers/,
  // 1074 is the maximum number of significant fractional digits. Add 8 more digits for the biggest
  // number: -0.<fraction>e-308.
  uint8_t tmpbuf[1074+8+1+1]; // +1 for null termination.
  tmpbuf[1074+8+1] = '\0'; // make sure that buffer is always null terminated.
  if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 1074+8+1)) {
    logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 1082 characters");
    return NUMBER_ERROR;
  }
  auto result = numberparsing::parse_double(tmpbuf);
  if(result.error() == SUCCESS) {
    if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; }
    advance_root_scalar("double");
  }
  return result;
}

/** Root-scalar variant of get_double_in_string(); parses with numberparsing::parse_double_in_string(). */
simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_double_in_string(bool check_trailing) noexcept {
  auto max_len = peek_start_length();
  auto json = peek_root_scalar("double");
  // Per https://www.exploringbinary.com/maximum-number-of-decimal-digits-in-binary-floating-point-numbers/,
  // 1074 is the maximum number of significant fractional digits. Add 8 more digits for the biggest
  // number: -0.<fraction>e-308.
  uint8_t tmpbuf[1074+8+1+1]; // +1 for null termination.
  tmpbuf[1074+8+1] = '\0'; // make sure that buffer is always null terminated.
  if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 1074+8+1)) {
    logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 1082 characters");
    return NUMBER_ERROR;
  }
  auto result = numberparsing::parse_double_in_string(tmpbuf);
  if(result.error() == SUCCESS) {
    if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; }
    advance_root_scalar("double");
  }
  return result;
}

/**
 * Root-scalar variant of get_bool(); parses the buffered copy with parse_bool().
 * A token that does not fit in the buffer is reported as "Not a boolean".
 */
simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_bool(bool check_trailing) noexcept {
  auto max_len = peek_start_length();
  auto json = peek_root_scalar("bool");
  uint8_t tmpbuf[5+1+1]; // +1 for null termination
  tmpbuf[5+1] = '\0'; // make sure that buffer is always null terminated.
  if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 5+1)) { return incorrect_type_error("Not a boolean"); }
  auto result = parse_bool(tmpbuf);
  if(result.error() == SUCCESS) {
    if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; }
    advance_root_scalar("bool");
  }
  return result;
}

/**
 * Root-scalar variant of is_null(). No temporary buffer is used here: the literal is
 * checked in place, and json[4] is only read when max_len is greater than 4.
 *
 * @param check_trailing When true, a null that is not a single token yields TRAILING_CONTENT.
 */
simdjson_inline simdjson_result value_iterator::is_root_null(bool check_trailing) noexcept {
  auto max_len = peek_start_length();
  auto json = peek_root_scalar("null");
  bool result = (max_len >= 4 && !atomparsing::str4ncmp(json, "null") &&
         (max_len == 4 || jsoncharutils::is_structural_or_whitespace(json[4])));
  if(result) { // we have something that looks like a null.
if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; }
    advance_root_scalar("null");
  }
  return result;
}

/** Skip the current child value by delegating to json_iterator::skip_child(depth()). */
simdjson_warn_unused simdjson_inline error_code value_iterator::skip_child() noexcept {
  SIMDJSON_ASSUME( _json_iter->token._position > _start_position );
  SIMDJSON_ASSUME( _json_iter->_depth >= _depth );

  return _json_iter->skip_child(depth());
}

/** Build a value_iterator for a child at depth()+1, starting at the current token position. */
simdjson_inline value_iterator value_iterator::child() const noexcept {
  assert_at_child();
  return { _json_iter, depth()+1, _json_iter->token.position() };
}

// GCC 7 warns when the first line of this function is inlined away into oblivion due to the caller
// relating depth and iterator depth, which is a desired effect. It does not happen if is_open is
// marked non-inline.
SIMDJSON_PUSH_DISABLE_WARNINGS
SIMDJSON_DISABLE_STRICT_OVERFLOW_WARNING
/** True while the underlying iterator's depth is at least this value's depth. */
simdjson_inline bool value_iterator::is_open() const noexcept {
  return _json_iter->depth() >= depth();
}
SIMDJSON_POP_DISABLE_WARNINGS

/** Forwards to json_iterator::at_end(). */
simdjson_inline bool value_iterator::at_end() const noexcept {
  return _json_iter->at_end();
}

/** True when the cursor is exactly at this value's start position. */
simdjson_inline bool value_iterator::at_start() const noexcept {
  return _json_iter->token.position() == start_position();
}

/** True when the cursor is exactly one token past this value's start position. */
simdjson_inline bool value_iterator::at_first_field() const noexcept {
  SIMDJSON_ASSUME( _json_iter->token._position > _start_position );
  return _json_iter->token.position() == start_position() + 1;
}

/** Forwards to json_iterator::abandon(). */
simdjson_inline void value_iterator::abandon() noexcept {
  _json_iter->abandon();
}

/** The depth of this value. */
simdjson_warn_unused simdjson_inline depth_t value_iterator::depth() const noexcept {
  return _depth;
}

/** The error currently stored in the underlying json_iterator. */
simdjson_warn_unused simdjson_inline error_code value_iterator::error() const noexcept {
  return _json_iter->error;
}

/** Forwards to json_iterator::string_buf_loc(). */
simdjson_warn_unused simdjson_inline uint8_t *&value_iterator::string_buf_loc() noexcept {
  return _json_iter->string_buf_loc();
}

/** The underlying json_iterator (const access). */
simdjson_warn_unused simdjson_inline const json_iterator &value_iterator::json_iter() const noexcept {
  return *_json_iter;
}

/** The underlying json_iterator (mutable access). */
simdjson_warn_unused simdjson_inline json_iterator &value_iterator::json_iter() noexcept {
  return *_json_iter;
}

/** Pointer to the JSON at this value's start position. */
simdjson_inline const uint8_t *value_iterator::peek_start() const noexcept {
  return _json_iter->peek(start_position());
}

/** Result of json_iterator::peek_length() for this value's start position. */
simdjson_inline uint32_t value_iterator::peek_start_length() const noexcept {
  return _json_iter->peek_length(start_position());
}

/**
 * Get a pointer to the scalar's JSON without advancing the cursor.
 *
 * @param type Label passed to the logger.
 */
simdjson_inline const uint8_t *value_iterator::peek_scalar(const char *type) noexcept {
  logger::log_value(*_json_iter, start_position(), depth(), type);
  // If we're not at the start position anymore, read the value from where it started.
  if (!is_at_start()) { return peek_start(); }

  // We are at the start position: peek at the current token (the cursor is not advanced here).
  assert_at_start();
  return _json_iter->peek();
}

/**
 * Consume the scalar: advance the cursor by one token and ascend to depth()-1.
 * Does nothing when the cursor is no longer at the start position.
 *
 * @param type Label passed to the logger.
 */
simdjson_inline void value_iterator::advance_scalar(const char *type) noexcept {
  logger::log_value(*_json_iter, start_position(), depth(), type);
  // If we're not at the position anymore, we don't want to advance the cursor.
  if (!is_at_start()) { return; }

  // Get the JSON and advance the cursor, decreasing depth to signify that we have retrieved the value.
  assert_at_start();
  _json_iter->return_current_and_advance();
  _json_iter->ascend_to(depth()-1);
}

/**
 * Verify that the value begins with `start_char` and, when the cursor is still at the start
 * position, consume that opening token.
 *
 * @param start_char             Expected first byte ('[' for arrays in this file).
 * @param incorrect_type_message Message passed to incorrect_type_error() on mismatch.
 * @param type                   Label passed to the logger.
 * @return SUCCESS, the result of incorrect_type_error(), or (with development checks)
 *         OUT_OF_ORDER_ITERATION.
 */
simdjson_inline error_code value_iterator::start_container(uint8_t start_char, const char *incorrect_type_message, const char *type) noexcept {
  logger::log_start_value(*_json_iter, start_position(), depth(), type);
  // If we're not at the position anymore, we don't want to advance the cursor.
  const uint8_t *json;
  if (!is_at_start()) {
#if SIMDJSON_DEVELOPMENT_CHECKS
    if (!is_at_iterator_start()) { return OUT_OF_ORDER_ITERATION; }
#endif
    json = peek_start();
    if (*json != start_char) { return incorrect_type_error(incorrect_type_message); }
  } else {
    assert_at_start();
    /**
     * We should be prudent. Let us peek. If it is not the right type, we
     * return an error. Only once we have determined that we have the right
     * type are we allowed to advance!
     */
    json = _json_iter->peek();
    if (*json != start_char) { return incorrect_type_error(incorrect_type_message); }
    _json_iter->return_current_and_advance();
  }

  return SUCCESS;
}

/** Root-scalar variant of peek_scalar(): asserts via assert_at_root() before peeking. */
simdjson_inline const uint8_t *value_iterator::peek_root_scalar(const char *type) noexcept {
  logger::log_value(*_json_iter, start_position(), depth(), type);
  if (!is_at_start()) { return peek_start(); }

  assert_at_root();
  return _json_iter->peek();
}

/** Non-root variant of peek_scalar(): asserts via assert_at_non_root_start() before peeking. */
simdjson_inline const uint8_t *value_iterator::peek_non_root_scalar(const char *type) noexcept {
  logger::log_value(*_json_iter, start_position(), depth(), type);
  if (!is_at_start()) { return peek_start(); }

  assert_at_non_root_start();
  return _json_iter->peek();
}

/** Root-scalar variant of advance_scalar(): asserts via assert_at_root() before advancing. */
simdjson_inline void value_iterator::advance_root_scalar(const char *type) noexcept {
  logger::log_value(*_json_iter, start_position(), depth(), type);
  if (!is_at_start()) { return; }

  assert_at_root();
  _json_iter->return_current_and_advance();
  _json_iter->ascend_to(depth()-1);
}

/** Non-root variant of advance_scalar(): asserts via assert_at_non_root_start() before advancing. */
simdjson_inline void value_iterator::advance_non_root_scalar(const char *type) noexcept {
  logger::log_value(*_json_iter, start_position(), depth(), type);
  if (!is_at_start()) { return; }

  assert_at_non_root_start();
  _json_iter->return_current_and_advance();
  _json_iter->ascend_to(depth()-1);
}

/** Log `message` as an error for this value and return INCORRECT_TYPE. */
simdjson_inline error_code value_iterator::incorrect_type_error(const char *message) const noexcept {
  logger::log_error(*_json_iter, start_position(), depth(), message);
  return INCORRECT_TYPE;
}

/** True when position() equals start_position(). */
simdjson_inline bool value_iterator::is_at_start() const noexcept {
  return position() == start_position();
}

/** True when the iterator is at this value's depth and the current token starts with '"'. */
simdjson_inline bool value_iterator::is_at_key() const noexcept {
  // Keys are at the same depth as the object.
  // Note here that we could be safer and check that we are within an object,
  // but we do not.
return _depth == _json_iter->_depth && *_json_iter->peek() == '"';
}

/** True when the cursor is one or two tokens past the start position. */
simdjson_inline bool value_iterator::is_at_iterator_start() const noexcept {
  // We can legitimately be either at the first value ([1]), or after the array if it's empty ([]).
  auto delta = position() - start_position();
  return delta == 1 || delta == 2;
}

/** Assumes the cursor is at the start position, at this value's depth, with depth > 0. */
inline void value_iterator::assert_at_start() const noexcept {
  SIMDJSON_ASSUME( _json_iter->token._position == _start_position );
  SIMDJSON_ASSUME( _json_iter->_depth == _depth );
  SIMDJSON_ASSUME( _depth > 0 );
}

/** Assumes the cursor is one token past the start position, at this value's depth, with depth > 0. */
inline void value_iterator::assert_at_container_start() const noexcept {
  SIMDJSON_ASSUME( _json_iter->token._position == _start_position + 1 );
  SIMDJSON_ASSUME( _json_iter->_depth == _depth );
  SIMDJSON_ASSUME( _depth > 0 );
}

/** Assumes the cursor is past the start position, at this value's depth, with depth > 0. */
inline void value_iterator::assert_at_next() const noexcept {
  SIMDJSON_ASSUME( _json_iter->token._position > _start_position );
  SIMDJSON_ASSUME( _json_iter->_depth == _depth );
  SIMDJSON_ASSUME( _depth > 0 );
}

/** Move the underlying iterator back to this value's depth and start position. */
simdjson_inline void value_iterator::move_at_start() noexcept {
  _json_iter->_depth = _depth;
  _json_iter->token.set_position(_start_position);
}

/** Move the underlying iterator to this value's depth, one token past its start position. */
simdjson_inline void value_iterator::move_at_container_start() noexcept {
  _json_iter->_depth = _depth;
  _json_iter->token.set_position(_start_position + 1);
}

/**
 * Rewind to the beginning of the array and re-run started_array().
 * Returns the pending error() instead when one is set.
 */
simdjson_inline simdjson_result value_iterator::reset_array() noexcept {
  if(error()) { return error(); }
  move_at_container_start();
  return started_array();
}

/**
 * Rewind to the beginning of the object and re-run started_object().
 * Returns the pending error() instead when one is set.
 */
simdjson_inline simdjson_result value_iterator::reset_object() noexcept {
  if(error()) { return error(); }
  move_at_container_start();
  return started_object();
}

/** Assumes the cursor is past the start position and one level below this value's depth. */
inline void value_iterator::assert_at_child() const noexcept {
  SIMDJSON_ASSUME( _json_iter->token._position > _start_position );
  SIMDJSON_ASSUME( _json_iter->_depth == _depth + 1 );
  SIMDJSON_ASSUME( _depth > 0 );
}

/** assert_at_start() plus the assumption that this value is at depth 1. */
inline void value_iterator::assert_at_root() const noexcept {
  assert_at_start();
  SIMDJSON_ASSUME( _depth == 1 );
}

/** assert_at_start() plus the assumption that this value is deeper than depth 1. */
inline void value_iterator::assert_at_non_root_start() const noexcept {
  assert_at_start();
  SIMDJSON_ASSUME( _depth > 1 );
}

/** Assumes that a json_iterator is attached. */
inline void value_iterator::assert_is_valid() const noexcept {
  SIMDJSON_ASSUME( _json_iter != nullptr );
}

/** True when a json_iterator is attached. */
simdjson_inline bool value_iterator::is_valid() const noexcept {
  return _json_iter != nullptr;
}

/**
 * Classify the value from the first byte at its start position.
 *
 * @return The matching json_type, or TAPE_ERROR when the byte starts none of the
 *         recognized value kinds.
 */
simdjson_inline simdjson_result value_iterator::type() const noexcept {
  switch (*peek_start()) {
    case '{':
      return json_type::object;
    case '[':
      return json_type::array;
    case '"':
      return json_type::string;
    case 'n':
      return json_type::null;
    case 't': case 'f':
      return json_type::boolean;
    case '-':
    case '0': case '1': case '2': case '3': case '4':
    case '5': case '6': case '7': case '8': case '9':
      return json_type::number;
    default:
      return TAPE_ERROR;
  }
}

/** The token position where this value starts. */
simdjson_inline token_position value_iterator::start_position() const noexcept {
  return _start_position;
}

/** Forwards to json_iterator::position(). */
simdjson_inline token_position value_iterator::position() const noexcept {
  return _json_iter->position();
}

/** Forwards to json_iterator::end_position(). */
simdjson_inline token_position value_iterator::end_position() const noexcept {
  return _json_iter->end_position();
}

/** Forwards to json_iterator::last_position(). */
simdjson_inline token_position value_iterator::last_position() const noexcept {
  return _json_iter->last_position();
}

/** Forwards to json_iterator::report_error(error, message). */
simdjson_inline error_code value_iterator::report_error(error_code error, const char *message) noexcept {
  return _json_iter->report_error(error, message);
}

} // namespace ondemand
} // namespace westmere
} // namespace simdjson

namespace simdjson {

// NOTE(review): the template arguments on `simdjson_result`, on
// `implementation_simdjson_result_base` and on `std::forward` appear to be missing in this
// copy of the file -- confirm against the upstream amalgamation.

/** Construct a result holding a value_iterator, forwarded to the base class. */
simdjson_inline simdjson_result::simdjson_result(westmere::ondemand::value_iterator &&value) noexcept
    : implementation_simdjson_result_base(std::forward(value)) {}
/** Construct a result holding an error code. */
simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept
    : implementation_simdjson_result_base(error) {}

} // namespace simdjson

#endif // SIMDJSON_GENERIC_ONDEMAND_VALUE_ITERATOR_INL_H
/* end file simdjson/generic/ondemand/value_iterator-inl.h for westmere */
/* end file simdjson/generic/ondemand/amalgamated.h for westmere */
/* including simdjson/westmere/end.h: #include "simdjson/westmere/end.h" */
/* begin file simdjson/westmere/end.h */
/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */
/* amalgamation skipped (editor-only): #include "simdjson/westmere/base.h" */
/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */

#if !SIMDJSON_CAN_ALWAYS_RUN_WESTMERE
SIMDJSON_UNTARGET_REGION
#endif

/* undefining SIMDJSON_IMPLEMENTATION from "westmere" */
#undef SIMDJSON_IMPLEMENTATION
/* end file simdjson/westmere/end.h */

#endif // SIMDJSON_WESTMERE_IMPLEMENTATION_H
/* end file simdjson/westmere/ondemand.h */
#else
#error Unknown SIMDJSON_BUILTIN_IMPLEMENTATION
#endif

/* undefining SIMDJSON_CONDITIONAL_INCLUDE */
#undef SIMDJSON_CONDITIONAL_INCLUDE

namespace simdjson {
  /**
   * @copydoc simdjson::SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand
   */
  namespace ondemand = SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand;
} // namespace simdjson

#endif // SIMDJSON_BUILTIN_ONDEMAND_H
/* end file simdjson/builtin/ondemand.h */

namespace simdjson {
  /**
   * @copydoc simdjson::builtin::ondemand
   */
  namespace ondemand = builtin::ondemand;
} // namespace simdjson

#endif // SIMDJSON_ONDEMAND_H
/* end file simdjson/ondemand.h */

#endif // SIMDJSON_H
/* end file simdjson.h */