// Copyright 2007-2010 Baptiste Lepilleur // Distributed under MIT license, or public domain if desired and // recognized in your jurisdiction. // See file LICENSE for detail or copy at http://jsoncpp.sourceforge.net/LICENSE #if !defined(JSON_IS_AMALGAMATION) # include "reader.h" # include "value.h" # include "json_tool.h" #endif // if !defined(JSON_IS_AMALGAMATION) #include #include #include #include #include #include #include #include #if _MSC_VER >= 1400 // VC++ 8.0 #pragma warning( disable : 4996 ) // disable warning about strdup being deprecated. #endif namespace CSJson { // Implementation of class Features // //////////////////////////////// Features::Features() : allowComments_( true ) , strictRoot_( false ) { } Features Features::all() { return Features(); } Features Features::strictMode() { Features features; features.allowComments_ = false; features.strictRoot_ = true; return features; } // Implementation of class Reader // //////////////////////////////// static inline bool in( Reader::Char c, Reader::Char c1, Reader::Char c2, Reader::Char c3, Reader::Char c4 ) { return c == c1 || c == c2 || c == c3 || c == c4; } static inline bool in( Reader::Char c, Reader::Char c1, Reader::Char c2, Reader::Char c3, Reader::Char c4, Reader::Char c5 ) { return c == c1 || c == c2 || c == c3 || c == c4 || c == c5; } static bool containsNewLine( Reader::Location begin, Reader::Location end ) { for ( ;begin < end; ++begin ) if ( *begin == '\n' || *begin == '\r' ) return true; return false; } // Class Reader // ////////////////////////////////////////////////////////////////// Reader::Reader() : features_( Features::all() ) { } Reader::Reader( const Features &features ) : features_( features ) { } bool Reader::parse( const std::string &document, Value &root, bool collectComments ) { document_ = document; const char *begin = document_.c_str(); const char *end = begin + document_.length(); return parse( begin, end, root, collectComments ); } bool Reader::parse( std::istream& sin, Value &root, bool collectComments ) { //std::istream_iterator begin(sin); //std::istream_iterator end; // Those would allow streamed input from a file, if parse() were a // template function. // Since std::string is reference-counted, this at least does not // create an extra copy. std::string doc; std::getline(sin, doc, (char)EOF); return parse( doc, root, collectComments ); } bool Reader::parse( const char *beginDoc, const char *endDoc, Value &root, bool collectComments ) { if ( !features_.allowComments_ ) { collectComments = false; } begin_ = beginDoc; end_ = endDoc; collectComments_ = collectComments; current_ = begin_; lastValueEnd_ = 0; lastValue_ = 0; commentsBefore_ = ""; errors_.clear(); while ( !nodes_.empty() ) nodes_.pop(); nodes_.push( &root ); bool successful = readValue(); Token token; skipCommentTokens( token ); if ( collectComments_ && !commentsBefore_.empty() ) root.setComment( commentsBefore_, commentAfter ); if ( features_.strictRoot_ ) { if ( !root.isArray() && !root.isObject() ) { // Set error location to start of doc, ideally should be first token found in doc token.type_ = tokenError; token.start_ = beginDoc; token.end_ = endDoc; addError( "A valid JSON document must be either an array or an object value.", token ); return false; } } return successful; } bool Reader::readValue() { Token token; skipCommentTokens( token ); bool successful = true; if ( collectComments_ && !commentsBefore_.empty() ) { currentValue().setComment( commentsBefore_, commentBefore ); commentsBefore_ = ""; } switch ( token.type_ ) { case tokenObjectBegin: successful = readObject( token ); break; case tokenArrayBegin: successful = readArray( token ); break; case tokenNumber: successful = decodeNumber( token ); break; case tokenString: successful = decodeString( token ); break; case tokenTrue: currentValue() = true; break; case tokenFalse: currentValue() = false; break; case tokenNull: currentValue() = Value(); break; default: return addError( "Syntax error: value, object or array expected.", token ); } if ( collectComments_ ) { lastValueEnd_ = current_; lastValue_ = ¤tValue(); } return successful; } void Reader::skipCommentTokens( Token &token ) { if ( features_.allowComments_ ) { do { readToken( token ); } while ( token.type_ == tokenComment ); } else { readToken( token ); } } bool Reader::expectToken( TokenType type, Token &token, const char *message ) { readToken( token ); if ( token.type_ != type ) return addError( message, token ); return true; } bool Reader::readToken( Token &token ) { skipSpaces(); token.start_ = current_; Char c = getNextChar(); bool ok = true; switch ( c ) { case '{': token.type_ = tokenObjectBegin; break; case '}': token.type_ = tokenObjectEnd; break; case '[': token.type_ = tokenArrayBegin; break; case ']': token.type_ = tokenArrayEnd; break; case '"': token.type_ = tokenString; ok = readString(); break; case '/': token.type_ = tokenComment; ok = readComment(); break; case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': case '-': token.type_ = tokenNumber; readNumber(); break; case 't': token.type_ = tokenTrue; ok = match( "rue", 3 ); break; case 'f': token.type_ = tokenFalse; ok = match( "alse", 4 ); break; case 'n': token.type_ = tokenNull; ok = match( "ull", 3 ); break; case ',': token.type_ = tokenArraySeparator; break; case ':': token.type_ = tokenMemberSeparator; break; case 0: token.type_ = tokenEndOfStream; break; default: ok = false; break; } if ( !ok ) token.type_ = tokenError; token.end_ = current_; return true; } void Reader::skipSpaces() { while ( current_ != end_ ) { Char c = *current_; if ( c == ' ' || c == '\t' || c == '\r' || c == '\n' ) ++current_; else break; } } bool Reader::match( Location pattern, int patternLength ) { if ( end_ - current_ < patternLength ) return false; int index = patternLength; while ( index-- ) if ( current_[index] != pattern[index] ) return false; current_ += patternLength; return true; } bool Reader::readComment() { Location commentBegin = current_ - 1; Char c = getNextChar(); bool successful = false; if ( c == '*' ) successful = readCStyleComment(); else if ( c == '/' ) successful = readCppStyleComment(); if ( !successful ) return false; if ( collectComments_ ) { CommentPlacement placement = commentBefore; if ( lastValueEnd_ && !containsNewLine( lastValueEnd_, commentBegin ) ) { if ( c != '*' || !containsNewLine( commentBegin, current_ ) ) placement = commentAfterOnSameLine; } addComment( commentBegin, current_, placement ); } return true; } void Reader::addComment( Location begin, Location end, CommentPlacement placement ) { assert( collectComments_ ); if ( placement == commentAfterOnSameLine ) { assert( lastValue_ != 0 ); lastValue_->setComment( std::string( begin, end ), placement ); } else { if ( !commentsBefore_.empty() ) commentsBefore_ += "\n"; commentsBefore_ += std::string( begin, end ); } } bool Reader::readCStyleComment() { while ( current_ != end_ ) { Char c = getNextChar(); if ( c == '*' && *current_ == '/' ) break; } return getNextChar() == '/'; } bool Reader::readCppStyleComment() { while ( current_ != end_ ) { Char c = getNextChar(); if ( c == '\r' || c == '\n' ) break; } return true; } void Reader::readNumber() { while ( current_ != end_ ) { if ( !(*current_ >= '0' && *current_ <= '9') && !in( *current_, '.', 'e', 'E', '+', '-' ) ) break; ++current_; } } bool Reader::readString() { Char c = 0; while ( current_ != end_ ) { c = getNextChar(); if ( c == '\\' ) getNextChar(); else if ( c == '"' ) break; } return c == '"'; } bool Reader::readObject( Token &/*tokenStart*/ ) { Token tokenName; std::string name; currentValue() = Value( objectValue ); while ( readToken( tokenName ) ) { bool initialTokenOk = true; while ( tokenName.type_ == tokenComment && initialTokenOk ) initialTokenOk = readToken( tokenName ); if ( !initialTokenOk ) break; if ( tokenName.type_ == tokenObjectEnd && name.empty() ) // empty object return true; if ( tokenName.type_ != tokenString ) break; name = ""; if ( !decodeString( tokenName, name ) ) return recoverFromError( tokenObjectEnd ); Token colon; if ( !readToken( colon ) || colon.type_ != tokenMemberSeparator ) { return addErrorAndRecover( "Missing ':' after object member name", colon, tokenObjectEnd ); } Value &value = currentValue()[ name ]; nodes_.push( &value ); bool ok = readValue(); nodes_.pop(); if ( !ok ) // error already set return recoverFromError( tokenObjectEnd ); Token comma; if ( !readToken( comma ) || ( comma.type_ != tokenObjectEnd && comma.type_ != tokenArraySeparator && comma.type_ != tokenComment ) ) { return addErrorAndRecover( "Missing ',' or '}' in object declaration", comma, tokenObjectEnd ); } bool finalizeTokenOk = true; while ( comma.type_ == tokenComment && finalizeTokenOk ) finalizeTokenOk = readToken( comma ); if ( comma.type_ == tokenObjectEnd ) return true; } return addErrorAndRecover( "Missing '}' or object member name", tokenName, tokenObjectEnd ); } bool Reader::readArray( Token &/*tokenStart*/ ) { currentValue() = Value( arrayValue ); skipSpaces(); if ( *current_ == ']' ) // empty array { Token endArray; readToken( endArray ); return true; } int index = 0; for (;;) { Value &value = currentValue()[ index++ ]; nodes_.push( &value ); bool ok = readValue(); nodes_.pop(); if ( !ok ) // error already set return recoverFromError( tokenArrayEnd ); Token token; // Accept Comment after last item in the array. ok = readToken( token ); while ( token.type_ == tokenComment && ok ) { ok = readToken( token ); } bool badTokenType = ( token.type_ != tokenArraySeparator && token.type_ != tokenArrayEnd ); if ( !ok || badTokenType ) { return addErrorAndRecover( "Missing ',' or ']' in array declaration", token, tokenArrayEnd ); } if ( token.type_ == tokenArrayEnd ) break; } return true; } bool Reader::decodeNumber( Token &token ) { bool isDouble = false; for ( Location inspect = token.start_; inspect != token.end_; ++inspect ) { isDouble = isDouble || in( *inspect, '.', 'e', 'E', '+' ) || ( *inspect == '-' && inspect != token.start_ ); } if ( isDouble ) return decodeDouble( token ); // Attempts to parse the number as an integer. If the number is // larger than the maximum supported value of an integer then // we decode the number as a double. Location current = token.start_; bool isNegative = *current == '-'; if ( isNegative ) ++current; Value::LargestUInt maxIntegerValue = isNegative ? Value::LargestUInt(-Value::minLargestInt) : Value::maxLargestUInt; Value::LargestUInt threshold = maxIntegerValue / 10; Value::UInt lastDigitThreshold = Value::UInt( maxIntegerValue % 10 ); assert( lastDigitThreshold >=0 && lastDigitThreshold <= 9 ); Value::LargestUInt value = 0; while ( current < token.end_ ) { Char c = *current++; if ( c < '0' || c > '9' ) return addError( "'" + std::string( token.start_, token.end_ ) + "' is not a number.", token ); Value::UInt digit(c - '0'); if ( value >= threshold ) { // If the current digit is not the last one, or if it is // greater than the last digit of the maximum integer value, // the parse the number as a double. if ( current != token.end_ || digit > lastDigitThreshold ) { return decodeDouble( token ); } } value = value * 10 + digit; } if ( isNegative ) currentValue() = -Value::LargestInt( value ); else if ( value <= Value::LargestUInt(Value::maxInt) ) currentValue() = Value::LargestInt( value ); else currentValue() = value; return true; } bool Reader::decodeDouble( Token &token ) { double value = 0; const int bufferSize = 32; int count; int length = int(token.end_ - token.start_); if ( length <= bufferSize ) { Char buffer[bufferSize+1]; memcpy( buffer, token.start_, length ); buffer[length] = 0; count = sscanf( buffer, "%lf", &value ); } else { std::string buffer( token.start_, token.end_ ); count = sscanf( buffer.c_str(), "%lf", &value ); } if ( count != 1 ) return addError( "'" + std::string( token.start_, token.end_ ) + "' is not a number.", token ); currentValue() = value; return true; } bool Reader::decodeString( Token &token ) { std::string decoded; if ( !decodeString( token, decoded ) ) return false; currentValue() = decoded; return true; } bool Reader::decodeString( Token &token, std::string &decoded ) { decoded.reserve( token.end_ - token.start_ - 2 ); Location current = token.start_ + 1; // skip '"' Location end = token.end_ - 1; // do not include '"' while ( current != end ) { Char c = *current++; if ( c == '"' ) break; else if ( c == '\\' ) { if ( current == end ) return addError( "Empty escape sequence in string", token, current ); Char escape = *current++; switch ( escape ) { case '"': decoded += '"'; break; case '/': decoded += '/'; break; case '\\': decoded += '\\'; break; case 'b': decoded += '\b'; break; case 'f': decoded += '\f'; break; case 'n': decoded += '\n'; break; case 'r': decoded += '\r'; break; case 't': decoded += '\t'; break; case 'u': { unsigned int unicode; if ( !decodeUnicodeCodePoint( token, current, end, unicode ) ) return false; decoded += codePointToUTF8(unicode); } break; default: return addError( "Bad escape sequence in string", token, current ); } } else { decoded += c; } } return true; } bool Reader::decodeUnicodeCodePoint( Token &token, Location ¤t, Location end, unsigned int &unicode ) { if ( !decodeUnicodeEscapeSequence( token, current, end, unicode ) ) return false; if (unicode >= 0xD800 && unicode <= 0xDBFF) { // surrogate pairs if (end - current < 6) return addError( "additional six characters expected to parse unicode surrogate pair.", token, current ); unsigned int surrogatePair; if (*(current++) == '\\' && *(current++)== 'u') { if (decodeUnicodeEscapeSequence( token, current, end, surrogatePair )) { unicode = 0x10000 + ((unicode & 0x3FF) << 10) + (surrogatePair & 0x3FF); } else return false; } else return addError( "expecting another \\u token to begin the second half of a unicode surrogate pair", token, current ); } return true; } bool Reader::decodeUnicodeEscapeSequence( Token &token, Location ¤t, Location end, unsigned int &unicode ) { if ( end - current < 4 ) return addError( "Bad unicode escape sequence in string: four digits expected.", token, current ); unicode = 0; for ( int index =0; index < 4; ++index ) { Char c = *current++; unicode *= 16; if ( c >= '0' && c <= '9' ) unicode += c - '0'; else if ( c >= 'a' && c <= 'f' ) unicode += c - 'a' + 10; else if ( c >= 'A' && c <= 'F' ) unicode += c - 'A' + 10; else return addError( "Bad unicode escape sequence in string: hexadecimal digit expected.", token, current ); } return true; } bool Reader::addError( const std::string &message, Token &token, Location extra ) { ErrorInfo info; info.token_ = token; info.message_ = message; info.extra_ = extra; errors_.push_back( info ); return false; } bool Reader::recoverFromError( TokenType skipUntilToken ) { int errorCount = int(errors_.size()); Token skip; for (;;) { if ( !readToken(skip) ) errors_.resize( errorCount ); // discard errors caused by recovery if ( skip.type_ == skipUntilToken || skip.type_ == tokenEndOfStream ) break; } errors_.resize( errorCount ); return false; } bool Reader::addErrorAndRecover( const std::string &message, Token &token, TokenType skipUntilToken ) { addError( message, token ); return recoverFromError( skipUntilToken ); } Value & Reader::currentValue() { return *(nodes_.top()); } Reader::Char Reader::getNextChar() { if ( current_ == end_ ) return 0; return *current_++; } void Reader::getLocationLineAndColumn( Location location, int &line, int &column ) const { Location current = begin_; Location lastLineStart = current; line = 0; while ( current < location && current != end_ ) { Char c = *current++; if ( c == '\r' ) { if ( *current == '\n' ) ++current; lastLineStart = current; ++line; } else if ( c == '\n' ) { lastLineStart = current; ++line; } } // column & line start at 1 column = int(location - lastLineStart) + 1; ++line; } std::string Reader::getLocationLineAndColumn( Location location ) const { int line, column; getLocationLineAndColumn( location, line, column ); char buffer[18+16+16+1]; sprintf( buffer, "Line %d, Column %d", line, column ); return buffer; } // Deprecated. Preserved for backward compatibility std::string Reader::getFormatedErrorMessages() const { return getFormattedErrorMessages(); } std::string Reader::getFormattedErrorMessages() const { std::string formattedMessage; for ( Errors::const_iterator itError = errors_.begin(); itError != errors_.end(); ++itError ) { const ErrorInfo &error = *itError; formattedMessage += "* " + getLocationLineAndColumn( error.token_.start_ ) + "\n"; formattedMessage += " " + error.message_ + "\n"; if ( error.extra_ ) formattedMessage += "See " + getLocationLineAndColumn( error.extra_ ) + " for detail.\n"; } return formattedMessage; } std::istream& operator>>( std::istream &sin, Value &root ) { CSJson::Reader reader; bool ok = reader.parse(sin, root, true); //JSON_ASSERT( ok ); if (!ok) throw std::runtime_error(reader.getFormattedErrorMessages()); return sin; } } // namespace CSJson