JsonCpp project page Classes Namespace JsonCpp home page

json_reader.cpp
Go to the documentation of this file.
1// Copyright 2007-2011 Baptiste Lepilleur and The JsonCpp Authors
2// Copyright (C) 2016 InfoTeCS JSC. All rights reserved.
3// Distributed under MIT license, or public domain if desired and
4// recognized in your jurisdiction.
5// See file LICENSE for detail or copy at http://jsoncpp.sourceforge.net/LICENSE
6
7#if !defined(JSON_IS_AMALGAMATION)
8#include "json_tool.h"
9#include <json/assertions.h>
10#include <json/reader.h>
11#include <json/value.h>
12#endif // if !defined(JSON_IS_AMALGAMATION)
13#include <algorithm>
14#include <cassert>
15#include <cmath>
16#include <cstring>
17#include <iostream>
18#include <istream>
19#include <limits>
20#include <memory>
21#include <set>
22#include <sstream>
23#include <utility>
24
25#include <cstdio>
26#if __cplusplus >= 201103L
27
28#if !defined(sscanf)
29#define sscanf std::sscanf
30#endif
31
32#endif //__cplusplus
33
34#if defined(_MSC_VER)
35#if !defined(_CRT_SECURE_CPP_OVERLOAD_STANDARD_NAMES)
36#define _CRT_SECURE_CPP_OVERLOAD_STANDARD_NAMES 1
37#endif //_CRT_SECURE_CPP_OVERLOAD_STANDARD_NAMES
38#endif //_MSC_VER
39
40#if defined(_MSC_VER)
41// Disable warning about strdup being deprecated.
42#pragma warning(disable : 4996)
43#endif
44
// Define JSONCPP_DEPRECATED_STACK_LIMIT as an appropriate integer at compile
// time to change the stack limit.
#if !defined(JSONCPP_DEPRECATED_STACK_LIMIT)
#define JSONCPP_DEPRECATED_STACK_LIMIT 1000
#endif

// Upper bound on the depth of the deprecated Reader's node stack; it is
// compared against nodes_.size() in Reader::readValue().
static size_t const stackLimit_g = JSONCPP_DEPRECATED_STACK_LIMIT;
53
54namespace Json {
55
56#if __cplusplus >= 201103L || (defined(_CPPLIB_VER) && _CPPLIB_VER >= 520)
57using CharReaderPtr = std::unique_ptr<CharReader>;
58#else
59using CharReaderPtr = std::auto_ptr<CharReader>;
60#endif
61
// Implementation of class Features
// ////////////////////////////////
64
65Features::Features() = default;
66
67Features Features::all() { return {}; }
68
70 Features features;
71 features.allowComments_ = false;
72 features.strictRoot_ = true;
73 features.allowDroppedNullPlaceholders_ = false;
74 features.allowNumericKeys_ = false;
75 return features;
76}
77
// Implementation of class Reader
// ////////////////////////////////
80
81bool Reader::containsNewLine(Reader::Location begin, Reader::Location end) {
82 return std::any_of(begin, end, [](char b) { return b == '\n' || b == '\r'; });
83}
84
// Class Reader
// //////////////////////////////////////////////////////////////////
87
88Reader::Reader() : features_(Features::all()) {}
89
90Reader::Reader(const Features& features) : features_(features) {}
91
92bool Reader::parse(const std::string& document, Value& root,
93 bool collectComments) {
94 document_.assign(document.begin(), document.end());
95 const char* begin = document_.c_str();
96 const char* end = begin + document_.length();
97 return parse(begin, end, root, collectComments);
98}
99
100bool Reader::parse(std::istream& is, Value& root, bool collectComments) {
101 // std::istream_iterator<char> begin(is);
102 // std::istream_iterator<char> end;
103 // Those would allow streamed input from a file, if parse() were a
104 // template function.
105
106 // Since String is reference-counted, this at least does not
107 // create an extra copy.
108 String doc(std::istreambuf_iterator<char>(is), {});
109 return parse(doc.data(), doc.data() + doc.size(), root, collectComments);
110}
111
112bool Reader::parse(const char* beginDoc, const char* endDoc, Value& root,
113 bool collectComments) {
114 if (!features_.allowComments_) {
115 collectComments = false;
116 }
117
118 begin_ = beginDoc;
119 end_ = endDoc;
120 collectComments_ = collectComments;
121 current_ = begin_;
122 lastValueEnd_ = nullptr;
123 lastValue_ = nullptr;
124 commentsBefore_.clear();
125 errors_.clear();
126 while (!nodes_.empty())
127 nodes_.pop();
128 nodes_.push(&root);
129
130 bool successful = readValue();
131 Token token;
132 skipCommentTokens(token);
133 if (collectComments_ && !commentsBefore_.empty())
134 root.setComment(commentsBefore_, commentAfter);
135 if (features_.strictRoot_) {
136 if (!root.isArray() && !root.isObject()) {
137 // Set error location to start of doc, ideally should be first token found
138 // in doc
139 token.type_ = tokenError;
140 token.start_ = beginDoc;
141 token.end_ = endDoc;
142 addError(
143 "A valid JSON document must be either an array or an object value.",
144 token);
145 return false;
146 }
147 }
148 return successful;
149}
150
151bool Reader::readValue() {
152 // readValue() may call itself only if it calls readObject() or ReadArray().
153 // These methods execute nodes_.push() just before and nodes_.pop)() just
154 // after calling readValue(). parse() executes one nodes_.push(), so > instead
155 // of >=.
156 if (nodes_.size() > stackLimit_g)
157 throwRuntimeError("Exceeded stackLimit in readValue().");
158
159 Token token;
160 skipCommentTokens(token);
161 bool successful = true;
162
163 if (collectComments_ && !commentsBefore_.empty()) {
164 currentValue().setComment(commentsBefore_, commentBefore);
165 commentsBefore_.clear();
166 }
167
168 switch (token.type_) {
169 case tokenObjectBegin:
170 successful = readObject(token);
171 currentValue().setOffsetLimit(current_ - begin_);
172 break;
173 case tokenArrayBegin:
174 successful = readArray(token);
175 currentValue().setOffsetLimit(current_ - begin_);
176 break;
177 case tokenNumber:
178 successful = decodeNumber(token);
179 break;
180 case tokenString:
181 successful = decodeString(token);
182 break;
183 case tokenTrue: {
184 Value v(true);
185 currentValue().swapPayload(v);
186 currentValue().setOffsetStart(token.start_ - begin_);
187 currentValue().setOffsetLimit(token.end_ - begin_);
188 } break;
189 case tokenFalse: {
190 Value v(false);
191 currentValue().swapPayload(v);
192 currentValue().setOffsetStart(token.start_ - begin_);
193 currentValue().setOffsetLimit(token.end_ - begin_);
194 } break;
195 case tokenNull: {
196 Value v;
197 currentValue().swapPayload(v);
198 currentValue().setOffsetStart(token.start_ - begin_);
199 currentValue().setOffsetLimit(token.end_ - begin_);
200 } break;
201 case tokenArraySeparator:
202 case tokenObjectEnd:
203 case tokenArrayEnd:
204 if (features_.allowDroppedNullPlaceholders_) {
205 // "Un-read" the current token and mark the current value as a null
206 // token.
207 current_--;
208 Value v;
209 currentValue().swapPayload(v);
210 currentValue().setOffsetStart(current_ - begin_ - 1);
211 currentValue().setOffsetLimit(current_ - begin_);
212 break;
213 } // Else, fall through...
214 default:
215 currentValue().setOffsetStart(token.start_ - begin_);
216 currentValue().setOffsetLimit(token.end_ - begin_);
217 return addError("Syntax error: value, object or array expected.", token);
218 }
219
220 if (collectComments_) {
221 lastValueEnd_ = current_;
222 lastValue_ = &currentValue();
223 }
224
225 return successful;
226}
227
228void Reader::skipCommentTokens(Token& token) {
229 if (features_.allowComments_) {
230 do {
231 readToken(token);
232 } while (token.type_ == tokenComment);
233 } else {
234 readToken(token);
235 }
236}
237
238bool Reader::readToken(Token& token) {
239 skipSpaces();
240 token.start_ = current_;
241 Char c = getNextChar();
242 bool ok = true;
243 switch (c) {
244 case '{':
245 token.type_ = tokenObjectBegin;
246 break;
247 case '}':
248 token.type_ = tokenObjectEnd;
249 break;
250 case '[':
251 token.type_ = tokenArrayBegin;
252 break;
253 case ']':
254 token.type_ = tokenArrayEnd;
255 break;
256 case '"':
257 token.type_ = tokenString;
258 ok = readString();
259 break;
260 case '/':
261 token.type_ = tokenComment;
262 ok = readComment();
263 break;
264 case '0':
265 case '1':
266 case '2':
267 case '3':
268 case '4':
269 case '5':
270 case '6':
271 case '7':
272 case '8':
273 case '9':
274 case '-':
275 token.type_ = tokenNumber;
276 readNumber();
277 break;
278 case 't':
279 token.type_ = tokenTrue;
280 ok = match("rue", 3);
281 break;
282 case 'f':
283 token.type_ = tokenFalse;
284 ok = match("alse", 4);
285 break;
286 case 'n':
287 token.type_ = tokenNull;
288 ok = match("ull", 3);
289 break;
290 case ',':
291 token.type_ = tokenArraySeparator;
292 break;
293 case ':':
294 token.type_ = tokenMemberSeparator;
295 break;
296 case 0:
297 token.type_ = tokenEndOfStream;
298 break;
299 default:
300 ok = false;
301 break;
302 }
303 if (!ok)
304 token.type_ = tokenError;
305 token.end_ = current_;
306 return ok;
307}
308
309void Reader::skipSpaces() {
310 while (current_ != end_) {
311 Char c = *current_;
312 if (c == ' ' || c == '\t' || c == '\r' || c == '\n')
313 ++current_;
314 else
315 break;
316 }
317}
318
319bool Reader::match(const Char* pattern, int patternLength) {
320 if (end_ - current_ < patternLength)
321 return false;
322 int index = patternLength;
323 while (index--)
324 if (current_[index] != pattern[index])
325 return false;
326 current_ += patternLength;
327 return true;
328}
329
330bool Reader::readComment() {
331 Location commentBegin = current_ - 1;
332 Char c = getNextChar();
333 bool successful = false;
334 if (c == '*')
335 successful = readCStyleComment();
336 else if (c == '/')
337 successful = readCppStyleComment();
338 if (!successful)
339 return false;
340
341 if (collectComments_) {
343 if (lastValueEnd_ && !containsNewLine(lastValueEnd_, commentBegin)) {
344 if (c != '*' || !containsNewLine(commentBegin, current_))
345 placement = commentAfterOnSameLine;
346 }
347
348 addComment(commentBegin, current_, placement);
349 }
350 return true;
351}
352
353String Reader::normalizeEOL(Reader::Location begin, Reader::Location end) {
354 String normalized;
355 normalized.reserve(static_cast<size_t>(end - begin));
356 Reader::Location current = begin;
357 while (current != end) {
358 char c = *current++;
359 if (c == '\r') {
360 if (current != end && *current == '\n')
361 // convert dos EOL
362 ++current;
363 // convert Mac EOL
364 normalized += '\n';
365 } else {
366 normalized += c;
367 }
368 }
369 return normalized;
370}
371
372void Reader::addComment(Location begin, Location end,
373 CommentPlacement placement) {
374 assert(collectComments_);
375 const String& normalized = normalizeEOL(begin, end);
376 if (placement == commentAfterOnSameLine) {
377 assert(lastValue_ != nullptr);
378 lastValue_->setComment(normalized, placement);
379 } else {
380 commentsBefore_ += normalized;
381 }
382}
383
384bool Reader::readCStyleComment() {
385 while ((current_ + 1) < end_) {
386 Char c = getNextChar();
387 if (c == '*' && *current_ == '/')
388 break;
389 }
390 return getNextChar() == '/';
391}
392
393bool Reader::readCppStyleComment() {
394 while (current_ != end_) {
395 Char c = getNextChar();
396 if (c == '\n')
397 break;
398 if (c == '\r') {
399 // Consume DOS EOL. It will be normalized in addComment.
400 if (current_ != end_ && *current_ == '\n')
401 getNextChar();
402 // Break on Moc OS 9 EOL.
403 break;
404 }
405 }
406 return true;
407}
408
409void Reader::readNumber() {
410 Location p = current_;
411 char c = '0'; // stopgap for already consumed character
412 // integral part
413 while (c >= '0' && c <= '9')
414 c = (current_ = p) < end_ ? *p++ : '\0';
415 // fractional part
416 if (c == '.') {
417 c = (current_ = p) < end_ ? *p++ : '\0';
418 while (c >= '0' && c <= '9')
419 c = (current_ = p) < end_ ? *p++ : '\0';
420 }
421 // exponential part
422 if (c == 'e' || c == 'E') {
423 c = (current_ = p) < end_ ? *p++ : '\0';
424 if (c == '+' || c == '-')
425 c = (current_ = p) < end_ ? *p++ : '\0';
426 while (c >= '0' && c <= '9')
427 c = (current_ = p) < end_ ? *p++ : '\0';
428 }
429}
430
431bool Reader::readString() {
432 Char c = '\0';
433 while (current_ != end_) {
434 c = getNextChar();
435 if (c == '\\')
436 getNextChar();
437 else if (c == '"')
438 break;
439 }
440 return c == '"';
441}
442
443bool Reader::readObject(Token& token) {
444 Token tokenName;
445 String name;
446 Value init(objectValue);
447 currentValue().swapPayload(init);
448 currentValue().setOffsetStart(token.start_ - begin_);
449 while (readToken(tokenName)) {
450 bool initialTokenOk = true;
451 while (tokenName.type_ == tokenComment && initialTokenOk)
452 initialTokenOk = readToken(tokenName);
453 if (!initialTokenOk)
454 break;
455 if (tokenName.type_ == tokenObjectEnd && name.empty()) // empty object
456 return true;
457 name.clear();
458 if (tokenName.type_ == tokenString) {
459 if (!decodeString(tokenName, name))
460 return recoverFromError(tokenObjectEnd);
461 } else if (tokenName.type_ == tokenNumber && features_.allowNumericKeys_) {
462 Value numberName;
463 if (!decodeNumber(tokenName, numberName))
464 return recoverFromError(tokenObjectEnd);
465 name = numberName.asString();
466 } else {
467 break;
468 }
469
470 Token colon;
471 if (!readToken(colon) || colon.type_ != tokenMemberSeparator) {
472 return addErrorAndRecover("Missing ':' after object member name", colon,
473 tokenObjectEnd);
474 }
475 Value& value = currentValue()[name];
476 nodes_.push(&value);
477 bool ok = readValue();
478 nodes_.pop();
479 if (!ok) // error already set
480 return recoverFromError(tokenObjectEnd);
481
482 Token comma;
483 if (!readToken(comma) ||
484 (comma.type_ != tokenObjectEnd && comma.type_ != tokenArraySeparator &&
485 comma.type_ != tokenComment)) {
486 return addErrorAndRecover("Missing ',' or '}' in object declaration",
487 comma, tokenObjectEnd);
488 }
489 bool finalizeTokenOk = true;
490 while (comma.type_ == tokenComment && finalizeTokenOk)
491 finalizeTokenOk = readToken(comma);
492 if (comma.type_ == tokenObjectEnd)
493 return true;
494 }
495 return addErrorAndRecover("Missing '}' or object member name", tokenName,
496 tokenObjectEnd);
497}
498
499bool Reader::readArray(Token& token) {
500 Value init(arrayValue);
501 currentValue().swapPayload(init);
502 currentValue().setOffsetStart(token.start_ - begin_);
503 skipSpaces();
504 if (current_ != end_ && *current_ == ']') // empty array
505 {
506 Token endArray;
507 readToken(endArray);
508 return true;
509 }
510 int index = 0;
511 for (;;) {
512 Value& value = currentValue()[index++];
513 nodes_.push(&value);
514 bool ok = readValue();
515 nodes_.pop();
516 if (!ok) // error already set
517 return recoverFromError(tokenArrayEnd);
518
519 Token currentToken;
520 // Accept Comment after last item in the array.
521 ok = readToken(currentToken);
522 while (currentToken.type_ == tokenComment && ok) {
523 ok = readToken(currentToken);
524 }
525 bool badTokenType = (currentToken.type_ != tokenArraySeparator &&
526 currentToken.type_ != tokenArrayEnd);
527 if (!ok || badTokenType) {
528 return addErrorAndRecover("Missing ',' or ']' in array declaration",
529 currentToken, tokenArrayEnd);
530 }
531 if (currentToken.type_ == tokenArrayEnd)
532 break;
533 }
534 return true;
535}
536
537bool Reader::decodeNumber(Token& token) {
538 Value decoded;
539 if (!decodeNumber(token, decoded))
540 return false;
541 currentValue().swapPayload(decoded);
542 currentValue().setOffsetStart(token.start_ - begin_);
543 currentValue().setOffsetLimit(token.end_ - begin_);
544 return true;
545}
546
547bool Reader::decodeNumber(Token& token, Value& decoded) {
548 // Attempts to parse the number as an integer. If the number is
549 // larger than the maximum supported value of an integer then
550 // we decode the number as a double.
551 Location current = token.start_;
552 bool isNegative = *current == '-';
553 if (isNegative)
554 ++current;
555 // TODO: Help the compiler do the div and mod at compile time or get rid of
556 // them.
557 Value::LargestUInt maxIntegerValue =
560 Value::LargestUInt threshold = maxIntegerValue / 10;
561 Value::LargestUInt value = 0;
562 while (current < token.end_) {
563 Char c = *current++;
564 if (c < '0' || c > '9')
565 return decodeDouble(token, decoded);
566 auto digit(static_cast<Value::UInt>(c - '0'));
567 if (value >= threshold) {
568 // We've hit or exceeded the max value divided by 10 (rounded down). If
569 // a) we've only just touched the limit, b) this is the last digit, and
570 // c) it's small enough to fit in that rounding delta, we're okay.
571 // Otherwise treat this number as a double to avoid overflow.
572 if (value > threshold || current != token.end_ ||
573 digit > maxIntegerValue % 10) {
574 return decodeDouble(token, decoded);
575 }
576 }
577 value = value * 10 + digit;
578 }
579 if (isNegative && value == maxIntegerValue)
580 decoded = Value::minLargestInt;
581 else if (isNegative)
582 decoded = -Value::LargestInt(value);
583 else if (value <= Value::LargestUInt(Value::maxInt))
584 decoded = Value::LargestInt(value);
585 else
586 decoded = value;
587 return true;
588}
589
590bool Reader::decodeDouble(Token& token) {
591 Value decoded;
592 if (!decodeDouble(token, decoded))
593 return false;
594 currentValue().swapPayload(decoded);
595 currentValue().setOffsetStart(token.start_ - begin_);
596 currentValue().setOffsetLimit(token.end_ - begin_);
597 return true;
598}
599
600bool Reader::decodeDouble(Token& token, Value& decoded) {
601 double value = 0;
602 String buffer(token.start_, token.end_);
603 IStringStream is(buffer);
604 if (!(is >> value)) {
605 if (value == std::numeric_limits<double>::max())
606 value = std::numeric_limits<double>::infinity();
607 else if (value == std::numeric_limits<double>::lowest())
608 value = -std::numeric_limits<double>::infinity();
609 else if (!std::isinf(value))
610 return addError(
611 "'" + String(token.start_, token.end_) + "' is not a number.", token);
612 }
613 decoded = value;
614 return true;
615}
616
617bool Reader::decodeString(Token& token) {
618 String decoded_string;
619 if (!decodeString(token, decoded_string))
620 return false;
621 Value decoded(decoded_string);
622 currentValue().swapPayload(decoded);
623 currentValue().setOffsetStart(token.start_ - begin_);
624 currentValue().setOffsetLimit(token.end_ - begin_);
625 return true;
626}
627
628bool Reader::decodeString(Token& token, String& decoded) {
629 decoded.reserve(static_cast<size_t>(token.end_ - token.start_ - 2));
630 Location current = token.start_ + 1; // skip '"'
631 Location end = token.end_ - 1; // do not include '"'
632 while (current != end) {
633 Char c = *current++;
634 if (c == '"')
635 break;
636 if (c == '\\') {
637 if (current == end)
638 return addError("Empty escape sequence in string", token, current);
639 Char escape = *current++;
640 switch (escape) {
641 case '"':
642 decoded += '"';
643 break;
644 case '/':
645 decoded += '/';
646 break;
647 case '\\':
648 decoded += '\\';
649 break;
650 case 'b':
651 decoded += '\b';
652 break;
653 case 'f':
654 decoded += '\f';
655 break;
656 case 'n':
657 decoded += '\n';
658 break;
659 case 'r':
660 decoded += '\r';
661 break;
662 case 't':
663 decoded += '\t';
664 break;
665 case 'u': {
666 unsigned int unicode;
667 if (!decodeUnicodeCodePoint(token, current, end, unicode))
668 return false;
669 decoded += codePointToUTF8(unicode);
670 } break;
671 default:
672 return addError("Bad escape sequence in string", token, current);
673 }
674 } else {
675 decoded += c;
676 }
677 }
678 return true;
679}
680
681bool Reader::decodeUnicodeCodePoint(Token& token, Location& current,
682 Location end, unsigned int& unicode) {
683
684 if (!decodeUnicodeEscapeSequence(token, current, end, unicode))
685 return false;
686 if (unicode >= 0xD800 && unicode <= 0xDBFF) {
687 // surrogate pairs
688 if (end - current < 6)
689 return addError(
690 "additional six characters expected to parse unicode surrogate pair.",
691 token, current);
692 if (*(current++) == '\\' && *(current++) == 'u') {
693 unsigned int surrogatePair;
694 if (decodeUnicodeEscapeSequence(token, current, end, surrogatePair)) {
695 unicode = 0x10000 + ((unicode & 0x3FF) << 10) + (surrogatePair & 0x3FF);
696 } else
697 return false;
698 } else
699 return addError("expecting another \\u token to begin the second half of "
700 "a unicode surrogate pair",
701 token, current);
702 }
703 return true;
704}
705
706bool Reader::decodeUnicodeEscapeSequence(Token& token, Location& current,
707 Location end,
708 unsigned int& ret_unicode) {
709 if (end - current < 4)
710 return addError(
711 "Bad unicode escape sequence in string: four digits expected.", token,
712 current);
713 int unicode = 0;
714 for (int index = 0; index < 4; ++index) {
715 Char c = *current++;
716 unicode *= 16;
717 if (c >= '0' && c <= '9')
718 unicode += c - '0';
719 else if (c >= 'a' && c <= 'f')
720 unicode += c - 'a' + 10;
721 else if (c >= 'A' && c <= 'F')
722 unicode += c - 'A' + 10;
723 else
724 return addError(
725 "Bad unicode escape sequence in string: hexadecimal digit expected.",
726 token, current);
727 }
728 ret_unicode = static_cast<unsigned int>(unicode);
729 return true;
730}
731
732bool Reader::addError(const String& message, Token& token, Location extra) {
733 ErrorInfo info;
734 info.token_ = token;
735 info.message_ = message;
736 info.extra_ = extra;
737 errors_.push_back(info);
738 return false;
739}
740
741bool Reader::recoverFromError(TokenType skipUntilToken) {
742 size_t const errorCount = errors_.size();
743 Token skip;
744 for (;;) {
745 if (!readToken(skip))
746 errors_.resize(errorCount); // discard errors caused by recovery
747 if (skip.type_ == skipUntilToken || skip.type_ == tokenEndOfStream)
748 break;
749 }
750 errors_.resize(errorCount);
751 return false;
752}
753
754bool Reader::addErrorAndRecover(const String& message, Token& token,
755 TokenType skipUntilToken) {
756 addError(message, token);
757 return recoverFromError(skipUntilToken);
758}
759
760Value& Reader::currentValue() { return *(nodes_.top()); }
761
762Reader::Char Reader::getNextChar() {
763 if (current_ == end_)
764 return 0;
765 return *current_++;
766}
767
768void Reader::getLocationLineAndColumn(Location location, int& line,
769 int& column) const {
770 Location current = begin_;
771 Location lastLineStart = current;
772 line = 0;
773 while (current < location && current != end_) {
774 Char c = *current++;
775 if (c == '\r') {
776 if (*current == '\n')
777 ++current;
778 lastLineStart = current;
779 ++line;
780 } else if (c == '\n') {
781 lastLineStart = current;
782 ++line;
783 }
784 }
785 // column & line start at 1
786 column = int(location - lastLineStart) + 1;
787 ++line;
788}
789
790String Reader::getLocationLineAndColumn(Location location) const {
791 int line, column;
792 getLocationLineAndColumn(location, line, column);
793 char buffer[18 + 16 + 16 + 1];
794 jsoncpp_snprintf(buffer, sizeof(buffer), "Line %d, Column %d", line, column);
795 return buffer;
796}
797
798// Deprecated. Preserved for backward compatibility
799String Reader::getFormatedErrorMessages() const {
801}
802
804 String formattedMessage;
805 for (const auto& error : errors_) {
806 formattedMessage +=
807 "* " + getLocationLineAndColumn(error.token_.start_) + "\n";
808 formattedMessage += " " + error.message_ + "\n";
809 if (error.extra_)
810 formattedMessage +=
811 "See " + getLocationLineAndColumn(error.extra_) + " for detail.\n";
812 }
813 return formattedMessage;
814}
815
816std::vector<Reader::StructuredError> Reader::getStructuredErrors() const {
817 std::vector<Reader::StructuredError> allErrors;
818 for (const auto& error : errors_) {
819 Reader::StructuredError structured;
820 structured.offset_start = error.token_.start_ - begin_;
821 structured.offset_limit = error.token_.end_ - begin_;
822 structured.message = error.message_;
823 allErrors.push_back(structured);
824 }
825 return allErrors;
826}
827
828bool Reader::pushError(const Value& value, const String& message) {
829 ptrdiff_t const length = end_ - begin_;
830 if (value.getOffsetStart() > length || value.getOffsetLimit() > length)
831 return false;
832 Token token;
833 token.type_ = tokenError;
834 token.start_ = begin_ + value.getOffsetStart();
835 token.end_ = begin_ + value.getOffsetLimit();
836 ErrorInfo info;
837 info.token_ = token;
838 info.message_ = message;
839 info.extra_ = nullptr;
840 errors_.push_back(info);
841 return true;
842}
843
844bool Reader::pushError(const Value& value, const String& message,
845 const Value& extra) {
846 ptrdiff_t const length = end_ - begin_;
847 if (value.getOffsetStart() > length || value.getOffsetLimit() > length ||
848 extra.getOffsetLimit() > length)
849 return false;
850 Token token;
851 token.type_ = tokenError;
852 token.start_ = begin_ + value.getOffsetStart();
853 token.end_ = begin_ + value.getOffsetLimit();
854 ErrorInfo info;
855 info.token_ = token;
856 info.message_ = message;
857 info.extra_ = begin_ + extra.getOffsetStart();
858 errors_.push_back(info);
859 return true;
860}
861
862bool Reader::good() const { return errors_.empty(); }
863
// Originally copied from the Features class (now deprecated), used internally
// for features implementation.
//
// Plain bundle of parser options read by OurReader. Every member carries an
// in-class initializer, so a default-initialized instance is fully defined
// and equal to what all() returns.
class OurFeatures {
public:
  // Returns an instance with every flag false and stackLimit_ zero.
  static OurFeatures all();
  bool allowComments_ = false;
  bool allowTrailingCommas_ = false;
  bool strictRoot_ = false;
  bool allowDroppedNullPlaceholders_ = false;
  bool allowNumericKeys_ = false;
  bool allowSingleQuotes_ = false;
  bool failIfExtra_ = false;
  bool rejectDupKeys_ = false;
  bool allowSpecialFloats_ = false;
  bool skipBom_ = false;
  size_t stackLimit_ = 0;
}; // OurFeatures
881
882OurFeatures OurFeatures::all() { return {}; }
883
// Implementation of class OurReader
// ////////////////////////////////
886
887// Originally copied from the Reader class (now deprecated), used internally
888// for implementing JSON reading.
889class OurReader {
890public:
891 using Char = char;
892 using Location = const Char*;
893 struct StructuredError {
894 ptrdiff_t offset_start;
895 ptrdiff_t offset_limit;
896 String message;
897 };
898
899 explicit OurReader(OurFeatures const& features);
900 bool parse(const char* beginDoc, const char* endDoc, Value& root,
901 bool collectComments = true);
902 String getFormattedErrorMessages() const;
903 std::vector<StructuredError> getStructuredErrors() const;
904
905private:
906 OurReader(OurReader const&); // no impl
907 void operator=(OurReader const&); // no impl
908
909 enum TokenType {
910 tokenEndOfStream = 0,
911 tokenObjectBegin,
912 tokenObjectEnd,
913 tokenArrayBegin,
914 tokenArrayEnd,
915 tokenString,
916 tokenNumber,
917 tokenTrue,
918 tokenFalse,
919 tokenNull,
920 tokenNaN,
921 tokenPosInf,
922 tokenNegInf,
923 tokenArraySeparator,
924 tokenMemberSeparator,
925 tokenComment,
926 tokenError
927 };
928
929 class Token {
930 public:
931 TokenType type_;
932 Location start_;
933 Location end_;
934 };
935
936 class ErrorInfo {
937 public:
938 Token token_;
939 String message_;
940 Location extra_;
941 };
942
943 using Errors = std::deque<ErrorInfo>;
944
945 bool readToken(Token& token);
946 void skipSpaces();
947 void skipBom(bool skipBom);
948 bool match(const Char* pattern, int patternLength);
949 bool readComment();
950 bool readCStyleComment(bool* containsNewLineResult);
951 bool readCppStyleComment();
952 bool readString();
953 bool readStringSingleQuote();
954 bool readNumber(bool checkInf);
955 bool readValue();
956 bool readObject(Token& token);
957 bool readArray(Token& token);
958 bool decodeNumber(Token& token);
959 bool decodeNumber(Token& token, Value& decoded);
960 bool decodeString(Token& token);
961 bool decodeString(Token& token, String& decoded);
962 bool decodeDouble(Token& token);
963 bool decodeDouble(Token& token, Value& decoded);
964 bool decodeUnicodeCodePoint(Token& token, Location& current, Location end,
965 unsigned int& unicode);
966 bool decodeUnicodeEscapeSequence(Token& token, Location& current,
967 Location end, unsigned int& unicode);
968 bool addError(const String& message, Token& token, Location extra = nullptr);
969 bool recoverFromError(TokenType skipUntilToken);
970 bool addErrorAndRecover(const String& message, Token& token,
971 TokenType skipUntilToken);
972 void skipUntilSpace();
973 Value& currentValue();
974 Char getNextChar();
975 void getLocationLineAndColumn(Location location, int& line,
976 int& column) const;
977 String getLocationLineAndColumn(Location location) const;
978 void addComment(Location begin, Location end, CommentPlacement placement);
979 void skipCommentTokens(Token& token);
980
981 static String normalizeEOL(Location begin, Location end);
982 static bool containsNewLine(Location begin, Location end);
983
984 using Nodes = std::stack<Value*>;
985
986 Nodes nodes_{};
987 Errors errors_{};
988 String document_{};
989 Location begin_ = nullptr;
990 Location end_ = nullptr;
991 Location current_ = nullptr;
992 Location lastValueEnd_ = nullptr;
993 Value* lastValue_ = nullptr;
994 bool lastValueHasAComment_ = false;
995 String commentsBefore_{};
996
997 OurFeatures const features_;
998 bool collectComments_ = false;
999}; // OurReader
1000
// Complete copy of the Reader implementation, adapted for OurReader.
1002
1003bool OurReader::containsNewLine(OurReader::Location begin,
1004 OurReader::Location end) {
1005 return std::any_of(begin, end, [](char b) { return b == '\n' || b == '\r'; });
1006}
1007
1008OurReader::OurReader(OurFeatures const& features) : features_(features) {}
1009
1010bool OurReader::parse(const char* beginDoc, const char* endDoc, Value& root,
1011 bool collectComments) {
1012 if (!features_.allowComments_) {
1013 collectComments = false;
1014 }
1015
1016 begin_ = beginDoc;
1017 end_ = endDoc;
1018 collectComments_ = collectComments;
1019 current_ = begin_;
1020 lastValueEnd_ = nullptr;
1021 lastValue_ = nullptr;
1022 commentsBefore_.clear();
1023 errors_.clear();
1024 while (!nodes_.empty())
1025 nodes_.pop();
1026 nodes_.push(&root);
1027
1028 // skip byte order mark if it exists at the beginning of the UTF-8 text.
1029 skipBom(features_.skipBom_);
1030 bool successful = readValue();
1031 nodes_.pop();
1032 Token token;
1033 skipCommentTokens(token);
1034 if (features_.failIfExtra_ && (token.type_ != tokenEndOfStream)) {
1035 addError("Extra non-whitespace after JSON value.", token);
1036 return false;
1037 }
1038 if (collectComments_ && !commentsBefore_.empty())
1039 root.setComment(commentsBefore_, commentAfter);
1040 if (features_.strictRoot_) {
1041 if (!root.isArray() && !root.isObject()) {
1042 // Set error location to start of doc, ideally should be first token found
1043 // in doc
1044 token.type_ = tokenError;
1045 token.start_ = beginDoc;
1046 token.end_ = endDoc;
1047 addError(
1048 "A valid JSON document must be either an array or an object value.",
1049 token);
1050 return false;
1051 }
1052 }
1053 return successful;
1054}
1055
1056bool OurReader::readValue() {
1057 // To preserve the old behaviour we cast size_t to int.
1058 if (nodes_.size() > features_.stackLimit_)
1059 throwRuntimeError("Exceeded stackLimit in readValue().");
1060 Token token;
1061 skipCommentTokens(token);
1062 bool successful = true;
1063
1064 if (collectComments_ && !commentsBefore_.empty()) {
1065 currentValue().setComment(commentsBefore_, commentBefore);
1066 commentsBefore_.clear();
1067 }
1068
1069 switch (token.type_) {
1070 case tokenObjectBegin:
1071 successful = readObject(token);
1072 currentValue().setOffsetLimit(current_ - begin_);
1073 break;
1074 case tokenArrayBegin:
1075 successful = readArray(token);
1076 currentValue().setOffsetLimit(current_ - begin_);
1077 break;
1078 case tokenNumber:
1079 successful = decodeNumber(token);
1080 break;
1081 case tokenString:
1082 successful = decodeString(token);
1083 break;
1084 case tokenTrue: {
1085 Value v(true);
1086 currentValue().swapPayload(v);
1087 currentValue().setOffsetStart(token.start_ - begin_);
1088 currentValue().setOffsetLimit(token.end_ - begin_);
1089 } break;
1090 case tokenFalse: {
1091 Value v(false);
1092 currentValue().swapPayload(v);
1093 currentValue().setOffsetStart(token.start_ - begin_);
1094 currentValue().setOffsetLimit(token.end_ - begin_);
1095 } break;
1096 case tokenNull: {
1097 Value v;
1098 currentValue().swapPayload(v);
1099 currentValue().setOffsetStart(token.start_ - begin_);
1100 currentValue().setOffsetLimit(token.end_ - begin_);
1101 } break;
1102 case tokenNaN: {
1103 Value v(std::numeric_limits<double>::quiet_NaN());
1104 currentValue().swapPayload(v);
1105 currentValue().setOffsetStart(token.start_ - begin_);
1106 currentValue().setOffsetLimit(token.end_ - begin_);
1107 } break;
1108 case tokenPosInf: {
1109 Value v(std::numeric_limits<double>::infinity());
1110 currentValue().swapPayload(v);
1111 currentValue().setOffsetStart(token.start_ - begin_);
1112 currentValue().setOffsetLimit(token.end_ - begin_);
1113 } break;
1114 case tokenNegInf: {
1115 Value v(-std::numeric_limits<double>::infinity());
1116 currentValue().swapPayload(v);
1117 currentValue().setOffsetStart(token.start_ - begin_);
1118 currentValue().setOffsetLimit(token.end_ - begin_);
1119 } break;
1120 case tokenArraySeparator:
1121 case tokenObjectEnd:
1122 case tokenArrayEnd:
1123 if (features_.allowDroppedNullPlaceholders_) {
1124 // "Un-read" the current token and mark the current value as a null
1125 // token.
1126 current_--;
1127 Value v;
1128 currentValue().swapPayload(v);
1129 currentValue().setOffsetStart(current_ - begin_ - 1);
1130 currentValue().setOffsetLimit(current_ - begin_);
1131 break;
1132 } // else, fall through ...
1133 default:
1134 currentValue().setOffsetStart(token.start_ - begin_);
1135 currentValue().setOffsetLimit(token.end_ - begin_);
1136 return addError("Syntax error: value, object or array expected.", token);
1137 }
1138
1139 if (collectComments_) {
1140 lastValueEnd_ = current_;
1141 lastValueHasAComment_ = false;
1142 lastValue_ = &currentValue();
1143 }
1144
1145 return successful;
1146}
1147
1148void OurReader::skipCommentTokens(Token& token) {
1149 if (features_.allowComments_) {
1150 do {
1151 readToken(token);
1152 } while (token.type_ == tokenComment);
1153 } else {
1154 readToken(token);
1155 }
1156}
1157
1158bool OurReader::readToken(Token& token) {
1159 skipSpaces();
1160 token.start_ = current_;
1161 Char c = getNextChar();
1162 bool ok = true;
1163 switch (c) {
1164 case '{':
1165 token.type_ = tokenObjectBegin;
1166 break;
1167 case '}':
1168 token.type_ = tokenObjectEnd;
1169 break;
1170 case '[':
1171 token.type_ = tokenArrayBegin;
1172 break;
1173 case ']':
1174 token.type_ = tokenArrayEnd;
1175 break;
1176 case '"':
1177 token.type_ = tokenString;
1178 ok = readString();
1179 break;
1180 case '\'':
1181 if (features_.allowSingleQuotes_) {
1182 token.type_ = tokenString;
1183 ok = readStringSingleQuote();
1184 } else {
1185 // If we don't allow single quotes, this is a failure case.
1186 ok = false;
1187 }
1188 break;
1189 case '/':
1190 token.type_ = tokenComment;
1191 ok = readComment();
1192 break;
1193 case '0':
1194 case '1':
1195 case '2':
1196 case '3':
1197 case '4':
1198 case '5':
1199 case '6':
1200 case '7':
1201 case '8':
1202 case '9':
1203 token.type_ = tokenNumber;
1204 readNumber(false);
1205 break;
1206 case '-':
1207 if (readNumber(true)) {
1208 token.type_ = tokenNumber;
1209 } else {
1210 token.type_ = tokenNegInf;
1211 ok = features_.allowSpecialFloats_ && match("nfinity", 7);
1212 }
1213 break;
1214 case '+':
1215 if (readNumber(true)) {
1216 token.type_ = tokenNumber;
1217 } else {
1218 token.type_ = tokenPosInf;
1219 ok = features_.allowSpecialFloats_ && match("nfinity", 7);
1220 }
1221 break;
1222 case 't':
1223 token.type_ = tokenTrue;
1224 ok = match("rue", 3);
1225 break;
1226 case 'f':
1227 token.type_ = tokenFalse;
1228 ok = match("alse", 4);
1229 break;
1230 case 'n':
1231 token.type_ = tokenNull;
1232 ok = match("ull", 3);
1233 break;
1234 case 'N':
1235 if (features_.allowSpecialFloats_) {
1236 token.type_ = tokenNaN;
1237 ok = match("aN", 2);
1238 } else {
1239 ok = false;
1240 }
1241 break;
1242 case 'I':
1243 if (features_.allowSpecialFloats_) {
1244 token.type_ = tokenPosInf;
1245 ok = match("nfinity", 7);
1246 } else {
1247 ok = false;
1248 }
1249 break;
1250 case ',':
1251 token.type_ = tokenArraySeparator;
1252 break;
1253 case ':':
1254 token.type_ = tokenMemberSeparator;
1255 break;
1256 case 0:
1257 token.type_ = tokenEndOfStream;
1258 break;
1259 default:
1260 ok = false;
1261 break;
1262 }
1263 if (!ok)
1264 token.type_ = tokenError;
1265 token.end_ = current_;
1266 return ok;
1267}
1268
1269void OurReader::skipSpaces() {
1270 while (current_ != end_) {
1271 Char c = *current_;
1272 if (c == ' ' || c == '\t' || c == '\r' || c == '\n')
1273 ++current_;
1274 else
1275 break;
1276 }
1277}
1278
1279void OurReader::skipBom(bool skipBom) {
1280 // The default behavior is to skip BOM.
1281 if (skipBom) {
1282 if ((end_ - begin_) >= 3 && strncmp(begin_, "\xEF\xBB\xBF", 3) == 0) {
1283 begin_ += 3;
1284 current_ = begin_;
1285 }
1286 }
1287}
1288
1289bool OurReader::match(const Char* pattern, int patternLength) {
1290 if (end_ - current_ < patternLength)
1291 return false;
1292 int index = patternLength;
1293 while (index--)
1294 if (current_[index] != pattern[index])
1295 return false;
1296 current_ += patternLength;
1297 return true;
1298}
1299
1300bool OurReader::readComment() {
1301 const Location commentBegin = current_ - 1;
1302 const Char c = getNextChar();
1303 bool successful = false;
1304 bool cStyleWithEmbeddedNewline = false;
1305
1306 const bool isCStyleComment = (c == '*');
1307 const bool isCppStyleComment = (c == '/');
1308 if (isCStyleComment) {
1309 successful = readCStyleComment(&cStyleWithEmbeddedNewline);
1310 } else if (isCppStyleComment) {
1311 successful = readCppStyleComment();
1312 }
1313
1314 if (!successful)
1315 return false;
1316
1317 if (collectComments_) {
1318 CommentPlacement placement = commentBefore;
1319
1320 if (!lastValueHasAComment_) {
1321 if (lastValueEnd_ && !containsNewLine(lastValueEnd_, commentBegin)) {
1322 if (isCppStyleComment || !cStyleWithEmbeddedNewline) {
1323 placement = commentAfterOnSameLine;
1324 lastValueHasAComment_ = true;
1325 }
1326 }
1327 }
1328
1329 addComment(commentBegin, current_, placement);
1330 }
1331 return true;
1332}
1333
1334String OurReader::normalizeEOL(OurReader::Location begin,
1335 OurReader::Location end) {
1336 String normalized;
1337 normalized.reserve(static_cast<size_t>(end - begin));
1338 OurReader::Location current = begin;
1339 while (current != end) {
1340 char c = *current++;
1341 if (c == '\r') {
1342 if (current != end && *current == '\n')
1343 // convert dos EOL
1344 ++current;
1345 // convert Mac EOL
1346 normalized += '\n';
1347 } else {
1348 normalized += c;
1349 }
1350 }
1351 return normalized;
1352}
1353
1354void OurReader::addComment(Location begin, Location end,
1355 CommentPlacement placement) {
1356 assert(collectComments_);
1357 const String& normalized = normalizeEOL(begin, end);
1358 if (placement == commentAfterOnSameLine) {
1359 assert(lastValue_ != nullptr);
1360 lastValue_->setComment(normalized, placement);
1361 } else {
1362 commentsBefore_ += normalized;
1363 }
1364}
1365
1366bool OurReader::readCStyleComment(bool* containsNewLineResult) {
1367 *containsNewLineResult = false;
1368
1369 while ((current_ + 1) < end_) {
1370 Char c = getNextChar();
1371 if (c == '*' && *current_ == '/')
1372 break;
1373 if (c == '\n')
1374 *containsNewLineResult = true;
1375 }
1376
1377 return getNextChar() == '/';
1378}
1379
1380bool OurReader::readCppStyleComment() {
1381 while (current_ != end_) {
1382 Char c = getNextChar();
1383 if (c == '\n')
1384 break;
1385 if (c == '\r') {
1386 // Consume DOS EOL. It will be normalized in addComment.
1387 if (current_ != end_ && *current_ == '\n')
1388 getNextChar();
1389 // Break on Moc OS 9 EOL.
1390 break;
1391 }
1392 }
1393 return true;
1394}
1395
1396bool OurReader::readNumber(bool checkInf) {
1397 Location p = current_;
1398 if (checkInf && p != end_ && *p == 'I') {
1399 current_ = ++p;
1400 return false;
1401 }
1402 char c = '0'; // stopgap for already consumed character
1403 // integral part
1404 while (c >= '0' && c <= '9')
1405 c = (current_ = p) < end_ ? *p++ : '\0';
1406 // fractional part
1407 if (c == '.') {
1408 c = (current_ = p) < end_ ? *p++ : '\0';
1409 while (c >= '0' && c <= '9')
1410 c = (current_ = p) < end_ ? *p++ : '\0';
1411 }
1412 // exponential part
1413 if (c == 'e' || c == 'E') {
1414 c = (current_ = p) < end_ ? *p++ : '\0';
1415 if (c == '+' || c == '-')
1416 c = (current_ = p) < end_ ? *p++ : '\0';
1417 while (c >= '0' && c <= '9')
1418 c = (current_ = p) < end_ ? *p++ : '\0';
1419 }
1420 return true;
1421}
1422bool OurReader::readString() {
1423 Char c = 0;
1424 while (current_ != end_) {
1425 c = getNextChar();
1426 if (c == '\\')
1427 getNextChar();
1428 else if (c == '"')
1429 break;
1430 }
1431 return c == '"';
1432}
1433
1434bool OurReader::readStringSingleQuote() {
1435 Char c = 0;
1436 while (current_ != end_) {
1437 c = getNextChar();
1438 if (c == '\\')
1439 getNextChar();
1440 else if (c == '\'')
1441 break;
1442 }
1443 return c == '\'';
1444}
1445
1446bool OurReader::readObject(Token& token) {
1447 Token tokenName;
1448 String name;
1449 Value init(objectValue);
1450 currentValue().swapPayload(init);
1451 currentValue().setOffsetStart(token.start_ - begin_);
1452 while (readToken(tokenName)) {
1453 bool initialTokenOk = true;
1454 while (tokenName.type_ == tokenComment && initialTokenOk)
1455 initialTokenOk = readToken(tokenName);
1456 if (!initialTokenOk)
1457 break;
1458 if (tokenName.type_ == tokenObjectEnd &&
1459 (name.empty() ||
1460 features_.allowTrailingCommas_)) // empty object or trailing comma
1461 return true;
1462 name.clear();
1463 if (tokenName.type_ == tokenString) {
1464 if (!decodeString(tokenName, name))
1465 return recoverFromError(tokenObjectEnd);
1466 } else if (tokenName.type_ == tokenNumber && features_.allowNumericKeys_) {
1467 Value numberName;
1468 if (!decodeNumber(tokenName, numberName))
1469 return recoverFromError(tokenObjectEnd);
1470 name = numberName.asString();
1471 } else {
1472 break;
1473 }
1474 if (name.length() >= (1U << 30))
1475 throwRuntimeError("keylength >= 2^30");
1476 if (features_.rejectDupKeys_ && currentValue().isMember(name)) {
1477 String msg = "Duplicate key: '" + name + "'";
1478 return addErrorAndRecover(msg, tokenName, tokenObjectEnd);
1479 }
1480
1481 Token colon;
1482 if (!readToken(colon) || colon.type_ != tokenMemberSeparator) {
1483 return addErrorAndRecover("Missing ':' after object member name", colon,
1484 tokenObjectEnd);
1485 }
1486 Value& value = currentValue()[name];
1487 nodes_.push(&value);
1488 bool ok = readValue();
1489 nodes_.pop();
1490 if (!ok) // error already set
1491 return recoverFromError(tokenObjectEnd);
1492
1493 Token comma;
1494 if (!readToken(comma) ||
1495 (comma.type_ != tokenObjectEnd && comma.type_ != tokenArraySeparator &&
1496 comma.type_ != tokenComment)) {
1497 return addErrorAndRecover("Missing ',' or '}' in object declaration",
1498 comma, tokenObjectEnd);
1499 }
1500 bool finalizeTokenOk = true;
1501 while (comma.type_ == tokenComment && finalizeTokenOk)
1502 finalizeTokenOk = readToken(comma);
1503 if (comma.type_ == tokenObjectEnd)
1504 return true;
1505 }
1506 return addErrorAndRecover("Missing '}' or object member name", tokenName,
1507 tokenObjectEnd);
1508}
1509
1510bool OurReader::readArray(Token& token) {
1511 Value init(arrayValue);
1512 currentValue().swapPayload(init);
1513 currentValue().setOffsetStart(token.start_ - begin_);
1514 int index = 0;
1515 for (;;) {
1516 skipSpaces();
1517 if (current_ != end_ && *current_ == ']' &&
1518 (index == 0 ||
1519 (features_.allowTrailingCommas_ &&
1520 !features_.allowDroppedNullPlaceholders_))) // empty array or trailing
1521 // comma
1522 {
1523 Token endArray;
1524 readToken(endArray);
1525 return true;
1526 }
1527 Value& value = currentValue()[index++];
1528 nodes_.push(&value);
1529 bool ok = readValue();
1530 nodes_.pop();
1531 if (!ok) // error already set
1532 return recoverFromError(tokenArrayEnd);
1533
1534 Token currentToken;
1535 // Accept Comment after last item in the array.
1536 ok = readToken(currentToken);
1537 while (currentToken.type_ == tokenComment && ok) {
1538 ok = readToken(currentToken);
1539 }
1540 bool badTokenType = (currentToken.type_ != tokenArraySeparator &&
1541 currentToken.type_ != tokenArrayEnd);
1542 if (!ok || badTokenType) {
1543 return addErrorAndRecover("Missing ',' or ']' in array declaration",
1544 currentToken, tokenArrayEnd);
1545 }
1546 if (currentToken.type_ == tokenArrayEnd)
1547 break;
1548 }
1549 return true;
1550}
1551
1552bool OurReader::decodeNumber(Token& token) {
1553 Value decoded;
1554 if (!decodeNumber(token, decoded))
1555 return false;
1556 currentValue().swapPayload(decoded);
1557 currentValue().setOffsetStart(token.start_ - begin_);
1558 currentValue().setOffsetLimit(token.end_ - begin_);
1559 return true;
1560}
1561
1562bool OurReader::decodeNumber(Token& token, Value& decoded) {
1563 // Attempts to parse the number as an integer. If the number is
1564 // larger than the maximum supported value of an integer then
1565 // we decode the number as a double.
1566 Location current = token.start_;
1567 const bool isNegative = *current == '-';
1568 if (isNegative) {
1569 ++current;
1570 }
1571
1572 // We assume we can represent the largest and smallest integer types as
1573 // unsigned integers with separate sign. This is only true if they can fit
1574 // into an unsigned integer.
1576 "Int must be smaller than UInt");
1577
1578 // We need to convert minLargestInt into a positive number. The easiest way
1579 // to do this conversion is to assume our "threshold" value of minLargestInt
1580 // divided by 10 can fit in maxLargestInt when absolute valued. This should
1581 // be a safe assumption.
1583 "The absolute value of minLargestInt must be greater than or "
1584 "equal to maxLargestInt");
1585 static_assert(Value::minLargestInt / 10 >= -Value::maxLargestInt,
1586 "The absolute value of minLargestInt must be only 1 magnitude "
1587 "larger than maxLargest Int");
1588
1589 static constexpr Value::LargestUInt positive_threshold =
1591 static constexpr Value::UInt positive_last_digit = Value::maxLargestUInt % 10;
1592
1593 // For the negative values, we have to be more careful. Since typically
1594 // -Value::minLargestInt will cause an overflow, we first divide by 10 and
1595 // then take the inverse. This assumes that minLargestInt is only a single
1596 // power of 10 different in magnitude, which we check above. For the last
1597 // digit, we take the modulus before negating for the same reason.
1598 static constexpr auto negative_threshold =
1600 static constexpr auto negative_last_digit =
1602
1603 const Value::LargestUInt threshold =
1604 isNegative ? negative_threshold : positive_threshold;
1605 const Value::UInt max_last_digit =
1606 isNegative ? negative_last_digit : positive_last_digit;
1607
1608 Value::LargestUInt value = 0;
1609 while (current < token.end_) {
1610 Char c = *current++;
1611 if (c < '0' || c > '9')
1612 return decodeDouble(token, decoded);
1613
1614 const auto digit(static_cast<Value::UInt>(c - '0'));
1615 if (value >= threshold) {
1616 // We've hit or exceeded the max value divided by 10 (rounded down). If
1617 // a) we've only just touched the limit, meaning value == threshold,
1618 // b) this is the last digit, or
1619 // c) it's small enough to fit in that rounding delta, we're okay.
1620 // Otherwise treat this number as a double to avoid overflow.
1621 if (value > threshold || current != token.end_ ||
1622 digit > max_last_digit) {
1623 return decodeDouble(token, decoded);
1624 }
1625 }
1626 value = value * 10 + digit;
1627 }
1628
1629 if (isNegative) {
1630 // We use the same magnitude assumption here, just in case.
1631 const auto last_digit = static_cast<Value::UInt>(value % 10);
1632 decoded = -Value::LargestInt(value / 10) * 10 - last_digit;
1633 } else if (value <= Value::LargestUInt(Value::maxLargestInt)) {
1634 decoded = Value::LargestInt(value);
1635 } else {
1636 decoded = value;
1637 }
1638
1639 return true;
1640}
1641
1642bool OurReader::decodeDouble(Token& token) {
1643 Value decoded;
1644 if (!decodeDouble(token, decoded))
1645 return false;
1646 currentValue().swapPayload(decoded);
1647 currentValue().setOffsetStart(token.start_ - begin_);
1648 currentValue().setOffsetLimit(token.end_ - begin_);
1649 return true;
1650}
1651
1652bool OurReader::decodeDouble(Token& token, Value& decoded) {
1653 double value = 0;
1654 const String buffer(token.start_, token.end_);
1655 IStringStream is(buffer);
1656 if (!(is >> value)) {
1657 if (value == std::numeric_limits<double>::max())
1658 value = std::numeric_limits<double>::infinity();
1659 else if (value == std::numeric_limits<double>::lowest())
1660 value = -std::numeric_limits<double>::infinity();
1661 else if (!std::isinf(value))
1662 return addError(
1663 "'" + String(token.start_, token.end_) + "' is not a number.", token);
1664 }
1665 decoded = value;
1666 return true;
1667}
1668
1669bool OurReader::decodeString(Token& token) {
1670 String decoded_string;
1671 if (!decodeString(token, decoded_string))
1672 return false;
1673 Value decoded(decoded_string);
1674 currentValue().swapPayload(decoded);
1675 currentValue().setOffsetStart(token.start_ - begin_);
1676 currentValue().setOffsetLimit(token.end_ - begin_);
1677 return true;
1678}
1679
1680bool OurReader::decodeString(Token& token, String& decoded) {
1681 decoded.reserve(static_cast<size_t>(token.end_ - token.start_ - 2));
1682 Location current = token.start_ + 1; // skip '"'
1683 Location end = token.end_ - 1; // do not include '"'
1684 while (current != end) {
1685 Char c = *current++;
1686 if (c == '"')
1687 break;
1688 if (c == '\\') {
1689 if (current == end)
1690 return addError("Empty escape sequence in string", token, current);
1691 Char escape = *current++;
1692 switch (escape) {
1693 case '"':
1694 decoded += '"';
1695 break;
1696 case '/':
1697 decoded += '/';
1698 break;
1699 case '\\':
1700 decoded += '\\';
1701 break;
1702 case 'b':
1703 decoded += '\b';
1704 break;
1705 case 'f':
1706 decoded += '\f';
1707 break;
1708 case 'n':
1709 decoded += '\n';
1710 break;
1711 case 'r':
1712 decoded += '\r';
1713 break;
1714 case 't':
1715 decoded += '\t';
1716 break;
1717 case 'u': {
1718 unsigned int unicode;
1719 if (!decodeUnicodeCodePoint(token, current, end, unicode))
1720 return false;
1721 decoded += codePointToUTF8(unicode);
1722 } break;
1723 default:
1724 return addError("Bad escape sequence in string", token, current);
1725 }
1726 } else {
1727 decoded += c;
1728 }
1729 }
1730 return true;
1731}
1732
1733bool OurReader::decodeUnicodeCodePoint(Token& token, Location& current,
1734 Location end, unsigned int& unicode) {
1735
1736 if (!decodeUnicodeEscapeSequence(token, current, end, unicode))
1737 return false;
1738 if (unicode >= 0xD800 && unicode <= 0xDBFF) {
1739 // surrogate pairs
1740 if (end - current < 6)
1741 return addError(
1742 "additional six characters expected to parse unicode surrogate pair.",
1743 token, current);
1744 if (*(current++) == '\\' && *(current++) == 'u') {
1745 unsigned int surrogatePair;
1746 if (decodeUnicodeEscapeSequence(token, current, end, surrogatePair)) {
1747 unicode = 0x10000 + ((unicode & 0x3FF) << 10) + (surrogatePair & 0x3FF);
1748 } else
1749 return false;
1750 } else
1751 return addError("expecting another \\u token to begin the second half of "
1752 "a unicode surrogate pair",
1753 token, current);
1754 }
1755 return true;
1756}
1757
1758bool OurReader::decodeUnicodeEscapeSequence(Token& token, Location& current,
1759 Location end,
1760 unsigned int& ret_unicode) {
1761 if (end - current < 4)
1762 return addError(
1763 "Bad unicode escape sequence in string: four digits expected.", token,
1764 current);
1765 int unicode = 0;
1766 for (int index = 0; index < 4; ++index) {
1767 Char c = *current++;
1768 unicode *= 16;
1769 if (c >= '0' && c <= '9')
1770 unicode += c - '0';
1771 else if (c >= 'a' && c <= 'f')
1772 unicode += c - 'a' + 10;
1773 else if (c >= 'A' && c <= 'F')
1774 unicode += c - 'A' + 10;
1775 else
1776 return addError(
1777 "Bad unicode escape sequence in string: hexadecimal digit expected.",
1778 token, current);
1779 }
1780 ret_unicode = static_cast<unsigned int>(unicode);
1781 return true;
1782}
1783
1784bool OurReader::addError(const String& message, Token& token, Location extra) {
1785 ErrorInfo info;
1786 info.token_ = token;
1787 info.message_ = message;
1788 info.extra_ = extra;
1789 errors_.push_back(info);
1790 return false;
1791}
1792
1793bool OurReader::recoverFromError(TokenType skipUntilToken) {
1794 size_t errorCount = errors_.size();
1795 Token skip;
1796 for (;;) {
1797 if (!readToken(skip))
1798 errors_.resize(errorCount); // discard errors caused by recovery
1799 if (skip.type_ == skipUntilToken || skip.type_ == tokenEndOfStream)
1800 break;
1801 }
1802 errors_.resize(errorCount);
1803 return false;
1804}
1805
1806bool OurReader::addErrorAndRecover(const String& message, Token& token,
1807 TokenType skipUntilToken) {
1808 addError(message, token);
1809 return recoverFromError(skipUntilToken);
1810}
1811
1812Value& OurReader::currentValue() { return *(nodes_.top()); }
1813
1814OurReader::Char OurReader::getNextChar() {
1815 if (current_ == end_)
1816 return 0;
1817 return *current_++;
1818}
1819
1820void OurReader::getLocationLineAndColumn(Location location, int& line,
1821 int& column) const {
1822 Location current = begin_;
1823 Location lastLineStart = current;
1824 line = 0;
1825 while (current < location && current != end_) {
1826 Char c = *current++;
1827 if (c == '\r') {
1828 if (*current == '\n')
1829 ++current;
1830 lastLineStart = current;
1831 ++line;
1832 } else if (c == '\n') {
1833 lastLineStart = current;
1834 ++line;
1835 }
1836 }
1837 // column & line start at 1
1838 column = int(location - lastLineStart) + 1;
1839 ++line;
1840}
1841
1842String OurReader::getLocationLineAndColumn(Location location) const {
1843 int line, column;
1844 getLocationLineAndColumn(location, line, column);
1845 char buffer[18 + 16 + 16 + 1];
1846 jsoncpp_snprintf(buffer, sizeof(buffer), "Line %d, Column %d", line, column);
1847 return buffer;
1848}
1849
1850String OurReader::getFormattedErrorMessages() const {
1851 String formattedMessage;
1852 for (const auto& error : errors_) {
1853 formattedMessage +=
1854 "* " + getLocationLineAndColumn(error.token_.start_) + "\n";
1855 formattedMessage += " " + error.message_ + "\n";
1856 if (error.extra_)
1857 formattedMessage +=
1858 "See " + getLocationLineAndColumn(error.extra_) + " for detail.\n";
1859 }
1860 return formattedMessage;
1861}
1862
1863std::vector<OurReader::StructuredError> OurReader::getStructuredErrors() const {
1864 std::vector<OurReader::StructuredError> allErrors;
1865 for (const auto& error : errors_) {
1866 OurReader::StructuredError structured;
1867 structured.offset_start = error.token_.start_ - begin_;
1868 structured.offset_limit = error.token_.end_ - begin_;
1869 structured.message = error.message_;
1870 allErrors.push_back(structured);
1871 }
1872 return allErrors;
1873}
1874
1875class OurCharReader : public CharReader {
1876 bool const collectComments_;
1877 OurReader reader_;
1878
1879public:
1880 OurCharReader(bool collectComments, OurFeatures const& features)
1881 : collectComments_(collectComments), reader_(features) {}
1882 bool parse(char const* beginDoc, char const* endDoc, Value* root,
1883 String* errs) override {
1884 bool ok = reader_.parse(beginDoc, endDoc, *root, collectComments_);
1885 if (errs) {
1886 *errs = reader_.getFormattedErrorMessages();
1887 }
1888 return ok;
1889 }
1890};
1891
1895 bool collectComments = settings_["collectComments"].asBool();
1896 OurFeatures features = OurFeatures::all();
1897 features.allowComments_ = settings_["allowComments"].asBool();
1898 features.allowTrailingCommas_ = settings_["allowTrailingCommas"].asBool();
1899 features.strictRoot_ = settings_["strictRoot"].asBool();
1900 features.allowDroppedNullPlaceholders_ =
1901 settings_["allowDroppedNullPlaceholders"].asBool();
1902 features.allowNumericKeys_ = settings_["allowNumericKeys"].asBool();
1903 features.allowSingleQuotes_ = settings_["allowSingleQuotes"].asBool();
1904
1905 // Stack limit is always a size_t, so we get this as an unsigned int
1906 // regardless of it we have 64-bit integer support enabled.
1907 features.stackLimit_ = static_cast<size_t>(settings_["stackLimit"].asUInt());
1908 features.failIfExtra_ = settings_["failIfExtra"].asBool();
1909 features.rejectDupKeys_ = settings_["rejectDupKeys"].asBool();
1910 features.allowSpecialFloats_ = settings_["allowSpecialFloats"].asBool();
1911 features.skipBom_ = settings_["skipBom"].asBool();
1912 return new OurCharReader(collectComments, features);
1913}
1914
1916 static const auto& valid_keys = *new std::set<String>{
1917 "collectComments",
1918 "allowComments",
1919 "allowTrailingCommas",
1920 "strictRoot",
1921 "allowDroppedNullPlaceholders",
1922 "allowNumericKeys",
1923 "allowSingleQuotes",
1924 "stackLimit",
1925 "failIfExtra",
1926 "rejectDupKeys",
1927 "allowSpecialFloats",
1928 "skipBom",
1929 };
1930 for (auto si = settings_.begin(); si != settings_.end(); ++si) {
1931 auto key = si.name();
1932 if (valid_keys.count(key))
1933 continue;
1934 if (invalid)
1935 (*invalid)[key] = *si;
1936 else
1937 return false;
1938 }
1939 return invalid ? invalid->empty() : true;
1940}
1941
1943 return settings_[key];
1944}
1945// static
1948 (*settings)["allowComments"] = false;
1949 (*settings)["allowTrailingCommas"] = false;
1950 (*settings)["strictRoot"] = true;
1951 (*settings)["allowDroppedNullPlaceholders"] = false;
1952 (*settings)["allowNumericKeys"] = false;
1953 (*settings)["allowSingleQuotes"] = false;
1954 (*settings)["stackLimit"] = 1000;
1955 (*settings)["failIfExtra"] = true;
1956 (*settings)["rejectDupKeys"] = true;
1957 (*settings)["allowSpecialFloats"] = false;
1958 (*settings)["skipBom"] = true;
1960}
1961// static
1964 (*settings)["collectComments"] = true;
1965 (*settings)["allowComments"] = true;
1966 (*settings)["allowTrailingCommas"] = true;
1967 (*settings)["strictRoot"] = false;
1968 (*settings)["allowDroppedNullPlaceholders"] = false;
1969 (*settings)["allowNumericKeys"] = false;
1970 (*settings)["allowSingleQuotes"] = false;
1971 (*settings)["stackLimit"] = 1000;
1972 (*settings)["failIfExtra"] = false;
1973 (*settings)["rejectDupKeys"] = false;
1974 (*settings)["allowSpecialFloats"] = false;
1975 (*settings)["skipBom"] = true;
1977}
1978
1980// global functions
1981
1982bool parseFromStream(CharReader::Factory const& fact, IStream& sin, Value* root,
1983 String* errs) {
1984 OStringStream ssin;
1985 ssin << sin.rdbuf();
1986 String doc = ssin.str();
1987 char const* begin = doc.data();
1988 char const* end = begin + doc.size();
1989 // Note that we do not actually need a null-terminator.
1990 CharReaderPtr const reader(fact.newCharReader());
1991 return reader->parse(begin, end, root, errs);
1992}
1993
1996 String errs;
1997 bool ok = parseFromStream(b, sin, &root, &errs);
1998 if (!ok) {
1999 throwRuntimeError(errs);
2000 }
2001 return sin;
2002}
2003
2004} // namespace Json
virtual CharReader * newCharReader() const =0
Allocate a CharReader via operator new().
Build a CharReader implementation.
Definition: reader.h:289
static void setDefaults(Json::Value *settings)
Called by ctor, but you can use this to reset settings_.
Value & operator[](const String &key)
A simple way to update a specific setting.
CharReader * newCharReader() const override
Allocate a CharReader via operator new().
static void strictMode(Json::Value *settings)
Same as old Features::strictMode().
Json::Value settings_
Configuration of this builder.
Definition: reader.h:335
~CharReaderBuilder() override
bool validate(Json::Value *invalid) const
Interface for reading JSON from a char array.
Definition: reader.h:245
Configuration passed to reader and writer.
Definition: json_features.h:22
bool strictRoot_
true if root must be either an array or an object value.
Definition: json_features.h:49
bool allowComments_
true if comments are allowed. Default: true.
Definition: json_features.h:45
bool allowDroppedNullPlaceholders_
true if dropped null placeholders are allowed. Default: false.
Definition: json_features.h:52
static Features all()
A configuration that allows all features and assumes all strings are UTF-8.
Definition: json_reader.cpp:67
Features()
Initialize the configuration like JsonConfig::allFeatures;.
static Features strictMode()
A configuration that is strictly compatible with the JSON specification.
Definition: json_reader.cpp:69
bool allowNumericKeys_
true if numeric object key are allowed. Default: false.
Definition: json_features.h:55
char Char
Definition: reader.h:39
Reader()
Constructs a Reader allowing all features for parsing.
Definition: json_reader.cpp:88
bool pushError(const Value &value, const String &message)
Add a semantic error message.
bool good() const
Return whether there are any errors.
const Char * Location
Definition: reader.h:40
std::vector< StructuredError > getStructuredErrors() const
Returns a vector of structured errors encountered while parsing.
bool parse(const std::string &document, Value &root, bool collectComments=true)
Read a Value from a JSON document.
Definition: json_reader.cpp:92
String getFormattedErrorMessages() const
Returns a user friendly string that list errors in the parsed document.
Represents a JSON value.
Definition: value.h:194
const_iterator begin() const
bool empty() const
Return true if empty array, empty object, or null; otherwise, false.
Definition: json_value.cpp:882
static constexpr LargestInt maxLargestInt
Maximum signed integer value that can be stored in a Json::Value.
Definition: value.h:227
Json::UInt UInt
Definition: value.h:201
bool isArray() const
void setComment(const char *comment, size_t len, CommentPlacement placement)
Comments must be //... or /* ... *‍/.
Definition: value.h:571
ptrdiff_t getOffsetLimit() const
const_iterator end() const
void swapPayload(Value &other)
Swap values but leave comments and source offsets in place.
Definition: json_value.cpp:456
void setOffsetLimit(ptrdiff_t limit)
Json::LargestInt LargestInt
Definition: value.h:207
Json::LargestUInt LargestUInt
Definition: value.h:208
UInt asUInt() const
Definition: json_value.cpp:676
bool isObject() const
void setOffsetStart(ptrdiff_t start)
static constexpr Int maxInt
Maximum signed int value that can be stored in a Json::Value.
Definition: value.h:234
bool asBool() const
Definition: json_value.cpp:804
static constexpr LargestUInt maxLargestUInt
Maximum unsigned integer value that can be stored in a Json::Value.
Definition: value.h:229
static constexpr LargestInt minLargestInt
Minimum signed integer value that can be stored in a Json::Value.
Definition: value.h:224
ptrdiff_t getOffsetStart() const
#define jsoncpp_snprintf
Definition: config.h:63
#define JSONCPP_DEPRECATED_STACK_LIMIT
Definition: json_reader.cpp:48
static size_t const stackLimit_g
Definition: json_reader.cpp:51
JSON (JavaScript Object Notation).
Definition: allocator.h:15
std::basic_istringstream< String::value_type, String::traits_type, String::allocator_type > IStringStream
Definition: config.h:135
std::basic_ostringstream< String::value_type, String::traits_type, String::allocator_type > OStringStream
Definition: config.h:138
std::basic_string< char, std::char_traits< char >, Allocator< char > > String
Definition: config.h:132
CommentPlacement
Definition: value.h:119
@ commentAfterOnSameLine
a comment just after a value on the same line
Definition: value.h:121
@ commentBefore
a comment placed on the line before a value
Definition: value.h:120
@ commentAfter
a comment on the line after a value (only makes sense for the root value)
Definition: value.h:122
@ arrayValue
array value (ordered list)
Definition: value.h:115
@ objectValue
object value (collection of name/value pairs).
Definition: value.h:116
static String codePointToUTF8(unsigned int cp)
Converts a unicode code-point to UTF-8.
Definition: json_tool.h:39
IStream & operator>>(IStream &, Value &)
Read from 'sin' into 'root'.
bool parseFromStream(CharReader::Factory const &, IStream &, Value *root, String *errs)
Consume entire stream and use its begin/end.
std::istream IStream
Definition: config.h:139
std::auto_ptr< CharReader > CharReaderPtr
Definition: json_reader.cpp:59
An error tagged with where in the JSON text it was encountered.
Definition: reader.h:47