1#ifndef BOOST_PROPERTY_TREE_DETAIL_JSON_PARSER_PARSER_HPP
2#define BOOST_PROPERTY_TREE_DETAIL_JSON_PARSER_PARSER_HPP
3
4#include <boost/property_tree/json_parser/error.hpp>
5
6#include <boost/core/ref.hpp>
7#include <boost/bind/bind.hpp>
8#include <boost/bind/placeholders.hpp>
9
10#include <iterator>
11#include <sstream>
12#include <string>
13
14namespace boost { namespace property_tree {
15 namespace json_parser { namespace detail
16{
17
18 template <typename Encoding, typename Iterator, typename Sentinel>
19 class source
20 {
21 public:
22 typedef typename std::iterator_traits<Iterator>::value_type
23 code_unit;
24 typedef bool (Encoding::*encoding_predicate)(code_unit c) const;
25
26 explicit source(Encoding& encoding) : encoding(encoding) {}
27
28 template <typename Range>
29 void set_input(const std::string& filename, const Range& r)
30 {
31 this->filename = filename;
32 cur = r.begin();
33 end = r.end();
34 // Note that there is no backtracking, so if e.g. a UTF-8 file
35 // starts with something that initially looks like a BOM but isn't,
36 // there's trouble.
37 // However, no valid JSON file can start with a UTF-8 EF byte.
38 encoding.skip_introduction(cur, end);
39 line = 1;
40 offset = 0;
41 }
42
43 bool done() const { return cur == end; }
44
45 void parse_error(const char* msg) {
46 BOOST_PROPERTY_TREE_THROW(
47 json_parser_error(msg, filename, line));
48 }
49
50 void next() {
51 if (encoding.is_nl(*cur)) {
52 ++line;
53 offset = 0;
54 } else {
55 ++offset;
56 }
57 ++cur;
58 }
59
60 template <typename Action>
61 bool have(encoding_predicate p, Action& a) {
62 bool found = cur != end && (encoding.*p)(*cur);
63 if (found) {
64 a(*cur);
65 next();
66 }
67 return found;
68 }
69
70 bool have(encoding_predicate p) {
71 DoNothing n;
72 return have(p, n);
73 }
74
75 template <typename Action>
76 void expect(encoding_predicate p, const char* msg, Action& a) {
77 if (!have(p, a)) {
78 parse_error(msg);
79 }
80 }
81
82 void expect(encoding_predicate p, const char* msg) {
83 DoNothing n;
84 expect(p, msg, n);
85 }
86
87 code_unit need_cur(const char* msg) {
88 if (cur == end) {
89 parse_error(msg);
90 }
91 return *cur;
92 }
93
94 Iterator& raw_cur() { return cur; }
95 Sentinel raw_end() { return end; }
96
97 private:
98 struct DoNothing {
99 void operator ()(code_unit) const {}
100 };
101
102 Encoding& encoding;
103 Iterator cur;
104 Sentinel end;
105 std::string filename;
106 int line;
107 int offset;
108 };
109
// Number adapter selected for every iterator category that has no more
// specific specialization. It remembers where the number started and, once
// parsing is finished, hands the whole [start, cursor) range to the
// callbacks in one piece.
template <typename Callbacks, typename Encoding, typename Iterator,
          typename = typename std::iterator_traits<Iterator>
              ::iterator_category>
class number_callback_adapter
{
    // Copying is disabled: declared but intentionally never defined.
    number_callback_adapter(const number_callback_adapter&);

    Callbacks& callbacks_;
    Encoding& encoding_;
    Iterator start_;     // copy of the cursor taken at construction
    Iterator& cursor_;   // the caller's cursor, observed by reference

public:
    number_callback_adapter(Callbacks& callbacks, Encoding& encoding,
                            Iterator& cur)
        : callbacks_(callbacks),
          encoding_(encoding),
          start_(cur),
          cursor_(cur)
    {}

    // Individual code units are ignored; the range is converted in finish().
    void operator ()(typename Encoding::external_char) {}

    // Converts everything between the start position and the current
    // cursor position and reports it as one number.
    void finish() const {
        callbacks_.on_number(encoding_.to_internal(start_, cursor_));
    }
};
135
136 template <typename Callbacks, typename Encoding, typename Iterator>
137 class number_callback_adapter<Callbacks, Encoding, Iterator,
138 std::input_iterator_tag>
139 {
140 public:
141 number_callback_adapter(Callbacks& callbacks, Encoding& encoding,
142 Iterator&)
143 : callbacks(callbacks), encoding(encoding), first(true)
144 {}
145
146 void operator ()(typename Encoding::external_char c) {
147 if (first) {
148 callbacks.on_begin_number();
149 first = false;
150 }
151 callbacks.on_digit(encoding.to_internal_trivial(c));
152 }
153
154 void finish() const {
155 callbacks.on_end_number();
156 }
157 private:
158 number_callback_adapter(const number_callback_adapter&);
159
160 Callbacks& callbacks;
161 Encoding& encoding;
162 bool first;
163 };
164
// String adapter selected for every iterator category that has no more
// specific specialization. Unescaped text is collected as "runs": the
// adapter notes where a run begins and reports the whole
// [run start, cursor) range when the run is finished.
template <typename Callbacks, typename Encoding, typename Iterator,
          typename = typename std::iterator_traits<Iterator>
              ::iterator_category>
class string_callback_adapter
{
    // Copying is disabled: declared but intentionally never defined.
    string_callback_adapter(const string_callback_adapter&);

    Callbacks& callbacks_;
    Encoding& encoding_;
    Iterator& cursor_;      // the caller's cursor, observed by reference
    Iterator run_start_;    // where the current run began

public:
    string_callback_adapter(Callbacks& callbacks, Encoding& encoding,
                            Iterator& cur)
        : callbacks_(callbacks),
          encoding_(encoding),
          cursor_(cur),
          run_start_(cur)
    {}

    // Begins a new run at the current cursor position.
    void start_run() {
        run_start_ = cursor_;
    }

    // Reports everything from the run start up to the cursor.
    void finish_run() {
        callbacks_.on_code_units(
            encoding_.to_internal(run_start_, cursor_));
    }

    // Lets the encoding step the cursor over one codepoint; the skipped
    // code units stay part of the current run.
    template <typename Sentinel, typename EncodingErrorFn>
    void process_codepoint(Sentinel end, EncodingErrorFn error_fn) {
        encoding_.skip_codepoint(cursor_, end, error_fn);
    }
};
198
199 template <typename Callbacks, typename Encoding, typename Iterator>
200 class string_callback_adapter<Callbacks, Encoding, Iterator,
201 std::input_iterator_tag>
202 {
203 public:
204 string_callback_adapter(Callbacks& callbacks, Encoding& encoding,
205 Iterator& cur)
206 : callbacks(callbacks), encoding(encoding), cur(cur)
207 {}
208
209 void start_run() {}
210
211 void finish_run() {}
212
213 template <typename Sentinel, typename EncodingErrorFn>
214 void process_codepoint(Sentinel end, EncodingErrorFn error_fn) {
215 encoding.transcode_codepoint(cur, end,
216 boost::bind(&Callbacks::on_code_unit,
217 boost::ref(callbacks), boost::placeholders::_1),
218 error_fn);
219 }
220
221 private:
222 string_callback_adapter(const string_callback_adapter&);
223
224 Callbacks& callbacks;
225 Encoding& encoding;
226 Iterator& cur;
227 };
228
229 template <typename Callbacks, typename Encoding, typename Iterator,
230 typename Sentinel>
231 class parser
232 {
233 typedef detail::number_callback_adapter<Callbacks, Encoding, Iterator>
234 number_adapter;
235 typedef detail::string_callback_adapter<Callbacks, Encoding, Iterator>
236 string_adapter;
237 typedef detail::source<Encoding, Iterator, Sentinel> source;
238 typedef typename source::code_unit code_unit;
239
240 public:
241 parser(Callbacks& callbacks, Encoding& encoding)
242 : callbacks(callbacks), encoding(encoding), src(encoding)
243 {}
244
245 template <typename Range>
246 void set_input(const std::string& filename, const Range& r) {
247 src.set_input(filename, r);
248 }
249
250 void finish() {
251 skip_ws();
252 if (!src.done()) {
253 parse_error(msg: "garbage after data");
254 }
255 }
256
257 void parse_value() {
258 if (parse_object()) return;
259 if (parse_array()) return;
260 if (parse_string()) return;
261 if (parse_boolean()) return;
262 if (parse_null()) return;
263 if (parse_number()) return;
264 parse_error(msg: "expected value");
265 }
266
267 bool parse_null() {
268 skip_ws();
269 if (!have(&Encoding::is_n)) {
270 return false;
271 }
272 expect(&Encoding::is_u, "expected 'null'");
273 expect(&Encoding::is_l, "expected 'null'");
274 expect(&Encoding::is_l, "expected 'null'");
275 callbacks.on_null();
276 return true;
277 }
278
279 bool parse_boolean() {
280 skip_ws();
281 if (have(&Encoding::is_t)) {
282 expect(&Encoding::is_r, "expected 'true'");
283 expect(&Encoding::is_u, "expected 'true'");
284 expect(&Encoding::is_e, "expected 'true'");
285 callbacks.on_boolean(true);
286 return true;
287 }
288 if (have(&Encoding::is_f)) {
289 expect(&Encoding::is_a, "expected 'false'");
290 expect(&Encoding::is_l, "expected 'false'");
291 expect(&Encoding::is_s, "expected 'false'");
292 expect(&Encoding::is_e, "expected 'false'");
293 callbacks.on_boolean(false);
294 return true;
295 }
296 return false;
297 }
298
299 bool parse_number() {
300 skip_ws();
301
302 number_adapter adapter(callbacks, encoding, src.raw_cur());
303 bool started = false;
304 if (have(&Encoding::is_minus, adapter)) {
305 started = true;
306 }
307 if (!have(&Encoding::is_0, adapter) && !parse_int_part(action&: adapter)) {
308 if (started) {
309 parse_error(msg: "expected digits after -");
310 }
311 return false;
312 }
313 parse_frac_part(action&: adapter);
314 parse_exp_part(action&: adapter);
315 adapter.finish();
316 return true;
317 }
318
319 bool parse_string() {
320 skip_ws();
321
322 if (!have(&Encoding::is_quote)) {
323 return false;
324 }
325
326 callbacks.on_begin_string();
327 string_adapter adapter(callbacks, encoding, src.raw_cur());
328 while (!encoding.is_quote(need_cur(msg: "unterminated string"))) {
329 if (encoding.is_backslash(*src.raw_cur())) {
330 adapter.finish_run();
331 next();
332 parse_escape();
333 adapter.start_run();
334 } else {
335 adapter.process_codepoint(src.raw_end(),
336 boost::bind(&parser::parse_error,
337 this, "invalid code sequence"));
338 }
339 }
340 adapter.finish_run();
341 callbacks.on_end_string();
342 next();
343 return true;
344 }
345
346 bool parse_array() {
347 skip_ws();
348
349 if (!have(&Encoding::is_open_bracket)) {
350 return false;
351 }
352
353 callbacks.on_begin_array();
354 skip_ws();
355 if (have(&Encoding::is_close_bracket)) {
356 callbacks.on_end_array();
357 return true;
358 }
359 do {
360 parse_value();
361 skip_ws();
362 } while (have(&Encoding::is_comma));
363 expect(&Encoding::is_close_bracket, "expected ']' or ','");
364 callbacks.on_end_array();
365 return true;
366 }
367
368 bool parse_object() {
369 skip_ws();
370
371 if (!have(&Encoding::is_open_brace)) {
372 return false;
373 }
374
375 callbacks.on_begin_object();
376 skip_ws();
377 if (have(&Encoding::is_close_brace)) {
378 callbacks.on_end_object();
379 return true;
380 }
381 do {
382 if (!parse_string()) {
383 parse_error(msg: "expected key string");
384 }
385 skip_ws();
386 expect(&Encoding::is_colon, "expected ':'");
387 parse_value();
388 skip_ws();
389 } while (have(&Encoding::is_comma));
390 expect(&Encoding::is_close_brace, "expected '}' or ','");
391 callbacks.on_end_object();
392 return true;
393 }
394
395 private:
396 typedef typename source::encoding_predicate encoding_predicate;
397
398 void parse_error(const char* msg) { src.parse_error(msg); }
399 void next() { src.next(); }
400 template <typename Action>
401 bool have(encoding_predicate p, Action& a) { return src.have(p, a); }
402 bool have(encoding_predicate p) { return src.have(p); }
403 template <typename Action>
404 void expect(encoding_predicate p, const char* msg, Action& a) {
405 src.expect(p, msg, a);
406 }
407 void expect(encoding_predicate p, const char* msg) {
408 src.expect(p, msg);
409 }
410 code_unit need_cur(const char* msg) { return src.need_cur(msg); }
411
412 void skip_ws() {
413 while (have(&Encoding::is_ws)) {
414 }
415 }
416
417 bool parse_int_part(number_adapter& action) {
418 if (!have(&Encoding::is_digit0, action)) {
419 return false;
420 }
421 parse_digits(action);
422 return true;
423 }
424
425 void parse_frac_part(number_adapter& action) {
426 if (!have(&Encoding::is_dot, action)) {
427 return;
428 }
429 expect(&Encoding::is_digit, "need at least one digit after '.'",
430 action);
431 parse_digits(action);
432 }
433
434 void parse_exp_part(number_adapter& action) {
435 if (!have(&Encoding::is_eE, action)) {
436 return;
437 }
438 have(&Encoding::is_plusminus, action);
439 expect(&Encoding::is_digit, "need at least one digit in exponent",
440 action);
441 parse_digits(action);
442 }
443
444 void parse_digits(number_adapter& action) {
445 while (have(&Encoding::is_digit, action)) {
446 }
447 }
448
449 void parse_escape() {
450 if (have(&Encoding::is_quote)) {
451 feed(codepoint: 0x22);
452 } else if (have(&Encoding::is_backslash)) {
453 feed(codepoint: 0x5c);
454 } else if (have(&Encoding::is_slash)) {
455 feed(codepoint: 0x2f);
456 } else if (have(&Encoding::is_b)) {
457 feed(codepoint: 0x08); // backspace
458 } else if (have(&Encoding::is_f)) {
459 feed(codepoint: 0x0c); // formfeed
460 } else if (have(&Encoding::is_n)) {
461 feed(codepoint: 0x0a); // line feed
462 } else if (have(&Encoding::is_r)) {
463 feed(codepoint: 0x0d); // carriage return
464 } else if (have(&Encoding::is_t)) {
465 feed(codepoint: 0x09); // horizontal tab
466 } else if (have(&Encoding::is_u)) {
467 parse_codepoint_ref();
468 } else {
469 parse_error(msg: "invalid escape sequence");
470 }
471 }
472
473 unsigned parse_hex_quad() {
474 unsigned codepoint = 0;
475 for (int i = 0; i < 4; ++i) {
476 int value = encoding.decode_hexdigit(
477 need_cur(msg: "invalid escape sequence"));
478 if (value < 0) {
479 parse_error(msg: "invalid escape sequence");
480 }
481 codepoint *= 16;
482 codepoint += value;
483 next();
484 }
485 return codepoint;
486 }
487
488 static bool is_surrogate_high(unsigned codepoint) {
489 return (codepoint & 0xfc00) == 0xd800;
490 }
491 static bool is_surrogate_low(unsigned codepoint) {
492 return (codepoint & 0xfc00) == 0xdc00;
493 }
494 static unsigned combine_surrogates(unsigned high, unsigned low) {
495 return 0x010000 + (((high & 0x3ff) << 10) | (low & 0x3ff));
496 }
497
498 void parse_codepoint_ref() {
499 unsigned codepoint = parse_hex_quad();
500 if (is_surrogate_low(codepoint)) {
501 parse_error(msg: "invalid codepoint, stray low surrogate");
502 }
503 if (is_surrogate_high(codepoint)) {
504 expect(&Encoding::is_backslash,
505 "invalid codepoint, stray high surrogate");
506 expect(&Encoding::is_u,
507 "expected codepoint reference after high surrogate");
508 int low = parse_hex_quad();
509 if (!is_surrogate_low(codepoint: low)) {
510 parse_error(msg: "expected low surrogate after high surrogate");
511 }
512 codepoint = combine_surrogates(high: codepoint, low);
513 }
514 feed(codepoint);
515 }
516
517 void feed(unsigned codepoint) {
518 encoding.feed_codepoint(codepoint,
519 boost::bind(&Callbacks::on_code_unit,
520 boost::ref(callbacks), boost::placeholders::_1));
521 }
522
523 Callbacks& callbacks;
524 Encoding& encoding;
525 source src;
526 };
527
528}}}}
529
530#endif
531

// Source: boost/libs/property_tree/include/boost/property_tree/json_parser/detail/parser.hpp