1#ifndef BOOST_PROPERTY_TREE_DETAIL_JSON_PARSER_WIDE_ENCODING_HPP
2#define BOOST_PROPERTY_TREE_DETAIL_JSON_PARSER_WIDE_ENCODING_HPP
3
4#include <boost/assert.hpp>
5#include <boost/range/iterator_range_core.hpp>
6
7#include <utility>
8
9namespace boost { namespace property_tree {
10 namespace json_parser { namespace detail
11{
12
// Character-classification helpers for JSON text whose external (input)
// character type is wchar_t. Every member is a stateless comparison against
// wide-character literals, so all of them are const and can be used through
// a const encoding object.
struct external_wide_encoding
{
    typedef wchar_t external_char;

    // Line break and the four whitespace characters recognised here.
    bool is_nl(wchar_t c) const { return c == L'\n'; }
    bool is_ws(wchar_t c) const {
        return c == L' ' || c == L'\t' || c == L'\n' || c == L'\r';
    }

    // Pieces of a number literal.
    bool is_minus(wchar_t c) const { return c == L'-'; }
    bool is_plusminus(wchar_t c) const { return c == L'+' || c == L'-'; }
    bool is_dot(wchar_t c) const { return c == L'.'; }
    bool is_eE(wchar_t c) const { return c == L'e' || c == L'E'; }
    bool is_0(wchar_t c) const { return c == L'0'; }
    // Any decimal digit '0'..'9'.
    bool is_digit(wchar_t c) const { return c >= L'0' && c <= L'9'; }
    // Non-zero decimal digit '1'..'9'.
    bool is_digit0(wchar_t c) const { return c >= L'1' && c <= L'9'; }

    // String delimiters and escape introducers.
    bool is_quote(wchar_t c) const { return c == L'"'; }
    bool is_backslash(wchar_t c) const { return c == L'\\'; }
    bool is_slash(wchar_t c) const { return c == L'/'; }

    // Structural punctuation.
    bool is_comma(wchar_t c) const { return c == L','; }
    bool is_open_bracket(wchar_t c) const { return c == L'['; }
    bool is_close_bracket(wchar_t c) const { return c == L']'; }
    bool is_colon(wchar_t c) const { return c == L':'; }
    bool is_open_brace(wchar_t c) const { return c == L'{'; }
    bool is_close_brace(wchar_t c) const { return c == L'}'; }

    // Individual lower-case letters.
    bool is_a(wchar_t c) const { return c == L'a'; }
    bool is_b(wchar_t c) const { return c == L'b'; }
    bool is_e(wchar_t c) const { return c == L'e'; }
    bool is_f(wchar_t c) const { return c == L'f'; }
    bool is_l(wchar_t c) const { return c == L'l'; }
    bool is_n(wchar_t c) const { return c == L'n'; }
    bool is_r(wchar_t c) const { return c == L'r'; }
    bool is_s(wchar_t c) const { return c == L's'; }
    bool is_t(wchar_t c) const { return c == L't'; }
    bool is_u(wchar_t c) const { return c == L'u'; }

    // Returns the value (0..15) of the hexadecimal digit c, accepting both
    // upper- and lower-case letters, or -1 when c is not a hex digit.
    // Declared const like every other member so that it is callable on a
    // const encoding object.
    int decode_hexdigit(wchar_t c) const {
        if (c >= L'0' && c <= L'9') return c - L'0';
        if (c >= L'A' && c <= L'F') return c - L'A' + 10;
        if (c >= L'a' && c <= L'f') return c - L'a' + 10;
        return -1;
    }
};
59
// Empty tag type: is_utf16<true> and is_utf16<false> are distinct types used
// solely to pick an overload at compile time.
template <bool IsUtf16>
struct is_utf16
{
};
61
62 class wide_wide_encoding : public external_wide_encoding
63 {
64 typedef is_utf16<sizeof(wchar_t) == 2> test_utf16;
65 public:
66 typedef wchar_t internal_char;
67
68 template <typename Iterator>
69 boost::iterator_range<Iterator>
70 to_internal(Iterator first, Iterator last) const {
71 return boost::make_iterator_range(first, last);
72 }
73
74 wchar_t to_internal_trivial(wchar_t c) const {
75 BOOST_ASSERT(!is_surrogate_high(c) && !is_surrogate_low(c));
76 return c;
77 }
78
79 template <typename Iterator, typename Sentinel,
80 typename EncodingErrorFn>
81 void skip_codepoint(Iterator& cur, Sentinel end,
82 EncodingErrorFn error_fn) const {
83 transcode_codepoint(cur, end, DoNothing(), error_fn);
84 }
85
86 template <typename Iterator, typename Sentinel, typename TranscodedFn,
87 typename EncodingErrorFn>
88 void transcode_codepoint(Iterator& cur, Sentinel end,
89 TranscodedFn transcoded_fn, EncodingErrorFn error_fn) const {
90 return transcode_codepoint(cur, end, transcoded_fn, error_fn,
91 test_utf16());
92 }
93
94 template <typename TranscodedFn>
95 void feed_codepoint(unsigned codepoint,
96 TranscodedFn transcoded_fn) const {
97 feed_codepoint(codepoint, transcoded_fn, test_utf16());
98 }
99
100 template <typename Iterator, typename Sentinel>
101 void skip_introduction(Iterator& cur, Sentinel end) const {
102 // Endianness is already decoded at this level.
103 if (cur != end && *cur == 0xfeff) {
104 ++cur;
105 }
106 }
107
108 private:
109 struct DoNothing {
110 void operator ()(wchar_t) const {}
111 };
112
113 template <typename Iterator, typename Sentinel, typename TranscodedFn,
114 typename EncodingErrorFn>
115 void transcode_codepoint(Iterator& cur, Sentinel,
116 TranscodedFn transcoded_fn,
117 EncodingErrorFn error_fn,
118 is_utf16<false>) const {
119 wchar_t c = *cur;
120 if (c < 0x20) {
121 error_fn();
122 }
123 transcoded_fn(c);
124 ++cur;
125 }
126 template <typename Iterator, typename Sentinel, typename TranscodedFn,
127 typename EncodingErrorFn>
128 void transcode_codepoint(Iterator& cur, Sentinel end,
129 TranscodedFn transcoded_fn,
130 EncodingErrorFn error_fn,
131 is_utf16<true>) const {
132 wchar_t c = *cur;
133 if (c < 0x20) {
134 error_fn();
135 }
136 if (is_surrogate_low(codepoint: c)) {
137 error_fn();
138 }
139 transcoded_fn(c);
140 ++cur;
141 if (is_surrogate_high(codepoint: c)) {
142 if (cur == end) {
143 error_fn();
144 }
145 c = *cur;
146 if (!is_surrogate_low(codepoint: c)) {
147 error_fn();
148 }
149 transcoded_fn(c);
150 ++cur;
151 }
152 }
153
154 template <typename TranscodedFn>
155 void feed_codepoint(unsigned codepoint, TranscodedFn transcoded_fn,
156 is_utf16<false>) const {
157 transcoded_fn(static_cast<wchar_t>(codepoint));
158 }
159 template <typename TranscodedFn>
160 void feed_codepoint(unsigned codepoint, TranscodedFn transcoded_fn,
161 is_utf16<true>) const {
162 if (codepoint < 0x10000) {
163 transcoded_fn(static_cast<wchar_t>(codepoint));
164 } else {
165 codepoint -= 0x10000;
166 transcoded_fn(static_cast<wchar_t>((codepoint >> 10) | 0xd800));
167 transcoded_fn(static_cast<wchar_t>(
168 (codepoint & 0x3ff) | 0xdc00));
169 }
170 }
171
172 static bool is_surrogate_high(unsigned codepoint) {
173 return (codepoint & 0xfc00) == 0xd800;
174 }
175 static bool is_surrogate_low(unsigned codepoint) {
176 return (codepoint & 0xfc00) == 0xdc00;
177 }
178 };
179
180}}}}
181
182#endif
183

// Source code of boost/libs/property_tree/include/boost/property_tree/json_parser/detail/wide_encoding.hpp