1 | /* |
2 | * |
3 | * Copyright (c) 2004 |
4 | * John Maddock |
5 | * |
6 | * Use, modification and distribution are subject to the |
7 | * Boost Software License, Version 1.0. (See accompanying file |
8 | * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) |
9 | * |
10 | */ |
11 | |
12 | /* |
13 | * LOCATION: see http://www.boost.org for most recent version. |
14 | * FILE regex_traits_defaults.hpp |
15 | * VERSION see <boost/version.hpp> |
16 | * DESCRIPTION: Declares API's for access to regex_traits default properties. |
17 | */ |
18 | |
19 | #ifndef BOOST_REGEX_TRAITS_DEFAULTS_HPP_INCLUDED |
20 | #define BOOST_REGEX_TRAITS_DEFAULTS_HPP_INCLUDED |
21 | |
22 | #ifdef BOOST_MSVC |
23 | #pragma warning(push) |
24 | #pragma warning(disable: 4103) |
25 | #endif |
26 | #ifdef BOOST_HAS_ABI_HEADERS |
27 | # include BOOST_ABI_PREFIX |
28 | #endif |
29 | #ifdef BOOST_MSVC |
30 | #pragma warning(pop) |
31 | #endif |
32 | |
#include <boost/regex/config.hpp>

#ifndef BOOST_REGEX_SYNTAX_TYPE_HPP
#include <boost/regex/v4/syntax_type.hpp>
#endif
#ifndef BOOST_REGEX_ERROR_TYPE_HPP
#include <boost/regex/v4/error_type.hpp>
#endif
#include <boost/type_traits/make_unsigned.hpp>
#include <limits>
42 | |
43 | #ifdef BOOST_NO_STDC_NAMESPACE |
44 | namespace std{ |
45 | using ::strlen; |
46 | } |
47 | #endif |
48 | |
49 | namespace boost{ namespace BOOST_REGEX_DETAIL_NS{ |
50 | |
51 | |
52 | // |
53 | // helpers to suppress warnings: |
54 | // |
55 | template <class charT> |
56 | inline bool is_extended(charT c) |
57 | { |
58 | typedef typename make_unsigned<charT>::type unsigned_type; |
59 | return (sizeof(charT) > 1) && (static_cast<unsigned_type>(c) >= 256u); |
60 | } |
// Non-template overload for plain char: always reports "not extended".
inline bool is_extended(char)
{
   return false;
}
63 | |
64 | |
//
// Accessors for the library's built-in defaults. Only the declarations
// are visible in this header; the definitions live elsewhere.
// NOTE(review): the one-line descriptions below are inferred from the
// names and signatures only - confirm against the implementation.
//
// presumably the default text associated with syntax element n:
BOOST_REGEX_DECL const char* BOOST_REGEX_CALL get_default_syntax(regex_constants::syntax_type n);
// presumably the default message associated with error code n:
BOOST_REGEX_DECL const char* BOOST_REGEX_CALL get_default_error_string(regex_constants::error_type n);
// presumably the default syntax classification of the narrow character c:
BOOST_REGEX_DECL regex_constants::syntax_type BOOST_REGEX_CALL get_default_syntax_type(char c);
// presumably the default classification of c when it follows an escape:
BOOST_REGEX_DECL regex_constants::escape_syntax_type BOOST_REGEX_CALL get_default_escape_syntax_type(char c);

// is charT c a combining character?
// Non-template worker taking a 16-bit (or wider) code value; the
// is_combining templates in this header forward to it.
BOOST_REGEX_DECL bool BOOST_REGEX_CALL is_combining_implementation(uint_least16_t s);
72 | |
73 | template <class charT> |
74 | inline bool is_combining(charT c) |
75 | { |
76 | return (c <= static_cast<charT>(0)) ? false : ((c >= static_cast<charT>((std::numeric_limits<uint_least16_t>::max)())) ? false : is_combining_implementation(s: static_cast<unsigned short>(c))); |
77 | } |
78 | template <> |
79 | inline bool is_combining<char>(char) |
80 | { |
81 | return false; |
82 | } |
83 | template <> |
84 | inline bool is_combining<signed char>(signed char) |
85 | { |
86 | return false; |
87 | } |
88 | template <> |
89 | inline bool is_combining<unsigned char>(unsigned char) |
90 | { |
91 | return false; |
92 | } |
#if !defined(__hpux) && !defined(__WINSCW__) // can't use WCHAR_MAX/MIN in pp-directives
#ifdef _MSC_VER
// _MSC_VER builds: forward straight to the implementation, no range check.
template<>
inline bool is_combining<wchar_t>(wchar_t c)
{
   const unsigned short code_value = static_cast<unsigned short>(c);
   return is_combining_implementation(code_value);
}
#elif !defined(__DECCXX) && !defined(__osf__) && !defined(__OSF__) && defined(WCHAR_MIN) && (WCHAR_MIN == 0) && !defined(BOOST_NO_INTRINSIC_WCHAR_T)
#if defined(WCHAR_MAX) && (WCHAR_MAX <= USHRT_MAX)
// Unsigned wchar_t that fits in unsigned short: no range check required.
template<>
inline bool is_combining<wchar_t>(wchar_t c)
{
   const unsigned short code_value = static_cast<unsigned short>(c);
   return is_combining_implementation(code_value);
}
#else
// Unsigned wchar_t wider than unsigned short: values at or above the
// uint_least16_t maximum are rejected before forwarding.
template<>
inline bool is_combining<wchar_t>(wchar_t c)
{
   if(c >= (std::numeric_limits<uint_least16_t>::max)())
      return false;
   return is_combining_implementation(static_cast<unsigned short>(c));
}
#endif
#endif
#endif
116 | |
117 | // |
118 | // is a charT c a line separator? |
119 | // |
120 | template <class charT> |
121 | inline bool is_separator(charT c) |
122 | { |
123 | return BOOST_REGEX_MAKE_BOOL( |
124 | (c == static_cast<charT>('\n')) |
125 | || (c == static_cast<charT>('\r')) |
126 | || (c == static_cast<charT>('\f')) |
127 | || (static_cast<boost::uint16_t>(c) == 0x2028u) |
128 | || (static_cast<boost::uint16_t>(c) == 0x2029u) |
129 | || (static_cast<boost::uint16_t>(c) == 0x85u)); |
130 | } |
131 | template <> |
132 | inline bool is_separator<char>(char c) |
133 | { |
134 | return BOOST_REGEX_MAKE_BOOL((c == '\n') || (c == '\r') || (c == '\f')); |
135 | } |
136 | |
//
// get a default collating element:
// Takes the element's name and returns a std::string by value; only the
// declaration is visible here.
// NOTE(review): what is returned for an unknown name is not visible
// from this header - confirm against the implementation.
//
BOOST_REGEX_DECL std::string BOOST_REGEX_CALL lookup_default_collate_name(const std::string& name);
141 | |
142 | // |
// get the state_id of a character classification, the individual
144 | // traits classes then transform that state_id into a bitmask: |
145 | // |
146 | template <class charT> |
147 | struct character_pointer_range |
148 | { |
149 | const charT* p1; |
150 | const charT* p2; |
151 | |
152 | bool operator < (const character_pointer_range& r)const |
153 | { |
154 | return std::lexicographical_compare(p1, p2, r.p1, r.p2); |
155 | } |
156 | bool operator == (const character_pointer_range& r)const |
157 | { |
158 | // Not only do we check that the ranges are of equal size before |
159 | // calling std::equal, but there is no other algorithm available: |
160 | // not even a non-standard MS one. So forward to unchecked_equal |
161 | // in the MS case. |
162 | return ((p2 - p1) == (r.p2 - r.p1)) && BOOST_REGEX_DETAIL_NS::equal(p1, p2, r.p1); |
163 | } |
164 | }; |
165 | template <class charT> |
166 | int get_default_class_id(const charT* p1, const charT* p2) |
167 | { |
168 | static const charT data[73] = { |
169 | 'a', 'l', 'n', 'u', 'm', |
170 | 'a', 'l', 'p', 'h', 'a', |
171 | 'b', 'l', 'a', 'n', 'k', |
172 | 'c', 'n', 't', 'r', 'l', |
173 | 'd', 'i', 'g', 'i', 't', |
174 | 'g', 'r', 'a', 'p', 'h', |
175 | 'l', 'o', 'w', 'e', 'r', |
176 | 'p', 'r', 'i', 'n', 't', |
177 | 'p', 'u', 'n', 'c', 't', |
178 | 's', 'p', 'a', 'c', 'e', |
179 | 'u', 'n', 'i', 'c', 'o', 'd', 'e', |
180 | 'u', 'p', 'p', 'e', 'r', |
181 | 'v', |
182 | 'w', 'o', 'r', 'd', |
183 | 'x', 'd', 'i', 'g', 'i', 't', |
184 | }; |
185 | |
186 | static const character_pointer_range<charT> ranges[21] = |
187 | { |
188 | {data+0, data+5,}, // alnum |
189 | {data+5, data+10,}, // alpha |
190 | {data+10, data+15,}, // blank |
191 | {data+15, data+20,}, // cntrl |
192 | {data+20, data+21,}, // d |
193 | {data+20, data+25,}, // digit |
194 | {data+25, data+30,}, // graph |
195 | {data+29, data+30,}, // h |
196 | {data+30, data+31,}, // l |
197 | {data+30, data+35,}, // lower |
198 | {data+35, data+40,}, // print |
199 | {data+40, data+45,}, // punct |
200 | {data+45, data+46,}, // s |
201 | {data+45, data+50,}, // space |
202 | {data+57, data+58,}, // u |
203 | {data+50, data+57,}, // unicode |
204 | {data+57, data+62,}, // upper |
205 | {data+62, data+63,}, // v |
206 | {data+63, data+64,}, // w |
207 | {data+63, data+67,}, // word |
208 | {data+67, data+73,}, // xdigit |
209 | }; |
210 | static const character_pointer_range<charT>* ranges_begin = ranges; |
211 | static const character_pointer_range<charT>* ranges_end = ranges + (sizeof(ranges)/sizeof(ranges[0])); |
212 | |
213 | character_pointer_range<charT> t = { p1, p2, }; |
214 | const character_pointer_range<charT>* p = std::lower_bound(ranges_begin, ranges_end, t); |
215 | if((p != ranges_end) && (t == *p)) |
216 | return static_cast<int>(p - ranges); |
217 | return -1; |
218 | } |
219 | |
220 | // |
221 | // helper functions: |
222 | // |
//
// global_length:
// number of characters preceding the terminating zero of the string p.
// p must point at a zero-terminated sequence.
//
template <class charT>
std::ptrdiff_t global_length(const charT* p)
{
   std::ptrdiff_t n = 0;
   while(*p)
   {
      ++p;
      ++n;
   }
   return n;
}
// char: defer to the C library. The function name is parenthesised so
// that a function-like macro of the same name cannot be expanded.
template<>
inline std::ptrdiff_t global_length<char>(const char* p)
{
   return static_cast<std::ptrdiff_t>((std::strlen)(p));
}
#ifndef BOOST_NO_WREGEX
// wchar_t: defer to the C library, as for char.
template<>
inline std::ptrdiff_t global_length<wchar_t>(const wchar_t* p)
{
   return static_cast<std::ptrdiff_t>((std::wcslen)(p));
}
#endif
246 | template <class charT> |
247 | inline charT BOOST_REGEX_CALL global_lower(charT c) |
248 | { |
249 | return c; |
250 | } |
251 | template <class charT> |
252 | inline charT BOOST_REGEX_CALL global_upper(charT c) |
253 | { |
254 | return c; |
255 | } |
256 | |
// Non-template case-conversion workers: declared here, defined elsewhere.
// The global_lower/global_upper specialisations in this header forward
// to the overload that matches their character type.
BOOST_REGEX_DECL char BOOST_REGEX_CALL do_global_lower(char c);
BOOST_REGEX_DECL char BOOST_REGEX_CALL do_global_upper(char c);
#ifndef BOOST_NO_WREGEX
// wide-character overloads, omitted when BOOST_NO_WREGEX is defined:
BOOST_REGEX_DECL wchar_t BOOST_REGEX_CALL do_global_lower(wchar_t c);
BOOST_REGEX_DECL wchar_t BOOST_REGEX_CALL do_global_upper(wchar_t c);
#endif
#ifdef BOOST_REGEX_HAS_OTHER_WCHAR_T
// unsigned short overloads, present only when BOOST_REGEX_HAS_OTHER_WCHAR_T is defined:
BOOST_REGEX_DECL unsigned short BOOST_REGEX_CALL do_global_lower(unsigned short c);
BOOST_REGEX_DECL unsigned short BOOST_REGEX_CALL do_global_upper(unsigned short c);
#endif
267 | // |
268 | // This sucks: declare template specialisations of global_lower/global_upper |
269 | // that just forward to the non-template implementation functions. We do |
270 | // this because there is one compiler (Compaq Tru64 C++) that doesn't seem |
271 | // to differentiate between templates and non-template overloads.... |
272 | // what's more, the primary template, plus all overloads have to be |
273 | // defined in the same translation unit (if one is inline they all must be) |
274 | // otherwise the "local template instantiation" compiler option can pick |
275 | // the wrong instantiation when linking: |
276 | // |
277 | template<> inline char BOOST_REGEX_CALL global_lower<char>(char c){ return do_global_lower(c); } |
278 | template<> inline char BOOST_REGEX_CALL global_upper<char>(char c){ return do_global_upper(c); } |
279 | #ifndef BOOST_NO_WREGEX |
280 | template<> inline wchar_t BOOST_REGEX_CALL global_lower<wchar_t>(wchar_t c){ return do_global_lower(c); } |
281 | template<> inline wchar_t BOOST_REGEX_CALL global_upper<wchar_t>(wchar_t c){ return do_global_upper(c); } |
282 | #endif |
283 | #ifdef BOOST_REGEX_HAS_OTHER_WCHAR_T |
284 | template<> inline unsigned short BOOST_REGEX_CALL global_lower<unsigned short>(unsigned short c){ return do_global_lower(c); } |
285 | template<> inline unsigned short BOOST_REGEX_CALL global_upper<unsigned short>(unsigned short c){ return do_global_upper(c); } |
286 | #endif |
287 | |
//
// global_value:
// numeric value of the digit character c: '0'-'9' give 0-9, 'a'-'f' and
// 'A'-'F' give 10-15, anything else gives -1.
// The ranges are tested from the highest band down, i.e. the code
// relies on the digits sorting below 'A'-'F', which sort below 'a'-'f'.
//
template <class charT>
int global_value(charT c)
{
   const charT digit_first = '0';
   const charT digit_last = '9';
   const charT lower_first = 'a';
   const charT lower_last = 'f';
   const charT upper_first = 'A';
   const charT upper_last = 'F';

   if(c >= lower_first)
   {
      // 'a' or above: either a lower case hex digit or out of range
      if(c > lower_last)
         return -1;
      return 10 + (c - lower_first);
   }
   if(c >= upper_first)
   {
      // between 'A' and just below 'a'
      if(c > upper_last)
         return -1;
      return 10 + (c - upper_first);
   }
   if(c >= digit_first)
   {
      // between '0' and just below 'A'
      if(c > digit_last)
         return -1;
      return c - digit_first;
   }
   return -1;
}
//
// global_toi:
// parses an unsigned integer in the given radix from [p1, p2), using
// t.value(ch, radix) to obtain the value of each character.
//
// p1 is taken by reference and is advanced past every digit consumed.
// Parsing stops at p2 or at the first character whose value is negative
// or not less than radix.
//
// Returns the parsed value, or -1 when:
//  - the range is empty (p1 is not dereferenced in that case),
//  - the first character is not a valid digit, or
//  - the value would not fit in an int (p1 is then left on the digit
//    that would have caused the overflow).
//
template <class charT, class traits>
int global_toi(const charT*& p1, const charT* p2, int radix, const traits& t)
{
   (void)t; // warning suppression
   // check for an empty range *before* looking at *p1:
   if(p1 == p2)
      return -1;
   int next_value = t.value(*p1, radix);
   if((next_value < 0) || (next_value >= radix))
      return -1;
   int result = 0;
   while(p1 != p2)
   {
      next_value = t.value(*p1, radix);
      if((next_value < 0) || (next_value >= radix))
         break;
      // Here 0 <= next_value < radix, so radix >= 1 and the division is
      // safe. Refuse to compute result * radix + next_value when it
      // would exceed INT_MAX (signed overflow is undefined behaviour).
      if(result > ((std::numeric_limits<int>::max)() - next_value) / radix)
         return -1;
      result *= radix;
      result += next_value;
      ++p1;
   }
   return result;
}
325 | |
//
// get_escape_R_string (primary template):
// returns a zero-terminated regular expression, as an array of charT,
// of the form  (?>\r\n?|[...])  . When charT can represent the value
// 0x2029 the character set also contains \x{2028} and \x{2029};
// otherwise the shorter form without them is returned.
//
template <class charT>
inline const charT* get_escape_R_string()
{
#ifdef BOOST_MSVC
#  pragma warning(push)
#  pragma warning(disable:4309 4245)
#endif
   // form used when charT is wide enough for U+2028 / U+2029:
   static const charT with_unicode_separators[] = {
      '(', '?', '>', '\x0D', '\x0A', '?', '|',
      '[', '\x0A', '\x0B', '\x0C', static_cast<unsigned char>('\x85'),
      '\\', 'x', '{', '2', '0', '2', '8', '}',
      '\\', 'x', '{', '2', '0', '2', '9', '}',
      ']', ')', '\0' };
   // form used for narrower character types:
   static const charT basic_separators[] = {
      '(', '?', '>', '\x0D', '\x0A', '?', '|',
      '[', '\x0A', '\x0B', '\x0C', static_cast<unsigned char>('\x85'),
      ']', ')', '\0' };

   // Round-trip 0x2029 through charT to find out whether it survives:
   charT probe = static_cast<charT>(0x2029u);
   bool survives = (static_cast<unsigned>(probe) == 0x2029u);

   const charT* chosen = basic_separators;
   if(survives)
      chosen = with_unicode_separators;
   return chosen;
#ifdef BOOST_MSVC
#  pragma warning(pop)
#endif
}
347 | |
348 | template <> |
349 | inline const char* get_escape_R_string<char>() |
350 | { |
351 | #ifdef BOOST_MSVC |
352 | # pragma warning(push) |
353 | # pragma warning(disable:4309) |
354 | #endif |
355 | static const char e2[] = { '(', '?', '>', '\x0D', '\x0A', '?', |
356 | '|', '[', '\x0A', '\x0B', '\x0C', '\x85', ']', ')', '\0' }; |
357 | return e2; |
358 | #ifdef BOOST_MSVC |
359 | # pragma warning(pop) |
360 | #endif |
361 | } |
362 | |
363 | } // BOOST_REGEX_DETAIL_NS |
364 | } // boost |
365 | |
366 | #ifdef BOOST_MSVC |
367 | #pragma warning(push) |
368 | #pragma warning(disable: 4103) |
369 | #endif |
370 | #ifdef BOOST_HAS_ABI_HEADERS |
371 | # include BOOST_ABI_SUFFIX |
372 | #endif |
373 | #ifdef BOOST_MSVC |
374 | #pragma warning(pop) |
375 | #endif |
376 | |
377 | #endif |
378 | |