1 | /** |
2 | * @copyright |
3 | * ==================================================================== |
4 | * Licensed to the Apache Software Foundation (ASF) under one |
5 | * or more contributor license agreements. See the NOTICE file |
6 | * distributed with this work for additional information |
7 | * regarding copyright ownership. The ASF licenses this file |
8 | * to you under the Apache License, Version 2.0 (the |
9 | * "License"); you may not use this file except in compliance |
10 | * with the License. You may obtain a copy of the License at |
11 | * |
12 | * http://www.apache.org/licenses/LICENSE-2.0 |
13 | * |
14 | * Unless required by applicable law or agreed to in writing, |
15 | * software distributed under the License is distributed on an |
16 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
17 | * KIND, either express or implied. See the License for the |
18 | * specific language governing permissions and limitations |
19 | * under the License. |
20 | * ==================================================================== |
21 | * @endcopyright |
22 | * |
23 | * @file svn_string.h |
24 | * @brief Counted-length strings for Subversion, plus some C string goodies. |
25 | * |
26 | * There are two string datatypes: @c svn_string_t and @c svn_stringbuf_t. |
27 | * The former is a simple pointer/length pair useful for passing around |
28 | * strings (or arbitrary bytes) with a counted length. @c svn_stringbuf_t is |
29 | * buffered to enable efficient appending of strings without an allocation |
30 | * and copy for each append operation. |
31 | * |
32 | * @c svn_string_t contains a <tt>const char *</tt> for its data, so it is |
33 | * most appropriate for constant data and for functions which expect constant, |
34 | * counted data. Functions should generally use <tt>const @c svn_string_t |
35 | * *</tt> as their parameter to indicate they are expecting a constant, |
36 | * counted string. |
37 | * |
38 | * @c svn_stringbuf_t uses a plain <tt>char *</tt> for its data, so it is |
39 | * most appropriate for modifiable data. |
40 | * |
41 | * <h3>Invariants</h3> |
42 | * |
43 | * 1. Null termination: |
44 | * |
45 | * Both structures maintain a significant invariant: |
46 | * |
47 | * <tt>s->data[s->len] == '\\0'</tt> |
48 | * |
49 | * The functions defined within this header file will maintain |
50 | * the invariant (which does imply that memory is |
51 | * allocated/defined as @c len+1 bytes). If code outside of the |
52 | * @c svn_string.h functions manually builds these structures, |
53 | * then they must enforce this invariant. |
54 | * |
55 | * Note that an @c svn_string(buf)_t may contain binary data, |
56 | * which means that strlen(s->data) does not have to equal @c |
57 | * s->len. The null terminator is provided to make it easier to |
58 | * pass @c s->data to C string interfaces. |
59 | * |
60 | * |
61 | * 2. Non-NULL input: |
62 | * |
63 | * All the functions assume their input data pointer is non-NULL, |
64 | * unless otherwise documented, and may seg fault if passed |
65 | * NULL. The input data may *contain* null bytes, of course, just |
66 | * the data pointer itself must not be NULL. |
67 | * |
68 | * <h3>Memory allocation</h3> |
69 | * |
70 | * All the functions make a deep copy of all input data, and never store |
71 | * a pointer to the original input data. |
72 | */ |
73 | |
74 | |
75 | #ifndef SVN_STRING_H |
76 | #define SVN_STRING_H |
77 | |
78 | #include <apr.h> /* for apr_size_t */ |
79 | #include <apr_pools.h> /* for apr_pool_t */ |
80 | #include <apr_tables.h> /* for apr_array_header_t */ |
81 | |
82 | #include "svn_types.h" /* for svn_boolean_t, svn_error_t */ |
83 | |
84 | #ifdef __cplusplus |
85 | extern "C" { |
86 | #endif /* __cplusplus */ |
87 | |
88 | /** |
89 | * @defgroup svn_string String handling |
90 | * @{ |
91 | */ |
92 | |
93 | |
94 | |
95 | /** A simple counted string: a pointer/length pair with constant data; the invariant @c data[len] == '\\0' must hold. */ |
96 | typedef struct svn_string_t |
97 | { |
98 | const char *data; /**< pointer to the bytestring (may contain binary data, including null bytes) */ |
99 | apr_size_t len; /**< length of bytestring, not counting the terminating '\\0' at @c data[len] */ |
100 | } svn_string_t; |
101 | |
102 | /** A buffered, modifiable string, capable of appending without an allocation and copy |
103 | * for each append. The invariant @c data[len] == '\\0' must hold. */ |
104 | typedef struct svn_stringbuf_t |
105 | { |
106 | /** the pool from which this string was originally allocated; it is not |
107 | * necessarily specific to this string. It is used only for allocating |
108 | * more memory when the string needs to grow. |
109 | */ |
110 | apr_pool_t *pool; |
111 | |
112 | /** pointer to the bytestring (modifiable; may contain binary data) */ |
113 | char *data; |
114 | |
115 | /** length of bytestring, not counting the terminating '\\0' at @c data[len] */ |
116 | apr_size_t len; |
117 | |
118 | /** total size of buffer allocated */ |
119 | apr_size_t blocksize; |
120 | } svn_stringbuf_t; |
121 | |
122 | |
123 | /** |
124 | * @defgroup svn_string_svn_string_t svn_string_t functions |
125 | * @{ |
126 | */ |
127 | |
128 | /** Create a new string copied from the null-terminated C string @a cstring. |
129 | */ |
130 | svn_string_t * |
131 | svn_string_create(const char *cstring, apr_pool_t *pool); |
132 | |
133 | /** Create a new, empty string. |
134 | * |
135 | * @since New in 1.8. |
136 | */ |
137 | svn_string_t * |
138 | svn_string_create_empty(apr_pool_t *pool); |
139 | |
140 | /** Create a new string copied from a generic string of bytes, @a bytes, of |
141 | * length @a size bytes. @a bytes is NOT assumed to be null-terminated, but |
142 | * the new string will be. |
143 | */ |
144 | svn_string_t * |
145 | svn_string_ncreate(const char *bytes, apr_size_t size, apr_pool_t *pool); |
146 | |
147 | /** Create a new string copied from the stringbuf @a strbuf. |
148 | */ |
149 | svn_string_t * |
150 | svn_string_create_from_buf(const svn_stringbuf_t *strbuf, apr_pool_t *pool); |
151 | |
152 | /** Create a new string by printf-style formatting using @a fmt and the |
153 | * variable arguments, which are as appropriate for apr_psprintf(). |
154 | */ |
155 | svn_string_t * |
156 | svn_string_createf(apr_pool_t *pool, const char *fmt, ...) |
157 | __attribute__((format(printf, 2, 3))); |
158 | |
159 | /** Create a new string by printf-style formatting using @a fmt and @a ap. |
160 | * This is the same as svn_string_createf() except for the different |
161 | * way of passing the variable arguments. |
162 | */ |
163 | svn_string_t * |
164 | svn_string_createv(apr_pool_t *pool, const char *fmt, va_list ap) |
165 | __attribute__((format(printf, 2, 0))); |
166 | |
167 | /** Return TRUE if @a str is empty (has length zero). */ |
168 | svn_boolean_t |
169 | svn_string_isempty(const svn_string_t *str); |
170 | |
171 | /** Return a duplicate of @a original_string. */ |
172 | svn_string_t * |
173 | svn_string_dup(const svn_string_t *original_string, apr_pool_t *pool); |
174 | |
175 | /** Return @c TRUE iff @a str1 and @a str2 have identical length and data. */ |
176 | svn_boolean_t |
177 | svn_string_compare(const svn_string_t *str1, const svn_string_t *str2); |
178 | |
179 | /** Return offset of first non-whitespace character in @a str, or return |
180 | * @a str->len if none. |
181 | */ |
182 | apr_size_t |
183 | svn_string_first_non_whitespace(const svn_string_t *str); |
184 | |
185 | /** Return position of last occurrence of @a ch in @a str, or return |
186 | * @a str->len if no occurrence. |
187 | */ |
188 | apr_size_t |
189 | svn_string_find_char_backward(const svn_string_t *str, char ch); |
190 | |
191 | /** @} */ |
192 | |
193 | |
194 | /** |
195 | * @defgroup svn_string_svn_stringbuf_t svn_stringbuf_t functions |
196 | * @{ |
197 | */ |
198 | |
199 | /** Create a new stringbuf copied from the null-terminated C string |
200 | * @a cstring. |
201 | */ |
202 | svn_stringbuf_t * |
203 | svn_stringbuf_create(const char *cstring, apr_pool_t *pool); |
204 | |
205 | /** Create a new stringbuf copied from the generic string of bytes, @a bytes, |
206 | * of length @a size bytes. @a bytes is NOT assumed to be null-terminated, |
207 | * but the new stringbuf will be. |
208 | */ |
209 | svn_stringbuf_t * |
210 | svn_stringbuf_ncreate(const char *bytes, apr_size_t size, apr_pool_t *pool); |
211 | |
212 | /** Create a new, empty stringbuf. |
213 | * |
214 | * @since New in 1.8. |
215 | */ |
216 | svn_stringbuf_t * |
217 | svn_stringbuf_create_empty(apr_pool_t *pool); |
218 | |
219 | /** Create a new, empty stringbuf with at least @a minimum_size bytes of |
220 | * space available in the memory block. |
221 | * |
222 | * The allocated string buffer will be at least one byte larger than |
223 | * @a minimum_size to account for a final '\\0'. |
224 | * |
225 | * @since New in 1.6. |
226 | */ |
227 | svn_stringbuf_t * |
228 | svn_stringbuf_create_ensure(apr_size_t minimum_size, apr_pool_t *pool); |
229 | |
230 | /** Create a new stringbuf copied from the string @a str. |
231 | */ |
232 | svn_stringbuf_t * |
233 | svn_stringbuf_create_from_string(const svn_string_t *str, apr_pool_t *pool); |
234 | |
235 | /** Create a new stringbuf by printf-style formatting using @a fmt and the |
236 | * variable arguments, which are as appropriate for apr_psprintf(). |
237 | */ |
238 | svn_stringbuf_t * |
239 | svn_stringbuf_createf(apr_pool_t *pool, const char *fmt, ...) |
240 | __attribute__((format(printf, 2, 3))); |
241 | |
242 | /** Create a new stringbuf by printf-style formatting using @a fmt and @a ap. |
243 | * This is the same as svn_stringbuf_createf() except for the different |
244 | * way of passing the variable arguments. |
245 | */ |
246 | svn_stringbuf_t * |
247 | svn_stringbuf_createv(apr_pool_t *pool, const char *fmt, va_list ap) |
248 | __attribute__((format(printf, 2, 0))); |
249 | |
250 | /** Make sure that @a str has at least @a minimum_size |
251 | * bytes of space available in the memory block. |
252 | * |
253 | * The allocated string buffer will be at least one byte larger than |
254 | * @a minimum_size to account for a final '\\0'. |
255 | * |
256 | * @note Before Subversion 1.8 this function did not ensure space for |
257 | * one byte more than @a minimum_size. If compatibility with pre-1.8 |
258 | * behaviour is required callers must assume space for only |
259 | * @a minimum_size-1 data bytes plus a final '\\0'. |
260 | */ |
261 | void |
262 | svn_stringbuf_ensure(svn_stringbuf_t *str, apr_size_t minimum_size); |
263 | |
264 | /** Set @a str to a copy of the null-terminated C string @a value. */ |
265 | void |
266 | svn_stringbuf_set(svn_stringbuf_t *str, const char *value); |
267 | |
268 | /** Set @a str to empty (zero length). */ |
269 | void |
270 | svn_stringbuf_setempty(svn_stringbuf_t *str); |
271 | |
272 | /** Return @c TRUE if @a str is empty (has length zero). */ |
273 | svn_boolean_t |
274 | svn_stringbuf_isempty(const svn_stringbuf_t *str); |
275 | |
276 | /** Chop @a nbytes bytes off end of @a str, but not more than @a str->len. */ |
277 | void |
278 | svn_stringbuf_chop(svn_stringbuf_t *str, apr_size_t nbytes); |
279 | |
280 | /** Fill @a str with character @a c. */ |
281 | void |
282 | svn_stringbuf_fillchar(svn_stringbuf_t *str, unsigned char c); |
283 | |
284 | /** Append the single character @a byte onto @a targetstr. |
285 | * |
286 | * This is an optimized version of svn_stringbuf_appendbytes() |
287 | * that is much faster to call and execute. Gains vary with the ABI. |
288 | * The advantages extend beyond the actual call because the reduced |
289 | * register pressure allows for more optimization within the caller. |
290 | * |
291 | * reallocs if necessary. @a targetstr is affected, nothing else is. |
292 | * @since New in 1.7. |
293 | */ |
294 | void |
295 | svn_stringbuf_appendbyte(svn_stringbuf_t *targetstr, |
296 | char byte); |
297 | |
298 | /** Append an array of bytes onto @a targetstr. |
299 | * |
300 | * reallocs if necessary. @a targetstr is affected, nothing else is. |
301 | */ |
302 | void |
303 | svn_stringbuf_appendbytes(svn_stringbuf_t *targetstr, |
304 | const char *bytes, |
305 | apr_size_t count); |
306 | |
307 | /** Append the stringbuf @a appendstr onto @a targetstr. |
308 | * |
309 | * reallocs if necessary. @a targetstr is affected, nothing else is. |
310 | */ |
311 | void |
312 | svn_stringbuf_appendstr(svn_stringbuf_t *targetstr, |
313 | const svn_stringbuf_t *appendstr); |
314 | |
315 | /** Append the C string @a cstr onto @a targetstr. |
316 | * |
317 | * reallocs if necessary. @a targetstr is affected, nothing else is. |
318 | */ |
319 | void |
320 | svn_stringbuf_appendcstr(svn_stringbuf_t *targetstr, |
321 | const char *cstr); |
322 | |
323 | /** Read @a count bytes from @a bytes and insert them into @a str at |
324 | * position @a pos and following. The resulting string will be |
325 | * @c count+str->len bytes long. If @a pos is greater than or equal to the |
326 | * number of bytes currently used in @a str, simply append @a bytes. |
327 | * |
328 | * Reallocs if necessary. @a str is affected, nothing else is. |
329 | * |
330 | * @note The inserted string may be a sub-range of @a str. |
331 | * |
332 | * @since New in 1.8. |
333 | */ |
334 | void |
335 | svn_stringbuf_insert(svn_stringbuf_t *str, |
336 | apr_size_t pos, |
337 | const char *bytes, |
338 | apr_size_t count); |
339 | |
340 | /** Removes @a count bytes from @a str, starting at position @a pos. |
341 | * If that range exceeds the current string data, @a str gets truncated |
342 | * at @a pos. If the latter is greater than or equal to @c str->len, this will |
343 | * be a no-op. Otherwise, the resulting string will be @c str->len-count |
344 | * bytes long. |
345 | * |
346 | * @since New in 1.8. |
347 | */ |
348 | void |
349 | svn_stringbuf_remove(svn_stringbuf_t *str, |
350 | apr_size_t pos, |
351 | apr_size_t count); |
352 | |
353 | /** Replace in @a str the substring which starts at @a pos and is @a |
354 | * old_count bytes long with a new substring @a bytes (which is @a |
355 | * new_count bytes long). |
356 | * |
357 | * This is faster but functionally equivalent to the following sequence: |
358 | * @code |
359 | svn_stringbuf_remove(str, pos, old_count); |
360 | svn_stringbuf_insert(str, pos, bytes, new_count); |
361 | * @endcode |
362 | * |
363 | * @since New in 1.8. |
364 | */ |
365 | void |
366 | svn_stringbuf_replace(svn_stringbuf_t *str, |
367 | apr_size_t pos, |
368 | apr_size_t old_count, |
369 | const char *bytes, |
370 | apr_size_t new_count); |
371 | |
372 | /** Return a duplicate of @a original_string. */ |
373 | svn_stringbuf_t * |
374 | svn_stringbuf_dup(const svn_stringbuf_t *original_string, apr_pool_t *pool); |
375 | |
376 | /** Return @c TRUE iff @a str1 and @a str2 have identical length and data. */ |
377 | svn_boolean_t |
378 | svn_stringbuf_compare(const svn_stringbuf_t *str1, |
379 | const svn_stringbuf_t *str2); |
380 | |
381 | /** Return offset of first non-whitespace character in @a str, or return |
382 | * @a str->len if none. |
383 | */ |
384 | apr_size_t |
385 | svn_stringbuf_first_non_whitespace(const svn_stringbuf_t *str); |
386 | |
387 | /** Strip whitespace from both sides of @a str (modified in place). */ |
388 | void |
389 | svn_stringbuf_strip_whitespace(svn_stringbuf_t *str); |
390 | |
391 | /** Return position of last occurrence of @a ch in @a str, or return |
392 | * @a str->len if no occurrence. |
393 | */ |
394 | apr_size_t |
395 | svn_stringbuf_find_char_backward(const svn_stringbuf_t *str, char ch); |
396 | |
397 | /** Return @c TRUE iff @a str1 and @a str2 have identical length and data. */ |
398 | svn_boolean_t |
399 | svn_string_compare_stringbuf(const svn_string_t *str1, |
400 | const svn_stringbuf_t *str2); |
401 | |
402 | /** @} */ |
403 | |
404 | |
405 | /** |
406 | * @defgroup svn_string_cstrings C string functions |
407 | * @{ |
408 | */ |
409 | |
410 | /** Divide @a input into substrings along @a sep_chars boundaries, return an |
411 | * array of copies of those substrings (plain const char*), allocating both |
412 | * the array and the copies in @a pool. |
413 | * |
414 | * None of the elements added to the array contain any of the |
415 | * characters in @a sep_chars, and none of the new elements are empty |
416 | * (thus, it is possible that the returned array will have length |
417 | * zero). |
418 | * |
419 | * If @a chop_whitespace is TRUE, then remove leading and trailing |
420 | * whitespace from the returned strings. |
421 | */ |
422 | apr_array_header_t * |
423 | svn_cstring_split(const char *input, |
424 | const char *sep_chars, |
425 | svn_boolean_t chop_whitespace, |
426 | apr_pool_t *pool); |
427 | |
428 | /** Like svn_cstring_split(), but append to existing @a array instead of |
429 | * creating a new one. Allocate the copied substrings in @a pool |
430 | * (i.e., caller decides whether or not to pass @a array->pool as @a pool). |
431 | */ |
432 | void |
433 | svn_cstring_split_append(apr_array_header_t *array, |
434 | const char *input, |
435 | const char *sep_chars, |
436 | svn_boolean_t chop_whitespace, |
437 | apr_pool_t *pool); |
438 | |
439 | |
440 | /** Return @c TRUE iff @a str matches any of the elements of @a list, a list |
441 | * of zero or more glob patterns. |
442 | */ |
443 | svn_boolean_t |
444 | svn_cstring_match_glob_list(const char *str, const apr_array_header_t *list); |
445 | |
446 | /** Return @c TRUE iff @a str exactly matches any of the elements of @a list. |
447 | * |
448 | * @since New in 1.7. |
449 | */ |
450 | svn_boolean_t |
451 | svn_cstring_match_list(const char *str, const apr_array_header_t *list); |
452 | |
453 | /** |
454 | * Get the next token from @a *str interpreting any char from @a sep as a |
455 | * token separator. Separators at the beginning of @a *str will be skipped. |
456 | * Returns a pointer to the beginning of the first token in @a *str or NULL |
457 | * if no token is left. Updates @a *str such that the next call will return |
458 | * the next token. |
459 | * |
460 | * @note The content of @a *str may be modified by this function. |
461 | * |
462 | * @since New in 1.8. |
463 | */ |
464 | char * |
465 | svn_cstring_tokenize(const char *sep, char **str); |
466 | |
467 | /** |
468 | * Return the number of line breaks in @a msg, allowing any kind of newline |
469 | * termination (CR, LF, CRLF, or LFCR), even inconsistent. |
470 | * |
471 | * @since New in 1.2. |
472 | */ |
473 | int |
474 | svn_cstring_count_newlines(const char *msg); |
475 | |
476 | /** |
477 | * Return a cstring which is the concatenation of @a strings (an array |
478 | * of char *) each followed by @a separator (that is, @a separator |
479 | * will also end the resulting string). Allocate the result in @a pool. |
480 | * If @a strings is empty, then return the empty string. |
481 | * |
482 | * @since New in 1.2. |
483 | */ |
484 | char * |
485 | svn_cstring_join(const apr_array_header_t *strings, |
486 | const char *separator, |
487 | apr_pool_t *pool); |
488 | |
489 | /** |
490 | * Compare two strings @a str1 and @a str2, treating case-equivalent |
491 | * unaccented Latin (ASCII subset) letters as equal. |
492 | * |
493 | * Returns an integer greater than, equal to, or less than 0, |
494 | * according to whether @a str1 is considered greater than, equal to, |
495 | * or less than @a str2. |
496 | * |
497 | * @since New in 1.5. |
498 | */ |
499 | int |
500 | svn_cstring_casecmp(const char *str1, const char *str2); |
501 | |
502 | /** |
503 | * Parse the C string @a str into a 64 bit number, and return it in @a *n. |
504 | * Assume that the number is represented in base @a base. |
505 | * Raise an error if conversion fails (e.g. due to overflow), or if the |
506 | * converted number is smaller than @a minval or larger than @a maxval. |
507 | * |
508 | * @since New in 1.7. |
509 | */ |
510 | svn_error_t * |
511 | svn_cstring_strtoi64(apr_int64_t *n, const char *str, |
512 | apr_int64_t minval, apr_int64_t maxval, |
513 | int base); |
514 | |
515 | /** |
516 | * Parse the C string @a str into a 64 bit number, and return it in @a *n. |
517 | * Assume that the number is represented in base 10. |
518 | * Raise an error if conversion fails (e.g. due to overflow). |
519 | * |
520 | * @since New in 1.7. |
521 | */ |
522 | svn_error_t * |
523 | svn_cstring_atoi64(apr_int64_t *n, const char *str); |
524 | |
525 | /** |
526 | * Parse the C string @a str into a 32 bit number, and return it in @a *n. |
527 | * Assume that the number is represented in base 10. |
528 | * Raise an error if conversion fails (e.g. due to overflow). |
529 | * |
530 | * @since New in 1.7. |
531 | */ |
532 | svn_error_t * |
533 | svn_cstring_atoi(int *n, const char *str); |
534 | |
535 | /** |
536 | * Parse the C string @a str into an unsigned 64 bit number, and return |
537 | * it in @a *n. Assume that the number is represented in base @a base. |
538 | * Raise an error if conversion fails (e.g. due to overflow), or if the |
539 | * converted number is smaller than @a minval or larger than @a maxval. |
540 | * |
541 | * @since New in 1.7. |
542 | */ |
543 | svn_error_t * |
544 | svn_cstring_strtoui64(apr_uint64_t *n, const char *str, |
545 | apr_uint64_t minval, apr_uint64_t maxval, |
546 | int base); |
547 | |
548 | /** |
549 | * Parse the C string @a str into an unsigned 64 bit number, and return |
550 | * it in @a *n. Assume that the number is represented in base 10. |
551 | * Raise an error if conversion fails (e.g. due to overflow). |
552 | * |
553 | * @since New in 1.7. |
554 | */ |
555 | svn_error_t * |
556 | svn_cstring_atoui64(apr_uint64_t *n, const char *str); |
557 | |
558 | /** |
559 | * Parse the C string @a str into an unsigned 32 bit number, and return |
560 | * it in @a *n. Assume that the number is represented in base 10. |
561 | * Raise an error if conversion fails (e.g. due to overflow). |
562 | * |
563 | * @since New in 1.7. |
564 | */ |
565 | svn_error_t * |
566 | svn_cstring_atoui(unsigned int *n, const char *str); |
567 | |
568 | /** @} */ |
569 | |
570 | /** @} */ |
571 | |
572 | |
573 | #ifdef __cplusplus |
574 | } |
575 | #endif /* __cplusplus */ |
576 | |
577 | #endif /* SVN_STRING_H */ |
578 | |