regex_internal.c source code [glibc/posix/regex_internal.c]

1	/ Extended regular expression matching and search library.*
2	Copyright (C) 2002-2024 Free Software Foundation, Inc.
3	This file is part of the GNU C Library.
4	Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>.
5
6	The GNU C Library is free software; you can redistribute it and/or
7	modify it under the terms of the GNU Lesser General Public
8	License as published by the Free Software Foundation; either
9	version 2.1 of the License, or (at your option) any later version.
10
11	The GNU C Library is distributed in the hope that it will be useful,
12	but WITHOUT ANY WARRANTY; without even the implied warranty of
13	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14	Lesser General Public License for more details.
15
16	You should have received a copy of the GNU Lesser General Public
17	License along with the GNU C Library; if not, see
18	<https://www.gnu.org/licenses/>. /*
19
20	static void re_string_construct_common (const char *str, Idx len,
21	re_string_t *pstr,
22	RE_TRANSLATE_TYPE trans, bool icase,
23	const re_dfa_t *dfa);
24	static re_dfastate_t create_ci_newstate (const* re_dfa_t *dfa,
25	const re_node_set *nodes,
26	re_hashval_t hash);
27	static re_dfastate_t create_cd_newstate (const* re_dfa_t *dfa,
28	const re_node_set *nodes,
29	unsigned int context,
30	re_hashval_t hash);
31	static reg_errcode_t re_string_realloc_buffers (re_string_t *pstr,
32	Idx new_buf_len);
33	#ifdef RE_ENABLE_I18N
34	static void build_wcs_buffer (re_string_t *pstr);
35	static reg_errcode_t build_wcs_upper_buffer (re_string_t *pstr);
36	#endif /* RE_ENABLE_I18N */
37	static void build_upper_buffer (re_string_t *pstr);
38	static void re_string_translate_buffer (re_string_t *pstr);
39	static unsigned int re_string_context_at (const re_string_t *input, Idx idx,
40	int eflags) __attribute__ ((pure));
41
42	/ Functions for string operation. /
43
44	/ This function allocate the buffers. It is necessary to call*
45	re_string_reconstruct before using the object. /*
46
47	static reg_errcode_t
48	__attribute_warn_unused_result__
49	re_string_allocate (re_string_t pstr, const* char *str, Idx len, Idx init_len,
50	RE_TRANSLATE_TYPE trans, bool icase, const re_dfa_t *dfa)
51	{
52	reg_errcode_t ret;
53	Idx init_buf_len;
54
55	/ Ensure at least one character fits into the buffers. /
56	if (init_len < dfa->mb_cur_max)
57	init_len = dfa->mb_cur_max;
58	init_buf_len = (len + `1` < init_len) ? len + `1`: init_len;
59	re_string_construct_common (str, len, pstr, trans, icase, dfa);
60
61	ret = re_string_realloc_buffers (pstr, new_buf_len: init_buf_len);
62	if (__glibc_unlikely (ret != REG_NOERROR))
63	return ret;
64
65	pstr->word_char = dfa->word_char;
66	pstr->word_ops_used = dfa->word_ops_used;
67	pstr->mbs = pstr->mbs_allocated ? pstr->mbs : (unsigned char *) str;
68	pstr->valid_len = (pstr->mbs_allocated \|\| dfa->mb_cur_max > `1`) ? `0` : len;
69	pstr->valid_raw_len = pstr->valid_len;
70	return REG_NOERROR;
71	}
72
73	/ This function allocate the buffers, and initialize them. /
74
75	static reg_errcode_t
76	__attribute_warn_unused_result__
77	re_string_construct (re_string_t pstr, const* char *str, Idx len,
78	RE_TRANSLATE_TYPE trans, bool icase, const re_dfa_t *dfa)
79	{
80	reg_errcode_t ret;
81	memset (pstr, `'\0'`, sizeof (re_string_t));
82	re_string_construct_common (str, len, pstr, trans, icase, dfa);
83
84	if (len > `0`)
85	{
86	ret = re_string_realloc_buffers (pstr, new_buf_len: len + `1`);
87	if (__glibc_unlikely (ret != REG_NOERROR))
88	return ret;
89	}
90	pstr->mbs = pstr->mbs_allocated ? pstr->mbs : (unsigned char *) str;
91
92	if (icase)
93	{
94	#ifdef RE_ENABLE_I18N
95	if (dfa->mb_cur_max > `1`)
96	{
97	while (`1`)
98	{
99	ret = build_wcs_upper_buffer (pstr);
100	if (__glibc_unlikely (ret != REG_NOERROR))
101	return ret;
102	if (pstr->valid_raw_len >= len)
103	break;
104	if (pstr->bufs_len > pstr->valid_len + dfa->mb_cur_max)
105	break;
106	ret = re_string_realloc_buffers (pstr, new_buf_len: pstr->bufs_len * `2`);
107	if (__glibc_unlikely (ret != REG_NOERROR))
108	return ret;
109	}
110	}
111	else
112	#endif /* RE_ENABLE_I18N */
113	build_upper_buffer (pstr);
114	}
115	else
116	{
117	#ifdef RE_ENABLE_I18N
118	if (dfa->mb_cur_max > `1`)
119	build_wcs_buffer (pstr);
120	else
121	#endif /* RE_ENABLE_I18N */
122	{
123	if (trans != NULL)
124	re_string_translate_buffer (pstr);
125	else
126	{
127	pstr->valid_len = pstr->bufs_len;
128	pstr->valid_raw_len = pstr->bufs_len;
129	}
130	}
131	}
132
133	return REG_NOERROR;
134	}
135
136	/ Helper functions for re_string_allocate, and re_string_construct. /
137
138	static reg_errcode_t
139	__attribute_warn_unused_result__
140	re_string_realloc_buffers (re_string_t *pstr, Idx new_buf_len)
141	{
142	#ifdef RE_ENABLE_I18N
143	if (pstr->mb_cur_max > `1`)
144	{
145	wint_t *new_wcs;
146
147	/ Avoid overflow in realloc. /
148	const size_t max_object_size = MAX (sizeof (wint_t), sizeof (Idx));
149	if (__glibc_unlikely (MIN (IDX_MAX, SIZE_MAX / max_object_size)
150	< new_buf_len))
151	return REG_ESPACE;
152
153	new_wcs = re_realloc (pstr->wcs, wint_t, new_buf_len);
154	if (__glibc_unlikely (new_wcs == NULL))
155	return REG_ESPACE;
156	pstr->wcs = new_wcs;
157	if (pstr->offsets != NULL)
158	{
159	Idx *new_offsets = re_realloc (pstr->offsets, Idx, new_buf_len);
160	if (__glibc_unlikely (new_offsets == NULL))
161	return REG_ESPACE;
162	pstr->offsets = new_offsets;
163	}
164	}
165	#endif /* RE_ENABLE_I18N */
166	if (pstr->mbs_allocated)
167	{
168	unsigned char new_mbs = re_realloc (pstr->mbs, unsigned* char,
169	new_buf_len);
170	if (__glibc_unlikely (new_mbs == NULL))
171	return REG_ESPACE;
172	pstr->mbs = new_mbs;
173	}
174	pstr->bufs_len = new_buf_len;
175	return REG_NOERROR;
176	}
177
178
179	static void
180	re_string_construct_common (const char str, Idx len, re_string_t pstr,
181	RE_TRANSLATE_TYPE trans, bool icase,
182	const re_dfa_t *dfa)
183	{
184	pstr->raw_mbs = (const unsigned char *) str;
185	pstr->len = len;
186	pstr->raw_len = len;
187	pstr->trans = trans;
188	pstr->icase = icase;
189	pstr->mbs_allocated = (trans != NULL \|\| icase);
190	pstr->mb_cur_max = dfa->mb_cur_max;
191	pstr->is_utf8 = dfa->is_utf8;
192	pstr->map_notascii = dfa->map_notascii;
193	pstr->stop = pstr->len;
194	pstr->raw_stop = pstr->stop;
195	}
196
197	#ifdef RE_ENABLE_I18N
198
199	/ Build wide character buffer PSTR->WCS.*
200	If the byte sequence of the string are:
201	<mb1>(0), <mb1>(1), <mb2>(0), <mb2>(1), <sb3>
202	Then wide character buffer will be:
203	<wc1> , WEOF , <wc2> , WEOF , <wc3>
204	We use WEOF for padding, they indicate that the position isn't
205	a first byte of a multibyte character.
206
207	Note that this function assumes PSTR->VALID_LEN elements are already
208	built and starts from PSTR->VALID_LEN. /*
209
210	static void
211	build_wcs_buffer (re_string_t *pstr)
212	{
213	#ifdef _LIBC
214	unsigned char buf[MB_LEN_MAX];
215	DEBUG_ASSERT (MB_LEN_MAX >= pstr->mb_cur_max);
216	#else
217	unsigned char buf[`64`];
218	#endif
219	mbstate_t prev_st;
220	Idx byte_idx, end_idx, remain_len;
221	size_t mbclen;
222
223	/ Build the buffers from pstr->valid_len to either pstr->len or*
224	pstr->bufs_len. /*
225	end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len;
226	for (byte_idx = pstr->valid_len; byte_idx < end_idx;)
227	{
228	wchar_t wc;
229	const char *p;
230
231	remain_len = end_idx - byte_idx;
232	prev_st = pstr->cur_state;
233	/ Apply the translation if we need. /
234	if (__glibc_unlikely (pstr->trans != NULL))
235	{
236	int i, ch;
237
238	for (i = `0`; i < pstr->mb_cur_max && i < remain_len; ++i)
239	{
240	ch = pstr->raw_mbs [pstr->raw_mbs_idx + byte_idx + i];
241	buf[i] = pstr->mbs[byte_idx + i] = pstr->trans[ch];
242	}
243	p = (const char *) buf;
244	}
245	else
246	p = (const char *) pstr->raw_mbs + pstr->raw_mbs_idx + byte_idx;
247	mbclen = __mbrtowc (&wc, p, remain_len, &pstr->cur_state);
248	if (__glibc_unlikely (mbclen == (size_t) -`1` \|\| mbclen == `0`
249	\|\| (mbclen == (size_t) -`2`
250	&& pstr->bufs_len >= pstr->len)))
251	{
252	/ We treat these cases as a singlebyte character. /
253	mbclen = `1`;
254	wc = (wchar_t) pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx];
255	if (__glibc_unlikely (pstr->trans != NULL))
256	wc = pstr->trans[wc];
257	pstr->cur_state = prev_st;
258	}
259	else if (__glibc_unlikely (mbclen == (size_t) -`2`))
260	{
261	/ The buffer doesn't have enough space, finish to build. /
262	pstr->cur_state = prev_st;
263	break;
264	}
265
266	/ Write wide character and padding. /
267	pstr->wcs[byte_idx++] = wc;
268	/ Write paddings. /
269	for (remain_len = byte_idx + mbclen - `1`; byte_idx < remain_len ;)
270	pstr->wcs[byte_idx++] = WEOF;
271	}
272	pstr->valid_len = byte_idx;
273	pstr->valid_raw_len = byte_idx;
274	}
275
276	/ Build wide character buffer PSTR->WCS like build_wcs_buffer,*
277	but for REG_ICASE. /*
278
279	static reg_errcode_t
280	__attribute_warn_unused_result__
281	build_wcs_upper_buffer (re_string_t *pstr)
282	{
283	mbstate_t prev_st;
284	Idx src_idx, byte_idx, end_idx, remain_len;
285	size_t mbclen;
286	#ifdef _LIBC
287	char buf[MB_LEN_MAX];
288	DEBUG_ASSERT (pstr->mb_cur_max <= MB_LEN_MAX);
289	#else
290	char buf[`64`];
291	#endif
292
293	byte_idx = pstr->valid_len;
294	end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len;
295
296	/ The following optimization assumes that ASCII characters can be*
297	mapped to wide characters with a simple cast. /*
298	if (! pstr->map_notascii && pstr->trans == NULL && !pstr->offsets_needed)
299	{
300	while (byte_idx < end_idx)
301	{
302	wchar_t wc;
303	unsigned char ch = pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx];
304
305	if (isascii (ch) && mbsinit (&pstr->cur_state))
306	{
307	/ The next step uses the assumption that wchar_t is encoded*
308	ASCII-safe: all ASCII values can be converted like this. /*
309	wchar_t wcu = __towupper (ch);
310	if (isascii (wcu))
311	{
312	pstr->mbs[byte_idx] = wcu;
313	pstr->wcs[byte_idx] = wcu;
314	byte_idx++;
315	continue;
316	}
317	}
318
319	remain_len = end_idx - byte_idx;
320	prev_st = pstr->cur_state;
321	mbclen = __mbrtowc (&wc,
322	((const char *) pstr->raw_mbs + pstr->raw_mbs_idx
323	+ byte_idx), remain_len, &pstr->cur_state);
324	if (__glibc_likely (`0` < mbclen && mbclen < (size_t) -`2`))
325	{
326	wchar_t wcu = __towupper (wc);
327	if (wcu != wc)
328	{
329	size_t mbcdlen;
330
331	mbcdlen = __wcrtomb (s: buf, wc: wcu, ps: &prev_st);
332	if (__glibc_likely (mbclen == mbcdlen))
333	memcpy (pstr->mbs + byte_idx, buf, mbclen);
334	else
335	{
336	src_idx = byte_idx;
337	goto offsets_needed;
338	}
339	}
340	else
341	memcpy (pstr->mbs + byte_idx,
342	pstr->raw_mbs + pstr->raw_mbs_idx + byte_idx, mbclen);
343	pstr->wcs[byte_idx++] = wcu;
344	/ Write paddings. /
345	for (remain_len = byte_idx + mbclen - `1`; byte_idx < remain_len ;)
346	pstr->wcs[byte_idx++] = WEOF;
347	}
348	else if (mbclen == (size_t) -`1` \|\| mbclen == `0`
349	\|\| (mbclen == (size_t) -`2` && pstr->bufs_len >= pstr->len))
350	{
351	/ It is an invalid character, an incomplete character*
352	at the end of the string, or '\0'. Just use the byte. /*
353	pstr->mbs[byte_idx] = ch;
354	/ And also cast it to wide char. /
355	pstr->wcs[byte_idx++] = (wchar_t) ch;
356	if (__glibc_unlikely (mbclen == (size_t) -`1`))
357	pstr->cur_state = prev_st;
358	}
359	else
360	{
361	/ The buffer doesn't have enough space, finish to build. /
362	pstr->cur_state = prev_st;
363	break;
364	}
365	}
366	pstr->valid_len = byte_idx;
367	pstr->valid_raw_len = byte_idx;
368	return REG_NOERROR;
369	}
370	else
371	for (src_idx = pstr->valid_raw_len; byte_idx < end_idx;)
372	{
373	wchar_t wc;
374	const char *p;
375	offsets_needed:
376	remain_len = end_idx - byte_idx;
377	prev_st = pstr->cur_state;
378	if (__glibc_unlikely (pstr->trans != NULL))
379	{
380	int i, ch;
381
382	for (i = `0`; i < pstr->mb_cur_max && i < remain_len; ++i)
383	{
384	ch = pstr->raw_mbs [pstr->raw_mbs_idx + src_idx + i];
385	buf[i] = pstr->trans[ch];
386	}
387	p = (const char *) buf;
388	}
389	else
390	p = (const char *) pstr->raw_mbs + pstr->raw_mbs_idx + src_idx;
391	mbclen = __mbrtowc (&wc, p, remain_len, &pstr->cur_state);
392	if (__glibc_likely (`0` < mbclen && mbclen < (size_t) -`2`))
393	{
394	wchar_t wcu = __towupper (wc);
395	if (wcu != wc)
396	{
397	size_t mbcdlen;
398
399	mbcdlen = __wcrtomb (s: (char *) buf, wc: wcu, ps: &prev_st);
400	if (__glibc_likely (mbclen == mbcdlen))
401	memcpy (pstr->mbs + byte_idx, buf, mbclen);
402	else if (mbcdlen != (size_t) -`1`)
403	{
404	size_t i;
405
406	if (byte_idx + mbcdlen > pstr->bufs_len)
407	{
408	pstr->cur_state = prev_st;
409	break;
410	}
411
412	if (pstr->offsets == NULL)
413	{
414	pstr->offsets = re_malloc (Idx, pstr->bufs_len);
415
416	if (pstr->offsets == NULL)
417	return REG_ESPACE;
418	}
419	if (!pstr->offsets_needed)
420	{
421	for (i = `0`; i < (size_t) byte_idx; ++i)
422	pstr->offsets[i] = i;
423	pstr->offsets_needed = `1`;
424	}
425
426	memcpy (pstr->mbs + byte_idx, buf, mbcdlen);
427	pstr->wcs[byte_idx] = wcu;
428	pstr->offsets[byte_idx] = src_idx;
429	for (i = `1`; i < mbcdlen; ++i)
430	{
431	pstr->offsets[byte_idx + i]
432	= src_idx + (i < mbclen ? i : mbclen - `1`);
433	pstr->wcs[byte_idx + i] = WEOF;
434	}
435	pstr->len += mbcdlen - mbclen;
436	if (pstr->raw_stop > src_idx)
437	pstr->stop += mbcdlen - mbclen;
438	end_idx = (pstr->bufs_len > pstr->len)
439	? pstr->len : pstr->bufs_len;
440	byte_idx += mbcdlen;
441	src_idx += mbclen;
442	continue;
443	}
444	else
445	memcpy (pstr->mbs + byte_idx, p, mbclen);
446	}
447	else
448	memcpy (pstr->mbs + byte_idx, p, mbclen);
449
450	if (__glibc_unlikely (pstr->offsets_needed != `0`))
451	{
452	size_t i;
453	for (i = `0`; i < mbclen; ++i)
454	pstr->offsets[byte_idx + i] = src_idx + i;
455	}
456	src_idx += mbclen;
457
458	pstr->wcs[byte_idx++] = wcu;
459	/ Write paddings. /
460	for (remain_len = byte_idx + mbclen - `1`; byte_idx < remain_len ;)
461	pstr->wcs[byte_idx++] = WEOF;
462	}
463	else if (mbclen == (size_t) -`1` \|\| mbclen == `0`
464	\|\| (mbclen == (size_t) -`2` && pstr->bufs_len >= pstr->len))
465	{
466	/ It is an invalid character or '\0'. Just use the byte. /
467	int ch = pstr->raw_mbs[pstr->raw_mbs_idx + src_idx];
468
469	if (__glibc_unlikely (pstr->trans != NULL))
470	ch = pstr->trans [ch];
471	pstr->mbs[byte_idx] = ch;
472
473	if (__glibc_unlikely (pstr->offsets_needed != `0`))
474	pstr->offsets[byte_idx] = src_idx;
475	++src_idx;
476
477	/ And also cast it to wide char. /
478	pstr->wcs[byte_idx++] = (wchar_t) ch;
479	if (__glibc_unlikely (mbclen == (size_t) -`1`))
480	pstr->cur_state = prev_st;
481	}
482	else
483	{
484	/ The buffer doesn't have enough space, finish to build. /
485	pstr->cur_state = prev_st;
486	break;
487	}
488	}
489	pstr->valid_len = byte_idx;
490	pstr->valid_raw_len = src_idx;
491	return REG_NOERROR;
492	}
493
494	/ Skip characters until the index becomes greater than NEW_RAW_IDX.*
495	Return the index. /*
496
497	static Idx
498	re_string_skip_chars (re_string_t pstr, Idx new_raw_idx, wint_t last_wc)
499	{
500	mbstate_t prev_st;
501	Idx rawbuf_idx;
502	size_t mbclen;
503	wint_t wc = WEOF;
504
505	/ Skip the characters which are not necessary to check. /
506	for (rawbuf_idx = pstr->raw_mbs_idx + pstr->valid_raw_len;
507	rawbuf_idx < new_raw_idx;)
508	{
509	wchar_t wc2;
510	Idx remain_len = pstr->raw_len - rawbuf_idx;
511	prev_st = pstr->cur_state;
512	mbclen = __mbrtowc (&wc2, (const char *) pstr->raw_mbs + rawbuf_idx,
513	remain_len, &pstr->cur_state);
514	if (__glibc_unlikely (mbclen == (size_t) -`2` \|\| mbclen == (size_t) -`1`
515	\|\| mbclen == `0`))
516	{
517	/ We treat these cases as a single byte character. /
518	if (mbclen == `0` \|\| remain_len == `0`)
519	wc = L`'\0'`;
520	else
521	wc = (unsigned* char *) (pstr->raw_mbs + rawbuf_idx);
522	mbclen = `1`;
523	pstr->cur_state = prev_st;
524	}
525	else
526	wc = wc2;
527	/ Then proceed the next character. /
528	rawbuf_idx += mbclen;
529	}
530	*last_wc = wc;
531	return rawbuf_idx;
532	}
533	#endif /* RE_ENABLE_I18N */
534
535	/ Build the buffer PSTR->MBS, and apply the translation if we need.*
536	This function is used in case of REG_ICASE. /*
537
538	static void
539	build_upper_buffer (re_string_t *pstr)
540	{
541	Idx char_idx, end_idx;
542	end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len;
543
544	for (char_idx = pstr->valid_len; char_idx < end_idx; ++char_idx)
545	{
546	int ch = pstr->raw_mbs[pstr->raw_mbs_idx + char_idx];
547	if (__glibc_unlikely (pstr->trans != NULL))
548	ch = pstr->trans[ch];
549	pstr->mbs[char_idx] = toupper (ch);
550	}
551	pstr->valid_len = char_idx;
552	pstr->valid_raw_len = char_idx;
553	}
554
555	/ Apply TRANS to the buffer in PSTR. /
556
557	static void
558	re_string_translate_buffer (re_string_t *pstr)
559	{
560	Idx buf_idx, end_idx;
561	end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len;
562
563	for (buf_idx = pstr->valid_len; buf_idx < end_idx; ++buf_idx)
564	{
565	int ch = pstr->raw_mbs[pstr->raw_mbs_idx + buf_idx];
566	pstr->mbs[buf_idx] = pstr->trans[ch];
567	}
568
569	pstr->valid_len = buf_idx;
570	pstr->valid_raw_len = buf_idx;
571	}
572
573	/ This function re-construct the buffers.*
574	Concretely, convert to wide character in case of pstr->mb_cur_max > 1,
575	convert to upper case in case of REG_ICASE, apply translation. /*
576
577	static reg_errcode_t
578	__attribute_warn_unused_result__
579	re_string_reconstruct (re_string_t pstr, Idx idx, int* eflags)
580	{
581	Idx offset;
582
583	if (__glibc_unlikely (pstr->raw_mbs_idx <= idx))
584	offset = idx - pstr->raw_mbs_idx;
585	else
586	{
587	/ Reset buffer. /
588	#ifdef RE_ENABLE_I18N
589	if (pstr->mb_cur_max > `1`)
590	memset (&pstr->cur_state, `'\0'`, sizeof (mbstate_t));
591	#endif /* RE_ENABLE_I18N */
592	pstr->len = pstr->raw_len;
593	pstr->stop = pstr->raw_stop;
594	pstr->valid_len = `0`;
595	pstr->raw_mbs_idx = `0`;
596	pstr->valid_raw_len = `0`;
597	pstr->offsets_needed = `0`;
598	pstr->tip_context = ((eflags & REG_NOTBOL) ? CONTEXT_BEGBUF
599	: CONTEXT_NEWLINE \| CONTEXT_BEGBUF);
600	if (!pstr->mbs_allocated)
601	pstr->mbs = (unsigned char *) pstr->raw_mbs;
602	offset = idx;
603	}
604
605	if (__glibc_likely (offset != `0`))
606	{
607	/ Should the already checked characters be kept? /
608	if (__glibc_likely (offset < pstr->valid_raw_len))
609	{
610	/ Yes, move them to the front of the buffer. /
611	#ifdef RE_ENABLE_I18N
612	if (__glibc_unlikely (pstr->offsets_needed))
613	{
614	Idx low = `0`, high = pstr->valid_len, mid;
615	do
616	{
617	mid = (high + low) / `2`;
618	if (pstr->offsets[mid] > offset)
619	high = mid;
620	else if (pstr->offsets[mid] < offset)
621	low = mid + `1`;
622	else
623	break;
624	}
625	while (low < high);
626	if (pstr->offsets[mid] < offset)
627	++mid;
628	pstr->tip_context = re_string_context_at (input: pstr, idx: mid - `1`,
629	eflags);
630	/ This can be quite complicated, so handle specially*
631	only the common and easy case where the character with
632	different length representation of lower and upper
633	case is present at or after offset. /*
634	if (pstr->valid_len > offset
635	&& mid == offset && pstr->offsets[mid] == offset)
636	{
637	memmove (pstr->wcs, pstr->wcs + offset,
638	(pstr->valid_len - offset) * sizeof (wint_t));
639	memmove (pstr->mbs, pstr->mbs + offset, pstr->valid_len - offset);
640	pstr->valid_len -= offset;
641	pstr->valid_raw_len -= offset;
642	for (low = `0`; low < pstr->valid_len; low++)
643	pstr->offsets[low] = pstr->offsets[low + offset] - offset;
644	}
645	else
646	{
647	/ Otherwise, just find out how long the partial multibyte*
648	character at offset is and fill it with WEOF/255. /*
649	pstr->len = pstr->raw_len - idx + offset;
650	pstr->stop = pstr->raw_stop - idx + offset;
651	pstr->offsets_needed = `0`;
652	while (mid > `0` && pstr->offsets[mid - `1`] == offset)
653	--mid;
654	while (mid < pstr->valid_len)
655	if (pstr->wcs[mid] != WEOF)
656	break;
657	else
658	++mid;
659	if (mid == pstr->valid_len)
660	pstr->valid_len = `0`;
661	else
662	{
663	pstr->valid_len = pstr->offsets[mid] - offset;
664	if (pstr->valid_len)
665	{
666	for (low = `0`; low < pstr->valid_len; ++low)
667	pstr->wcs[low] = WEOF;
668	memset (pstr->mbs, `255`, pstr->valid_len);
669	}
670	}
671	pstr->valid_raw_len = pstr->valid_len;
672	}
673	}
674	else
675	#endif
676	{
677	pstr->tip_context = re_string_context_at (input: pstr, idx: offset - `1`,
678	eflags);
679	#ifdef RE_ENABLE_I18N
680	if (pstr->mb_cur_max > `1`)
681	memmove (pstr->wcs, pstr->wcs + offset,
682	(pstr->valid_len - offset) * sizeof (wint_t));
683	#endif /* RE_ENABLE_I18N */
684	if (__glibc_unlikely (pstr->mbs_allocated))
685	memmove (pstr->mbs, pstr->mbs + offset,
686	pstr->valid_len - offset);
687	pstr->valid_len -= offset;
688	pstr->valid_raw_len -= offset;
689	DEBUG_ASSERT (pstr->valid_len > `0`);
690	}
691	}
692	else
693	{
694	#ifdef RE_ENABLE_I18N
695	/ No, skip all characters until IDX. /
696	Idx prev_valid_len = pstr->valid_len;
697
698	if (__glibc_unlikely (pstr->offsets_needed))
699	{
700	pstr->len = pstr->raw_len - idx + offset;
701	pstr->stop = pstr->raw_stop - idx + offset;
702	pstr->offsets_needed = `0`;
703	}
704	#endif
705	pstr->valid_len = `0`;
706	#ifdef RE_ENABLE_I18N
707	if (pstr->mb_cur_max > `1`)
708	{
709	Idx wcs_idx;
710	wint_t wc = WEOF;
711
712	if (pstr->is_utf8)
713	{
714	const unsigned char raw, p, *end;
715
716	/ Special case UTF-8. Multi-byte chars start with any*
717	byte other than 0x80 - 0xbf. /*
718	raw = pstr->raw_mbs + pstr->raw_mbs_idx;
719	end = raw + (offset - pstr->mb_cur_max);
720	if (end < pstr->raw_mbs)
721	end = pstr->raw_mbs;
722	p = raw + offset - `1`;
723	#ifdef _LIBC
724	/ We know the wchar_t encoding is UCS4, so for the simple*
725	case, ASCII characters, skip the conversion step. /*
726	if (isascii (*p) && __glibc_likely (pstr->trans == NULL))
727	{
728	memset (&pstr->cur_state, `'\0'`, sizeof (mbstate_t));
729	/ pstr->valid_len = 0; /
730	wc = (wchar_t) *p;
731	}
732	else
733	#endif
734	for (; p >= end; --p)
735	if ((*p & `0xc0`) != `0x80`)
736	{
737	mbstate_t cur_state;
738	wchar_t wc2;
739	Idx mlen = raw + pstr->len - p;
740	unsigned char buf[`6`];
741	size_t mbclen;
742
743	const unsigned char *pp = p;
744	if (__glibc_unlikely (pstr->trans != NULL))
745	{
746	int i = mlen < `6` ? mlen : `6`;
747	while (--i >= `0`)
748	buf[i] = pstr->trans[p[i]];
749	pp = buf;
750	}
751	/ XXX Don't use mbrtowc, we know which conversion*
752	to use (UTF-8 -> UCS4). /*
753	memset (&cur_state, `0`, sizeof (cur_state));
754	mbclen = __mbrtowc (&wc2, (const char *) pp, mlen,
755	&cur_state);
756	if (raw + offset - p <= mbclen
757	&& mbclen < (size_t) -`2`)
758	{
759	memset (&pstr->cur_state, `'\0'`,
760	sizeof (mbstate_t));
761	pstr->valid_len = mbclen - (raw + offset - p);
762	wc = wc2;
763	}
764	break;
765	}
766	}
767
768	if (wc == WEOF)
769	pstr->valid_len = re_string_skip_chars (pstr, new_raw_idx: idx, last_wc: &wc) - idx;
770	if (wc == WEOF)
771	pstr->tip_context
772	= re_string_context_at (input: pstr, idx: prev_valid_len - `1`, eflags);
773	else
774	pstr->tip_context = ((__glibc_unlikely (pstr->word_ops_used != `0`)
775	&& IS_WIDE_WORD_CHAR (wc))
776	? CONTEXT_WORD
777	: ((IS_WIDE_NEWLINE (wc)
778	&& pstr->newline_anchor)
779	? CONTEXT_NEWLINE : `0`));
780	if (__glibc_unlikely (pstr->valid_len))
781	{
782	for (wcs_idx = `0`; wcs_idx < pstr->valid_len; ++wcs_idx)
783	pstr->wcs[wcs_idx] = WEOF;
784	if (pstr->mbs_allocated)
785	memset (pstr->mbs, `255`, pstr->valid_len);
786	}
787	pstr->valid_raw_len = pstr->valid_len;
788	}
789	else
790	#endif /* RE_ENABLE_I18N */
791	{
792	int c = pstr->raw_mbs[pstr->raw_mbs_idx + offset - `1`];
793	pstr->valid_raw_len = `0`;
794	if (pstr->trans)
795	c = pstr->trans[c];
796	pstr->tip_context = (bitset_contain (set: pstr->word_char, i: c)
797	? CONTEXT_WORD
798	: ((IS_NEWLINE (c) && pstr->newline_anchor)
799	? CONTEXT_NEWLINE : `0`));
800	}
801	}
802	if (!__glibc_unlikely (pstr->mbs_allocated))
803	pstr->mbs += offset;
804	}
805	pstr->raw_mbs_idx = idx;
806	pstr->len -= offset;
807	pstr->stop -= offset;
808
809	/ Then build the buffers. /
810	#ifdef RE_ENABLE_I18N
811	if (pstr->mb_cur_max > `1`)
812	{
813	if (pstr->icase)
814	{
815	reg_errcode_t ret = build_wcs_upper_buffer (pstr);
816	if (__glibc_unlikely (ret != REG_NOERROR))
817	return ret;
818	}
819	else
820	build_wcs_buffer (pstr);
821	}
822	else
823	#endif /* RE_ENABLE_I18N */
824	if (__glibc_unlikely (pstr->mbs_allocated))
825	{
826	if (pstr->icase)
827	build_upper_buffer (pstr);
828	else if (pstr->trans != NULL)
829	re_string_translate_buffer (pstr);
830	}
831	else
832	pstr->valid_len = pstr->len;
833
834	pstr->cur_idx = `0`;
835	return REG_NOERROR;
836	}
837
838	static unsigned char
839	__attribute__ ((pure))
840	re_string_peek_byte_case (const re_string_t *pstr, Idx idx)
841	{
842	int ch;
843	Idx off;
844
845	/ Handle the common (easiest) cases first. /
846	if (__glibc_likely (!pstr->mbs_allocated))
847	return re_string_peek_byte (pstr, idx);
848
849	#ifdef RE_ENABLE_I18N
850	if (pstr->mb_cur_max > `1`
851	&& ! re_string_is_single_byte_char (pstr, pstr->cur_idx + idx))
852	return re_string_peek_byte (pstr, idx);
853	#endif
854
855	off = pstr->cur_idx + idx;
856	#ifdef RE_ENABLE_I18N
857	if (pstr->offsets_needed)
858	off = pstr->offsets[off];
859	#endif
860
861	ch = pstr->raw_mbs[pstr->raw_mbs_idx + off];
862
863	#ifdef RE_ENABLE_I18N
864	/ Ensure that e.g. for tr_TR.UTF-8 BACKSLASH DOTLESS SMALL LETTER I*
865	this function returns CAPITAL LETTER I instead of first byte of
866	DOTLESS SMALL LETTER I. The latter would confuse the parser,
867	since peek_byte_case doesn't advance cur_idx in any way. /*
868	if (pstr->offsets_needed && !isascii (ch))
869	return re_string_peek_byte (pstr, idx);
870	#endif
871
872	return ch;
873	}
874
875	static unsigned char
876	re_string_fetch_byte_case (re_string_t *pstr)
877	{
878	if (__glibc_likely (!pstr->mbs_allocated))
879	return re_string_fetch_byte (pstr);
880
881	#ifdef RE_ENABLE_I18N
882	if (pstr->offsets_needed)
883	{
884	Idx off;
885	int ch;
886
887	/ For tr_TR.UTF-8 [[:islower:]] there is*
888	[[: CAPITAL LETTER I WITH DOT lower:]] in mbs. Skip
889	in that case the whole multi-byte character and return
890	the original letter. On the other side, with
891	[[: DOTLESS SMALL LETTER I return [[:I, as doing
892	anything else would complicate things too much. /*
893
894	if (!re_string_first_byte (pstr, pstr->cur_idx))
895	return re_string_fetch_byte (pstr);
896
897	off = pstr->offsets[pstr->cur_idx];
898	ch = pstr->raw_mbs[pstr->raw_mbs_idx + off];
899
900	if (! isascii (ch))
901	return re_string_fetch_byte (pstr);
902
903	re_string_skip_bytes (pstr,
904	re_string_char_size_at (pstr, pstr->cur_idx));
905	return ch;
906	}
907	#endif
908
909	return pstr->raw_mbs[pstr->raw_mbs_idx + pstr->cur_idx++];
910	}
911
912	static void
913	re_string_destruct (re_string_t *pstr)
914	{
915	#ifdef RE_ENABLE_I18N
916	re_free (pstr->wcs);
917	re_free (pstr->offsets);
918	#endif /* RE_ENABLE_I18N */
919	if (pstr->mbs_allocated)
920	re_free (pstr->mbs);
921	}
922
923	/ Return the context at IDX in INPUT. /
924
925	static unsigned int
926	re_string_context_at (const re_string_t input, Idx idx, int* eflags)
927	{
928	int c;
929	if (__glibc_unlikely (idx < `0`))
930	/ In this case, we use the value stored in input->tip_context,*
931	since we can't know the character in input->mbs[-1] here. /*
932	return input->tip_context;
933	if (__glibc_unlikely (idx == input->len))
934	return ((eflags & REG_NOTEOL) ? CONTEXT_ENDBUF
935	: CONTEXT_NEWLINE \| CONTEXT_ENDBUF);
936	#ifdef RE_ENABLE_I18N
937	if (input->mb_cur_max > `1`)
938	{
939	wint_t wc;
940	Idx wc_idx = idx;
941	while(input->wcs[wc_idx] == WEOF)
942	{
943	DEBUG_ASSERT (wc_idx >= `0`);
944	--wc_idx;
945	if (wc_idx < `0`)
946	return input->tip_context;
947	}
948	wc = input->wcs[wc_idx];
949	if (__glibc_unlikely (input->word_ops_used != `0`)
950	&& IS_WIDE_WORD_CHAR (wc))
951	return CONTEXT_WORD;
952	return (IS_WIDE_NEWLINE (wc) && input->newline_anchor
953	? CONTEXT_NEWLINE : `0`);
954	}
955	else
956	#endif
957	{
958	c = re_string_byte_at (input, idx);
959	if (bitset_contain (set: input->word_char, i: c))
960	return CONTEXT_WORD;
961	return IS_NEWLINE (c) && input->newline_anchor ? CONTEXT_NEWLINE : `0`;
962	}
963	}
964
965	/ Functions for set operation. /
966
967	static reg_errcode_t
968	__attribute_warn_unused_result__
969	re_node_set_alloc (re_node_set *set, Idx size)
970	{
971	set->alloc = size;
972	set->nelem = `0`;
973	set->elems = re_malloc (Idx, size);
974	if (__glibc_unlikely (set->elems == NULL)
975	&& (MALLOC_0_IS_NONNULL \|\| size != `0`))
976	return REG_ESPACE;
977	return REG_NOERROR;
978	}
979
980	static reg_errcode_t
981	__attribute_warn_unused_result__
982	re_node_set_init_1 (re_node_set *set, Idx elem)
983	{
984	set->alloc = `1`;
985	set->nelem = `1`;
986	set->elems = re_malloc (Idx, `1`);
987	if (__glibc_unlikely (set->elems == NULL))
988	{
989	set->alloc = set->nelem = `0`;
990	return REG_ESPACE;
991	}
992	set->elems[`0`] = elem;
993	return REG_NOERROR;
994	}
995
996	static reg_errcode_t
997	__attribute_warn_unused_result__
998	re_node_set_init_2 (re_node_set *set, Idx elem1, Idx elem2)
999	{
1000	set->alloc = `2`;
1001	set->elems = re_malloc (Idx, `2`);
1002	if (__glibc_unlikely (set->elems == NULL))
1003	return REG_ESPACE;
1004	if (elem1 == elem2)
1005	{
1006	set->nelem = `1`;
1007	set->elems[`0`] = elem1;
1008	}
1009	else
1010	{
1011	set->nelem = `2`;
1012	if (elem1 < elem2)
1013	{
1014	set->elems[`0`] = elem1;
1015	set->elems[`1`] = elem2;
1016	}
1017	else
1018	{
1019	set->elems[`0`] = elem2;
1020	set->elems[`1`] = elem1;
1021	}
1022	}
1023	return REG_NOERROR;
1024	}
1025
1026	static reg_errcode_t
1027	__attribute_warn_unused_result__
1028	re_node_set_init_copy (re_node_set dest, const* re_node_set *src)
1029	{
1030	dest->nelem = src->nelem;
1031	if (src->nelem > `0`)
1032	{
1033	dest->alloc = dest->nelem;
1034	dest->elems = re_malloc (Idx, dest->alloc);
1035	if (__glibc_unlikely (dest->elems == NULL))
1036	{
1037	dest->alloc = dest->nelem = `0`;
1038	return REG_ESPACE;
1039	}
1040	memcpy (dest->elems, src->elems, src->nelem * sizeof (Idx));
1041	}
1042	else
1043	re_node_set_init_empty (dest);
1044	return REG_NOERROR;
1045	}
1046
1047	/ Calculate the intersection of the sets SRC1 and SRC2. And merge it to*
1048	DEST. Return value indicate the error code or REG_NOERROR if succeeded.
1049	Note: We assume dest->elems is NULL, when dest->alloc is 0. /*
1050
1051	static reg_errcode_t
1052	__attribute_warn_unused_result__
1053	re_node_set_add_intersect (re_node_set dest, const* re_node_set *src1,
1054	const re_node_set *src2)
1055	{
1056	Idx i1, i2, is, id, delta, sbase;
1057	if (src1->nelem == `0` \|\| src2->nelem == `0`)
1058	return REG_NOERROR;
1059
1060	/ We need dest->nelem + 2 * elems_in_intersection; this is a*
1061	conservative estimate. /*
1062	if (src1->nelem + src2->nelem + dest->nelem > dest->alloc)
1063	{
1064	Idx new_alloc = src1->nelem + src2->nelem + dest->alloc;
1065	Idx *new_elems = re_realloc (dest->elems, Idx, new_alloc);
1066	if (__glibc_unlikely (new_elems == NULL))
1067	return REG_ESPACE;
1068	dest->elems = new_elems;
1069	dest->alloc = new_alloc;
1070	}
1071
1072	/ Find the items in the intersection of SRC1 and SRC2, and copy*
1073	into the top of DEST those that are not already in DEST itself. /*
1074	sbase = dest->nelem + src1->nelem + src2->nelem;
1075	i1 = src1->nelem - `1`;
1076	i2 = src2->nelem - `1`;
1077	id = dest->nelem - `1`;
1078	for (;;)
1079	{
1080	if (src1->elems[i1] == src2->elems[i2])
1081	{
1082	/ Try to find the item in DEST. Maybe we could binary search? /
1083	while (id >= `0` && dest->elems[id] > src1->elems[i1])
1084	--id;
1085
1086	if (id < `0` \|\| dest->elems[id] != src1->elems[i1])
1087	dest->elems[--sbase] = src1->elems[i1];
1088
1089	if (--i1 < `0` \|\| --i2 < `0`)
1090	break;
1091	}
1092
1093	/ Lower the highest of the two items. /
1094	else if (src1->elems[i1] < src2->elems[i2])
1095	{
1096	if (--i2 < `0`)
1097	break;
1098	}
1099	else
1100	{
1101	if (--i1 < `0`)
1102	break;
1103	}
1104	}
1105
1106	id = dest->nelem - `1`;
1107	is = dest->nelem + src1->nelem + src2->nelem - `1`;
1108	delta = is - sbase + `1`;
1109
1110	/ Now copy. When DELTA becomes zero, the remaining*
1111	DEST elements are already in place; this is more or
1112	less the same loop that is in re_node_set_merge. /*
1113	dest->nelem += delta;
1114	if (delta > `0` && id >= `0`)
1115	for (;;)
1116	{
1117	if (dest->elems[is] > dest->elems[id])
1118	{
1119	/ Copy from the top. /
1120	dest->elems[id + delta--] = dest->elems[is--];
1121	if (delta == `0`)
1122	break;
1123	}
1124	else
1125	{
1126	/ Slide from the bottom. /
1127	dest->elems[id + delta] = dest->elems[id];
1128	if (--id < `0`)
1129	break;
1130	}
1131	}
1132
1133	/ Copy remaining SRC elements. /
1134	memcpy (dest->elems, dest->elems + sbase, delta * sizeof (Idx));
1135
1136	return REG_NOERROR;
1137	}
1138
1139	/ Calculate the union set of the sets SRC1 and SRC2. And store it to*
1140	DEST. Return value indicate the error code or REG_NOERROR if succeeded. /*
1141
1142	static reg_errcode_t
1143	__attribute_warn_unused_result__
1144	re_node_set_init_union (re_node_set dest, const* re_node_set *src1,
1145	const re_node_set *src2)
1146	{
1147	Idx i1, i2, id;
1148	if (src1 != NULL && src1->nelem > `0` && src2 != NULL && src2->nelem > `0`)
1149	{
1150	dest->alloc = src1->nelem + src2->nelem;
1151	dest->elems = re_malloc (Idx, dest->alloc);
1152	if (__glibc_unlikely (dest->elems == NULL))
1153	return REG_ESPACE;
1154	}
1155	else
1156	{
1157	if (src1 != NULL && src1->nelem > `0`)
1158	return re_node_set_init_copy (dest, src: src1);
1159	else if (src2 != NULL && src2->nelem > `0`)
1160	return re_node_set_init_copy (dest, src: src2);
1161	else
1162	re_node_set_init_empty (dest);
1163	return REG_NOERROR;
1164	}
1165	for (i1 = i2 = id = `0` ; i1 < src1->nelem && i2 < src2->nelem ;)
1166	{
1167	if (src1->elems[i1] > src2->elems[i2])
1168	{
1169	dest->elems[id++] = src2->elems[i2++];
1170	continue;
1171	}
1172	if (src1->elems[i1] == src2->elems[i2])
1173	++i2;
1174	dest->elems[id++] = src1->elems[i1++];
1175	}
1176	if (i1 < src1->nelem)
1177	{
1178	memcpy (dest->elems + id, src1->elems + i1,
1179	(src1->nelem - i1) * sizeof (Idx));
1180	id += src1->nelem - i1;
1181	}
1182	else if (i2 < src2->nelem)
1183	{
1184	memcpy (dest->elems + id, src2->elems + i2,
1185	(src2->nelem - i2) * sizeof (Idx));
1186	id += src2->nelem - i2;
1187	}
1188	dest->nelem = id;
1189	return REG_NOERROR;
1190	}
1191
1192	/ Calculate the union set of the sets DEST and SRC. And store it to*
1193	DEST. Return value indicate the error code or REG_NOERROR if succeeded. /*
1194
1195	static reg_errcode_t
1196	__attribute_warn_unused_result__
1197	re_node_set_merge (re_node_set dest, const* re_node_set *src)
1198	{
1199	Idx is, id, sbase, delta;
1200	if (src == NULL \|\| src->nelem == `0`)
1201	return REG_NOERROR;
1202	if (dest->alloc < `2` * src->nelem + dest->nelem)
1203	{
1204	Idx new_alloc = `2` * (src->nelem + dest->alloc);
1205	Idx *new_buffer = re_realloc (dest->elems, Idx, new_alloc);
1206	if (__glibc_unlikely (new_buffer == NULL))
1207	return REG_ESPACE;
1208	dest->elems = new_buffer;
1209	dest->alloc = new_alloc;
1210	}
1211
1212	if (__glibc_unlikely (dest->nelem == `0`))
1213	{
1214	/ Although we already guaranteed above that dest->alloc != 0 and*
1215	therefore dest->elems != NULL, add a debug assertion to pacify
1216	GCC 11.2.1's -fanalyzer. /*
1217	DEBUG_ASSERT (dest->elems);
1218	dest->nelem = src->nelem;
1219	memcpy (dest->elems, src->elems, src->nelem * sizeof (Idx));
1220	return REG_NOERROR;
1221	}
1222
1223	/ Copy into the top of DEST the items of SRC that are not*
1224	found in DEST. Maybe we could binary search in DEST? /*
1225	for (sbase = dest->nelem + `2` * src->nelem,
1226	is = src->nelem - `1`, id = dest->nelem - `1`; is >= `0` && id >= `0`; )
1227	{
1228	if (dest->elems[id] == src->elems[is])
1229	is--, id--;
1230	else if (dest->elems[id] < src->elems[is])
1231	dest->elems[--sbase] = src->elems[is--];
1232	else / if (dest->elems[id] > src->elems[is]) /
1233	--id;
1234	}
1235
1236	if (is >= `0`)
1237	{
1238	/ If DEST is exhausted, the remaining items of SRC must be unique. /
1239	sbase -= is + `1`;
1240	memcpy (dest->elems + sbase, src->elems, (is + `1`) * sizeof (Idx));
1241	}
1242
1243	id = dest->nelem - `1`;
1244	is = dest->nelem + `2` * src->nelem - `1`;
1245	delta = is - sbase + `1`;
1246	if (delta == `0`)
1247	return REG_NOERROR;
1248
1249	/ Now copy. When DELTA becomes zero, the remaining*
1250	DEST elements are already in place. /*
1251	dest->nelem += delta;
1252	for (;;)
1253	{
1254	if (dest->elems[is] > dest->elems[id])
1255	{
1256	/ Copy from the top. /
1257	dest->elems[id + delta--] = dest->elems[is--];
1258	if (delta == `0`)
1259	break;
1260	}
1261	else
1262	{
1263	/ Slide from the bottom. /
1264	dest->elems[id + delta] = dest->elems[id];
1265	if (--id < `0`)
1266	{
1267	/ Copy remaining SRC elements. /
1268	memcpy (dest->elems, dest->elems + sbase,
1269	delta * sizeof (Idx));
1270	break;
1271	}
1272	}
1273	}
1274
1275	return REG_NOERROR;
1276	}
1277
1278	/ Insert the new element ELEM to the re_node_set* SET.*
1279	SET should not already have ELEM.
1280	Return true if successful. /*
1281
1282	static bool
1283	__attribute_warn_unused_result__
1284	re_node_set_insert (re_node_set *set, Idx elem)
1285	{
1286	Idx idx;
1287	/ In case the set is empty. /
1288	if (set->alloc == `0`)
1289	return __glibc_likely (re_node_set_init_1 (set, elem) == REG_NOERROR);
1290
1291	if (__glibc_unlikely (set->nelem) == `0`)
1292	{
1293	/ Although we already guaranteed above that set->alloc != 0 and*
1294	therefore set->elems != NULL, add a debug assertion to pacify
1295	GCC 11.2 -fanalyzer. /*
1296	DEBUG_ASSERT (set->elems);
1297	set->elems[`0`] = elem;
1298	++set->nelem;
1299	return true;
1300	}
1301
1302	/ Realloc if we need. /
1303	if (set->alloc == set->nelem)
1304	{
1305	Idx *new_elems;
1306	set->alloc = set->alloc * `2`;
1307	new_elems = re_realloc (set->elems, Idx, set->alloc);
1308	if (__glibc_unlikely (new_elems == NULL))
1309	return false;
1310	set->elems = new_elems;
1311	}
1312
1313	/ Move the elements which follows the new element. Test the*
1314	first element separately to skip a check in the inner loop. /*
1315	if (elem < set->elems[`0`])
1316	{
1317	for (idx = set->nelem; idx > `0`; idx--)
1318	set->elems[idx] = set->elems[idx - `1`];
1319	}
1320	else
1321	{
1322	for (idx = set->nelem; set->elems[idx - `1`] > elem; idx--)
1323	set->elems[idx] = set->elems[idx - `1`];
1324	DEBUG_ASSERT (set->elems[idx - `1`] < elem);
1325	}
1326
1327	/ Insert the new element. /
1328	set->elems[idx] = elem;
1329	++set->nelem;
1330	return true;
1331	}
1332
1333	/ Insert the new element ELEM to the re_node_set* SET.*
1334	SET should not already have any element greater than or equal to ELEM.
1335	Return true if successful. /*
1336
1337	static bool
1338	__attribute_warn_unused_result__
1339	re_node_set_insert_last (re_node_set *set, Idx elem)
1340	{
1341	/ Realloc if we need. /
1342	if (set->alloc == set->nelem)
1343	{
1344	Idx *new_elems;
1345	set->alloc = (set->alloc + `1`) * `2`;
1346	new_elems = re_realloc (set->elems, Idx, set->alloc);
1347	if (__glibc_unlikely (new_elems == NULL))
1348	return false;
1349	set->elems = new_elems;
1350	}
1351
1352	/ Insert the new element. /
1353	set->elems[set->nelem++] = elem;
1354	return true;
1355	}
1356
1357	/ Compare two node sets SET1 and SET2.*
1358	Return true if SET1 and SET2 are equivalent. /*
1359
1360	static bool
1361	__attribute__ ((pure))
1362	re_node_set_compare (const re_node_set set1, const* re_node_set *set2)
1363	{
1364	Idx i;
1365	if (set1 == NULL \|\| set2 == NULL \|\| set1->nelem != set2->nelem)
1366	return false;
1367	for (i = set1->nelem ; --i >= `0` ; )
1368	if (set1->elems[i] != set2->elems[i])
1369	return false;
1370	return true;
1371	}
1372
1373	/ Return (idx + 1) if SET contains the element ELEM, return 0 otherwise. /
1374
1375	static Idx
1376	__attribute__ ((pure))
1377	re_node_set_contains (const re_node_set *set, Idx elem)
1378	{
1379	__re_size_t idx, right, mid;
1380	if (set->nelem <= `0`)
1381	return `0`;
1382
1383	/ Binary search the element. /
1384	idx = `0`;
1385	right = set->nelem - `1`;
1386	while (idx < right)
1387	{
1388	mid = (idx + right) / `2`;
1389	if (set->elems[mid] < elem)
1390	idx = mid + `1`;
1391	else
1392	right = mid;
1393	}
1394	return set->elems[idx] == elem ? idx + `1` : `0`;
1395	}
1396
1397	static void
1398	re_node_set_remove_at (re_node_set *set, Idx idx)
1399	{
1400	if (idx < `0` \|\| idx >= set->nelem)
1401	return;
1402	--set->nelem;
1403	for (; idx < set->nelem; idx++)
1404	set->elems[idx] = set->elems[idx + `1`];
1405	}
1406
1407
1408	/ Add the token TOKEN to dfa->nodes, and return the index of the token.*
1409	Or return -1 if an error occurred. /*
1410
1411	static Idx
1412	re_dfa_add_node (re_dfa_t *dfa, re_token_t token)
1413	{
1414	if (__glibc_unlikely (dfa->nodes_len >= dfa->nodes_alloc))
1415	{
1416	size_t new_nodes_alloc = dfa->nodes_alloc * `2`;
1417	Idx new_nexts, new_indices;
1418	re_node_set new_edests, new_eclosures;
1419	re_token_t *new_nodes;
1420
1421	/ Avoid overflows in realloc. /
1422	const size_t max_object_size = MAX (sizeof (re_token_t),
1423	MAX (sizeof (re_node_set),
1424	sizeof (Idx)));
1425	if (__glibc_unlikely (MIN (IDX_MAX, SIZE_MAX / max_object_size)
1426	< new_nodes_alloc))
1427	return -`1`;
1428
1429	new_nodes = re_realloc (dfa->nodes, re_token_t, new_nodes_alloc);
1430	if (__glibc_unlikely (new_nodes == NULL))
1431	return -`1`;
1432	dfa->nodes = new_nodes;
1433	new_nexts = re_realloc (dfa->nexts, Idx, new_nodes_alloc);
1434	new_indices = re_realloc (dfa->org_indices, Idx, new_nodes_alloc);
1435	new_edests = re_realloc (dfa->edests, re_node_set, new_nodes_alloc);
1436	new_eclosures = re_realloc (dfa->eclosures, re_node_set, new_nodes_alloc);
1437	if (__glibc_unlikely (new_nexts == NULL \|\| new_indices == NULL
1438	\|\| new_edests == NULL \|\| new_eclosures == NULL))
1439	{
1440	re_free (new_nexts);
1441	re_free (new_indices);
1442	re_free (new_edests);
1443	re_free (new_eclosures);
1444	return -`1`;
1445	}
1446	dfa->nexts = new_nexts;
1447	dfa->org_indices = new_indices;
1448	dfa->edests = new_edests;
1449	dfa->eclosures = new_eclosures;
1450	dfa->nodes_alloc = new_nodes_alloc;
1451	}
1452	dfa->nodes[dfa->nodes_len] = token;
1453	dfa->nodes[dfa->nodes_len].constraint = `0`;
1454	#ifdef RE_ENABLE_I18N
1455	dfa->nodes[dfa->nodes_len].accept_mb =
1456	((token.type == OP_PERIOD && dfa->mb_cur_max > `1`)
1457	\|\| token.type == COMPLEX_BRACKET);
1458	#endif
1459	dfa->nexts[dfa->nodes_len] = -`1`;
1460	re_node_set_init_empty (dfa->edests + dfa->nodes_len);
1461	re_node_set_init_empty (dfa->eclosures + dfa->nodes_len);
1462	return dfa->nodes_len++;
1463	}
1464
1465	static re_hashval_t
1466	calc_state_hash (const re_node_set nodes, unsigned* int context)
1467	{
1468	re_hashval_t hash = nodes->nelem + context;
1469	Idx i;
1470	for (i = `0` ; i < nodes->nelem ; i++)
1471	hash += nodes->elems[i];
1472	return hash;
1473	}
1474
1475	/ Search for the state whose node_set is equivalent to NODES.*
1476	Return the pointer to the state, if we found it in the DFA.
1477	Otherwise create the new one and return it. In case of an error
1478	return NULL and set the error code in ERR.
1479	Note: - We assume NULL as the invalid state, then it is possible that
1480	return value is NULL and ERR is REG_NOERROR.
1481	- We never return non-NULL value in case of any errors, it is for
1482	optimization. /*
1483
1484	static re_dfastate_t *
1485	__attribute_warn_unused_result__
1486	re_acquire_state (reg_errcode_t err, const* re_dfa_t *dfa,
1487	const re_node_set *nodes)
1488	{
1489	re_hashval_t hash;
1490	re_dfastate_t *new_state;
1491	struct re_state_table_entry *spot;
1492	Idx i;
1493	#if defined GCC_LINT \|\| defined lint
1494	/ Suppress bogus uninitialized-variable warnings. /
1495	*err = REG_NOERROR;
1496	#endif
1497	if (__glibc_unlikely (nodes->nelem == `0`))
1498	{
1499	*err = REG_NOERROR;
1500	return NULL;
1501	}
1502	hash = calc_state_hash (nodes, context: `0`);
1503	spot = dfa->state_table + (hash & dfa->state_hash_mask);
1504
1505	for (i = `0` ; i < spot->num ; i++)
1506	{
1507	re_dfastate_t *state = spot->array[i];
1508	if (hash != state->hash)
1509	continue;
1510	if (re_node_set_compare (set1: &state->nodes, set2: nodes))
1511	return state;
1512	}
1513
1514	/ There are no appropriate state in the dfa, create the new one. /
1515	new_state = create_ci_newstate (dfa, nodes, hash);
1516	if (__glibc_unlikely (new_state == NULL))
1517	*err = REG_ESPACE;
1518
1519	return new_state;
1520	}
1521
1522	/ Search for the state whose node_set is equivalent to NODES and*
1523	whose context is equivalent to CONTEXT.
1524	Return the pointer to the state, if we found it in the DFA.
1525	Otherwise create the new one and return it. In case of an error
1526	return NULL and set the error code in ERR.
1527	Note: - We assume NULL as the invalid state, then it is possible that
1528	return value is NULL and ERR is REG_NOERROR.
1529	- We never return non-NULL value in case of any errors, it is for
1530	optimization. /*
1531
1532	static re_dfastate_t *
1533	__attribute_warn_unused_result__
1534	re_acquire_state_context (reg_errcode_t err, const* re_dfa_t *dfa,
1535	const re_node_set nodes, unsigned* int context)
1536	{
1537	re_hashval_t hash;
1538	re_dfastate_t *new_state;
1539	struct re_state_table_entry *spot;
1540	Idx i;
1541	#if defined GCC_LINT \|\| defined lint
1542	/ Suppress bogus uninitialized-variable warnings. /
1543	*err = REG_NOERROR;
1544	#endif
1545	if (nodes->nelem == `0`)
1546	{
1547	*err = REG_NOERROR;
1548	return NULL;
1549	}
1550	hash = calc_state_hash (nodes, context);
1551	spot = dfa->state_table + (hash & dfa->state_hash_mask);
1552
1553	for (i = `0` ; i < spot->num ; i++)
1554	{
1555	re_dfastate_t *state = spot->array[i];
1556	if (state->hash == hash
1557	&& state->context == context
1558	&& re_node_set_compare (set1: state->entrance_nodes, set2: nodes))
1559	return state;
1560	}
1561	/ There are no appropriate state in 'dfa', create the new one. /
1562	new_state = create_cd_newstate (dfa, nodes, context, hash);
1563	if (__glibc_unlikely (new_state == NULL))
1564	*err = REG_ESPACE;
1565
1566	return new_state;
1567	}
1568
1569	/ Finish initialization of the new state NEWSTATE, and using its hash value*
1570	HASH put in the appropriate bucket of DFA's state table. Return value
1571	indicates the error code if failed. /*
1572
1573	static reg_errcode_t
1574	__attribute_warn_unused_result__
1575	register_state (const re_dfa_t dfa, re_dfastate_t newstate,
1576	re_hashval_t hash)
1577	{
1578	struct re_state_table_entry *spot;
1579	reg_errcode_t err;
1580	Idx i;
1581
1582	newstate->hash = hash;
1583	err = re_node_set_alloc (set: &newstate->non_eps_nodes, size: newstate->nodes.nelem);
1584	if (__glibc_unlikely (err != REG_NOERROR))
1585	return REG_ESPACE;
1586	for (i = `0`; i < newstate->nodes.nelem; i++)
1587	{
1588	Idx elem = newstate->nodes.elems[i];
1589	if (!IS_EPSILON_NODE (dfa->nodes[elem].type))
1590	if (! re_node_set_insert_last (set: &newstate->non_eps_nodes, elem))
1591	return REG_ESPACE;
1592	}
1593
1594	spot = dfa->state_table + (hash & dfa->state_hash_mask);
1595	if (__glibc_unlikely (spot->alloc <= spot->num))
1596	{
1597	Idx new_alloc = `2` * spot->num + `2`;
1598	re_dfastate_t *new_array = re_realloc (spot->array, re_dfastate_t ,
1599	new_alloc);
1600	if (__glibc_unlikely (new_array == NULL))
1601	return REG_ESPACE;
1602	spot->array = new_array;
1603	spot->alloc = new_alloc;
1604	}
1605	spot->array[spot->num++] = newstate;
1606	return REG_NOERROR;
1607	}
1608
1609	static void
1610	free_state (re_dfastate_t *state)
1611	{
1612	re_node_set_free (&state->non_eps_nodes);
1613	re_node_set_free (&state->inveclosure);
1614	if (state->entrance_nodes != &state->nodes)
1615	{
1616	re_node_set_free (state->entrance_nodes);
1617	re_free (state->entrance_nodes);
1618	}
1619	re_node_set_free (&state->nodes);
1620	re_free (state->word_trtable);
1621	re_free (state->trtable);
1622	re_free (state);
1623	}
1624
1625	/ Create the new state which is independent of contexts.*
1626	Return the new state if succeeded, otherwise return NULL. /*
1627
1628	static re_dfastate_t *
1629	__attribute_warn_unused_result__
1630	create_ci_newstate (const re_dfa_t dfa, const* re_node_set *nodes,
1631	re_hashval_t hash)
1632	{
1633	Idx i;
1634	reg_errcode_t err;
1635	re_dfastate_t *newstate;
1636
1637	newstate = (re_dfastate_t ) calloc (nmemb: sizeof* (re_dfastate_t), size: `1`);
1638	if (__glibc_unlikely (newstate == NULL))
1639	return NULL;
1640	err = re_node_set_init_copy (dest: &newstate->nodes, src: nodes);
1641	if (__glibc_unlikely (err != REG_NOERROR))
1642	{
1643	re_free (newstate);
1644	return NULL;
1645	}
1646
1647	newstate->entrance_nodes = &newstate->nodes;
1648	for (i = `0` ; i < nodes->nelem ; i++)
1649	{
1650	re_token_t *node = dfa->nodes + nodes->elems[i];
1651	re_token_type_t type = node->type;
1652	if (type == CHARACTER && !node->constraint)
1653	continue;
1654	#ifdef RE_ENABLE_I18N
1655	newstate->accept_mb \|= node->accept_mb;
1656	#endif /* RE_ENABLE_I18N */
1657
1658	/ If the state has the halt node, the state is a halt state. /
1659	if (type == END_OF_RE)
1660	newstate->halt = `1`;
1661	else if (type == OP_BACK_REF)
1662	newstate->has_backref = `1`;
1663	else if (type == ANCHOR \|\| node->constraint)
1664	newstate->has_constraint = `1`;
1665	}
1666	err = register_state (dfa, newstate, hash);
1667	if (__glibc_unlikely (err != REG_NOERROR))
1668	{
1669	free_state (state: newstate);
1670	newstate = NULL;
1671	}
1672	return newstate;
1673	}
1674
1675	/ Create the new state which is depend on the context CONTEXT.*
1676	Return the new state if succeeded, otherwise return NULL. /*
1677
1678	static re_dfastate_t *
1679	__attribute_warn_unused_result__
1680	create_cd_newstate (const re_dfa_t dfa, const* re_node_set *nodes,
1681	unsigned int context, re_hashval_t hash)
1682	{
1683	Idx i, nctx_nodes = `0`;
1684	reg_errcode_t err;
1685	re_dfastate_t *newstate;
1686
1687	newstate = (re_dfastate_t ) calloc (nmemb: sizeof* (re_dfastate_t), size: `1`);
1688	if (__glibc_unlikely (newstate == NULL))
1689	return NULL;
1690	err = re_node_set_init_copy (dest: &newstate->nodes, src: nodes);
1691	if (__glibc_unlikely (err != REG_NOERROR))
1692	{
1693	re_free (newstate);
1694	return NULL;
1695	}
1696
1697	newstate->context = context;
1698	newstate->entrance_nodes = &newstate->nodes;
1699
1700	for (i = `0` ; i < nodes->nelem ; i++)
1701	{
1702	re_token_t *node = dfa->nodes + nodes->elems[i];
1703	re_token_type_t type = node->type;
1704	unsigned int constraint = node->constraint;
1705
1706	if (type == CHARACTER && !constraint)
1707	continue;
1708	#ifdef RE_ENABLE_I18N
1709	newstate->accept_mb \|= node->accept_mb;
1710	#endif /* RE_ENABLE_I18N */
1711
1712	/ If the state has the halt node, the state is a halt state. /
1713	if (type == END_OF_RE)
1714	newstate->halt = `1`;
1715	else if (type == OP_BACK_REF)
1716	newstate->has_backref = `1`;
1717
1718	if (constraint)
1719	{
1720	if (newstate->entrance_nodes == &newstate->nodes)
1721	{
1722	re_node_set *entrance_nodes = re_malloc (re_node_set, `1`);
1723	if (__glibc_unlikely (entrance_nodes == NULL))
1724	{
1725	free_state (state: newstate);
1726	return NULL;
1727	}
1728	newstate->entrance_nodes = entrance_nodes;
1729	if (re_node_set_init_copy (dest: newstate->entrance_nodes, src: nodes)
1730	!= REG_NOERROR)
1731	{
1732	free_state (state: newstate);
1733	return NULL;
1734	}
1735	nctx_nodes = `0`;
1736	newstate->has_constraint = `1`;
1737	}
1738
1739	if (NOT_SATISFY_PREV_CONSTRAINT (constraint,context))
1740	{
1741	re_node_set_remove_at (set: &newstate->nodes, idx: i - nctx_nodes);
1742	++nctx_nodes;
1743	}
1744	}
1745	}
1746	err = register_state (dfa, newstate, hash);
1747	if (__glibc_unlikely (err != REG_NOERROR))
1748	{
1749	free_state (state: newstate);
1750	newstate = NULL;
1751	}
1752	return newstate;
1753	}
1754

source code of glibc/posix/regex_internal.c