input.cc source code [gcc/input.cc]

/* Data and functions related to line maps and input files.
   Copyright (C) 2004-2024 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
19
20	#include "config.h"
21	#include "system.h"
22	#include "coretypes.h"
23	#include "intl.h"
24	#include "diagnostic.h"
25	#include "selftest.h"
26	#include "cpplib.h"
27
28	#ifndef HAVE_ICONV
29	#define HAVE_ICONV 0
30	#endif
31
32	const char *
33	special_fname_builtin ()
34	{
35	return _("<built-in>");
36	}
37
/* Input charset configuration.  */

/* Default charset callback, installed when the client supplies none.
   It ignores the file name and returns nullptr, which
   file_cache_slot::create treats as "no charset conversion needed".  */
static const char *default_charset_callback (const char *)
{
  return nullptr;
}
43
44	void
45	file_cache::initialize_input_context (diagnostic_input_charset_callback ccb,
46	bool should_skip_bom)
47	{
48	in_context.ccb = (ccb ? ccb : default_charset_callback);
49	in_context.should_skip_bom = should_skip_bom;
50	}
51
52	/ This is a cache used by get_next_line to store the content of a*
53	file to be searched for file lines. /*
54	class file_cache_slot
55	{
56	public:
57	file_cache_slot ();
58	~file_cache_slot ();
59
60	bool read_line_num (size_t line_num,
61	char ** line, ssize_t *line_len);
62
63	/ Accessors. /
64	const char get_file_path () const* { return m_file_path; }
65	unsigned get_use_count () const { return m_use_count; }
66	bool missing_trailing_newline_p () const
67	{
68	return m_missing_trailing_newline;
69	}
70	char_span get_full_file_content ();
71
72	void inc_use_count () { m_use_count++; }
73
74	bool create (const file_cache::input_context &in_context,
75	const char file_path, FILE fp, unsigned highest_use_count);
76	void evict ();
77
78	private:
79	/ These are information used to store a line boundary. /
80	class line_info
81	{
82	public:
83	/ The line number. It starts from 1. /
84	size_t line_num;
85
86	/ The position (byte count) of the beginning of the line,*
87	relative to the file data pointer. This starts at zero. /*
88	size_t start_pos;
89
90	/ The position (byte count) of the last byte of the line. This*
91	normally points to the '\n' character, or to one byte after the
92	last byte of the file, if the file doesn't contain a '\n'
93	character. /*
94	size_t end_pos;
95
96	line_info (size_t l, size_t s, size_t e)
97	: line_num (l), start_pos (s), end_pos (e)
98	{}
99
100	line_info ()
101	:line_num (`0`), start_pos (`0`), end_pos (`0`)
102	{}
103	};
104
105	bool needs_read_p () const;
106	bool needs_grow_p () const;
107	void maybe_grow ();
108	bool read_data ();
109	bool maybe_read_data ();
110	bool get_next_line (char *line, ssize_t line_len);
111	bool read_next_line (char ** line, ssize_t *line_len);
112	bool goto_next_line ();
113
114	static const size_t buffer_size = `4` * `1024`;
115	static const size_t line_record_size = `100`;
116
117	/ The number of time this file has been accessed. This is used*
118	to designate which file cache to evict from the cache
119	array. /*
120	unsigned m_use_count;
121
122	/ The file_path is the key for identifying a particular file in*
123	the cache.
124	For libcpp-using code, the underlying buffer for this field is
125	owned by the corresponding _cpp_file within the cpp_reader. /*
126	const char *m_file_path;
127
128	FILE *m_fp;
129
130	/ This points to the content of the file that we've read so*
131	far. /*
132	char *m_data;
133
134	/ The allocated buffer to be freed may start a little earlier than DATA,*
135	e.g. if a UTF8 BOM was skipped at the beginning. /*
136	int m_alloc_offset;
137
138	/ The size of the DATA array above./
139	size_t m_size;
140
141	/ The number of bytes read from the underlying file so far. This*
142	must be less (or equal) than SIZE above. /*
143	size_t m_nb_read;
144
145	/ The index of the beginning of the current line. /
146	size_t m_line_start_idx;
147
148	/ The number of the previous line read. This starts at 1. Zero*
149	means we've read no line so far. /*
150	size_t m_line_num;
151
152	/ This is the total number of lines of the current file. At the*
153	moment, we try to get this information from the line map
154	subsystem. Note that this is just a hint. When using the C++
155	front-end, this hint is correct because the input file is then
156	completely tokenized before parsing starts; so the line map knows
157	the number of lines before compilation really starts. For e.g,
158	the C front-end, it can happen that we start emitting diagnostics
159	before the line map has seen the end of the file. /*
160	size_t m_total_lines;
161
162	/ Could this file be missing a trailing newline on its final line?*
163	Initially true (to cope with empty files), set to true/false
164	as each line is read. /*
165	bool m_missing_trailing_newline;
166
167	/ This is a record of the beginning and end of the lines we've seen*
168	while reading the file. This is useful to avoid walking the data
169	from the beginning when we are asked to read a line that is
170	before LINE_START_IDX above. Note that the maximum size of this
171	record is line_record_size, so that the memory consumption
172	doesn't explode. We thus scale total_lines down to
173	line_record_size. /*
174	vec<line_info, va_heap> m_line_record;
175
176	void offset_buffer (int offset)
177	{
178	gcc_assert (offset < `0` ? m_alloc_offset + offset >= `0`
179	: (size_t) offset <= m_size);
180	gcc_assert (m_data);
181	m_alloc_offset += offset;
182	m_data += offset;
183	m_size -= offset;
184	}
185
186	};
187
188	/ Current position in real source file. /
189
190	location_t input_location = UNKNOWN_LOCATION;
191
192	class line_maps *line_table;
193
194	/ A stashed copy of "line_table" for use by selftest::line_table_test.*
195	This needs to be a global so that it can be a GC root, and thus
196	prevent the stashed copy from being garbage-collected if the GC runs
197	during a line_table_test. /*
198
199	class line_maps *saved_line_table;
200
201	/ Expand the source location LOC into a human readable location. If*
202	LOC resolves to a builtin location, the file name of the readable
203	location is set to the string "<built-in>". If EXPANSION_POINT_P is
204	TRUE and LOC is virtual, then it is resolved to the expansion
205	point of the involved macro. Otherwise, it is resolved to the
206	spelling location of the token.
207
208	When resolving to the spelling location of the token, if the
209	resulting location is for a built-in location (that is, it has no
210	associated line/column) in the context of a macro expansion, the
211	returned location is the first one (while unwinding the macro
212	location towards its expansion point) that is in real source
213	code.
214
215	ASPECT controls which part of the location to use. /*
216
217	static expanded_location
218	expand_location_1 (const line_maps *set,
219	location_t loc,
220	bool expansion_point_p,
221	enum location_aspect aspect)
222	{
223	expanded_location xloc;
224	const line_map_ordinary *map;
225	enum location_resolution_kind lrk = LRK_MACRO_EXPANSION_POINT;
226	tree block = NULL;
227
228	if (IS_ADHOC_LOC (loc))
229	{
230	block = LOCATION_BLOCK (loc);
231	loc = LOCATION_LOCUS (loc);
232	}
233
234	memset (s: &xloc, c: `0`, n: sizeof (xloc));
235
236	if (loc >= RESERVED_LOCATION_COUNT)
237	{
238	if (!expansion_point_p)
239	{
240	/ We want to resolve LOC to its spelling location.*
241
242	But if that spelling location is a reserved location that
243	appears in the context of a macro expansion (like for a
244	location for a built-in token), let's consider the first
245	location (toward the expansion point) that is not reserved;
246	that is, the first location that is in real source code. /*
247	loc = linemap_unwind_to_first_non_reserved_loc (set,
248	loc, NULL);
249	lrk = LRK_SPELLING_LOCATION;
250	}
251	loc = linemap_resolve_location (set, loc, lrk, loc_map: &map);
252
253	/ loc is now either in an ordinary map, or is a reserved location.*
254	If it is a compound location, the caret is in a spelling location,
255	but the start/finish might still be a virtual location.
256	Depending of what the caller asked for, we may need to recurse
257	one level in order to resolve any virtual locations in the
258	end-points. /*
259	switch (aspect)
260	{
261	default:
262	gcc_unreachable ();
263	/ Fall through. /
264	case LOCATION_ASPECT_CARET:
265	break;
266	case LOCATION_ASPECT_START:
267	{
268	location_t start = get_start (loc);
269	if (start != loc)
270	return expand_location_1 (set, loc: start, expansion_point_p, aspect);
271	}
272	break;
273	case LOCATION_ASPECT_FINISH:
274	{
275	location_t finish = get_finish (loc);
276	if (finish != loc)
277	return expand_location_1 (set, loc: finish, expansion_point_p, aspect);
278	}
279	break;
280	}
281	xloc = linemap_expand_location (set, map, loc);
282	}
283
284	xloc.data = block;
285	if (loc <= BUILTINS_LOCATION)
286	xloc.file = loc == UNKNOWN_LOCATION ? NULL : special_fname_builtin ();
287
288	return xloc;
289	}
290
291	/ Return the total lines number that have been read so far by the*
292	line map (in the preprocessor) so far. For languages like C++ that
293	entirely preprocess the input file before starting to parse, this
294	equals the actual number of lines of the file. /*
295
296	static size_t
297	total_lines_num (const char *file_path)
298	{
299	size_t r = `0`;
300	location_t l = `0`;
301	if (linemap_get_file_highest_location (set: line_table, file_name: file_path, loc: &l))
302	{
303	gcc_assert (l >= RESERVED_LOCATION_COUNT);
304	expanded_location xloc = expand_location (l);
305	r = xloc.line;
306	}
307	return r;
308	}
309
310	/ Lookup the cache used for the content of a given file accessed by*
311	caret diagnostic. Return the found cached file, or NULL if no
312	cached file was found. /*
313
314	file_cache_slot *
315	file_cache::lookup_file (const char *file_path)
316	{
317	gcc_assert (file_path);
318
319	/ This will contain the found cached file. /
320	file_cache_slot *r = NULL;
321	for (unsigned i = `0`; i < num_file_slots; ++i)
322	{
323	file_cache_slot *c = &m_file_slots[i];
324	if (c->get_file_path () && !strcmp (s1: c->get_file_path (), s2: file_path))
325	{
326	c->inc_use_count ();
327	r = c;
328	}
329	}
330
331	if (r)
332	r->inc_use_count ();
333
334	return r;
335	}
336
337	/ Purge any mention of FILENAME from the cache of files used for*
338	printing source code. For use in selftests when working
339	with tempfiles. /*
340
341	void
342	file_cache::forcibly_evict_file (const char *file_path)
343	{
344	gcc_assert (file_path);
345
346	file_cache_slot *r = lookup_file (file_path);
347	if (!r)
348	/ Not found. /
349	return;
350
351	r->evict ();
352	}
353
354	/ Determine if FILE_PATH missing a trailing newline on its final line.*
355	Only valid to call once all of the file has been loaded, by
356	requesting a line number beyond the end of the file. /*
357
358	bool
359	file_cache::missing_trailing_newline_p (const char *file_path)
360	{
361	gcc_assert (file_path);
362
363	file_cache_slot *r = lookup_or_add_file (file_path);
364	return r->missing_trailing_newline_p ();
365	}
366
367	void
368	file_cache_slot::evict ()
369	{
370	m_file_path = NULL;
371	if (m_fp)
372	fclose (stream: m_fp);
373	m_fp = NULL;
374	m_nb_read = `0`;
375	m_line_start_idx = `0`;
376	m_line_num = `0`;
377	m_line_record.truncate (size: `0`);
378	m_use_count = `0`;
379	m_total_lines = `0`;
380	m_missing_trailing_newline = true;
381	}
382
383	/ Return the file cache that has been less used, recently, or the*
384	first empty one. If HIGHEST_USE_COUNT is non-null,
385	*HIGHEST_USE_COUNT is set to the highest use count of the entries
386	in the cache table. /*
387
388	file_cache_slot*
389	file_cache::evicted_cache_tab_entry (unsigned *highest_use_count)
390	{
391	file_cache_slot *to_evict = &m_file_slots[`0`];
392	unsigned huc = to_evict->get_use_count ();
393	for (unsigned i = `1`; i < num_file_slots; ++i)
394	{
395	file_cache_slot *c = &m_file_slots[i];
396	bool c_is_empty = (c->get_file_path () == NULL);
397
398	if (c->get_use_count () < to_evict->get_use_count ()
399	\|\| (to_evict->get_file_path () && c_is_empty))
400	/ We evict C because it's either an entry with a lower use*
401	count or one that is empty. /*
402	to_evict = c;
403
404	if (huc < c->get_use_count ())
405	huc = c->get_use_count ();
406
407	if (c_is_empty)
408	/ We've reached the end of the cache; subsequent elements are*
409	all empty. /*
410	break;
411	}
412
413	if (highest_use_count)
414	*highest_use_count = huc;
415
416	return to_evict;
417	}
418
419	/ Create the cache used for the content of a given file to be*
420	accessed by caret diagnostic. This cache is added to an array of
421	cache and can be retrieved by lookup_file_in_cache_tab. This
422	function returns the created cache. Note that only the last
423	num_file_slots files are cached.
424
425	This can return nullptr if the FILE_PATH can't be opened for
426	reading, or if the content can't be converted to the input_charset. /*
427
428	file_cache_slot*
429	file_cache::add_file (const char *file_path)
430	{
431
432	FILE *fp = fopen (filename: file_path, modes: "r");
433	if (fp == NULL)
434	return NULL;
435
436	unsigned highest_use_count = `0`;
437	file_cache_slot *r = evicted_cache_tab_entry (highest_use_count: &highest_use_count);
438	if (!r->create (in_context, file_path, fp, highest_use_count))
439	return NULL;
440	return r;
441	}
442
443	/ Get a borrowed char_span to the full content of this file*
444	as decoded according to the input charset, encoded as UTF-8. /*
445
446	char_span
447	file_cache_slot::get_full_file_content ()
448	{
449	char *line;
450	ssize_t line_len;
451	while (get_next_line (line: &line, line_len: &line_len))
452	{
453	}
454	return char_span (m_data, m_nb_read);
455	}
456
457	/ Populate this slot for use on FILE_PATH and FP, dropping any*
458	existing cached content within it. /*
459
460	bool
461	file_cache_slot::create (const file_cache::input_context &in_context,
462	const char file_path, FILE fp,
463	unsigned highest_use_count)
464	{
465	m_file_path = file_path;
466	if (m_fp)
467	fclose (stream: m_fp);
468	m_fp = fp;
469	if (m_alloc_offset)
470	offset_buffer (offset: -m_alloc_offset);
471	m_nb_read = `0`;
472	m_line_start_idx = `0`;
473	m_line_num = `0`;
474	m_line_record.truncate (size: `0`);
475	/ Ensure that this cache entry doesn't get evicted next time*
476	add_file_to_cache_tab is called. /*
477	m_use_count = ++highest_use_count;
478	m_total_lines = total_lines_num (file_path);
479	m_missing_trailing_newline = true;
480
481
482	/ Check the input configuration to determine if we need to do any*
483	transformations, such as charset conversion or BOM skipping. /*
484	if (const char *input_charset = in_context.ccb (file_path))
485	{
486	/ Need a full-blown conversion of the input charset. /
487	fclose (stream: m_fp);
488	m_fp = NULL;
489	const cpp_converted_source cs
490	= cpp_get_converted_source (fname: file_path, input_charset);
491	if (!cs.data)
492	return false;
493	if (m_data)
494	XDELETEVEC (m_data);
495	m_data = cs.data;
496	m_nb_read = m_size = cs.len;
497	m_alloc_offset = cs.data - cs.to_free;
498	}
499	else if (in_context.should_skip_bom)
500	{
501	if (read_data ())
502	{
503	const int offset = cpp_check_utf8_bom (data: m_data, data_length: m_nb_read);
504	offset_buffer (offset);
505	m_nb_read -= offset;
506	}
507	}
508
509	return true;
510	}
511
512	/ file_cache's ctor. /
513
514	file_cache::file_cache ()
515	: m_file_slots (new file_cache_slot[num_file_slots])
516	{
517	initialize_input_context (ccb: nullptr, should_skip_bom: false);
518	}
519
520	/ file_cache's dtor. /
521
522	file_cache::~file_cache ()
523	{
524	delete[] m_file_slots;
525	}
526
527	/ Lookup the cache used for the content of a given file accessed by*
528	caret diagnostic. If no cached file was found, create a new cache
529	for this file, add it to the array of cached file and return
530	it.
531
532	This can return nullptr on a cache miss if FILE_PATH can't be opened for
533	reading, or if the content can't be converted to the input_charset. /*
534
535	file_cache_slot*
536	file_cache::lookup_or_add_file (const char *file_path)
537	{
538	file_cache_slot *r = lookup_file (file_path);
539	if (r == NULL)
540	r = add_file (file_path);
541	return r;
542	}
543
544	/ Default constructor for a cache of file used by caret*
545	diagnostic. /*
546
547	file_cache_slot::file_cache_slot ()
548	: m_use_count (`0`), m_file_path (NULL), m_fp (NULL), m_data (`0`),
549	m_alloc_offset (`0`), m_size (`0`), m_nb_read (`0`), m_line_start_idx (`0`),
550	m_line_num (`0`), m_total_lines (`0`), m_missing_trailing_newline (true)
551	{
552	m_line_record.create (nelems: `0`);
553	}
554
555	/ Destructor for a cache of file used by caret diagnostic. /
556
557	file_cache_slot::~file_cache_slot ()
558	{
559	if (m_fp)
560	{
561	fclose (stream: m_fp);
562	m_fp = NULL;
563	}
564	if (m_data)
565	{
566	offset_buffer (offset: -m_alloc_offset);
567	XDELETEVEC (m_data);
568	m_data = `0`;
569	}
570	m_line_record.release ();
571	}
572
573	/ Returns TRUE iff the cache would need to be filled with data coming*
574	from the file. That is, either the cache is empty or full or the
575	current line is empty. Note that if the cache is full, it would
576	need to be extended and filled again. /*
577
578	bool
579	file_cache_slot::needs_read_p () const
580	{
581	return m_fp && (m_nb_read == `0`
582	\|\| m_nb_read == m_size
583	\|\| (m_line_start_idx >= m_nb_read - `1`));
584	}
585
586	/ Return TRUE iff the cache is full and thus needs to be*
587	extended. /*
588
589	bool
590	file_cache_slot::needs_grow_p () const
591	{
592	return m_nb_read == m_size;
593	}
594
595	/ Grow the cache if it needs to be extended. /
596
597	void
598	file_cache_slot::maybe_grow ()
599	{
600	if (!needs_grow_p ())
601	return;
602
603	if (!m_data)
604	{
605	gcc_assert (m_size == `0` && m_alloc_offset == `0`);
606	m_size = buffer_size;
607	m_data = XNEWVEC (char, m_size);
608	}
609	else
610	{
611	const int offset = m_alloc_offset;
612	offset_buffer (offset: -offset);
613	m_size *= `2`;
614	m_data = XRESIZEVEC (char, m_data, m_size);
615	offset_buffer (offset);
616	}
617	}
618
619	/ Read more data into the cache. Extends the cache if need be.*
620	Returns TRUE iff new data could be read. /*
621
622	bool
623	file_cache_slot::read_data ()
624	{
625	if (feof (stream: m_fp) \|\| ferror (stream: m_fp))
626	return false;
627
628	maybe_grow ();
629
630	char * from = m_data + m_nb_read;
631	size_t to_read = m_size - m_nb_read;
632	size_t nb_read = fread (ptr: from, size: `1`, n: to_read, stream: m_fp);
633
634	if (ferror (stream: m_fp))
635	return false;
636
637	m_nb_read += nb_read;
638	return !!nb_read;
639	}
640
641	/ Read new data iff the cache needs to be filled with more data*
642	coming from the file FP. Return TRUE iff the cache was filled with
643	mode data. /*
644
645	bool
646	file_cache_slot::maybe_read_data ()
647	{
648	if (!needs_read_p ())
649	return false;
650	return read_data ();
651	}
652
/* Helper function for file_cache_slot::get_next_line (), to find the end of
   the next line.  Returns with the memchr convention, i.e. nullptr if a line
   terminator was not found.  We need to determine line endings in the same
   manner that libcpp does: any of \n, \r\n, or \r is a line ending.

   S is the start of the buffer to search and LEN its length in bytes.
   For a \r\n pair the returned pointer is to the \n.  */

static char *
find_end_of_line (char *s, size_t len)
{
  for (const auto end = s + len; s != end; ++s)
    {
      if (*s == '\n')
        return s;
      if (*s == '\r')
        {
          const auto next = s + 1;
          if (next == end)
            {
              /* Don't find the line ending if \r is the very last character
                 in the buffer; we do not know if it's the end of the file or
                 just the end of what has been read so far, and we wouldn't
                 want to break in the middle of what's actually a \r\n
                 sequence.  Instead, we will handle the case of a file ending
                 in a \r later.  */
              break;
            }
          return (*next == '\n' ? next : s);
        }
    }
  return nullptr;
}
683
684	/ Read a new line from file FP, using C as a cache for the data*
685	coming from the file. Upon successful completion, LINE is set to*
686	the beginning of the line found. LINE points directly in the*
687	line cache and is only valid until the next call of get_next_line.
688	*LINE_LEN is set to the length of the line. Note that the line
689	does not contain any terminal delimiter. This function returns
690	true if some data was read or process from the cache, false
691	otherwise. Note that subsequent calls to get_next_line might
692	make the content of LINE invalid. /
693
694	bool
695	file_cache_slot::get_next_line (char *line, ssize_t line_len)
696	{
697	/ Fill the cache with data to process. /
698	maybe_read_data ();
699
700	size_t remaining_size = m_nb_read - m_line_start_idx;
701	if (remaining_size == `0`)
702	/ There is no more data to process. /
703	return false;
704
705	char *line_start = m_data + m_line_start_idx;
706
707	char *next_line_start = NULL;
708	size_t len = `0`;
709	char *line_end = find_end_of_line (s: line_start, len: remaining_size);
710	if (line_end == NULL)
711	{
712	/ We haven't found an end-of-line delimiter in the cache.*
713	Fill the cache with more data from the file and look again. /*
714	while (maybe_read_data ())
715	{
716	line_start = m_data + m_line_start_idx;
717	remaining_size = m_nb_read - m_line_start_idx;
718	line_end = find_end_of_line (s: line_start, len: remaining_size);
719	if (line_end != NULL)
720	{
721	next_line_start = line_end + `1`;
722	break;
723	}
724	}
725	if (line_end == NULL)
726	{
727	/ We've loaded all the file into the cache and still no*
728	terminator. Let's say the line ends up at one byte past the
729	end of the file. This is to stay consistent with the case
730	of when the line ends up with a terminator and line_end points to
731	that. That consistency is useful below in the len calculation.
732
733	If the file ends in a \r, we didn't identify it as a line
734	terminator above, so do that now instead. /*
735	line_end = m_data + m_nb_read;
736	if (m_nb_read && line_end[-`1`] == `'\r'`)
737	{
738	--line_end;
739	m_missing_trailing_newline = false;
740	}
741	else
742	m_missing_trailing_newline = true;
743	}
744	else
745	m_missing_trailing_newline = false;
746	}
747	else
748	{
749	next_line_start = line_end + `1`;
750	m_missing_trailing_newline = false;
751	}
752
753	if (m_fp && ferror (stream: m_fp))
754	return false;
755
756	/ At this point, we've found the end of the of line. It either points to*
757	the line terminator or to one byte after the last byte of the file. /*
758	gcc_assert (line_end != NULL);
759
760	len = line_end - line_start;
761
762	if (m_line_start_idx < m_nb_read)
763	*line = line_start;
764
765	++m_line_num;
766
767	/ Before we update our line record, make sure the hint about the*
768	total number of lines of the file is correct. If it's not, then
769	we give up recording line boundaries from now on. /*
770	bool update_line_record = true;
771	if (m_line_num > m_total_lines)
772	update_line_record = false;
773
774	/ Now update our line record so that re-reading lines from the*
775	before m_line_start_idx is faster. /*
776	if (update_line_record
777	&& m_line_record.length () < line_record_size)
778	{
779	/ If the file lines fits in the line record, we just record all*
780	its lines .../*
781	if (m_total_lines <= line_record_size
782	&& m_line_num > m_line_record.length ())
783	m_line_record.safe_push
784	(obj: file_cache_slot::line_info (m_line_num,
785	m_line_start_idx,
786	line_end - m_data));
787	else if (m_total_lines > line_record_size)
788	{
789	/ ... otherwise, we just scale total_lines down to*
790	(line_record_size lines. /*
791	size_t n = (m_line_num * line_record_size) / m_total_lines;
792	if (m_line_record.length () == `0`
793	\|\| n >= m_line_record.length ())
794	m_line_record.safe_push
795	(obj: file_cache_slot::line_info (m_line_num,
796	m_line_start_idx,
797	line_end - m_data));
798	}
799	}
800
801	/ Update m_line_start_idx so that it points to the next line to be*
802	read. /*
803	if (next_line_start)
804	m_line_start_idx = next_line_start - m_data;
805	else
806	/ We didn't find any terminal '\n'. Let's consider that the end*
807	of line is the end of the data in the cache. The next
808	invocation of get_next_line will either read more data from the
809	underlying file or return false early because we've reached the
810	end of the file. /*
811	m_line_start_idx = m_nb_read;
812
813	*line_len = len;
814
815	return true;
816	}
817
818	/ Consume the next bytes coming from the cache (or from its*
819	underlying file if there are remaining unread bytes in the file)
820	until we reach the next end-of-line (or end-of-file). There is no
821	copying from the cache involved. Return TRUE upon successful
822	completion. /*
823
824	bool
825	file_cache_slot::goto_next_line ()
826	{
827	char *l;
828	ssize_t len;
829
830	return get_next_line (line: &l, line_len: &len);
831	}
832
833	/ Read an arbitrary line number LINE_NUM from the file cached in C.*
834	If the line was read successfully, LINE points to the beginning*
835	of the line in the file cache and LINE_LEN is the length of the*
836	line. LINE is not nul-terminated, but may contain zero bytes.*
837	*LINE is only valid until the next call of read_line_num.
838	This function returns bool if a line was read. /*
839
840	bool
841	file_cache_slot::read_line_num (size_t line_num,
842	char ** line, ssize_t *line_len)
843	{
844	gcc_assert (line_num > `0`);
845
846	if (line_num <= m_line_num)
847	{
848	/ We've been asked to read lines that are before m_line_num.*
849	So lets use our line record (if it's not empty) to try to
850	avoid re-reading the file from the beginning again. /*
851
852	if (m_line_record.is_empty ())
853	{
854	m_line_start_idx = `0`;
855	m_line_num = `0`;
856	}
857	else
858	{
859	file_cache_slot::line_info *i = NULL;
860	if (m_total_lines <= line_record_size)
861	{
862	/ In languages where the input file is not totally*
863	preprocessed up front, the m_total_lines hint
864	can be smaller than the number of lines of the
865	file. In that case, only the first
866	m_total_lines have been recorded.
867
868	Otherwise, the first m_total_lines we've read have
869	their start/end recorded here. /*
870	i = (line_num <= m_total_lines)
871	? &m_line_record [line_num - `1`]
872	: &m_line_record [m_total_lines - `1`];
873	gcc_assert (i->line_num <= line_num);
874	}
875	else
876	{
877	/ So the file had more lines than our line record*
878	size. Thus the number of lines we've recorded has
879	been scaled down to line_record_size. Let's
880	pick the start/end of the recorded line that is
881	closest to line_num. /*
882	size_t n = (line_num <= m_total_lines)
883	? line_num * line_record_size / m_total_lines
884	: m_line_record.length () - `1`;
885	if (n < m_line_record.length ())
886	{
887	i = &m_line_record [n];
888	gcc_assert (i->line_num <= line_num);
889	}
890	}
891
892	if (i && i->line_num == line_num)
893	{
894	/ We have the start/end of the line. /
895	*line = m_data + i->start_pos;
896	*line_len = i->end_pos - i->start_pos;
897	return true;
898	}
899
900	if (i)
901	{
902	m_line_start_idx = i->start_pos;
903	m_line_num = i->line_num - `1`;
904	}
905	else
906	{
907	m_line_start_idx = `0`;
908	m_line_num = `0`;
909	}
910	}
911	}
912
913	/ Let's walk from line m_line_num up to line_num - 1, without*
914	copying any line. /*
915	while (m_line_num < line_num - `1`)
916	if (!goto_next_line ())
917	return false;
918
919	/ The line we want is the next one. Let's read and copy it back to*
920	the caller. /*
921	return get_next_line (line, line_len);
922	}
923
924	/ Return the physical source line that corresponds to FILE_PATH/LINE.*
925	The line is not nul-terminated. The returned pointer is only
926	valid until the next call of location_get_source_line.
927	Note that the line can contain several null characters,
928	so the returned value's length has the actual length of the line.
929	If the function fails, a NULL char_span is returned. /*
930
931	char_span
932	file_cache::get_source_line (const char file_path, int* line)
933	{
934	char *buffer = NULL;
935	ssize_t len;
936
937	if (line == `0`)
938	return char_span (NULL, `0`);
939
940	if (file_path == NULL)
941	return char_span (NULL, `0`);
942
943	file_cache_slot *c = lookup_or_add_file (file_path);
944	if (c == NULL)
945	return char_span (NULL, `0`);
946
947	bool read = c->read_line_num (line_num: line, line: &buffer, line_len: &len);
948	if (!read)
949	return char_span (NULL, `0`);
950
951	return char_span (buffer, len);
952	}
953
954	/ Return a NUL-terminated copy of the source text between two locations, or*
955	NULL if the arguments are invalid. The caller is responsible for freeing
956	the return value. /*
957
958	char *
959	get_source_text_between (file_cache &fc, location_t start, location_t end)
960	{
961	expanded_location expstart =
962	expand_location_to_spelling_point (start, aspect: LOCATION_ASPECT_START);
963	expanded_location expend =
964	expand_location_to_spelling_point (end, aspect: LOCATION_ASPECT_FINISH);
965
966	/ If the locations are in different files or the end comes before the*
967	start, give up and return nothing. /*
968	if (!expstart.file \|\| !expend.file)
969	return NULL;
970	if (strcmp (s1: expstart.file, s2: expend.file) != `0`)
971	return NULL;
972	if (expstart.line > expend.line)
973	return NULL;
974	if (expstart.line == expend.line
975	&& expstart.column > expend.column)
976	return NULL;
977	/ These aren't real column numbers, give up. /
978	if (expstart.column == `0` \|\| expend.column == `0`)
979	return NULL;
980
981	/ For a single line we need to trim both edges. /
982	if (expstart.line == expend.line)
983	{
984	char_span line = fc.get_source_line (file_path: expstart.file, line: expstart.line);
985	if (line.length () < `1`)
986	return NULL;
987	int s = expstart.column - `1`;
988	int len = expend.column - s;
989	if (line.length () < (size_t)expend.column)
990	return NULL;
991	return line.subspan (offset: s, n_elts: len).xstrdup ();
992	}
993
994	struct obstack buf_obstack;
995	obstack_init (&buf_obstack);
996
997	/ Loop through all lines in the range and append each to buf; may trim*
998	parts of the start and end lines off depending on column values. /*
999	for (int lnum = expstart.line; lnum <= expend.line; ++lnum)
1000	{
1001	char_span line = fc.get_source_line (file_path: expstart.file, line: lnum);
1002	if (line.length () < `1` && (lnum != expstart.line && lnum != expend.line))
1003	continue;
1004
1005	/ For the first line in the range, only start at expstart.column /
1006	if (lnum == expstart.line)
1007	{
1008	unsigned off = expstart.column - `1`;
1009	if (line.length () < off)
1010	return NULL;
1011	line = line.subspan (offset: off, n_elts: line.length() - off);
1012	}
1013	/ For the last line, don't go past expend.column /
1014	else if (lnum == expend.line)
1015	{
1016	if (line.length () < (size_t)expend.column)
1017	return NULL;
1018	line = line.subspan (offset: `0`, n_elts: expend.column);
1019	}
1020
1021	/ Combine spaces at the beginning of later lines. /
1022	if (lnum > expstart.line)
1023	{
1024	unsigned off;
1025	for (off = `0`; off < line.length(); ++off)
1026	if (line [off] != `' '` && line [off] != `'\t'`)
1027	break;
1028	if (off > `0`)
1029	{
1030	obstack_1grow (&buf_obstack, `' '`);
1031	line = line.subspan (offset: off, n_elts: line.length() - off);
1032	}
1033	}
1034
1035	/ This does not include any trailing newlines. /
1036	obstack_grow (&buf_obstack, line.get_buffer (), line.length ());
1037	}
1038
1039	/ NUL-terminate and finish the buf obstack. /
1040	obstack_1grow (&buf_obstack, `0`);
1041	const char buf = (const* char *) obstack_finish (&buf_obstack);
1042
1043	return xstrdup (buf);
1044	}
1045
1046
1047	char_span
1048	file_cache::get_source_file_content (const char *file_path)
1049	{
1050	file_cache_slot *c = lookup_or_add_file (file_path);
1051	if (c == nullptr)
1052	return char_span (nullptr, `0`);
1053	return c->get_full_file_content ();
1054	}
1055
1056	/ Test if the location originates from the spelling location of a*
1057	builtin-tokens. That is, return TRUE if LOC is a (possibly
1058	virtual) location of a built-in token that appears in the expansion
1059	list of a macro. Please note that this function also works on
1060	tokens that result from built-in tokens. For instance, the
1061	function would return true if passed a token "4" that is the result
1062	of the expansion of the built-in __LINE__ macro. /*
1063	bool
1064	is_location_from_builtin_token (location_t loc)
1065	{
1066	const line_map_ordinary *map = NULL;
1067	loc = linemap_resolve_location (line_table, loc,
1068	lrk: LRK_SPELLING_LOCATION, loc_map: &map);
1069	return loc == BUILTINS_LOCATION;
1070	}
1071
1072	/ Expand the source location LOC into a human readable location. If*
1073	LOC is virtual, it resolves to the expansion point of the involved
1074	macro. If LOC resolves to a builtin location, the file name of the
1075	readable location is set to the string "<built-in>". /*
1076
1077	expanded_location
1078	expand_location (location_t loc)
1079	{
1080	return expand_location_1 (set: line_table, loc, /expansion_point_p=/true,
1081	aspect: LOCATION_ASPECT_CARET);
1082	}
1083
1084	/ Expand the source location LOC into a human readable location. If*
1085	LOC is virtual, it resolves to the expansion location of the
1086	relevant macro. If LOC resolves to a builtin location, the file
1087	name of the readable location is set to the string
1088	"<built-in>". /*
1089
1090	expanded_location
1091	expand_location_to_spelling_point (location_t loc,
1092	enum location_aspect aspect)
1093	{
1094	return expand_location_1 (set: line_table, loc, /expansion_point_p=/false,
1095	aspect);
1096	}
1097
1098	/ The rich_location class within libcpp requires a way to expand*
1099	location_t instances, and relies on the client code
1100	providing a symbol named
1101	linemap_client_expand_location_to_spelling_point
1102	to do this.
1103
1104	This is the implementation for libcommon.a (all host binaries),
1105	which simply calls into expand_location_1. /*
1106
1107	expanded_location
1108	linemap_client_expand_location_to_spelling_point (const line_maps *set,
1109	location_t loc,
1110	enum location_aspect aspect)
1111	{
1112	return expand_location_1 (set, loc, /expansion_point_p=/false, aspect);
1113	}
1114
1115
1116	/ If LOCATION is in a system header and if it is a virtual location*
1117	for a token coming from the expansion of a macro, unwind it to
1118	the location of the expansion point of the macro. If the expansion
1119	point is also in a system header return the original LOCATION.
1120	Otherwise, return the location of the expansion point.
1121
1122	This is used for instance when we want to emit diagnostics about a
1123	token that may be located in a macro that is itself defined in a
1124	system header, for example, for the NULL macro. In such a case, if
1125	LOCATION were passed directly to diagnostic functions such as
1126	warning_at, the diagnostic would be suppressed (unless
1127	-Wsystem-headers). /*
1128
1129	location_t
1130	expansion_point_location_if_in_system_header (location_t location)
1131	{
1132	if (!in_system_header_at (loc: location))
1133	return location;
1134
1135	location_t xloc = linemap_resolve_location (line_table, loc: location,
1136	lrk: LRK_MACRO_EXPANSION_POINT,
1137	NULL);
1138	return in_system_header_at (loc: xloc) ? location : xloc;
1139	}
1140
1141	/ If LOCATION is a virtual location for a token coming from the expansion*
1142	of a macro, unwind to the location of the expansion point of the macro. /*
1143
1144	location_t
1145	expansion_point_location (location_t location)
1146	{
1147	return linemap_resolve_location (line_table, loc: location,
1148	lrk: LRK_MACRO_EXPANSION_POINT, NULL);
1149	}
1150
1151	/ Construct a location with caret at CARET, ranging from START to*
1152	FINISH.
1153
1154	For example, consider:
1155
1156	11111111112
1157	12345678901234567890
1158	522
1159	523 return foo + bar;
1160	~~~~^~~~~
1161	524
1162
1163	The location's caret is at the "+", line 523 column 15, but starts
1164	earlier, at the "f" of "foo" at column 11. The finish is at the "r"
1165	of "bar" at column 19. /*
1166
1167	location_t
1168	make_location (location_t caret, location_t start, location_t finish)
1169	{
1170	return line_table->make_location (caret, start, finish);
1171	}
1172
1173	/ Same as above, but taking a source range rather than two locations. /
1174
1175	location_t
1176	make_location (location_t caret, source_range src_range)
1177	{
1178	location_t pure_loc = get_pure_location (loc: caret);
1179	return line_table->get_or_create_combined_loc (locus: pure_loc, src_range,
1180	data: nullptr, discriminator: `0`);
1181	}
1182
1183	/ An expanded_location stores the column in byte units. This function*
1184	converts that column to display units. That requires reading the associated
1185	source line in order to calculate the display width. If that cannot be done
1186	for any reason, then returns the byte column as a fallback. /*
1187	int
1188	location_compute_display_column (file_cache &fc,
1189	expanded_location exploc,
1190	const cpp_char_column_policy &policy)
1191	{
1192	if (!(exploc.file && *exploc.file && exploc.line && exploc.column))
1193	return exploc.column;
1194	char_span line = fc.get_source_line (file_path: exploc.file, line: exploc.line);
1195	/ If line is NULL, this function returns exploc.column which is the*
1196	desired fallback. /*
1197	return cpp_byte_column_to_display_column (data: line.get_buffer (), data_length: line.length (),
1198	column: exploc.column, policy);
1199	}
1200
1201	/ Dump statistics to stderr about the memory usage of the line_table*
1202	set of line maps. This also displays some statistics about macro
1203	expansion. /*
1204
1205	void
1206	dump_line_table_statistics (void)
1207	{
1208	struct linemap_stats s;
1209	long total_used_map_size,
1210	macro_maps_size,
1211	total_allocated_map_size;
1212
1213	memset (s: &s, c: `0`, n: sizeof (s));
1214
1215	linemap_get_statistics (line_table, &s);
1216
1217	macro_maps_size = s.macro_maps_used_size
1218	+ s.macro_maps_locations_size;
1219
1220	total_allocated_map_size = s.ordinary_maps_allocated_size
1221	+ s.macro_maps_allocated_size
1222	+ s.macro_maps_locations_size;
1223
1224	total_used_map_size = s.ordinary_maps_used_size
1225	+ s.macro_maps_used_size
1226	+ s.macro_maps_locations_size;
1227
1228	fprintf (stderr, format: "Number of expanded macros: %5ld\n",
1229	s.num_expanded_macros);
1230	if (s.num_expanded_macros != `0`)
1231	fprintf (stderr, format: "Average number of tokens per macro expansion: %5ld\n",
1232	s.num_macro_tokens / s.num_expanded_macros);
1233	fprintf (stderr,
1234	format: "\nLine Table allocations during the "
1235	"compilation process\n");
1236	fprintf (stderr, format: "Number of ordinary maps used: " PRsa (`5`) "\n",
1237	SIZE_AMOUNT (s.num_ordinary_maps_used));
1238	fprintf (stderr, format: "Ordinary map used size: " PRsa (`5`) "\n",
1239	SIZE_AMOUNT (s.ordinary_maps_used_size));
1240	fprintf (stderr, format: "Number of ordinary maps allocated: " PRsa (`5`) "\n",
1241	SIZE_AMOUNT (s.num_ordinary_maps_allocated));
1242	fprintf (stderr, format: "Ordinary maps allocated size: " PRsa (`5`) "\n",
1243	SIZE_AMOUNT (s.ordinary_maps_allocated_size));
1244	fprintf (stderr, format: "Number of macro maps used: " PRsa (`5`) "\n",
1245	SIZE_AMOUNT (s.num_macro_maps_used));
1246	fprintf (stderr, format: "Macro maps used size: " PRsa (`5`) "\n",
1247	SIZE_AMOUNT (s.macro_maps_used_size));
1248	fprintf (stderr, format: "Macro maps locations size: " PRsa (`5`) "\n",
1249	SIZE_AMOUNT (s.macro_maps_locations_size));
1250	fprintf (stderr, format: "Macro maps size: " PRsa (`5`) "\n",
1251	SIZE_AMOUNT (macro_maps_size));
1252	fprintf (stderr, format: "Duplicated maps locations size: " PRsa (`5`) "\n",
1253	SIZE_AMOUNT (s.duplicated_macro_maps_locations_size));
1254	fprintf (stderr, format: "Total allocated maps size: " PRsa (`5`) "\n",
1255	SIZE_AMOUNT (total_allocated_map_size));
1256	fprintf (stderr, format: "Total used maps size: " PRsa (`5`) "\n",
1257	SIZE_AMOUNT (total_used_map_size));
1258	fprintf (stderr, format: "Ad-hoc table size: " PRsa (`5`) "\n",
1259	SIZE_AMOUNT (s.adhoc_table_size));
1260	fprintf (stderr, format: "Ad-hoc table entries used: " PRsa (`5`) "\n",
1261	SIZE_AMOUNT (s.adhoc_table_entries_used));
1262	fprintf (stderr, format: "optimized_ranges: " PRsa (`5`) "\n",
1263	SIZE_AMOUNT (line_table->m_num_optimized_ranges));
1264	fprintf (stderr, format: "unoptimized_ranges: " PRsa (`5`) "\n",
1265	SIZE_AMOUNT (line_table->m_num_unoptimized_ranges));
1266
1267	fprintf (stderr, format: "\n");
1268	}
1269
1270	/ Get location one beyond the final location in ordinary map IDX. /
1271
1272	static location_t
1273	get_end_location (class line_maps set, unsigned* int idx)
1274	{
1275	if (idx == LINEMAPS_ORDINARY_USED (set) - `1`)
1276	return set->highest_location;
1277
1278	struct line_map *next_map = LINEMAPS_ORDINARY_MAP_AT (set, index: idx + `1`);
1279	return MAP_START_LOCATION (map: next_map);
1280	}
1281
/* Helper function for write_digit_row.
   Write to STREAM the single character '0' + (DIGIT % 10), i.e. the
   units digit of a non-negative DIGIT.  */

static void
write_digit (FILE *stream, int digit)
{
  fputc ('0' + (digit % 10), stream);
}
1289
1290	/ Helper function for dump_location_info.*
1291	Write a row of numbers to STREAM, numbering a source line,
1292	giving the units, tens, hundreds etc of the column number. /*
1293
1294	static void
1295	write_digit_row (FILE stream, int* indent,
1296	const line_map_ordinary *map,
1297	location_t loc, int max_col, int divisor)
1298	{
1299	fprintf (stream: stream, format: "%*c", indent, `' '`);
1300	fprintf (stream: stream, format: "\|");
1301	for (int column = `1`; column < max_col; column++)
1302	{
1303	location_t column_loc = loc + (column << map->m_range_bits);
1304	write_digit (stream, digit: column_loc / divisor);
1305	}
1306	fprintf (stream: stream, format: "\n");
1307	}
1308
1309	/ Write a half-closed (START) / half-open (END) interval of*
1310	location_t to STREAM. /*
1311
1312	static void
1313	dump_location_range (FILE *stream,
1314	location_t start, location_t end)
1315	{
1316	fprintf (stream: stream,
1317	format: " location_t interval: %u <= loc < %u\n",
1318	start, end);
1319	}
1320
1321	/ Write a labelled description of a half-closed (START) / half-open (END)*
1322	interval of location_t to STREAM. /*
1323
1324	static void
1325	dump_labelled_location_range (FILE *stream,
1326	const char *name,
1327	location_t start, location_t end)
1328	{
1329	fprintf (stream: stream, format: "%s\n", name);
1330	dump_location_range (stream, start, end);
1331	fprintf (stream: stream, format: "\n");
1332	}
1333
1334	/ Write a visualization of the locations in the line_table to STREAM. /
1335
1336	void
1337	dump_location_info (FILE *stream)
1338	{
1339	file_cache fc;
1340
1341	/ Visualize the reserved locations. /
1342	dump_labelled_location_range (stream, name: "RESERVED LOCATIONS",
1343	start: `0`, end: RESERVED_LOCATION_COUNT);
1344
1345	/ Visualize the ordinary line_map instances, rendering the sources. /
1346	for (unsigned int idx = `0`; idx < LINEMAPS_ORDINARY_USED (set: line_table); idx++)
1347	{
1348	location_t end_location = get_end_location (set: line_table, idx);
1349	/ half-closed: doesn't include this one. /
1350
1351	const line_map_ordinary *map
1352	= LINEMAPS_ORDINARY_MAP_AT (set: line_table, index: idx);
1353	fprintf (stream: stream, format: "ORDINARY MAP: %i\n", idx);
1354	dump_location_range (stream,
1355	start: MAP_START_LOCATION (map), end: end_location);
1356	fprintf (stream: stream, format: " file: %s\n", ORDINARY_MAP_FILE_NAME (ord_map: map));
1357	fprintf (stream: stream, format: " starting at line: %i\n",
1358	ORDINARY_MAP_STARTING_LINE_NUMBER (ord_map: map));
1359	fprintf (stream: stream, format: " column and range bits: %i\n",
1360	map->m_column_and_range_bits);
1361	fprintf (stream: stream, format: " column bits: %i\n",
1362	map->m_column_and_range_bits - map->m_range_bits);
1363	fprintf (stream: stream, format: " range bits: %i\n",
1364	map->m_range_bits);
1365	const char * reason;
1366	switch (map->reason) {
1367	case LC_ENTER:
1368	reason = "LC_ENTER";
1369	break;
1370	case LC_LEAVE:
1371	reason = "LC_LEAVE";
1372	break;
1373	case LC_RENAME:
1374	reason = "LC_RENAME";
1375	break;
1376	case LC_RENAME_VERBATIM:
1377	reason = "LC_RENAME_VERBATIM";
1378	break;
1379	case LC_ENTER_MACRO:
1380	reason = "LC_RENAME_MACRO";
1381	break;
1382	default:
1383	reason = "Unknown";
1384	}
1385	fprintf (stream: stream, format: " reason: %d (%s)\n", map->reason, reason);
1386
1387	const line_map_ordinary *includer_map
1388	= linemap_included_from_linemap (set: line_table, map);
1389	fprintf (stream: stream, format: " included from location: %d",
1390	linemap_included_from (ord_map: map));
1391	if (includer_map) {
1392	fprintf (stream: stream, format: " (in ordinary map %d)",
1393	int (includer_map - line_table->info_ordinary.maps));
1394	}
1395	fprintf (stream: stream, format: "\n");
1396
1397	/ Render the span of source lines that this "map" covers. /
1398	for (location_t loc = MAP_START_LOCATION (map);
1399	loc < end_location;
1400	loc += (`1` << map->m_range_bits) )
1401	{
1402	gcc_assert (pure_location_p (line_table, loc) );
1403
1404	expanded_location exploc
1405	= linemap_expand_location (line_table, map, loc);
1406
1407	if (exploc.column == `0`)
1408	{
1409	/ Beginning of a new source line: draw the line. /
1410
1411	char_span line_text = fc.get_source_line (file_path: exploc.file,
1412	line: exploc.line);
1413	if (!line_text)
1414	break;
1415	fprintf (stream: stream,
1416	format: "%s:%3i\|loc:%5i\|%.*s\n",
1417	exploc.file, exploc.line,
1418	loc,
1419	(int)line_text.length (), line_text.get_buffer ());
1420
1421	/ "loc" is at column 0, which means "the whole line".*
1422	Render the locations within* the line, by underlining*
1423	it, showing the location_t numeric values
1424	at each column. /*
1425	size_t max_col = (`1` << map->m_column_and_range_bits) - `1`;
1426	if (max_col > line_text.length ())
1427	max_col = line_text.length () + `1`;
1428
1429	int len_lnum = num_digits (exploc.line);
1430	if (len_lnum < `3`)
1431	len_lnum = `3`;
1432	int len_loc = num_digits (loc);
1433	if (len_loc < `5`)
1434	len_loc = `5`;
1435
1436	int indent = `6` + strlen (s: exploc.file) + len_lnum + len_loc;
1437
1438	/ Thousands. /
1439	if (end_location > `999`)
1440	write_digit_row (stream, indent, map, loc, max_col, divisor: `1000`);
1441
1442	/ Hundreds. /
1443	if (end_location > `99`)
1444	write_digit_row (stream, indent, map, loc, max_col, divisor: `100`);
1445
1446	/ Tens. /
1447	write_digit_row (stream, indent, map, loc, max_col, divisor: `10`);
1448
1449	/ Units. /
1450	write_digit_row (stream, indent, map, loc, max_col, divisor: `1`);
1451	}
1452	}
1453	fprintf (stream: stream, format: "\n");
1454	}
1455
1456	/ Visualize unallocated values. /
1457	dump_labelled_location_range (stream, name: "UNALLOCATED LOCATIONS",
1458	start: line_table->highest_location,
1459	end: LINEMAPS_MACRO_LOWEST_LOCATION (set: line_table));
1460
1461	/ Visualize the macro line_map instances, rendering the sources. /
1462	for (unsigned int i = `0`; i < LINEMAPS_MACRO_USED (set: line_table); i++)
1463	{
1464	/ Each macro map that is allocated owns location_t values*
1465	that are lower* that the one before them.*
1466	Hence it's meaningful to view them either in order of ascending
1467	source locations, or in order of ascending macro map index. /*
1468	const bool ascending_location_ts = true;
1469	unsigned int idx = (ascending_location_ts
1470	? (LINEMAPS_MACRO_USED (set: line_table) - (i + `1`))
1471	: i);
1472	const line_map_macro *map = LINEMAPS_MACRO_MAP_AT (set: line_table, index: idx);
1473	fprintf (stream: stream, format: "MACRO %i: %s (%u tokens)\n",
1474	idx,
1475	linemap_map_get_macro_name (map),
1476	MACRO_MAP_NUM_MACRO_TOKENS (macro_map: map));
1477	dump_location_range (stream,
1478	start: map->start_location,
1479	end: (map->start_location
1480	+ MACRO_MAP_NUM_MACRO_TOKENS (macro_map: map)));
1481	inform (map->get_expansion_point_location (),
1482	"expansion point is location %i",
1483	map->get_expansion_point_location ());
1484	fprintf (stream: stream, format: " map->start_location: %u\n",
1485	map->start_location);
1486
1487	fprintf (stream: stream, format: " macro_locations:\n");
1488	for (unsigned int i = `0`; i < MACRO_MAP_NUM_MACRO_TOKENS (macro_map: map); i++)
1489	{
1490	location_t x = MACRO_MAP_LOCATIONS (macro_map: map)[`2` * i];
1491	location_t y = MACRO_MAP_LOCATIONS (macro_map: map)[(`2` * i) + `1`];
1492
1493	/ linemap_add_macro_token encodes token numbers in an expansion*
1494	by putting them after MAP_START_LOCATION. /*
1495
1496	/ I'm typically seeing 4 uninitialized entries at the end of*
1497	0xafafafaf.
1498	This appears to be due to macro.cc:replace_args
1499	adding 2 extra args for padding tokens; presumably there may
1500	be a leading and/or trailing padding token injected,
1501	each for 2 more location slots.
1502	This would explain there being up to 4 location_ts slots
1503	that may be uninitialized. /*
1504
1505	fprintf (stream: stream, format: " %u: %u, %u\n",
1506	i,
1507	x,
1508	y);
1509	if (x == y)
1510	{
1511	if (x < MAP_START_LOCATION (map))
1512	inform (x, "token %u has %<x-location == y-location == %u%>",
1513	i, x);
1514	else
1515	fprintf (stream: stream,
1516	format: "x-location == y-location == %u encodes token # %u\n",
1517	x, x - MAP_START_LOCATION (map));
1518	}
1519	else
1520	{
1521	inform (x, "token %u has %<x-location == %u%>", i, x);
1522	inform (x, "token %u has %<y-location == %u%>", i, y);
1523	}
1524	}
1525	fprintf (stream: stream, format: "\n");
1526	}
1527
1528	/ It appears that MAX_LOCATION_T itself is never assigned to a*
1529	macro map, presumably due to an off-by-one error somewhere
1530	between the logic in linemap_enter_macro and
1531	LINEMAPS_MACRO_LOWEST_LOCATION. /*
1532	dump_labelled_location_range (stream, name: "MAX_LOCATION_T",
1533	start: MAX_LOCATION_T,
1534	end: MAX_LOCATION_T + `1`);
1535
1536	/ Visualize ad-hoc values. /
1537	dump_labelled_location_range (stream, name: "AD-HOC LOCATIONS",
1538	start: MAX_LOCATION_T + `1`, UINT_MAX);
1539	}
1540
1541	/ string_concat's constructor. /
1542
1543	string_concat::string_concat (int num, location_t *locs)
1544	: m_num (num)
1545	{
1546	m_locs = ggc_vec_alloc <location_t> (c: num);
1547	for (int i = `0`; i < num; i++)
1548	m_locs[i] = locs[i];
1549	}
1550
1551	/ string_concat_db's constructor. /
1552
1553	string_concat_db::string_concat_db ()
1554	{
1555	m_table = hash_map <location_hash, string_concat *>::create_ggc (size: `64`);
1556	}
1557
1558	/ Record that a string concatenation occurred, covering NUM*
1559	string literal tokens. LOCS is an array of size NUM, containing the
1560	locations of the tokens. A copy of LOCS is taken. /*
1561
1562	void
1563	string_concat_db::record_string_concatenation (int num, location_t *locs)
1564	{
1565	gcc_assert (num > `1`);
1566	gcc_assert (locs);
1567
1568	location_t key_loc = get_key_loc (loc: locs[`0`]);
1569	/ We don't record data for 'RESERVED_LOCATION_P (key_loc)' key values:*
1570	any data now recorded under key 'key_loc' would be overwritten by a
1571	subsequent call with the same key 'key_loc'. /*
1572	if (RESERVED_LOCATION_P (key_loc))
1573	return;
1574
1575	string_concat *concat
1576	= new (ggc_alloc <string_concat> ()) string_concat (num, locs);
1577	m_table->put (k: key_loc, v: concat);
1578	}
1579
1580	/ Determine if LOC was the location of the initial token of a*
1581	concatenation of string literal tokens.
1582	If so, OUT_NUM is written to with the number of tokens, and*
1583	*OUT_LOCS with the location of an array of locations of the
1584	tokens, and return true. OUT_LOCS is a borrowed pointer to*
1585	storage owned by the string_concat_db.
1586	Otherwise, return false. /*
1587
1588	bool
1589	string_concat_db::get_string_concatenation (location_t loc,
1590	int *out_num,
1591	location_t **out_locs)
1592	{
1593	gcc_assert (out_num);
1594	gcc_assert (out_locs);
1595
1596	location_t key_loc = get_key_loc (loc);
1597	/ We don't record data for 'RESERVED_LOCATION_P (key_loc)' key values; see*
1598	discussion in 'string_concat_db::record_string_concatenation'. /*
1599	if (RESERVED_LOCATION_P (key_loc))
1600	return false;
1601
1602	string_concat **concat = m_table->get (k: key_loc);
1603	if (!concat)
1604	return false;
1605
1606	out_num = (concat)->m_num;
1607	out_locs =(concat)->m_locs;
1608	return true;
1609	}
1610
1611	/ Internal function. Canonicalize LOC into a form suitable for*
1612	use as a key within the database, stripping away macro expansion,
1613	ad-hoc information, and range information, using the location of
1614	the start of LOC within an ordinary linemap. /*
1615
1616	location_t
1617	string_concat_db::get_key_loc (location_t loc)
1618	{
1619	loc = linemap_resolve_location (line_table, loc, lrk: LRK_SPELLING_LOCATION,
1620	NULL);
1621
1622	loc = get_range_from_loc (set: line_table, loc).m_start;
1623
1624	return loc;
1625	}
1626
1627	/ Helper class for use within get_substring_ranges_for_loc.*
1628	An vec of cpp_string with responsibility for releasing all of the
1629	str->text for each str in the vector. /*
1630
1631	class auto_cpp_string_vec : public auto_vec <cpp_string>
1632	{
1633	public:
1634	auto_cpp_string_vec (int alloc)
1635	: auto_vec <cpp_string> (alloc) {}
1636
1637	~auto_cpp_string_vec ()
1638	{
1639	/ Clean up the copies within this vec. /
1640	int i;
1641	cpp_string *str;
1642	FOR_EACH_VEC_ELT (*this, i, str)
1643	free (ptr: const_cast <unsigned char *> (str->text));
1644	}
1645	};
1646
1647	/ Attempt to populate RANGES with source location information on the*
1648	individual characters within the string literal found at STRLOC.
1649	If CONCATS is non-NULL, then any string literals that the token at
1650	STRLOC was concatenated with are also added to RANGES.
1651
1652	Return NULL if successful, or an error message if any errors occurred (in
1653	which case RANGES may be only partially populated and should not
1654	be used).
1655
1656	This is implemented by re-parsing the relevant source line(s). /*
1657
1658	static const char *
1659	get_substring_ranges_for_loc (cpp_reader *pfile,
1660	file_cache &fc,
1661	string_concat_db *concats,
1662	location_t strloc,
1663	enum cpp_ttype type,
1664	cpp_substring_ranges &ranges)
1665	{
1666	gcc_assert (pfile);
1667
1668	if (strloc == UNKNOWN_LOCATION)
1669	return "unknown location";
1670
1671	/ Reparsing the strings requires accurate location information.*
1672	If -ftrack-macro-expansion has been overridden from its default
1673	of 2, then we might have a location of a macro expansion point,
1674	rather than the location of the literal itself.
1675	Avoid this by requiring that we have full macro expansion tracking
1676	for substring locations to be available. /*
1677	if (cpp_get_options (pfile)->track_macro_expansion != `2`)
1678	return "track_macro_expansion != 2";
1679
1680	/ If #line or # 44 "file"-style directives are present, then there's*
1681	no guarantee that the line numbers we have can be used to locate
1682	the strings. For example, we might have a .i file with # directives
1683	pointing back to lines within a .c file, but the .c file might
1684	have been edited since the .i file was created.
1685	In such a case, the safest course is to disable on-demand substring
1686	locations. /*
1687	if (line_table->seen_line_directive)
1688	return "seen line directive";
1689
1690	/ If string concatenation has occurred at STRLOC, get the locations*
1691	of all of the literal tokens making up the compound string.
1692	Otherwise, just use STRLOC. /*
1693	int num_locs = `1`;
1694	location_t *strlocs = &strloc;
1695	if (concats)
1696	concats->get_string_concatenation (loc: strloc, out_num: &num_locs, out_locs: &strlocs);
1697
1698	auto_cpp_string_vec strs (num_locs);
1699	auto_vec <cpp_string_location_reader> loc_readers (num_locs);
1700	for (int i = `0`; i < num_locs; i++)
1701	{
1702	/ Get range of strloc. We will use it to locate the start and finish*
1703	of the literal token within the line. /*
1704	source_range src_range = get_range_from_loc (set: line_table, loc: strlocs[i]);
1705
1706	if (src_range.m_start >= LINEMAPS_MACRO_LOWEST_LOCATION (set: line_table))
1707	{
1708	/ If the string token was within a macro expansion, then we can*
1709	cope with it for the simple case where we have a single token.
1710	Otherwise, bail out. /*
1711	if (src_range.m_start != src_range.m_finish)
1712	return "macro expansion";
1713	}
1714	else
1715	{
1716	if (src_range.m_start >= LINE_MAP_MAX_LOCATION_WITH_COLS)
1717	/ If so, we can't reliably determine where the token started within*
1718	its line. /*
1719	return "range starts after LINE_MAP_MAX_LOCATION_WITH_COLS";
1720
1721	if (src_range.m_finish >= LINE_MAP_MAX_LOCATION_WITH_COLS)
1722	/ If so, we can't reliably determine where the token finished*
1723	within its line. /*
1724	return "range ends after LINE_MAP_MAX_LOCATION_WITH_COLS";
1725	}
1726
1727	expanded_location start
1728	= expand_location_to_spelling_point (loc: src_range.m_start,
1729	aspect: LOCATION_ASPECT_START);
1730	expanded_location finish
1731	= expand_location_to_spelling_point (loc: src_range.m_finish,
1732	aspect: LOCATION_ASPECT_FINISH);
1733	if (start.file != finish.file)
1734	return "range endpoints are in different files";
1735	if (start.line != finish.line)
1736	return "range endpoints are on different lines";
1737	if (start.column > finish.column)
1738	return "range endpoints are reversed";
1739
1740	char_span line = fc.get_source_line (file_path: start.file, line: start.line);
1741	if (!line)
1742	return "unable to read source line";
1743
1744	/ Determine the location of the literal (including quotes*
1745	and leading prefix chars, such as the 'u' in a u""
1746	token). /*
1747	size_t literal_length = finish.column - start.column + `1`;
1748
1749	/ Ensure that we don't crash if we got the wrong location. /
1750	if (start.column < `1`)
1751	return "zero start column";
1752	if (line.length () < (start.column - `1` + literal_length))
1753	return "line is not wide enough";
1754
1755	char_span literal = line.subspan (offset: start.column - `1`, n_elts: literal_length);
1756
1757	cpp_string from;
1758	from.len = literal_length;
1759	/ Make a copy of the literal, to avoid having to rely on*
1760	the lifetime of the copy of the line within the cache.
1761	This will be released by the auto_cpp_string_vec dtor. /*
1762	from.text = (unsigned char *)literal.xstrdup ();
1763	strs.safe_push (obj: from);
1764
1765	/ For very long lines, a new linemap could have started*
1766	halfway through the token.
1767	Ensure that the loc_reader uses the linemap of the
1768	end of the token for its start location. */
1769	const line_map_ordinary *start_ord_map;
1770	linemap_resolve_location (line_table, loc: src_range.m_start,
1771	lrk: LRK_SPELLING_LOCATION, loc_map: &start_ord_map);
1772	const line_map_ordinary *final_ord_map;
1773	linemap_resolve_location (line_table, loc: src_range.m_finish,
1774	lrk: LRK_SPELLING_LOCATION, loc_map: &final_ord_map);
1775	if (start_ord_map == NULL \|\| final_ord_map == NULL)
1776	return "failed to get ordinary maps";
1777	/ Bulletproofing. We ought to only have different ordinary maps*
1778	for start vs finish due to line-length jumps. /*
1779	if (start_ord_map != final_ord_map
1780	&& start_ord_map->to_file != final_ord_map->to_file)
1781	return "start and finish are spelled in different ordinary maps";
1782	/ The file from linemap_resolve_location ought to match that from*
1783	expand_location_to_spelling_point. /*
1784	if (start_ord_map->to_file != start.file)
1785	return "mismatching file after resolving linemap";
1786
1787	location_t start_loc
1788	= linemap_position_for_line_and_column (set: line_table, final_ord_map,
1789	start.line, start.column);
1790
1791	cpp_string_location_reader loc_reader (start_loc, line_table);
1792	loc_readers.safe_push (obj: loc_reader);
1793	}
1794
1795	/ Rerun cpp_interpret_string, or rather, a modified version of it. /
1796	const char *err = cpp_interpret_string_ranges (pfile, from: strs.address (),
1797	loc_readers.address (),
1798	count: num_locs, out: &ranges, type);
1799	if (err)
1800	return err;
1801
1802	/ Success: "ranges" should now contain information on the string. /
1803	return NULL;
1804	}
1805
1806	/ Attempt to populate OUT_LOC with source location information on the
1807	given characters within the string literal found at STRLOC.
1808	CARET_IDX, START_IDX, and END_IDX refer to offsets within the execution
1809	character set.
1810
1811	For example, given CARET_IDX = 4, START_IDX = 3, END_IDX = 7
1812	and string literal "012345\n789"
1813	*OUT_LOC is written to with:
1814	"012345\n789"
1815	~^~~~~
1816
1817	If CONCATS is non-NULL, then any string literals that the token at
1818	STRLOC was concatenated with are also considered.
1819
1820	This is implemented by re-parsing the relevant source line(s).
1821
1822	Return NULL if successful, or an error message if any errors occurred.
1823	Error messages are intended for GCC developers (to help debugging) rather
1824	than for end-users. /*
1825
1826	const char *
1827	get_location_within_string (cpp_reader *pfile,
1828	file_cache &fc,
1829	string_concat_db *concats,
1830	location_t strloc,
1831	enum cpp_ttype type,
1832	int caret_idx, int start_idx, int end_idx,
1833	location_t *out_loc)
1834	{
1835	gcc_checking_assert (caret_idx >= `0`);
1836	gcc_checking_assert (start_idx >= `0`);
1837	gcc_checking_assert (end_idx >= `0`);
1838	gcc_assert (out_loc);
1839
1840	cpp_substring_ranges ranges;
1841	const char *err
1842	= get_substring_ranges_for_loc (pfile, fc, concats, strloc, type, ranges);
1843	if (err)
1844	return err;
1845
1846	if (caret_idx >= ranges.get_num_ranges ())
1847	return "caret_idx out of range";
1848	if (start_idx >= ranges.get_num_ranges ())
1849	return "start_idx out of range";
1850	if (end_idx >= ranges.get_num_ranges ())
1851	return "end_idx out of range";
1852
1853	*out_loc = make_location (caret: ranges.get_range (idx: caret_idx).m_start,
1854	start: ranges.get_range (idx: start_idx).m_start,
1855	finish: ranges.get_range (idx: end_idx).m_finish);
1856	return NULL;
1857	}
1858
1859	/ Associate the DISCRIMINATOR with LOCUS, and return a new locus. /
1860
1861	location_t
1862	location_with_discriminator (location_t locus, int discriminator)
1863	{
1864	tree block = LOCATION_BLOCK (locus);
1865	source_range src_range = get_range_from_loc (set: line_table, loc: locus);
1866	locus = get_pure_location (loc: locus);
1867
1868	if (locus == UNKNOWN_LOCATION)
1869	return locus;
1870
1871	return line_table->get_or_create_combined_loc (locus, src_range, data: block,
1872	discriminator);
1873	}
1874
1875	/ Return TRUE if LOCUS represents a location with a discriminator. /
1876
1877	bool
1878	has_discriminator (location_t locus)
1879	{
1880	return get_discriminator_from_loc (locus) != `0`;
1881	}
1882
1883	/ Return the discriminator for LOCUS. /
1884
1885	int
1886	get_discriminator_from_loc (location_t locus)
1887	{
1888	return get_discriminator_from_loc (set: line_table, loc: locus);
1889	}
1890
1891	#if CHECKING_P
1892
1893	namespace selftest {
1894
/* Selftests of location handling.  */
1896
1897	/ Attempt to populate OUT_RANGE with source location information on the
1898	given character within the string literal found at STRLOC.
1899	CHAR_IDX refers to an offset within the execution character set.
1900	If CONCATS is non-NULL, then any string literals that the token at
1901	STRLOC was concatenated with are also considered.
1902
1903	This is implemented by re-parsing the relevant source line(s).
1904
1905	Return NULL if successful, or an error message if any errors occurred.
1906	Error messages are intended for GCC developers (to help debugging) rather
1907	than for end-users. /*
1908
1909	static const char *
1910	get_source_range_for_char (cpp_reader *pfile,
1911	file_cache &fc,
1912	string_concat_db *concats,
1913	location_t strloc,
1914	enum cpp_ttype type,
1915	int char_idx,
1916	source_range *out_range)
1917	{
1918	gcc_checking_assert (char_idx >= `0`);
1919	gcc_assert (out_range);
1920
1921	cpp_substring_ranges ranges;
1922	const char *err
1923	= get_substring_ranges_for_loc (pfile, fc, concats, strloc, type, ranges);
1924	if (err)
1925	return err;
1926
1927	if (char_idx >= ranges.get_num_ranges ())
1928	return "char_idx out of range";
1929
1930	*out_range = ranges.get_range (idx: char_idx);
1931	return NULL;
1932	}
1933
1934	/ As get_source_range_for_char, but write to OUT the number
1935	of ranges that are available. /*
1936
1937	static const char *
1938	get_num_source_ranges_for_substring (cpp_reader *pfile,
1939	file_cache &fc,
1940	string_concat_db *concats,
1941	location_t strloc,
1942	enum cpp_ttype type,
1943	int *out)
1944	{
1945	gcc_assert (out);
1946
1947	cpp_substring_ranges ranges;
1948	const char *err
1949	= get_substring_ranges_for_loc (pfile, fc, concats, strloc, type, ranges);
1950
1951	if (err)
1952	return err;
1953
1954	*out = ranges.get_num_ranges ();
1955	return NULL;
1956	}
1957
/* Selftests of location handling.  */
1959
1960	/ Verify that compare() on linenum_type handles comparisons over the full*
1961	range of the type. /*
1962
1963	static void
1964	test_linenum_comparisons ()
1965	{
1966	linenum_type min_line (`0`);
1967	linenum_type max_line (`0xffffffff`);
1968	ASSERT_EQ (`0`, compare (min_line, min_line));
1969	ASSERT_EQ (`0`, compare (max_line, max_line));
1970
1971	ASSERT_GT (compare (max_line, min_line), `0`);
1972	ASSERT_LT (compare (min_line, max_line), `0`);
1973	}
1974
1975	/ Helper function for verifying location data: when location_t*
1976	values are > LINE_MAP_MAX_LOCATION_WITH_COLS, they are treated
1977	as having column 0. /*
1978
1979	static bool
1980	should_have_column_data_p (location_t loc)
1981	{
1982	if (IS_ADHOC_LOC (loc))
1983	loc = get_location_from_adhoc_loc (line_table, loc);
1984	if (loc > LINE_MAP_MAX_LOCATION_WITH_COLS)
1985	return false;
1986	return true;
1987	}
1988
1989	/ Selftest for should_have_column_data_p. /
1990
1991	static void
1992	test_should_have_column_data_p ()
1993	{
1994	ASSERT_TRUE (should_have_column_data_p (RESERVED_LOCATION_COUNT));
1995	ASSERT_TRUE
1996	(should_have_column_data_p (LINE_MAP_MAX_LOCATION_WITH_COLS));
1997	ASSERT_FALSE
1998	(should_have_column_data_p (LINE_MAP_MAX_LOCATION_WITH_COLS + `1`));
1999	}
2000
2001	/ Verify the result of LOCATION_FILE/LOCATION_LINE/LOCATION_COLUMN*
2002	on LOC. /*
2003
2004	static void
2005	assert_loceq (const char exp_filename, int* exp_linenum, int exp_colnum,
2006	location_t loc)
2007	{
2008	ASSERT_STREQ (exp_filename, LOCATION_FILE (loc));
2009	ASSERT_EQ (exp_linenum, LOCATION_LINE (loc));
2010	/ If location_t values are sufficiently high, then column numbers*
2011	will be unavailable and LOCATION_COLUMN (loc) will be 0.
2012	When close to the threshold, column numbers may* be present: if*
2013	the final linemap before the threshold contains a line that straddles
2014	the threshold, locations in that line have column information. /*
2015	if (should_have_column_data_p (loc))
2016	ASSERT_EQ (exp_colnum, LOCATION_COLUMN (loc));
2017	}
2018
/* Various selftests involve constructing a line table and one or more
   line maps within it.

   For maximum test coverage we want to run these tests in a variety
   of situations:
   - line_table->default_range_bits: some frontends use a non-zero value
     and others use zero
   - the fallback modes within line-map.cc: there are various threshold
     values for location_t beyond which line-map.cc changes behavior
     (disabling of the range-packing optimization, disabling of
     column-tracking).  We can exercise these by starting the line_table
     at interesting values at or near these thresholds.

   A line_table_case describes one particular cell of that test
   matrix.  */

class line_table_case
{
public:
  /* Value a line_table_test copies into line_table->default_range_bits.  */
  int m_default_range_bits;

  /* If non-zero, a line_table_test uses this as the initial
     highest_location and highest_line of its line table.  */
  int m_base_location;

  line_table_case (int default_range_bits, int base_location)
    : m_default_range_bits (default_range_bits),
      m_base_location (base_location)
  {
  }
};
2046
2047	/ Constructor. Store the old value of line_table, and create a new*
2048	one, using sane defaults. /*
2049
2050	line_table_test::line_table_test ()
2051	{
2052	gcc_assert (saved_line_table == NULL);
2053	saved_line_table = line_table;
2054	line_table = ggc_alloc<line_maps> ();
2055	linemap_init (set: line_table, BUILTINS_LOCATION);
2056	gcc_assert (saved_line_table->m_reallocator);
2057	line_table->m_reallocator = saved_line_table->m_reallocator;
2058	gcc_assert (saved_line_table->m_round_alloc_size);
2059	line_table->m_round_alloc_size = saved_line_table->m_round_alloc_size;
2060	line_table->default_range_bits = `0`;
2061	}
2062
2063	/ Constructor. Store the old value of line_table, and create a new*
2064	one, using the sitation described in CASE_. /*
2065
2066	line_table_test::line_table_test (const line_table_case &case_)
2067	{
2068	gcc_assert (saved_line_table == NULL);
2069	saved_line_table = line_table;
2070	line_table = ggc_alloc<line_maps> ();
2071	linemap_init (set: line_table, BUILTINS_LOCATION);
2072	gcc_assert (saved_line_table->m_reallocator);
2073	line_table->m_reallocator = saved_line_table->m_reallocator;
2074	gcc_assert (saved_line_table->m_round_alloc_size);
2075	line_table->m_round_alloc_size = saved_line_table->m_round_alloc_size;
2076	line_table->default_range_bits = case_.m_default_range_bits;
2077	if (case_.m_base_location)
2078	{
2079	line_table->highest_location = case_.m_base_location;
2080	line_table->highest_line = case_.m_base_location;
2081	}
2082	}
2083
2084	/ Destructor. Restore the old value of line_table. /
2085
2086	line_table_test::~line_table_test ()
2087	{
2088	gcc_assert (saved_line_table != NULL);
2089	line_table = saved_line_table;
2090	saved_line_table = NULL;
2091	}
2092
2093	/ Verify basic operation of ordinary linemaps. /
2094
2095	static void
2096	test_accessing_ordinary_linemaps (const line_table_case &case_)
2097	{
2098	line_table_test ltt (case_);
2099
2100	/ Build a simple linemap describing some locations. /
2101	linemap_add (line_table, LC_ENTER, sysp: false, to_file: "foo.c", to_line: `0`);
2102
2103	linemap_line_start (set: line_table, to_line: `1`, max_column_hint: `100`);
2104	location_t loc_a = linemap_position_for_column (line_table, `1`);
2105	location_t loc_b = linemap_position_for_column (line_table, `23`);
2106
2107	linemap_line_start (set: line_table, to_line: `2`, max_column_hint: `100`);
2108	location_t loc_c = linemap_position_for_column (line_table, `1`);
2109	location_t loc_d = linemap_position_for_column (line_table, `17`);
2110
2111	/ Example of a very long line. /
2112	linemap_line_start (set: line_table, to_line: `3`, max_column_hint: `2000`);
2113	location_t loc_e = linemap_position_for_column (line_table, `700`);
2114
2115	/ Transitioning back to a short line. /
2116	linemap_line_start (set: line_table, to_line: `4`, max_column_hint: `0`);
2117	location_t loc_back_to_short = linemap_position_for_column (line_table, `100`);
2118
2119	if (should_have_column_data_p (loc: loc_back_to_short))
2120	{
2121	/ Verify that we switched to short lines in the linemap. /
2122	line_map_ordinary *map = LINEMAPS_LAST_ORDINARY_MAP (set: line_table);
2123	ASSERT_EQ (`7`, map->m_column_and_range_bits - map->m_range_bits);
2124	}
2125
2126	/ Example of a line that will eventually be seen to be longer*
2127	than LINE_MAP_MAX_COLUMN_NUMBER; the initially seen width is
2128	below that. /*
2129	linemap_line_start (set: line_table, to_line: `5`, max_column_hint: `2000`);
2130
2131	location_t loc_start_of_very_long_line
2132	= linemap_position_for_column (line_table, `2000`);
2133	location_t loc_too_wide
2134	= linemap_position_for_column (line_table, `4097`);
2135	location_t loc_too_wide_2
2136	= linemap_position_for_column (line_table, `4098`);
2137
2138	/ ...and back to a sane line length. /
2139	linemap_line_start (set: line_table, to_line: `6`, max_column_hint: `100`);
2140	location_t loc_sane_again = linemap_position_for_column (line_table, `10`);
2141
2142	linemap_add (line_table, LC_LEAVE, sysp: false, NULL, to_line: `0`);
2143
2144	/ Multiple files. /
2145	linemap_add (line_table, LC_ENTER, sysp: false, to_file: "bar.c", to_line: `0`);
2146	linemap_line_start (set: line_table, to_line: `1`, max_column_hint: `200`);
2147	location_t loc_f = linemap_position_for_column (line_table, `150`);
2148	linemap_add (line_table, LC_LEAVE, sysp: false, NULL, to_line: `0`);
2149
2150	/ Verify that we can recover the location info. /
2151	assert_loceq (exp_filename: "foo.c", exp_linenum: `1`, exp_colnum: `1`, loc: loc_a);
2152	assert_loceq (exp_filename: "foo.c", exp_linenum: `1`, exp_colnum: `23`, loc: loc_b);
2153	assert_loceq (exp_filename: "foo.c", exp_linenum: `2`, exp_colnum: `1`, loc: loc_c);
2154	assert_loceq (exp_filename: "foo.c", exp_linenum: `2`, exp_colnum: `17`, loc: loc_d);
2155	assert_loceq (exp_filename: "foo.c", exp_linenum: `3`, exp_colnum: `700`, loc: loc_e);
2156	assert_loceq (exp_filename: "foo.c", exp_linenum: `4`, exp_colnum: `100`, loc: loc_back_to_short);
2157
2158	/ In the very wide line, the initial location should be fully tracked. /
2159	assert_loceq (exp_filename: "foo.c", exp_linenum: `5`, exp_colnum: `2000`, loc: loc_start_of_very_long_line);
2160	/ ...but once we exceed LINE_MAP_MAX_COLUMN_NUMBER column-tracking should*
2161	be disabled. /*
2162	assert_loceq (exp_filename: "foo.c", exp_linenum: `5`, exp_colnum: `0`, loc: loc_too_wide);
2163	assert_loceq (exp_filename: "foo.c", exp_linenum: `5`, exp_colnum: `0`, loc: loc_too_wide_2);
2164	/...and column-tracking should be re-enabled for subsequent lines. /
2165	assert_loceq (exp_filename: "foo.c", exp_linenum: `6`, exp_colnum: `10`, loc: loc_sane_again);
2166
2167	assert_loceq (exp_filename: "bar.c", exp_linenum: `1`, exp_colnum: `150`, loc: loc_f);
2168
2169	ASSERT_FALSE (is_location_from_builtin_token (loc_a));
2170	ASSERT_TRUE (pure_location_p (line_table, loc_a));
2171
2172	/ Verify using make_location to build a range, and extracting data*
2173	back from it. /*
2174	location_t range_c_b_d = make_location (caret: loc_c, start: loc_b, finish: loc_d);
2175	ASSERT_FALSE (pure_location_p (line_table, range_c_b_d));
2176	ASSERT_EQ (loc_c, get_location_from_adhoc_loc (line_table, range_c_b_d));
2177	source_range src_range = get_range_from_loc (set: line_table, loc: range_c_b_d);
2178	ASSERT_EQ (loc_b, src_range.m_start);
2179	ASSERT_EQ (loc_d, src_range.m_finish);
2180	}
2181
2182	/ Verify various properties of UNKNOWN_LOCATION. /
2183
2184	static void
2185	test_unknown_location ()
2186	{
2187	ASSERT_EQ (NULL, LOCATION_FILE (UNKNOWN_LOCATION));
2188	ASSERT_EQ (`0`, LOCATION_LINE (UNKNOWN_LOCATION));
2189	ASSERT_EQ (`0`, LOCATION_COLUMN (UNKNOWN_LOCATION));
2190	}
2191
2192	/ Verify various properties of BUILTINS_LOCATION. /
2193
2194	static void
2195	test_builtins ()
2196	{
2197	assert_loceq (exp_filename: special_fname_builtin (), exp_linenum: `0`, exp_colnum: `0`, BUILTINS_LOCATION);
2198	ASSERT_PRED1 (is_location_from_builtin_token, BUILTINS_LOCATION);
2199	}
2200
2201	/ Regression test for make_location.*
2202	Ensure that we use pure locations for the start/finish of the range,
2203	rather than storing a packed or ad-hoc range as the start/finish. /*
2204
2205	static void
2206	test_make_location_nonpure_range_endpoints (const line_table_case &case_)
2207	{
2208	/ Issue seen with testsuite/c-c++-common/Wlogical-not-parentheses-2.c*
2209	with C++ frontend.
2210	....................0000000001111111111222.
2211	....................1234567890123456789012. /*
2212	const char *content = " r += !aaa == bbb;\n";
2213	temp_source_file tmp (SELFTEST_LOCATION, ".C", content);
2214	line_table_test ltt (case_);
2215	linemap_add (line_table, LC_ENTER, sysp: false, to_file: tmp.get_filename (), to_line: `1`);
2216
2217	const location_t c11 = linemap_position_for_column (line_table, `11`);
2218	const location_t c12 = linemap_position_for_column (line_table, `12`);
2219	const location_t c13 = linemap_position_for_column (line_table, `13`);
2220	const location_t c14 = linemap_position_for_column (line_table, `14`);
2221	const location_t c21 = linemap_position_for_column (line_table, `21`);
2222
2223	if (c21 > LINE_MAP_MAX_LOCATION_WITH_COLS)
2224	return;
2225
2226	/ Use column 13 for the caret location, arbitrarily, to verify that we*
2227	handle start != caret. /*
2228	const location_t aaa = make_location (caret: c13, start: c12, finish: c14);
2229	ASSERT_EQ (c13, get_pure_location (aaa));
2230	ASSERT_EQ (c12, get_start (aaa));
2231	ASSERT_FALSE (IS_ADHOC_LOC (get_start (aaa)));
2232	ASSERT_EQ (c14, get_finish (aaa));
2233	ASSERT_FALSE (IS_ADHOC_LOC (get_finish (aaa)));
2234
2235	/ Make a location using a location with a range as the start-point. /
2236	const location_t not_aaa = make_location (caret: c11, start: aaa, finish: c14);
2237	ASSERT_EQ (c11, get_pure_location (not_aaa));
2238	/ It should use the start location of the range, not store the range*
2239	itself. /*
2240	ASSERT_EQ (c12, get_start (not_aaa));
2241	ASSERT_FALSE (IS_ADHOC_LOC (get_start (not_aaa)));
2242	ASSERT_EQ (c14, get_finish (not_aaa));
2243	ASSERT_FALSE (IS_ADHOC_LOC (get_finish (not_aaa)));
2244
2245	/ Similarly, make a location with a range as the end-point. /
2246	const location_t aaa_eq_bbb = make_location (caret: c12, start: c12, finish: c21);
2247	ASSERT_EQ (c12, get_pure_location (aaa_eq_bbb));
2248	ASSERT_EQ (c12, get_start (aaa_eq_bbb));
2249	ASSERT_FALSE (IS_ADHOC_LOC (get_start (aaa_eq_bbb)));
2250	ASSERT_EQ (c21, get_finish (aaa_eq_bbb));
2251	ASSERT_FALSE (IS_ADHOC_LOC (get_finish (aaa_eq_bbb)));
2252	const location_t not_aaa_eq_bbb = make_location (caret: c11, start: c12, finish: aaa_eq_bbb);
2253	/ It should use the finish location of the range, not store the range*
2254	itself. /*
2255	ASSERT_EQ (c11, get_pure_location (not_aaa_eq_bbb));
2256	ASSERT_EQ (c12, get_start (not_aaa_eq_bbb));
2257	ASSERT_FALSE (IS_ADHOC_LOC (get_start (not_aaa_eq_bbb)));
2258	ASSERT_EQ (c21, get_finish (not_aaa_eq_bbb));
2259	ASSERT_FALSE (IS_ADHOC_LOC (get_finish (not_aaa_eq_bbb)));
2260	}
2261
2262	/ Verify reading of input files (e.g. for caret-based diagnostics). /
2263
2264	static void
2265	test_reading_source_line ()
2266	{
2267	/ Create a tempfile and write some text to it. /
2268	temp_source_file tmp (SELFTEST_LOCATION, ".txt",
2269	"01234567890123456789\n"
2270	"This is the test text\n"
2271	"This is the 3rd line");
2272	file_cache fc;
2273
2274	/ Read back a specific line from the tempfile. /
2275	char_span source_line = fc.get_source_line (file_path: tmp.get_filename (), line: `3`);
2276	ASSERT_TRUE (source_line);
2277	ASSERT_TRUE (source_line.get_buffer () != NULL);
2278	ASSERT_EQ (`20`, source_line.length ());
2279	ASSERT_TRUE (!strncmp ("This is the 3rd line",
2280	source_line.get_buffer (), source_line.length ()));
2281
2282	source_line = fc.get_source_line (file_path: tmp.get_filename (), line: `2`);
2283	ASSERT_TRUE (source_line);
2284	ASSERT_TRUE (source_line.get_buffer () != NULL);
2285	ASSERT_EQ (`21`, source_line.length ());
2286	ASSERT_TRUE (!strncmp ("This is the test text",
2287	source_line.get_buffer (), source_line.length ()));
2288
2289	source_line = fc.get_source_line (file_path: tmp.get_filename (), line: `4`);
2290	ASSERT_FALSE (source_line);
2291	ASSERT_TRUE (source_line.get_buffer () == NULL);
2292	}
2293
/* Tests of lexing.  */
2295
/* Assert that the text cpp_token_as_text produces for token TOK of
   PARSER is string-equal to EXPECTED_TEXT.  */

#define ASSERT_TOKEN_AS_TEXT_EQ(PARSER, TOK, EXPECTED_TEXT)            \
  SELFTEST_BEGIN_STMT                                                   \
    unsigned char *token_spelling                                       \
      = cpp_token_as_text ((PARSER), (TOK));                            \
    ASSERT_STREQ ((EXPECTED_TEXT), (const char *) token_spelling);      \
  SELFTEST_END_STMT
2304
2305	/ Verify that TOK's src_loc is within EXP_FILENAME at EXP_LINENUM,*
2306	and ranges from EXP_START_COL to EXP_FINISH_COL.
2307	Use LOC as the effective location of the selftest. /*
2308
2309	static void
2310	assert_token_loc_eq (const location &loc,
2311	const cpp_token *tok,
2312	const char exp_filename, int* exp_linenum,
2313	int exp_start_col, int exp_finish_col)
2314	{
2315	location_t tok_loc = tok->src_loc;
2316	ASSERT_STREQ_AT (loc, exp_filename, LOCATION_FILE (tok_loc));
2317	ASSERT_EQ_AT (loc, exp_linenum, LOCATION_LINE (tok_loc));
2318
2319	/ If location_t values are sufficiently high, then column numbers*
2320	will be unavailable. /*
2321	if (!should_have_column_data_p (loc: tok_loc))
2322	return;
2323
2324	ASSERT_EQ_AT (loc, exp_start_col, LOCATION_COLUMN (tok_loc));
2325	source_range tok_range = get_range_from_loc (set: line_table, loc: tok_loc);
2326	ASSERT_EQ_AT (loc, exp_start_col, LOCATION_COLUMN (tok_range.m_start));
2327	ASSERT_EQ_AT (loc, exp_finish_col, LOCATION_COLUMN (tok_range.m_finish));
2328	}
2329
/* Check TOK->src_loc with assert_token_loc_eq, reporting any failure
   at SELFTEST_LOCATION, i.e. at the point where the macro is used.  */

#define ASSERT_TOKEN_LOC_EQ(TOK, EXP_FILENAME, EXP_LINENUM,     \
                            EXP_START_COL, EXP_FINISH_COL)      \
  assert_token_loc_eq (SELFTEST_LOCATION,                        \
                       (TOK),                                    \
                       (EXP_FILENAME),                           \
                       (EXP_LINENUM),                            \
                       (EXP_START_COL),                          \
                       (EXP_FINISH_COL))
2337
2338	/ Test of lexing a file using libcpp, verifying tokens and their*
2339	location information. /*
2340
2341	static void
2342	test_lexer (const line_table_case &case_)
2343	{
2344	/ Create a tempfile and write some text to it. /
2345	const char *content =
2346	/00000000011111111112222222222333333.3333444444444.455555555556*
2347	12345678901234567890123456789012345.6789012345678.901234567890. /*
2348	("test_name /* c-style comment */\n"
2349	" \"test literal\"\n"
2350	" // test c++-style comment\n"
2351	" 42\n");
2352	temp_source_file tmp (SELFTEST_LOCATION, ".txt", content);
2353
2354	line_table_test ltt (case_);
2355
2356	cpp_reader *parser = cpp_create_reader (CLK_GNUC89, NULL, line_table);
2357
2358	const char *fname = cpp_read_main_file (parser, tmp.get_filename ());
2359	ASSERT_NE (fname, NULL);
2360
2361	/ Verify that we get the expected tokens back, with the correct*
2362	location information. /*
2363
2364	location_t loc;
2365	const cpp_token *tok;
2366	tok = cpp_get_token_with_location (parser, &loc);
2367	ASSERT_NE (tok, NULL);
2368	ASSERT_EQ (tok->type, CPP_NAME);
2369	ASSERT_TOKEN_AS_TEXT_EQ (parser, tok, "test_name");
2370	ASSERT_TOKEN_LOC_EQ (tok, tmp.get_filename (), `1`, `1`, `9`);
2371
2372	tok = cpp_get_token_with_location (parser, &loc);
2373	ASSERT_NE (tok, NULL);
2374	ASSERT_EQ (tok->type, CPP_STRING);
2375	ASSERT_TOKEN_AS_TEXT_EQ (parser, tok, "\"test literal\"");
2376	ASSERT_TOKEN_LOC_EQ (tok, tmp.get_filename (), `2`, `35`, `48`);
2377
2378	tok = cpp_get_token_with_location (parser, &loc);
2379	ASSERT_NE (tok, NULL);
2380	ASSERT_EQ (tok->type, CPP_NUMBER);
2381	ASSERT_TOKEN_AS_TEXT_EQ (parser, tok, "42");
2382	ASSERT_TOKEN_LOC_EQ (tok, tmp.get_filename (), `4`, `4`, `5`);
2383
2384	tok = cpp_get_token_with_location (parser, &loc);
2385	ASSERT_NE (tok, NULL);
2386	ASSERT_EQ (tok->type, CPP_EOF);
2387
2388	cpp_finish (parser, NULL);
2389	cpp_destroy (parser);
2390	}
2391
/* Forward decls.  */

class lexer_test;
class lexer_test_options;

/* A class for specifying options of a lexer_test.
   The "apply" vfunc is called during the lexer_test constructor.  */

class lexer_test_options
{
 public:
  /* Virtual, so that destroying a subclass through a pointer to this
     base class is well-defined.  */
  virtual ~lexer_test_options () {}

  /* Hook for a subclass to configure the given lexer_test.  */
  virtual void apply (lexer_test &) = 0;
};
2405
2406	/ Wrapper around an cpp_reader , which calls cpp_finish and cpp_destroy
2407	in its dtor.
2408
2409	This is needed by struct lexer_test to ensure that the cleanup of the
2410	cpp_reader happens after* the cleanup of the temp_source_file. /
2411
2412	class cpp_reader_ptr
2413	{
2414	public:
2415	cpp_reader_ptr (cpp_reader *ptr) : m_ptr (ptr) {}
2416
2417	~cpp_reader_ptr ()
2418	{
2419	cpp_finish (m_ptr, NULL);
2420	cpp_destroy (m_ptr);
2421	}
2422
2423	operator cpp_reader * () const { return m_ptr; }
2424
2425	private:
2426	cpp_reader *m_ptr;
2427	};
2428
/* A class for writing lexer tests.  It bundles a line table, a
   cpp_reader and the temporary source file being lexed, together with
   the file cache and string concatenation database that the
   string-location checks pass to the substring-range helpers.  */

class lexer_test
{
public:
  /* Set up the line table per CASE_, write CONTENT to a tempfile and
     start parsing it; OPTIONS may be NULL.  */
  lexer_test (const line_table_case &case_, const char *content,
	      lexer_test_options *options);
  ~lexer_test ();

  /* Return the next token from m_parser.  */
  const cpp_token *get_token ();

  /* The ordering of these fields matters.
     The line_table_test must be first, since the cpp_reader_ptr
     uses it.
     The cpp_reader must be cleaned up *after* the temp_source_file
     since the filenames in input.cc's input cache are owned by the
     cpp_reader; in particular, when ~temp_source_file evicts the
     filename the filenames must still be alive.  */
  line_table_test m_ltt;
  cpp_reader_ptr m_parser;
  temp_source_file m_tempfile;
  file_cache m_file_cache;
  string_concat_db m_concats;
  /* Set to true by the constructor; while it is true the destructor
     asserts that the next token is CPP_EOF.  */
  bool m_implicitly_expect_EOF;
};
2454
2455	/ Use an EBCDIC encoding for the execution charset, specifically*
2456	IBM1047-encoded (aka "EBCDIC 1047", or "Code page 1047").
2457
2458	This exercises iconv integration within libcpp.
2459	Not every build of iconv supports the given charset,
2460	so we need to flag this error and handle it gracefully. /*
2461
2462	class ebcdic_execution_charset : public lexer_test_options
2463	{
2464	public:
2465	ebcdic_execution_charset () : m_num_iconv_errors (`0`)
2466	{
2467	gcc_assert (s_singleton == NULL);
2468	s_singleton = this;
2469	}
2470	~ebcdic_execution_charset ()
2471	{
2472	gcc_assert (s_singleton == this);
2473	s_singleton = NULL;
2474	}
2475
2476	void apply (lexer_test &test) final override
2477	{
2478	cpp_options *cpp_opts = cpp_get_options (test.m_parser);
2479	cpp_opts->narrow_charset = "IBM1047";
2480
2481	cpp_callbacks *callbacks = cpp_get_callbacks (test.m_parser);
2482	callbacks->diagnostic = on_diagnostic;
2483	}
2484
2485	static bool on_diagnostic (cpp_reader *pfile ATTRIBUTE_UNUSED,
2486	enum cpp_diagnostic_level level ATTRIBUTE_UNUSED,
2487	enum cpp_warning_reason reason ATTRIBUTE_UNUSED,
2488	rich_location *richloc ATTRIBUTE_UNUSED,
2489	const char msgid, va_list ap ATTRIBUTE_UNUSED)
2490	ATTRIBUTE_FPTR_PRINTF(`5`,`0`)
2491	{
2492	gcc_assert (s_singleton);
2493	/ Avoid exgettext from picking this up, it is translated in libcpp. /
2494	const char *msg = "conversion from %s to %s not supported by iconv";
2495	#ifdef ENABLE_NLS
2496	msg = dgettext (domainname: "cpplib", msgid: msg);
2497	#endif
2498	/ Detect and record errors emitted by libcpp/charset.cc:init_iconv_desc*
2499	when the local iconv build doesn't support the conversion. /*
2500	if (strcmp (s1: msgid, s2: msg) == `0`)
2501	{
2502	s_singleton->m_num_iconv_errors++;
2503	return true;
2504	}
2505
2506	/ Otherwise, we have an unexpected error. /
2507	abort ();
2508	}
2509
2510	bool iconv_errors_occurred_p () const { return m_num_iconv_errors > `0`; }
2511
2512	private:
2513	static ebcdic_execution_charset *s_singleton;
2514	int m_num_iconv_errors;
2515	};
2516
2517	ebcdic_execution_charset *ebcdic_execution_charset::s_singleton;
2518
2519	/ A lexer_test_options subclass that records a list of diagnostic*
2520	messages emitted by the lexer. /*
2521
2522	class lexer_diagnostic_sink : public lexer_test_options
2523	{
2524	public:
2525	lexer_diagnostic_sink ()
2526	{
2527	gcc_assert (s_singleton == NULL);
2528	s_singleton = this;
2529	}
2530	~lexer_diagnostic_sink ()
2531	{
2532	gcc_assert (s_singleton == this);
2533	s_singleton = NULL;
2534
2535	int i;
2536	char *str;
2537	FOR_EACH_VEC_ELT (m_diagnostics, i, str)
2538	free (ptr: str);
2539	}
2540
2541	void apply (lexer_test &test) final override
2542	{
2543	cpp_callbacks *callbacks = cpp_get_callbacks (test.m_parser);
2544	callbacks->diagnostic = on_diagnostic;
2545	}
2546
2547	static bool on_diagnostic (cpp_reader *pfile ATTRIBUTE_UNUSED,
2548	enum cpp_diagnostic_level level ATTRIBUTE_UNUSED,
2549	enum cpp_warning_reason reason ATTRIBUTE_UNUSED,
2550	rich_location *richloc ATTRIBUTE_UNUSED,
2551	const char msgid, va_list ap)
2552	ATTRIBUTE_FPTR_PRINTF(`5`,`0`)
2553	{
2554	char msg = xvasprintf (msgid, ap);
2555	s_singleton->m_diagnostics.safe_push (obj: msg);
2556	return true;
2557	}
2558
2559	auto_vec<char *> m_diagnostics;
2560
2561	private:
2562	static lexer_diagnostic_sink *s_singleton;
2563	};
2564
2565	lexer_diagnostic_sink *lexer_diagnostic_sink::s_singleton;
2566
2567	/ Constructor. Override line_table with a new instance based on CASE_,*
2568	and write CONTENT to a tempfile. Create a cpp_reader, and use it to
2569	start parsing the tempfile. /*
2570
2571	lexer_test::lexer_test (const line_table_case &case_, const char *content,
2572	lexer_test_options *options)
2573	: m_ltt (case_),
2574	m_parser (cpp_create_reader (CLK_GNUC99, NULL, line_table)),
2575	/ Create a tempfile and write the text to it. /
2576	m_tempfile (SELFTEST_LOCATION, ".c", content),
2577	m_concats (),
2578	m_implicitly_expect_EOF (true)
2579	{
2580	if (options)
2581	options->apply (*this);
2582
2583	cpp_init_iconv (m_parser);
2584
2585	/ Parse the file. /
2586	const char *fname = cpp_read_main_file (m_parser,
2587	m_tempfile.get_filename ());
2588	ASSERT_NE (fname, NULL);
2589	}
2590
2591	/ Destructor. By default, verify that the next token in m_parser is EOF. /
2592
2593	lexer_test::~lexer_test ()
2594	{
2595	location_t loc;
2596	const cpp_token *tok;
2597
2598	if (m_implicitly_expect_EOF)
2599	{
2600	tok = cpp_get_token_with_location (m_parser, &loc);
2601	ASSERT_NE (tok, NULL);
2602	ASSERT_EQ (tok->type, CPP_EOF);
2603	}
2604	}
2605
2606	/ Get the next token from m_parser. /
2607
2608	const cpp_token *
2609	lexer_test::get_token ()
2610	{
2611	location_t loc;
2612	const cpp_token *tok;
2613
2614	tok = cpp_get_token_with_location (m_parser, &loc);
2615	ASSERT_NE (tok, NULL);
2616	return tok;
2617	}
2618
/* Verify that locations within string literals are correctly handled.  */
2620
2621	/ Verify get_source_range_for_substring for token(s) at STRLOC,*
2622	using the string concatenation database for TEST.
2623
2624	Assert that the character at index IDX is on EXPECTED_LINE,
2625	and that it begins at column EXPECTED_START_COL and ends at
2626	EXPECTED_FINISH_COL (unless the locations are beyond
2627	LINE_MAP_MAX_LOCATION_WITH_COLS, in which case don't check their
2628	columns). /*
2629
2630	static void
2631	assert_char_at_range (const location &loc,
2632	lexer_test& test,
2633	location_t strloc, enum cpp_ttype type, int idx,
2634	int expected_line, int expected_start_col,
2635	int expected_finish_col)
2636	{
2637	cpp_reader *pfile = test.m_parser;
2638	string_concat_db *concats = &test.m_concats;
2639
2640	source_range actual_range = source_range ();
2641	const char *err
2642	= get_source_range_for_char (pfile, fc&: test.m_file_cache,
2643	concats, strloc, type, char_idx: idx,
2644	out_range: &actual_range);
2645	if (should_have_column_data_p (loc: strloc))
2646	ASSERT_EQ_AT (loc, NULL, err);
2647	else
2648	{
2649	ASSERT_STREQ_AT (loc,
2650	"range starts after LINE_MAP_MAX_LOCATION_WITH_COLS",
2651	err);
2652	return;
2653	}
2654
2655	int actual_start_line = LOCATION_LINE (actual_range.m_start);
2656	ASSERT_EQ_AT (loc, expected_line, actual_start_line);
2657	int actual_finish_line = LOCATION_LINE (actual_range.m_finish);
2658	ASSERT_EQ_AT (loc, expected_line, actual_finish_line);
2659
2660	if (should_have_column_data_p (loc: actual_range.m_start))
2661	{
2662	int actual_start_col = LOCATION_COLUMN (actual_range.m_start);
2663	ASSERT_EQ_AT (loc, expected_start_col, actual_start_col);
2664	}
2665	if (should_have_column_data_p (loc: actual_range.m_finish))
2666	{
2667	int actual_finish_col = LOCATION_COLUMN (actual_range.m_finish);
2668	ASSERT_EQ_AT (loc, expected_finish_col, actual_finish_col);
2669	}
2670	}
2671
/* Call assert_char_at_range with SELFTEST_LOCATION as the effective
   location, so that failures are reported where the macro is used.  */

#define ASSERT_CHAR_AT_RANGE(LEXER_TEST, STRLOC, TYPE, IDX,            \
                             EXPECTED_LINE, EXPECTED_START_COL,        \
                             EXPECTED_FINISH_COL)                      \
  assert_char_at_range (SELFTEST_LOCATION,                              \
                        (LEXER_TEST), (STRLOC), (TYPE), (IDX),          \
                        (EXPECTED_LINE),                                \
                        (EXPECTED_START_COL),                           \
                        (EXPECTED_FINISH_COL))
2680
2681	/ Verify get_num_source_ranges_for_substring for token(s) at STRLOC,*
2682	using the string concatenation database for TEST.
2683
2684	Assert that the token(s) at STRLOC contain EXPECTED_NUM_RANGES. /*
2685
2686	static void
2687	assert_num_substring_ranges (const location &loc,
2688	lexer_test& test,
2689	location_t strloc,
2690	enum cpp_ttype type,
2691	int expected_num_ranges)
2692	{
2693	cpp_reader *pfile = test.m_parser;
2694	string_concat_db *concats = &test.m_concats;
2695
2696	int actual_num_ranges = -`1`;
2697	const char *err
2698	= get_num_source_ranges_for_substring (pfile, fc&: test.m_file_cache,
2699	concats, strloc, type,
2700	out: &actual_num_ranges);
2701	if (should_have_column_data_p (loc: strloc))
2702	ASSERT_EQ_AT (loc, NULL, err);
2703	else
2704	{
2705	ASSERT_STREQ_AT (loc,
2706	"range starts after LINE_MAP_MAX_LOCATION_WITH_COLS",
2707	err);
2708	return;
2709	}
2710	ASSERT_EQ_AT (loc, expected_num_ranges, actual_num_ranges);
2711	}
2712
/* Call assert_num_substring_ranges with SELFTEST_LOCATION as the
   effective location, so that failures are reported where the macro
   is used.  */

#define ASSERT_NUM_SUBSTRING_RANGES(LEXER_TEST, STRLOC, TYPE,          \
                                    EXPECTED_NUM_RANGES)                \
  assert_num_substring_ranges (SELFTEST_LOCATION,                       \
                               (LEXER_TEST), (STRLOC), (TYPE),          \
                               (EXPECTED_NUM_RANGES))
2720
2721
2722	/ Verify that get_num_source_ranges_for_substring for token(s) at STRLOC*
2723	returns an error (using the string concatenation database for TEST). /*
2724
2725	static void
2726	assert_has_no_substring_ranges (const location &loc,
2727	lexer_test& test,
2728	location_t strloc,
2729	enum cpp_ttype type,
2730	const char *expected_err)
2731	{
2732	cpp_reader *pfile = test.m_parser;
2733	string_concat_db *concats = &test.m_concats;
2734	cpp_substring_ranges ranges;
2735	const char *actual_err
2736	= get_substring_ranges_for_loc (pfile, fc&: test.m_file_cache, concats, strloc,
2737	type, ranges);
2738	if (should_have_column_data_p (loc: strloc))
2739	ASSERT_STREQ_AT (loc, expected_err, actual_err);
2740	else
2741	ASSERT_STREQ_AT (loc,
2742	"range starts after LINE_MAP_MAX_LOCATION_WITH_COLS",
2743	actual_err);
2744	}
2745
/* Call assert_has_no_substring_ranges with SELFTEST_LOCATION as the
   effective location, so that failures are reported where the macro
   is used.  */

#define ASSERT_HAS_NO_SUBSTRING_RANGES(LEXER_TEST, STRLOC, TYPE, ERR)  \
  assert_has_no_substring_ranges (SELFTEST_LOCATION,                    \
                                  (LEXER_TEST), (STRLOC), (TYPE),       \
                                  (ERR))
2749
2750	/ Lex a simple string literal. Verify the substring location data, before*
2751	and after running cpp_interpret_string on it. /*
2752
2753	static void
2754	test_lexer_string_locations_simple (const line_table_case &case_)
2755	{
2756	/ Digits 0-9 (with 0 at column 10), the simple way.*
2757	....................000000000.11111111112.2222222223333333333
2758	....................123456789.01234567890.1234567890123456789
2759	We add a trailing comment to ensure that we correctly locate
2760	the end of the string literal token. /*
2761	const char content = " \"0123456789\" / not a string */\n";
2762	lexer_test test (case_, content, NULL);
2763
2764	/ Verify that we get the expected token back, with the correct*
2765	location information. /*
2766	const cpp_token *tok = test.get_token ();
2767	ASSERT_EQ (tok->type, CPP_STRING);
2768	ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"0123456789\"");
2769	ASSERT_TOKEN_LOC_EQ (tok, test.m_tempfile.get_filename (), `1`, `9`, `20`);
2770
2771	/ At this point in lexing, the quote characters are treated as part of*
2772	the string (they are stripped off by cpp_interpret_string). /*
2773
2774	ASSERT_EQ (tok->val.str.len, `12`);
2775
2776	/ Verify that cpp_interpret_string works. /
2777	cpp_string dst_string;
2778	const enum cpp_ttype type = CPP_STRING;
2779	bool result = cpp_interpret_string (test.m_parser, &tok->val.str, `1`,
2780	&dst_string, type);
2781	ASSERT_TRUE (result);
2782	ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
2783	free (ptr: const_cast <unsigned char *> (dst_string.text));
2784
2785	/ Verify ranges of individual characters. This no longer includes the*
2786	opening quote, but does include the closing quote. /*
2787	for (int i = `0`; i <= `10`; i++)
2788	ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, `1`,
2789	`10` + i, `10` + i);
2790
2791	ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, `11`);
2792	}
2793
2794	/ As test_lexer_string_locations_simple, but use an EBCDIC execution*
2795	encoding. /*
2796
2797	static void
2798	test_lexer_string_locations_ebcdic (const line_table_case &case_)
2799	{
2800	/ EBCDIC support requires iconv. /
2801	if (!HAVE_ICONV)
2802	return;
2803
2804	/ Digits 0-9 (with 0 at column 10), the simple way.*
2805	....................000000000.11111111112.2222222223333333333
2806	....................123456789.01234567890.1234567890123456789
2807	We add a trailing comment to ensure that we correctly locate
2808	the end of the string literal token. /*
2809	const char content = " \"0123456789\" / not a string */\n";
2810	ebcdic_execution_charset use_ebcdic;
2811	lexer_test test (case_, content, &use_ebcdic);
2812
2813	/ Verify that we get the expected token back, with the correct*
2814	location information. /*
2815	const cpp_token *tok = test.get_token ();
2816	ASSERT_EQ (tok->type, CPP_STRING);
2817	ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"0123456789\"");
2818	ASSERT_TOKEN_LOC_EQ (tok, test.m_tempfile.get_filename (), `1`, `9`, `20`);
2819
2820	/ At this point in lexing, the quote characters are treated as part of*
2821	the string (they are stripped off by cpp_interpret_string). /*
2822
2823	ASSERT_EQ (tok->val.str.len, `12`);
2824
2825	/ The remainder of the test requires an iconv implementation that*
2826	can convert from UTF-8 to the EBCDIC encoding requested above. /*
2827	if (use_ebcdic.iconv_errors_occurred_p ())
2828	return;
2829
2830	/ Verify that cpp_interpret_string works. /
2831	cpp_string dst_string;
2832	const enum cpp_ttype type = CPP_STRING;
2833	bool result = cpp_interpret_string (test.m_parser, &tok->val.str, `1`,
2834	&dst_string, type);
2835	ASSERT_TRUE (result);
2836	/ We should now have EBCDIC-encoded text, specifically*
2837	IBM1047-encoded (aka "EBCDIC 1047", or "Code page 1047").
2838	The digits 0-9 are encoded as 240-249 i.e. 0xf0-0xf9. /*
2839	ASSERT_STREQ ("\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9",
2840	(const char *)dst_string.text);
2841	free (ptr: const_cast <unsigned char *> (dst_string.text));
2842
2843	/ Verify that we don't attempt to record substring location information*
2844	for such cases. /*
2845	ASSERT_HAS_NO_SUBSTRING_RANGES
2846	(test, tok->src_loc, type,
2847	"execution character set != source character set");
2848	}
2849
2850	/ Lex a string literal containing a hex-escaped character.*
2851	Verify the substring location data, before and after running
2852	cpp_interpret_string on it. /*
2853
2854	static void
2855	test_lexer_string_locations_hex (const line_table_case &case_)
2856	{
2857	/ Digits 0-9, expressing digit 5 in ASCII as "\x35"*
2858	and with a space in place of digit 6, to terminate the escaped
2859	hex code.
2860	....................000000000.111111.11112222.
2861	....................123456789.012345.67890123. /*
2862	const char *content = " \"01234\\x35 789\"\n";
2863	lexer_test test (case_, content, NULL);
2864
2865	/ Verify that we get the expected token back, with the correct*
2866	location information. /*
2867	const cpp_token *tok = test.get_token ();
2868	ASSERT_EQ (tok->type, CPP_STRING);
2869	ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"01234\\x35 789\"");
2870	ASSERT_TOKEN_LOC_EQ (tok, test.m_tempfile.get_filename (), `1`, `9`, `23`);
2871
2872	/ At this point in lexing, the quote characters are treated as part of*
2873	the string (they are stripped off by cpp_interpret_string). /*
2874	ASSERT_EQ (tok->val.str.len, `15`);
2875
2876	/ Verify that cpp_interpret_string works. /
2877	cpp_string dst_string;
2878	const enum cpp_ttype type = CPP_STRING;
2879	bool result = cpp_interpret_string (test.m_parser, &tok->val.str, `1`,
2880	&dst_string, type);
2881	ASSERT_TRUE (result);
2882	ASSERT_STREQ ("012345 789", (const char *)dst_string.text);
2883	free (ptr: const_cast <unsigned char *> (dst_string.text));
2884
2885	/ Verify ranges of individual characters. This no longer includes the*
2886	opening quote, but does include the closing quote. /*
2887	for (int i = `0`; i <= `4`; i++)
2888	ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, `1`, `10` + i, `10` + i);
2889	ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, `5`, `1`, `15`, `18`);
2890	for (int i = `6`; i <= `10`; i++)
2891	ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, `1`, `13` + i, `13` + i);
2892
2893	ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, `11`);
2894	}
2895
2896	/ Lex a string literal containing an octal-escaped character.*
2897	Verify the substring location data after running cpp_interpret_string
2898	on it. /*
2899
2900	static void
2901	test_lexer_string_locations_oct (const line_table_case &case_)
2902	{
2903	/ Digits 0-9, expressing digit 5 in ASCII as "\065"*
2904	and with a space in place of digit 6, to terminate the escaped
2905	octal code.
2906	....................000000000.111111.11112222.2222223333333333444
2907	....................123456789.012345.67890123.4567890123456789012 /*
2908	const char content = " \"01234\\065 789\" / not a string */\n";
2909	lexer_test test (case_, content, NULL);
2910
2911	/ Verify that we get the expected token back, with the correct*
2912	location information. /*
2913	const cpp_token *tok = test.get_token ();
2914	ASSERT_EQ (tok->type, CPP_STRING);
2915	ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"01234\\065 789\"");
2916
2917	/ Verify that cpp_interpret_string works. /
2918	cpp_string dst_string;
2919	const enum cpp_ttype type = CPP_STRING;
2920	bool result = cpp_interpret_string (test.m_parser, &tok->val.str, `1`,
2921	&dst_string, type);
2922	ASSERT_TRUE (result);
2923	ASSERT_STREQ ("012345 789", (const char *)dst_string.text);
2924	free (ptr: const_cast <unsigned char *> (dst_string.text));
2925
2926	/ Verify ranges of individual characters. This no longer includes the*
2927	opening quote, but does include the closing quote. /*
2928	for (int i = `0`; i < `5`; i++)
2929	ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, `1`, `10` + i, `10` + i);
2930	ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, `5`, `1`, `15`, `18`);
2931	for (int i = `6`; i <= `10`; i++)
2932	ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, `1`, `13` + i, `13` + i);
2933
2934	ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, `11`);
2935	}
2936
2937	/ Test of string literal containing letter escapes. /
2938
2939	static void
2940	test_lexer_string_locations_letter_escape_1 (const line_table_case &case_)
2941	{
2942	/ The string "\tfoo\\\nbar" i.e. tab, "foo", backslash, newline, bar.*
2943	.....................000000000.1.11111.1.1.11222.22222223333333
2944	.....................123456789.0.12345.6.7.89012.34567890123456. /*
2945	const char content = (" \"\\tfoo\\\\\\nbar\" / non-str */\n");
2946	lexer_test test (case_, content, NULL);
2947
2948	/ Verify that we get the expected tokens back. /
2949	const cpp_token *tok = test.get_token ();
2950	ASSERT_EQ (tok->type, CPP_STRING);
2951	ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"\\tfoo\\\\\\nbar\"");
2952
2953	/ Verify ranges of individual characters. /
2954	/ "\t". /
2955	ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2956	`0`, `1`, `10`, `11`);
2957	/ "foo". /
2958	for (int i = `1`; i <= `3`; i++)
2959	ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2960	i, `1`, `11` + i, `11` + i);
2961	/ "\\" and "\n". /
2962	ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2963	`4`, `1`, `15`, `16`);
2964	ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2965	`5`, `1`, `17`, `18`);
2966
2967	/ "bar" and closing quote for nul-terminator. /
2968	for (int i = `6`; i <= `9`; i++)
2969	ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2970	i, `1`, `13` + i, `13` + i);
2971
2972	ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, `10`);
2973	}
2974
2975	/ Another test of a string literal containing a letter escape.*
2976	Based on string seen in
2977	printf ("%-%\n");
2978	in gcc.dg/format/c90-printf-1.c. /*
2979
2980	static void
2981	test_lexer_string_locations_letter_escape_2 (const line_table_case &case_)
2982	{
2983	/ .....................000000000.1111.11.1111.22222222223.*
2984	.....................123456789.0123.45.6789.01234567890. /*
2985	const char content = (" \"%-%\\n\" / non-str */\n");
2986	lexer_test test (case_, content, NULL);
2987
2988	/ Verify that we get the expected tokens back. /
2989	const cpp_token *tok = test.get_token ();
2990	ASSERT_EQ (tok->type, CPP_STRING);
2991	ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"%-%\\n\"");
2992
2993	/ Verify ranges of individual characters. /
2994	/ "%-%". /
2995	for (int i = `0`; i < `3`; i++)
2996	ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2997	i, `1`, `10` + i, `10` + i);
2998	/ "\n". /
2999	ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
3000	`3`, `1`, `13`, `14`);
3001
3002	/ Closing quote for nul-terminator. /
3003	ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
3004	`4`, `1`, `15`, `15`);
3005
3006	ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, `5`);
3007	}
3008
3009	/ Lex a string literal containing UCN 4 characters.*
3010	Verify the substring location data after running cpp_interpret_string
3011	on it. /*
3012
3013	static void
3014	test_lexer_string_locations_ucn4 (const line_table_case &case_)
3015	{
3016	/ Digits 0-9, expressing digits 5 and 6 as Roman numerals expressed*
3017	as UCN 4.
3018	....................000000000.111111.111122.222222223.33333333344444
3019	....................123456789.012345.678901.234567890.12345678901234 /*
3020	const char content = " \"01234\\u2174\\u2175789\" / non-str */\n";
3021	lexer_test test (case_, content, NULL);
3022
3023	/ Verify that we get the expected token back, with the correct*
3024	location information. /*
3025	const cpp_token *tok = test.get_token ();
3026	ASSERT_EQ (tok->type, CPP_STRING);
3027	ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"01234\\u2174\\u2175789\"");
3028
3029	/ Verify that cpp_interpret_string works.*
3030	The string should be encoded in the execution character
3031	set. Assuming that is UTF-8, we should have the following:
3032	----------- ---- ----- ------- ----------------
3033	Byte offset Byte Octal Unicode Source Column(s)
3034	----------- ---- ----- ------- ----------------
3035	0 0x30 '0' 10
3036	1 0x31 '1' 11
3037	2 0x32 '2' 12
3038	3 0x33 '3' 13
3039	4 0x34 '4' 14
3040	5 0xE2 \342 U+2174 15-20
3041	6 0x85 \205 (cont) 15-20
3042	7 0xB4 \264 (cont) 15-20
3043	8 0xE2 \342 U+2175 21-26
3044	9 0x85 \205 (cont) 21-26
3045	10 0xB5 \265 (cont) 21-26
3046	11 0x37 '7' 27
3047	12 0x38 '8' 28
3048	13 0x39 '9' 29
3049	14 0x00 30 (closing quote)
3050	----------- ---- ----- ------- ---------------. /*
3051
3052	cpp_string dst_string;
3053	const enum cpp_ttype type = CPP_STRING;
3054	bool result = cpp_interpret_string (test.m_parser, &tok->val.str, `1`,
3055	&dst_string, type);
3056	ASSERT_TRUE (result);
3057	ASSERT_STREQ ("01234\342\205\264\342\205\265789",
3058	(const char *)dst_string.text);
3059	free (ptr: const_cast <unsigned char *> (dst_string.text));
3060
3061	/ Verify ranges of individual characters. This no longer includes the*
3062	opening quote, but does include the closing quote.
3063	'01234'. /*
3064	for (int i = `0`; i <= `4`; i++)
3065	ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, `1`, `10` + i, `10` + i);
3066	/ U+2174. /
3067	for (int i = `5`; i <= `7`; i++)
3068	ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, `1`, `15`, `20`);
3069	/ U+2175. /
3070	for (int i = `8`; i <= `10`; i++)
3071	ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, `1`, `21`, `26`);
3072	/ '789' and nul terminator /
3073	for (int i = `11`; i <= `14`; i++)
3074	ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, `1`, `16` + i, `16` + i);
3075
3076	ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, `15`);
3077	}
3078
3079	/ Lex a string literal containing UCN 8 characters.*
3080	Verify the substring location data after running cpp_interpret_string
3081	on it. /*
3082
3083	static void
3084	test_lexer_string_locations_ucn8 (const line_table_case &case_)
3085	{
3086	/ Digits 0-9, expressing digits 5 and 6 as Roman numerals as UCN 8.*
3087	....................000000000.111111.1111222222.2222333333333.344444
3088	....................123456789.012345.6789012345.6789012345678.901234 /*
3089	const char content = " \"01234\\U00002174\\U00002175789\" / */\n";
3090	lexer_test test (case_, content, NULL);
3091
3092	/ Verify that we get the expected token back, with the correct*
3093	location information. /*
3094	const cpp_token *tok = test.get_token ();
3095	ASSERT_EQ (tok->type, CPP_STRING);
3096	ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok,
3097	"\"01234\\U00002174\\U00002175789\"");
3098
3099	/ Verify that cpp_interpret_string works.*
3100	The UTF-8 encoding of the string is identical to that from
3101	the ucn4 testcase above; the only difference is the column
3102	locations. /*
3103	cpp_string dst_string;
3104	const enum cpp_ttype type = CPP_STRING;
3105	bool result = cpp_interpret_string (test.m_parser, &tok->val.str, `1`,
3106	&dst_string, type);
3107	ASSERT_TRUE (result);
3108	ASSERT_STREQ ("01234\342\205\264\342\205\265789",
3109	(const char *)dst_string.text);
3110	free (ptr: const_cast <unsigned char *> (dst_string.text));
3111
3112	/ Verify ranges of individual characters. This no longer includes the*
3113	opening quote, but does include the closing quote.
3114	'01234'. /*
3115	for (int i = `0`; i <= `4`; i++)
3116	ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, `1`, `10` + i, `10` + i);
3117	/ U+2174. /
3118	for (int i = `5`; i <= `7`; i++)
3119	ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, `1`, `15`, `24`);
3120	/ U+2175. /
3121	for (int i = `8`; i <= `10`; i++)
3122	ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, `1`, `25`, `34`);
3123	/ '789' at columns 35-37 /
3124	for (int i = `11`; i <= `13`; i++)
3125	ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, `1`, `24` + i, `24` + i);
3126	/ Closing quote/nul-terminator at column 38. /
3127	ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, `14`, `1`, `38`, `38`);
3128
3129	ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, `15`);
3130	}
3131
/* Fetch a big-endian 32-bit value and convert to host endianness.
   PTR_BE_VALUE points at four bytes stored most-significant byte first;
   the bytes are read individually, so the result does not depend on the
   host's byte order.  */

static uint32_t
uint32_from_big_endian (const uint32_t *ptr_be_value)
{
  const unsigned char *buf
    = reinterpret_cast <const unsigned char *> (ptr_be_value);
  return (((uint32_t) buf[0] << 24)
          | ((uint32_t) buf[1] << 16)
          | ((uint32_t) buf[2] << 8)
          | (uint32_t) buf[3]);
}
3143
3144	/ Lex a wide string literal and verify that attempts to read substring*
3145	location data from it fail gracefully. /*
3146
3147	static void
3148	test_lexer_string_locations_wide_string (const line_table_case &case_)
3149	{
3150	/ Digits 0-9.*
3151	....................000000000.11111111112.22222222233333
3152	....................123456789.01234567890.12345678901234 /*
3153	const char content = " L\"0123456789\" / non-str */\n";
3154	lexer_test test (case_, content, NULL);
3155
3156	/ Verify that we get the expected token back, with the correct*
3157	location information. /*
3158	const cpp_token *tok = test.get_token ();
3159	ASSERT_EQ (tok->type, CPP_WSTRING);
3160	ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "L\"0123456789\"");
3161
3162	/ Verify that cpp_interpret_string works, using CPP_WSTRING. /
3163	cpp_string dst_string;
3164	const enum cpp_ttype type = CPP_WSTRING;
3165	bool result = cpp_interpret_string (test.m_parser, &tok->val.str, `1`,
3166	&dst_string, type);
3167	ASSERT_TRUE (result);
3168	/ The cpp_reader defaults to big-endian with*
3169	CHAR_BIT sizeof (int) for the wchar_precision, so dst_string should*
3170	now be encoded as UTF-32BE. /*
3171	const uint32_t be32_chars = (const* uint32_t *)dst_string.text;
3172	ASSERT_EQ (`'0'`, uint32_from_big_endian (&be32_chars[`0`]));
3173	ASSERT_EQ (`'5'`, uint32_from_big_endian (&be32_chars[`5`]));
3174	ASSERT_EQ (`'9'`, uint32_from_big_endian (&be32_chars[`9`]));
3175	ASSERT_EQ (`0`, uint32_from_big_endian (&be32_chars[`10`]));
3176	free (ptr: const_cast <unsigned char *> (dst_string.text));
3177
3178	/ We don't yet support generating substring location information*
3179	for L"" strings. /*
3180	ASSERT_HAS_NO_SUBSTRING_RANGES
3181	(test, tok->src_loc, type,
3182	"execution character set != source character set");
3183	}
3184
/* Fetch a big-endian 16-bit value and convert to host endianness.
   PTR_BE_VALUE points at two bytes stored most-significant byte first;
   the bytes are read individually, so the result does not depend on the
   host's byte order.  */

static uint16_t
uint16_from_big_endian (const uint16_t *ptr_be_value)
{
  const unsigned char *buf
    = reinterpret_cast <const unsigned char *> (ptr_be_value);
  return ((uint16_t) buf[0] << 8) | (uint16_t) buf[1];
}
3193
3194	/ Lex a u"" string literal and verify that attempts to read substring*
3195	location data from it fail gracefully. /*
3196
3197	static void
3198	test_lexer_string_locations_string16 (const line_table_case &case_)
3199	{
3200	/ Digits 0-9.*
3201	....................000000000.11111111112.22222222233333
3202	....................123456789.01234567890.12345678901234 /*
3203	const char content = " u\"0123456789\" / non-str */\n";
3204	lexer_test test (case_, content, NULL);
3205
3206	/ Verify that we get the expected token back, with the correct*
3207	location information. /*
3208	const cpp_token *tok = test.get_token ();
3209	ASSERT_EQ (tok->type, CPP_STRING16);
3210	ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "u\"0123456789\"");
3211
3212	/ Verify that cpp_interpret_string works, using CPP_STRING16. /
3213	cpp_string dst_string;
3214	const enum cpp_ttype type = CPP_STRING16;
3215	bool result = cpp_interpret_string (test.m_parser, &tok->val.str, `1`,
3216	&dst_string, type);
3217	ASSERT_TRUE (result);
3218
3219	/ The cpp_reader defaults to big-endian, so dst_string should*
3220	now be encoded as UTF-16BE. /*
3221	const uint16_t be16_chars = (const* uint16_t *)dst_string.text;
3222	ASSERT_EQ (`'0'`, uint16_from_big_endian (&be16_chars[`0`]));
3223	ASSERT_EQ (`'5'`, uint16_from_big_endian (&be16_chars[`5`]));
3224	ASSERT_EQ (`'9'`, uint16_from_big_endian (&be16_chars[`9`]));
3225	ASSERT_EQ (`0`, uint16_from_big_endian (&be16_chars[`10`]));
3226	free (ptr: const_cast <unsigned char *> (dst_string.text));
3227
3228	/ We don't yet support generating substring location information*
3229	for L"" strings. /*
3230	ASSERT_HAS_NO_SUBSTRING_RANGES
3231	(test, tok->src_loc, type,
3232	"execution character set != source character set");
3233	}
3234
3235	/ Lex a U"" string literal and verify that attempts to read substring*
3236	location data from it fail gracefully. /*
3237
3238	static void
3239	test_lexer_string_locations_string32 (const line_table_case &case_)
3240	{
3241	/ Digits 0-9.*
3242	....................000000000.11111111112.22222222233333
3243	....................123456789.01234567890.12345678901234 /*
3244	const char content = " U\"0123456789\" / non-str */\n";
3245	lexer_test test (case_, content, NULL);
3246
3247	/ Verify that we get the expected token back, with the correct*
3248	location information. /*
3249	const cpp_token *tok = test.get_token ();
3250	ASSERT_EQ (tok->type, CPP_STRING32);
3251	ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "U\"0123456789\"");
3252
3253	/ Verify that cpp_interpret_string works, using CPP_STRING32. /
3254	cpp_string dst_string;
3255	const enum cpp_ttype type = CPP_STRING32;
3256	bool result = cpp_interpret_string (test.m_parser, &tok->val.str, `1`,
3257	&dst_string, type);
3258	ASSERT_TRUE (result);
3259
3260	/ The cpp_reader defaults to big-endian, so dst_string should*
3261	now be encoded as UTF-32BE. /*
3262	const uint32_t be32_chars = (const* uint32_t *)dst_string.text;
3263	ASSERT_EQ (`'0'`, uint32_from_big_endian (&be32_chars[`0`]));
3264	ASSERT_EQ (`'5'`, uint32_from_big_endian (&be32_chars[`5`]));
3265	ASSERT_EQ (`'9'`, uint32_from_big_endian (&be32_chars[`9`]));
3266	ASSERT_EQ (`0`, uint32_from_big_endian (&be32_chars[`10`]));
3267	free (ptr: const_cast <unsigned char *> (dst_string.text));
3268
3269	/ We don't yet support generating substring location information*
3270	for L"" strings. /*
3271	ASSERT_HAS_NO_SUBSTRING_RANGES
3272	(test, tok->src_loc, type,
3273	"execution character set != source character set");
3274	}
3275
3276	/ Lex a u8-string literal.*
3277	Verify the substring location data after running cpp_interpret_string
3278	on it. /*
3279
3280	static void
3281	test_lexer_string_locations_u8 (const line_table_case &case_)
3282	{
3283	/ Digits 0-9.*
3284	....................000000000.11111111112.22222222233333
3285	....................123456789.01234567890.12345678901234 /*
3286	const char content = " u8\"0123456789\" / non-str */\n";
3287	lexer_test test (case_, content, NULL);
3288
3289	/ Verify that we get the expected token back, with the correct*
3290	location information. /*
3291	const cpp_token *tok = test.get_token ();
3292	ASSERT_EQ (tok->type, CPP_UTF8STRING);
3293	ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "u8\"0123456789\"");
3294
3295	/ Verify that cpp_interpret_string works. /
3296	cpp_string dst_string;
3297	const enum cpp_ttype type = CPP_STRING;
3298	bool result = cpp_interpret_string (test.m_parser, &tok->val.str, `1`,
3299	&dst_string, type);
3300	ASSERT_TRUE (result);
3301	ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
3302	free (ptr: const_cast <unsigned char *> (dst_string.text));
3303
3304	/ Verify ranges of individual characters. This no longer includes the*
3305	opening quote, but does include the closing quote. /*
3306	for (int i = `0`; i <= `10`; i++)
3307	ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, `1`, `10` + i, `10` + i);
3308	}
3309
3310	/ Lex a string literal containing UTF-8 source characters.*
3311	Verify the substring location data after running cpp_interpret_string
3312	on it. /*
3313
3314	static void
3315	test_lexer_string_locations_utf8_source (const line_table_case &case_)
3316	{
3317	/ This string literal is written out to the source file as UTF-8,*
3318	and is of the form "before mojibake after", where "mojibake"
3319	is written as the following four unicode code points:
3320	U+6587 CJK UNIFIED IDEOGRAPH-6587
3321	U+5B57 CJK UNIFIED IDEOGRAPH-5B57
3322	U+5316 CJK UNIFIED IDEOGRAPH-5316
3323	U+3051 HIRAGANA LETTER KE.
3324	Each of these is 3 bytes wide when encoded in UTF-8, whereas the
3325	"before" and "after" are 1 byte per unicode character.
3326
3327	The numbering shown are "columns", which are byte* numbers within*
3328	the line, rather than unicode character numbers.
3329
3330	.................... 000000000.1111111.
3331	.................... 123456789.0123456. /*
3332	const char *content = (" \"before "
3333	/ U+6587 CJK UNIFIED IDEOGRAPH-6587*
3334	UTF-8: 0xE6 0x96 0x87
3335	C octal escaped UTF-8: \346\226\207
3336	"column" numbers: 17-19. /*
3337	"\346\226\207"
3338
3339	/ U+5B57 CJK UNIFIED IDEOGRAPH-5B57*
3340	UTF-8: 0xE5 0xAD 0x97
3341	C octal escaped UTF-8: \345\255\227
3342	"column" numbers: 20-22. /*
3343	"\345\255\227"
3344
3345	/ U+5316 CJK UNIFIED IDEOGRAPH-5316*
3346	UTF-8: 0xE5 0x8C 0x96
3347	C octal escaped UTF-8: \345\214\226
3348	"column" numbers: 23-25. /*
3349	"\345\214\226"
3350
3351	/ U+3051 HIRAGANA LETTER KE*
3352	UTF-8: 0xE3 0x81 0x91
3353	C octal escaped UTF-8: \343\201\221
3354	"column" numbers: 26-28. /*
3355	"\343\201\221"
3356
3357	/ column numbers 29 onwards*
3358	2333333.33334444444444
3359	9012345.67890123456789. /*
3360	" after\" /* non-str */\n");
3361	lexer_test test (case_, content, NULL);
3362
3363	/ Verify that we get the expected token back, with the correct*
3364	location information. /*
3365	const cpp_token *tok = test.get_token ();
3366	ASSERT_EQ (tok->type, CPP_STRING);
3367	ASSERT_TOKEN_AS_TEXT_EQ
3368	(test.m_parser, tok,
3369	"\"before \346\226\207\345\255\227\345\214\226\343\201\221 after\"");
3370
3371	/ Verify that cpp_interpret_string works. /
3372	cpp_string dst_string;
3373	const enum cpp_ttype type = CPP_STRING;
3374	bool result = cpp_interpret_string (test.m_parser, &tok->val.str, `1`,
3375	&dst_string, type);
3376	ASSERT_TRUE (result);
3377	ASSERT_STREQ
3378	("before \346\226\207\345\255\227\345\214\226\343\201\221 after",
3379	(const char *)dst_string.text);
3380	free (ptr: const_cast <unsigned char *> (dst_string.text));
3381
3382	/ Verify ranges of individual characters. This no longer includes the*
3383	opening quote, but does include the closing quote.
3384	Assuming that both source and execution encodings are UTF-8, we have
3385	a run of 25 octets in each, plus the NUL terminator. /*
3386	for (int i = `0`; i < `25`; i++)
3387	ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, `1`, `10` + i, `10` + i);
3388	/ NUL-terminator should use the closing quote at column 35. /
3389	ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, `25`, `1`, `35`, `35`);
3390
3391	ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, `26`);
3392	}
3393
3394	/ Test of string literal concatenation. /
3395
3396	static void
3397	test_lexer_string_locations_concatenation_1 (const line_table_case &case_)
3398	{
3399	/ Digits 0-9.*
3400	.....................000000000.111111.11112222222222
3401	.....................123456789.012345.67890123456789. /*
3402	const char content = (" \"01234\" / non-str */\n"
3403	" \"56789\" /* non-str */\n");
3404	lexer_test test (case_, content, NULL);
3405
3406	location_t input_locs[`2`];
3407
3408	/ Verify that we get the expected tokens back. /
3409	auto_vec <cpp_string> input_strings;
3410	const cpp_token *tok_a = test.get_token ();
3411	ASSERT_EQ (tok_a->type, CPP_STRING);
3412	ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok_a, "\"01234\"");
3413	input_strings.safe_push (obj: tok_a->val.str);
3414	input_locs[`0`] = tok_a->src_loc;
3415
3416	const cpp_token *tok_b = test.get_token ();
3417	ASSERT_EQ (tok_b->type, CPP_STRING);
3418	ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok_b, "\"56789\"");
3419	input_strings.safe_push (obj: tok_b->val.str);
3420	input_locs[`1`] = tok_b->src_loc;
3421
3422	/ Verify that cpp_interpret_string works. /
3423	cpp_string dst_string;
3424	const enum cpp_ttype type = CPP_STRING;
3425	bool result = cpp_interpret_string (test.m_parser,
3426	input_strings.address (), `2`,
3427	&dst_string, type);
3428	ASSERT_TRUE (result);
3429	ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
3430	free (ptr: const_cast <unsigned char *> (dst_string.text));
3431
3432	/ Simulate c-lex.cc's lex_string in order to record concatenation. /
3433	test.m_concats.record_string_concatenation (num: `2`, locs: input_locs);
3434
3435	location_t initial_loc = input_locs[`0`];
3436
3437	/ "01234" on line 1. /
3438	for (int i = `0`; i <= `4`; i++)
3439	ASSERT_CHAR_AT_RANGE (test, initial_loc, type, i, `1`, `10` + i, `10` + i);
3440	/ "56789" in line 2, plus its closing quote for the nul terminator. /
3441	for (int i = `5`; i <= `10`; i++)
3442	ASSERT_CHAR_AT_RANGE (test, initial_loc, type, i, `2`, `5` + i, `5` + i);
3443
3444	ASSERT_NUM_SUBSTRING_RANGES (test, initial_loc, type, `11`);
3445	}
3446
3447	/ Another test of string literal concatenation. /
3448
3449	static void
3450	test_lexer_string_locations_concatenation_2 (const line_table_case &case_)
3451	{
3452	/ Digits 0-9.*
3453	.....................000000000.111.11111112222222
3454	.....................123456789.012.34567890123456. /*
3455	const char content = (" \"01\" / non-str */\n"
3456	" \"23\" /* non-str */\n"
3457	" \"45\" /* non-str */\n"
3458	" \"67\" /* non-str */\n"
3459	" \"89\" /* non-str */\n");
3460	lexer_test test (case_, content, NULL);
3461
3462	auto_vec <cpp_string> input_strings;
3463	location_t input_locs[`5`];
3464
3465	/ Verify that we get the expected tokens back. /
3466	for (int i = `0`; i < `5`; i++)
3467	{
3468	const cpp_token *tok = test.get_token ();
3469	ASSERT_EQ (tok->type, CPP_STRING);
3470	input_strings.safe_push (obj: tok->val.str);
3471	input_locs[i] = tok->src_loc;
3472	}
3473
3474	/ Verify that cpp_interpret_string works. /
3475	cpp_string dst_string;
3476	const enum cpp_ttype type = CPP_STRING;
3477	bool result = cpp_interpret_string (test.m_parser,
3478	input_strings.address (), `5`,
3479	&dst_string, type);
3480	ASSERT_TRUE (result);
3481	ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
3482	free (ptr: const_cast <unsigned char *> (dst_string.text));
3483
3484	/ Simulate c-lex.cc's lex_string in order to record concatenation. /
3485	test.m_concats.record_string_concatenation (num: `5`, locs: input_locs);
3486
3487	location_t initial_loc = input_locs[`0`];
3488
3489	/ Within ASSERT_CHAR_AT_RANGE (actually assert_char_at_range), we can*
3490	detect if the initial loc is after LINE_MAP_MAX_LOCATION_WITH_COLS
3491	and expect get_source_range_for_substring to fail.
3492	However, for a string concatenation test, we can have a case
3493	where the initial string is fully before LINE_MAP_MAX_LOCATION_WITH_COLS,
3494	but subsequent strings can be after it.
3495	Attempting to detect this within assert_char_at_range
3496	would overcomplicate the logic for the common test cases, so
3497	we detect it here. /*
3498	if (should_have_column_data_p (loc: input_locs[`0`])
3499	&& !should_have_column_data_p (loc: input_locs[`4`]))
3500	{
3501	/ Verify that get_source_range_for_substring gracefully rejects*
3502	this case. /*
3503	source_range actual_range;
3504	const char *err
3505	= get_source_range_for_char (pfile: test.m_parser, fc&: test.m_file_cache,
3506	concats: &test.m_concats,
3507	strloc: initial_loc, type, char_idx: `0`, out_range: &actual_range);
3508	ASSERT_STREQ ("range starts after LINE_MAP_MAX_LOCATION_WITH_COLS", err);
3509	return;
3510	}
3511
3512	for (int i = `0`; i < `5`; i++)
3513	for (int j = `0`; j < `2`; j++)
3514	ASSERT_CHAR_AT_RANGE (test, initial_loc, type, (i * `2`) + j,
3515	i + `1`, `10` + j, `10` + j);
3516
3517	/ NUL-terminator should use the final closing quote at line 5 column 12. /
3518	ASSERT_CHAR_AT_RANGE (test, initial_loc, type, `10`, `5`, `12`, `12`);
3519
3520	ASSERT_NUM_SUBSTRING_RANGES (test, initial_loc, type, `11`);
3521	}
3522
3523	/ Another test of string literal concatenation, this time combined with*
3524	various kinds of escaped characters. /*
3525
3526	static void
3527	test_lexer_string_locations_concatenation_3 (const line_table_case &case_)
3528	{
3529	/ Digits 0-9, expressing digit 5 in ASCII as hex "\x35"*
3530	digit 6 in ASCII as octal "\066", concatenating multiple strings. /*
3531	const char *content
3532	/ .000000000.111111.111.1.2222.222.2.2233.333.3333.34444444444555*
3533	.123456789.012345.678.9.0123.456.7.8901.234.5678.90123456789012. /*
3534	= (" \"01234\" \"\\x35\" \"\\066\" \"789\" /* non-str */\n");
3535	lexer_test test (case_, content, NULL);
3536
3537	auto_vec <cpp_string> input_strings;
3538	location_t input_locs[`4`];
3539
3540	/ Verify that we get the expected tokens back. /
3541	for (int i = `0`; i < `4`; i++)
3542	{
3543	const cpp_token *tok = test.get_token ();
3544	ASSERT_EQ (tok->type, CPP_STRING);
3545	input_strings.safe_push (obj: tok->val.str);
3546	input_locs[i] = tok->src_loc;
3547	}
3548
3549	/ Verify that cpp_interpret_string works. /
3550	cpp_string dst_string;
3551	const enum cpp_ttype type = CPP_STRING;
3552	bool result = cpp_interpret_string (test.m_parser,
3553	input_strings.address (), `4`,
3554	&dst_string, type);
3555	ASSERT_TRUE (result);
3556	ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
3557	free (ptr: const_cast <unsigned char *> (dst_string.text));
3558
3559	/ Simulate c-lex.cc's lex_string in order to record concatenation. /
3560	test.m_concats.record_string_concatenation (num: `4`, locs: input_locs);
3561
3562	location_t initial_loc = input_locs[`0`];
3563
3564	for (int i = `0`; i <= `4`; i++)
3565	ASSERT_CHAR_AT_RANGE (test, initial_loc, type, i, `1`, `10` + i, `10` + i);
3566	ASSERT_CHAR_AT_RANGE (test, initial_loc, type, `5`, `1`, `19`, `22`);
3567	ASSERT_CHAR_AT_RANGE (test, initial_loc, type, `6`, `1`, `27`, `30`);
3568	for (int i = `7`; i <= `9`; i++)
3569	ASSERT_CHAR_AT_RANGE (test, initial_loc, type, i, `1`, `28` + i, `28` + i);
3570
3571	/ NUL-terminator should use the location of the final closing quote. /
3572	ASSERT_CHAR_AT_RANGE (test, initial_loc, type, `10`, `1`, `38`, `38`);
3573
3574	ASSERT_NUM_SUBSTRING_RANGES (test, initial_loc, type, `11`);
3575	}
3576
3577	/ Test of string literal in a macro. /
3578
3579	static void
3580	test_lexer_string_locations_macro (const line_table_case &case_)
3581	{
3582	/ Digits 0-9.*
3583	.....................0000000001111111111.22222222223.
3584	.....................1234567890123456789.01234567890. /*
3585	const char content = ("#define MACRO \"0123456789\" / non-str */\n"
3586	" MACRO");
3587	lexer_test test (case_, content, NULL);
3588
3589	/ Verify that we get the expected tokens back. /
3590	const cpp_token *tok = test.get_token ();
3591	ASSERT_EQ (tok->type, CPP_PADDING);
3592
3593	tok = test.get_token ();
3594	ASSERT_EQ (tok->type, CPP_STRING);
3595	ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"0123456789\"");
3596
3597	/ Verify ranges of individual characters. We ought to*
3598	see columns within the macro definition. /*
3599	for (int i = `0`; i <= `10`; i++)
3600	ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
3601	i, `1`, `20` + i, `20` + i);
3602
3603	ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, `11`);
3604
3605	tok = test.get_token ();
3606	ASSERT_EQ (tok->type, CPP_PADDING);
3607	}
3608
3609	/ Test of stringification of a macro argument. /
3610
3611	static void
3612	test_lexer_string_locations_stringified_macro_argument
3613	(const line_table_case &case_)
3614	{
3615	/ .....................000000000111111111122222222223.*
3616	.....................123456789012345678901234567890. /*
3617	const char content = ("#define MACRO(X) #X / non-str */\n"
3618	"MACRO(foo)\n");
3619	lexer_test test (case_, content, NULL);
3620
3621	/ Verify that we get the expected token back. /
3622	const cpp_token *tok = test.get_token ();
3623	ASSERT_EQ (tok->type, CPP_PADDING);
3624
3625	tok = test.get_token ();
3626	ASSERT_EQ (tok->type, CPP_STRING);
3627	ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"foo\"");
3628
3629	/ We don't support getting the location of a stringified macro*
3630	argument. Verify that it fails gracefully. /*
3631	ASSERT_HAS_NO_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING,
3632	"cpp_interpret_string_1 failed");
3633
3634	tok = test.get_token ();
3635	ASSERT_EQ (tok->type, CPP_PADDING);
3636
3637	tok = test.get_token ();
3638	ASSERT_EQ (tok->type, CPP_PADDING);
3639	}
3640
3641	/ Ensure that we are fail gracefully if something attempts to pass*
3642	in a location that isn't a string literal token. Seen on this code:
3643
3644	const char a[] = " %d ";
3645	__builtin_printf (a, 0.5);
3646	^
3647
3648	when c-format.cc erroneously used the indicated one-character
3649	location as the format string location, leading to a read past the
3650	end of a string buffer in cpp_interpret_string_1. /*
3651
3652	static void
3653	test_lexer_string_locations_non_string (const line_table_case &case_)
3654	{
3655	/ .....................000000000111111111122222222223.*
3656	.....................123456789012345678901234567890. /*
3657	const char *content = (" a\n");
3658	lexer_test test (case_, content, NULL);
3659
3660	/ Verify that we get the expected token back. /
3661	const cpp_token *tok = test.get_token ();
3662	ASSERT_EQ (tok->type, CPP_NAME);
3663	ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "a");
3664
3665	/ At this point, libcpp is attempting to interpret the name as a*
3666	string literal, despite it not starting with a quote. We don't detect
3667	that, but we should at least fail gracefully. /*
3668	ASSERT_HAS_NO_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING,
3669	"cpp_interpret_string_1 failed");
3670	}
3671
3672	/ Ensure that we can read substring information for a token which*
3673	starts in one linemap and ends in another . Adapted from
3674	gcc.dg/cpp/pr69985.c. /*
3675
3676	static void
3677	test_lexer_string_locations_long_line (const line_table_case &case_)
3678	{
3679	/ .....................000000.000111111111*
3680	.....................123456.789012346789. /*
3681	const char content = ("/ A very long line, so that we start a new line map. */\n"
3682	" \"0123456789012345678901234567890123456789"
3683	"0123456789012345678901234567890123456789"
3684	"0123456789012345678901234567890123456789"
3685	"0123456789\"\n");
3686
3687	lexer_test test (case_, content, NULL);
3688
3689	/ Verify that we get the expected token back. /
3690	const cpp_token *tok = test.get_token ();
3691	ASSERT_EQ (tok->type, CPP_STRING);
3692
3693	if (!should_have_column_data_p (loc: line_table->highest_location))
3694	return;
3695
3696	/ Verify ranges of individual characters. /
3697	ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, `131`);
3698	for (int i = `0`; i < `131`; i++)
3699	ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
3700	i, `2`, `7` + i, `7` + i);
3701	}
3702
3703	/ Test of locations within a raw string that doesn't contain a newline. /
3704
3705	static void
3706	test_lexer_string_locations_raw_string_one_line (const line_table_case &case_)
3707	{
3708	/ .....................00.0000000111111111122.*
3709	.....................12.3456789012345678901. /*
3710	const char *content = ("R\"foo(0123456789)foo\"\n");
3711	lexer_test test (case_, content, NULL);
3712
3713	/ Verify that we get the expected token back. /
3714	const cpp_token *tok = test.get_token ();
3715	ASSERT_EQ (tok->type, CPP_STRING);
3716
3717	/ Verify that cpp_interpret_string works. /
3718	cpp_string dst_string;
3719	const enum cpp_ttype type = CPP_STRING;
3720	bool result = cpp_interpret_string (test.m_parser, &tok->val.str, `1`,
3721	&dst_string, type);
3722	ASSERT_TRUE (result);
3723	ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
3724	free (ptr: const_cast <unsigned char *> (dst_string.text));
3725
3726	if (!should_have_column_data_p (loc: line_table->highest_location))
3727	return;
3728
3729	/ 0-9, plus the nil terminator. /
3730	ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, `11`);
3731	for (int i = `0`; i < `11`; i++)
3732	ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
3733	i, `1`, `7` + i, `7` + i);
3734	}
3735
3736	/ Test of locations within a raw string that contains a newline. /
3737
3738	static void
3739	test_lexer_string_locations_raw_string_multiline (const line_table_case &case_)
3740	{
3741	/ .....................00.0000.*
3742	.....................12.3456. /*
3743	const char *content = ("R\"foo(\n"
3744	/ .....................00000.*
3745	.....................12345. /*
3746	"hello\n"
3747	"world\n"
3748	/ .....................00000.*
3749	.....................12345. /*
3750	")foo\"\n");
3751	lexer_test test (case_, content, NULL);
3752
3753	/ Verify that we get the expected token back. /
3754	const cpp_token *tok = test.get_token ();
3755	ASSERT_EQ (tok->type, CPP_STRING);
3756
3757	/ Verify that cpp_interpret_string works. /
3758	cpp_string dst_string;
3759	const enum cpp_ttype type = CPP_STRING;
3760	bool result = cpp_interpret_string (test.m_parser, &tok->val.str, `1`,
3761	&dst_string, type);
3762	ASSERT_TRUE (result);
3763	ASSERT_STREQ ("\nhello\nworld\n", (const char *)dst_string.text);
3764	free (ptr: const_cast <unsigned char *> (dst_string.text));
3765
3766	if (!should_have_column_data_p (loc: line_table->highest_location))
3767	return;
3768
3769	/ Currently we don't support locations within raw strings that*
3770	contain newlines. /*
3771	ASSERT_HAS_NO_SUBSTRING_RANGES (test, tok->src_loc, tok->type,
3772	"range endpoints are on different lines");
3773	}
3774
3775	/ Test of parsing an unterminated raw string. /
3776
3777	static void
3778	test_lexer_string_locations_raw_string_unterminated (const line_table_case &case_)
3779	{
3780	const char content = "R\"ouch()ouCh\" / etc */";
3781
3782	lexer_diagnostic_sink diagnostics;
3783	lexer_test test (case_, content, &diagnostics);
3784	test.m_implicitly_expect_EOF = false;
3785
3786	/ Attempt to parse the raw string. /
3787	const cpp_token *tok = test.get_token ();
3788	ASSERT_EQ (tok->type, CPP_EOF);
3789
3790	ASSERT_EQ (`1`, diagnostics.m_diagnostics.length ());
3791	/ We expect the message "unterminated raw string"*
3792	in the "cpplib" translation domain.
3793	It's not clear that dgettext is available on all supported hosts,
3794	so this assertion is commented-out for now.
3795	ASSERT_STREQ (dgettext ("cpplib", "unterminated raw string"),
3796	diagnostics.m_diagnostics[0]);
3797	*/
3798	}
3799
3800	/ Test of lexing char constants. /
3801
3802	static void
3803	test_lexer_char_constants (const line_table_case &case_)
3804	{
3805	/ Various char constants.*
3806	.....................0000000001111111111.22222222223.
3807	.....................1234567890123456789.01234567890. /*
3808	const char *content = (" 'a'\n"
3809	" u'a'\n"
3810	" U'a'\n"
3811	" L'a'\n"
3812	" 'abc'\n");
3813	lexer_test test (case_, content, NULL);
3814
3815	/ Verify that we get the expected tokens back. /
3816	/ 'a'. /
3817	const cpp_token *tok = test.get_token ();
3818	ASSERT_EQ (tok->type, CPP_CHAR);
3819	ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "'a'");
3820
3821	unsigned int chars_seen;
3822	int unsignedp;
3823	cppchar_t cc = cpp_interpret_charconst (test.m_parser, tok,
3824	&chars_seen, &unsignedp);
3825	ASSERT_EQ (cc, `'a'`);
3826	ASSERT_EQ (chars_seen, `1`);
3827
3828	/ u'a'. /
3829	tok = test.get_token ();
3830	ASSERT_EQ (tok->type, CPP_CHAR16);
3831	ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "u'a'");
3832
3833	/ U'a'. /
3834	tok = test.get_token ();
3835	ASSERT_EQ (tok->type, CPP_CHAR32);
3836	ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "U'a'");
3837
3838	/ L'a'. /
3839	tok = test.get_token ();
3840	ASSERT_EQ (tok->type, CPP_WCHAR);
3841	ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "L'a'");
3842
3843	/ 'abc' (c-char-sequence). /
3844	tok = test.get_token ();
3845	ASSERT_EQ (tok->type, CPP_CHAR);
3846	ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "'abc'");
3847	}
3848	/ A table of interesting location_t values, giving one axis of our test*
3849	matrix. /*
3850
3851	static const location_t boundary_locations[] = {
3852	/ Zero means "don't override the default values for a new line_table". /
3853	`0`,
3854
3855	/ An arbitrary non-zero value that isn't close to one of*
3856	the boundary values below. /*
3857	`0x10000`,
3858
3859	/ Values near LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES. /
3860	LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES - `0x100`,
3861	LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES - `1`,
3862	LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES,
3863	LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES + `1`,
3864	LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES + `0x100`,
3865
3866	/ Values near LINE_MAP_MAX_LOCATION_WITH_COLS. /
3867	LINE_MAP_MAX_LOCATION_WITH_COLS - `0x100`,
3868	LINE_MAP_MAX_LOCATION_WITH_COLS - `1`,
3869	LINE_MAP_MAX_LOCATION_WITH_COLS,
3870	LINE_MAP_MAX_LOCATION_WITH_COLS + `1`,
3871	LINE_MAP_MAX_LOCATION_WITH_COLS + `0x100`,
3872	};
3873
3874	/ Run TESTCASE multiple times, once for each case in our test matrix. /
3875
3876	void
3877	for_each_line_table_case (void (testcase) (const* line_table_case &))
3878	{
3879	/ As noted above in the description of struct line_table_case,*
3880	we want to explore a test matrix of interesting line_table
3881	situations, running various selftests for each case within the
3882	matrix. /*
3883
3884	/ Run all tests with:*
3885	(a) line_table->default_range_bits == 0, and
3886	(b) line_table->default_range_bits == 5. /*
3887	int num_cases_tested = `0`;
3888	for (int default_range_bits = `0`; default_range_bits <= `5`;
3889	default_range_bits += `5`)
3890	{
3891	/ ...and use each of the "interesting" location values as*
3892	the starting location within line_table. /*
3893	const int num_boundary_locations = ARRAY_SIZE (boundary_locations);
3894	for (int loc_idx = `0`; loc_idx < num_boundary_locations; loc_idx++)
3895	{
3896	line_table_case c (default_range_bits, boundary_locations[loc_idx]);
3897
3898	testcase (c);
3899
3900	num_cases_tested++;
3901	}
3902	}
3903
3904	/ Verify that we fully covered the test matrix. /
3905	ASSERT_EQ (num_cases_tested, `2` * `12`);
3906	}
3907
3908	/ Verify that when presented with a consecutive pair of locations with*
3909	a very large line offset, we don't attempt to consolidate them into
3910	a single ordinary linemap where the line offsets within the line map
3911	would lead to overflow (PR lto/88147). /*
3912
3913	static void
3914	test_line_offset_overflow ()
3915	{
3916	line_table_test ltt (line_table_case (`5`, `0`));
3917
3918	linemap_add (line_table, LC_ENTER, sysp: false, to_file: "foo.c", to_line: `0`);
3919	linemap_line_start (set: line_table, to_line: `1`, max_column_hint: `100`);
3920	location_t loc_a = linemap_line_start (set: line_table, to_line: `2578`, max_column_hint: `255`);
3921	assert_loceq (exp_filename: "foo.c", exp_linenum: `2578`, exp_colnum: `0`, loc: loc_a);
3922
3923	const line_map_ordinary *ordmap_a = LINEMAPS_LAST_ORDINARY_MAP (set: line_table);
3924	ASSERT_EQ (ordmap_a->m_column_and_range_bits, `13`);
3925	ASSERT_EQ (ordmap_a->m_range_bits, `5`);
3926
3927	location_t loc_b = linemap_line_start (set: line_table, to_line: `404198`, max_column_hint: `512`);
3928	assert_loceq (exp_filename: "foo.c", exp_linenum: `404198`, exp_colnum: `0`, loc: loc_b);
3929
3930	/ We should have started a new linemap, rather than attempting to store*
3931	a very large line offset. /*
3932	const line_map_ordinary *ordmap_b = LINEMAPS_LAST_ORDINARY_MAP (set: line_table);
3933	ASSERT_NE (ordmap_a, ordmap_b);
3934	}
3935
3936	void test_cpp_utf8 ()
3937	{
3938	const int def_tabstop = `8`;
3939	cpp_char_column_policy policy (def_tabstop, cpp_wcwidth);
3940
3941	/ Verify that wcwidth of invalid UTF-8 or control bytes is 1. /
3942	{
3943	int w_bad = cpp_display_width (data: "\xf0!\x9f!\x98!\x82!", data_length: `8`, policy);
3944	ASSERT_EQ (`8`, w_bad);
3945	int w_ctrl = cpp_display_width (data: "\r\n\v\0\1", data_length: `5`, policy);
3946	ASSERT_EQ (`5`, w_ctrl);
3947	}
3948
3949	/ Verify that wcwidth of valid UTF-8 is as expected. /
3950	{
3951	const int w_pi = cpp_display_width (data: "\xcf\x80", data_length: `2`, policy);
3952	ASSERT_EQ (`1`, w_pi);
3953	const int w_emoji = cpp_display_width (data: "\xf0\x9f\x98\x82", data_length: `4`, policy);
3954	ASSERT_EQ (`2`, w_emoji);
3955	const int w_umlaut_precomposed = cpp_display_width (data: "\xc3\xbf", data_length: `2`,
3956	policy);
3957	ASSERT_EQ (`1`, w_umlaut_precomposed);
3958	const int w_umlaut_combining = cpp_display_width (data: "y\xcc\x88", data_length: `3`,
3959	policy);
3960	ASSERT_EQ (`1`, w_umlaut_combining);
3961	const int w_han = cpp_display_width (data: "\xe4\xb8\xba", data_length: `3`, policy);
3962	ASSERT_EQ (`2`, w_han);
3963	const int w_ascii = cpp_display_width (data: "GCC", data_length: `3`, policy);
3964	ASSERT_EQ (`3`, w_ascii);
3965	const int w_mixed = cpp_display_width (data: "\xcf\x80 = 3.14 \xf0\x9f\x98\x82"
3966	"\x9f! \xe4\xb8\xba y\xcc\x88",
3967	data_length: `24`, policy);
3968	ASSERT_EQ (`18`, w_mixed);
3969	}
3970
3971	/ Verify that display width properly expands tabs. /
3972	{
3973	const char *tstr = "\tabc\td";
3974	ASSERT_EQ (`6`, cpp_display_width (tstr, `6`,
3975	cpp_char_column_policy (`1`, cpp_wcwidth)));
3976	ASSERT_EQ (`10`, cpp_display_width (tstr, `6`,
3977	cpp_char_column_policy (`3`, cpp_wcwidth)));
3978	ASSERT_EQ (`17`, cpp_display_width (tstr, `6`,
3979	cpp_char_column_policy (`8`, cpp_wcwidth)));
3980	ASSERT_EQ (`1`,
3981	cpp_display_column_to_byte_column
3982	(tstr, `6`, `7`, cpp_char_column_policy (`8`, cpp_wcwidth)));
3983	}
3984
3985	/ Verify that cpp_byte_column_to_display_column can go past the end,*
3986	and similar edge cases. /*
3987	{
3988	const char *str
3989	/ Display columns.*
3990	111111112345 /*
3991	= "\xcf\x80 abc";
3992	/ 111122223456*
3993	Byte columns. /*
3994
3995	ASSERT_EQ (`5`, cpp_display_width (str, `6`, policy));
3996	ASSERT_EQ (`105`,
3997	cpp_byte_column_to_display_column (str, `6`, `106`, policy));
3998	ASSERT_EQ (`10000`,
3999	cpp_byte_column_to_display_column (NULL, `0`, `10000`, policy));
4000	ASSERT_EQ (`0`,
4001	cpp_byte_column_to_display_column (NULL, `10000`, `0`, policy));
4002	}
4003
4004	/ Verify that cpp_display_column_to_byte_column can go past the end,*
4005	and similar edge cases, and check invertibility. /*
4006	{
4007	const char *str
4008	/ Display columns.*
4009	000000000000000000000000000000000000011
4010	111111112222222234444444455555555678901 /*
4011	= "\xf0\x9f\x98\x82 \xf0\x9f\x98\x82 hello";
4012	/ 000000000000000000000000000000000111111*
4013	111122223333444456666777788889999012345
4014	Byte columns. /*
4015	ASSERT_EQ (`4`, cpp_display_column_to_byte_column (str, `15`, `2`, policy));
4016	ASSERT_EQ (`15`,
4017	cpp_display_column_to_byte_column (str, `15`, `11`, policy));
4018	ASSERT_EQ (`115`,
4019	cpp_display_column_to_byte_column (str, `15`, `111`, policy));
4020	ASSERT_EQ (`10000`,
4021	cpp_display_column_to_byte_column (NULL, `0`, `10000`, policy));
4022	ASSERT_EQ (`0`,
4023	cpp_display_column_to_byte_column (NULL, `10000`, `0`, policy));
4024
4025	/ Verify that we do not interrupt a UTF-8 sequence. /
4026	ASSERT_EQ (`4`, cpp_display_column_to_byte_column (str, `15`, `1`, policy));
4027
4028	for (int byte_col = `1`; byte_col <= `15`; ++byte_col)
4029	{
4030	const int disp_col
4031	= cpp_byte_column_to_display_column (data: str, data_length: `15`, column: byte_col, policy);
4032	const int byte_col2
4033	= cpp_display_column_to_byte_column (data: str, data_length: `15`, display_col: disp_col, policy);
4034
4035	/ If we ask for the display column in the middle of a UTF-8*
4036	sequence, it will return the length of the partial sequence,
4037	matching the behavior of GCC before display column support.
4038	Otherwise check the round trip was successful. /*
4039	if (byte_col < `4`)
4040	ASSERT_EQ (byte_col, disp_col);
4041	else if (byte_col >= `6` && byte_col < `9`)
4042	ASSERT_EQ (`3` + (byte_col - `5`), disp_col);
4043	else
4044	ASSERT_EQ (byte_col2, byte_col);
4045	}
4046	}
4047	}
4048
4049	static bool
4050	check_cpp_valid_utf8_p (const char *str)
4051	{
4052	return cpp_valid_utf8_p (data: str, num_bytes: strlen (s: str));
4053	}
4054
4055	/ Check that cpp_valid_utf8_p works as expected. /
4056
4057	static void
4058	test_cpp_valid_utf8_p ()
4059	{
4060	ASSERT_TRUE (check_cpp_valid_utf8_p ("hello world"));
4061
4062	/ 2-byte char (pi). /
4063	ASSERT_TRUE (check_cpp_valid_utf8_p("\xcf\x80"));
4064
4065	/ 3-byte chars (the Japanese word "mojibake"). /
4066	ASSERT_TRUE (check_cpp_valid_utf8_p
4067	(
4068	/ U+6587 CJK UNIFIED IDEOGRAPH-6587*
4069	UTF-8: 0xE6 0x96 0x87
4070	C octal escaped UTF-8: \346\226\207. /*
4071	"\346\226\207"
4072	/ U+5B57 CJK UNIFIED IDEOGRAPH-5B57*
4073	UTF-8: 0xE5 0xAD 0x97
4074	C octal escaped UTF-8: \345\255\227. /*
4075	"\345\255\227"
4076	/ U+5316 CJK UNIFIED IDEOGRAPH-5316*
4077	UTF-8: 0xE5 0x8C 0x96
4078	C octal escaped UTF-8: \345\214\226. /*
4079	"\345\214\226"
4080	/ U+3051 HIRAGANA LETTER KE*
4081	UTF-8: 0xE3 0x81 0x91
4082	C octal escaped UTF-8: \343\201\221. /*
4083	"\343\201\221"));
4084
4085	/ 4-byte char: an emoji. /
4086	ASSERT_TRUE (check_cpp_valid_utf8_p ("\xf0\x9f\x98\x82"));
4087
4088	/ Control codes, including the NUL byte. /
4089	ASSERT_TRUE (cpp_valid_utf8_p ("\r\n\v\0\1", `5`));
4090
4091	ASSERT_FALSE (check_cpp_valid_utf8_p ("\xf0!\x9f!\x98!\x82!"));
4092
4093	/ Unexpected continuation bytes. /
4094	for (unsigned char continuation_byte = `0x80`;
4095	continuation_byte <= `0xbf`;
4096	continuation_byte++)
4097	ASSERT_FALSE (cpp_valid_utf8_p ((const char *)&continuation_byte, `1`));
4098
4099	/ "Lonely start characters" for 2-byte sequences. /
4100	{
4101	unsigned char buf[`2`];
4102	buf[`1`] = `' '`;
4103	for (buf[`0`] = `0xc0`;
4104	buf[`0`] <= `0xdf`;
4105	buf[`0`]++)
4106	ASSERT_FALSE (cpp_valid_utf8_p ((const char *)buf, `2`));
4107	}
4108
4109	/ "Lonely start characters" for 3-byte sequences. /
4110	{
4111	unsigned char buf[`2`];
4112	buf[`1`] = `' '`;
4113	for (buf[`0`] = `0xe0`;
4114	buf[`0`] <= `0xef`;
4115	buf[`0`]++)
4116	ASSERT_FALSE (cpp_valid_utf8_p ((const char *)buf, `2`));
4117	}
4118
4119	/ "Lonely start characters" for 4-byte sequences. /
4120	{
4121	unsigned char buf[`2`];
4122	buf[`1`] = `' '`;
4123	for (buf[`0`] = `0xf0`;
4124	buf[`0`] <= `0xf4`;
4125	buf[`0`]++)
4126	ASSERT_FALSE (cpp_valid_utf8_p ((const char *)buf, `2`));
4127	}
4128
4129	/ Invalid start characters (formerly valid for 5-byte and 6-byte*
4130	sequences). /*
4131	{
4132	unsigned char buf[`2`];
4133	buf[`1`] = `' '`;
4134	for (buf[`0`] = `0xf5`;
4135	buf[`0`] <= `0xfd`;
4136	buf[`0`]++)
4137	ASSERT_FALSE (cpp_valid_utf8_p ((const char *)buf, `2`));
4138	}
4139
4140	/ Impossible bytes. /
4141	ASSERT_FALSE (check_cpp_valid_utf8_p ("\xc0"));
4142	ASSERT_FALSE (check_cpp_valid_utf8_p ("\xc1"));
4143	ASSERT_FALSE (check_cpp_valid_utf8_p ("\xfe"));
4144	ASSERT_FALSE (check_cpp_valid_utf8_p ("\xff"));
4145	}
4146
4147	/ Run all of the selftests within this file. /
4148
4149	void
4150	input_cc_tests ()
4151	{
4152	test_linenum_comparisons ();
4153	test_should_have_column_data_p ();
4154	test_unknown_location ();
4155	test_builtins ();
4156	for_each_line_table_case (testcase: test_make_location_nonpure_range_endpoints);
4157
4158	for_each_line_table_case (testcase: test_accessing_ordinary_linemaps);
4159	for_each_line_table_case (testcase: test_lexer);
4160	for_each_line_table_case (testcase: test_lexer_string_locations_simple);
4161	for_each_line_table_case (testcase: test_lexer_string_locations_ebcdic);
4162	for_each_line_table_case (testcase: test_lexer_string_locations_hex);
4163	for_each_line_table_case (testcase: test_lexer_string_locations_oct);
4164	for_each_line_table_case (testcase: test_lexer_string_locations_letter_escape_1);
4165	for_each_line_table_case (testcase: test_lexer_string_locations_letter_escape_2);
4166	for_each_line_table_case (testcase: test_lexer_string_locations_ucn4);
4167	for_each_line_table_case (testcase: test_lexer_string_locations_ucn8);
4168	for_each_line_table_case (testcase: test_lexer_string_locations_wide_string);
4169	for_each_line_table_case (testcase: test_lexer_string_locations_string16);
4170	for_each_line_table_case (testcase: test_lexer_string_locations_string32);
4171	for_each_line_table_case (testcase: test_lexer_string_locations_u8);
4172	for_each_line_table_case (testcase: test_lexer_string_locations_utf8_source);
4173	for_each_line_table_case (testcase: test_lexer_string_locations_concatenation_1);
4174	for_each_line_table_case (testcase: test_lexer_string_locations_concatenation_2);
4175	for_each_line_table_case (testcase: test_lexer_string_locations_concatenation_3);
4176	for_each_line_table_case (testcase: test_lexer_string_locations_macro);
4177	for_each_line_table_case (testcase: test_lexer_string_locations_stringified_macro_argument);
4178	for_each_line_table_case (testcase: test_lexer_string_locations_non_string);
4179	for_each_line_table_case (testcase: test_lexer_string_locations_long_line);
4180	for_each_line_table_case (testcase: test_lexer_string_locations_raw_string_one_line);
4181	for_each_line_table_case (testcase: test_lexer_string_locations_raw_string_multiline);
4182	for_each_line_table_case (testcase: test_lexer_string_locations_raw_string_unterminated);
4183	for_each_line_table_case (testcase: test_lexer_char_constants);
4184
4185	test_reading_source_line ();
4186
4187	test_line_offset_overflow ();
4188
4189	test_cpp_utf8 ();
4190	test_cpp_valid_utf8_p ();
4191	}
4192
4193	} // namespace selftest
4194
4195	#endif /* CHECKING_P */
4196

source code of gcc/input.cc