1/* Data and functions related to line maps and input files.
2 Copyright (C) 2004-2017 Free Software Foundation, Inc.
3
4This file is part of GCC.
5
6GCC is free software; you can redistribute it and/or modify it under
7the terms of the GNU General Public License as published by the Free
8Software Foundation; either version 3, or (at your option) any later
9version.
10
11GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12WARRANTY; without even the implied warranty of MERCHANTABILITY or
13FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14for more details.
15
16You should have received a copy of the GNU General Public License
17along with GCC; see the file COPYING3. If not see
18<http://www.gnu.org/licenses/>. */
19
20#include "config.h"
21#include "system.h"
22#include "coretypes.h"
23#include "intl.h"
24#include "diagnostic-core.h"
25#include "selftest.h"
26#include "cpplib.h"
27
28#ifndef HAVE_ICONV
29#define HAVE_ICONV 0
30#endif
31
/* This is a cache used by get_next_line to store the content of a
   file to be searched for file lines.  One instance describes one
   cached file; the instances live in the fcache_tab array.  */
struct fcache
{
  /* Information used to store the boundaries of one line.  */
  struct line_info
  {
    /* The line number.  It starts from 1.  */
    size_t line_num;

    /* The position (byte count) of the beginning of the line,
       relative to the file data pointer.  This starts at zero.  */
    size_t start_pos;

    /* The position (byte count) of the last byte of the line.  This
       normally points to the '\n' character, or to one byte after the
       last byte of the file, if the file doesn't contain a '\n'
       character.  */
    size_t end_pos;

    /* Build a record for line L spanning bytes [S, E).  */
    line_info (size_t l, size_t s, size_t e)
      : line_num (l), start_pos (s), end_pos (e)
    {}

    /* Build an all-zero record.  */
    line_info ()
      :line_num (0), start_pos (0), end_pos (0)
    {}
  };

  /* The number of times this file has been accessed.  This is used
     to designate which file cache to evict from the cache
     array.  */
  unsigned use_count;

  /* The file_path is the key for identifying a particular file in
     the cache.  A NULL file_path marks an unused entry.
     For libcpp-using code, the underlying buffer for this field is
     owned by the corresponding _cpp_file within the cpp_reader.  */
  const char *file_path;

  /* The stream opened on FILE_PATH, or NULL.  It is closed by the
     destructor and whenever the entry is recycled or evicted.  */
  FILE *fp;

  /* This points to the content of the file that we've read so
     far.  The buffer is grown by maybe_grow and freed by the
     destructor.  */
  char *data;

  /* The size of the DATA array above.  */
  size_t size;

  /* The number of bytes read from the underlying file so far.  This
     must be less than or equal to SIZE above.  */
  size_t nb_read;

  /* The index of the beginning of the current line.  */
  size_t line_start_idx;

  /* The number of the previous line read.  This starts at 1.  Zero
     means we've read no line so far.  */
  size_t line_num;

  /* This is the total number of lines of the current file.  At the
     moment, we try to get this information from the line map
     subsystem.  Note that this is just a hint.  When using the C++
     front-end, this hint is correct because the input file is then
     completely tokenized before parsing starts; so the line map knows
     the number of lines before compilation really starts.  For e.g,
     the C front-end, it can happen that we start emitting diagnostics
     before the line map has seen the end of the file.  */
  size_t total_lines;

  /* Could this file be missing a trailing newline on its final line?
     Initially true (to cope with empty files), set to true/false
     as each line is read.  */
  bool missing_trailing_newline;

  /* This is a record of the beginning and end of the lines we've seen
     while reading the file.  This is useful to avoid walking the data
     from the beginning when we are asked to read a line that is
     before LINE_START_IDX above.  Note that the maximum size of this
     record is fcache_line_record_size, so that the memory consumption
     doesn't explode.  We thus scale total_lines down to
     fcache_line_record_size.  */
  vec<line_info, va_heap> line_record;

  fcache ();
  ~fcache ();
};
119
/* Current position in real source file.  */

location_t input_location = UNKNOWN_LOCATION;

/* The set of line maps that the functions in this file hand to the
   linemap_* API in order to resolve and expand locations.  */

struct line_maps *line_table;

/* A stashed copy of "line_table" for use by selftest::line_table_test.
   This needs to be a global so that it can be a GC root, and thus
   prevent the stashed copy from being garbage-collected if the GC runs
   during a line_table_test.  */

struct line_maps *saved_line_table;

/* The table of cached files.  It is allocated on first use by
   diagnostic_file_cache_init and released by
   diagnostic_file_cache_fini.  */
static fcache *fcache_tab;

/* The number of entries in fcache_tab.  */
static const size_t fcache_tab_size = 16;

/* The size, in bytes, given to an entry's data buffer the first time
   maybe_grow allocates it; later growths double the current size.  */
static const size_t fcache_buffer_size = 4 * 1024;

/* The maximum number of line boundaries kept in an entry's
   line_record.  */
static const size_t fcache_line_record_size = 100;
137
138/* Expand the source location LOC into a human readable location. If
139 LOC resolves to a builtin location, the file name of the readable
140 location is set to the string "<built-in>". If EXPANSION_POINT_P is
141 TRUE and LOC is virtual, then it is resolved to the expansion
142 point of the involved macro. Otherwise, it is resolved to the
143 spelling location of the token.
144
145 When resolving to the spelling location of the token, if the
146 resulting location is for a built-in location (that is, it has no
147 associated line/column) in the context of a macro expansion, the
148 returned location is the first one (while unwinding the macro
149 location towards its expansion point) that is in real source
150 code.
151
152 ASPECT controls which part of the location to use. */
153
154static expanded_location
155expand_location_1 (source_location loc,
156 bool expansion_point_p,
157 enum location_aspect aspect)
158{
159 expanded_location xloc;
160 const line_map_ordinary *map;
161 enum location_resolution_kind lrk = LRK_MACRO_EXPANSION_POINT;
162 tree block = NULL;
163
164 if (IS_ADHOC_LOC (loc))
165 {
166 block = LOCATION_BLOCK (loc);
167 loc = LOCATION_LOCUS (loc);
168 }
169
170 memset (&xloc, 0, sizeof (xloc));
171
172 if (loc >= RESERVED_LOCATION_COUNT)
173 {
174 if (!expansion_point_p)
175 {
176 /* We want to resolve LOC to its spelling location.
177
178 But if that spelling location is a reserved location that
179 appears in the context of a macro expansion (like for a
180 location for a built-in token), let's consider the first
181 location (toward the expansion point) that is not reserved;
182 that is, the first location that is in real source code. */
183 loc = linemap_unwind_to_first_non_reserved_loc (line_table,
184 loc, NULL);
185 lrk = LRK_SPELLING_LOCATION;
186 }
187 loc = linemap_resolve_location (line_table, loc, lrk, &map);
188
189 /* loc is now either in an ordinary map, or is a reserved location.
190 If it is a compound location, the caret is in a spelling location,
191 but the start/finish might still be a virtual location.
192 Depending of what the caller asked for, we may need to recurse
193 one level in order to resolve any virtual locations in the
194 end-points. */
195 switch (aspect)
196 {
197 default:
198 gcc_unreachable ();
199 /* Fall through. */
200 case LOCATION_ASPECT_CARET:
201 break;
202 case LOCATION_ASPECT_START:
203 {
204 source_location start = get_start (loc);
205 if (start != loc)
206 return expand_location_1 (start, expansion_point_p, aspect);
207 }
208 break;
209 case LOCATION_ASPECT_FINISH:
210 {
211 source_location finish = get_finish (loc);
212 if (finish != loc)
213 return expand_location_1 (finish, expansion_point_p, aspect);
214 }
215 break;
216 }
217 xloc = linemap_expand_location (line_table, map, loc);
218 }
219
220 xloc.data = block;
221 if (loc <= BUILTINS_LOCATION)
222 xloc.file = loc == UNKNOWN_LOCATION ? NULL : _("<built-in>");
223
224 return xloc;
225}
226
227/* Initialize the set of cache used for files accessed by caret
228 diagnostic. */
229
230static void
231diagnostic_file_cache_init (void)
232{
233 if (fcache_tab == NULL)
234 fcache_tab = new fcache[fcache_tab_size];
235}
236
237/* Free the resources used by the set of cache used for files accessed
238 by caret diagnostic. */
239
240void
241diagnostic_file_cache_fini (void)
242{
243 if (fcache_tab)
244 {
245 delete [] (fcache_tab);
246 fcache_tab = NULL;
247 }
248}
249
250/* Return the total lines number that have been read so far by the
251 line map (in the preprocessor) so far. For languages like C++ that
252 entirely preprocess the input file before starting to parse, this
253 equals the actual number of lines of the file. */
254
255static size_t
256total_lines_num (const char *file_path)
257{
258 size_t r = 0;
259 source_location l = 0;
260 if (linemap_get_file_highest_location (line_table, file_path, &l))
261 {
262 gcc_assert (l >= RESERVED_LOCATION_COUNT);
263 expanded_location xloc = expand_location (l);
264 r = xloc.line;
265 }
266 return r;
267}
268
269/* Lookup the cache used for the content of a given file accessed by
270 caret diagnostic. Return the found cached file, or NULL if no
271 cached file was found. */
272
273static fcache*
274lookup_file_in_cache_tab (const char *file_path)
275{
276 if (file_path == NULL)
277 return NULL;
278
279 diagnostic_file_cache_init ();
280
281 /* This will contain the found cached file. */
282 fcache *r = NULL;
283 for (unsigned i = 0; i < fcache_tab_size; ++i)
284 {
285 fcache *c = &fcache_tab[i];
286 if (c->file_path && !strcmp (c->file_path, file_path))
287 {
288 ++c->use_count;
289 r = c;
290 }
291 }
292
293 if (r)
294 ++r->use_count;
295
296 return r;
297}
298
299/* Purge any mention of FILENAME from the cache of files used for
300 printing source code. For use in selftests when working
301 with tempfiles. */
302
303void
304diagnostics_file_cache_forcibly_evict_file (const char *file_path)
305{
306 gcc_assert (file_path);
307
308 fcache *r = lookup_file_in_cache_tab (file_path);
309 if (!r)
310 /* Not found. */
311 return;
312
313 r->file_path = NULL;
314 if (r->fp)
315 fclose (r->fp);
316 r->fp = NULL;
317 r->nb_read = 0;
318 r->line_start_idx = 0;
319 r->line_num = 0;
320 r->line_record.truncate (0);
321 r->use_count = 0;
322 r->total_lines = 0;
323 r->missing_trailing_newline = true;
324}
325
326/* Return the file cache that has been less used, recently, or the
327 first empty one. If HIGHEST_USE_COUNT is non-null,
328 *HIGHEST_USE_COUNT is set to the highest use count of the entries
329 in the cache table. */
330
331static fcache*
332evicted_cache_tab_entry (unsigned *highest_use_count)
333{
334 diagnostic_file_cache_init ();
335
336 fcache *to_evict = &fcache_tab[0];
337 unsigned huc = to_evict->use_count;
338 for (unsigned i = 1; i < fcache_tab_size; ++i)
339 {
340 fcache *c = &fcache_tab[i];
341 bool c_is_empty = (c->file_path == NULL);
342
343 if (c->use_count < to_evict->use_count
344 || (to_evict->file_path && c_is_empty))
345 /* We evict C because it's either an entry with a lower use
346 count or one that is empty. */
347 to_evict = c;
348
349 if (huc < c->use_count)
350 huc = c->use_count;
351
352 if (c_is_empty)
353 /* We've reached the end of the cache; subsequent elements are
354 all empty. */
355 break;
356 }
357
358 if (highest_use_count)
359 *highest_use_count = huc;
360
361 return to_evict;
362}
363
364/* Create the cache used for the content of a given file to be
365 accessed by caret diagnostic. This cache is added to an array of
366 cache and can be retrieved by lookup_file_in_cache_tab. This
367 function returns the created cache. Note that only the last
368 fcache_tab_size files are cached. */
369
370static fcache*
371add_file_to_cache_tab (const char *file_path)
372{
373
374 FILE *fp = fopen (file_path, "r");
375 if (fp == NULL)
376 return NULL;
377
378 unsigned highest_use_count = 0;
379 fcache *r = evicted_cache_tab_entry (&highest_use_count);
380 r->file_path = file_path;
381 if (r->fp)
382 fclose (r->fp);
383 r->fp = fp;
384 r->nb_read = 0;
385 r->line_start_idx = 0;
386 r->line_num = 0;
387 r->line_record.truncate (0);
388 /* Ensure that this cache entry doesn't get evicted next time
389 add_file_to_cache_tab is called. */
390 r->use_count = ++highest_use_count;
391 r->total_lines = total_lines_num (file_path);
392 r->missing_trailing_newline = true;
393
394 return r;
395}
396
397/* Lookup the cache used for the content of a given file accessed by
398 caret diagnostic. If no cached file was found, create a new cache
399 for this file, add it to the array of cached file and return
400 it. */
401
402static fcache*
403lookup_or_add_file_to_cache_tab (const char *file_path)
404{
405 fcache *r = lookup_file_in_cache_tab (file_path);
406 if (r == NULL)
407 r = add_file_to_cache_tab (file_path);
408 return r;
409}
410
411/* Default constructor for a cache of file used by caret
412 diagnostic. */
413
414fcache::fcache ()
415: use_count (0), file_path (NULL), fp (NULL), data (0),
416 size (0), nb_read (0), line_start_idx (0), line_num (0),
417 total_lines (0), missing_trailing_newline (true)
418{
419 line_record.create (0);
420}
421
422/* Destructor for a cache of file used by caret diagnostic. */
423
424fcache::~fcache ()
425{
426 if (fp)
427 {
428 fclose (fp);
429 fp = NULL;
430 }
431 if (data)
432 {
433 XDELETEVEC (data);
434 data = 0;
435 }
436 line_record.release ();
437}
438
439/* Returns TRUE iff the cache would need to be filled with data coming
440 from the file. That is, either the cache is empty or full or the
441 current line is empty. Note that if the cache is full, it would
442 need to be extended and filled again. */
443
444static bool
445needs_read (fcache *c)
446{
447 return (c->nb_read == 0
448 || c->nb_read == c->size
449 || (c->line_start_idx >= c->nb_read - 1));
450}
451
452/* Return TRUE iff the cache is full and thus needs to be
453 extended. */
454
455static bool
456needs_grow (fcache *c)
457{
458 return c->nb_read == c->size;
459}
460
461/* Grow the cache if it needs to be extended. */
462
463static void
464maybe_grow (fcache *c)
465{
466 if (!needs_grow (c))
467 return;
468
469 size_t size = c->size == 0 ? fcache_buffer_size : c->size * 2;
470 c->data = XRESIZEVEC (char, c->data, size);
471 c->size = size;
472}
473
474/* Read more data into the cache. Extends the cache if need be.
475 Returns TRUE iff new data could be read. */
476
477static bool
478read_data (fcache *c)
479{
480 if (feof (c->fp) || ferror (c->fp))
481 return false;
482
483 maybe_grow (c);
484
485 char * from = c->data + c->nb_read;
486 size_t to_read = c->size - c->nb_read;
487 size_t nb_read = fread (from, 1, to_read, c->fp);
488
489 if (ferror (c->fp))
490 return false;
491
492 c->nb_read += nb_read;
493 return !!nb_read;
494}
495
496/* Read new data iff the cache needs to be filled with more data
497 coming from the file FP. Return TRUE iff the cache was filled with
498 mode data. */
499
500static bool
501maybe_read_data (fcache *c)
502{
503 if (!needs_read (c))
504 return false;
505 return read_data (c);
506}
507
/* Read the next line from the file underlying C, using C as a cache
   for the data coming from the file.  Upon successful completion,
   *LINE is set to the beginning of the line found.  *LINE points
   directly into the line cache and is only valid until the next call
   of get_next_line, which may reallocate the cache buffer.
   *LINE_LEN is set to the length of the line.  Note that the line
   does not contain any terminal delimiter.  This function returns
   true if some data was read or processed from the cache, false
   otherwise (no data left, or a read error occurred).

   As a side effect this advances C->line_num and C->line_start_idx,
   updates C->missing_trailing_newline, and may append the boundaries
   of the line to C->line_record.  */

static bool
get_next_line (fcache *c, char **line, ssize_t *line_len)
{
  /* Fill the cache with data to process.  */
  maybe_read_data (c);

  size_t remaining_size = c->nb_read - c->line_start_idx;
  if (remaining_size == 0)
    /* There is no more data to process.  */
    return false;

  char *line_start = c->data + c->line_start_idx;

  char *next_line_start = NULL;
  size_t len = 0;
  char *line_end = (char *) memchr (line_start, '\n', remaining_size);
  if (line_end == NULL)
    {
      /* We haven't found the end-of-line delimiter in the cache.
         Fill the cache with more data from the file and look for the
         '\n'.  */
      while (maybe_read_data (c))
        {
          /* Reading may have reallocated c->data, so recompute the
             pointer to the start of the line before searching.  */
          line_start = c->data + c->line_start_idx;
          remaining_size = c->nb_read - c->line_start_idx;
          line_end = (char *) memchr (line_start, '\n', remaining_size);
          if (line_end != NULL)
            {
              next_line_start = line_end + 1;
              break;
            }
        }
      if (line_end == NULL)
        {
          /* We've loaded all the file into the cache and still no
             '\n'.  Let's say the line ends one byte past the end of
             the file.  This is to stay consistent with the case
             where the line ends with a '\n' and line_end points to
             that terminal '\n'.  That consistency is useful below in
             the len calculation.  */
          line_end = c->data + c->nb_read ;
          c->missing_trailing_newline = true;
        }
      else
        c->missing_trailing_newline = false;
    }
  else
    {
      next_line_start = line_end + 1;
      c->missing_trailing_newline = false;
    }

  if (ferror (c->fp))
    return false;

  /* At this point, we've found the end of the line.  It either
     points to the '\n' or to one byte after the last byte of the
     file.  */
  gcc_assert (line_end != NULL);

  len = line_end - line_start;

  if (c->line_start_idx < c->nb_read)
    *line = line_start;

  ++c->line_num;

  /* Before we update our line record, make sure the hint about the
     total number of lines of the file is correct.  If it's not, then
     we give up recording line boundaries from now on.  */
  bool update_line_record = true;
  if (c->line_num > c->total_lines)
    update_line_record = false;

  /* Now update our line record so that re-reading lines from
     before c->line_start_idx is faster.  */
  if (update_line_record
      && c->line_record.length () < fcache_line_record_size)
    {
      /* If the file's lines fit in the line record, we just record
         all its lines ...  */
      if (c->total_lines <= fcache_line_record_size
          && c->line_num > c->line_record.length ())
        c->line_record.safe_push (fcache::line_info (c->line_num,
                                                     c->line_start_idx,
                                                     line_end - c->data));
      else if (c->total_lines > fcache_line_record_size)
        {
          /* ... otherwise, we just scale total_lines down to
             fcache_line_record_size lines.  */
          size_t n = (c->line_num * fcache_line_record_size) / c->total_lines;
          if (c->line_record.length () == 0
              || n >= c->line_record.length ())
            c->line_record.safe_push (fcache::line_info (c->line_num,
                                                         c->line_start_idx,
                                                         line_end - c->data));
        }
    }

  /* Update c->line_start_idx so that it points to the next line to be
     read.  */
  if (next_line_start)
    c->line_start_idx = next_line_start - c->data;
  else
    /* We didn't find any terminal '\n'.  Let's consider that the end
       of line is the end of the data in the cache.  The next
       invocation of get_next_line will either read more data from the
       underlying file or return false early because we've reached the
       end of the file.  */
    c->line_start_idx = c->nb_read;

  *line_len = len;

  return true;
}
633
634/* Consume the next bytes coming from the cache (or from its
635 underlying file if there are remaining unread bytes in the file)
636 until we reach the next end-of-line (or end-of-file). There is no
637 copying from the cache involved. Return TRUE upon successful
638 completion. */
639
640static bool
641goto_next_line (fcache *cache)
642{
643 char *l;
644 ssize_t len;
645
646 return get_next_line (cache, &l, &len);
647}
648
/* Read an arbitrary line number LINE_NUM from the file cached in C.
   LINE_NUM must be greater than zero.  If the line was read
   successfully, *LINE points to the beginning of the line in the
   file cache and *LINE_LEN is the length of the line.  *LINE is not
   nul-terminated, but may contain zero bytes.  *LINE is only valid
   until the next call of read_line_num.
   This function returns true if a line was read, false otherwise.  */

static bool
read_line_num (fcache *c, size_t line_num,
               char **line, ssize_t *line_len)
{
  gcc_assert (line_num > 0);

  if (line_num <= c->line_num)
    {
      /* We've been asked to read lines that are before c->line_num.
         So let's use our line record (if it's not empty) to try to
         avoid re-reading the file from the beginning again.  */

      if (c->line_record.is_empty ())
        {
          /* Nothing recorded: rewind to the start of the data.  */
          c->line_start_idx = 0;
          c->line_num = 0;
        }
      else
        {
          fcache::line_info *i = NULL;
          if (c->total_lines <= fcache_line_record_size)
            {
              /* In languages where the input file is not totally
                 preprocessed up front, the c->total_lines hint
                 can be smaller than the number of lines of the
                 file.  In that case, only the first
                 c->total_lines have been recorded.

                 Otherwise, the first c->total_lines we've read have
                 their start/end recorded here.  */
              i = (line_num <= c->total_lines)
                ? &c->line_record[line_num - 1]
                : &c->line_record[c->total_lines - 1];
              gcc_assert (i->line_num <= line_num);
            }
          else
            {
              /* So the file had more lines than our line record
                 size.  Thus the number of lines we've recorded has
                 been scaled down to fcache_line_record_size.  Let's
                 pick the start/end of the recorded line that is
                 closest to line_num.  */
              size_t n = (line_num <= c->total_lines)
                ? line_num * fcache_line_record_size / c->total_lines
                : c ->line_record.length () - 1;
              if (n < c->line_record.length ())
                {
                  i = &c->line_record[n];
                  gcc_assert (i->line_num <= line_num);
                }
            }

          if (i && i->line_num == line_num)
            {
              /* We have the start/end of the line.  */
              *line = c->data + i->start_pos;
              *line_len = i->end_pos - i->start_pos;
              return true;
            }

          if (i)
            {
              /* Resume reading from the recorded line, which is at
                 or before the one requested.  */
              c->line_start_idx = i->start_pos;
              c->line_num = i->line_num - 1;
            }
          else
            {
              /* No usable record: rewind to the start of the data.  */
              c->line_start_idx = 0;
              c->line_num = 0;
            }
        }
    }

  /* Let's walk from line c->line_num up to line_num - 1, without
     copying any line.  */
  while (c->line_num < line_num - 1)
    if (!goto_next_line (c))
      return false;

  /* The line we want is the next one.  Let's read it and hand it back
     to the caller.  */
  return get_next_line (c, line, line_len);
}
739
740/* Return the physical source line that corresponds to FILE_PATH/LINE.
741 The line is not nul-terminated. The returned pointer is only
742 valid until the next call of location_get_source_line.
743 Note that the line can contain several null characters,
744 so LINE_LEN, if non-null, points to the actual length of the line.
745 If the function fails, NULL is returned. */
746
747const char *
748location_get_source_line (const char *file_path, int line,
749 int *line_len)
750{
751 char *buffer = NULL;
752 ssize_t len;
753
754 if (line == 0)
755 return NULL;
756
757 fcache *c = lookup_or_add_file_to_cache_tab (file_path);
758 if (c == NULL)
759 return NULL;
760
761 bool read = read_line_num (c, line, &buffer, &len);
762
763 if (read && line_len)
764 *line_len = len;
765
766 return read ? buffer : NULL;
767}
768
769/* Determine if FILE_PATH missing a trailing newline on its final line.
770 Only valid to call once all of the file has been loaded, by
771 requesting a line number beyond the end of the file. */
772
773bool
774location_missing_trailing_newline (const char *file_path)
775{
776 fcache *c = lookup_or_add_file_to_cache_tab (file_path);
777 if (c == NULL)
778 return false;
779
780 return c->missing_trailing_newline;
781}
782
783/* Test if the location originates from the spelling location of a
784 builtin-tokens. That is, return TRUE if LOC is a (possibly
785 virtual) location of a built-in token that appears in the expansion
786 list of a macro. Please note that this function also works on
787 tokens that result from built-in tokens. For instance, the
788 function would return true if passed a token "4" that is the result
789 of the expansion of the built-in __LINE__ macro. */
790bool
791is_location_from_builtin_token (source_location loc)
792{
793 const line_map_ordinary *map = NULL;
794 loc = linemap_resolve_location (line_table, loc,
795 LRK_SPELLING_LOCATION, &map);
796 return loc == BUILTINS_LOCATION;
797}
798
799/* Expand the source location LOC into a human readable location. If
800 LOC is virtual, it resolves to the expansion point of the involved
801 macro. If LOC resolves to a builtin location, the file name of the
802 readable location is set to the string "<built-in>". */
803
804expanded_location
805expand_location (source_location loc)
806{
807 return expand_location_1 (loc, /*expansion_point_p=*/true,
808 LOCATION_ASPECT_CARET);
809}
810
811/* Expand the source location LOC into a human readable location. If
812 LOC is virtual, it resolves to the expansion location of the
813 relevant macro. If LOC resolves to a builtin location, the file
814 name of the readable location is set to the string
815 "<built-in>". */
816
817expanded_location
818expand_location_to_spelling_point (source_location loc)
819{
820 return expand_location_1 (loc, /*expansion_point_p=*/false,
821 LOCATION_ASPECT_CARET);
822}
823
824/* The rich_location class within libcpp requires a way to expand
825 source_location instances, and relies on the client code
826 providing a symbol named
827 linemap_client_expand_location_to_spelling_point
828 to do this.
829
830 This is the implementation for libcommon.a (all host binaries),
831 which simply calls into expand_location_1. */
832
833expanded_location
834linemap_client_expand_location_to_spelling_point (source_location loc,
835 enum location_aspect aspect)
836{
837 return expand_location_1 (loc, /*expansion_point_p=*/false, aspect);
838}
839
840
841/* If LOCATION is in a system header and if it is a virtual location for
842 a token coming from the expansion of a macro, unwind it to the
843 location of the expansion point of the macro. Otherwise, just return
844 LOCATION.
845
846 This is used for instance when we want to emit diagnostics about a
847 token that may be located in a macro that is itself defined in a
848 system header, for example, for the NULL macro. In such a case, if
849 LOCATION were passed directly to diagnostic functions such as
850 warning_at, the diagnostic would be suppressed (unless
851 -Wsystem-headers). */
852
853source_location
854expansion_point_location_if_in_system_header (source_location location)
855{
856 if (in_system_header_at (location))
857 location = linemap_resolve_location (line_table, location,
858 LRK_MACRO_EXPANSION_POINT,
859 NULL);
860 return location;
861}
862
863/* If LOCATION is a virtual location for a token coming from the expansion
864 of a macro, unwind to the location of the expansion point of the macro. */
865
866source_location
867expansion_point_location (source_location location)
868{
869 return linemap_resolve_location (line_table, location,
870 LRK_MACRO_EXPANSION_POINT, NULL);
871}
872
873/* Construct a location with caret at CARET, ranging from START to
874 finish e.g.
875
876 11111111112
877 12345678901234567890
878 522
879 523 return foo + bar;
880 ~~~~^~~~~
881 524
882
883 The location's caret is at the "+", line 523 column 15, but starts
884 earlier, at the "f" of "foo" at column 11. The finish is at the "r"
885 of "bar" at column 19. */
886
887location_t
888make_location (location_t caret, location_t start, location_t finish)
889{
890 location_t pure_loc = get_pure_location (caret);
891 source_range src_range;
892 src_range.m_start = get_start (start);
893 src_range.m_finish = get_finish (finish);
894 location_t combined_loc = COMBINE_LOCATION_DATA (line_table,
895 pure_loc,
896 src_range,
897 NULL);
898 return combined_loc;
899}
900
901/* Same as above, but taking a source range rather than two locations. */
902
903location_t
904make_location (location_t caret, source_range src_range)
905{
906 location_t pure_loc = get_pure_location (caret);
907 return COMBINE_LOCATION_DATA (line_table, pure_loc, src_range, NULL);
908}
909
910#define ONE_K 1024
911#define ONE_M (ONE_K * ONE_K)
912
/* Display a number as an integer multiple of either:
   - 1024, if said integer is >= 10 K (in base 2)
   - 1024 * 1024, if said integer is >= 10 M (in base 2)
 */
917#define SCALE(x) ((unsigned long) ((x) < 10 * ONE_K \
918 ? (x) \
919 : ((x) < 10 * ONE_M \
920 ? (x) / ONE_K \
921 : (x) / ONE_M)))
922
/* For a given integer, display either:
   - the character 'k', if the number is at least 10 K (in base 2)
     but strictly lower than 10 M (in base 2)
   - the character 'M' if the number is at least 10 M (in base 2)
   - the character ' ' if the number is strictly lower than 10 K  */
928#define STAT_LABEL(x) ((x) < 10 * ONE_K ? ' ' : ((x) < 10 * ONE_M ? 'k' : 'M'))
929
930/* Display an integer amount as multiple of 1K or 1M (in base 2).
931 Display the correct unit (either k, M, or ' ') after the amount, as
932 well. */
933#define FORMAT_AMOUNT(size) SCALE (size), STAT_LABEL (size)
934
935/* Dump statistics to stderr about the memory usage of the line_table
936 set of line maps. This also displays some statistics about macro
937 expansion. */
938
939void
940dump_line_table_statistics (void)
941{
942 struct linemap_stats s;
943 long total_used_map_size,
944 macro_maps_size,
945 total_allocated_map_size;
946
947 memset (&s, 0, sizeof (s));
948
949 linemap_get_statistics (line_table, &s);
950
951 macro_maps_size = s.macro_maps_used_size
952 + s.macro_maps_locations_size;
953
954 total_allocated_map_size = s.ordinary_maps_allocated_size
955 + s.macro_maps_allocated_size
956 + s.macro_maps_locations_size;
957
958 total_used_map_size = s.ordinary_maps_used_size
959 + s.macro_maps_used_size
960 + s.macro_maps_locations_size;
961
962 fprintf (stderr, "Number of expanded macros: %5ld\n",
963 s.num_expanded_macros);
964 if (s.num_expanded_macros != 0)
965 fprintf (stderr, "Average number of tokens per macro expansion: %5ld\n",
966 s.num_macro_tokens / s.num_expanded_macros);
967 fprintf (stderr,
968 "\nLine Table allocations during the "
969 "compilation process\n");
970 fprintf (stderr, "Number of ordinary maps used: %5ld%c\n",
971 SCALE (s.num_ordinary_maps_used),
972 STAT_LABEL (s.num_ordinary_maps_used));
973 fprintf (stderr, "Ordinary map used size: %5ld%c\n",
974 SCALE (s.ordinary_maps_used_size),
975 STAT_LABEL (s.ordinary_maps_used_size));
976 fprintf (stderr, "Number of ordinary maps allocated: %5ld%c\n",
977 SCALE (s.num_ordinary_maps_allocated),
978 STAT_LABEL (s.num_ordinary_maps_allocated));
979 fprintf (stderr, "Ordinary maps allocated size: %5ld%c\n",
980 SCALE (s.ordinary_maps_allocated_size),
981 STAT_LABEL (s.ordinary_maps_allocated_size));
982 fprintf (stderr, "Number of macro maps used: %5ld%c\n",
983 SCALE (s.num_macro_maps_used),
984 STAT_LABEL (s.num_macro_maps_used));
985 fprintf (stderr, "Macro maps used size: %5ld%c\n",
986 SCALE (s.macro_maps_used_size),
987 STAT_LABEL (s.macro_maps_used_size));
988 fprintf (stderr, "Macro maps locations size: %5ld%c\n",
989 SCALE (s.macro_maps_locations_size),
990 STAT_LABEL (s.macro_maps_locations_size));
991 fprintf (stderr, "Macro maps size: %5ld%c\n",
992 SCALE (macro_maps_size),
993 STAT_LABEL (macro_maps_size));
994 fprintf (stderr, "Duplicated maps locations size: %5ld%c\n",
995 SCALE (s.duplicated_macro_maps_locations_size),
996 STAT_LABEL (s.duplicated_macro_maps_locations_size));
997 fprintf (stderr, "Total allocated maps size: %5ld%c\n",
998 SCALE (total_allocated_map_size),
999 STAT_LABEL (total_allocated_map_size));
1000 fprintf (stderr, "Total used maps size: %5ld%c\n",
1001 SCALE (total_used_map_size),
1002 STAT_LABEL (total_used_map_size));
1003 fprintf (stderr, "Ad-hoc table size: %5ld%c\n",
1004 SCALE (s.adhoc_table_size),
1005 STAT_LABEL (s.adhoc_table_size));
1006 fprintf (stderr, "Ad-hoc table entries used: %5ld\n",
1007 s.adhoc_table_entries_used);
1008 fprintf (stderr, "optimized_ranges: %i\n",
1009 line_table->num_optimized_ranges);
1010 fprintf (stderr, "unoptimized_ranges: %i\n",
1011 line_table->num_unoptimized_ranges);
1012
1013 fprintf (stderr, "\n");
1014}
1015
1016/* Get location one beyond the final location in ordinary map IDX. */
1017
1018static source_location
1019get_end_location (struct line_maps *set, unsigned int idx)
1020{
1021 if (idx == LINEMAPS_ORDINARY_USED (set) - 1)
1022 return set->highest_location;
1023
1024 struct line_map *next_map = LINEMAPS_ORDINARY_MAP_AT (set, idx + 1);
1025 return MAP_START_LOCATION (next_map);
1026}
1027
/* Helper function for write_digit_row.
   Write the least-significant decimal digit of DIGIT to STREAM as a
   single character.  The sign of DIGIT is ignored, so a negative value
   still produces a character in the range '0'..'9'.  */

static void
write_digit (FILE *stream, int digit)
{
  /* The result of % takes the sign of the dividend, so normalize it
     before offsetting from '0'.  UNITS lies within -9..9, hence the
     negation cannot overflow.  */
  int units = digit % 10;
  if (units < 0)
    units = -units;
  fputc ('0' + units, stream);
}
1035
1036/* Helper function for dump_location_info.
1037 Write a row of numbers to STREAM, numbering a source line,
1038 giving the units, tens, hundreds etc of the column number. */
1039
1040static void
1041write_digit_row (FILE *stream, int indent,
1042 const line_map_ordinary *map,
1043 source_location loc, int max_col, int divisor)
1044{
1045 fprintf (stream, "%*c", indent, ' ');
1046 fprintf (stream, "|");
1047 for (int column = 1; column < max_col; column++)
1048 {
1049 source_location column_loc = loc + (column << map->m_range_bits);
1050 write_digit (stream, column_loc / divisor);
1051 }
1052 fprintf (stream, "\n");
1053}
1054
1055/* Write a half-closed (START) / half-open (END) interval of
1056 source_location to STREAM. */
1057
1058static void
1059dump_location_range (FILE *stream,
1060 source_location start, source_location end)
1061{
1062 fprintf (stream,
1063 " source_location interval: %u <= loc < %u\n",
1064 start, end);
1065}
1066
1067/* Write a labelled description of a half-closed (START) / half-open (END)
1068 interval of source_location to STREAM. */
1069
1070static void
1071dump_labelled_location_range (FILE *stream,
1072 const char *name,
1073 source_location start, source_location end)
1074{
1075 fprintf (stream, "%s\n", name);
1076 dump_location_range (stream, start, end);
1077 fprintf (stream, "\n");
1078}
1079
1080/* Write a visualization of the locations in the line_table to STREAM. */
1081
1082void
1083dump_location_info (FILE *stream)
1084{
1085 /* Visualize the reserved locations. */
1086 dump_labelled_location_range (stream, "RESERVED LOCATIONS",
1087 0, RESERVED_LOCATION_COUNT);
1088
1089 /* Visualize the ordinary line_map instances, rendering the sources. */
1090 for (unsigned int idx = 0; idx < LINEMAPS_ORDINARY_USED (line_table); idx++)
1091 {
1092 source_location end_location = get_end_location (line_table, idx);
1093 /* half-closed: doesn't include this one. */
1094
1095 const line_map_ordinary *map
1096 = LINEMAPS_ORDINARY_MAP_AT (line_table, idx);
1097 fprintf (stream, "ORDINARY MAP: %i\n", idx);
1098 dump_location_range (stream,
1099 MAP_START_LOCATION (map), end_location);
1100 fprintf (stream, " file: %s\n", ORDINARY_MAP_FILE_NAME (map));
1101 fprintf (stream, " starting at line: %i\n",
1102 ORDINARY_MAP_STARTING_LINE_NUMBER (map));
1103 fprintf (stream, " column and range bits: %i\n",
1104 map->m_column_and_range_bits);
1105 fprintf (stream, " column bits: %i\n",
1106 map->m_column_and_range_bits - map->m_range_bits);
1107 fprintf (stream, " range bits: %i\n",
1108 map->m_range_bits);
1109
1110 /* Render the span of source lines that this "map" covers. */
1111 for (source_location loc = MAP_START_LOCATION (map);
1112 loc < end_location;
1113 loc += (1 << map->m_range_bits) )
1114 {
1115 gcc_assert (pure_location_p (line_table, loc) );
1116
1117 expanded_location exploc
1118 = linemap_expand_location (line_table, map, loc);
1119
1120 if (0 == exploc.column)
1121 {
1122 /* Beginning of a new source line: draw the line. */
1123
1124 int line_size;
1125 const char *line_text = location_get_source_line (exploc.file,
1126 exploc.line,
1127 &line_size);
1128 if (!line_text)
1129 break;
1130 fprintf (stream,
1131 "%s:%3i|loc:%5i|%.*s\n",
1132 exploc.file, exploc.line,
1133 loc,
1134 line_size, line_text);
1135
1136 /* "loc" is at column 0, which means "the whole line".
1137 Render the locations *within* the line, by underlining
1138 it, showing the source_location numeric values
1139 at each column. */
1140 int max_col = (1 << map->m_column_and_range_bits) - 1;
1141 if (max_col > line_size)
1142 max_col = line_size + 1;
1143
1144 int indent = 14 + strlen (exploc.file);
1145
1146 /* Thousands. */
1147 if (end_location > 999)
1148 write_digit_row (stream, indent, map, loc, max_col, 1000);
1149
1150 /* Hundreds. */
1151 if (end_location > 99)
1152 write_digit_row (stream, indent, map, loc, max_col, 100);
1153
1154 /* Tens. */
1155 write_digit_row (stream, indent, map, loc, max_col, 10);
1156
1157 /* Units. */
1158 write_digit_row (stream, indent, map, loc, max_col, 1);
1159 }
1160 }
1161 fprintf (stream, "\n");
1162 }
1163
1164 /* Visualize unallocated values. */
1165 dump_labelled_location_range (stream, "UNALLOCATED LOCATIONS",
1166 line_table->highest_location,
1167 LINEMAPS_MACRO_LOWEST_LOCATION (line_table));
1168
1169 /* Visualize the macro line_map instances, rendering the sources. */
1170 for (unsigned int i = 0; i < LINEMAPS_MACRO_USED (line_table); i++)
1171 {
1172 /* Each macro map that is allocated owns source_location values
1173 that are *lower* that the one before them.
1174 Hence it's meaningful to view them either in order of ascending
1175 source locations, or in order of ascending macro map index. */
1176 const bool ascending_source_locations = true;
1177 unsigned int idx = (ascending_source_locations
1178 ? (LINEMAPS_MACRO_USED (line_table) - (i + 1))
1179 : i);
1180 const line_map_macro *map = LINEMAPS_MACRO_MAP_AT (line_table, idx);
1181 fprintf (stream, "MACRO %i: %s (%u tokens)\n",
1182 idx,
1183 linemap_map_get_macro_name (map),
1184 MACRO_MAP_NUM_MACRO_TOKENS (map));
1185 dump_location_range (stream,
1186 map->start_location,
1187 (map->start_location
1188 + MACRO_MAP_NUM_MACRO_TOKENS (map)));
1189 inform (MACRO_MAP_EXPANSION_POINT_LOCATION (map),
1190 "expansion point is location %i",
1191 MACRO_MAP_EXPANSION_POINT_LOCATION (map));
1192 fprintf (stream, " map->start_location: %u\n",
1193 map->start_location);
1194
1195 fprintf (stream, " macro_locations:\n");
1196 for (unsigned int i = 0; i < MACRO_MAP_NUM_MACRO_TOKENS (map); i++)
1197 {
1198 source_location x = MACRO_MAP_LOCATIONS (map)[2 * i];
1199 source_location y = MACRO_MAP_LOCATIONS (map)[(2 * i) + 1];
1200
1201 /* linemap_add_macro_token encodes token numbers in an expansion
1202 by putting them after MAP_START_LOCATION. */
1203
1204 /* I'm typically seeing 4 uninitialized entries at the end of
1205 0xafafafaf.
1206 This appears to be due to macro.c:replace_args
1207 adding 2 extra args for padding tokens; presumably there may
1208 be a leading and/or trailing padding token injected,
1209 each for 2 more location slots.
1210 This would explain there being up to 4 source_locations slots
1211 that may be uninitialized. */
1212
1213 fprintf (stream, " %u: %u, %u\n",
1214 i,
1215 x,
1216 y);
1217 if (x == y)
1218 {
1219 if (x < MAP_START_LOCATION (map))
1220 inform (x, "token %u has x-location == y-location == %u", i, x);
1221 else
1222 fprintf (stream,
1223 "x-location == y-location == %u encodes token # %u\n",
1224 x, x - MAP_START_LOCATION (map));
1225 }
1226 else
1227 {
1228 inform (x, "token %u has x-location == %u", i, x);
1229 inform (x, "token %u has y-location == %u", i, y);
1230 }
1231 }
1232 fprintf (stream, "\n");
1233 }
1234
1235 /* It appears that MAX_SOURCE_LOCATION itself is never assigned to a
1236 macro map, presumably due to an off-by-one error somewhere
1237 between the logic in linemap_enter_macro and
1238 LINEMAPS_MACRO_LOWEST_LOCATION. */
1239 dump_labelled_location_range (stream, "MAX_SOURCE_LOCATION",
1240 MAX_SOURCE_LOCATION,
1241 MAX_SOURCE_LOCATION + 1);
1242
1243 /* Visualize ad-hoc values. */
1244 dump_labelled_location_range (stream, "AD-HOC LOCATIONS",
1245 MAX_SOURCE_LOCATION + 1, UINT_MAX);
1246}
1247
1248/* string_concat's constructor. */
1249
1250string_concat::string_concat (int num, location_t *locs)
1251 : m_num (num)
1252{
1253 m_locs = ggc_vec_alloc <location_t> (num);
1254 for (int i = 0; i < num; i++)
1255 m_locs[i] = locs[i];
1256}
1257
1258/* string_concat_db's constructor. */
1259
1260string_concat_db::string_concat_db ()
1261{
1262 m_table = hash_map <location_hash, string_concat *>::create_ggc (64);
1263}
1264
1265/* Record that a string concatenation occurred, covering NUM
1266 string literal tokens. LOCS is an array of size NUM, containing the
1267 locations of the tokens. A copy of LOCS is taken. */
1268
1269void
1270string_concat_db::record_string_concatenation (int num, location_t *locs)
1271{
1272 gcc_assert (num > 1);
1273 gcc_assert (locs);
1274
1275 location_t key_loc = get_key_loc (locs[0]);
1276
1277 string_concat *concat
1278 = new (ggc_alloc <string_concat> ()) string_concat (num, locs);
1279 m_table->put (key_loc, concat);
1280}
1281
1282/* Determine if LOC was the location of the the initial token of a
1283 concatenation of string literal tokens.
1284 If so, *OUT_NUM is written to with the number of tokens, and
1285 *OUT_LOCS with the location of an array of locations of the
1286 tokens, and return true. *OUT_LOCS is a borrowed pointer to
1287 storage owned by the string_concat_db.
1288 Otherwise, return false. */
1289
1290bool
1291string_concat_db::get_string_concatenation (location_t loc,
1292 int *out_num,
1293 location_t **out_locs)
1294{
1295 gcc_assert (out_num);
1296 gcc_assert (out_locs);
1297
1298 location_t key_loc = get_key_loc (loc);
1299
1300 string_concat **concat = m_table->get (key_loc);
1301 if (!concat)
1302 return false;
1303
1304 *out_num = (*concat)->m_num;
1305 *out_locs =(*concat)->m_locs;
1306 return true;
1307}
1308
1309/* Internal function. Canonicalize LOC into a form suitable for
1310 use as a key within the database, stripping away macro expansion,
1311 ad-hoc information, and range information, using the location of
1312 the start of LOC within an ordinary linemap. */
1313
1314location_t
1315string_concat_db::get_key_loc (location_t loc)
1316{
1317 loc = linemap_resolve_location (line_table, loc, LRK_SPELLING_LOCATION,
1318 NULL);
1319
1320 loc = get_range_from_loc (line_table, loc).m_start;
1321
1322 return loc;
1323}
1324
1325/* Helper class for use within get_substring_ranges_for_loc.
1326 An vec of cpp_string with responsibility for releasing all of the
1327 str->text for each str in the vector. */
1328
1329class auto_cpp_string_vec : public auto_vec <cpp_string>
1330{
1331 public:
1332 auto_cpp_string_vec (int alloc)
1333 : auto_vec <cpp_string> (alloc) {}
1334
1335 ~auto_cpp_string_vec ()
1336 {
1337 /* Clean up the copies within this vec. */
1338 int i;
1339 cpp_string *str;
1340 FOR_EACH_VEC_ELT (*this, i, str)
1341 free (const_cast <unsigned char *> (str->text));
1342 }
1343};
1344
1345/* Attempt to populate RANGES with source location information on the
1346 individual characters within the string literal found at STRLOC.
1347 If CONCATS is non-NULL, then any string literals that the token at
1348 STRLOC was concatenated with are also added to RANGES.
1349
1350 Return NULL if successful, or an error message if any errors occurred (in
1351 which case RANGES may be only partially populated and should not
1352 be used).
1353
1354 This is implemented by re-parsing the relevant source line(s). */
1355
1356static const char *
1357get_substring_ranges_for_loc (cpp_reader *pfile,
1358 string_concat_db *concats,
1359 location_t strloc,
1360 enum cpp_ttype type,
1361 cpp_substring_ranges &ranges)
1362{
1363 gcc_assert (pfile);
1364
1365 if (strloc == UNKNOWN_LOCATION)
1366 return "unknown location";
1367
1368 /* Reparsing the strings requires accurate location information.
1369 If -ftrack-macro-expansion has been overridden from its default
1370 of 2, then we might have a location of a macro expansion point,
1371 rather than the location of the literal itself.
1372 Avoid this by requiring that we have full macro expansion tracking
1373 for substring locations to be available. */
1374 if (cpp_get_options (pfile)->track_macro_expansion != 2)
1375 return "track_macro_expansion != 2";
1376
1377 /* If #line or # 44 "file"-style directives are present, then there's
1378 no guarantee that the line numbers we have can be used to locate
1379 the strings. For example, we might have a .i file with # directives
1380 pointing back to lines within a .c file, but the .c file might
1381 have been edited since the .i file was created.
1382 In such a case, the safest course is to disable on-demand substring
1383 locations. */
1384 if (line_table->seen_line_directive)
1385 return "seen line directive";
1386
1387 /* If string concatenation has occurred at STRLOC, get the locations
1388 of all of the literal tokens making up the compound string.
1389 Otherwise, just use STRLOC. */
1390 int num_locs = 1;
1391 location_t *strlocs = &strloc;
1392 if (concats)
1393 concats->get_string_concatenation (strloc, &num_locs, &strlocs);
1394
1395 auto_cpp_string_vec strs (num_locs);
1396 auto_vec <cpp_string_location_reader> loc_readers (num_locs);
1397 for (int i = 0; i < num_locs; i++)
1398 {
1399 /* Get range of strloc. We will use it to locate the start and finish
1400 of the literal token within the line. */
1401 source_range src_range = get_range_from_loc (line_table, strlocs[i]);
1402
1403 if (src_range.m_start >= LINEMAPS_MACRO_LOWEST_LOCATION (line_table))
1404 /* If the string is within a macro expansion, we can't get at the
1405 end location. */
1406 return "macro expansion";
1407
1408 if (src_range.m_start >= LINE_MAP_MAX_LOCATION_WITH_COLS)
1409 /* If so, we can't reliably determine where the token started within
1410 its line. */
1411 return "range starts after LINE_MAP_MAX_LOCATION_WITH_COLS";
1412
1413 if (src_range.m_finish >= LINE_MAP_MAX_LOCATION_WITH_COLS)
1414 /* If so, we can't reliably determine where the token finished within
1415 its line. */
1416 return "range ends after LINE_MAP_MAX_LOCATION_WITH_COLS";
1417
1418 expanded_location start
1419 = expand_location_to_spelling_point (src_range.m_start);
1420 expanded_location finish
1421 = expand_location_to_spelling_point (src_range.m_finish);
1422 if (start.file != finish.file)
1423 return "range endpoints are in different files";
1424 if (start.line != finish.line)
1425 return "range endpoints are on different lines";
1426 if (start.column > finish.column)
1427 return "range endpoints are reversed";
1428
1429 int line_width;
1430 const char *line = location_get_source_line (start.file, start.line,
1431 &line_width);
1432 if (line == NULL)
1433 return "unable to read source line";
1434
1435 /* Determine the location of the literal (including quotes
1436 and leading prefix chars, such as the 'u' in a u""
1437 token). */
1438 const char *literal = line + start.column - 1;
1439 int literal_length = finish.column - start.column + 1;
1440
1441 /* Ensure that we don't crash if we got the wrong location. */
1442 if (line_width < (start.column - 1 + literal_length))
1443 return "line is not wide enough";
1444
1445 cpp_string from;
1446 from.len = literal_length;
1447 /* Make a copy of the literal, to avoid having to rely on
1448 the lifetime of the copy of the line within the cache.
1449 This will be released by the auto_cpp_string_vec dtor. */
1450 from.text = XDUPVEC (unsigned char, literal, literal_length);
1451 strs.safe_push (from);
1452
1453 /* For very long lines, a new linemap could have started
1454 halfway through the token.
1455 Ensure that the loc_reader uses the linemap of the
1456 *end* of the token for its start location. */
1457 const line_map_ordinary *final_ord_map;
1458 linemap_resolve_location (line_table, src_range.m_finish,
1459 LRK_MACRO_EXPANSION_POINT, &final_ord_map);
1460 location_t start_loc
1461 = linemap_position_for_line_and_column (line_table, final_ord_map,
1462 start.line, start.column);
1463
1464 cpp_string_location_reader loc_reader (start_loc, line_table);
1465 loc_readers.safe_push (loc_reader);
1466 }
1467
1468 /* Rerun cpp_interpret_string, or rather, a modified version of it. */
1469 const char *err = cpp_interpret_string_ranges (pfile, strs.address (),
1470 loc_readers.address (),
1471 num_locs, &ranges, type);
1472 if (err)
1473 return err;
1474
1475 /* Success: "ranges" should now contain information on the string. */
1476 return NULL;
1477}
1478
1479/* Attempt to populate *OUT_LOC with source location information on the
1480 given characters within the string literal found at STRLOC.
1481 CARET_IDX, START_IDX, and END_IDX refer to offsets within the execution
1482 character set.
1483
1484 For example, given CARET_IDX = 4, START_IDX = 3, END_IDX = 7
1485 and string literal "012345\n789"
1486 *OUT_LOC is written to with:
1487 "012345\n789"
1488 ~^~~~~
1489
1490 If CONCATS is non-NULL, then any string literals that the token at
1491 STRLOC was concatenated with are also considered.
1492
1493 This is implemented by re-parsing the relevant source line(s).
1494
1495 Return NULL if successful, or an error message if any errors occurred.
1496 Error messages are intended for GCC developers (to help debugging) rather
1497 than for end-users. */
1498
1499const char *
1500get_source_location_for_substring (cpp_reader *pfile,
1501 string_concat_db *concats,
1502 location_t strloc,
1503 enum cpp_ttype type,
1504 int caret_idx, int start_idx, int end_idx,
1505 source_location *out_loc)
1506{
1507 gcc_checking_assert (caret_idx >= 0);
1508 gcc_checking_assert (start_idx >= 0);
1509 gcc_checking_assert (end_idx >= 0);
1510 gcc_assert (out_loc);
1511
1512 cpp_substring_ranges ranges;
1513 const char *err
1514 = get_substring_ranges_for_loc (pfile, concats, strloc, type, ranges);
1515 if (err)
1516 return err;
1517
1518 if (caret_idx >= ranges.get_num_ranges ())
1519 return "caret_idx out of range";
1520 if (start_idx >= ranges.get_num_ranges ())
1521 return "start_idx out of range";
1522 if (end_idx >= ranges.get_num_ranges ())
1523 return "end_idx out of range";
1524
1525 *out_loc = make_location (ranges.get_range (caret_idx).m_start,
1526 ranges.get_range (start_idx).m_start,
1527 ranges.get_range (end_idx).m_finish);
1528 return NULL;
1529}
1530
1531#if CHECKING_P
1532
1533namespace selftest {
1534
1535/* Selftests of location handling. */
1536
1537/* Attempt to populate *OUT_RANGE with source location information on the
1538 given character within the string literal found at STRLOC.
1539 CHAR_IDX refers to an offset within the execution character set.
1540 If CONCATS is non-NULL, then any string literals that the token at
1541 STRLOC was concatenated with are also considered.
1542
1543 This is implemented by re-parsing the relevant source line(s).
1544
1545 Return NULL if successful, or an error message if any errors occurred.
1546 Error messages are intended for GCC developers (to help debugging) rather
1547 than for end-users. */
1548
1549static const char *
1550get_source_range_for_char (cpp_reader *pfile,
1551 string_concat_db *concats,
1552 location_t strloc,
1553 enum cpp_ttype type,
1554 int char_idx,
1555 source_range *out_range)
1556{
1557 gcc_checking_assert (char_idx >= 0);
1558 gcc_assert (out_range);
1559
1560 cpp_substring_ranges ranges;
1561 const char *err
1562 = get_substring_ranges_for_loc (pfile, concats, strloc, type, ranges);
1563 if (err)
1564 return err;
1565
1566 if (char_idx >= ranges.get_num_ranges ())
1567 return "char_idx out of range";
1568
1569 *out_range = ranges.get_range (char_idx);
1570 return NULL;
1571}
1572
1573/* As get_source_range_for_char, but write to *OUT the number
1574 of ranges that are available. */
1575
1576static const char *
1577get_num_source_ranges_for_substring (cpp_reader *pfile,
1578 string_concat_db *concats,
1579 location_t strloc,
1580 enum cpp_ttype type,
1581 int *out)
1582{
1583 gcc_assert (out);
1584
1585 cpp_substring_ranges ranges;
1586 const char *err
1587 = get_substring_ranges_for_loc (pfile, concats, strloc, type, ranges);
1588
1589 if (err)
1590 return err;
1591
1592 *out = ranges.get_num_ranges ();
1593 return NULL;
1594}
1595
1596/* Selftests of location handling. */
1597
1598/* Helper function for verifying location data: when location_t
1599 values are > LINE_MAP_MAX_LOCATION_WITH_COLS, they are treated
1600 as having column 0. */
1601
1602static bool
1603should_have_column_data_p (location_t loc)
1604{
1605 if (IS_ADHOC_LOC (loc))
1606 loc = get_location_from_adhoc_loc (line_table, loc);
1607 if (loc > LINE_MAP_MAX_LOCATION_WITH_COLS)
1608 return false;
1609 return true;
1610}
1611
1612/* Selftest for should_have_column_data_p. */
1613
1614static void
1615test_should_have_column_data_p ()
1616{
1617 ASSERT_TRUE (should_have_column_data_p (RESERVED_LOCATION_COUNT));
1618 ASSERT_TRUE
1619 (should_have_column_data_p (LINE_MAP_MAX_LOCATION_WITH_COLS));
1620 ASSERT_FALSE
1621 (should_have_column_data_p (LINE_MAP_MAX_LOCATION_WITH_COLS + 1));
1622}
1623
1624/* Verify the result of LOCATION_FILE/LOCATION_LINE/LOCATION_COLUMN
1625 on LOC. */
1626
1627static void
1628assert_loceq (const char *exp_filename, int exp_linenum, int exp_colnum,
1629 location_t loc)
1630{
1631 ASSERT_STREQ (exp_filename, LOCATION_FILE (loc));
1632 ASSERT_EQ (exp_linenum, LOCATION_LINE (loc));
1633 /* If location_t values are sufficiently high, then column numbers
1634 will be unavailable and LOCATION_COLUMN (loc) will be 0.
1635 When close to the threshold, column numbers *may* be present: if
1636 the final linemap before the threshold contains a line that straddles
1637 the threshold, locations in that line have column information. */
1638 if (should_have_column_data_p (loc))
1639 ASSERT_EQ (exp_colnum, LOCATION_COLUMN (loc));
1640}
1641
/* Various selftests involve constructing a line table and one or more
   line maps within it.

   For maximum test coverage we want to run these tests with a variety
   of situations:
   - line_table->default_range_bits: some frontends use a non-zero value
     and others use zero
   - the fallback modes within line-map.c: there are various threshold
     values for source_location/location_t beyond which line-map.c
     changes behavior (disabling of the range-packing optimization,
     disabling of column-tracking).  We can exercise these by starting
     the line_table at interesting values at or near these thresholds.

   The following struct describes a particular case within our test
   matrix.  */

struct line_table_case
{
  /* The value to use for line_table->default_range_bits.  */
  int m_default_range_bits;

  /* If non-zero, the value at which the line_table's highest_location
     and highest_line are started.  */
  int m_base_location;

  line_table_case (int default_range_bits, int base_location)
    : m_default_range_bits (default_range_bits),
      m_base_location (base_location)
  {}
};
1668
1669/* Constructor. Store the old value of line_table, and create a new
1670 one, using sane defaults. */
1671
1672line_table_test::line_table_test ()
1673{
1674 gcc_assert (saved_line_table == NULL);
1675 saved_line_table = line_table;
1676 line_table = ggc_alloc<line_maps> ();
1677 linemap_init (line_table, BUILTINS_LOCATION);
1678 gcc_assert (saved_line_table->reallocator);
1679 line_table->reallocator = saved_line_table->reallocator;
1680 gcc_assert (saved_line_table->round_alloc_size);
1681 line_table->round_alloc_size = saved_line_table->round_alloc_size;
1682 line_table->default_range_bits = 0;
1683}
1684
1685/* Constructor. Store the old value of line_table, and create a new
1686 one, using the sitation described in CASE_. */
1687
1688line_table_test::line_table_test (const line_table_case &case_)
1689{
1690 gcc_assert (saved_line_table == NULL);
1691 saved_line_table = line_table;
1692 line_table = ggc_alloc<line_maps> ();
1693 linemap_init (line_table, BUILTINS_LOCATION);
1694 gcc_assert (saved_line_table->reallocator);
1695 line_table->reallocator = saved_line_table->reallocator;
1696 gcc_assert (saved_line_table->round_alloc_size);
1697 line_table->round_alloc_size = saved_line_table->round_alloc_size;
1698 line_table->default_range_bits = case_.m_default_range_bits;
1699 if (case_.m_base_location)
1700 {
1701 line_table->highest_location = case_.m_base_location;
1702 line_table->highest_line = case_.m_base_location;
1703 }
1704}
1705
1706/* Destructor. Restore the old value of line_table. */
1707
1708line_table_test::~line_table_test ()
1709{
1710 gcc_assert (saved_line_table != NULL);
1711 line_table = saved_line_table;
1712 saved_line_table = NULL;
1713}
1714
1715/* Verify basic operation of ordinary linemaps. */
1716
1717static void
1718test_accessing_ordinary_linemaps (const line_table_case &case_)
1719{
1720 line_table_test ltt (case_);
1721
1722 /* Build a simple linemap describing some locations. */
1723 linemap_add (line_table, LC_ENTER, false, "foo.c", 0);
1724
1725 linemap_line_start (line_table, 1, 100);
1726 location_t loc_a = linemap_position_for_column (line_table, 1);
1727 location_t loc_b = linemap_position_for_column (line_table, 23);
1728
1729 linemap_line_start (line_table, 2, 100);
1730 location_t loc_c = linemap_position_for_column (line_table, 1);
1731 location_t loc_d = linemap_position_for_column (line_table, 17);
1732
1733 /* Example of a very long line. */
1734 linemap_line_start (line_table, 3, 2000);
1735 location_t loc_e = linemap_position_for_column (line_table, 700);
1736
1737 /* Transitioning back to a short line. */
1738 linemap_line_start (line_table, 4, 0);
1739 location_t loc_back_to_short = linemap_position_for_column (line_table, 100);
1740
1741 if (should_have_column_data_p (loc_back_to_short))
1742 {
1743 /* Verify that we switched to short lines in the linemap. */
1744 line_map_ordinary *map = LINEMAPS_LAST_ORDINARY_MAP (line_table);
1745 ASSERT_EQ (7, map->m_column_and_range_bits - map->m_range_bits);
1746 }
1747
1748 /* Example of a line that will eventually be seen to be longer
1749 than LINE_MAP_MAX_COLUMN_NUMBER; the initially seen width is
1750 below that. */
1751 linemap_line_start (line_table, 5, 2000);
1752
1753 location_t loc_start_of_very_long_line
1754 = linemap_position_for_column (line_table, 2000);
1755 location_t loc_too_wide
1756 = linemap_position_for_column (line_table, 4097);
1757 location_t loc_too_wide_2
1758 = linemap_position_for_column (line_table, 4098);
1759
1760 /* ...and back to a sane line length. */
1761 linemap_line_start (line_table, 6, 100);
1762 location_t loc_sane_again = linemap_position_for_column (line_table, 10);
1763
1764 linemap_add (line_table, LC_LEAVE, false, NULL, 0);
1765
1766 /* Multiple files. */
1767 linemap_add (line_table, LC_ENTER, false, "bar.c", 0);
1768 linemap_line_start (line_table, 1, 200);
1769 location_t loc_f = linemap_position_for_column (line_table, 150);
1770 linemap_add (line_table, LC_LEAVE, false, NULL, 0);
1771
1772 /* Verify that we can recover the location info. */
1773 assert_loceq ("foo.c", 1, 1, loc_a);
1774 assert_loceq ("foo.c", 1, 23, loc_b);
1775 assert_loceq ("foo.c", 2, 1, loc_c);
1776 assert_loceq ("foo.c", 2, 17, loc_d);
1777 assert_loceq ("foo.c", 3, 700, loc_e);
1778 assert_loceq ("foo.c", 4, 100, loc_back_to_short);
1779
1780 /* In the very wide line, the initial location should be fully tracked. */
1781 assert_loceq ("foo.c", 5, 2000, loc_start_of_very_long_line);
1782 /* ...but once we exceed LINE_MAP_MAX_COLUMN_NUMBER column-tracking should
1783 be disabled. */
1784 assert_loceq ("foo.c", 5, 0, loc_too_wide);
1785 assert_loceq ("foo.c", 5, 0, loc_too_wide_2);
1786 /*...and column-tracking should be re-enabled for subsequent lines. */
1787 assert_loceq ("foo.c", 6, 10, loc_sane_again);
1788
1789 assert_loceq ("bar.c", 1, 150, loc_f);
1790
1791 ASSERT_FALSE (is_location_from_builtin_token (loc_a));
1792 ASSERT_TRUE (pure_location_p (line_table, loc_a));
1793
1794 /* Verify using make_location to build a range, and extracting data
1795 back from it. */
1796 location_t range_c_b_d = make_location (loc_c, loc_b, loc_d);
1797 ASSERT_FALSE (pure_location_p (line_table, range_c_b_d));
1798 ASSERT_EQ (loc_c, get_location_from_adhoc_loc (line_table, range_c_b_d));
1799 source_range src_range = get_range_from_loc (line_table, range_c_b_d);
1800 ASSERT_EQ (loc_b, src_range.m_start);
1801 ASSERT_EQ (loc_d, src_range.m_finish);
1802}
1803
1804/* Verify various properties of UNKNOWN_LOCATION. */
1805
1806static void
1807test_unknown_location ()
1808{
1809 ASSERT_EQ (NULL, LOCATION_FILE (UNKNOWN_LOCATION));
1810 ASSERT_EQ (0, LOCATION_LINE (UNKNOWN_LOCATION));
1811 ASSERT_EQ (0, LOCATION_COLUMN (UNKNOWN_LOCATION));
1812}
1813
1814/* Verify various properties of BUILTINS_LOCATION. */
1815
1816static void
1817test_builtins ()
1818{
1819 assert_loceq (_("<built-in>"), 0, 0, BUILTINS_LOCATION);
1820 ASSERT_PRED1 (is_location_from_builtin_token, BUILTINS_LOCATION);
1821}
1822
1823/* Regression test for make_location.
1824 Ensure that we use pure locations for the start/finish of the range,
1825 rather than storing a packed or ad-hoc range as the start/finish. */
1826
1827static void
1828test_make_location_nonpure_range_endpoints (const line_table_case &case_)
1829{
1830 /* Issue seen with testsuite/c-c++-common/Wlogical-not-parentheses-2.c
1831 with C++ frontend.
1832 ....................0000000001111111111222.
1833 ....................1234567890123456789012. */
1834 const char *content = " r += !aaa == bbb;\n";
1835 temp_source_file tmp (SELFTEST_LOCATION, ".C", content);
1836 line_table_test ltt (case_);
1837 linemap_add (line_table, LC_ENTER, false, tmp.get_filename (), 1);
1838
1839 const location_t c11 = linemap_position_for_column (line_table, 11);
1840 const location_t c12 = linemap_position_for_column (line_table, 12);
1841 const location_t c13 = linemap_position_for_column (line_table, 13);
1842 const location_t c14 = linemap_position_for_column (line_table, 14);
1843 const location_t c21 = linemap_position_for_column (line_table, 21);
1844
1845 if (c21 > LINE_MAP_MAX_LOCATION_WITH_COLS)
1846 return;
1847
1848 /* Use column 13 for the caret location, arbitrarily, to verify that we
1849 handle start != caret. */
1850 const location_t aaa = make_location (c13, c12, c14);
1851 ASSERT_EQ (c13, get_pure_location (aaa));
1852 ASSERT_EQ (c12, get_start (aaa));
1853 ASSERT_FALSE (IS_ADHOC_LOC (get_start (aaa)));
1854 ASSERT_EQ (c14, get_finish (aaa));
1855 ASSERT_FALSE (IS_ADHOC_LOC (get_finish (aaa)));
1856
1857 /* Make a location using a location with a range as the start-point. */
1858 const location_t not_aaa = make_location (c11, aaa, c14);
1859 ASSERT_EQ (c11, get_pure_location (not_aaa));
1860 /* It should use the start location of the range, not store the range
1861 itself. */
1862 ASSERT_EQ (c12, get_start (not_aaa));
1863 ASSERT_FALSE (IS_ADHOC_LOC (get_start (not_aaa)));
1864 ASSERT_EQ (c14, get_finish (not_aaa));
1865 ASSERT_FALSE (IS_ADHOC_LOC (get_finish (not_aaa)));
1866
1867 /* Similarly, make a location with a range as the end-point. */
1868 const location_t aaa_eq_bbb = make_location (c12, c12, c21);
1869 ASSERT_EQ (c12, get_pure_location (aaa_eq_bbb));
1870 ASSERT_EQ (c12, get_start (aaa_eq_bbb));
1871 ASSERT_FALSE (IS_ADHOC_LOC (get_start (aaa_eq_bbb)));
1872 ASSERT_EQ (c21, get_finish (aaa_eq_bbb));
1873 ASSERT_FALSE (IS_ADHOC_LOC (get_finish (aaa_eq_bbb)));
1874 const location_t not_aaa_eq_bbb = make_location (c11, c12, aaa_eq_bbb);
1875 /* It should use the finish location of the range, not store the range
1876 itself. */
1877 ASSERT_EQ (c11, get_pure_location (not_aaa_eq_bbb));
1878 ASSERT_EQ (c12, get_start (not_aaa_eq_bbb));
1879 ASSERT_FALSE (IS_ADHOC_LOC (get_start (not_aaa_eq_bbb)));
1880 ASSERT_EQ (c21, get_finish (not_aaa_eq_bbb));
1881 ASSERT_FALSE (IS_ADHOC_LOC (get_finish (not_aaa_eq_bbb)));
1882}
1883
1884/* Verify reading of input files (e.g. for caret-based diagnostics). */
1885
1886static void
1887test_reading_source_line ()
1888{
1889 /* Create a tempfile and write some text to it. */
1890 temp_source_file tmp (SELFTEST_LOCATION, ".txt",
1891 "01234567890123456789\n"
1892 "This is the test text\n"
1893 "This is the 3rd line");
1894
1895 /* Read back a specific line from the tempfile. */
1896 int line_size;
1897 const char *source_line = location_get_source_line (tmp.get_filename (),
1898 3, &line_size);
1899 ASSERT_TRUE (source_line != NULL);
1900 ASSERT_EQ (20, line_size);
1901 ASSERT_TRUE (!strncmp ("This is the 3rd line",
1902 source_line, line_size));
1903
1904 source_line = location_get_source_line (tmp.get_filename (),
1905 2, &line_size);
1906 ASSERT_TRUE (source_line != NULL);
1907 ASSERT_EQ (21, line_size);
1908 ASSERT_TRUE (!strncmp ("This is the test text",
1909 source_line, line_size));
1910
1911 source_line = location_get_source_line (tmp.get_filename (),
1912 4, &line_size);
1913 ASSERT_TRUE (source_line == NULL);
1914}
1915
1916/* Tests of lexing. */
1917
/* Assert that the spelling of token TOK, as produced by cpp_token_as_text
   for PARSER, is the string EXPECTED_TEXT.  */

#define ASSERT_TOKEN_AS_TEXT_EQ(PARSER, TOK, EXPECTED_TEXT)             \
  SELFTEST_BEGIN_STMT                                                   \
    unsigned char *actual_txt = cpp_token_as_text ((PARSER), (TOK));    \
    ASSERT_STREQ ((EXPECTED_TEXT), (const char *)actual_txt);           \
  SELFTEST_END_STMT
1926
1927/* Verify that TOK's src_loc is within EXP_FILENAME at EXP_LINENUM,
1928 and ranges from EXP_START_COL to EXP_FINISH_COL.
1929 Use LOC as the effective location of the selftest. */
1930
1931static void
1932assert_token_loc_eq (const location &loc,
1933 const cpp_token *tok,
1934 const char *exp_filename, int exp_linenum,
1935 int exp_start_col, int exp_finish_col)
1936{
1937 location_t tok_loc = tok->src_loc;
1938 ASSERT_STREQ_AT (loc, exp_filename, LOCATION_FILE (tok_loc));
1939 ASSERT_EQ_AT (loc, exp_linenum, LOCATION_LINE (tok_loc));
1940
1941 /* If location_t values are sufficiently high, then column numbers
1942 will be unavailable. */
1943 if (!should_have_column_data_p (tok_loc))
1944 return;
1945
1946 ASSERT_EQ_AT (loc, exp_start_col, LOCATION_COLUMN (tok_loc));
1947 source_range tok_range = get_range_from_loc (line_table, tok_loc);
1948 ASSERT_EQ_AT (loc, exp_start_col, LOCATION_COLUMN (tok_range.m_start));
1949 ASSERT_EQ_AT (loc, exp_finish_col, LOCATION_COLUMN (tok_range.m_finish));
1950}
1951
/* Convenience wrapper around assert_token_loc_eq that checks TOK->src_loc
   and attributes any failure to the point of use, by passing
   SELFTEST_LOCATION as the effective location.  */

#define ASSERT_TOKEN_LOC_EQ(TOK, EXP_FILENAME, EXP_LINENUM,             \
                            EXP_START_COL, EXP_FINISH_COL)              \
  assert_token_loc_eq (SELFTEST_LOCATION,                               \
                       (TOK),                                           \
                       (EXP_FILENAME),                                  \
                       (EXP_LINENUM),                                   \
                       (EXP_START_COL),                                 \
                       (EXP_FINISH_COL))
1959
1960/* Test of lexing a file using libcpp, verifying tokens and their
1961 location information. */
1962
1963static void
1964test_lexer (const line_table_case &case_)
1965{
1966 /* Create a tempfile and write some text to it. */
1967 const char *content =
1968 /*00000000011111111112222222222333333.3333444444444.455555555556
1969 12345678901234567890123456789012345.6789012345678.901234567890. */
1970 ("test_name /* c-style comment */\n"
1971 " \"test literal\"\n"
1972 " // test c++-style comment\n"
1973 " 42\n");
1974 temp_source_file tmp (SELFTEST_LOCATION, ".txt", content);
1975
1976 line_table_test ltt (case_);
1977
1978 cpp_reader *parser = cpp_create_reader (CLK_GNUC89, NULL, line_table);
1979
1980 const char *fname = cpp_read_main_file (parser, tmp.get_filename ());
1981 ASSERT_NE (fname, NULL);
1982
1983 /* Verify that we get the expected tokens back, with the correct
1984 location information. */
1985
1986 location_t loc;
1987 const cpp_token *tok;
1988 tok = cpp_get_token_with_location (parser, &loc);
1989 ASSERT_NE (tok, NULL);
1990 ASSERT_EQ (tok->type, CPP_NAME);
1991 ASSERT_TOKEN_AS_TEXT_EQ (parser, tok, "test_name");
1992 ASSERT_TOKEN_LOC_EQ (tok, tmp.get_filename (), 1, 1, 9);
1993
1994 tok = cpp_get_token_with_location (parser, &loc);
1995 ASSERT_NE (tok, NULL);
1996 ASSERT_EQ (tok->type, CPP_STRING);
1997 ASSERT_TOKEN_AS_TEXT_EQ (parser, tok, "\"test literal\"");
1998 ASSERT_TOKEN_LOC_EQ (tok, tmp.get_filename (), 2, 35, 48);
1999
2000 tok = cpp_get_token_with_location (parser, &loc);
2001 ASSERT_NE (tok, NULL);
2002 ASSERT_EQ (tok->type, CPP_NUMBER);
2003 ASSERT_TOKEN_AS_TEXT_EQ (parser, tok, "42");
2004 ASSERT_TOKEN_LOC_EQ (tok, tmp.get_filename (), 4, 4, 5);
2005
2006 tok = cpp_get_token_with_location (parser, &loc);
2007 ASSERT_NE (tok, NULL);
2008 ASSERT_EQ (tok->type, CPP_EOF);
2009
2010 cpp_finish (parser, NULL);
2011 cpp_destroy (parser);
2012}
2013
/* Forward decls.  */

struct lexer_test;
class lexer_test_options;

/* A class for specifying options of a lexer_test.
   When a non-NULL instance is passed to the lexer_test constructor, its
   "apply" vfunc is called from that constructor, before the main file
   is read.  */

class lexer_test_options
{
 public:
  /* This is a polymorphic interface, so give it a virtual destructor:
     destroying a subclass through a pointer to this base is then
     well-defined.  */
  virtual ~lexer_test_options () {}

  /* Customize the given lexer_test (e.g. by adjusting its cpp_reader).  */
  virtual void apply (lexer_test &) = 0;
};
2027
2028/* Wrapper around an cpp_reader *, which calls cpp_finish and cpp_destroy
2029 in its dtor.
2030
2031 This is needed by struct lexer_test to ensure that the cleanup of the
2032 cpp_reader happens *after* the cleanup of the temp_source_file. */
2033
2034class cpp_reader_ptr
2035{
2036 public:
2037 cpp_reader_ptr (cpp_reader *ptr) : m_ptr (ptr) {}
2038
2039 ~cpp_reader_ptr ()
2040 {
2041 cpp_finish (m_ptr, NULL);
2042 cpp_destroy (m_ptr);
2043 }
2044
2045 operator cpp_reader * () const { return m_ptr; }
2046
2047 private:
2048 cpp_reader *m_ptr;
2049};
2050
2051/* A struct for writing lexer tests. */
2052
2053struct lexer_test
2054{
2055 lexer_test (const line_table_case &case_, const char *content,
2056 lexer_test_options *options);
2057 ~lexer_test ();
2058
2059 const cpp_token *get_token ();
2060
2061 /* The ordering of these fields matters.
2062 The line_table_test must be first, since the cpp_reader_ptr
2063 uses it.
2064 The cpp_reader must be cleaned up *after* the temp_source_file
2065 since the filenames in input.c's input cache are owned by the
2066 cpp_reader; in particular, when ~temp_source_file evicts the
2067 filename the filenames must still be alive. */
2068 line_table_test m_ltt;
2069 cpp_reader_ptr m_parser;
2070 temp_source_file m_tempfile;
2071 string_concat_db m_concats;
2072 bool m_implicitly_expect_EOF;
2073};
2074
2075/* Use an EBCDIC encoding for the execution charset, specifically
2076 IBM1047-encoded (aka "EBCDIC 1047", or "Code page 1047").
2077
2078 This exercises iconv integration within libcpp.
2079 Not every build of iconv supports the given charset,
2080 so we need to flag this error and handle it gracefully. */
2081
2082class ebcdic_execution_charset : public lexer_test_options
2083{
2084 public:
2085 ebcdic_execution_charset () : m_num_iconv_errors (0)
2086 {
2087 gcc_assert (s_singleton == NULL);
2088 s_singleton = this;
2089 }
2090 ~ebcdic_execution_charset ()
2091 {
2092 gcc_assert (s_singleton == this);
2093 s_singleton = NULL;
2094 }
2095
2096 void apply (lexer_test &test) FINAL OVERRIDE
2097 {
2098 cpp_options *cpp_opts = cpp_get_options (test.m_parser);
2099 cpp_opts->narrow_charset = "IBM1047";
2100
2101 cpp_callbacks *callbacks = cpp_get_callbacks (test.m_parser);
2102 callbacks->error = on_error;
2103 }
2104
2105 static bool on_error (cpp_reader *pfile ATTRIBUTE_UNUSED,
2106 int level ATTRIBUTE_UNUSED,
2107 int reason ATTRIBUTE_UNUSED,
2108 rich_location *richloc ATTRIBUTE_UNUSED,
2109 const char *msgid, va_list *ap ATTRIBUTE_UNUSED)
2110 ATTRIBUTE_FPTR_PRINTF(5,0)
2111 {
2112 gcc_assert (s_singleton);
2113 /* Avoid exgettext from picking this up, it is translated in libcpp. */
2114 const char *msg = "conversion from %s to %s not supported by iconv";
2115#ifdef ENABLE_NLS
2116 msg = dgettext ("cpplib", msg);
2117#endif
2118 /* Detect and record errors emitted by libcpp/charset.c:init_iconv_desc
2119 when the local iconv build doesn't support the conversion. */
2120 if (strcmp (msgid, msg) == 0)
2121 {
2122 s_singleton->m_num_iconv_errors++;
2123 return true;
2124 }
2125
2126 /* Otherwise, we have an unexpected error. */
2127 abort ();
2128 }
2129
2130 bool iconv_errors_occurred_p () const { return m_num_iconv_errors > 0; }
2131
2132 private:
2133 static ebcdic_execution_charset *s_singleton;
2134 int m_num_iconv_errors;
2135};
2136
2137ebcdic_execution_charset *ebcdic_execution_charset::s_singleton;
2138
2139/* A lexer_test_options subclass that records a list of error
2140 messages emitted by the lexer. */
2141
2142class lexer_error_sink : public lexer_test_options
2143{
2144 public:
2145 lexer_error_sink ()
2146 {
2147 gcc_assert (s_singleton == NULL);
2148 s_singleton = this;
2149 }
2150 ~lexer_error_sink ()
2151 {
2152 gcc_assert (s_singleton == this);
2153 s_singleton = NULL;
2154
2155 int i;
2156 char *str;
2157 FOR_EACH_VEC_ELT (m_errors, i, str)
2158 free (str);
2159 }
2160
2161 void apply (lexer_test &test) FINAL OVERRIDE
2162 {
2163 cpp_callbacks *callbacks = cpp_get_callbacks (test.m_parser);
2164 callbacks->error = on_error;
2165 }
2166
2167 static bool on_error (cpp_reader *pfile ATTRIBUTE_UNUSED,
2168 int level ATTRIBUTE_UNUSED,
2169 int reason ATTRIBUTE_UNUSED,
2170 rich_location *richloc ATTRIBUTE_UNUSED,
2171 const char *msgid, va_list *ap)
2172 ATTRIBUTE_FPTR_PRINTF(5,0)
2173 {
2174 char *msg = xvasprintf (msgid, *ap);
2175 s_singleton->m_errors.safe_push (msg);
2176 return true;
2177 }
2178
2179 auto_vec<char *> m_errors;
2180
2181 private:
2182 static lexer_error_sink *s_singleton;
2183};
2184
2185lexer_error_sink *lexer_error_sink::s_singleton;
2186
2187/* Constructor. Override line_table with a new instance based on CASE_,
2188 and write CONTENT to a tempfile. Create a cpp_reader, and use it to
2189 start parsing the tempfile. */
2190
2191lexer_test::lexer_test (const line_table_case &case_, const char *content,
2192 lexer_test_options *options)
2193: m_ltt (case_),
2194 m_parser (cpp_create_reader (CLK_GNUC99, NULL, line_table)),
2195 /* Create a tempfile and write the text to it. */
2196 m_tempfile (SELFTEST_LOCATION, ".c", content),
2197 m_concats (),
2198 m_implicitly_expect_EOF (true)
2199{
2200 if (options)
2201 options->apply (*this);
2202
2203 cpp_init_iconv (m_parser);
2204
2205 /* Parse the file. */
2206 const char *fname = cpp_read_main_file (m_parser,
2207 m_tempfile.get_filename ());
2208 ASSERT_NE (fname, NULL);
2209}
2210
2211/* Destructor. By default, verify that the next token in m_parser is EOF. */
2212
2213lexer_test::~lexer_test ()
2214{
2215 location_t loc;
2216 const cpp_token *tok;
2217
2218 if (m_implicitly_expect_EOF)
2219 {
2220 tok = cpp_get_token_with_location (m_parser, &loc);
2221 ASSERT_NE (tok, NULL);
2222 ASSERT_EQ (tok->type, CPP_EOF);
2223 }
2224}
2225
2226/* Get the next token from m_parser. */
2227
2228const cpp_token *
2229lexer_test::get_token ()
2230{
2231 location_t loc;
2232 const cpp_token *tok;
2233
2234 tok = cpp_get_token_with_location (m_parser, &loc);
2235 ASSERT_NE (tok, NULL);
2236 return tok;
2237}
2238
2239/* Verify that locations within string literals are correctly handled. */
2240
2241/* Verify get_source_range_for_substring for token(s) at STRLOC,
2242 using the string concatenation database for TEST.
2243
2244 Assert that the character at index IDX is on EXPECTED_LINE,
2245 and that it begins at column EXPECTED_START_COL and ends at
2246 EXPECTED_FINISH_COL (unless the locations are beyond
2247 LINE_MAP_MAX_LOCATION_WITH_COLS, in which case don't check their
2248 columns). */
2249
2250static void
2251assert_char_at_range (const location &loc,
2252 lexer_test& test,
2253 location_t strloc, enum cpp_ttype type, int idx,
2254 int expected_line, int expected_start_col,
2255 int expected_finish_col)
2256{
2257 cpp_reader *pfile = test.m_parser;
2258 string_concat_db *concats = &test.m_concats;
2259
2260 source_range actual_range = source_range();
2261 const char *err
2262 = get_source_range_for_char (pfile, concats, strloc, type, idx,
2263 &actual_range);
2264 if (should_have_column_data_p (strloc))
2265 ASSERT_EQ_AT (loc, NULL, err);
2266 else
2267 {
2268 ASSERT_STREQ_AT (loc,
2269 "range starts after LINE_MAP_MAX_LOCATION_WITH_COLS",
2270 err);
2271 return;
2272 }
2273
2274 int actual_start_line = LOCATION_LINE (actual_range.m_start);
2275 ASSERT_EQ_AT (loc, expected_line, actual_start_line);
2276 int actual_finish_line = LOCATION_LINE (actual_range.m_finish);
2277 ASSERT_EQ_AT (loc, expected_line, actual_finish_line);
2278
2279 if (should_have_column_data_p (actual_range.m_start))
2280 {
2281 int actual_start_col = LOCATION_COLUMN (actual_range.m_start);
2282 ASSERT_EQ_AT (loc, expected_start_col, actual_start_col);
2283 }
2284 if (should_have_column_data_p (actual_range.m_finish))
2285 {
2286 int actual_finish_col = LOCATION_COLUMN (actual_range.m_finish);
2287 ASSERT_EQ_AT (loc, expected_finish_col, actual_finish_col);
2288 }
2289}
2290
/* Convenience wrapper around assert_char_at_range that attributes any
   failure to the point of use, by passing SELFTEST_LOCATION as the
   effective location.  */

#define ASSERT_CHAR_AT_RANGE(LEXER_TEST, STRLOC, TYPE, IDX,             \
                             EXPECTED_LINE, EXPECTED_START_COL,         \
                             EXPECTED_FINISH_COL)                       \
  assert_char_at_range (SELFTEST_LOCATION,                              \
                        (LEXER_TEST), (STRLOC), (TYPE), (IDX),          \
                        (EXPECTED_LINE),                                \
                        (EXPECTED_START_COL),                           \
                        (EXPECTED_FINISH_COL))
2299
2300/* Verify get_num_source_ranges_for_substring for token(s) at STRLOC,
2301 using the string concatenation database for TEST.
2302
2303 Assert that the token(s) at STRLOC contain EXPECTED_NUM_RANGES. */
2304
2305static void
2306assert_num_substring_ranges (const location &loc,
2307 lexer_test& test,
2308 location_t strloc,
2309 enum cpp_ttype type,
2310 int expected_num_ranges)
2311{
2312 cpp_reader *pfile = test.m_parser;
2313 string_concat_db *concats = &test.m_concats;
2314
2315 int actual_num_ranges = -1;
2316 const char *err
2317 = get_num_source_ranges_for_substring (pfile, concats, strloc, type,
2318 &actual_num_ranges);
2319 if (should_have_column_data_p (strloc))
2320 ASSERT_EQ_AT (loc, NULL, err);
2321 else
2322 {
2323 ASSERT_STREQ_AT (loc,
2324 "range starts after LINE_MAP_MAX_LOCATION_WITH_COLS",
2325 err);
2326 return;
2327 }
2328 ASSERT_EQ_AT (loc, expected_num_ranges, actual_num_ranges);
2329}
2330
/* Convenience wrapper around assert_num_substring_ranges that attributes
   any failure to the point of use, by passing SELFTEST_LOCATION as the
   effective location.  */

#define ASSERT_NUM_SUBSTRING_RANGES(LEXER_TEST, STRLOC, TYPE,           \
                                    EXPECTED_NUM_RANGES)                \
  assert_num_substring_ranges (SELFTEST_LOCATION,                       \
                               (LEXER_TEST), (STRLOC), (TYPE),          \
                               (EXPECTED_NUM_RANGES))
2338
2339
2340/* Verify that get_num_source_ranges_for_substring for token(s) at STRLOC
2341 returns an error (using the string concatenation database for TEST). */
2342
2343static void
2344assert_has_no_substring_ranges (const location &loc,
2345 lexer_test& test,
2346 location_t strloc,
2347 enum cpp_ttype type,
2348 const char *expected_err)
2349{
2350 cpp_reader *pfile = test.m_parser;
2351 string_concat_db *concats = &test.m_concats;
2352 cpp_substring_ranges ranges;
2353 const char *actual_err
2354 = get_substring_ranges_for_loc (pfile, concats, strloc,
2355 type, ranges);
2356 if (should_have_column_data_p (strloc))
2357 ASSERT_STREQ_AT (loc, expected_err, actual_err);
2358 else
2359 ASSERT_STREQ_AT (loc,
2360 "range starts after LINE_MAP_MAX_LOCATION_WITH_COLS",
2361 actual_err);
2362}
2363
/* Convenience wrapper around assert_has_no_substring_ranges that
   attributes any failure to the point of use, by passing
   SELFTEST_LOCATION as the effective location.  */

#define ASSERT_HAS_NO_SUBSTRING_RANGES(LEXER_TEST, STRLOC, TYPE, ERR)   \
  assert_has_no_substring_ranges (SELFTEST_LOCATION,                    \
                                  (LEXER_TEST), (STRLOC), (TYPE),       \
                                  (ERR))
2367
2368/* Lex a simple string literal. Verify the substring location data, before
2369 and after running cpp_interpret_string on it. */
2370
2371static void
2372test_lexer_string_locations_simple (const line_table_case &case_)
2373{
2374 /* Digits 0-9 (with 0 at column 10), the simple way.
2375 ....................000000000.11111111112.2222222223333333333
2376 ....................123456789.01234567890.1234567890123456789
2377 We add a trailing comment to ensure that we correctly locate
2378 the end of the string literal token. */
2379 const char *content = " \"0123456789\" /* not a string */\n";
2380 lexer_test test (case_, content, NULL);
2381
2382 /* Verify that we get the expected token back, with the correct
2383 location information. */
2384 const cpp_token *tok = test.get_token ();
2385 ASSERT_EQ (tok->type, CPP_STRING);
2386 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"0123456789\"");
2387 ASSERT_TOKEN_LOC_EQ (tok, test.m_tempfile.get_filename (), 1, 9, 20);
2388
2389 /* At this point in lexing, the quote characters are treated as part of
2390 the string (they are stripped off by cpp_interpret_string). */
2391
2392 ASSERT_EQ (tok->val.str.len, 12);
2393
2394 /* Verify that cpp_interpret_string works. */
2395 cpp_string dst_string;
2396 const enum cpp_ttype type = CPP_STRING;
2397 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2398 &dst_string, type);
2399 ASSERT_TRUE (result);
2400 ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
2401 free (const_cast <unsigned char *> (dst_string.text));
2402
2403 /* Verify ranges of individual characters. This no longer includes the
2404 opening quote, but does include the closing quote. */
2405 for (int i = 0; i <= 10; i++)
2406 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1,
2407 10 + i, 10 + i);
2408
2409 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 11);
2410}
2411
2412/* As test_lexer_string_locations_simple, but use an EBCDIC execution
2413 encoding. */
2414
2415static void
2416test_lexer_string_locations_ebcdic (const line_table_case &case_)
2417{
2418 /* EBCDIC support requires iconv. */
2419 if (!HAVE_ICONV)
2420 return;
2421
2422 /* Digits 0-9 (with 0 at column 10), the simple way.
2423 ....................000000000.11111111112.2222222223333333333
2424 ....................123456789.01234567890.1234567890123456789
2425 We add a trailing comment to ensure that we correctly locate
2426 the end of the string literal token. */
2427 const char *content = " \"0123456789\" /* not a string */\n";
2428 ebcdic_execution_charset use_ebcdic;
2429 lexer_test test (case_, content, &use_ebcdic);
2430
2431 /* Verify that we get the expected token back, with the correct
2432 location information. */
2433 const cpp_token *tok = test.get_token ();
2434 ASSERT_EQ (tok->type, CPP_STRING);
2435 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"0123456789\"");
2436 ASSERT_TOKEN_LOC_EQ (tok, test.m_tempfile.get_filename (), 1, 9, 20);
2437
2438 /* At this point in lexing, the quote characters are treated as part of
2439 the string (they are stripped off by cpp_interpret_string). */
2440
2441 ASSERT_EQ (tok->val.str.len, 12);
2442
2443 /* The remainder of the test requires an iconv implementation that
2444 can convert from UTF-8 to the EBCDIC encoding requested above. */
2445 if (use_ebcdic.iconv_errors_occurred_p ())
2446 return;
2447
2448 /* Verify that cpp_interpret_string works. */
2449 cpp_string dst_string;
2450 const enum cpp_ttype type = CPP_STRING;
2451 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2452 &dst_string, type);
2453 ASSERT_TRUE (result);
2454 /* We should now have EBCDIC-encoded text, specifically
2455 IBM1047-encoded (aka "EBCDIC 1047", or "Code page 1047").
2456 The digits 0-9 are encoded as 240-249 i.e. 0xf0-0xf9. */
2457 ASSERT_STREQ ("\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9",
2458 (const char *)dst_string.text);
2459 free (const_cast <unsigned char *> (dst_string.text));
2460
2461 /* Verify that we don't attempt to record substring location information
2462 for such cases. */
2463 ASSERT_HAS_NO_SUBSTRING_RANGES
2464 (test, tok->src_loc, type,
2465 "execution character set != source character set");
2466}
2467
2468/* Lex a string literal containing a hex-escaped character.
2469 Verify the substring location data, before and after running
2470 cpp_interpret_string on it. */
2471
2472static void
2473test_lexer_string_locations_hex (const line_table_case &case_)
2474{
2475 /* Digits 0-9, expressing digit 5 in ASCII as "\x35"
2476 and with a space in place of digit 6, to terminate the escaped
2477 hex code.
2478 ....................000000000.111111.11112222.
2479 ....................123456789.012345.67890123. */
2480 const char *content = " \"01234\\x35 789\"\n";
2481 lexer_test test (case_, content, NULL);
2482
2483 /* Verify that we get the expected token back, with the correct
2484 location information. */
2485 const cpp_token *tok = test.get_token ();
2486 ASSERT_EQ (tok->type, CPP_STRING);
2487 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"01234\\x35 789\"");
2488 ASSERT_TOKEN_LOC_EQ (tok, test.m_tempfile.get_filename (), 1, 9, 23);
2489
2490 /* At this point in lexing, the quote characters are treated as part of
2491 the string (they are stripped off by cpp_interpret_string). */
2492 ASSERT_EQ (tok->val.str.len, 15);
2493
2494 /* Verify that cpp_interpret_string works. */
2495 cpp_string dst_string;
2496 const enum cpp_ttype type = CPP_STRING;
2497 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2498 &dst_string, type);
2499 ASSERT_TRUE (result);
2500 ASSERT_STREQ ("012345 789", (const char *)dst_string.text);
2501 free (const_cast <unsigned char *> (dst_string.text));
2502
2503 /* Verify ranges of individual characters. This no longer includes the
2504 opening quote, but does include the closing quote. */
2505 for (int i = 0; i <= 4; i++)
2506 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
2507 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, 5, 1, 15, 18);
2508 for (int i = 6; i <= 10; i++)
2509 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 13 + i, 13 + i);
2510
2511 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 11);
2512}
2513
2514/* Lex a string literal containing an octal-escaped character.
2515 Verify the substring location data after running cpp_interpret_string
2516 on it. */
2517
2518static void
2519test_lexer_string_locations_oct (const line_table_case &case_)
2520{
2521 /* Digits 0-9, expressing digit 5 in ASCII as "\065"
2522 and with a space in place of digit 6, to terminate the escaped
2523 octal code.
2524 ....................000000000.111111.11112222.2222223333333333444
2525 ....................123456789.012345.67890123.4567890123456789012 */
2526 const char *content = " \"01234\\065 789\" /* not a string */\n";
2527 lexer_test test (case_, content, NULL);
2528
2529 /* Verify that we get the expected token back, with the correct
2530 location information. */
2531 const cpp_token *tok = test.get_token ();
2532 ASSERT_EQ (tok->type, CPP_STRING);
2533 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"01234\\065 789\"");
2534
2535 /* Verify that cpp_interpret_string works. */
2536 cpp_string dst_string;
2537 const enum cpp_ttype type = CPP_STRING;
2538 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2539 &dst_string, type);
2540 ASSERT_TRUE (result);
2541 ASSERT_STREQ ("012345 789", (const char *)dst_string.text);
2542 free (const_cast <unsigned char *> (dst_string.text));
2543
2544 /* Verify ranges of individual characters. This no longer includes the
2545 opening quote, but does include the closing quote. */
2546 for (int i = 0; i < 5; i++)
2547 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
2548 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, 5, 1, 15, 18);
2549 for (int i = 6; i <= 10; i++)
2550 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 13 + i, 13 + i);
2551
2552 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 11);
2553}
2554
2555/* Test of string literal containing letter escapes. */
2556
2557static void
2558test_lexer_string_locations_letter_escape_1 (const line_table_case &case_)
2559{
2560 /* The string "\tfoo\\\nbar" i.e. tab, "foo", backslash, newline, bar.
2561 .....................000000000.1.11111.1.1.11222.22222223333333
2562 .....................123456789.0.12345.6.7.89012.34567890123456. */
2563 const char *content = (" \"\\tfoo\\\\\\nbar\" /* non-str */\n");
2564 lexer_test test (case_, content, NULL);
2565
2566 /* Verify that we get the expected tokens back. */
2567 const cpp_token *tok = test.get_token ();
2568 ASSERT_EQ (tok->type, CPP_STRING);
2569 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"\\tfoo\\\\\\nbar\"");
2570
2571 /* Verify ranges of individual characters. */
2572 /* "\t". */
2573 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2574 0, 1, 10, 11);
2575 /* "foo". */
2576 for (int i = 1; i <= 3; i++)
2577 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2578 i, 1, 11 + i, 11 + i);
2579 /* "\\" and "\n". */
2580 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2581 4, 1, 15, 16);
2582 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2583 5, 1, 17, 18);
2584
2585 /* "bar" and closing quote for nul-terminator. */
2586 for (int i = 6; i <= 9; i++)
2587 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2588 i, 1, 13 + i, 13 + i);
2589
2590 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 10);
2591}
2592
2593/* Another test of a string literal containing a letter escape.
2594 Based on string seen in
2595 printf ("%-%\n");
2596 in gcc.dg/format/c90-printf-1.c. */
2597
2598static void
2599test_lexer_string_locations_letter_escape_2 (const line_table_case &case_)
2600{
2601 /* .....................000000000.1111.11.1111.22222222223.
2602 .....................123456789.0123.45.6789.01234567890. */
2603 const char *content = (" \"%-%\\n\" /* non-str */\n");
2604 lexer_test test (case_, content, NULL);
2605
2606 /* Verify that we get the expected tokens back. */
2607 const cpp_token *tok = test.get_token ();
2608 ASSERT_EQ (tok->type, CPP_STRING);
2609 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"%-%\\n\"");
2610
2611 /* Verify ranges of individual characters. */
2612 /* "%-%". */
2613 for (int i = 0; i < 3; i++)
2614 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2615 i, 1, 10 + i, 10 + i);
2616 /* "\n". */
2617 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2618 3, 1, 13, 14);
2619
2620 /* Closing quote for nul-terminator. */
2621 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2622 4, 1, 15, 15);
2623
2624 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 5);
2625}
2626
2627/* Lex a string literal containing UCN 4 characters.
2628 Verify the substring location data after running cpp_interpret_string
2629 on it. */
2630
2631static void
2632test_lexer_string_locations_ucn4 (const line_table_case &case_)
2633{
2634 /* Digits 0-9, expressing digits 5 and 6 as Roman numerals expressed
2635 as UCN 4.
2636 ....................000000000.111111.111122.222222223.33333333344444
2637 ....................123456789.012345.678901.234567890.12345678901234 */
2638 const char *content = " \"01234\\u2174\\u2175789\" /* non-str */\n";
2639 lexer_test test (case_, content, NULL);
2640
2641 /* Verify that we get the expected token back, with the correct
2642 location information. */
2643 const cpp_token *tok = test.get_token ();
2644 ASSERT_EQ (tok->type, CPP_STRING);
2645 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"01234\\u2174\\u2175789\"");
2646
2647 /* Verify that cpp_interpret_string works.
2648 The string should be encoded in the execution character
2649 set. Assuming that that is UTF-8, we should have the following:
2650 ----------- ---- ----- ------- ----------------
2651 Byte offset Byte Octal Unicode Source Column(s)
2652 ----------- ---- ----- ------- ----------------
2653 0 0x30 '0' 10
2654 1 0x31 '1' 11
2655 2 0x32 '2' 12
2656 3 0x33 '3' 13
2657 4 0x34 '4' 14
2658 5 0xE2 \342 U+2174 15-20
2659 6 0x85 \205 (cont) 15-20
2660 7 0xB4 \264 (cont) 15-20
2661 8 0xE2 \342 U+2175 21-26
2662 9 0x85 \205 (cont) 21-26
2663 10 0xB5 \265 (cont) 21-26
2664 11 0x37 '7' 27
2665 12 0x38 '8' 28
2666 13 0x39 '9' 29
2667 14 0x00 30 (closing quote)
2668 ----------- ---- ----- ------- ---------------. */
2669
2670 cpp_string dst_string;
2671 const enum cpp_ttype type = CPP_STRING;
2672 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2673 &dst_string, type);
2674 ASSERT_TRUE (result);
2675 ASSERT_STREQ ("01234\342\205\264\342\205\265789",
2676 (const char *)dst_string.text);
2677 free (const_cast <unsigned char *> (dst_string.text));
2678
2679 /* Verify ranges of individual characters. This no longer includes the
2680 opening quote, but does include the closing quote.
2681 '01234'. */
2682 for (int i = 0; i <= 4; i++)
2683 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
2684 /* U+2174. */
2685 for (int i = 5; i <= 7; i++)
2686 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 15, 20);
2687 /* U+2175. */
2688 for (int i = 8; i <= 10; i++)
2689 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 21, 26);
2690 /* '789' and nul terminator */
2691 for (int i = 11; i <= 14; i++)
2692 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 16 + i, 16 + i);
2693
2694 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 15);
2695}
2696
2697/* Lex a string literal containing UCN 8 characters.
2698 Verify the substring location data after running cpp_interpret_string
2699 on it. */
2700
2701static void
2702test_lexer_string_locations_ucn8 (const line_table_case &case_)
2703{
2704 /* Digits 0-9, expressing digits 5 and 6 as Roman numerals as UCN 8.
2705 ....................000000000.111111.1111222222.2222333333333.344444
2706 ....................123456789.012345.6789012345.6789012345678.901234 */
2707 const char *content = " \"01234\\U00002174\\U00002175789\" /* */\n";
2708 lexer_test test (case_, content, NULL);
2709
2710 /* Verify that we get the expected token back, with the correct
2711 location information. */
2712 const cpp_token *tok = test.get_token ();
2713 ASSERT_EQ (tok->type, CPP_STRING);
2714 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok,
2715 "\"01234\\U00002174\\U00002175789\"");
2716
2717 /* Verify that cpp_interpret_string works.
2718 The UTF-8 encoding of the string is identical to that from
2719 the ucn4 testcase above; the only difference is the column
2720 locations. */
2721 cpp_string dst_string;
2722 const enum cpp_ttype type = CPP_STRING;
2723 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2724 &dst_string, type);
2725 ASSERT_TRUE (result);
2726 ASSERT_STREQ ("01234\342\205\264\342\205\265789",
2727 (const char *)dst_string.text);
2728 free (const_cast <unsigned char *> (dst_string.text));
2729
2730 /* Verify ranges of individual characters. This no longer includes the
2731 opening quote, but does include the closing quote.
2732 '01234'. */
2733 for (int i = 0; i <= 4; i++)
2734 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
2735 /* U+2174. */
2736 for (int i = 5; i <= 7; i++)
2737 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 15, 24);
2738 /* U+2175. */
2739 for (int i = 8; i <= 10; i++)
2740 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 25, 34);
2741 /* '789' at columns 35-37 */
2742 for (int i = 11; i <= 13; i++)
2743 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 24 + i, 24 + i);
2744 /* Closing quote/nul-terminator at column 38. */
2745 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, 14, 1, 38, 38);
2746
2747 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 15);
2748}
2749
/* Read the four bytes that PTR_BE_VALUE points at as a big-endian
   (most significant byte first) 32-bit quantity, and return it as a
   host-endian value.  */

static uint32_t
uint32_from_big_endian (const uint32_t *ptr_be_value)
{
  const unsigned char *bytes = (const unsigned char *) ptr_be_value;

  /* Shift in one byte at a time, most significant first.  */
  uint32_t host_value = 0;
  for (int i = 0; i < 4; i++)
    host_value = (host_value << 8) | (uint32_t) bytes[i];
  return host_value;
}
2761
2762/* Lex a wide string literal and verify that attempts to read substring
2763 location data from it fail gracefully. */
2764
2765static void
2766test_lexer_string_locations_wide_string (const line_table_case &case_)
2767{
2768 /* Digits 0-9.
2769 ....................000000000.11111111112.22222222233333
2770 ....................123456789.01234567890.12345678901234 */
2771 const char *content = " L\"0123456789\" /* non-str */\n";
2772 lexer_test test (case_, content, NULL);
2773
2774 /* Verify that we get the expected token back, with the correct
2775 location information. */
2776 const cpp_token *tok = test.get_token ();
2777 ASSERT_EQ (tok->type, CPP_WSTRING);
2778 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "L\"0123456789\"");
2779
2780 /* Verify that cpp_interpret_string works, using CPP_WSTRING. */
2781 cpp_string dst_string;
2782 const enum cpp_ttype type = CPP_WSTRING;
2783 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2784 &dst_string, type);
2785 ASSERT_TRUE (result);
2786 /* The cpp_reader defaults to big-endian with
2787 CHAR_BIT * sizeof (int) for the wchar_precision, so dst_string should
2788 now be encoded as UTF-32BE. */
2789 const uint32_t *be32_chars = (const uint32_t *)dst_string.text;
2790 ASSERT_EQ ('0', uint32_from_big_endian (&be32_chars[0]));
2791 ASSERT_EQ ('5', uint32_from_big_endian (&be32_chars[5]));
2792 ASSERT_EQ ('9', uint32_from_big_endian (&be32_chars[9]));
2793 ASSERT_EQ (0, uint32_from_big_endian (&be32_chars[10]));
2794 free (const_cast <unsigned char *> (dst_string.text));
2795
2796 /* We don't yet support generating substring location information
2797 for L"" strings. */
2798 ASSERT_HAS_NO_SUBSTRING_RANGES
2799 (test, tok->src_loc, type,
2800 "execution character set != source character set");
2801}
2802
/* Decode the 16-bit big-endian quantity stored at PTR_BE_VALUE and
   return it in host byte order.  The two bytes are read individually,
   so the result is independent of the endianness of the host.  */

static uint16_t
uint16_from_big_endian (const uint16_t *ptr_be_value)
{
  const unsigned char *bytes = (const unsigned char *) ptr_be_value;
  const unsigned int high_byte = bytes[0];
  const unsigned int low_byte = bytes[1];

  return (uint16_t) ((high_byte << 8) | low_byte);
}
2811
2812/* Lex a u"" string literal and verify that attempts to read substring
2813 location data from it fail gracefully. */
2814
2815static void
2816test_lexer_string_locations_string16 (const line_table_case &case_)
2817{
2818 /* Digits 0-9.
2819 ....................000000000.11111111112.22222222233333
2820 ....................123456789.01234567890.12345678901234 */
2821 const char *content = " u\"0123456789\" /* non-str */\n";
2822 lexer_test test (case_, content, NULL);
2823
2824 /* Verify that we get the expected token back, with the correct
2825 location information. */
2826 const cpp_token *tok = test.get_token ();
2827 ASSERT_EQ (tok->type, CPP_STRING16);
2828 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "u\"0123456789\"");
2829
2830 /* Verify that cpp_interpret_string works, using CPP_STRING16. */
2831 cpp_string dst_string;
2832 const enum cpp_ttype type = CPP_STRING16;
2833 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2834 &dst_string, type);
2835 ASSERT_TRUE (result);
2836
2837 /* The cpp_reader defaults to big-endian, so dst_string should
2838 now be encoded as UTF-16BE. */
2839 const uint16_t *be16_chars = (const uint16_t *)dst_string.text;
2840 ASSERT_EQ ('0', uint16_from_big_endian (&be16_chars[0]));
2841 ASSERT_EQ ('5', uint16_from_big_endian (&be16_chars[5]));
2842 ASSERT_EQ ('9', uint16_from_big_endian (&be16_chars[9]));
2843 ASSERT_EQ (0, uint16_from_big_endian (&be16_chars[10]));
2844 free (const_cast <unsigned char *> (dst_string.text));
2845
2846 /* We don't yet support generating substring location information
2847 for L"" strings. */
2848 ASSERT_HAS_NO_SUBSTRING_RANGES
2849 (test, tok->src_loc, type,
2850 "execution character set != source character set");
2851}
2852
2853/* Lex a U"" string literal and verify that attempts to read substring
2854 location data from it fail gracefully. */
2855
2856static void
2857test_lexer_string_locations_string32 (const line_table_case &case_)
2858{
2859 /* Digits 0-9.
2860 ....................000000000.11111111112.22222222233333
2861 ....................123456789.01234567890.12345678901234 */
2862 const char *content = " U\"0123456789\" /* non-str */\n";
2863 lexer_test test (case_, content, NULL);
2864
2865 /* Verify that we get the expected token back, with the correct
2866 location information. */
2867 const cpp_token *tok = test.get_token ();
2868 ASSERT_EQ (tok->type, CPP_STRING32);
2869 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "U\"0123456789\"");
2870
2871 /* Verify that cpp_interpret_string works, using CPP_STRING32. */
2872 cpp_string dst_string;
2873 const enum cpp_ttype type = CPP_STRING32;
2874 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2875 &dst_string, type);
2876 ASSERT_TRUE (result);
2877
2878 /* The cpp_reader defaults to big-endian, so dst_string should
2879 now be encoded as UTF-32BE. */
2880 const uint32_t *be32_chars = (const uint32_t *)dst_string.text;
2881 ASSERT_EQ ('0', uint32_from_big_endian (&be32_chars[0]));
2882 ASSERT_EQ ('5', uint32_from_big_endian (&be32_chars[5]));
2883 ASSERT_EQ ('9', uint32_from_big_endian (&be32_chars[9]));
2884 ASSERT_EQ (0, uint32_from_big_endian (&be32_chars[10]));
2885 free (const_cast <unsigned char *> (dst_string.text));
2886
2887 /* We don't yet support generating substring location information
2888 for L"" strings. */
2889 ASSERT_HAS_NO_SUBSTRING_RANGES
2890 (test, tok->src_loc, type,
2891 "execution character set != source character set");
2892}
2893
2894/* Lex a u8-string literal.
2895 Verify the substring location data after running cpp_interpret_string
2896 on it. */
2897
2898static void
2899test_lexer_string_locations_u8 (const line_table_case &case_)
2900{
2901 /* Digits 0-9.
2902 ....................000000000.11111111112.22222222233333
2903 ....................123456789.01234567890.12345678901234 */
2904 const char *content = " u8\"0123456789\" /* non-str */\n";
2905 lexer_test test (case_, content, NULL);
2906
2907 /* Verify that we get the expected token back, with the correct
2908 location information. */
2909 const cpp_token *tok = test.get_token ();
2910 ASSERT_EQ (tok->type, CPP_UTF8STRING);
2911 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "u8\"0123456789\"");
2912
2913 /* Verify that cpp_interpret_string works. */
2914 cpp_string dst_string;
2915 const enum cpp_ttype type = CPP_STRING;
2916 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2917 &dst_string, type);
2918 ASSERT_TRUE (result);
2919 ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
2920 free (const_cast <unsigned char *> (dst_string.text));
2921
2922 /* Verify ranges of individual characters. This no longer includes the
2923 opening quote, but does include the closing quote. */
2924 for (int i = 0; i <= 10; i++)
2925 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
2926}
2927
2928/* Lex a string literal containing UTF-8 source characters.
2929 Verify the substring location data after running cpp_interpret_string
2930 on it. */
2931
2932static void
2933test_lexer_string_locations_utf8_source (const line_table_case &case_)
2934{
2935 /* This string literal is written out to the source file as UTF-8,
2936 and is of the form "before mojibake after", where "mojibake"
2937 is written as the following four unicode code points:
2938 U+6587 CJK UNIFIED IDEOGRAPH-6587
2939 U+5B57 CJK UNIFIED IDEOGRAPH-5B57
2940 U+5316 CJK UNIFIED IDEOGRAPH-5316
2941 U+3051 HIRAGANA LETTER KE.
2942 Each of these is 3 bytes wide when encoded in UTF-8, whereas the
2943 "before" and "after" are 1 byte per unicode character.
2944
2945 The numbering shown are "columns", which are *byte* numbers within
2946 the line, rather than unicode character numbers.
2947
2948 .................... 000000000.1111111.
2949 .................... 123456789.0123456. */
2950 const char *content = (" \"before "
2951 /* U+6587 CJK UNIFIED IDEOGRAPH-6587
2952 UTF-8: 0xE6 0x96 0x87
2953 C octal escaped UTF-8: \346\226\207
2954 "column" numbers: 17-19. */
2955 "\346\226\207"
2956
2957 /* U+5B57 CJK UNIFIED IDEOGRAPH-5B57
2958 UTF-8: 0xE5 0xAD 0x97
2959 C octal escaped UTF-8: \345\255\227
2960 "column" numbers: 20-22. */
2961 "\345\255\227"
2962
2963 /* U+5316 CJK UNIFIED IDEOGRAPH-5316
2964 UTF-8: 0xE5 0x8C 0x96
2965 C octal escaped UTF-8: \345\214\226
2966 "column" numbers: 23-25. */
2967 "\345\214\226"
2968
2969 /* U+3051 HIRAGANA LETTER KE
2970 UTF-8: 0xE3 0x81 0x91
2971 C octal escaped UTF-8: \343\201\221
2972 "column" numbers: 26-28. */
2973 "\343\201\221"
2974
2975 /* column numbers 29 onwards
2976 2333333.33334444444444
2977 9012345.67890123456789. */
2978 " after\" /* non-str */\n");
2979 lexer_test test (case_, content, NULL);
2980
2981 /* Verify that we get the expected token back, with the correct
2982 location information. */
2983 const cpp_token *tok = test.get_token ();
2984 ASSERT_EQ (tok->type, CPP_STRING);
2985 ASSERT_TOKEN_AS_TEXT_EQ
2986 (test.m_parser, tok,
2987 "\"before \346\226\207\345\255\227\345\214\226\343\201\221 after\"");
2988
2989 /* Verify that cpp_interpret_string works. */
2990 cpp_string dst_string;
2991 const enum cpp_ttype type = CPP_STRING;
2992 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2993 &dst_string, type);
2994 ASSERT_TRUE (result);
2995 ASSERT_STREQ
2996 ("before \346\226\207\345\255\227\345\214\226\343\201\221 after",
2997 (const char *)dst_string.text);
2998 free (const_cast <unsigned char *> (dst_string.text));
2999
3000 /* Verify ranges of individual characters. This no longer includes the
3001 opening quote, but does include the closing quote.
3002 Assuming that both source and execution encodings are UTF-8, we have
3003 a run of 25 octets in each, plus the NUL terminator. */
3004 for (int i = 0; i < 25; i++)
3005 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
3006 /* NUL-terminator should use the closing quote at column 35. */
3007 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, 25, 1, 35, 35);
3008
3009 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 26);
3010}
3011
3012/* Test of string literal concatenation. */
3013
3014static void
3015test_lexer_string_locations_concatenation_1 (const line_table_case &case_)
3016{
3017 /* Digits 0-9.
3018 .....................000000000.111111.11112222222222
3019 .....................123456789.012345.67890123456789. */
3020 const char *content = (" \"01234\" /* non-str */\n"
3021 " \"56789\" /* non-str */\n");
3022 lexer_test test (case_, content, NULL);
3023
3024 location_t input_locs[2];
3025
3026 /* Verify that we get the expected tokens back. */
3027 auto_vec <cpp_string> input_strings;
3028 const cpp_token *tok_a = test.get_token ();
3029 ASSERT_EQ (tok_a->type, CPP_STRING);
3030 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok_a, "\"01234\"");
3031 input_strings.safe_push (tok_a->val.str);
3032 input_locs[0] = tok_a->src_loc;
3033
3034 const cpp_token *tok_b = test.get_token ();
3035 ASSERT_EQ (tok_b->type, CPP_STRING);
3036 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok_b, "\"56789\"");
3037 input_strings.safe_push (tok_b->val.str);
3038 input_locs[1] = tok_b->src_loc;
3039
3040 /* Verify that cpp_interpret_string works. */
3041 cpp_string dst_string;
3042 const enum cpp_ttype type = CPP_STRING;
3043 bool result = cpp_interpret_string (test.m_parser,
3044 input_strings.address (), 2,
3045 &dst_string, type);
3046 ASSERT_TRUE (result);
3047 ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
3048 free (const_cast <unsigned char *> (dst_string.text));
3049
3050 /* Simulate c-lex.c's lex_string in order to record concatenation. */
3051 test.m_concats.record_string_concatenation (2, input_locs);
3052
3053 location_t initial_loc = input_locs[0];
3054
3055 /* "01234" on line 1. */
3056 for (int i = 0; i <= 4; i++)
3057 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, i, 1, 10 + i, 10 + i);
3058 /* "56789" in line 2, plus its closing quote for the nul terminator. */
3059 for (int i = 5; i <= 10; i++)
3060 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, i, 2, 5 + i, 5 + i);
3061
3062 ASSERT_NUM_SUBSTRING_RANGES (test, initial_loc, type, 11);
3063}
3064
3065/* Another test of string literal concatenation. */
3066
3067static void
3068test_lexer_string_locations_concatenation_2 (const line_table_case &case_)
3069{
3070 /* Digits 0-9.
3071 .....................000000000.111.11111112222222
3072 .....................123456789.012.34567890123456. */
3073 const char *content = (" \"01\" /* non-str */\n"
3074 " \"23\" /* non-str */\n"
3075 " \"45\" /* non-str */\n"
3076 " \"67\" /* non-str */\n"
3077 " \"89\" /* non-str */\n");
3078 lexer_test test (case_, content, NULL);
3079
3080 auto_vec <cpp_string> input_strings;
3081 location_t input_locs[5];
3082
3083 /* Verify that we get the expected tokens back. */
3084 for (int i = 0; i < 5; i++)
3085 {
3086 const cpp_token *tok = test.get_token ();
3087 ASSERT_EQ (tok->type, CPP_STRING);
3088 input_strings.safe_push (tok->val.str);
3089 input_locs[i] = tok->src_loc;
3090 }
3091
3092 /* Verify that cpp_interpret_string works. */
3093 cpp_string dst_string;
3094 const enum cpp_ttype type = CPP_STRING;
3095 bool result = cpp_interpret_string (test.m_parser,
3096 input_strings.address (), 5,
3097 &dst_string, type);
3098 ASSERT_TRUE (result);
3099 ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
3100 free (const_cast <unsigned char *> (dst_string.text));
3101
3102 /* Simulate c-lex.c's lex_string in order to record concatenation. */
3103 test.m_concats.record_string_concatenation (5, input_locs);
3104
3105 location_t initial_loc = input_locs[0];
3106
3107 /* Within ASSERT_CHAR_AT_RANGE (actually assert_char_at_range), we can
3108 detect if the initial loc is after LINE_MAP_MAX_LOCATION_WITH_COLS
3109 and expect get_source_range_for_substring to fail.
3110 However, for a string concatenation test, we can have a case
3111 where the initial string is fully before LINE_MAP_MAX_LOCATION_WITH_COLS,
3112 but subsequent strings can be after it.
3113 Attempting to detect this within assert_char_at_range
3114 would overcomplicate the logic for the common test cases, so
3115 we detect it here. */
3116 if (should_have_column_data_p (input_locs[0])
3117 && !should_have_column_data_p (input_locs[4]))
3118 {
3119 /* Verify that get_source_range_for_substring gracefully rejects
3120 this case. */
3121 source_range actual_range;
3122 const char *err
3123 = get_source_range_for_char (test.m_parser, &test.m_concats,
3124 initial_loc, type, 0, &actual_range);
3125 ASSERT_STREQ ("range starts after LINE_MAP_MAX_LOCATION_WITH_COLS", err);
3126 return;
3127 }
3128
3129 for (int i = 0; i < 5; i++)
3130 for (int j = 0; j < 2; j++)
3131 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, (i * 2) + j,
3132 i + 1, 10 + j, 10 + j);
3133
3134 /* NUL-terminator should use the final closing quote at line 5 column 12. */
3135 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, 10, 5, 12, 12);
3136
3137 ASSERT_NUM_SUBSTRING_RANGES (test, initial_loc, type, 11);
3138}
3139
3140/* Another test of string literal concatenation, this time combined with
3141 various kinds of escaped characters. */
3142
3143static void
3144test_lexer_string_locations_concatenation_3 (const line_table_case &case_)
3145{
3146 /* Digits 0-9, expressing digit 5 in ASCII as hex "\x35"
3147 digit 6 in ASCII as octal "\066", concatenating multiple strings. */
3148 const char *content
3149 /* .000000000.111111.111.1.2222.222.2.2233.333.3333.34444444444555
3150 .123456789.012345.678.9.0123.456.7.8901.234.5678.90123456789012. */
3151 = (" \"01234\" \"\\x35\" \"\\066\" \"789\" /* non-str */\n");
3152 lexer_test test (case_, content, NULL);
3153
3154 auto_vec <cpp_string> input_strings;
3155 location_t input_locs[4];
3156
3157 /* Verify that we get the expected tokens back. */
3158 for (int i = 0; i < 4; i++)
3159 {
3160 const cpp_token *tok = test.get_token ();
3161 ASSERT_EQ (tok->type, CPP_STRING);
3162 input_strings.safe_push (tok->val.str);
3163 input_locs[i] = tok->src_loc;
3164 }
3165
3166 /* Verify that cpp_interpret_string works. */
3167 cpp_string dst_string;
3168 const enum cpp_ttype type = CPP_STRING;
3169 bool result = cpp_interpret_string (test.m_parser,
3170 input_strings.address (), 4,
3171 &dst_string, type);
3172 ASSERT_TRUE (result);
3173 ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
3174 free (const_cast <unsigned char *> (dst_string.text));
3175
3176 /* Simulate c-lex.c's lex_string in order to record concatenation. */
3177 test.m_concats.record_string_concatenation (4, input_locs);
3178
3179 location_t initial_loc = input_locs[0];
3180
3181 for (int i = 0; i <= 4; i++)
3182 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, i, 1, 10 + i, 10 + i);
3183 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, 5, 1, 19, 22);
3184 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, 6, 1, 27, 30);
3185 for (int i = 7; i <= 9; i++)
3186 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, i, 1, 28 + i, 28 + i);
3187
3188 /* NUL-terminator should use the location of the final closing quote. */
3189 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, 10, 1, 38, 38);
3190
3191 ASSERT_NUM_SUBSTRING_RANGES (test, initial_loc, type, 11);
3192}
3193
3194/* Test of string literal in a macro. */
3195
3196static void
3197test_lexer_string_locations_macro (const line_table_case &case_)
3198{
3199 /* Digits 0-9.
3200 .....................0000000001111111111.22222222223.
3201 .....................1234567890123456789.01234567890. */
3202 const char *content = ("#define MACRO \"0123456789\" /* non-str */\n"
3203 " MACRO");
3204 lexer_test test (case_, content, NULL);
3205
3206 /* Verify that we get the expected tokens back. */
3207 const cpp_token *tok = test.get_token ();
3208 ASSERT_EQ (tok->type, CPP_PADDING);
3209
3210 tok = test.get_token ();
3211 ASSERT_EQ (tok->type, CPP_STRING);
3212 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"0123456789\"");
3213
3214 /* Verify ranges of individual characters. We ought to
3215 see columns within the macro definition. */
3216 for (int i = 0; i <= 10; i++)
3217 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
3218 i, 1, 20 + i, 20 + i);
3219
3220 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 11);
3221
3222 tok = test.get_token ();
3223 ASSERT_EQ (tok->type, CPP_PADDING);
3224}
3225
3226/* Test of stringification of a macro argument. */
3227
3228static void
3229test_lexer_string_locations_stringified_macro_argument
3230 (const line_table_case &case_)
3231{
3232 /* .....................000000000111111111122222222223.
3233 .....................123456789012345678901234567890. */
3234 const char *content = ("#define MACRO(X) #X /* non-str */\n"
3235 "MACRO(foo)\n");
3236 lexer_test test (case_, content, NULL);
3237
3238 /* Verify that we get the expected token back. */
3239 const cpp_token *tok = test.get_token ();
3240 ASSERT_EQ (tok->type, CPP_PADDING);
3241
3242 tok = test.get_token ();
3243 ASSERT_EQ (tok->type, CPP_STRING);
3244 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"foo\"");
3245
3246 /* We don't support getting the location of a stringified macro
3247 argument. Verify that it fails gracefully. */
3248 ASSERT_HAS_NO_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING,
3249 "cpp_interpret_string_1 failed");
3250
3251 tok = test.get_token ();
3252 ASSERT_EQ (tok->type, CPP_PADDING);
3253
3254 tok = test.get_token ();
3255 ASSERT_EQ (tok->type, CPP_PADDING);
3256}
3257
3258/* Ensure that we are fail gracefully if something attempts to pass
3259 in a location that isn't a string literal token. Seen on this code:
3260
3261 const char a[] = " %d ";
3262 __builtin_printf (a, 0.5);
3263 ^
3264
3265 when c-format.c erroneously used the indicated one-character
3266 location as the format string location, leading to a read past the
3267 end of a string buffer in cpp_interpret_string_1. */
3268
3269static void
3270test_lexer_string_locations_non_string (const line_table_case &case_)
3271{
3272 /* .....................000000000111111111122222222223.
3273 .....................123456789012345678901234567890. */
3274 const char *content = (" a\n");
3275 lexer_test test (case_, content, NULL);
3276
3277 /* Verify that we get the expected token back. */
3278 const cpp_token *tok = test.get_token ();
3279 ASSERT_EQ (tok->type, CPP_NAME);
3280 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "a");
3281
3282 /* At this point, libcpp is attempting to interpret the name as a
3283 string literal, despite it not starting with a quote. We don't detect
3284 that, but we should at least fail gracefully. */
3285 ASSERT_HAS_NO_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING,
3286 "cpp_interpret_string_1 failed");
3287}
3288
3289/* Ensure that we can read substring information for a token which
3290 starts in one linemap and ends in another . Adapted from
3291 gcc.dg/cpp/pr69985.c. */
3292
3293static void
3294test_lexer_string_locations_long_line (const line_table_case &case_)
3295{
3296 /* .....................000000.000111111111
3297 .....................123456.789012346789. */
3298 const char *content = ("/* A very long line, so that we start a new line map. */\n"
3299 " \"0123456789012345678901234567890123456789"
3300 "0123456789012345678901234567890123456789"
3301 "0123456789012345678901234567890123456789"
3302 "0123456789\"\n");
3303
3304 lexer_test test (case_, content, NULL);
3305
3306 /* Verify that we get the expected token back. */
3307 const cpp_token *tok = test.get_token ();
3308 ASSERT_EQ (tok->type, CPP_STRING);
3309
3310 if (!should_have_column_data_p (line_table->highest_location))
3311 return;
3312
3313 /* Verify ranges of individual characters. */
3314 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 131);
3315 for (int i = 0; i < 131; i++)
3316 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
3317 i, 2, 7 + i, 7 + i);
3318}
3319
3320/* Test of locations within a raw string that doesn't contain a newline. */
3321
3322static void
3323test_lexer_string_locations_raw_string_one_line (const line_table_case &case_)
3324{
3325 /* .....................00.0000000111111111122.
3326 .....................12.3456789012345678901. */
3327 const char *content = ("R\"foo(0123456789)foo\"\n");
3328 lexer_test test (case_, content, NULL);
3329
3330 /* Verify that we get the expected token back. */
3331 const cpp_token *tok = test.get_token ();
3332 ASSERT_EQ (tok->type, CPP_STRING);
3333
3334 /* Verify that cpp_interpret_string works. */
3335 cpp_string dst_string;
3336 const enum cpp_ttype type = CPP_STRING;
3337 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
3338 &dst_string, type);
3339 ASSERT_TRUE (result);
3340 ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
3341 free (const_cast <unsigned char *> (dst_string.text));
3342
3343 if (!should_have_column_data_p (line_table->highest_location))
3344 return;
3345
3346 /* 0-9, plus the nil terminator. */
3347 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 11);
3348 for (int i = 0; i < 11; i++)
3349 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
3350 i, 1, 7 + i, 7 + i);
3351}
3352
3353/* Test of locations within a raw string that contains a newline. */
3354
3355static void
3356test_lexer_string_locations_raw_string_multiline (const line_table_case &case_)
3357{
3358 /* .....................00.0000.
3359 .....................12.3456. */
3360 const char *content = ("R\"foo(\n"
3361 /* .....................00000.
3362 .....................12345. */
3363 "hello\n"
3364 "world\n"
3365 /* .....................00000.
3366 .....................12345. */
3367 ")foo\"\n");
3368 lexer_test test (case_, content, NULL);
3369
3370 /* Verify that we get the expected token back. */
3371 const cpp_token *tok = test.get_token ();
3372 ASSERT_EQ (tok->type, CPP_STRING);
3373
3374 /* Verify that cpp_interpret_string works. */
3375 cpp_string dst_string;
3376 const enum cpp_ttype type = CPP_STRING;
3377 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
3378 &dst_string, type);
3379 ASSERT_TRUE (result);
3380 ASSERT_STREQ ("\nhello\nworld\n", (const char *)dst_string.text);
3381 free (const_cast <unsigned char *> (dst_string.text));
3382
3383 if (!should_have_column_data_p (line_table->highest_location))
3384 return;
3385
3386 /* Currently we don't support locations within raw strings that
3387 contain newlines. */
3388 ASSERT_HAS_NO_SUBSTRING_RANGES (test, tok->src_loc, tok->type,
3389 "range endpoints are on different lines");
3390}
3391
3392/* Test of parsing an unterminated raw string. */
3393
3394static void
3395test_lexer_string_locations_raw_string_unterminated (const line_table_case &case_)
3396{
3397 const char *content = "R\"ouch()ouCh\" /* etc */";
3398
3399 lexer_error_sink errors;
3400 lexer_test test (case_, content, &errors);
3401 test.m_implicitly_expect_EOF = false;
3402
3403 /* Attempt to parse the raw string. */
3404 const cpp_token *tok = test.get_token ();
3405 ASSERT_EQ (tok->type, CPP_EOF);
3406
3407 ASSERT_EQ (1, errors.m_errors.length ());
3408 /* We expect the message "unterminated raw string"
3409 in the "cpplib" translation domain.
3410 It's not clear that dgettext is available on all supported hosts,
3411 so this assertion is commented-out for now.
3412 ASSERT_STREQ (dgettext ("cpplib", "unterminated raw string"),
3413 errors.m_errors[0]);
3414 */
3415}
3416
3417/* Test of lexing char constants. */
3418
3419static void
3420test_lexer_char_constants (const line_table_case &case_)
3421{
3422 /* Various char constants.
3423 .....................0000000001111111111.22222222223.
3424 .....................1234567890123456789.01234567890. */
3425 const char *content = (" 'a'\n"
3426 " u'a'\n"
3427 " U'a'\n"
3428 " L'a'\n"
3429 " 'abc'\n");
3430 lexer_test test (case_, content, NULL);
3431
3432 /* Verify that we get the expected tokens back. */
3433 /* 'a'. */
3434 const cpp_token *tok = test.get_token ();
3435 ASSERT_EQ (tok->type, CPP_CHAR);
3436 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "'a'");
3437
3438 unsigned int chars_seen;
3439 int unsignedp;
3440 cppchar_t cc = cpp_interpret_charconst (test.m_parser, tok,
3441 &chars_seen, &unsignedp);
3442 ASSERT_EQ (cc, 'a');
3443 ASSERT_EQ (chars_seen, 1);
3444
3445 /* u'a'. */
3446 tok = test.get_token ();
3447 ASSERT_EQ (tok->type, CPP_CHAR16);
3448 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "u'a'");
3449
3450 /* U'a'. */
3451 tok = test.get_token ();
3452 ASSERT_EQ (tok->type, CPP_CHAR32);
3453 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "U'a'");
3454
3455 /* L'a'. */
3456 tok = test.get_token ();
3457 ASSERT_EQ (tok->type, CPP_WCHAR);
3458 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "L'a'");
3459
3460 /* 'abc' (c-char-sequence). */
3461 tok = test.get_token ();
3462 ASSERT_EQ (tok->type, CPP_CHAR);
3463 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "'abc'");
3464}
/* A table of interesting location_t values, giving one axis of our test
   matrix.  for_each_line_table_case passes each entry in turn as the
   second argument when constructing a line_table_case, and asserts that
   exactly 12 entries per range-bits setting were visited, so that
   assertion must be updated whenever an entry is added or removed.  */

static const location_t boundary_locations[] = {
  /* Zero means "don't override the default values for a new line_table".  */
  0,

  /* An arbitrary non-zero value that isn't close to one of
     the boundary values below.  */
  0x10000,

  /* Values near LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES: just below,
     exactly at, and just above the threshold.  */
  LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES - 0x100,
  LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES - 1,
  LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES,
  LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES + 1,
  LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES + 0x100,

  /* Values near LINE_MAP_MAX_LOCATION_WITH_COLS: just below,
     exactly at, and just above the threshold.  */
  LINE_MAP_MAX_LOCATION_WITH_COLS - 0x100,
  LINE_MAP_MAX_LOCATION_WITH_COLS - 1,
  LINE_MAP_MAX_LOCATION_WITH_COLS,
  LINE_MAP_MAX_LOCATION_WITH_COLS + 1,
  LINE_MAP_MAX_LOCATION_WITH_COLS + 0x100,
};
3490
3491/* Run TESTCASE multiple times, once for each case in our test matrix. */
3492
3493void
3494for_each_line_table_case (void (*testcase) (const line_table_case &))
3495{
3496 /* As noted above in the description of struct line_table_case,
3497 we want to explore a test matrix of interesting line_table
3498 situations, running various selftests for each case within the
3499 matrix. */
3500
3501 /* Run all tests with:
3502 (a) line_table->default_range_bits == 0, and
3503 (b) line_table->default_range_bits == 5. */
3504 int num_cases_tested = 0;
3505 for (int default_range_bits = 0; default_range_bits <= 5;
3506 default_range_bits += 5)
3507 {
3508 /* ...and use each of the "interesting" location values as
3509 the starting location within line_table. */
3510 const int num_boundary_locations
3511 = sizeof (boundary_locations) / sizeof (boundary_locations[0]);
3512 for (int loc_idx = 0; loc_idx < num_boundary_locations; loc_idx++)
3513 {
3514 line_table_case c (default_range_bits, boundary_locations[loc_idx]);
3515
3516 testcase (c);
3517
3518 num_cases_tested++;
3519 }
3520 }
3521
3522 /* Verify that we fully covered the test matrix. */
3523 ASSERT_EQ (num_cases_tested, 2 * 12);
3524}
3525
3526/* Run all of the selftests within this file. */
3527
3528void
3529input_c_tests ()
3530{
3531 test_should_have_column_data_p ();
3532 test_unknown_location ();
3533 test_builtins ();
3534 for_each_line_table_case (test_make_location_nonpure_range_endpoints);
3535
3536 for_each_line_table_case (test_accessing_ordinary_linemaps);
3537 for_each_line_table_case (test_lexer);
3538 for_each_line_table_case (test_lexer_string_locations_simple);
3539 for_each_line_table_case (test_lexer_string_locations_ebcdic);
3540 for_each_line_table_case (test_lexer_string_locations_hex);
3541 for_each_line_table_case (test_lexer_string_locations_oct);
3542 for_each_line_table_case (test_lexer_string_locations_letter_escape_1);
3543 for_each_line_table_case (test_lexer_string_locations_letter_escape_2);
3544 for_each_line_table_case (test_lexer_string_locations_ucn4);
3545 for_each_line_table_case (test_lexer_string_locations_ucn8);
3546 for_each_line_table_case (test_lexer_string_locations_wide_string);
3547 for_each_line_table_case (test_lexer_string_locations_string16);
3548 for_each_line_table_case (test_lexer_string_locations_string32);
3549 for_each_line_table_case (test_lexer_string_locations_u8);
3550 for_each_line_table_case (test_lexer_string_locations_utf8_source);
3551 for_each_line_table_case (test_lexer_string_locations_concatenation_1);
3552 for_each_line_table_case (test_lexer_string_locations_concatenation_2);
3553 for_each_line_table_case (test_lexer_string_locations_concatenation_3);
3554 for_each_line_table_case (test_lexer_string_locations_macro);
3555 for_each_line_table_case (test_lexer_string_locations_stringified_macro_argument);
3556 for_each_line_table_case (test_lexer_string_locations_non_string);
3557 for_each_line_table_case (test_lexer_string_locations_long_line);
3558 for_each_line_table_case (test_lexer_string_locations_raw_string_one_line);
3559 for_each_line_table_case (test_lexer_string_locations_raw_string_multiline);
3560 for_each_line_table_case (test_lexer_string_locations_raw_string_unterminated);
3561 for_each_line_table_case (test_lexer_char_constants);
3562
3563 test_reading_source_line ();
3564}
3565
3566} // namespace selftest
3567
3568#endif /* CHECKING_P */
3569