1/* Bundles of location information used when printing diagnostics.
2 Copyright (C) 2015-2024 Free Software Foundation, Inc.
3
4This program is free software; you can redistribute it and/or modify it
5under the terms of the GNU General Public License as published by the
6Free Software Foundation; either version 3, or (at your option) any
7later version.
8
9This program is distributed in the hope that it will be useful,
10but WITHOUT ANY WARRANTY; without even the implied warranty of
11MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12GNU General Public License for more details.
13
14You should have received a copy of the GNU General Public License
15along with this program; see the file COPYING3. If not see
16<http://www.gnu.org/licenses/>.
17
18 In other words, you are welcome to use, share and improve this program.
19 You are forbidden to forbid anyone else to use, share and improve
20 what you give them. Help stamp out software-hoarding! */
21
22#ifndef LIBCPP_RICH_LOCATION_H
23#define LIBCPP_RICH_LOCATION_H
24
25class range_label;
26
/* Tells diagnostic_show_locus how a given source range within a
   rich_location should be printed.

   The usual arrangement is SHOW_RANGE_WITH_CARET for range 0 and
   SHOW_RANGE_WITHOUT_CARET for each subsequent range.  The Fortran
   frontend is an exception: it uses SHOW_RANGE_WITH_CARET repeatedly,
   for printing things like:

       x = x + y
           1   2
       Error: Shapes for operands at (1) and (2) are not conformable

   where "1" and "2" are notionally carets.  */

enum range_display_kind
{
  /* Print the pertinent source line(s) together with the caret and
     the underline(s).  */
  SHOW_RANGE_WITH_CARET = 0,

  /* Print the pertinent source line(s) and the underline(s), but
     leave out the caret (the range is just underlined).  */
  SHOW_RANGE_WITHOUT_CARET = 1,

  /* Print only the source lines, without marking the range at all.
     Used when displaying some line-insertion fix-it hints: the line is
     there to give the user context on the change, and highlighting the
     first column of the next line would not make sense.  */
  SHOW_LINES_WITHOUT_RANGE = 2
};
56
57/* A location within a rich_location: a caret&range, with
58 the caret potentially flagged for display, and an optional
59 label. */
60
61struct location_range
62{
63 location_t m_loc;
64
65 enum range_display_kind m_range_display_kind;
66
67 /* If non-NULL, the label for this range. */
68 const range_label *m_label;
69};
70
/* A partially-embedded vec for use within rich_location for storing
   ranges and fix-it hints.

   Elements [0..NUM_EMBEDDED) are allocated within m_embedded; after
   that they are within the dynamically-allocated m_extra.

   This allows for static allocation in the common case, whilst
   supporting the rarer case of an arbitrary number of elements.

   Dynamic allocation is not performed unless it's needed.

   Instances cannot be copied: the destructor releases m_extra, so a
   member-wise copy would result in the same buffer being freed twice.  */

template <typename T, int NUM_EMBEDDED>
class semi_embedded_vec
{
 public:
  semi_embedded_vec ();
  ~semi_embedded_vec ();

  /* Copying is forbidden, since m_extra is owned by exactly one
     instance (see the class comment).  */
  semi_embedded_vec (const semi_embedded_vec &) = delete;
  semi_embedded_vec &operator= (const semi_embedded_vec &) = delete;

  /* The number of elements currently stored.  */
  unsigned int count () const { return m_num; }

  /* Element access by index, in mutable and const flavors.  */
  T& operator[] (int idx);
  const T& operator[] (int idx) const;

  /* Append an element to the end.  */
  void push (const T&);

  /* Reduce the number of elements to LEN.  */
  void truncate (int len);

 private:
  /* Total number of elements in use, across m_embedded and m_extra.  */
  int m_num;

  /* Storage for elements [0..NUM_EMBEDDED).  */
  T m_embedded[NUM_EMBEDDED];

  /* How many elements m_extra has room for; 0 while m_extra is NULL.  */
  int m_alloc;

  /* Storage for the elements from index NUM_EMBEDDED onwards, or NULL
     if no such storage has been needed yet.  */
  T *m_extra;
};
102
103/* Constructor for semi_embedded_vec. In particular, no dynamic allocation
104 is done. */
105
106template <typename T, int NUM_EMBEDDED>
107semi_embedded_vec<T, NUM_EMBEDDED>::semi_embedded_vec ()
108: m_num (0), m_alloc (0), m_extra (NULL)
109{
110}
111
112/* semi_embedded_vec's dtor. Release any dynamically-allocated memory. */
113
114template <typename T, int NUM_EMBEDDED>
115semi_embedded_vec<T, NUM_EMBEDDED>::~semi_embedded_vec ()
116{
117 XDELETEVEC (m_extra);
118}
119
120/* Look up element IDX, mutably. */
121
122template <typename T, int NUM_EMBEDDED>
123T&
124semi_embedded_vec<T, NUM_EMBEDDED>::operator[] (int idx)
125{
126 linemap_assert (idx < m_num);
127 if (idx < NUM_EMBEDDED)
128 return m_embedded[idx];
129 else
130 {
131 linemap_assert (m_extra != NULL);
132 return m_extra[idx - NUM_EMBEDDED];
133 }
134}
135
136/* Look up element IDX (const). */
137
138template <typename T, int NUM_EMBEDDED>
139const T&
140semi_embedded_vec<T, NUM_EMBEDDED>::operator[] (int idx) const
141{
142 linemap_assert (idx < m_num);
143 if (idx < NUM_EMBEDDED)
144 return m_embedded[idx];
145 else
146 {
147 linemap_assert (m_extra != NULL);
148 return m_extra[idx - NUM_EMBEDDED];
149 }
150}
151
152/* Append VALUE to the end of the semi_embedded_vec. */
153
154template <typename T, int NUM_EMBEDDED>
155void
156semi_embedded_vec<T, NUM_EMBEDDED>::push (const T& value)
157{
158 int idx = m_num++;
159 if (idx < NUM_EMBEDDED)
160 m_embedded[idx] = value;
161 else
162 {
163 /* Offset "idx" to be an index within m_extra. */
164 idx -= NUM_EMBEDDED;
165 if (NULL == m_extra)
166 {
167 linemap_assert (m_alloc == 0);
168 m_alloc = 16;
169 m_extra = XNEWVEC (T, m_alloc);
170 }
171 else if (idx >= m_alloc)
172 {
173 linemap_assert (m_alloc > 0);
174 m_alloc *= 2;
175 m_extra = XRESIZEVEC (T, m_extra, m_alloc);
176 }
177 linemap_assert (m_extra);
178 linemap_assert (idx < m_alloc);
179 m_extra[idx] = value;
180 }
181}
182
183/* Truncate to length LEN. No deallocation is performed. */
184
185template <typename T, int NUM_EMBEDDED>
186void
187semi_embedded_vec<T, NUM_EMBEDDED>::truncate (int len)
188{
189 linemap_assert (len <= m_num);
190 m_num = len;
191}
192
193class fixit_hint;
194class diagnostic_path;
195
196/* A "rich" source code location, for use when printing diagnostics.
197 A rich_location has one or more carets&ranges, where the carets
198 are optional. These are referred to as "ranges" from here.
199 Typically the zeroth range has a caret; other ranges sometimes
200 have carets.
201
202 The "primary" location of a rich_location is the caret of range 0,
203 used for determining the line/column when printing diagnostic
204 text, such as:
205
206 some-file.c:3:1: error: ...etc...
207
208 Additional ranges may be added to help the user identify other
209 pertinent clauses in a diagnostic.
210
211 Ranges can (optionally) be given labels via class range_label.
212
213 rich_location instances are intended to be allocated on the stack
214 when generating diagnostics, and to be short-lived.
215
216 Examples of rich locations
217 --------------------------
218
219 Example A
220 *********
221 int i = "foo";
222 ^
223 This "rich" location is simply a single range (range 0), with
224 caret = start = finish at the given point.
225
226 Example B
227 *********
228 a = (foo && bar)
229 ~~~~~^~~~~~~
230 This rich location has a single range (range 0), with the caret
231 at the first "&", and the start/finish at the parentheses.
232 Compare with example C below.
233
234 Example C
235 *********
236 a = (foo && bar)
237 ~~~ ^~ ~~~
238 This rich location has three ranges:
239 - Range 0 has its caret and start location at the first "&" and
240 end at the second "&".
241 - Range 1 has its start and finish at the "f" and "o" of "foo";
242 the caret is not flagged for display, but is perhaps at the "f"
243 of "foo".
244 - Similarly, range 2 has its start and finish at the "b" and "r" of
245 "bar"; the caret is not flagged for display, but is perhaps at the
246 "b" of "bar".
247 Compare with example B above.
248
249 Example D (Fortran frontend)
250 ****************************
251 x = x + y
252 1 2
253 This rich location has range 0 at "1", and range 1 at "2".
254 Both are flagged for caret display. Both ranges have start/finish
255 equal to their caret point. The frontend overrides the diagnostic
256 context's default caret character for these ranges.
257
258 Example E (range labels)
259 ************************
260 printf ("arg0: %i arg1: %s arg2: %i",
261 ^~
262 |
263 const char *
264 100, 101, 102);
265 ~~~
266 |
267 int
268 This rich location has two ranges:
269 - range 0 is at the "%s" with start = caret = "%" and finish at
270 the "s". It has a range_label ("const char *").
271 - range 1 has start/finish covering the "101" and is not flagged for
272 caret printing. The caret is at the start of "101", where its
273 range_label is printed ("int").
274
275 Fix-it hints
276 ------------
277
278 Rich locations can also contain "fix-it hints", giving suggestions
279 for the user on how to edit their code to fix a problem. These
280 can be expressed as insertions, replacements, and removals of text.
281 The edits by default are relative to the zeroth range within the
282 rich_location, but optionally they can be expressed relative to
283 other locations (using various overloaded methods of the form
284 rich_location::add_fixit_*).
285
286 For example:
287
288 Example F: fix-it hint: insert_before
289 *************************************
290 ptr = arr[0];
291 ^~~~~~
292 &
293 This rich location has a single range (range 0) covering "arr[0]",
294 with the caret at the start. The rich location has a single
295 insertion fix-it hint, inserted before range 0, added via
296 richloc.add_fixit_insert_before ("&");
297
298 Example G: multiple fix-it hints: insert_before and insert_after
299 ****************************************************************
300 #define FN(ARG0, ARG1, ARG2) fn(ARG0, ARG1, ARG2)
301 ^~~~ ^~~~ ^~~~
302 ( ) ( ) ( )
303 This rich location has three ranges, covering "arg0", "arg1",
304 and "arg2", all with caret-printing enabled.
305 The rich location has 6 insertion fix-it hints: each arg
306 has a pair of insertion fix-it hints, suggesting wrapping
307 them with parentheses: one a '(' inserted before,
308 the other a ')' inserted after, added via
309 richloc.add_fixit_insert_before (LOC, "(");
310 and
311 richloc.add_fixit_insert_after (LOC, ")");
312
313 Example H: fix-it hint: removal
314 *******************************
315 struct s {int i};;
316 ^
317 -
318 This rich location has a single range at the stray trailing
319 semicolon, along with a single removal fix-it hint, covering
320 the same range, added via:
321 richloc.add_fixit_remove ();
322
323 Example I: fix-it hint: replace
324 *******************************
325 c = s.colour;
326 ^~~~~~
327 color
328 This rich location has a single range (range 0) covering "colour",
329 and a single "replace" fix-it hint, covering the same range,
330 added via
331 richloc.add_fixit_replace ("color");
332
333 Example J: fix-it hint: line insertion
334 **************************************
335
336 3 | #include <stddef.h>
337 + |+#include <stdio.h>
338 4 | int the_next_line;
339
340 This rich location has a single range at line 4 column 1, marked
341 with SHOW_LINES_WITHOUT_RANGE (to avoid printing a meaningless caret
342 on the "i" of int). It has an insertion fix-it hint of the string
343 "#include <stdio.h>\n".
344
345 Adding a fix-it hint can fail: for example, attempts to insert content
346 at the transition between two line maps may fail due to there being no
347 location_t value to express the new location.
348
349 Attempts to add a fix-it hint within a macro expansion will fail.
350
351 There is only limited support for newline characters in fix-it hints:
352 only hints with newlines which insert an entire new line are permitted,
353 inserting at the start of a line, and finishing with a newline
354 (with no interior newline characters). Other attempts to add
355 fix-it hints containing newline characters will fail.
356 Similarly, attempts to delete or replace a range *affecting* multiple
357 lines will fail.
358
359 The rich_location API handles these failures gracefully, so that
360 diagnostics can attempt to add fix-it hints without each needing
361 extensive checking.
362
363 Fix-it hints within a rich_location are "atomic": if any hints can't
364 be applied, none of them will be (tracked by the m_seen_impossible_fixit
365 flag), and no fix-it hints will be displayed for that rich_location.
366 This implies that diagnostic messages need to be worded in such a way
367 that they make sense whether or not the fix-it hints are displayed,
368 or that richloc.seen_impossible_fixit_p () should be checked before
369 issuing the diagnostics. */
370
371class rich_location
372{
373 public:
374 /* Constructors. */
375
376 /* Constructing from a location. */
377 rich_location (line_maps *set, location_t loc,
378 const range_label *label = NULL);
379
380 /* Destructor. */
381 ~rich_location ();
382
383 /* The class manages the memory pointed to by the elements of
384 the M_FIXIT_HINTS vector and is not meant to be copied or
385 assigned. */
386 rich_location (const rich_location &) = delete;
387 void operator= (const rich_location &) = delete;
388
389 /* Accessors. */
390 location_t get_loc () const { return get_loc (idx: 0); }
391 location_t get_loc (unsigned int idx) const;
392
393 void
394 add_range (location_t loc,
395 enum range_display_kind range_display_kind
396 = SHOW_RANGE_WITHOUT_CARET,
397 const range_label *label = NULL);
398
399 void
400 set_range (unsigned int idx, location_t loc,
401 enum range_display_kind range_display_kind);
402
403 unsigned int get_num_locations () const { return m_ranges.count (); }
404
405 const location_range *get_range (unsigned int idx) const;
406 location_range *get_range (unsigned int idx);
407
408 expanded_location get_expanded_location (unsigned int idx) const;
409
410 void
411 override_column (int column);
412
413 /* Fix-it hints. */
414
415 /* Methods for adding insertion fix-it hints. */
416
417 /* Suggest inserting NEW_CONTENT immediately before the primary
418 range's start. */
419 void
420 add_fixit_insert_before (const char *new_content);
421
422 /* Suggest inserting NEW_CONTENT immediately before the start of WHERE. */
423 void
424 add_fixit_insert_before (location_t where,
425 const char *new_content);
426
427 /* Suggest inserting NEW_CONTENT immediately after the end of the primary
428 range. */
429 void
430 add_fixit_insert_after (const char *new_content);
431
432 /* Suggest inserting NEW_CONTENT immediately after the end of WHERE. */
433 void
434 add_fixit_insert_after (location_t where,
435 const char *new_content);
436
437 /* Methods for adding removal fix-it hints. */
438
439 /* Suggest removing the content covered by range 0. */
440 void
441 add_fixit_remove ();
442
443 /* Suggest removing the content covered between the start and finish
444 of WHERE. */
445 void
446 add_fixit_remove (location_t where);
447
448 /* Suggest removing the content covered by SRC_RANGE. */
449 void
450 add_fixit_remove (source_range src_range);
451
452 /* Methods for adding "replace" fix-it hints. */
453
454 /* Suggest replacing the content covered by range 0 with NEW_CONTENT. */
455 void
456 add_fixit_replace (const char *new_content);
457
458 /* Suggest replacing the content between the start and finish of
459 WHERE with NEW_CONTENT. */
460 void
461 add_fixit_replace (location_t where,
462 const char *new_content);
463
464 /* Suggest replacing the content covered by SRC_RANGE with
465 NEW_CONTENT. */
466 void
467 add_fixit_replace (source_range src_range,
468 const char *new_content);
469
470 unsigned int get_num_fixit_hints () const { return m_fixit_hints.count (); }
471 fixit_hint *get_fixit_hint (int idx) const { return m_fixit_hints[idx]; }
472 fixit_hint *get_last_fixit_hint () const;
473 bool seen_impossible_fixit_p () const { return m_seen_impossible_fixit; }
474
475 /* Set this if the fix-it hints are not suitable to be
476 automatically applied.
477
478 For example, if you are suggesting more than one
479 mutually exclusive solution to a problem, then
480 it doesn't make sense to apply all of the solutions;
481 manual intervention is required.
482
483 If set, then the fix-it hints in the rich_location will
484 be printed, but will not be added to generated patches,
485 or affect the modified version of the file. */
486 void fixits_cannot_be_auto_applied ()
487 {
488 m_fixits_cannot_be_auto_applied = true;
489 }
490
491 bool fixits_can_be_auto_applied_p () const
492 {
493 return !m_fixits_cannot_be_auto_applied;
494 }
495
496 /* An optional path through the code. */
497 const diagnostic_path *get_path () const { return m_path; }
498 void set_path (const diagnostic_path *path) { m_path = path; }
499
500 /* A flag for hinting that the diagnostic involves character encoding
501 issues, and thus that it will be helpful to the user if we show some
502 representation of how the characters in the pertinent source lines
503 are encoded.
504 The default is false (i.e. do not escape).
505 When set to true, non-ASCII bytes in the pertinent source lines will
506 be escaped in a manner controlled by the user-supplied option
507 -fdiagnostics-escape-format=, so that the user can better understand
508 what's going on with the encoding in their source file. */
509 bool escape_on_output_p () const { return m_escape_on_output; }
510 void set_escape_on_output (bool flag) { m_escape_on_output = flag; }
511
512 const line_maps *get_line_table () const { return m_line_table; }
513
514private:
515 bool reject_impossible_fixit (location_t where);
516 void stop_supporting_fixits ();
517 void maybe_add_fixit (location_t start,
518 location_t next_loc,
519 const char *new_content);
520
521public:
522 static const int STATICALLY_ALLOCATED_RANGES = 3;
523
524protected:
525 line_maps * const m_line_table;
526 semi_embedded_vec <location_range, STATICALLY_ALLOCATED_RANGES> m_ranges;
527
528 int m_column_override;
529
530 mutable bool m_have_expanded_location;
531 bool m_seen_impossible_fixit;
532 bool m_fixits_cannot_be_auto_applied;
533 bool m_escape_on_output;
534
535 mutable expanded_location m_expanded_location;
536
537 static const int MAX_STATIC_FIXIT_HINTS = 2;
538 semi_embedded_vec <fixit_hint *, MAX_STATIC_FIXIT_HINTS> m_fixit_hints;
539
540 const diagnostic_path *m_path;
541};
542
/* A class for the result of range_label::get_text: a NUL-terminated buffer
   of localized text, and a flag recording whether this instance owns the
   buffer and must therefore "free" it.

   Instances can be moved but not copied.  */

class label_text
{
public:
  /* Construct an empty label_text: no buffer, and nothing owned.  */
  label_text ()
  : m_buffer (NULL), m_owned (false)
  {}

  /* Free the buffer, but only if this instance owns it.  */
  ~label_text ()
  {
    if (m_owned)
      free (m_buffer);
  }

  /* Move ctor: take over OTHER's buffer and ownership flag, leaving
     OTHER empty.  */
  label_text (label_text &&other)
  : m_buffer (other.m_buffer), m_owned (other.m_owned)
  {
    other.release ();
  }

  /* Move assignment: free any buffer this instance owns, then take over
     OTHER's buffer and ownership flag, leaving OTHER empty.  */
  label_text & operator= (label_text &&other)
  {
    if (m_owned)
      free (m_buffer);
    m_buffer = other.m_buffer;
    m_owned = other.m_owned;
    other.release ();
    return *this;
  }

  /* Delete the copy ctor and copy-assignment operator.  */
  label_text (const label_text &) = delete;
  label_text & operator= (const label_text &) = delete;

  /* Create a label_text instance that borrows BUFFER from a
     longer-lived owner.  The result never frees BUFFER.  */
  static label_text borrow (const char *buffer)
  {
    return label_text (const_cast <char *> (buffer), false);
  }

  /* Create a label_text instance that takes ownership of BUFFER.
     BUFFER will be passed to "free" when the owning instance is
     destroyed or assigned to.  */
  static label_text take (char *buffer)
  {
    return label_text (buffer, true);
  }

  /* Forget the buffer without freeing it: afterwards this instance is
     empty and owns nothing.  */
  void release ()
  {
    m_buffer = NULL;
    m_owned = false;
  }

  /* The text, or NULL if this instance is empty.  */
  const char *get () const
  {
    return m_buffer;
  }

  /* True if this instance is responsible for freeing the buffer.  */
  bool is_owner () const
  {
    return m_owned;
  }

private:
  /* The text; may be NULL.  */
  char *m_buffer;

  /* Whether m_buffer is to be freed by this instance.  */
  bool m_owned;

  /* Used by borrow and take.  */
  label_text (char *buffer, bool owned)
  : m_buffer (buffer), m_owned (owned)
  {}
};
619
620/* Abstract base class for labelling a range within a rich_location
621 (e.g. for labelling expressions with their type).
622
623 Generating the text could require non-trivial work, so this work
624 is delayed (via the "get_text" virtual function) until the diagnostic
625 printing code "knows" it needs it, thus avoiding doing it e.g. for
626 warnings that are filtered by command-line flags. This virtual
627 function also isolates libcpp and the diagnostics subsystem from
628 the front-end and middle-end-specific code for generating the text
629 for the labels.
630
631 Like the rich_location instances they annotate, range_label instances
632 are intended to be allocated on the stack when generating diagnostics,
633 and to be short-lived. */
634
635class range_label
636{
637 public:
638 virtual ~range_label () {}
639
640 /* Get localized text for the label.
641 The RANGE_IDX is provided, allowing for range_label instances to be
642 shared by multiple ranges if need be (the "flyweight" design pattern). */
643 virtual label_text get_text (unsigned range_idx) const = 0;
644};
645
646/* A fix-it hint: a suggested insertion, replacement, or deletion of text.
647 We handle these three types of edit with one class, by representing
648 them as replacement of a half-open range:
649 [start, next_loc)
650 Insertions have start == next_loc: "replace" the empty string at the
651 start location with the new string.
652 Deletions are replacement with the empty string.
653
654 There is only limited support for newline characters in fix-it hints
655 as noted above in the comment for class rich_location.
656 A fixit_hint instance can have at most one newline character; if
657 present, the newline character must be the final character of
658 the content (preventing e.g. fix-its that split a pre-existing line). */
659
660class fixit_hint
661{
662 public:
663 fixit_hint (location_t start,
664 location_t next_loc,
665 const char *new_content);
666 ~fixit_hint () { free (ptr: m_bytes); }
667
668 bool affects_line_p (const line_maps *set,
669 const char *file,
670 int line) const;
671 location_t get_start_loc () const { return m_start; }
672 location_t get_next_loc () const { return m_next_loc; }
673 bool maybe_append (location_t start,
674 location_t next_loc,
675 const char *new_content);
676
677 const char *get_string () const { return m_bytes; }
678 size_t get_length () const { return m_len; }
679
680 bool insertion_p () const { return m_start == m_next_loc; }
681
682 bool ends_with_newline_p () const;
683
684 private:
685 /* We don't use source_range here since, unlike most places,
686 this is a half-open/half-closed range:
687 [start, next_loc)
688 so that we can support insertion via start == next_loc. */
689 location_t m_start;
690 location_t m_next_loc;
691 char *m_bytes;
692 size_t m_len;
693};
694
695#endif /* !LIBCPP_RICH_LOCATION_H */
696

/* Source code of libcpp/include/rich-location.h.  */