1//===- Markup.h -------------------------------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// This file declares the log symbolizer markup data model and parser.
11///
12/// See https://llvm.org/docs/SymbolizerMarkupFormat.html
13///
14//===----------------------------------------------------------------------===//
15
16#ifndef LLVM_DEBUGINFO_SYMBOLIZE_MARKUP_H
17#define LLVM_DEBUGINFO_SYMBOLIZE_MARKUP_H
18
19#include "llvm/ADT/SmallVector.h"
20#include "llvm/ADT/StringRef.h"
21#include "llvm/ADT/StringSet.h"
22#include "llvm/Support/Regex.h"
23
24namespace llvm {
25namespace symbolize {
26
27/// A node of symbolizer markup.
28///
29/// If only the Text field is set, this represents a region of text outside a
30/// markup element. ANSI SGR control codes are also reported this way; if
31/// detected, then the control code will be the entirety of the Text field, and
32/// any surrounding text will be reported as preceding and following nodes.
33struct MarkupNode {
34 /// The full text of this node in the input.
35 StringRef Text;
36
37 /// If this represents an element, the tag. Otherwise, empty.
38 StringRef Tag;
39
40 /// If this represents an element with fields, a list of the field contents.
41 /// Otherwise, empty.
42 SmallVector<StringRef> Fields;
43
44 bool operator==(const MarkupNode &Other) const {
45 return Text == Other.Text && Tag == Other.Tag && Fields == Other.Fields;
46 }
47 bool operator!=(const MarkupNode &Other) const { return !(*this == Other); }
48};
49
50/// Parses a log containing symbolizer markup into a sequence of nodes.
51class MarkupParser {
52public:
53 MarkupParser(StringSet<> MultilineTags = {});
54
55 /// Parses an individual \p Line of input.
56 ///
57 /// Nodes from the previous parseLine() call that haven't yet been extracted
58 /// by nextNode() are discarded. The nodes returned by nextNode() may
59 /// reference the input string, so it must be retained by the caller until the
60 /// last use.
61 ///
62 /// Note that some elements may span multiple lines. If a line ends with the
63 /// start of one of these elements, then no nodes will be produced until the
64 /// either the end or something that cannot be part of an element is
65 /// encountered. This may only occur after multiple calls to parseLine(),
66 /// corresponding to the lines of the multi-line element.
67 void parseLine(StringRef Line);
68
69 /// Inform the parser of that the input stream has ended.
70 ///
71 /// This allows the parser to finish any deferred processing (e.g., an
72 /// in-progress multi-line element) and may cause nextNode() to return
73 /// additional nodes.
74 void flush();
75
76 /// Returns the next node in the input sequence.
77 ///
78 /// Calling nextNode() may invalidate the contents of the node returned by the
79 /// previous call.
80 ///
81 /// \returns the next markup node or std::nullopt if none remain.
82 std::optional<MarkupNode> nextNode();
83
84 bool isSGR(const MarkupNode &Node) const {
85 return SGRSyntax.match(String: Node.Text);
86 }
87
88private:
89 std::optional<MarkupNode> parseElement(StringRef Line);
90 void parseTextOutsideMarkup(StringRef Text);
91 std::optional<StringRef> parseMultiLineBegin(StringRef Line);
92 std::optional<StringRef> parseMultiLineEnd(StringRef Line);
93
94 // Tags of elements that can span multiple lines.
95 const StringSet<> MultilineTags;
96
97 // Contents of a multi-line element that has finished being parsed. Retained
98 // to keep returned StringRefs for the contents valid.
99 std::string FinishedMultiline;
100
101 // Contents of a multi-line element that is still in the process of receiving
102 // lines.
103 std::string InProgressMultiline;
104
105 // The line currently being parsed.
106 StringRef Line;
107
108 // Buffer for nodes parsed from the current line.
109 SmallVector<MarkupNode> Buffer;
110
111 // Next buffer index to return.
112 size_t NextIdx;
113
114 // Regular expression matching supported ANSI SGR escape sequences.
115 const Regex SGRSyntax;
116};
117
118} // end namespace symbolize
119} // end namespace llvm
120
121#endif // LLVM_DEBUGINFO_SYMBOLIZE_MARKUP_H
122

// Source code of llvm/include/llvm/DebugInfo/Symbolize/Markup.h