1//===-- Automaton.h - Support for driving TableGen-produced DFAs ----------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements class that drive and introspect deterministic finite-
10// state automata (DFAs) as generated by TableGen's -gen-automata backend.
11//
12// For a description of how to define an automaton, see
13// include/llvm/TableGen/Automaton.td.
14//
15// One important detail is that these deterministic automata are created from
16// (potentially) nondeterministic definitions. Therefore a unique sequence of
17// input symbols will produce one path through the DFA but multiple paths
18// through the original NFA. An automaton by default only returns "accepted" or
19// "not accepted", but frequently we want to analyze what NFA path was taken.
20// Finding a path through the NFA states that results in a DFA state can help
21// answer *what* the solution to a problem was, not just that there exists a
22// solution.
23//
24//===----------------------------------------------------------------------===//
25
26#ifndef LLVM_SUPPORT_AUTOMATON_H
27#define LLVM_SUPPORT_AUTOMATON_H
28
29#include "llvm/ADT/ArrayRef.h"
30#include "llvm/ADT/DenseMap.h"
31#include "llvm/ADT/SmallVector.h"
32#include "llvm/Support/Allocator.h"
33#include <deque>
34#include <map>
35#include <memory>
36
37namespace llvm {
38
39using NfaPath = SmallVector<uint64_t, 4>;
40
41/// Forward define the pair type used by the automata transition info tables.
42///
43/// Experimental results with large tables have shown a significant (multiple
44/// orders of magnitude) parsing speedup by using a custom struct here with a
45/// trivial constructor rather than std::pair<uint64_t, uint64_t>.
46struct NfaStatePair {
47 uint64_t FromDfaState, ToDfaState;
48
49 bool operator<(const NfaStatePair &Other) const {
50 return std::make_tuple(args: FromDfaState, args: ToDfaState) <
51 std::make_tuple(args: Other.FromDfaState, args: Other.ToDfaState);
52 }
53};
54
55namespace internal {
56/// The internal class that maintains all possible paths through an NFA based
57/// on a path through the DFA.
58class NfaTranscriber {
59private:
60 /// Cached transition table. This is a table of NfaStatePairs that contains
61 /// zero-terminated sequences pointed to by DFA transitions.
62 ArrayRef<NfaStatePair> TransitionInfo;
63
64 /// A simple linked-list of traversed states that can have a shared tail. The
65 /// traversed path is stored in reverse order with the latest state as the
66 /// head.
67 struct PathSegment {
68 uint64_t State;
69 PathSegment *Tail;
70 };
71
72 /// We allocate segment objects frequently. Allocate them upfront and dispose
73 /// at the end of a traversal rather than hammering the system allocator.
74 SpecificBumpPtrAllocator<PathSegment> Allocator;
75
76 /// Heads of each tracked path. These are not ordered.
77 std::deque<PathSegment *> Heads;
78
79 /// The returned paths. This is populated during getPaths.
80 SmallVector<NfaPath, 4> Paths;
81
82 /// Create a new segment and return it.
83 PathSegment *makePathSegment(uint64_t State, PathSegment *Tail) {
84 PathSegment *P = Allocator.Allocate();
85 *P = {.State: State, .Tail: Tail};
86 return P;
87 }
88
89 /// Pairs defines a sequence of possible NFA transitions for a single DFA
90 /// transition.
91 void transition(ArrayRef<NfaStatePair> Pairs) {
92 // Iterate over all existing heads. We will mutate the Heads deque during
93 // iteration.
94 unsigned NumHeads = Heads.size();
95 for (unsigned I = 0; I < NumHeads; ++I) {
96 PathSegment *Head = Heads[I];
97 // The sequence of pairs is sorted. Select the set of pairs that
98 // transition from the current head state.
99 auto PI = lower_bound(Range&: Pairs, Value: NfaStatePair{.FromDfaState: Head->State, .ToDfaState: 0ULL});
100 auto PE = upper_bound(Range&: Pairs, Value: NfaStatePair{.FromDfaState: Head->State, INT64_MAX});
101 // For every transition from the current head state, add a new path
102 // segment.
103 for (; PI != PE; ++PI)
104 if (PI->FromDfaState == Head->State)
105 Heads.push_back(x: makePathSegment(State: PI->ToDfaState, Tail: Head));
106 }
107 // Now we've iterated over all the initial heads and added new ones,
108 // dispose of the original heads.
109 Heads.erase(first: Heads.begin(), last: std::next(x: Heads.begin(), n: NumHeads));
110 }
111
112public:
113 NfaTranscriber(ArrayRef<NfaStatePair> TransitionInfo)
114 : TransitionInfo(TransitionInfo) {
115 reset();
116 }
117
118 ArrayRef<NfaStatePair> getTransitionInfo() const {
119 return TransitionInfo;
120 }
121
122 void reset() {
123 Paths.clear();
124 Heads.clear();
125 Allocator.DestroyAll();
126 // The initial NFA state is 0.
127 Heads.push_back(x: makePathSegment(State: 0ULL, Tail: nullptr));
128 }
129
130 void transition(unsigned TransitionInfoIdx) {
131 unsigned EndIdx = TransitionInfoIdx;
132 while (TransitionInfo[EndIdx].ToDfaState != 0)
133 ++EndIdx;
134 ArrayRef<NfaStatePair> Pairs(&TransitionInfo[TransitionInfoIdx],
135 EndIdx - TransitionInfoIdx);
136 transition(Pairs);
137 }
138
139 ArrayRef<NfaPath> getPaths() {
140 Paths.clear();
141 for (auto *Head : Heads) {
142 NfaPath P;
143 while (Head->State != 0) {
144 P.push_back(Elt: Head->State);
145 Head = Head->Tail;
146 }
147 std::reverse(first: P.begin(), last: P.end());
148 Paths.push_back(Elt: std::move(P));
149 }
150 return Paths;
151 }
152};
153} // namespace internal
154
155/// A deterministic finite-state automaton. The automaton is defined in
156/// TableGen; this object drives an automaton defined by tblgen-emitted tables.
157///
158/// An automaton accepts a sequence of input tokens ("actions"). This class is
159/// templated on the type of these actions.
160template <typename ActionT> class Automaton {
161 /// Map from {State, Action} to {NewState, TransitionInfoIdx}.
162 /// TransitionInfoIdx is used by the DfaTranscriber to analyze the transition.
163 /// FIXME: This uses a std::map because ActionT can be a pair type including
164 /// an enum. In particular DenseMapInfo<ActionT> must be defined to use
165 /// DenseMap here.
166 /// This is a shared_ptr to allow very quick copy-construction of Automata; this
167 /// state is immutable after construction so this is safe.
168 using MapTy = std::map<std::pair<uint64_t, ActionT>, std::pair<uint64_t, unsigned>>;
169 std::shared_ptr<MapTy> M;
170 /// An optional transcription object. This uses much more state than simply
171 /// traversing the DFA for acceptance, so is heap allocated.
172 std::shared_ptr<internal::NfaTranscriber> Transcriber;
173 /// The initial DFA state is 1.
174 uint64_t State = 1;
175 /// True if we should transcribe and false if not (even if Transcriber is defined).
176 bool Transcribe;
177
178public:
179 /// Create an automaton.
180 /// \param Transitions The Transitions table as created by TableGen. Note that
181 /// because the action type differs per automaton, the
182 /// table type is templated as ArrayRef<InfoT>.
183 /// \param TranscriptionTable The TransitionInfo table as created by TableGen.
184 ///
185 /// Providing the TranscriptionTable argument as non-empty will enable the
186 /// use of transcription, which analyzes the possible paths in the original
187 /// NFA taken by the DFA. NOTE: This is substantially more work than simply
188 /// driving the DFA, so unless you require the getPaths() method leave this
189 /// empty.
190 template <typename InfoT>
191 Automaton(ArrayRef<InfoT> Transitions,
192 ArrayRef<NfaStatePair> TranscriptionTable = {}) {
193 if (!TranscriptionTable.empty())
194 Transcriber =
195 std::make_shared<internal::NfaTranscriber>(args&: TranscriptionTable);
196 Transcribe = Transcriber != nullptr;
197 M = std::make_shared<MapTy>();
198 for (const auto &I : Transitions)
199 // Greedily read and cache the transition table.
200 M->emplace(std::make_pair(I.FromDfaState, I.Action),
201 std::make_pair(I.ToDfaState, I.InfoIdx));
202 }
203 Automaton(const Automaton &Other)
204 : M(Other.M), State(Other.State), Transcribe(Other.Transcribe) {
205 // Transcriber is not thread-safe, so create a new instance on copy.
206 if (Other.Transcriber)
207 Transcriber = std::make_shared<internal::NfaTranscriber>(
208 Other.Transcriber->getTransitionInfo());
209 }
210
211 /// Reset the automaton to its initial state.
212 void reset() {
213 State = 1;
214 if (Transcriber)
215 Transcriber->reset();
216 }
217
218 /// Enable or disable transcription. Transcription is only available if
219 /// TranscriptionTable was provided to the constructor.
220 void enableTranscription(bool Enable = true) {
221 assert(Transcriber &&
222 "Transcription is only available if TranscriptionTable was provided "
223 "to the Automaton constructor");
224 Transcribe = Enable;
225 }
226
227 /// Transition the automaton based on input symbol A. Return true if the
228 /// automaton transitioned to a valid state, false if the automaton
229 /// transitioned to an invalid state.
230 ///
231 /// If this function returns false, all methods are undefined until reset() is
232 /// called.
233 bool add(const ActionT &A) {
234 auto I = M->find({State, A});
235 if (I == M->end())
236 return false;
237 if (Transcriber && Transcribe)
238 Transcriber->transition(I->second.second);
239 State = I->second.first;
240 return true;
241 }
242
243 /// Return true if the automaton can be transitioned based on input symbol A.
244 bool canAdd(const ActionT &A) {
245 auto I = M->find({State, A});
246 return I != M->end();
247 }
248
249 /// Obtain a set of possible paths through the input nondeterministic
250 /// automaton that could be obtained from the sequence of input actions
251 /// presented to this deterministic automaton.
252 ArrayRef<NfaPath> getNfaPaths() {
253 assert(Transcriber && Transcribe &&
254 "Can only obtain NFA paths if transcribing!");
255 return Transcriber->getPaths();
256 }
257};
258
259} // namespace llvm
260
261#endif // LLVM_SUPPORT_AUTOMATON_H
262

source code of llvm/include/llvm/Support/Automaton.h