1//===- BitstreamReader.h - Low-level bitstream reader interface -*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This header defines the BitstreamReader class. This class can be used to
10// read an arbitrary bitstream, regardless of its contents.
11//
12//===----------------------------------------------------------------------===//
13
14#ifndef LLVM_BITSTREAM_BITSTREAMREADER_H
15#define LLVM_BITSTREAM_BITSTREAMREADER_H
16
17#include "llvm/ADT/ArrayRef.h"
18#include "llvm/ADT/SmallVector.h"
19#include "llvm/Bitstream/BitCodes.h"
20#include "llvm/Support/Endian.h"
21#include "llvm/Support/Error.h"
22#include "llvm/Support/MemoryBufferRef.h"
23#include <algorithm>
24#include <cassert>
25#include <climits>
26#include <cstddef>
27#include <cstdint>
28#include <memory>
29#include <optional>
30#include <string>
31#include <utility>
32#include <vector>
33
34namespace llvm {
35
36/// This class maintains the abbreviations read from a block info block.
37class BitstreamBlockInfo {
38public:
39 /// This contains information emitted to BLOCKINFO_BLOCK blocks. These
40 /// describe abbreviations that all blocks of the specified ID inherit.
41 struct BlockInfo {
42 unsigned BlockID = 0;
43 std::vector<std::shared_ptr<BitCodeAbbrev>> Abbrevs;
44 std::string Name;
45 std::vector<std::pair<unsigned, std::string>> RecordNames;
46 };
47
48private:
49 std::vector<BlockInfo> BlockInfoRecords;
50
51public:
52 /// If there is block info for the specified ID, return it, otherwise return
53 /// null.
54 const BlockInfo *getBlockInfo(unsigned BlockID) const {
55 // Common case, the most recent entry matches BlockID.
56 if (!BlockInfoRecords.empty() && BlockInfoRecords.back().BlockID == BlockID)
57 return &BlockInfoRecords.back();
58
59 for (const BlockInfo &BI : BlockInfoRecords)
60 if (BI.BlockID == BlockID)
61 return &BI;
62 return nullptr;
63 }
64
65 BlockInfo &getOrCreateBlockInfo(unsigned BlockID) {
66 if (const BlockInfo *BI = getBlockInfo(BlockID))
67 return *const_cast<BlockInfo*>(BI);
68
69 // Otherwise, add a new record.
70 BlockInfoRecords.emplace_back();
71 BlockInfoRecords.back().BlockID = BlockID;
72 return BlockInfoRecords.back();
73 }
74};
75
76/// This represents a position within a bitstream. There may be multiple
77/// independent cursors reading within one bitstream, each maintaining their
78/// own local state.
79class SimpleBitstreamCursor {
80 ArrayRef<uint8_t> BitcodeBytes;
81 size_t NextChar = 0;
82
83public:
84 /// This is the current data we have pulled from the stream but have not
85 /// returned to the client. This is specifically and intentionally defined to
86 /// follow the word size of the host machine for efficiency. We use word_t in
87 /// places that are aware of this to make it perfectly explicit what is going
88 /// on.
89 using word_t = size_t;
90
91private:
92 word_t CurWord = 0;
93
94 /// This is the number of bits in CurWord that are valid. This is always from
95 /// [0...bits_of(size_t)-1] inclusive.
96 unsigned BitsInCurWord = 0;
97
98public:
99 SimpleBitstreamCursor() = default;
100 explicit SimpleBitstreamCursor(ArrayRef<uint8_t> BitcodeBytes)
101 : BitcodeBytes(BitcodeBytes) {}
102 explicit SimpleBitstreamCursor(StringRef BitcodeBytes)
103 : BitcodeBytes(arrayRefFromStringRef(Input: BitcodeBytes)) {}
104 explicit SimpleBitstreamCursor(MemoryBufferRef BitcodeBytes)
105 : SimpleBitstreamCursor(BitcodeBytes.getBuffer()) {}
106
107 bool canSkipToPos(size_t pos) const {
108 // pos can be skipped to if it is a valid address or one byte past the end.
109 return pos <= BitcodeBytes.size();
110 }
111
112 bool AtEndOfStream() {
113 return BitsInCurWord == 0 && BitcodeBytes.size() <= NextChar;
114 }
115
116 /// Return the bit # of the bit we are reading.
117 uint64_t GetCurrentBitNo() const {
118 return uint64_t(NextChar)*CHAR_BIT - BitsInCurWord;
119 }
120
121 // Return the byte # of the current bit.
122 uint64_t getCurrentByteNo() const { return GetCurrentBitNo() / 8; }
123
124 ArrayRef<uint8_t> getBitcodeBytes() const { return BitcodeBytes; }
125
126 /// Reset the stream to the specified bit number.
127 Error JumpToBit(uint64_t BitNo) {
128 size_t ByteNo = size_t(BitNo/8) & ~(sizeof(word_t)-1);
129 unsigned WordBitNo = unsigned(BitNo & (sizeof(word_t)*8-1));
130 assert(canSkipToPos(ByteNo) && "Invalid location");
131
132 // Move the cursor to the right word.
133 NextChar = ByteNo;
134 BitsInCurWord = 0;
135
136 // Skip over any bits that are already consumed.
137 if (WordBitNo) {
138 if (Expected<word_t> Res = Read(NumBits: WordBitNo))
139 return Error::success();
140 else
141 return Res.takeError();
142 }
143
144 return Error::success();
145 }
146
147 /// Get a pointer into the bitstream at the specified byte offset.
148 const uint8_t *getPointerToByte(uint64_t ByteNo, uint64_t NumBytes) {
149 return BitcodeBytes.data() + ByteNo;
150 }
151
152 /// Get a pointer into the bitstream at the specified bit offset.
153 ///
154 /// The bit offset must be on a byte boundary.
155 const uint8_t *getPointerToBit(uint64_t BitNo, uint64_t NumBytes) {
156 assert(!(BitNo % 8) && "Expected bit on byte boundary");
157 return getPointerToByte(ByteNo: BitNo / 8, NumBytes);
158 }
159
160 Error fillCurWord() {
161 if (NextChar >= BitcodeBytes.size())
162 return createStringError(EC: std::errc::io_error,
163 Fmt: "Unexpected end of file reading %u of %u bytes",
164 Vals: NextChar, Vals: BitcodeBytes.size());
165
166 // Read the next word from the stream.
167 const uint8_t *NextCharPtr = BitcodeBytes.data() + NextChar;
168 unsigned BytesRead;
169 if (BitcodeBytes.size() >= NextChar + sizeof(word_t)) {
170 BytesRead = sizeof(word_t);
171 CurWord =
172 support::endian::read<word_t, llvm::endianness::little>(P: NextCharPtr);
173 } else {
174 // Short read.
175 BytesRead = BitcodeBytes.size() - NextChar;
176 CurWord = 0;
177 for (unsigned B = 0; B != BytesRead; ++B)
178 CurWord |= uint64_t(NextCharPtr[B]) << (B * 8);
179 }
180 NextChar += BytesRead;
181 BitsInCurWord = BytesRead * 8;
182 return Error::success();
183 }
184
185 Expected<word_t> Read(unsigned NumBits) {
186 static const unsigned BitsInWord = sizeof(word_t) * 8;
187
188 assert(NumBits && NumBits <= BitsInWord &&
189 "Cannot return zero or more than BitsInWord bits!");
190
191 static const unsigned Mask = sizeof(word_t) > 4 ? 0x3f : 0x1f;
192
193 // If the field is fully contained by CurWord, return it quickly.
194 if (BitsInCurWord >= NumBits) {
195 word_t R = CurWord & (~word_t(0) >> (BitsInWord - NumBits));
196
197 // Use a mask to avoid undefined behavior.
198 CurWord >>= (NumBits & Mask);
199
200 BitsInCurWord -= NumBits;
201 return R;
202 }
203
204 word_t R = BitsInCurWord ? CurWord : 0;
205 unsigned BitsLeft = NumBits - BitsInCurWord;
206
207 if (Error fillResult = fillCurWord())
208 return std::move(fillResult);
209
210 // If we run out of data, abort.
211 if (BitsLeft > BitsInCurWord)
212 return createStringError(EC: std::errc::io_error,
213 Fmt: "Unexpected end of file reading %u of %u bits",
214 Vals: BitsInCurWord, Vals: BitsLeft);
215
216 word_t R2 = CurWord & (~word_t(0) >> (BitsInWord - BitsLeft));
217
218 // Use a mask to avoid undefined behavior.
219 CurWord >>= (BitsLeft & Mask);
220
221 BitsInCurWord -= BitsLeft;
222
223 R |= R2 << (NumBits - BitsLeft);
224
225 return R;
226 }
227
228 Expected<uint32_t> ReadVBR(const unsigned NumBits) {
229 Expected<unsigned> MaybeRead = Read(NumBits);
230 if (!MaybeRead)
231 return MaybeRead;
232 uint32_t Piece = MaybeRead.get();
233
234 assert(NumBits <= 32 && NumBits >= 1 && "Invalid NumBits value");
235 const uint32_t MaskBitOrder = (NumBits - 1);
236 const uint32_t Mask = 1UL << MaskBitOrder;
237
238 if ((Piece & Mask) == 0)
239 return Piece;
240
241 uint32_t Result = 0;
242 unsigned NextBit = 0;
243 while (true) {
244 Result |= (Piece & (Mask - 1)) << NextBit;
245
246 if ((Piece & Mask) == 0)
247 return Result;
248
249 NextBit += NumBits-1;
250 if (NextBit >= 32)
251 return createStringError(EC: std::errc::illegal_byte_sequence,
252 Fmt: "Unterminated VBR");
253
254 MaybeRead = Read(NumBits);
255 if (!MaybeRead)
256 return MaybeRead;
257 Piece = MaybeRead.get();
258 }
259 }
260
261 // Read a VBR that may have a value up to 64-bits in size. The chunk size of
262 // the VBR must still be <= 32 bits though.
263 Expected<uint64_t> ReadVBR64(const unsigned NumBits) {
264 Expected<uint64_t> MaybeRead = Read(NumBits);
265 if (!MaybeRead)
266 return MaybeRead;
267 uint32_t Piece = MaybeRead.get();
268 assert(NumBits <= 32 && NumBits >= 1 && "Invalid NumBits value");
269 const uint32_t MaskBitOrder = (NumBits - 1);
270 const uint32_t Mask = 1UL << MaskBitOrder;
271
272 if ((Piece & Mask) == 0)
273 return uint64_t(Piece);
274
275 uint64_t Result = 0;
276 unsigned NextBit = 0;
277 while (true) {
278 Result |= uint64_t(Piece & (Mask - 1)) << NextBit;
279
280 if ((Piece & Mask) == 0)
281 return Result;
282
283 NextBit += NumBits-1;
284 if (NextBit >= 64)
285 return createStringError(EC: std::errc::illegal_byte_sequence,
286 Fmt: "Unterminated VBR");
287
288 MaybeRead = Read(NumBits);
289 if (!MaybeRead)
290 return MaybeRead;
291 Piece = MaybeRead.get();
292 }
293 }
294
295 void SkipToFourByteBoundary() {
296 // If word_t is 64-bits and if we've read less than 32 bits, just dump
297 // the bits we have up to the next 32-bit boundary.
298 if (sizeof(word_t) > 4 &&
299 BitsInCurWord >= 32) {
300 CurWord >>= BitsInCurWord-32;
301 BitsInCurWord = 32;
302 return;
303 }
304
305 BitsInCurWord = 0;
306 }
307
308 /// Return the size of the stream in bytes.
309 size_t SizeInBytes() const { return BitcodeBytes.size(); }
310
311 /// Skip to the end of the file.
312 void skipToEnd() { NextChar = BitcodeBytes.size(); }
313
314 /// Check whether a reservation of Size elements is plausible.
315 bool isSizePlausible(size_t Size) const {
316 // Don't allow reserving more elements than the number of bits, assuming
317 // at least one bit is needed to encode an element.
318 return Size < BitcodeBytes.size() * 8;
319 }
320};
321
322/// When advancing through a bitstream cursor, each advance can discover a few
323/// different kinds of entries:
324struct BitstreamEntry {
325 enum {
326 Error, // Malformed bitcode was found.
327 EndBlock, // We've reached the end of the current block, (or the end of the
328 // file, which is treated like a series of EndBlock records.
329 SubBlock, // This is the start of a new subblock of a specific ID.
330 Record // This is a record with a specific AbbrevID.
331 } Kind;
332
333 unsigned ID;
334
335 static BitstreamEntry getError() {
336 BitstreamEntry E; E.Kind = Error; return E;
337 }
338
339 static BitstreamEntry getEndBlock() {
340 BitstreamEntry E; E.Kind = EndBlock; return E;
341 }
342
343 static BitstreamEntry getSubBlock(unsigned ID) {
344 BitstreamEntry E; E.Kind = SubBlock; E.ID = ID; return E;
345 }
346
347 static BitstreamEntry getRecord(unsigned AbbrevID) {
348 BitstreamEntry E; E.Kind = Record; E.ID = AbbrevID; return E;
349 }
350};
351
352/// This represents a position within a bitcode file, implemented on top of a
353/// SimpleBitstreamCursor.
354///
355/// Unlike iterators, BitstreamCursors are heavy-weight objects that should not
356/// be passed by value.
357class BitstreamCursor : SimpleBitstreamCursor {
358 // This is the declared size of code values used for the current block, in
359 // bits.
360 unsigned CurCodeSize = 2;
361
362 /// Abbrevs installed at in this block.
363 std::vector<std::shared_ptr<BitCodeAbbrev>> CurAbbrevs;
364
365 struct Block {
366 unsigned PrevCodeSize;
367 std::vector<std::shared_ptr<BitCodeAbbrev>> PrevAbbrevs;
368
369 explicit Block(unsigned PCS) : PrevCodeSize(PCS) {}
370 };
371
372 /// This tracks the codesize of parent blocks.
373 SmallVector<Block, 8> BlockScope;
374
375 BitstreamBlockInfo *BlockInfo = nullptr;
376
377public:
378 static const size_t MaxChunkSize = 32;
379
380 BitstreamCursor() = default;
381 explicit BitstreamCursor(ArrayRef<uint8_t> BitcodeBytes)
382 : SimpleBitstreamCursor(BitcodeBytes) {}
383 explicit BitstreamCursor(StringRef BitcodeBytes)
384 : SimpleBitstreamCursor(BitcodeBytes) {}
385 explicit BitstreamCursor(MemoryBufferRef BitcodeBytes)
386 : SimpleBitstreamCursor(BitcodeBytes) {}
387
388 using SimpleBitstreamCursor::AtEndOfStream;
389 using SimpleBitstreamCursor::canSkipToPos;
390 using SimpleBitstreamCursor::fillCurWord;
391 using SimpleBitstreamCursor::getBitcodeBytes;
392 using SimpleBitstreamCursor::GetCurrentBitNo;
393 using SimpleBitstreamCursor::getCurrentByteNo;
394 using SimpleBitstreamCursor::getPointerToByte;
395 using SimpleBitstreamCursor::JumpToBit;
396 using SimpleBitstreamCursor::Read;
397 using SimpleBitstreamCursor::ReadVBR;
398 using SimpleBitstreamCursor::ReadVBR64;
399 using SimpleBitstreamCursor::SizeInBytes;
400 using SimpleBitstreamCursor::skipToEnd;
401
402 /// Return the number of bits used to encode an abbrev #.
403 unsigned getAbbrevIDWidth() const { return CurCodeSize; }
404
405 /// Flags that modify the behavior of advance().
406 enum {
407 /// If this flag is used, the advance() method does not automatically pop
408 /// the block scope when the end of a block is reached.
409 AF_DontPopBlockAtEnd = 1,
410
411 /// If this flag is used, abbrev entries are returned just like normal
412 /// records.
413 AF_DontAutoprocessAbbrevs = 2
414 };
415
416 /// Advance the current bitstream, returning the next entry in the stream.
417 Expected<BitstreamEntry> advance(unsigned Flags = 0) {
418 while (true) {
419 if (AtEndOfStream())
420 return BitstreamEntry::getError();
421
422 Expected<unsigned> MaybeCode = ReadCode();
423 if (!MaybeCode)
424 return MaybeCode.takeError();
425 unsigned Code = MaybeCode.get();
426
427 if (Code == bitc::END_BLOCK) {
428 // Pop the end of the block unless Flags tells us not to.
429 if (!(Flags & AF_DontPopBlockAtEnd) && ReadBlockEnd())
430 return BitstreamEntry::getError();
431 return BitstreamEntry::getEndBlock();
432 }
433
434 if (Code == bitc::ENTER_SUBBLOCK) {
435 if (Expected<unsigned> MaybeSubBlock = ReadSubBlockID())
436 return BitstreamEntry::getSubBlock(ID: MaybeSubBlock.get());
437 else
438 return MaybeSubBlock.takeError();
439 }
440
441 if (Code == bitc::DEFINE_ABBREV &&
442 !(Flags & AF_DontAutoprocessAbbrevs)) {
443 // We read and accumulate abbrev's, the client can't do anything with
444 // them anyway.
445 if (Error Err = ReadAbbrevRecord())
446 return std::move(Err);
447 continue;
448 }
449
450 return BitstreamEntry::getRecord(AbbrevID: Code);
451 }
452 }
453
454 /// This is a convenience function for clients that don't expect any
455 /// subblocks. This just skips over them automatically.
456 Expected<BitstreamEntry> advanceSkippingSubblocks(unsigned Flags = 0) {
457 while (true) {
458 // If we found a normal entry, return it.
459 Expected<BitstreamEntry> MaybeEntry = advance(Flags);
460 if (!MaybeEntry)
461 return MaybeEntry;
462 BitstreamEntry Entry = MaybeEntry.get();
463
464 if (Entry.Kind != BitstreamEntry::SubBlock)
465 return Entry;
466
467 // If we found a sub-block, just skip over it and check the next entry.
468 if (Error Err = SkipBlock())
469 return std::move(Err);
470 }
471 }
472
473 Expected<unsigned> ReadCode() { return Read(NumBits: CurCodeSize); }
474
475 // Block header:
476 // [ENTER_SUBBLOCK, blockid, newcodelen, <align4bytes>, blocklen]
477
478 /// Having read the ENTER_SUBBLOCK code, read the BlockID for the block.
479 Expected<unsigned> ReadSubBlockID() { return ReadVBR(NumBits: bitc::BlockIDWidth); }
480
481 /// Having read the ENTER_SUBBLOCK abbrevid and a BlockID, skip over the body
482 /// of this block.
483 Error SkipBlock() {
484 // Read and ignore the codelen value.
485 if (Expected<uint32_t> Res = ReadVBR(NumBits: bitc::CodeLenWidth))
486 ; // Since we are skipping this block, we don't care what code widths are
487 // used inside of it.
488 else
489 return Res.takeError();
490
491 SkipToFourByteBoundary();
492 Expected<unsigned> MaybeNum = Read(NumBits: bitc::BlockSizeWidth);
493 if (!MaybeNum)
494 return MaybeNum.takeError();
495 size_t NumFourBytes = MaybeNum.get();
496
497 // Check that the block wasn't partially defined, and that the offset isn't
498 // bogus.
499 size_t SkipTo = GetCurrentBitNo() + NumFourBytes * 4 * 8;
500 if (AtEndOfStream())
501 return createStringError(EC: std::errc::illegal_byte_sequence,
502 Fmt: "can't skip block: already at end of stream");
503 if (!canSkipToPos(pos: SkipTo / 8))
504 return createStringError(EC: std::errc::illegal_byte_sequence,
505 Fmt: "can't skip to bit %zu from %" PRIu64, Vals: SkipTo,
506 Vals: GetCurrentBitNo());
507
508 if (Error Res = JumpToBit(BitNo: SkipTo))
509 return Res;
510
511 return Error::success();
512 }
513
514 /// Having read the ENTER_SUBBLOCK abbrevid, and enter the block.
515 Error EnterSubBlock(unsigned BlockID, unsigned *NumWordsP = nullptr);
516
517 bool ReadBlockEnd() {
518 if (BlockScope.empty()) return true;
519
520 // Block tail:
521 // [END_BLOCK, <align4bytes>]
522 SkipToFourByteBoundary();
523
524 popBlockScope();
525 return false;
526 }
527
528private:
529 void popBlockScope() {
530 CurCodeSize = BlockScope.back().PrevCodeSize;
531
532 CurAbbrevs = std::move(BlockScope.back().PrevAbbrevs);
533 BlockScope.pop_back();
534 }
535
536 //===--------------------------------------------------------------------===//
537 // Record Processing
538 //===--------------------------------------------------------------------===//
539
540public:
541 /// Return the abbreviation for the specified AbbrevId.
542 Expected<const BitCodeAbbrev *> getAbbrev(unsigned AbbrevID) {
543 unsigned AbbrevNo = AbbrevID - bitc::FIRST_APPLICATION_ABBREV;
544 if (AbbrevNo >= CurAbbrevs.size())
545 return createStringError(
546 EC: std::errc::illegal_byte_sequence, Fmt: "Invalid abbrev number");
547 return CurAbbrevs[AbbrevNo].get();
548 }
549
550 /// Read the current record and discard it, returning the code for the record.
551 Expected<unsigned> skipRecord(unsigned AbbrevID);
552
553 Expected<unsigned> readRecord(unsigned AbbrevID,
554 SmallVectorImpl<uint64_t> &Vals,
555 StringRef *Blob = nullptr);
556
557 //===--------------------------------------------------------------------===//
558 // Abbrev Processing
559 //===--------------------------------------------------------------------===//
560 Error ReadAbbrevRecord();
561
562 /// Read and return a block info block from the bitstream. If an error was
563 /// encountered, return std::nullopt.
564 ///
565 /// \param ReadBlockInfoNames Whether to read block/record name information in
566 /// the BlockInfo block. Only llvm-bcanalyzer uses this.
567 Expected<std::optional<BitstreamBlockInfo>>
568 ReadBlockInfoBlock(bool ReadBlockInfoNames = false);
569
570 /// Set the block info to be used by this BitstreamCursor to interpret
571 /// abbreviated records.
572 void setBlockInfo(BitstreamBlockInfo *BI) { BlockInfo = BI; }
573};
574
575} // end llvm namespace
576
577#endif // LLVM_BITSTREAM_BITSTREAMREADER_H
578

source code of llvm/include/llvm/Bitstream/BitstreamReader.h