BitstreamReader.h source code [llvm/include/llvm/Bitstream/BitstreamReader.h]

1	//===- BitstreamReader.h - Low-level bitstream reader interface -- C++ --===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8	//
9	// This header defines the BitstreamReader class. This class can be used to
10	// read an arbitrary bitstream, regardless of its contents.
11	//
12	//===----------------------------------------------------------------------===//
13
14	#ifndef LLVM_BITSTREAM_BITSTREAMREADER_H
15	#define LLVM_BITSTREAM_BITSTREAMREADER_H
16
17	#include "llvm/ADT/ArrayRef.h"
18	#include "llvm/ADT/SmallVector.h"
19	#include "llvm/Bitstream/BitCodes.h"
20	#include "llvm/Support/Endian.h"
21	#include "llvm/Support/Error.h"
22	#include "llvm/Support/MemoryBufferRef.h"
23	#include <algorithm>
24	#include <cassert>
25	#include <climits>
26	#include <cstddef>
27	#include <cstdint>
28	#include <memory>
29	#include <optional>
30	#include <string>
31	#include <utility>
32	#include <vector>
33
34	namespace llvm {
35
36	/// This class maintains the abbreviations read from a block info block.
37	class BitstreamBlockInfo {
38	public:
39	/// This contains information emitted to BLOCKINFO_BLOCK blocks. These
40	/// describe abbreviations that all blocks of the specified ID inherit.
41	struct BlockInfo {
42	unsigned BlockID = `0`;
43	std::vector<std::shared_ptr<BitCodeAbbrev>> Abbrevs;
44	std::string Name;
45	std::vector<std::pair<unsigned, std::string>> RecordNames;
46	};
47
48	private:
49	std::vector<BlockInfo> BlockInfoRecords;
50
51	public:
52	/// If there is block info for the specified ID, return it, otherwise return
53	/// null.
54	const BlockInfo getBlockInfo(unsigned* BlockID) const {
55	// Common case, the most recent entry matches BlockID.
56	if (!BlockInfoRecords.empty() && BlockInfoRecords.back().BlockID == BlockID)
57	return &BlockInfoRecords.back();
58
59	for (const BlockInfo &BI : BlockInfoRecords)
60	if (BI.BlockID == BlockID)
61	return &BI;
62	return nullptr;
63	}
64
65	BlockInfo &getOrCreateBlockInfo(unsigned BlockID) {
66	if (const BlockInfo *BI = getBlockInfo(BlockID))
67	return *const_cast<BlockInfo*>(BI);
68
69	// Otherwise, add a new record.
70	BlockInfoRecords.emplace_back();
71	BlockInfoRecords.back().BlockID = BlockID;
72	return BlockInfoRecords.back();
73	}
74	};
75
76	/// This represents a position within a bitstream. There may be multiple
77	/// independent cursors reading within one bitstream, each maintaining their
78	/// own local state.
79	class SimpleBitstreamCursor {
80	ArrayRef<uint8_t> BitcodeBytes;
81	size_t NextChar = `0`;
82
83	public:
84	/// This is the current data we have pulled from the stream but have not
85	/// returned to the client. This is specifically and intentionally defined to
86	/// follow the word size of the host machine for efficiency. We use word_t in
87	/// places that are aware of this to make it perfectly explicit what is going
88	/// on.
89	using word_t = size_t;
90
91	private:
92	word_t CurWord = `0`;
93
94	/// This is the number of bits in CurWord that are valid. This is always from
95	/// [0...bits_of(size_t)-1] inclusive.
96	unsigned BitsInCurWord = `0`;
97
98	public:
99	SimpleBitstreamCursor() = default;
100	explicit SimpleBitstreamCursor(ArrayRef<uint8_t> BitcodeBytes)
101	: BitcodeBytes (BitcodeBytes) {}
102	explicit SimpleBitstreamCursor(StringRef BitcodeBytes)
103	: BitcodeBytes(arrayRefFromStringRef(Input: BitcodeBytes)) {}
104	explicit SimpleBitstreamCursor(MemoryBufferRef BitcodeBytes)
105	: SimpleBitstreamCursor (BitcodeBytes.getBuffer()) {}
106
107	bool canSkipToPos(size_t pos) const {
108	// pos can be skipped to if it is a valid address or one byte past the end.
109	return pos <= BitcodeBytes.size();
110	}
111
112	bool AtEndOfStream() {
113	return BitsInCurWord == `0` && BitcodeBytes.size() <= NextChar;
114	}
115
116	/// Return the bit # of the bit we are reading.
117	uint64_t GetCurrentBitNo() const {
118	return uint64_t(NextChar)*CHAR_BIT - BitsInCurWord;
119	}
120
121	// Return the byte # of the current bit.
122	uint64_t getCurrentByteNo() const { return GetCurrentBitNo() / `8`; }
123
124	ArrayRef<uint8_t> getBitcodeBytes() const { return BitcodeBytes; }
125
126	/// Reset the stream to the specified bit number.
127	Error JumpToBit(uint64_t BitNo) {
128	size_t ByteNo = size_t(BitNo/`8`) & ~(sizeof(word_t)-`1`);
129	unsigned WordBitNo = unsigned(BitNo & (sizeof(word_t)*`8`-`1`));
130	assert(canSkipToPos(ByteNo) && "Invalid location");
131
132	// Move the cursor to the right word.
133	NextChar = ByteNo;
134	BitsInCurWord = `0`;
135
136	// Skip over any bits that are already consumed.
137	if (WordBitNo) {
138	if (Expected<word_t> Res = Read(NumBits: WordBitNo))
139	return Error::success();
140	else
141	return Res.takeError();
142	}
143
144	return Error::success();
145	}
146
147	/// Get a pointer into the bitstream at the specified byte offset.
148	const uint8_t *getPointerToByte(uint64_t ByteNo, uint64_t NumBytes) {
149	return BitcodeBytes.data() + ByteNo;
150	}
151
152	/// Get a pointer into the bitstream at the specified bit offset.
153	///
154	/// The bit offset must be on a byte boundary.
155	const uint8_t *getPointerToBit(uint64_t BitNo, uint64_t NumBytes) {
156	assert(!(BitNo % `8`) && "Expected bit on byte boundary");
157	return getPointerToByte(ByteNo: BitNo / `8`, NumBytes);
158	}
159
160	Error fillCurWord() {
161	if (NextChar >= BitcodeBytes.size())
162	return createStringError(EC: std::errc::io_error,
163	Fmt: "Unexpected end of file reading %u of %u bytes",
164	Vals: NextChar, Vals: BitcodeBytes.size());
165
166	// Read the next word from the stream.
167	const uint8_t *NextCharPtr = BitcodeBytes.data() + NextChar;
168	unsigned BytesRead;
169	if (BitcodeBytes.size() >= NextChar + sizeof(word_t)) {
170	BytesRead = sizeof(word_t);
171	CurWord =
172	support::endian::read<word_t, llvm::endianness::little>(P: NextCharPtr);
173	} else {
174	// Short read.
175	BytesRead = BitcodeBytes.size() - NextChar;
176	CurWord = `0`;
177	for (unsigned B = `0`; B != BytesRead; ++B)
178	CurWord \|= uint64_t(NextCharPtr[B]) << (B * `8`);
179	}
180	NextChar += BytesRead;
181	BitsInCurWord = BytesRead * `8`;
182	return Error::success();
183	}
184
185	Expected<word_t> Read(unsigned NumBits) {
186	static const unsigned BitsInWord = sizeof(word_t) * `8`;
187
188	assert(NumBits && NumBits <= BitsInWord &&
189	"Cannot return zero or more than BitsInWord bits!");
190
191	static const unsigned Mask = sizeof(word_t) > `4` ? `0x3f` : `0x1f`;
192
193	// If the field is fully contained by CurWord, return it quickly.
194	if (BitsInCurWord >= NumBits) {
195	word_t R = CurWord & (~word_t(`0`) >> (BitsInWord - NumBits));
196
197	// Use a mask to avoid undefined behavior.
198	CurWord >>= (NumBits & Mask);
199
200	BitsInCurWord -= NumBits;
201	return R;
202	}
203
204	word_t R = BitsInCurWord ? CurWord : `0`;
205	unsigned BitsLeft = NumBits - BitsInCurWord;
206
207	if (Error fillResult = fillCurWord())
208	return std::move(fillResult);
209
210	// If we run out of data, abort.
211	if (BitsLeft > BitsInCurWord)
212	return createStringError(EC: std::errc::io_error,
213	Fmt: "Unexpected end of file reading %u of %u bits",
214	Vals: BitsInCurWord, Vals: BitsLeft);
215
216	word_t R2 = CurWord & (~word_t(`0`) >> (BitsInWord - BitsLeft));
217
218	// Use a mask to avoid undefined behavior.
219	CurWord >>= (BitsLeft & Mask);
220
221	BitsInCurWord -= BitsLeft;
222
223	R \|= R2 << (NumBits - BitsLeft);
224
225	return R;
226	}
227
228	Expected<uint32_t> ReadVBR(const unsigned NumBits) {
229	Expected<unsigned> MaybeRead = Read(NumBits);
230	if (!MaybeRead)
231	return MaybeRead;
232	uint32_t Piece = MaybeRead.get();
233
234	assert(NumBits <= `32` && NumBits >= `1` && "Invalid NumBits value");
235	const uint32_t MaskBitOrder = (NumBits - `1`);
236	const uint32_t Mask = `1UL` << MaskBitOrder;
237
238	if ((Piece & Mask) == `0`)
239	return Piece;
240
241	uint32_t Result = `0`;
242	unsigned NextBit = `0`;
243	while (true) {
244	Result \|= (Piece & (Mask - `1`)) << NextBit;
245
246	if ((Piece & Mask) == `0`)
247	return Result;
248
249	NextBit += NumBits-`1`;
250	if (NextBit >= `32`)
251	return createStringError(EC: std::errc::illegal_byte_sequence,
252	Fmt: "Unterminated VBR");
253
254	MaybeRead = Read(NumBits);
255	if (!MaybeRead)
256	return MaybeRead;
257	Piece = MaybeRead.get();
258	}
259	}
260
261	// Read a VBR that may have a value up to 64-bits in size. The chunk size of
262	// the VBR must still be <= 32 bits though.
263	Expected<uint64_t> ReadVBR64(const unsigned NumBits) {
264	Expected<uint64_t> MaybeRead = Read(NumBits);
265	if (!MaybeRead)
266	return MaybeRead;
267	uint32_t Piece = MaybeRead.get();
268	assert(NumBits <= `32` && NumBits >= `1` && "Invalid NumBits value");
269	const uint32_t MaskBitOrder = (NumBits - `1`);
270	const uint32_t Mask = `1UL` << MaskBitOrder;
271
272	if ((Piece & Mask) == `0`)
273	return uint64_t(Piece);
274
275	uint64_t Result = `0`;
276	unsigned NextBit = `0`;
277	while (true) {
278	Result \|= uint64_t(Piece & (Mask - `1`)) << NextBit;
279
280	if ((Piece & Mask) == `0`)
281	return Result;
282
283	NextBit += NumBits-`1`;
284	if (NextBit >= `64`)
285	return createStringError(EC: std::errc::illegal_byte_sequence,
286	Fmt: "Unterminated VBR");
287
288	MaybeRead = Read(NumBits);
289	if (!MaybeRead)
290	return MaybeRead;
291	Piece = MaybeRead.get();
292	}
293	}
294
295	void SkipToFourByteBoundary() {
296	// If word_t is 64-bits and if we've read less than 32 bits, just dump
297	// the bits we have up to the next 32-bit boundary.
298	if (sizeof(word_t) > `4` &&
299	BitsInCurWord >= `32`) {
300	CurWord >>= BitsInCurWord-`32`;
301	BitsInCurWord = `32`;
302	return;
303	}
304
305	BitsInCurWord = `0`;
306	}
307
308	/// Return the size of the stream in bytes.
309	size_t SizeInBytes() const { return BitcodeBytes.size(); }
310
311	/// Skip to the end of the file.
312	void skipToEnd() { NextChar = BitcodeBytes.size(); }
313
314	/// Check whether a reservation of Size elements is plausible.
315	bool isSizePlausible(size_t Size) const {
316	// Don't allow reserving more elements than the number of bits, assuming
317	// at least one bit is needed to encode an element.
318	return Size < BitcodeBytes.size() * `8`;
319	}
320	};
321
322	/// When advancing through a bitstream cursor, each advance can discover a few
323	/// different kinds of entries:
324	struct BitstreamEntry {
325	enum {
326	Error, // Malformed bitcode was found.
327	EndBlock, // We've reached the end of the current block, (or the end of the
328	// file, which is treated like a series of EndBlock records.
329	SubBlock, // This is the start of a new subblock of a specific ID.
330	Record // This is a record with a specific AbbrevID.
331	} Kind;
332
333	unsigned ID;
334
335	static BitstreamEntry getError() {
336	BitstreamEntry E; E.Kind = Error; return E;
337	}
338
339	static BitstreamEntry getEndBlock() {
340	BitstreamEntry E; E.Kind = EndBlock; return E;
341	}
342
343	static BitstreamEntry getSubBlock(unsigned ID) {
344	BitstreamEntry E; E.Kind = SubBlock; E.ID = ID; return E;
345	}
346
347	static BitstreamEntry getRecord(unsigned AbbrevID) {
348	BitstreamEntry E; E.Kind = Record; E.ID = AbbrevID; return E;
349	}
350	};
351
352	/// This represents a position within a bitcode file, implemented on top of a
353	/// SimpleBitstreamCursor.
354	///
355	/// Unlike iterators, BitstreamCursors are heavy-weight objects that should not
356	/// be passed by value.
357	class BitstreamCursor : SimpleBitstreamCursor {
358	// This is the declared size of code values used for the current block, in
359	// bits.
360	unsigned CurCodeSize = `2`;
361
362	/// Abbrevs installed at in this block.
363	std::vector<std::shared_ptr<BitCodeAbbrev>> CurAbbrevs;
364
365	struct Block {
366	unsigned PrevCodeSize;
367	std::vector<std::shared_ptr<BitCodeAbbrev>> PrevAbbrevs;
368
369	explicit Block(unsigned PCS) : PrevCodeSize(PCS) {}
370	};
371
372	/// This tracks the codesize of parent blocks.
373	SmallVector<Block, `8`> BlockScope;
374
375	BitstreamBlockInfo BlockInfo = nullptr*;
376
377	public:
378	static const size_t MaxChunkSize = `32`;
379
380	BitstreamCursor() = default;
381	explicit BitstreamCursor(ArrayRef<uint8_t> BitcodeBytes)
382	: SimpleBitstreamCursor (BitcodeBytes) {}
383	explicit BitstreamCursor(StringRef BitcodeBytes)
384	: SimpleBitstreamCursor (BitcodeBytes) {}
385	explicit BitstreamCursor(MemoryBufferRef BitcodeBytes)
386	: SimpleBitstreamCursor (BitcodeBytes) {}
387
388	using SimpleBitstreamCursor::AtEndOfStream;
389	using SimpleBitstreamCursor::canSkipToPos;
390	using SimpleBitstreamCursor::fillCurWord;
391	using SimpleBitstreamCursor::getBitcodeBytes;
392	using SimpleBitstreamCursor::GetCurrentBitNo;
393	using SimpleBitstreamCursor::getCurrentByteNo;
394	using SimpleBitstreamCursor::getPointerToByte;
395	using SimpleBitstreamCursor::JumpToBit;
396	using SimpleBitstreamCursor::Read;
397	using SimpleBitstreamCursor::ReadVBR;
398	using SimpleBitstreamCursor::ReadVBR64;
399	using SimpleBitstreamCursor::SizeInBytes;
400	using SimpleBitstreamCursor::skipToEnd;
401
402	/// Return the number of bits used to encode an abbrev #.
403	unsigned getAbbrevIDWidth() const { return CurCodeSize; }
404
405	/// Flags that modify the behavior of advance().
406	enum {
407	/// If this flag is used, the advance() method does not automatically pop
408	/// the block scope when the end of a block is reached.
409	AF_DontPopBlockAtEnd = `1`,
410
411	/// If this flag is used, abbrev entries are returned just like normal
412	/// records.
413	AF_DontAutoprocessAbbrevs = `2`
414	};
415
416	/// Advance the current bitstream, returning the next entry in the stream.
417	Expected<BitstreamEntry> advance(unsigned Flags = `0`) {
418	while (true) {
419	if (AtEndOfStream())
420	return BitstreamEntry::getError();
421
422	Expected<unsigned> MaybeCode = ReadCode();
423	if (!MaybeCode)
424	return MaybeCode.takeError();
425	unsigned Code = MaybeCode.get();
426
427	if (Code == bitc::END_BLOCK) {
428	// Pop the end of the block unless Flags tells us not to.
429	if (!(Flags & AF_DontPopBlockAtEnd) && ReadBlockEnd())
430	return BitstreamEntry::getError();
431	return BitstreamEntry::getEndBlock();
432	}
433
434	if (Code == bitc::ENTER_SUBBLOCK) {
435	if (Expected<unsigned> MaybeSubBlock = ReadSubBlockID())
436	return BitstreamEntry::getSubBlock(ID: MaybeSubBlock.get());
437	else
438	return MaybeSubBlock.takeError();
439	}
440
441	if (Code == bitc::DEFINE_ABBREV &&
442	!(Flags & AF_DontAutoprocessAbbrevs)) {
443	// We read and accumulate abbrev's, the client can't do anything with
444	// them anyway.
445	if (Error Err = ReadAbbrevRecord())
446	return std::move(Err);
447	continue;
448	}
449
450	return BitstreamEntry::getRecord(AbbrevID: Code);
451	}
452	}
453
454	/// This is a convenience function for clients that don't expect any
455	/// subblocks. This just skips over them automatically.
456	Expected<BitstreamEntry> advanceSkippingSubblocks(unsigned Flags = `0`) {
457	while (true) {
458	// If we found a normal entry, return it.
459	Expected<BitstreamEntry> MaybeEntry = advance(Flags);
460	if (!MaybeEntry)
461	return MaybeEntry;
462	BitstreamEntry Entry = MaybeEntry.get();
463
464	if (Entry.Kind != BitstreamEntry::SubBlock)
465	return Entry;
466
467	// If we found a sub-block, just skip over it and check the next entry.
468	if (Error Err = SkipBlock())
469	return std::move(Err);
470	}
471	}
472
473	Expected<unsigned> ReadCode() { return Read(NumBits: CurCodeSize); }
474
475	// Block header:
476	// [ENTER_SUBBLOCK, blockid, newcodelen, <align4bytes>, blocklen]
477
478	/// Having read the ENTER_SUBBLOCK code, read the BlockID for the block.
479	Expected<unsigned> ReadSubBlockID() { return ReadVBR(NumBits: bitc::BlockIDWidth); }
480
481	/// Having read the ENTER_SUBBLOCK abbrevid and a BlockID, skip over the body
482	/// of this block.
483	Error SkipBlock() {
484	// Read and ignore the codelen value.
485	if (Expected<uint32_t> Res = ReadVBR(NumBits: bitc::CodeLenWidth))
486	; // Since we are skipping this block, we don't care what code widths are
487	// used inside of it.
488	else
489	return Res.takeError();
490
491	SkipToFourByteBoundary();
492	Expected<unsigned> MaybeNum = Read(NumBits: bitc::BlockSizeWidth);
493	if (!MaybeNum)
494	return MaybeNum.takeError();
495	size_t NumFourBytes = MaybeNum.get();
496
497	// Check that the block wasn't partially defined, and that the offset isn't
498	// bogus.
499	size_t SkipTo = GetCurrentBitNo() + NumFourBytes * `4` * `8`;
500	if (AtEndOfStream())
501	return createStringError(EC: std::errc::illegal_byte_sequence,
502	Fmt: "can't skip block: already at end of stream");
503	if (!canSkipToPos(pos: SkipTo / `8`))
504	return createStringError(EC: std::errc::illegal_byte_sequence,
505	Fmt: "can't skip to bit %zu from %" PRIu64, Vals: SkipTo,
506	Vals: GetCurrentBitNo());
507
508	if (Error Res = JumpToBit(BitNo: SkipTo))
509	return Res;
510
511	return Error::success();
512	}
513
514	/// Having read the ENTER_SUBBLOCK abbrevid, and enter the block.
515	Error EnterSubBlock(unsigned BlockID, unsigned NumWordsP = nullptr*);
516
517	bool ReadBlockEnd() {
518	if (BlockScope.empty()) return true;
519
520	// Block tail:
521	// [END_BLOCK, <align4bytes>]
522	SkipToFourByteBoundary();
523
524	popBlockScope();
525	return false;
526	}
527
528	private:
529	void popBlockScope() {
530	CurCodeSize = BlockScope.back().PrevCodeSize;
531
532	CurAbbrevs = std::move(BlockScope.back().PrevAbbrevs);
533	BlockScope.pop_back();
534	}
535
536	//===--------------------------------------------------------------------===//
537	// Record Processing
538	//===--------------------------------------------------------------------===//
539
540	public:
541	/// Return the abbreviation for the specified AbbrevId.
542	Expected<const BitCodeAbbrev > getAbbrev(unsigned* AbbrevID) {
543	unsigned AbbrevNo = AbbrevID - bitc::FIRST_APPLICATION_ABBREV;
544	if (AbbrevNo >= CurAbbrevs.size())
545	return createStringError(
546	EC: std::errc::illegal_byte_sequence, Fmt: "Invalid abbrev number");
547	return CurAbbrevs [AbbrevNo].get();
548	}
549
550	/// Read the current record and discard it, returning the code for the record.
551	Expected<unsigned> skipRecord(unsigned AbbrevID);
552
553	Expected<unsigned> readRecord(unsigned AbbrevID,
554	SmallVectorImpl<uint64_t> &Vals,
555	StringRef Blob = nullptr*);
556
557	//===--------------------------------------------------------------------===//
558	// Abbrev Processing
559	//===--------------------------------------------------------------------===//
560	Error ReadAbbrevRecord();
561
562	/// Read and return a block info block from the bitstream. If an error was
563	/// encountered, return std::nullopt.
564	///
565	/// \param ReadBlockInfoNames Whether to read block/record name information in
566	/// the BlockInfo block. Only llvm-bcanalyzer uses this.
567	Expected<std::optional<BitstreamBlockInfo>>
568	ReadBlockInfoBlock(bool ReadBlockInfoNames = false);
569
570	/// Set the block info to be used by this BitstreamCursor to interpret
571	/// abbreviated records.
572	void setBlockInfo(BitstreamBlockInfo *BI) { BlockInfo = BI; }
573	};
574
575	} // end llvm namespace
576
577	#endif // LLVM_BITSTREAM_BITSTREAMREADER_H
578

source code of llvm/include/llvm/Bitstream/BitstreamReader.h