1 //===- Tokens.cpp - collect tokens from preprocessing ---------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 #include "clang/Tooling/Syntax/Tokens.h"
9
10 #include "clang/Basic/Diagnostic.h"
11 #include "clang/Basic/IdentifierTable.h"
12 #include "clang/Basic/LLVM.h"
13 #include "clang/Basic/LangOptions.h"
14 #include "clang/Basic/SourceLocation.h"
15 #include "clang/Basic/SourceManager.h"
16 #include "clang/Basic/TokenKinds.h"
17 #include "clang/Lex/PPCallbacks.h"
18 #include "clang/Lex/Preprocessor.h"
19 #include "clang/Lex/Token.h"
20 #include "llvm/ADT/ArrayRef.h"
21 #include "llvm/ADT/None.h"
22 #include "llvm/ADT/Optional.h"
23 #include "llvm/ADT/STLExtras.h"
24 #include "llvm/Support/Debug.h"
25 #include "llvm/Support/ErrorHandling.h"
26 #include "llvm/Support/FormatVariadic.h"
27 #include "llvm/Support/raw_ostream.h"
28 #include <algorithm>
29 #include <cassert>
30 #include <iterator>
31 #include <string>
32 #include <utility>
33 #include <vector>
34
35 using namespace clang;
36 using namespace clang::syntax;
37
Token(SourceLocation Location,unsigned Length,tok::TokenKind Kind)38 syntax::Token::Token(SourceLocation Location, unsigned Length,
39 tok::TokenKind Kind)
40 : Location(Location), Length(Length), Kind(Kind) {
41 assert(Location.isValid());
42 }
43
Token(const clang::Token & T)44 syntax::Token::Token(const clang::Token &T)
45 : Token(T.getLocation(), T.getLength(), T.getKind()) {
46 assert(!T.isAnnotation());
47 }
48
text(const SourceManager & SM) const49 llvm::StringRef syntax::Token::text(const SourceManager &SM) const {
50 bool Invalid = false;
51 const char *Start = SM.getCharacterData(location(), &Invalid);
52 assert(!Invalid);
53 return llvm::StringRef(Start, length());
54 }
55
range(const SourceManager & SM) const56 FileRange syntax::Token::range(const SourceManager &SM) const {
57 assert(location().isFileID() && "must be a spelled token");
58 FileID File;
59 unsigned StartOffset;
60 std::tie(File, StartOffset) = SM.getDecomposedLoc(location());
61 return FileRange(File, StartOffset, StartOffset + length());
62 }
63
range(const SourceManager & SM,const syntax::Token & First,const syntax::Token & Last)64 FileRange syntax::Token::range(const SourceManager &SM,
65 const syntax::Token &First,
66 const syntax::Token &Last) {
67 auto F = First.range(SM);
68 auto L = Last.range(SM);
69 assert(F.file() == L.file() && "tokens from different files");
70 assert((F == L || F.endOffset() <= L.beginOffset()) && "wrong order of tokens");
71 return FileRange(F.file(), F.beginOffset(), L.endOffset());
72 }
73
operator <<(llvm::raw_ostream & OS,const Token & T)74 llvm::raw_ostream &syntax::operator<<(llvm::raw_ostream &OS, const Token &T) {
75 return OS << T.str();
76 }
77
FileRange(FileID File,unsigned BeginOffset,unsigned EndOffset)78 FileRange::FileRange(FileID File, unsigned BeginOffset, unsigned EndOffset)
79 : File(File), Begin(BeginOffset), End(EndOffset) {
80 assert(File.isValid());
81 assert(BeginOffset <= EndOffset);
82 }
83
FileRange(const SourceManager & SM,SourceLocation BeginLoc,unsigned Length)84 FileRange::FileRange(const SourceManager &SM, SourceLocation BeginLoc,
85 unsigned Length) {
86 assert(BeginLoc.isValid());
87 assert(BeginLoc.isFileID());
88
89 std::tie(File, Begin) = SM.getDecomposedLoc(BeginLoc);
90 End = Begin + Length;
91 }
FileRange(const SourceManager & SM,SourceLocation BeginLoc,SourceLocation EndLoc)92 FileRange::FileRange(const SourceManager &SM, SourceLocation BeginLoc,
93 SourceLocation EndLoc) {
94 assert(BeginLoc.isValid());
95 assert(BeginLoc.isFileID());
96 assert(EndLoc.isValid());
97 assert(EndLoc.isFileID());
98 assert(SM.getFileID(BeginLoc) == SM.getFileID(EndLoc));
99 assert(SM.getFileOffset(BeginLoc) <= SM.getFileOffset(EndLoc));
100
101 std::tie(File, Begin) = SM.getDecomposedLoc(BeginLoc);
102 End = SM.getFileOffset(EndLoc);
103 }
104
operator <<(llvm::raw_ostream & OS,const FileRange & R)105 llvm::raw_ostream &syntax::operator<<(llvm::raw_ostream &OS,
106 const FileRange &R) {
107 return OS << llvm::formatv("FileRange(file = {0}, offsets = {1}-{2})",
108 R.file().getHashValue(), R.beginOffset(),
109 R.endOffset());
110 }
111
text(const SourceManager & SM) const112 llvm::StringRef FileRange::text(const SourceManager &SM) const {
113 bool Invalid = false;
114 StringRef Text = SM.getBufferData(File, &Invalid);
115 if (Invalid)
116 return "";
117 assert(Begin <= Text.size());
118 assert(End <= Text.size());
119 return Text.substr(Begin, length());
120 }
121
expandedTokens(SourceRange R) const122 llvm::ArrayRef<syntax::Token> TokenBuffer::expandedTokens(SourceRange R) const {
123 if (R.isInvalid())
124 return {};
125 const Token *Begin =
126 llvm::partition_point(expandedTokens(), [&](const syntax::Token &T) {
127 return SourceMgr->isBeforeInTranslationUnit(T.location(), R.getBegin());
128 });
129 const Token *End =
130 llvm::partition_point(expandedTokens(), [&](const syntax::Token &T) {
131 return !SourceMgr->isBeforeInTranslationUnit(R.getEnd(), T.location());
132 });
133 if (Begin > End)
134 return {};
135 return {Begin, End};
136 }
137
toCharRange(const SourceManager & SM) const138 CharSourceRange FileRange::toCharRange(const SourceManager &SM) const {
139 return CharSourceRange(
140 SourceRange(SM.getComposedLoc(File, Begin), SM.getComposedLoc(File, End)),
141 /*IsTokenRange=*/false);
142 }
143
144 std::pair<const syntax::Token *, const TokenBuffer::Mapping *>
spelledForExpandedToken(const syntax::Token * Expanded) const145 TokenBuffer::spelledForExpandedToken(const syntax::Token *Expanded) const {
146 assert(Expanded);
147 assert(ExpandedTokens.data() <= Expanded &&
148 Expanded < ExpandedTokens.data() + ExpandedTokens.size());
149
150 auto FileIt = Files.find(
151 SourceMgr->getFileID(SourceMgr->getExpansionLoc(Expanded->location())));
152 assert(FileIt != Files.end() && "no file for an expanded token");
153
154 const MarkedFile &File = FileIt->second;
155
156 unsigned ExpandedIndex = Expanded - ExpandedTokens.data();
157 // Find the first mapping that produced tokens after \p Expanded.
158 auto It = llvm::partition_point(File.Mappings, [&](const Mapping &M) {
159 return M.BeginExpanded <= ExpandedIndex;
160 });
161 // Our token could only be produced by the previous mapping.
162 if (It == File.Mappings.begin()) {
163 // No previous mapping, no need to modify offsets.
164 return {&File.SpelledTokens[ExpandedIndex - File.BeginExpanded], nullptr};
165 }
166 --It; // 'It' now points to last mapping that started before our token.
167
168 // Check if the token is part of the mapping.
169 if (ExpandedIndex < It->EndExpanded)
170 return {&File.SpelledTokens[It->BeginSpelled], /*Mapping*/ &*It};
171
172 // Not part of the mapping, use the index from previous mapping to compute the
173 // corresponding spelled token.
174 return {
175 &File.SpelledTokens[It->EndSpelled + (ExpandedIndex - It->EndExpanded)],
176 /*Mapping*/ nullptr};
177 }
178
spelledTokens(FileID FID) const179 llvm::ArrayRef<syntax::Token> TokenBuffer::spelledTokens(FileID FID) const {
180 auto It = Files.find(FID);
181 assert(It != Files.end());
182 return It->second.SpelledTokens;
183 }
184
str() const185 std::string TokenBuffer::Mapping::str() const {
186 return llvm::formatv("spelled tokens: [{0},{1}), expanded tokens: [{2},{3})",
187 BeginSpelled, EndSpelled, BeginExpanded, EndExpanded);
188 }
189
190 llvm::Optional<llvm::ArrayRef<syntax::Token>>
spelledForExpanded(llvm::ArrayRef<syntax::Token> Expanded) const191 TokenBuffer::spelledForExpanded(llvm::ArrayRef<syntax::Token> Expanded) const {
192 // Mapping an empty range is ambiguous in case of empty mappings at either end
193 // of the range, bail out in that case.
194 if (Expanded.empty())
195 return llvm::None;
196
197 // FIXME: also allow changes uniquely mapping to macro arguments.
198
199 const syntax::Token *BeginSpelled;
200 const Mapping *BeginMapping;
201 std::tie(BeginSpelled, BeginMapping) =
202 spelledForExpandedToken(&Expanded.front());
203
204 const syntax::Token *LastSpelled;
205 const Mapping *LastMapping;
206 std::tie(LastSpelled, LastMapping) =
207 spelledForExpandedToken(&Expanded.back());
208
209 FileID FID = SourceMgr->getFileID(BeginSpelled->location());
210 // FIXME: Handle multi-file changes by trying to map onto a common root.
211 if (FID != SourceMgr->getFileID(LastSpelled->location()))
212 return llvm::None;
213
214 const MarkedFile &File = Files.find(FID)->second;
215
216 // Do not allow changes that cross macro expansion boundaries.
217 unsigned BeginExpanded = Expanded.begin() - ExpandedTokens.data();
218 unsigned EndExpanded = Expanded.end() - ExpandedTokens.data();
219 if (BeginMapping && BeginMapping->BeginExpanded < BeginExpanded)
220 return llvm::None;
221 if (LastMapping && EndExpanded < LastMapping->EndExpanded)
222 return llvm::None;
223 // All is good, return the result.
224 return llvm::makeArrayRef(
225 BeginMapping ? File.SpelledTokens.data() + BeginMapping->BeginSpelled
226 : BeginSpelled,
227 LastMapping ? File.SpelledTokens.data() + LastMapping->EndSpelled
228 : LastSpelled + 1);
229 }
230
231 llvm::Optional<TokenBuffer::Expansion>
expansionStartingAt(const syntax::Token * Spelled) const232 TokenBuffer::expansionStartingAt(const syntax::Token *Spelled) const {
233 assert(Spelled);
234 assert(Spelled->location().isFileID() && "not a spelled token");
235 auto FileIt = Files.find(SourceMgr->getFileID(Spelled->location()));
236 assert(FileIt != Files.end() && "file not tracked by token buffer");
237
238 auto &File = FileIt->second;
239 assert(File.SpelledTokens.data() <= Spelled &&
240 Spelled < (File.SpelledTokens.data() + File.SpelledTokens.size()));
241
242 unsigned SpelledIndex = Spelled - File.SpelledTokens.data();
243 auto M = llvm::partition_point(File.Mappings, [&](const Mapping &M) {
244 return M.BeginSpelled < SpelledIndex;
245 });
246 if (M == File.Mappings.end() || M->BeginSpelled != SpelledIndex)
247 return llvm::None;
248
249 Expansion E;
250 E.Spelled = llvm::makeArrayRef(File.SpelledTokens.data() + M->BeginSpelled,
251 File.SpelledTokens.data() + M->EndSpelled);
252 E.Expanded = llvm::makeArrayRef(ExpandedTokens.data() + M->BeginExpanded,
253 ExpandedTokens.data() + M->EndExpanded);
254 return E;
255 }
256
257 llvm::ArrayRef<syntax::Token>
spelledTokensTouching(SourceLocation Loc,const syntax::TokenBuffer & Tokens)258 syntax::spelledTokensTouching(SourceLocation Loc,
259 const syntax::TokenBuffer &Tokens) {
260 assert(Loc.isFileID());
261 llvm::ArrayRef<syntax::Token> All =
262 Tokens.spelledTokens(Tokens.sourceManager().getFileID(Loc));
263 auto *Right = llvm::partition_point(
264 All, [&](const syntax::Token &Tok) { return Tok.location() < Loc; });
265 bool AcceptRight = Right != All.end() && Right->location() <= Loc;
266 bool AcceptLeft = Right != All.begin() && (Right - 1)->endLocation() >= Loc;
267 return llvm::makeArrayRef(Right - (AcceptLeft ? 1 : 0),
268 Right + (AcceptRight ? 1 : 0));
269 }
270
271 const syntax::Token *
spelledIdentifierTouching(SourceLocation Loc,const syntax::TokenBuffer & Tokens)272 syntax::spelledIdentifierTouching(SourceLocation Loc,
273 const syntax::TokenBuffer &Tokens) {
274 for (const syntax::Token &Tok : spelledTokensTouching(Loc, Tokens)) {
275 if (Tok.kind() == tok::identifier)
276 return &Tok;
277 }
278 return nullptr;
279 }
280
281 std::vector<const syntax::Token *>
macroExpansions(FileID FID) const282 TokenBuffer::macroExpansions(FileID FID) const {
283 auto FileIt = Files.find(FID);
284 assert(FileIt != Files.end() && "file not tracked by token buffer");
285 auto &File = FileIt->second;
286 std::vector<const syntax::Token *> Expansions;
287 auto &Spelled = File.SpelledTokens;
288 for (auto Mapping : File.Mappings) {
289 const syntax::Token *Token = &Spelled[Mapping.BeginSpelled];
290 if (Token->kind() == tok::TokenKind::identifier)
291 Expansions.push_back(Token);
292 }
293 return Expansions;
294 }
295
tokenize(FileID FID,const SourceManager & SM,const LangOptions & LO)296 std::vector<syntax::Token> syntax::tokenize(FileID FID, const SourceManager &SM,
297 const LangOptions &LO) {
298 std::vector<syntax::Token> Tokens;
299 IdentifierTable Identifiers(LO);
300 auto AddToken = [&](clang::Token T) {
301 // Fill the proper token kind for keywords, etc.
302 if (T.getKind() == tok::raw_identifier && !T.needsCleaning() &&
303 !T.hasUCN()) { // FIXME: support needsCleaning and hasUCN cases.
304 clang::IdentifierInfo &II = Identifiers.get(T.getRawIdentifier());
305 T.setIdentifierInfo(&II);
306 T.setKind(II.getTokenID());
307 }
308 Tokens.push_back(syntax::Token(T));
309 };
310
311 Lexer L(FID, SM.getBuffer(FID), SM, LO);
312
313 clang::Token T;
314 while (!L.LexFromRawLexer(T))
315 AddToken(T);
316 // 'eof' is only the last token if the input is null-terminated. Never store
317 // it, for consistency.
318 if (T.getKind() != tok::eof)
319 AddToken(T);
320 return Tokens;
321 }
322
323 /// Records information reqired to construct mappings for the token buffer that
324 /// we are collecting.
325 class TokenCollector::CollectPPExpansions : public PPCallbacks {
326 public:
CollectPPExpansions(TokenCollector & C)327 CollectPPExpansions(TokenCollector &C) : Collector(&C) {}
328
329 /// Disabled instance will stop reporting anything to TokenCollector.
330 /// This ensures that uses of the preprocessor after TokenCollector::consume()
331 /// is called do not access the (possibly invalid) collector instance.
disable()332 void disable() { Collector = nullptr; }
333
MacroExpands(const clang::Token & MacroNameTok,const MacroDefinition & MD,SourceRange Range,const MacroArgs * Args)334 void MacroExpands(const clang::Token &MacroNameTok, const MacroDefinition &MD,
335 SourceRange Range, const MacroArgs *Args) override {
336 if (!Collector)
337 return;
338 const auto &SM = Collector->PP.getSourceManager();
339 // Only record top-level expansions that directly produce expanded tokens.
340 // This excludes those where:
341 // - the macro use is inside a macro body,
342 // - the macro appears in an argument to another macro.
343 // However macro expansion isn't really a tree, it's token rewrite rules,
344 // so there are other cases, e.g.
345 // #define B(X) X
346 // #define A 1 + B
347 // A(2)
348 // Both A and B produce expanded tokens, though the macro name 'B' comes
349 // from an expansion. The best we can do is merge the mappings for both.
350
351 // The *last* token of any top-level macro expansion must be in a file.
352 // (In the example above, see the closing paren of the expansion of B).
353 if (!Range.getEnd().isFileID())
354 return;
355 // If there's a current expansion that encloses this one, this one can't be
356 // top-level.
357 if (LastExpansionEnd.isValid() &&
358 !SM.isBeforeInTranslationUnit(LastExpansionEnd, Range.getEnd()))
359 return;
360
361 // If the macro invocation (B) starts in a macro (A) but ends in a file,
362 // we'll create a merged mapping for A + B by overwriting the endpoint for
363 // A's startpoint.
364 if (!Range.getBegin().isFileID()) {
365 Range.setBegin(SM.getExpansionLoc(Range.getBegin()));
366 assert(Collector->Expansions.count(Range.getBegin().getRawEncoding()) &&
367 "Overlapping macros should have same expansion location");
368 }
369
370 Collector->Expansions[Range.getBegin().getRawEncoding()] = Range.getEnd();
371 LastExpansionEnd = Range.getEnd();
372 }
373 // FIXME: handle directives like #pragma, #include, etc.
374 private:
375 TokenCollector *Collector;
376 /// Used to detect recursive macro expansions.
377 SourceLocation LastExpansionEnd;
378 };
379
380 /// Fills in the TokenBuffer by tracing the run of a preprocessor. The
381 /// implementation tracks the tokens, macro expansions and directives coming
382 /// from the preprocessor and:
383 /// - for each token, figures out if it is a part of an expanded token stream,
384 /// spelled token stream or both. Stores the tokens appropriately.
385 /// - records mappings from the spelled to expanded token ranges, e.g. for macro
386 /// expansions.
387 /// FIXME: also properly record:
388 /// - #include directives,
389 /// - #pragma, #line and other PP directives,
390 /// - skipped pp regions,
391 /// - ...
392
TokenCollector(Preprocessor & PP)393 TokenCollector::TokenCollector(Preprocessor &PP) : PP(PP) {
394 // Collect the expanded token stream during preprocessing.
395 PP.setTokenWatcher([this](const clang::Token &T) {
396 if (T.isAnnotation())
397 return;
398 DEBUG_WITH_TYPE("collect-tokens", llvm::dbgs()
399 << "Token: "
400 << syntax::Token(T).dumpForTests(
401 this->PP.getSourceManager())
402 << "\n"
403
404 );
405 Expanded.push_back(syntax::Token(T));
406 });
407 // And locations of macro calls, to properly recover boundaries of those in
408 // case of empty expansions.
409 auto CB = std::make_unique<CollectPPExpansions>(*this);
410 this->Collector = CB.get();
411 PP.addPPCallbacks(std::move(CB));
412 }
413
414 /// Builds mappings and spelled tokens in the TokenBuffer based on the expanded
415 /// token stream.
416 class TokenCollector::Builder {
417 public:
Builder(std::vector<syntax::Token> Expanded,PPExpansions CollectedExpansions,const SourceManager & SM,const LangOptions & LangOpts)418 Builder(std::vector<syntax::Token> Expanded, PPExpansions CollectedExpansions,
419 const SourceManager &SM, const LangOptions &LangOpts)
420 : Result(SM), CollectedExpansions(std::move(CollectedExpansions)), SM(SM),
421 LangOpts(LangOpts) {
422 Result.ExpandedTokens = std::move(Expanded);
423 }
424
build()425 TokenBuffer build() && {
426 assert(!Result.ExpandedTokens.empty());
427 assert(Result.ExpandedTokens.back().kind() == tok::eof);
428
429 // Tokenize every file that contributed tokens to the expanded stream.
430 buildSpelledTokens();
431
432 // The expanded token stream consists of runs of tokens that came from
433 // the same source (a macro expansion, part of a file etc).
434 // Between these runs are the logical positions of spelled tokens that
435 // didn't expand to anything.
436 while (NextExpanded < Result.ExpandedTokens.size() - 1 /* eof */) {
437 // Create empty mappings for spelled tokens that expanded to nothing here.
438 // May advance NextSpelled, but NextExpanded is unchanged.
439 discard();
440 // Create mapping for a contiguous run of expanded tokens.
441 // Advances NextExpanded past the run, and NextSpelled accordingly.
442 unsigned OldPosition = NextExpanded;
443 advance();
444 if (NextExpanded == OldPosition)
445 diagnoseAdvanceFailure();
446 }
447 // If any tokens remain in any of the files, they didn't expand to anything.
448 // Create empty mappings up until the end of the file.
449 for (const auto &File : Result.Files)
450 discard(File.first);
451
452 return std::move(Result);
453 }
454
455 private:
456 // Consume a sequence of spelled tokens that didn't expand to anything.
457 // In the simplest case, skips spelled tokens until finding one that produced
458 // the NextExpanded token, and creates an empty mapping for them.
459 // If Drain is provided, skips remaining tokens from that file instead.
discard(llvm::Optional<FileID> Drain=llvm::None)460 void discard(llvm::Optional<FileID> Drain = llvm::None) {
461 SourceLocation Target =
462 Drain ? SM.getLocForEndOfFile(*Drain)
463 : SM.getExpansionLoc(
464 Result.ExpandedTokens[NextExpanded].location());
465 FileID File = SM.getFileID(Target);
466 const auto &SpelledTokens = Result.Files[File].SpelledTokens;
467 auto &NextSpelled = this->NextSpelled[File];
468
469 TokenBuffer::Mapping Mapping;
470 Mapping.BeginSpelled = NextSpelled;
471 // When dropping trailing tokens from a file, the empty mapping should
472 // be positioned within the file's expanded-token range (at the end).
473 Mapping.BeginExpanded = Mapping.EndExpanded =
474 Drain ? Result.Files[*Drain].EndExpanded : NextExpanded;
475 // We may want to split into several adjacent empty mappings.
476 // FlushMapping() emits the current mapping and starts a new one.
477 auto FlushMapping = [&, this] {
478 Mapping.EndSpelled = NextSpelled;
479 if (Mapping.BeginSpelled != Mapping.EndSpelled)
480 Result.Files[File].Mappings.push_back(Mapping);
481 Mapping.BeginSpelled = NextSpelled;
482 };
483
484 while (NextSpelled < SpelledTokens.size() &&
485 SpelledTokens[NextSpelled].location() < Target) {
486 // If we know mapping bounds at [NextSpelled, KnownEnd] (macro expansion)
487 // then we want to partition our (empty) mapping.
488 // [Start, NextSpelled) [NextSpelled, KnownEnd] (KnownEnd, Target)
489 SourceLocation KnownEnd = CollectedExpansions.lookup(
490 SpelledTokens[NextSpelled].location().getRawEncoding());
491 if (KnownEnd.isValid()) {
492 FlushMapping(); // Emits [Start, NextSpelled)
493 while (NextSpelled < SpelledTokens.size() &&
494 SpelledTokens[NextSpelled].location() <= KnownEnd)
495 ++NextSpelled;
496 FlushMapping(); // Emits [NextSpelled, KnownEnd]
497 // Now the loop contitues and will emit (KnownEnd, Target).
498 } else {
499 ++NextSpelled;
500 }
501 }
502 FlushMapping();
503 }
504
505 // Consumes the NextExpanded token and others that are part of the same run.
506 // Increases NextExpanded and NextSpelled by at least one, and adds a mapping
507 // (unless this is a run of file tokens, which we represent with no mapping).
advance()508 void advance() {
509 const syntax::Token &Tok = Result.ExpandedTokens[NextExpanded];
510 SourceLocation Expansion = SM.getExpansionLoc(Tok.location());
511 FileID File = SM.getFileID(Expansion);
512 const auto &SpelledTokens = Result.Files[File].SpelledTokens;
513 auto &NextSpelled = this->NextSpelled[File];
514
515 if (Tok.location().isFileID()) {
516 // A run of file tokens continues while the expanded/spelled tokens match.
517 while (NextSpelled < SpelledTokens.size() &&
518 NextExpanded < Result.ExpandedTokens.size() &&
519 SpelledTokens[NextSpelled].location() ==
520 Result.ExpandedTokens[NextExpanded].location()) {
521 ++NextSpelled;
522 ++NextExpanded;
523 }
524 // We need no mapping for file tokens copied to the expanded stream.
525 } else {
526 // We found a new macro expansion. We should have its spelling bounds.
527 auto End = CollectedExpansions.lookup(Expansion.getRawEncoding());
528 assert(End.isValid() && "Macro expansion wasn't captured?");
529
530 // Mapping starts here...
531 TokenBuffer::Mapping Mapping;
532 Mapping.BeginExpanded = NextExpanded;
533 Mapping.BeginSpelled = NextSpelled;
534 // ... consumes spelled tokens within bounds we captured ...
535 while (NextSpelled < SpelledTokens.size() &&
536 SpelledTokens[NextSpelled].location() <= End)
537 ++NextSpelled;
538 // ... consumes expanded tokens rooted at the same expansion ...
539 while (NextExpanded < Result.ExpandedTokens.size() &&
540 SM.getExpansionLoc(
541 Result.ExpandedTokens[NextExpanded].location()) == Expansion)
542 ++NextExpanded;
543 // ... and ends here.
544 Mapping.EndExpanded = NextExpanded;
545 Mapping.EndSpelled = NextSpelled;
546 Result.Files[File].Mappings.push_back(Mapping);
547 }
548 }
549
550 // advance() is supposed to consume at least one token - if not, we crash.
diagnoseAdvanceFailure()551 void diagnoseAdvanceFailure() {
552 #ifndef NDEBUG
553 // Show the failed-to-map token in context.
554 for (unsigned I = (NextExpanded < 10) ? 0 : NextExpanded - 10;
555 I < NextExpanded + 5 && I < Result.ExpandedTokens.size(); ++I) {
556 const char *L =
557 (I == NextExpanded) ? "!! " : (I < NextExpanded) ? "ok " : " ";
558 llvm::errs() << L << Result.ExpandedTokens[I].dumpForTests(SM) << "\n";
559 }
560 #endif
561 llvm_unreachable("Couldn't map expanded token to spelled tokens!");
562 }
563
564 /// Initializes TokenBuffer::Files and fills spelled tokens and expanded
565 /// ranges for each of the files.
buildSpelledTokens()566 void buildSpelledTokens() {
567 for (unsigned I = 0; I < Result.ExpandedTokens.size(); ++I) {
568 const auto &Tok = Result.ExpandedTokens[I];
569 auto FID = SM.getFileID(SM.getExpansionLoc(Tok.location()));
570 auto It = Result.Files.try_emplace(FID);
571 TokenBuffer::MarkedFile &File = It.first->second;
572
573 // The eof token should not be considered part of the main-file's range.
574 File.EndExpanded = Tok.kind() == tok::eof ? I : I + 1;
575
576 if (!It.second)
577 continue; // we have seen this file before.
578 // This is the first time we see this file.
579 File.BeginExpanded = I;
580 File.SpelledTokens = tokenize(FID, SM, LangOpts);
581 }
582 }
583
584 TokenBuffer Result;
585 unsigned NextExpanded = 0; // cursor in ExpandedTokens
586 llvm::DenseMap<FileID, unsigned> NextSpelled; // cursor in SpelledTokens
587 PPExpansions CollectedExpansions;
588 const SourceManager &SM;
589 const LangOptions &LangOpts;
590 };
591
consume()592 TokenBuffer TokenCollector::consume() && {
593 PP.setTokenWatcher(nullptr);
594 Collector->disable();
595 return Builder(std::move(Expanded), std::move(Expansions),
596 PP.getSourceManager(), PP.getLangOpts())
597 .build();
598 }
599
str() const600 std::string syntax::Token::str() const {
601 return llvm::formatv("Token({0}, length = {1})", tok::getTokenName(kind()),
602 length());
603 }
604
dumpForTests(const SourceManager & SM) const605 std::string syntax::Token::dumpForTests(const SourceManager &SM) const {
606 return llvm::formatv("{0} {1}", tok::getTokenName(kind()), text(SM));
607 }
608
dumpForTests() const609 std::string TokenBuffer::dumpForTests() const {
610 auto PrintToken = [this](const syntax::Token &T) -> std::string {
611 if (T.kind() == tok::eof)
612 return "<eof>";
613 return T.text(*SourceMgr);
614 };
615
616 auto DumpTokens = [this, &PrintToken](llvm::raw_ostream &OS,
617 llvm::ArrayRef<syntax::Token> Tokens) {
618 if (Tokens.empty()) {
619 OS << "<empty>";
620 return;
621 }
622 OS << Tokens[0].text(*SourceMgr);
623 for (unsigned I = 1; I < Tokens.size(); ++I) {
624 if (Tokens[I].kind() == tok::eof)
625 continue;
626 OS << " " << PrintToken(Tokens[I]);
627 }
628 };
629
630 std::string Dump;
631 llvm::raw_string_ostream OS(Dump);
632
633 OS << "expanded tokens:\n"
634 << " ";
635 // (!) we do not show '<eof>'.
636 DumpTokens(OS, llvm::makeArrayRef(ExpandedTokens).drop_back());
637 OS << "\n";
638
639 std::vector<FileID> Keys;
640 for (auto F : Files)
641 Keys.push_back(F.first);
642 llvm::sort(Keys);
643
644 for (FileID ID : Keys) {
645 const MarkedFile &File = Files.find(ID)->second;
646 auto *Entry = SourceMgr->getFileEntryForID(ID);
647 if (!Entry)
648 continue; // Skip builtin files.
649 OS << llvm::formatv("file '{0}'\n", Entry->getName())
650 << " spelled tokens:\n"
651 << " ";
652 DumpTokens(OS, File.SpelledTokens);
653 OS << "\n";
654
655 if (File.Mappings.empty()) {
656 OS << " no mappings.\n";
657 continue;
658 }
659 OS << " mappings:\n";
660 for (auto &M : File.Mappings) {
661 OS << llvm::formatv(
662 " ['{0}'_{1}, '{2}'_{3}) => ['{4}'_{5}, '{6}'_{7})\n",
663 PrintToken(File.SpelledTokens[M.BeginSpelled]), M.BeginSpelled,
664 M.EndSpelled == File.SpelledTokens.size()
665 ? "<eof>"
666 : PrintToken(File.SpelledTokens[M.EndSpelled]),
667 M.EndSpelled, PrintToken(ExpandedTokens[M.BeginExpanded]),
668 M.BeginExpanded, PrintToken(ExpandedTokens[M.EndExpanded]),
669 M.EndExpanded);
670 }
671 }
672 return OS.str();
673 }
674