1 //===--- Preprocess.cpp - C Language Family Preprocessor Implementation ---===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file implements the Preprocessor interface.
11 //
12 //===----------------------------------------------------------------------===//
13 //
14 // Options to support:
15 // -H - Print the name of each header file used.
16 // -d[DNI] - Dump various things.
17 // -fworking-directory - #line's with preprocessor's working dir.
18 // -fpreprocessed
19 // -dependency-file,-M,-MM,-MF,-MG,-MP,-MT,-MQ,-MD,-MMD
20 // -W*
21 // -w
22 //
23 // Messages to emit:
24 // "Multiple include guards may be useful for:\n"
25 //
26 //===----------------------------------------------------------------------===//
27
28 #include "clang/Lex/Preprocessor.h"
29 #include "clang/Basic/FileManager.h"
30 #include "clang/Basic/FileSystemStatCache.h"
31 #include "clang/Basic/SourceManager.h"
32 #include "clang/Basic/TargetInfo.h"
33 #include "clang/Lex/CodeCompletionHandler.h"
34 #include "clang/Lex/ExternalPreprocessorSource.h"
35 #include "clang/Lex/HeaderSearch.h"
36 #include "clang/Lex/LexDiagnostic.h"
37 #include "clang/Lex/LiteralSupport.h"
38 #include "clang/Lex/MacroArgs.h"
39 #include "clang/Lex/MacroInfo.h"
40 #include "clang/Lex/ModuleLoader.h"
41 #include "clang/Lex/Pragma.h"
42 #include "clang/Lex/PreprocessingRecord.h"
43 #include "clang/Lex/PreprocessorOptions.h"
44 #include "clang/Lex/ScratchBuffer.h"
45 #include "llvm/ADT/APFloat.h"
46 #include "llvm/ADT/STLExtras.h"
47 #include "llvm/ADT/SmallString.h"
48 #include "llvm/ADT/StringExtras.h"
49 #include "llvm/Support/Capacity.h"
50 #include "llvm/Support/ConvertUTF.h"
51 #include "llvm/Support/MemoryBuffer.h"
52 #include "llvm/Support/raw_ostream.h"
53 using namespace clang;
54
55 //===----------------------------------------------------------------------===//
~ExternalPreprocessorSource()56 ExternalPreprocessorSource::~ExternalPreprocessorSource() { }
57
Preprocessor(IntrusiveRefCntPtr<PreprocessorOptions> PPOpts,DiagnosticsEngine & diags,LangOptions & opts,SourceManager & SM,HeaderSearch & Headers,ModuleLoader & TheModuleLoader,IdentifierInfoLookup * IILookup,bool OwnsHeaders,TranslationUnitKind TUKind)58 Preprocessor::Preprocessor(IntrusiveRefCntPtr<PreprocessorOptions> PPOpts,
59 DiagnosticsEngine &diags, LangOptions &opts,
60 SourceManager &SM, HeaderSearch &Headers,
61 ModuleLoader &TheModuleLoader,
62 IdentifierInfoLookup *IILookup, bool OwnsHeaders,
63 TranslationUnitKind TUKind)
64 : PPOpts(PPOpts), Diags(&diags), LangOpts(opts), Target(nullptr),
65 FileMgr(Headers.getFileMgr()), SourceMgr(SM),
66 ScratchBuf(new ScratchBuffer(SourceMgr)),HeaderInfo(Headers),
67 TheModuleLoader(TheModuleLoader), ExternalSource(nullptr),
68 Identifiers(opts, IILookup),
69 PragmaHandlers(new PragmaNamespace(StringRef())),
70 IncrementalProcessing(false), TUKind(TUKind),
71 CodeComplete(nullptr), CodeCompletionFile(nullptr),
72 CodeCompletionOffset(0), LastTokenWasAt(false),
73 ModuleImportExpectsIdentifier(false), CodeCompletionReached(0),
74 MainFileDir(nullptr), SkipMainFilePreamble(0, true), CurPPLexer(nullptr),
75 CurDirLookup(nullptr), CurLexerKind(CLK_Lexer), CurSubmodule(nullptr),
76 Callbacks(nullptr), CurSubmoduleState(&NullSubmoduleState),
77 MacroArgCache(nullptr), Record(nullptr),
78 MIChainHead(nullptr), DeserialMIChainHead(nullptr) {
79 OwnsHeaderSearch = OwnsHeaders;
80
81 CounterValue = 0; // __COUNTER__ starts at 0.
82
83 // Clear stats.
84 NumDirectives = NumDefined = NumUndefined = NumPragma = 0;
85 NumIf = NumElse = NumEndif = 0;
86 NumEnteredSourceFiles = 0;
87 NumMacroExpanded = NumFnMacroExpanded = NumBuiltinMacroExpanded = 0;
88 NumFastMacroExpanded = NumTokenPaste = NumFastTokenPaste = 0;
89 MaxIncludeStackDepth = 0;
90 NumSkipped = 0;
91
92 // Default to discarding comments.
93 KeepComments = false;
94 KeepMacroComments = false;
95 SuppressIncludeNotFoundError = false;
96
97 // Macro expansion is enabled.
98 DisableMacroExpansion = false;
99 MacroExpansionInDirectivesOverride = false;
100 InMacroArgs = false;
101 InMacroArgPreExpansion = false;
102 NumCachedTokenLexers = 0;
103 PragmasEnabled = true;
104 ParsingIfOrElifDirective = false;
105 PreprocessedOutput = false;
106
107 CachedLexPos = 0;
108
109 // We haven't read anything from the external source.
110 ReadMacrosFromExternalSource = false;
111
112 // "Poison" __VA_ARGS__, which can only appear in the expansion of a macro.
113 // This gets unpoisoned where it is allowed.
114 (Ident__VA_ARGS__ = getIdentifierInfo("__VA_ARGS__"))->setIsPoisoned();
115 SetPoisonReason(Ident__VA_ARGS__,diag::ext_pp_bad_vaargs_use);
116
117 // Initialize the pragma handlers.
118 RegisterBuiltinPragmas();
119
120 // Initialize builtin macros like __LINE__ and friends.
121 RegisterBuiltinMacros();
122
123 if(LangOpts.Borland) {
124 Ident__exception_info = getIdentifierInfo("_exception_info");
125 Ident___exception_info = getIdentifierInfo("__exception_info");
126 Ident_GetExceptionInfo = getIdentifierInfo("GetExceptionInformation");
127 Ident__exception_code = getIdentifierInfo("_exception_code");
128 Ident___exception_code = getIdentifierInfo("__exception_code");
129 Ident_GetExceptionCode = getIdentifierInfo("GetExceptionCode");
130 Ident__abnormal_termination = getIdentifierInfo("_abnormal_termination");
131 Ident___abnormal_termination = getIdentifierInfo("__abnormal_termination");
132 Ident_AbnormalTermination = getIdentifierInfo("AbnormalTermination");
133 } else {
134 Ident__exception_info = Ident__exception_code = nullptr;
135 Ident__abnormal_termination = Ident___exception_info = nullptr;
136 Ident___exception_code = Ident___abnormal_termination = nullptr;
137 Ident_GetExceptionInfo = Ident_GetExceptionCode = nullptr;
138 Ident_AbnormalTermination = nullptr;
139 }
140 }
141
~Preprocessor()142 Preprocessor::~Preprocessor() {
143 assert(BacktrackPositions.empty() && "EnableBacktrack/Backtrack imbalance!");
144
145 IncludeMacroStack.clear();
146
147 // Destroy any macro definitions.
148 while (MacroInfoChain *I = MIChainHead) {
149 MIChainHead = I->Next;
150 I->~MacroInfoChain();
151 }
152
153 // Free any cached macro expanders.
154 // This populates MacroArgCache, so all TokenLexers need to be destroyed
155 // before the code below that frees up the MacroArgCache list.
156 std::fill(TokenLexerCache, TokenLexerCache + NumCachedTokenLexers, nullptr);
157 CurTokenLexer.reset();
158
159 while (DeserializedMacroInfoChain *I = DeserialMIChainHead) {
160 DeserialMIChainHead = I->Next;
161 I->~DeserializedMacroInfoChain();
162 }
163
164 // Free any cached MacroArgs.
165 for (MacroArgs *ArgList = MacroArgCache; ArgList;)
166 ArgList = ArgList->deallocate();
167
168 // Delete the header search info, if we own it.
169 if (OwnsHeaderSearch)
170 delete &HeaderInfo;
171 }
172
Initialize(const TargetInfo & Target)173 void Preprocessor::Initialize(const TargetInfo &Target) {
174 assert((!this->Target || this->Target == &Target) &&
175 "Invalid override of target information");
176 this->Target = &Target;
177
178 // Initialize information about built-ins.
179 BuiltinInfo.InitializeTarget(Target);
180 HeaderInfo.setTarget(Target);
181 }
182
InitializeForModelFile()183 void Preprocessor::InitializeForModelFile() {
184 NumEnteredSourceFiles = 0;
185
186 // Reset pragmas
187 PragmaHandlersBackup = std::move(PragmaHandlers);
188 PragmaHandlers = llvm::make_unique<PragmaNamespace>(StringRef());
189 RegisterBuiltinPragmas();
190
191 // Reset PredefinesFileID
192 PredefinesFileID = FileID();
193 }
194
FinalizeForModelFile()195 void Preprocessor::FinalizeForModelFile() {
196 NumEnteredSourceFiles = 1;
197
198 PragmaHandlers = std::move(PragmaHandlersBackup);
199 }
200
setPTHManager(PTHManager * pm)201 void Preprocessor::setPTHManager(PTHManager* pm) {
202 PTH.reset(pm);
203 FileMgr.addStatCache(PTH->createStatCache());
204 }
205
DumpToken(const Token & Tok,bool DumpFlags) const206 void Preprocessor::DumpToken(const Token &Tok, bool DumpFlags) const {
207 llvm::errs() << tok::getTokenName(Tok.getKind()) << " '"
208 << getSpelling(Tok) << "'";
209
210 if (!DumpFlags) return;
211
212 llvm::errs() << "\t";
213 if (Tok.isAtStartOfLine())
214 llvm::errs() << " [StartOfLine]";
215 if (Tok.hasLeadingSpace())
216 llvm::errs() << " [LeadingSpace]";
217 if (Tok.isExpandDisabled())
218 llvm::errs() << " [ExpandDisabled]";
219 if (Tok.needsCleaning()) {
220 const char *Start = SourceMgr.getCharacterData(Tok.getLocation());
221 llvm::errs() << " [UnClean='" << StringRef(Start, Tok.getLength())
222 << "']";
223 }
224
225 llvm::errs() << "\tLoc=<";
226 DumpLocation(Tok.getLocation());
227 llvm::errs() << ">";
228 }
229
DumpLocation(SourceLocation Loc) const230 void Preprocessor::DumpLocation(SourceLocation Loc) const {
231 Loc.dump(SourceMgr);
232 }
233
DumpMacro(const MacroInfo & MI) const234 void Preprocessor::DumpMacro(const MacroInfo &MI) const {
235 llvm::errs() << "MACRO: ";
236 for (unsigned i = 0, e = MI.getNumTokens(); i != e; ++i) {
237 DumpToken(MI.getReplacementToken(i));
238 llvm::errs() << " ";
239 }
240 llvm::errs() << "\n";
241 }
242
PrintStats()243 void Preprocessor::PrintStats() {
244 llvm::errs() << "\n*** Preprocessor Stats:\n";
245 llvm::errs() << NumDirectives << " directives found:\n";
246 llvm::errs() << " " << NumDefined << " #define.\n";
247 llvm::errs() << " " << NumUndefined << " #undef.\n";
248 llvm::errs() << " #include/#include_next/#import:\n";
249 llvm::errs() << " " << NumEnteredSourceFiles << " source files entered.\n";
250 llvm::errs() << " " << MaxIncludeStackDepth << " max include stack depth\n";
251 llvm::errs() << " " << NumIf << " #if/#ifndef/#ifdef.\n";
252 llvm::errs() << " " << NumElse << " #else/#elif.\n";
253 llvm::errs() << " " << NumEndif << " #endif.\n";
254 llvm::errs() << " " << NumPragma << " #pragma.\n";
255 llvm::errs() << NumSkipped << " #if/#ifndef#ifdef regions skipped\n";
256
257 llvm::errs() << NumMacroExpanded << "/" << NumFnMacroExpanded << "/"
258 << NumBuiltinMacroExpanded << " obj/fn/builtin macros expanded, "
259 << NumFastMacroExpanded << " on the fast path.\n";
260 llvm::errs() << (NumFastTokenPaste+NumTokenPaste)
261 << " token paste (##) operations performed, "
262 << NumFastTokenPaste << " on the fast path.\n";
263
264 llvm::errs() << "\nPreprocessor Memory: " << getTotalMemory() << "B total";
265
266 llvm::errs() << "\n BumpPtr: " << BP.getTotalMemory();
267 llvm::errs() << "\n Macro Expanded Tokens: "
268 << llvm::capacity_in_bytes(MacroExpandedTokens);
269 llvm::errs() << "\n Predefines Buffer: " << Predefines.capacity();
270 // FIXME: List information for all submodules.
271 llvm::errs() << "\n Macros: "
272 << llvm::capacity_in_bytes(CurSubmoduleState->Macros);
273 llvm::errs() << "\n #pragma push_macro Info: "
274 << llvm::capacity_in_bytes(PragmaPushMacroInfo);
275 llvm::errs() << "\n Poison Reasons: "
276 << llvm::capacity_in_bytes(PoisonReasons);
277 llvm::errs() << "\n Comment Handlers: "
278 << llvm::capacity_in_bytes(CommentHandlers) << "\n";
279 }
280
281 Preprocessor::macro_iterator
macro_begin(bool IncludeExternalMacros) const282 Preprocessor::macro_begin(bool IncludeExternalMacros) const {
283 if (IncludeExternalMacros && ExternalSource &&
284 !ReadMacrosFromExternalSource) {
285 ReadMacrosFromExternalSource = true;
286 ExternalSource->ReadDefinedMacros();
287 }
288
289 // Make sure we cover all macros in visible modules.
290 for (const ModuleMacro &Macro : ModuleMacros)
291 CurSubmoduleState->Macros.insert(std::make_pair(Macro.II, MacroState()));
292
293 return CurSubmoduleState->Macros.begin();
294 }
295
getTotalMemory() const296 size_t Preprocessor::getTotalMemory() const {
297 return BP.getTotalMemory()
298 + llvm::capacity_in_bytes(MacroExpandedTokens)
299 + Predefines.capacity() /* Predefines buffer. */
300 // FIXME: Include sizes from all submodules, and include MacroInfo sizes,
301 // and ModuleMacros.
302 + llvm::capacity_in_bytes(CurSubmoduleState->Macros)
303 + llvm::capacity_in_bytes(PragmaPushMacroInfo)
304 + llvm::capacity_in_bytes(PoisonReasons)
305 + llvm::capacity_in_bytes(CommentHandlers);
306 }
307
308 Preprocessor::macro_iterator
macro_end(bool IncludeExternalMacros) const309 Preprocessor::macro_end(bool IncludeExternalMacros) const {
310 if (IncludeExternalMacros && ExternalSource &&
311 !ReadMacrosFromExternalSource) {
312 ReadMacrosFromExternalSource = true;
313 ExternalSource->ReadDefinedMacros();
314 }
315
316 return CurSubmoduleState->Macros.end();
317 }
318
319 /// \brief Compares macro tokens with a specified token value sequence.
MacroDefinitionEquals(const MacroInfo * MI,ArrayRef<TokenValue> Tokens)320 static bool MacroDefinitionEquals(const MacroInfo *MI,
321 ArrayRef<TokenValue> Tokens) {
322 return Tokens.size() == MI->getNumTokens() &&
323 std::equal(Tokens.begin(), Tokens.end(), MI->tokens_begin());
324 }
325
getLastMacroWithSpelling(SourceLocation Loc,ArrayRef<TokenValue> Tokens) const326 StringRef Preprocessor::getLastMacroWithSpelling(
327 SourceLocation Loc,
328 ArrayRef<TokenValue> Tokens) const {
329 SourceLocation BestLocation;
330 StringRef BestSpelling;
331 for (Preprocessor::macro_iterator I = macro_begin(), E = macro_end();
332 I != E; ++I) {
333 const MacroDirective::DefInfo
334 Def = I->second.findDirectiveAtLoc(Loc, SourceMgr);
335 if (!Def || !Def.getMacroInfo())
336 continue;
337 if (!Def.getMacroInfo()->isObjectLike())
338 continue;
339 if (!MacroDefinitionEquals(Def.getMacroInfo(), Tokens))
340 continue;
341 SourceLocation Location = Def.getLocation();
342 // Choose the macro defined latest.
343 if (BestLocation.isInvalid() ||
344 (Location.isValid() &&
345 SourceMgr.isBeforeInTranslationUnit(BestLocation, Location))) {
346 BestLocation = Location;
347 BestSpelling = I->first->getName();
348 }
349 }
350 return BestSpelling;
351 }
352
recomputeCurLexerKind()353 void Preprocessor::recomputeCurLexerKind() {
354 if (CurLexer)
355 CurLexerKind = CLK_Lexer;
356 else if (CurPTHLexer)
357 CurLexerKind = CLK_PTHLexer;
358 else if (CurTokenLexer)
359 CurLexerKind = CLK_TokenLexer;
360 else
361 CurLexerKind = CLK_CachingLexer;
362 }
363
SetCodeCompletionPoint(const FileEntry * File,unsigned CompleteLine,unsigned CompleteColumn)364 bool Preprocessor::SetCodeCompletionPoint(const FileEntry *File,
365 unsigned CompleteLine,
366 unsigned CompleteColumn) {
367 assert(File);
368 assert(CompleteLine && CompleteColumn && "Starts from 1:1");
369 assert(!CodeCompletionFile && "Already set");
370
371 using llvm::MemoryBuffer;
372
373 // Load the actual file's contents.
374 bool Invalid = false;
375 const MemoryBuffer *Buffer = SourceMgr.getMemoryBufferForFile(File, &Invalid);
376 if (Invalid)
377 return true;
378
379 // Find the byte position of the truncation point.
380 const char *Position = Buffer->getBufferStart();
381 for (unsigned Line = 1; Line < CompleteLine; ++Line) {
382 for (; *Position; ++Position) {
383 if (*Position != '\r' && *Position != '\n')
384 continue;
385
386 // Eat \r\n or \n\r as a single line.
387 if ((Position[1] == '\r' || Position[1] == '\n') &&
388 Position[0] != Position[1])
389 ++Position;
390 ++Position;
391 break;
392 }
393 }
394
395 Position += CompleteColumn - 1;
396
397 // If pointing inside the preamble, adjust the position at the beginning of
398 // the file after the preamble.
399 if (SkipMainFilePreamble.first &&
400 SourceMgr.getFileEntryForID(SourceMgr.getMainFileID()) == File) {
401 if (Position - Buffer->getBufferStart() < SkipMainFilePreamble.first)
402 Position = Buffer->getBufferStart() + SkipMainFilePreamble.first;
403 }
404
405 if (Position > Buffer->getBufferEnd())
406 Position = Buffer->getBufferEnd();
407
408 CodeCompletionFile = File;
409 CodeCompletionOffset = Position - Buffer->getBufferStart();
410
411 std::unique_ptr<MemoryBuffer> NewBuffer =
412 MemoryBuffer::getNewUninitMemBuffer(Buffer->getBufferSize() + 1,
413 Buffer->getBufferIdentifier());
414 char *NewBuf = const_cast<char*>(NewBuffer->getBufferStart());
415 char *NewPos = std::copy(Buffer->getBufferStart(), Position, NewBuf);
416 *NewPos = '\0';
417 std::copy(Position, Buffer->getBufferEnd(), NewPos+1);
418 SourceMgr.overrideFileContents(File, std::move(NewBuffer));
419
420 return false;
421 }
422
CodeCompleteNaturalLanguage()423 void Preprocessor::CodeCompleteNaturalLanguage() {
424 if (CodeComplete)
425 CodeComplete->CodeCompleteNaturalLanguage();
426 setCodeCompletionReached();
427 }
428
429 /// getSpelling - This method is used to get the spelling of a token into a
430 /// SmallVector. Note that the returned StringRef may not point to the
431 /// supplied buffer if a copy can be avoided.
getSpelling(const Token & Tok,SmallVectorImpl<char> & Buffer,bool * Invalid) const432 StringRef Preprocessor::getSpelling(const Token &Tok,
433 SmallVectorImpl<char> &Buffer,
434 bool *Invalid) const {
435 // NOTE: this has to be checked *before* testing for an IdentifierInfo.
436 if (Tok.isNot(tok::raw_identifier) && !Tok.hasUCN()) {
437 // Try the fast path.
438 if (const IdentifierInfo *II = Tok.getIdentifierInfo())
439 return II->getName();
440 }
441
442 // Resize the buffer if we need to copy into it.
443 if (Tok.needsCleaning())
444 Buffer.resize(Tok.getLength());
445
446 const char *Ptr = Buffer.data();
447 unsigned Len = getSpelling(Tok, Ptr, Invalid);
448 return StringRef(Ptr, Len);
449 }
450
451 /// CreateString - Plop the specified string into a scratch buffer and return a
452 /// location for it. If specified, the source location provides a source
453 /// location for the token.
CreateString(StringRef Str,Token & Tok,SourceLocation ExpansionLocStart,SourceLocation ExpansionLocEnd)454 void Preprocessor::CreateString(StringRef Str, Token &Tok,
455 SourceLocation ExpansionLocStart,
456 SourceLocation ExpansionLocEnd) {
457 Tok.setLength(Str.size());
458
459 const char *DestPtr;
460 SourceLocation Loc = ScratchBuf->getToken(Str.data(), Str.size(), DestPtr);
461
462 if (ExpansionLocStart.isValid())
463 Loc = SourceMgr.createExpansionLoc(Loc, ExpansionLocStart,
464 ExpansionLocEnd, Str.size());
465 Tok.setLocation(Loc);
466
467 // If this is a raw identifier or a literal token, set the pointer data.
468 if (Tok.is(tok::raw_identifier))
469 Tok.setRawIdentifierData(DestPtr);
470 else if (Tok.isLiteral())
471 Tok.setLiteralData(DestPtr);
472 }
473
getCurrentModule()474 Module *Preprocessor::getCurrentModule() {
475 if (getLangOpts().CurrentModule.empty())
476 return nullptr;
477
478 return getHeaderSearchInfo().lookupModule(getLangOpts().CurrentModule);
479 }
480
481 //===----------------------------------------------------------------------===//
482 // Preprocessor Initialization Methods
483 //===----------------------------------------------------------------------===//
484
485
486 /// EnterMainSourceFile - Enter the specified FileID as the main source file,
487 /// which implicitly adds the builtin defines etc.
EnterMainSourceFile()488 void Preprocessor::EnterMainSourceFile() {
489 // We do not allow the preprocessor to reenter the main file. Doing so will
490 // cause FileID's to accumulate information from both runs (e.g. #line
491 // information) and predefined macros aren't guaranteed to be set properly.
492 assert(NumEnteredSourceFiles == 0 && "Cannot reenter the main file!");
493 FileID MainFileID = SourceMgr.getMainFileID();
494
495 // If MainFileID is loaded it means we loaded an AST file, no need to enter
496 // a main file.
497 if (!SourceMgr.isLoadedFileID(MainFileID)) {
498 // Enter the main file source buffer.
499 EnterSourceFile(MainFileID, nullptr, SourceLocation());
500
501 // If we've been asked to skip bytes in the main file (e.g., as part of a
502 // precompiled preamble), do so now.
503 if (SkipMainFilePreamble.first > 0)
504 CurLexer->SkipBytes(SkipMainFilePreamble.first,
505 SkipMainFilePreamble.second);
506
507 // Tell the header info that the main file was entered. If the file is later
508 // #imported, it won't be re-entered.
509 if (const FileEntry *FE = SourceMgr.getFileEntryForID(MainFileID))
510 HeaderInfo.IncrementIncludeCount(FE);
511 }
512
513 // Preprocess Predefines to populate the initial preprocessor state.
514 std::unique_ptr<llvm::MemoryBuffer> SB =
515 llvm::MemoryBuffer::getMemBufferCopy(Predefines, "<built-in>");
516 assert(SB && "Cannot create predefined source buffer");
517 FileID FID = SourceMgr.createFileID(std::move(SB));
518 assert(!FID.isInvalid() && "Could not create FileID for predefines?");
519 setPredefinesFileID(FID);
520
521 // Start parsing the predefines.
522 EnterSourceFile(FID, nullptr, SourceLocation());
523 }
524
EndSourceFile()525 void Preprocessor::EndSourceFile() {
526 // Notify the client that we reached the end of the source file.
527 if (Callbacks)
528 Callbacks->EndOfMainFile();
529 }
530
531 //===----------------------------------------------------------------------===//
532 // Lexer Event Handling.
533 //===----------------------------------------------------------------------===//
534
535 /// LookUpIdentifierInfo - Given a tok::raw_identifier token, look up the
536 /// identifier information for the token and install it into the token,
537 /// updating the token kind accordingly.
LookUpIdentifierInfo(Token & Identifier) const538 IdentifierInfo *Preprocessor::LookUpIdentifierInfo(Token &Identifier) const {
539 assert(!Identifier.getRawIdentifier().empty() && "No raw identifier data!");
540
541 // Look up this token, see if it is a macro, or if it is a language keyword.
542 IdentifierInfo *II;
543 if (!Identifier.needsCleaning() && !Identifier.hasUCN()) {
544 // No cleaning needed, just use the characters from the lexed buffer.
545 II = getIdentifierInfo(Identifier.getRawIdentifier());
546 } else {
547 // Cleaning needed, alloca a buffer, clean into it, then use the buffer.
548 SmallString<64> IdentifierBuffer;
549 StringRef CleanedStr = getSpelling(Identifier, IdentifierBuffer);
550
551 if (Identifier.hasUCN()) {
552 SmallString<64> UCNIdentifierBuffer;
553 expandUCNs(UCNIdentifierBuffer, CleanedStr);
554 II = getIdentifierInfo(UCNIdentifierBuffer);
555 } else {
556 II = getIdentifierInfo(CleanedStr);
557 }
558 }
559
560 // Update the token info (identifier info and appropriate token kind).
561 Identifier.setIdentifierInfo(II);
562 Identifier.setKind(II->getTokenID());
563
564 return II;
565 }
566
SetPoisonReason(IdentifierInfo * II,unsigned DiagID)567 void Preprocessor::SetPoisonReason(IdentifierInfo *II, unsigned DiagID) {
568 PoisonReasons[II] = DiagID;
569 }
570
PoisonSEHIdentifiers(bool Poison)571 void Preprocessor::PoisonSEHIdentifiers(bool Poison) {
572 assert(Ident__exception_code && Ident__exception_info);
573 assert(Ident___exception_code && Ident___exception_info);
574 Ident__exception_code->setIsPoisoned(Poison);
575 Ident___exception_code->setIsPoisoned(Poison);
576 Ident_GetExceptionCode->setIsPoisoned(Poison);
577 Ident__exception_info->setIsPoisoned(Poison);
578 Ident___exception_info->setIsPoisoned(Poison);
579 Ident_GetExceptionInfo->setIsPoisoned(Poison);
580 Ident__abnormal_termination->setIsPoisoned(Poison);
581 Ident___abnormal_termination->setIsPoisoned(Poison);
582 Ident_AbnormalTermination->setIsPoisoned(Poison);
583 }
584
HandlePoisonedIdentifier(Token & Identifier)585 void Preprocessor::HandlePoisonedIdentifier(Token & Identifier) {
586 assert(Identifier.getIdentifierInfo() &&
587 "Can't handle identifiers without identifier info!");
588 llvm::DenseMap<IdentifierInfo*,unsigned>::const_iterator it =
589 PoisonReasons.find(Identifier.getIdentifierInfo());
590 if(it == PoisonReasons.end())
591 Diag(Identifier, diag::err_pp_used_poisoned_id);
592 else
593 Diag(Identifier,it->second) << Identifier.getIdentifierInfo();
594 }
595
596 /// \brief Returns a diagnostic message kind for reporting a future keyword as
597 /// appropriate for the identifier and specified language.
getFutureCompatDiagKind(const IdentifierInfo & II,const LangOptions & LangOpts)598 static diag::kind getFutureCompatDiagKind(const IdentifierInfo &II,
599 const LangOptions &LangOpts) {
600 assert(II.isFutureCompatKeyword() && "diagnostic should not be needed");
601
602 if (LangOpts.CPlusPlus)
603 return llvm::StringSwitch<diag::kind>(II.getName())
604 #define CXX11_KEYWORD(NAME, FLAGS) \
605 .Case(#NAME, diag::warn_cxx11_keyword)
606 #include "clang/Basic/TokenKinds.def"
607 ;
608
609 llvm_unreachable(
610 "Keyword not known to come from a newer Standard or proposed Standard");
611 }
612
613 /// HandleIdentifier - This callback is invoked when the lexer reads an
614 /// identifier. This callback looks up the identifier in the map and/or
615 /// potentially macro expands it or turns it into a named token (like 'for').
616 ///
617 /// Note that callers of this method are guarded by checking the
618 /// IdentifierInfo's 'isHandleIdentifierCase' bit. If this method changes, the
619 /// IdentifierInfo methods that compute these properties will need to change to
620 /// match.
HandleIdentifier(Token & Identifier)621 bool Preprocessor::HandleIdentifier(Token &Identifier) {
622 assert(Identifier.getIdentifierInfo() &&
623 "Can't handle identifiers without identifier info!");
624
625 IdentifierInfo &II = *Identifier.getIdentifierInfo();
626
627 // If the information about this identifier is out of date, update it from
628 // the external source.
629 // We have to treat __VA_ARGS__ in a special way, since it gets
630 // serialized with isPoisoned = true, but our preprocessor may have
631 // unpoisoned it if we're defining a C99 macro.
632 if (II.isOutOfDate()) {
633 bool CurrentIsPoisoned = false;
634 if (&II == Ident__VA_ARGS__)
635 CurrentIsPoisoned = Ident__VA_ARGS__->isPoisoned();
636
637 ExternalSource->updateOutOfDateIdentifier(II);
638 Identifier.setKind(II.getTokenID());
639
640 if (&II == Ident__VA_ARGS__)
641 II.setIsPoisoned(CurrentIsPoisoned);
642 }
643
644 // If this identifier was poisoned, and if it was not produced from a macro
645 // expansion, emit an error.
646 if (II.isPoisoned() && CurPPLexer) {
647 HandlePoisonedIdentifier(Identifier);
648 }
649
650 // If this is a macro to be expanded, do it.
651 if (MacroDefinition MD = getMacroDefinition(&II)) {
652 auto *MI = MD.getMacroInfo();
653 assert(MI && "macro definition with no macro info?");
654 if (!DisableMacroExpansion) {
655 if (!Identifier.isExpandDisabled() && MI->isEnabled()) {
656 // C99 6.10.3p10: If the preprocessing token immediately after the
657 // macro name isn't a '(', this macro should not be expanded.
658 if (!MI->isFunctionLike() || isNextPPTokenLParen())
659 return HandleMacroExpandedIdentifier(Identifier, MD);
660 } else {
661 // C99 6.10.3.4p2 says that a disabled macro may never again be
662 // expanded, even if it's in a context where it could be expanded in the
663 // future.
664 Identifier.setFlag(Token::DisableExpand);
665 if (MI->isObjectLike() || isNextPPTokenLParen())
666 Diag(Identifier, diag::pp_disabled_macro_expansion);
667 }
668 }
669 }
670
671 // If this identifier is a keyword in a newer Standard or proposed Standard,
672 // produce a warning. Don't warn if we're not considering macro expansion,
673 // since this identifier might be the name of a macro.
674 // FIXME: This warning is disabled in cases where it shouldn't be, like
675 // "#define constexpr constexpr", "int constexpr;"
676 if (II.isFutureCompatKeyword() && !DisableMacroExpansion) {
677 Diag(Identifier, getFutureCompatDiagKind(II, getLangOpts()))
678 << II.getName();
679 // Don't diagnose this keyword again in this translation unit.
680 II.setIsFutureCompatKeyword(false);
681 }
682
683 // C++ 2.11p2: If this is an alternative representation of a C++ operator,
684 // then we act as if it is the actual operator and not the textual
685 // representation of it.
686 if (II.isCPlusPlusOperatorKeyword())
687 Identifier.setIdentifierInfo(nullptr);
688
689 // If this is an extension token, diagnose its use.
690 // We avoid diagnosing tokens that originate from macro definitions.
691 // FIXME: This warning is disabled in cases where it shouldn't be,
692 // like "#define TY typeof", "TY(1) x".
693 if (II.isExtensionToken() && !DisableMacroExpansion)
694 Diag(Identifier, diag::ext_token_used);
695
696 // If this is the 'import' contextual keyword following an '@', note
697 // that the next token indicates a module name.
698 //
699 // Note that we do not treat 'import' as a contextual
700 // keyword when we're in a caching lexer, because caching lexers only get
701 // used in contexts where import declarations are disallowed.
702 if (LastTokenWasAt && II.isModulesImport() && !InMacroArgs &&
703 !DisableMacroExpansion &&
704 (getLangOpts().Modules || getLangOpts().DebuggerSupport) &&
705 CurLexerKind != CLK_CachingLexer) {
706 ModuleImportLoc = Identifier.getLocation();
707 ModuleImportPath.clear();
708 ModuleImportExpectsIdentifier = true;
709 CurLexerKind = CLK_LexAfterModuleImport;
710 }
711 return true;
712 }
713
Lex(Token & Result)714 void Preprocessor::Lex(Token &Result) {
715 // We loop here until a lex function retuns a token; this avoids recursion.
716 bool ReturnedToken;
717 do {
718 switch (CurLexerKind) {
719 case CLK_Lexer:
720 ReturnedToken = CurLexer->Lex(Result);
721 break;
722 case CLK_PTHLexer:
723 ReturnedToken = CurPTHLexer->Lex(Result);
724 break;
725 case CLK_TokenLexer:
726 ReturnedToken = CurTokenLexer->Lex(Result);
727 break;
728 case CLK_CachingLexer:
729 CachingLex(Result);
730 ReturnedToken = true;
731 break;
732 case CLK_LexAfterModuleImport:
733 LexAfterModuleImport(Result);
734 ReturnedToken = true;
735 break;
736 }
737 } while (!ReturnedToken);
738
739 LastTokenWasAt = Result.is(tok::at);
740 }
741
742
743 /// \brief Lex a token following the 'import' contextual keyword.
744 ///
LexAfterModuleImport(Token & Result)745 void Preprocessor::LexAfterModuleImport(Token &Result) {
746 // Figure out what kind of lexer we actually have.
747 recomputeCurLexerKind();
748
749 // Lex the next token.
750 Lex(Result);
751
752 // The token sequence
753 //
754 // import identifier (. identifier)*
755 //
756 // indicates a module import directive. We already saw the 'import'
757 // contextual keyword, so now we're looking for the identifiers.
758 if (ModuleImportExpectsIdentifier && Result.getKind() == tok::identifier) {
759 // We expected to see an identifier here, and we did; continue handling
760 // identifiers.
761 ModuleImportPath.push_back(std::make_pair(Result.getIdentifierInfo(),
762 Result.getLocation()));
763 ModuleImportExpectsIdentifier = false;
764 CurLexerKind = CLK_LexAfterModuleImport;
765 return;
766 }
767
768 // If we're expecting a '.' or a ';', and we got a '.', then wait until we
769 // see the next identifier.
770 if (!ModuleImportExpectsIdentifier && Result.getKind() == tok::period) {
771 ModuleImportExpectsIdentifier = true;
772 CurLexerKind = CLK_LexAfterModuleImport;
773 return;
774 }
775
776 // If we have a non-empty module path, load the named module.
777 if (!ModuleImportPath.empty()) {
778 Module *Imported = nullptr;
779 if (getLangOpts().Modules) {
780 Imported = TheModuleLoader.loadModule(ModuleImportLoc,
781 ModuleImportPath,
782 Module::Hidden,
783 /*IsIncludeDirective=*/false);
784 if (Imported)
785 makeModuleVisible(Imported, ModuleImportLoc);
786 }
787 if (Callbacks && (getLangOpts().Modules || getLangOpts().DebuggerSupport))
788 Callbacks->moduleImport(ModuleImportLoc, ModuleImportPath, Imported);
789 }
790 }
791
makeModuleVisible(Module * M,SourceLocation Loc)792 void Preprocessor::makeModuleVisible(Module *M, SourceLocation Loc) {
793 CurSubmoduleState->VisibleModules.setVisible(
794 M, Loc, [](Module *) {},
795 [&](ArrayRef<Module *> Path, Module *Conflict, StringRef Message) {
796 // FIXME: Include the path in the diagnostic.
797 // FIXME: Include the import location for the conflicting module.
798 Diag(ModuleImportLoc, diag::warn_module_conflict)
799 << Path[0]->getFullModuleName()
800 << Conflict->getFullModuleName()
801 << Message;
802 });
803
804 // Add this module to the imports list of the currently-built submodule.
805 if (!BuildingSubmoduleStack.empty() && M != BuildingSubmoduleStack.back().M)
806 BuildingSubmoduleStack.back().M->Imports.insert(M);
807 }
808
FinishLexStringLiteral(Token & Result,std::string & String,const char * DiagnosticTag,bool AllowMacroExpansion)809 bool Preprocessor::FinishLexStringLiteral(Token &Result, std::string &String,
810 const char *DiagnosticTag,
811 bool AllowMacroExpansion) {
812 // We need at least one string literal.
813 if (Result.isNot(tok::string_literal)) {
814 Diag(Result, diag::err_expected_string_literal)
815 << /*Source='in...'*/0 << DiagnosticTag;
816 return false;
817 }
818
819 // Lex string literal tokens, optionally with macro expansion.
820 SmallVector<Token, 4> StrToks;
821 do {
822 StrToks.push_back(Result);
823
824 if (Result.hasUDSuffix())
825 Diag(Result, diag::err_invalid_string_udl);
826
827 if (AllowMacroExpansion)
828 Lex(Result);
829 else
830 LexUnexpandedToken(Result);
831 } while (Result.is(tok::string_literal));
832
833 // Concatenate and parse the strings.
834 StringLiteralParser Literal(StrToks, *this);
835 assert(Literal.isAscii() && "Didn't allow wide strings in");
836
837 if (Literal.hadError)
838 return false;
839
840 if (Literal.Pascal) {
841 Diag(StrToks[0].getLocation(), diag::err_expected_string_literal)
842 << /*Source='in...'*/0 << DiagnosticTag;
843 return false;
844 }
845
846 String = Literal.GetString();
847 return true;
848 }
849
parseSimpleIntegerLiteral(Token & Tok,uint64_t & Value)850 bool Preprocessor::parseSimpleIntegerLiteral(Token &Tok, uint64_t &Value) {
851 assert(Tok.is(tok::numeric_constant));
852 SmallString<8> IntegerBuffer;
853 bool NumberInvalid = false;
854 StringRef Spelling = getSpelling(Tok, IntegerBuffer, &NumberInvalid);
855 if (NumberInvalid)
856 return false;
857 NumericLiteralParser Literal(Spelling, Tok.getLocation(), *this);
858 if (Literal.hadError || !Literal.isIntegerLiteral() || Literal.hasUDSuffix())
859 return false;
860 llvm::APInt APVal(64, 0);
861 if (Literal.GetIntegerValue(APVal))
862 return false;
863 Lex(Tok);
864 Value = APVal.getLimitedValue();
865 return true;
866 }
867
addCommentHandler(CommentHandler * Handler)868 void Preprocessor::addCommentHandler(CommentHandler *Handler) {
869 assert(Handler && "NULL comment handler");
870 assert(std::find(CommentHandlers.begin(), CommentHandlers.end(), Handler) ==
871 CommentHandlers.end() && "Comment handler already registered");
872 CommentHandlers.push_back(Handler);
873 }
874
removeCommentHandler(CommentHandler * Handler)875 void Preprocessor::removeCommentHandler(CommentHandler *Handler) {
876 std::vector<CommentHandler *>::iterator Pos
877 = std::find(CommentHandlers.begin(), CommentHandlers.end(), Handler);
878 assert(Pos != CommentHandlers.end() && "Comment handler not registered");
879 CommentHandlers.erase(Pos);
880 }
881
HandleComment(Token & result,SourceRange Comment)882 bool Preprocessor::HandleComment(Token &result, SourceRange Comment) {
883 bool AnyPendingTokens = false;
884 for (std::vector<CommentHandler *>::iterator H = CommentHandlers.begin(),
885 HEnd = CommentHandlers.end();
886 H != HEnd; ++H) {
887 if ((*H)->HandleComment(*this, Comment))
888 AnyPendingTokens = true;
889 }
890 if (!AnyPendingTokens || getCommentRetentionState())
891 return false;
892 Lex(result);
893 return true;
894 }
895
~ModuleLoader()896 ModuleLoader::~ModuleLoader() { }
897
~CommentHandler()898 CommentHandler::~CommentHandler() { }
899
~CodeCompletionHandler()900 CodeCompletionHandler::~CodeCompletionHandler() { }
901
createPreprocessingRecord()902 void Preprocessor::createPreprocessingRecord() {
903 if (Record)
904 return;
905
906 Record = new PreprocessingRecord(getSourceManager());
907 addPPCallbacks(std::unique_ptr<PPCallbacks>(Record));
908 }
909