clang 20.0.0git
ModuleDepCollector.cpp
Go to the documentation of this file.
1//===- ModuleDepCollector.cpp - Callbacks to collect deps -------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
10
15#include "llvm/ADT/STLExtras.h"
16#include "llvm/Support/BLAKE3.h"
17#include "llvm/Support/StringSaver.h"
18#include <optional>
19
20using namespace clang;
21using namespace tooling;
22using namespace dependencies;
23
24void ModuleDeps::forEachFileDep(llvm::function_ref<void(StringRef)> Cb) const {
25 SmallString<0> PathBuf;
26 PathBuf.reserve(256);
27 for (StringRef FileDep : FileDeps) {
28 auto ResolvedFileDep =
29 ASTReader::ResolveImportedPath(PathBuf, FileDep, FileDepsBaseDir);
30 Cb(*ResolvedFileDep);
31 }
32}
33
34const std::vector<std::string> &ModuleDeps::getBuildArguments() {
35 assert(!std::holds_alternative<std::monostate>(BuildInfo) &&
36 "Using uninitialized ModuleDeps");
37 if (const auto *CI = std::get_if<CowCompilerInvocation>(&BuildInfo))
38 BuildInfo = CI->getCC1CommandLine();
39 return std::get<std::vector<std::string>>(BuildInfo);
40}
41
42static void
45 const PrebuiltModuleVFSMapT &PrebuiltModuleVFSMap,
46 ScanningOptimizations OptimizeArgs) {
47 if (any(OptimizeArgs & ScanningOptimizations::HeaderSearch)) {
48 // Only preserve search paths that were used during the dependency scan.
49 std::vector<HeaderSearchOptions::Entry> Entries;
50 std::swap(Opts.UserEntries, Entries);
51
52 llvm::BitVector SearchPathUsage(Entries.size());
53 llvm::DenseSet<const serialization::ModuleFile *> Visited;
54 std::function<void(const serialization::ModuleFile *)> VisitMF =
55 [&](const serialization::ModuleFile *MF) {
56 SearchPathUsage |= MF->SearchPathUsage;
57 Visited.insert(MF);
59 if (!Visited.contains(Import))
60 VisitMF(Import);
61 };
62 VisitMF(&MF);
63
64 if (SearchPathUsage.size() != Entries.size())
65 llvm::report_fatal_error(
66 "Inconsistent search path options between modules detected");
67
68 for (auto Idx : SearchPathUsage.set_bits())
69 Opts.UserEntries.push_back(std::move(Entries[Idx]));
70 }
71 if (any(OptimizeArgs & ScanningOptimizations::VFS)) {
72 std::vector<std::string> VFSOverlayFiles;
73 std::swap(Opts.VFSOverlayFiles, VFSOverlayFiles);
74
75 llvm::BitVector VFSUsage(VFSOverlayFiles.size());
76 llvm::DenseSet<const serialization::ModuleFile *> Visited;
77 std::function<void(const serialization::ModuleFile *)> VisitMF =
78 [&](const serialization::ModuleFile *MF) {
79 Visited.insert(MF);
81 VFSUsage |= MF->VFSUsage;
82 // We only need to recurse into implicit modules. Other module types
83 // will have the correct set of VFSs for anything they depend on.
85 if (!Visited.contains(Import))
86 VisitMF(Import);
87 } else {
88 // This is not an implicitly built module, so it may have different
89 // VFS options. Fall back to a string comparison instead.
90 auto VFSMap = PrebuiltModuleVFSMap.find(MF->FileName);
91 if (VFSMap == PrebuiltModuleVFSMap.end())
92 return;
93 for (std::size_t I = 0, E = VFSOverlayFiles.size(); I != E; ++I) {
94 if (VFSMap->second.contains(VFSOverlayFiles[I]))
95 VFSUsage[I] = true;
96 }
97 }
98 };
99 VisitMF(&MF);
100
101 if (VFSUsage.size() != VFSOverlayFiles.size())
102 llvm::report_fatal_error(
103 "Inconsistent -ivfsoverlay options between modules detected");
104
105 for (auto Idx : VFSUsage.set_bits())
106 Opts.VFSOverlayFiles.push_back(std::move(VFSOverlayFiles[Idx]));
107 }
108}
109
111 bool IsSystemModule) {
112 // If this is not a system module or -Wsystem-headers was passed, don't
113 // optimize.
114 if (!IsSystemModule)
115 return;
116 bool Wsystem_headers = false;
117 for (StringRef Opt : Opts.Warnings) {
118 bool isPositive = !Opt.consume_front("no-");
119 if (Opt == "system-headers")
120 Wsystem_headers = isPositive;
121 }
122 if (Wsystem_headers)
123 return;
124
125 // Remove all warning flags. System modules suppress most, but not all,
126 // warnings.
127 Opts.Warnings.clear();
128 Opts.UndefPrefixes.clear();
129 Opts.Remarks.clear();
130}
131
/// Splits \p S at every occurrence of \p Separator and returns the pieces in
/// order. Empty pieces (from leading, trailing or adjacent separators) are
/// dropped; there is no limit on the number of pieces.
static std::vector<std::string> splitString(std::string S, char Separator) {
  std::vector<std::string> Pieces;

  // Walk the string once; [Begin, End) is the current candidate piece.
  for (std::size_t Begin = 0; Begin < S.size();) {
    std::size_t End = S.find(Separator, Begin);
    if (End == std::string::npos)
      End = S.size();
    if (End > Begin)
      Pieces.emplace_back(S, Begin, End - Begin);
    Begin = End + 1; // Step over the separator.
  }

  return Pieces;
}
141
// Fills in output-related paths on CI for the module described by Deps.
// The visible code asks Controller.lookupModuleOutput for values and, when a
// dependency output file is set but no dependency targets are, appends one
// target to the dependency output options.
// NOTE(review): several lines of this function are missing from this listing
// (the arguments of lookupModuleOutput, the assignments of its results, and
// the definition of `Target`); confirm details against the full source.
142void ModuleDepCollector::addOutputPaths(CowCompilerInvocation &CI,
143 ModuleDeps &Deps) {
148 Controller.lookupModuleOutput(
// Everything below only applies when a dependency output file is set.
150 if (!CI.getDependencyOutputOpts().OutputFile.empty()) {
// '\0' is the trailing argument of a call whose start is not visible here.
156 '\0');
157 if (!CI.getDependencyOutputOpts().OutputFile.empty() &&
158 CI.getDependencyOutputOpts().Targets.empty()) {
159 // Fall back to -o as dependency target, as in the driver.
162 CI.getMutDependencyOutputOpts().Targets.push_back(std::string(Target));
163 }
164 }
165}
166
168 const LangOptions &LangOpts,
169 CodeGenOptions &CGOpts) {
170 // TODO: Figure out better way to set options to their default value.
171 if (ProgramAction == frontend::GenerateModule) {
172 CGOpts.MainFileName.clear();
173 CGOpts.DwarfDebugFlags.clear();
174 }
175 if (ProgramAction == frontend::GeneratePCH ||
176 (ProgramAction == frontend::GenerateModule && !LangOpts.ModulesCodegen)) {
177 CGOpts.DebugCompilationDir.clear();
178 CGOpts.CoverageCompilationDir.clear();
179 CGOpts.CoverageDataFile.clear();
180 CGOpts.CoverageNotesFile.clear();
181 CGOpts.ProfileInstrumentUsePath.clear();
182 CGOpts.SampleProfileFile.clear();
183 CGOpts.ProfileRemappingFile.clear();
184 }
185}
186
191
192 // The scanner takes care to avoid passing non-affecting module maps to the
193 // explicit compiles. No need to do extra work just to find out there are no
194 // module map files to prune.
196
197 // Remove options that are incompatible with explicit module builds or are
198 // likely to differ between identical modules discovered from different
199 // translation units.
200 CI.getFrontendOpts().Inputs.clear();
201 CI.getFrontendOpts().OutputFile.clear();
202 // LLVM options are not going to affect the AST
203 CI.getFrontendOpts().LLVMArgs.clear();
204
206 CI.getCodeGenOpts());
207
208 // Map output paths that affect behaviour to "-" so their existence is in the
209 // context hash. The final path will be computed in addOutputPaths.
212 if (!CI.getDependencyOutputOpts().OutputFile.empty())
214 CI.getDependencyOutputOpts().Targets.clear();
215
219 CI.getFrontendOpts().MTMigrateDir.clear();
220 CI.getLangOpts().ModuleName.clear();
221
222 // Remove any macro definitions that are explicitly ignored.
223 if (!CI.getHeaderSearchOpts().ModulesIgnoreMacros.empty()) {
224 llvm::erase_if(
226 [&CI](const std::pair<std::string, bool> &Def) {
227 StringRef MacroDef = Def.first;
228 return CI.getHeaderSearchOpts().ModulesIgnoreMacros.contains(
229 llvm::CachedHashString(MacroDef.split('=').first));
230 });
231 // Remove the now unused option.
233 }
234
235 return CI;
236}
237
// Produces the compiler invocation for building the module described by Deps.
// Starts from a copy of CommonInvocation, adds the module map file as the
// input, adds the module map files and module files the module depends on,
// runs the caller-supplied Optimize callback on the result, and returns it.
// NOTE(review): the return-type line and several body lines are missing from
// this listing; comments below describe only what is visible.
239ModuleDepCollector::getInvocationAdjustedForModuleBuildWithoutOutputs(
240 const ModuleDeps &Deps,
241 llvm::function_ref<void(CowCompilerInvocation &)> Optimize) const {
242 CowCompilerInvocation CI = CommonInvocation;
243
246
247 // Inputs
// The input kind reuses the language of the -x setting (DashX); the remaining
// constructor arguments are on a line not shown here.
248 InputKind ModuleMapInputKind(CI.getFrontendOpts().DashX.getLanguage(),
250 CI.getMutFrontendOpts().Inputs.emplace_back(Deps.ClangModuleMapFile,
251 ModuleMapInputKind);
252
// NOTE(review): the initializer of CurrentModuleMapEntry is missing from this
// listing; presumably a file lookup of Deps.ClangModuleMapFile — confirm.
253 auto CurrentModuleMapEntry =
255 assert(CurrentModuleMapEntry && "module map file entry not found");
256
257 // Remove directly passed modulemap files. They will get added back if they
258 // were actually used.
260
// Module map files of the modules this module depends on, used by both
// filters in the loop below.
261 auto DepModuleMapFiles = collectModuleMapFiles(Deps.ClangModuleDeps);
262 for (StringRef ModuleMapFile : Deps.ModuleMapFileDeps) {
263 // TODO: Track these as `FileEntryRef` to simplify the equality check below.
264 auto ModuleMapEntry =
265 ScanInstance.getFileManager().getOptionalFileRef(ModuleMapFile);
266 assert(ModuleMapEntry && "module map file entry not found");
267
268 // Don't report module maps describing an eagerly-loaded dependency. This
269 // information will be deserialized from the PCM.
270 // TODO: Verify this works fine when modulemap for module A is eagerly
271 // loaded from A.pcm, and module map passed on the command line contains
272 // definition of a submodule: "explicit module A.Private { ... }".
273 if (EagerLoadModules && DepModuleMapFiles.contains(*ModuleMapEntry))
274 continue;
275
276 // Don't report the module map file of the current module unless it also
277 // describes a dependency (for symmetry).
278 if (*ModuleMapEntry == *CurrentModuleMapEntry &&
279 !DepModuleMapFiles.contains(*ModuleMapEntry))
280 continue;
281
282 CI.getMutFrontendOpts().ModuleMapFiles.emplace_back(ModuleMapFile);
283 }
284
285 // Report the prebuilt modules this module uses.
286 for (const auto &PrebuiltModule : Deps.PrebuiltModuleDeps)
287 CI.getMutFrontendOpts().ModuleFiles.push_back(PrebuiltModule.PCMFile);
288
289 // Add module file inputs from dependencies.
290 addModuleFiles(CI, Deps.ClangModuleDeps);
291
// NOTE(review): the condition opening the block closed at listing line 299 is
// missing from this listing.
293 // Apply -Wsystem-headers-in-module for the current module.
294 if (llvm::is_contained(CI.getDiagnosticOpts().SystemHeaderWarningsModules,
295 Deps.ID.ModuleName))
296 CI.getMutDiagnosticOpts().Warnings.push_back("system-headers");
297 // Remove the now unused option(s).
299 }
300
// Let the caller apply its own adjustments to the finished invocation.
301 Optimize(CI);
302
303 return CI;
304}
305
// Returns the set of module map file entries belonging to the given module
// dependencies. Every ID in ClangModuleDeps must already be registered in
// ModuleDepsByID, and its module map file must be resolvable through the
// scan instance's file manager (both asserted).
306llvm::DenseSet<const FileEntry *> ModuleDepCollector::collectModuleMapFiles(
307 ArrayRef<ModuleID> ClangModuleDeps) const {
308 llvm::DenseSet<const FileEntry *> ModuleMapFiles;
309 for (const ModuleID &MID : ClangModuleDeps) {
310 ModuleDeps *MD = ModuleDepsByID.lookup(MID);
311 assert(MD && "Inconsistent dependency info");
312 // TODO: Track ClangModuleMapFile as `FileEntryRef`.
// NOTE(review): the argument of getOptionalFileRef is on a line missing from
// this listing; presumably MD->ClangModuleMapFile (see the TODO) — confirm.
313 auto FE = ScanInstance.getFileManager().getOptionalFileRef(
315 assert(FE && "Missing module map file that was previously found");
316 ModuleMapFiles.insert(*FE);
317 }
318 return ModuleMapFiles;
319}
320
// Processes the module map files of the given module dependencies for CI.
// Does nothing when EagerLoadModules is set. Every ID must already be
// registered in ModuleDepsByID (asserted).
// NOTE(review): the statement that actually uses MD inside the loop is
// missing from this listing; presumably it appends MD's module map file to
// CI's frontend options — confirm against the full source.
321void ModuleDepCollector::addModuleMapFiles(
322 CompilerInvocation &CI, ArrayRef<ModuleID> ClangModuleDeps) const {
323 if (EagerLoadModules)
324 return; // Only pcm is needed for eager load.
325
326 for (const ModuleID &MID : ClangModuleDeps) {
327 ModuleDeps *MD = ModuleDepsByID.lookup(MID);
328 assert(MD && "Inconsistent dependency info");
330 }
331}
332
// Adds the PCM path of each module dependency to CI. With EagerLoadModules
// the path is appended to the frontend options' ModuleFiles list; otherwise
// it is recorded together with the module name.
// NOTE(review): two lines are missing from this listing — the expression
// that computes PCMPath and the start of the call in the else branch (its
// destination container is therefore not visible).
333void ModuleDepCollector::addModuleFiles(
334 CompilerInvocation &CI, ArrayRef<ModuleID> ClangModuleDeps) const {
335 for (const ModuleID &MID : ClangModuleDeps) {
336 std::string PCMPath =
338 if (EagerLoadModules)
339 CI.getFrontendOpts().ModuleFiles.push_back(std::move(PCMPath));
340 else
342 {MID.ModuleName, std::move(PCMPath)});
343 }
344}
345
// CowCompilerInvocation overload: same logic as the CompilerInvocation
// overload, but the frontend options are obtained through
// getMutFrontendOpts(). With EagerLoadModules the PCM path is appended to
// ModuleFiles; otherwise it is recorded together with the module name.
// NOTE(review): two lines are missing from this listing — the expression
// that computes PCMPath and the start of the call in the else branch.
346void ModuleDepCollector::addModuleFiles(
347 CowCompilerInvocation &CI, ArrayRef<ModuleID> ClangModuleDeps) const {
348 for (const ModuleID &MID : ClangModuleDeps) {
349 std::string PCMPath =
351 if (EagerLoadModules)
352 CI.getMutFrontendOpts().ModuleFiles.push_back(std::move(PCMPath));
353 else
355 {MID.ModuleName, std::move(PCMPath)});
356 }
357}
358
360 switch (FIF.getKind().getLanguage()) {
362 case Language::Asm:
364 return false;
365 default:
366 return true;
367 }
368}
369
373 CI.getLangOpts(), CI.getCodeGenOpts());
374
375 if (llvm::any_of(CI.getFrontendOpts().Inputs, needsModules)) {
376 Preprocessor &PP = ScanInstance.getPreprocessor();
377 if (Module *CurrentModule = PP.getCurrentModuleImplementation())
378 if (OptionalFileEntryRef CurrentModuleMap =
380 .getModuleMap()
381 .getModuleMapFileForUniquing(CurrentModule))
382 CI.getFrontendOpts().ModuleMapFiles.emplace_back(
383 CurrentModuleMap->getNameAsRequested());
384
385 SmallVector<ModuleID> DirectDeps;
386 for (const auto &KV : ModularDeps)
387 if (DirectModularDeps.contains(KV.first))
388 DirectDeps.push_back(KV.second->ID);
389
390 // TODO: Report module maps the same way it's done for modular dependencies.
391 addModuleMapFiles(CI, DirectDeps);
392
393 addModuleFiles(CI, DirectDeps);
394
395 for (const auto &KV : DirectPrebuiltModularDeps)
396 CI.getFrontendOpts().ModuleFiles.push_back(KV.second.PCMFile);
397 }
398}
399
// Computes the context hash string for a module build.
//
// The hash is a 16-byte truncated BLAKE3 digest over, in order: the full
// clang repository version, the VFS current working directory (only if it
// can be obtained), the cc1 command line generated from CI with each argument
// followed by a NUL byte, the name and context hash of every entry in
// MD.ClangModuleDeps, and the EagerLoadModules flag. The digest is returned
// as an unsigned base-36 string.
// NOTE(review): listing line 411 is missing here; given the comment about the
// serialization version it presumably adds that version to the hash — confirm.
400static std::string getModuleContextHash(const ModuleDeps &MD,
401 const CowCompilerInvocation &CI,
402 bool EagerLoadModules,
403 llvm::vfs::FileSystem &VFS) {
404 llvm::HashBuilder<llvm::TruncatedBLAKE3<16>, llvm::endianness::native>
405 HashBuilder;
406 SmallString<32> Scratch;
407
408 // Hash the compiler version and serialization version to ensure the module
409 // will be readable.
410 HashBuilder.add(getClangFullRepositoryVersion());
// A failure to get the working directory is not an error; it is simply left
// out of the hash.
412 llvm::ErrorOr<std::string> CWD = VFS.getCurrentWorkingDirectory();
413 if (CWD)
414 HashBuilder.add(*CWD);
415
416 // Hash the BuildInvocation without any input files.
417 SmallString<0> ArgVec;
418 ArgVec.reserve(4096);
// Arguments are concatenated into one buffer, each terminated by '\0', and
// the buffer is hashed as a single value.
419 CI.generateCC1CommandLine([&](const Twine &Arg) {
420 Arg.toVector(ArgVec);
421 ArgVec.push_back('\0');
422 });
423 HashBuilder.add(ArgVec);
424
425 // Hash the module dependencies. These paths may differ even if the invocation
426 // is identical if they depend on the contents of the files in the TU -- for
427 // example, case-insensitive paths to modulemap files. Usually such a case
428 // would indicate a missed optimization to canonicalize, but it may be
429 // difficult to canonicalize all cases when there is a VFS.
430 for (const auto &ID : MD.ClangModuleDeps) {
431 HashBuilder.add(ID.ModuleName);
432 HashBuilder.add(ID.ContextHash);
433 }
434
435 HashBuilder.add(EagerLoadModules);
436
// Reinterpret the 16 digest bytes as two 64-bit words (sizes checked at
// compile time) and print the resulting 128-bit value in base 36.
437 llvm::BLAKE3Result<16> Hash = HashBuilder.final();
438 std::array<uint64_t, 2> Words;
439 static_assert(sizeof(Hash) == sizeof(Words), "Hash must match Words");
440 std::memcpy(Words.data(), Hash.data(), sizeof(Hash));
441 return toString(llvm::APInt(sizeof(Words) * 8, Words), 36, /*Signed=*/false);
442}
443
// Registers Deps in ModuleDepsByID under Deps.ID. The ID must not already be
// present in the map (asserted).
// NOTE(review): the line that begins the call whose arguments appear on
// listing line 447 is missing; presumably it assigns the result of
// getModuleContextHash to Deps.ID.ContextHash — confirm.
444void ModuleDepCollector::associateWithContextHash(
445 const CowCompilerInvocation &CI, ModuleDeps &Deps) {
447 Deps, CI, EagerLoadModules, ScanInstance.getVirtualFileSystem());
448 bool Inserted = ModuleDepsByID.insert({Deps.ID, &Deps}).second;
// The cast keeps Inserted "used" when assert is compiled out.
449 (void)Inserted;
450 assert(Inserted && "duplicate module mapping");
451}
452
456 FileID PrevFID,
459 return;
460
461 // This has to be delayed as the context hash can change at the start of
462 // `CompilerInstance::ExecuteAction`.
463 if (MDC.ContextHash.empty()) {
464 MDC.ContextHash = MDC.ScanInstance.getInvocation().getModuleHash();
465 MDC.Consumer.handleContextHash(MDC.ContextHash);
466 }
467
468 SourceManager &SM = MDC.ScanInstance.getSourceManager();
469
470 // Dependency generation really does want to go all the way to the
471 // file entry for a source location to find out what is depended on.
472 // We do not want #line markers to affect dependency generation!
473 if (std::optional<StringRef> Filename = SM.getNonBuiltinFilenameForID(FID))
474 MDC.addFileDep(llvm::sys::path::remove_leading_dotslash(*Filename));
475}
476
478 SourceLocation HashLoc, const Token &IncludeTok, StringRef FileName,
479 bool IsAngled, CharSourceRange FilenameRange, OptionalFileEntryRef File,
480 StringRef SearchPath, StringRef RelativePath, const Module *SuggestedModule,
481 bool ModuleImported, SrcMgr::CharacteristicKind FileType) {
482 if (!File && !ModuleImported) {
483 // This is a non-modular include that HeaderSearch failed to find. Add it
484 // here as `FileChanged` will never see it.
485 MDC.addFileDep(FileName);
486 }
487 handleImport(SuggestedModule);
488}
489
492 const Module *Imported) {
493 if (MDC.ScanInstance.getPreprocessor().isInImportingCXXNamedModules()) {
494 P1689ModuleInfo RequiredModule;
495 RequiredModule.ModuleName = Path[0].first->getName().str();
497 MDC.RequiredStdCXXModules.push_back(RequiredModule);
498 return;
499 }
500
501 handleImport(Imported);
502}
503
504void ModuleDepCollectorPP::handleImport(const Module *Imported) {
505 if (!Imported)
506 return;
507
508 const Module *TopLevelModule = Imported->getTopLevelModule();
509
510 if (MDC.isPrebuiltModule(TopLevelModule))
511 MDC.DirectPrebuiltModularDeps.insert(
512 {TopLevelModule, PrebuiltModuleDep{TopLevelModule}});
513 else
514 MDC.DirectModularDeps.insert(TopLevelModule);
515}
516
518 FileID MainFileID = MDC.ScanInstance.getSourceManager().getMainFileID();
519 MDC.MainFile = std::string(MDC.ScanInstance.getSourceManager()
520 .getFileEntryRefForID(MainFileID)
521 ->getName());
522
523 auto &PP = MDC.ScanInstance.getPreprocessor();
524 if (PP.isInNamedModule()) {
525 P1689ModuleInfo ProvidedModule;
526 ProvidedModule.ModuleName = PP.getNamedModuleName();
528 ProvidedModule.IsStdCXXModuleInterface = PP.isInNamedInterfaceUnit();
529 // Don't put implementation (non partition) unit as Provide.
530 // Put the module as required instead. Since the implementation
531 // unit will import the primary module implicitly.
532 if (PP.isInImplementationUnit())
533 MDC.RequiredStdCXXModules.push_back(ProvidedModule);
534 else
535 MDC.ProvidedStdCXXModule = ProvidedModule;
536 }
537
538 if (!MDC.ScanInstance.getPreprocessorOpts().ImplicitPCHInclude.empty())
539 MDC.addFileDep(MDC.ScanInstance.getPreprocessorOpts().ImplicitPCHInclude);
540
541 for (const Module *M :
542 MDC.ScanInstance.getPreprocessor().getAffectingClangModules())
543 if (!MDC.isPrebuiltModule(M))
544 MDC.DirectModularDeps.insert(M);
545
546 for (const Module *M : MDC.DirectModularDeps)
547 handleTopLevelModule(M);
548
549 MDC.Consumer.handleDependencyOutputOpts(*MDC.Opts);
550
551 if (MDC.IsStdModuleP1689Format)
553 MDC.ProvidedStdCXXModule, MDC.RequiredStdCXXModules);
554
555 for (auto &&I : MDC.ModularDeps)
556 MDC.Consumer.handleModuleDependency(*I.second);
557
558 for (const Module *M : MDC.DirectModularDeps) {
559 auto It = MDC.ModularDeps.find(M);
560 // Only report direct dependencies that were successfully handled.
561 if (It != MDC.ModularDeps.end())
562 MDC.Consumer.handleDirectModuleDependency(It->second->ID);
563 }
564
565 for (auto &&I : MDC.FileDeps)
566 MDC.Consumer.handleFileDependency(I);
567
568 for (auto &&I : MDC.DirectPrebuiltModularDeps)
569 MDC.Consumer.handlePrebuiltModuleDependency(I.second);
570}
571
572std::optional<ModuleID>
573ModuleDepCollectorPP::handleTopLevelModule(const Module *M) {
574 assert(M == M->getTopLevelModule() && "Expected top level module!");
575
576 // A top-level module might not be actually imported as a module when
577 // -fmodule-name is used to compile a translation unit that imports this
578 // module. In that case it can be skipped. The appropriate header
579 // dependencies will still be reported as expected.
580 if (!M->