10#include "mlir/IR/Attributes.h"
11#include "mlir/IR/BuiltinAttributeInterfaces.h"
12#include "mlir/IR/IRMapping.h"
13#include "mlir/IR/Location.h"
14#include "mlir/IR/Value.h"
32#include "llvm/ADT/StringRef.h"
33#include "llvm/ADT/TypeSwitch.h"
34#include "llvm/IR/Instructions.h"
35#include "llvm/Support/ErrorHandling.h"
36#include "llvm/Support/MemoryBuffer.h"
37#include "llvm/Support/Path.h"
38#include "llvm/Support/VirtualFileSystem.h"
47#define GEN_PASS_DEF_LOWERINGPREPARE
48#include "clang/CIR/Dialect/Passes.h.inc"
52 SmallString<128> fileName;
54 if (mlirModule.getSymName())
55 fileName = llvm::sys::path::filename(mlirModule.getSymName()->str());
60 for (
size_t i = 0; i < fileName.size(); ++i) {
// Pass object for the "lowering prepare" step. It derives from the generated
// base impl::LoweringPrepareBase, parameterised on this struct itself.
71struct LoweringPreparePass
72 :
public impl::LoweringPrepareBase<LoweringPreparePass> {
73 LoweringPreparePass() =
default;
// Copy constructor: only the base sub-object is copy-initialised from `other`;
// no member of this struct appears in the initialiser list.
// NOTE(review): presumably intentional so that a copied pass starts with
// fresh per-module state -- confirm.
83 LoweringPreparePass(
const LoweringPreparePass &other)
84 : impl::LoweringPrepareBase<LoweringPreparePass>(other) {}
86 void runOnOperation()
override;
88 void runOnOp(mlir::Operation *op);
89 void lowerCastOp(cir::CastOp op);
90 void lowerComplexDivOp(cir::ComplexDivOp op);
91 void lowerComplexMulOp(cir::ComplexMulOp op);
92 void lowerUnaryOp(cir::UnaryOpInterface op);
93 void lowerGetGlobalOp(cir::GetGlobalOp op);
94 void lowerGlobalOp(cir::GlobalOp op);
95 void lowerThreeWayCmpOp(cir::CmpThreeWayOp op);
96 void lowerArrayDtor(cir::ArrayDtor op);
97 void lowerArrayCtor(cir::ArrayCtor op);
98 void lowerTrivialCopyCall(cir::CallOp op);
99 void lowerStoreOfConstAggregate(cir::StoreOp op);
100 void lowerLocalInitOp(cir::LocalInitOp op);
105 cir::FuncOp getCalledFunction(cir::CallOp callOp);
114 cir::GlobalOp getOrCreateConstAggregateGlobal(CIRBaseBuilderTy &builder,
116 llvm::StringRef baseName,
118 mlir::TypedAttr constant);
121 cir::FuncOp buildCXXGlobalVarDeclInitFunc(cir::GlobalOp op);
124 void defineGlobalThreadLocalWrapper(cir::GlobalOp op, cir::FuncOp initAlias,
125 bool isVarDefinition);
127 cir::FuncOp defineGlobalThreadLocalInitAlias(cir::GlobalOp op,
128 cir::FuncOp aliasee);
130 cir::FuncOp getOrCreateThreadLocalWrapper(CIRBaseBuilderTy &builder,
136 cir::IfOp buildGlobalTlsGuardCheck(CIRBaseBuilderTy &builder,
137 mlir::Location loc, cir::GlobalOp guard);
139 cir::FuncOp getOrCreateDtorFunc(CIRBaseBuilderTy &builder, cir::GlobalOp op,
140 mlir::Region &dtorRegion,
141 cir::CallOp &dtorCall);
144 void buildCXXGlobalInitFunc();
147 void buildCXXGlobalTlsFunc();
150 void buildGlobalCtorDtorList();
152 cir::FuncOp buildRuntimeFunction(
153 mlir::OpBuilder &builder, llvm::StringRef name, mlir::Location loc,
155 cir::GlobalLinkageKind linkage = cir::GlobalLinkageKind::ExternalLinkage);
157 cir::GlobalOp getOrCreateRuntimeVariable(
158 mlir::OpBuilder &builder, llvm::StringRef name, mlir::Location loc,
160 cir::GlobalLinkageKind linkage = cir::GlobalLinkageKind::ExternalLinkage,
161 cir::VisibilityKind visibility = cir::VisibilityKind::Default);
167 llvm::StringMap<FuncOp> cudaKernelMap;
168 llvm::SmallVector<std::pair<cir::GlobalOp, cir::CUDAVarRegistrationInfoAttr>>
173 void buildCUDAModuleCtor();
174 std::optional<FuncOp> buildCUDAModuleDtor();
175 std::optional<FuncOp> buildHIPModuleDtor();
176 std::optional<FuncOp> buildCUDARegisterGlobals();
177 void buildCUDARegisterVars(cir::CIRBaseBuilderTy &builder,
178 FuncOp regGlobalFunc);
179 void buildCUDARegisterGlobalFunctions(cir::CIRBaseBuilderTy &builder,
180 FuncOp regGlobalFunc);
183 void handleStaticLocal(cir::GlobalOp globalOp, cir::LocalInitOp localInitOp);
192 cir::FuncOp getTlsInitFn();
195 cir::GlobalOp createGlobalThreadLocalGuard(CIRBaseBuilderTy &builder,
199 cir::GlobalOp createGuardGlobalOp(CIRBaseBuilderTy &builder,
200 mlir::Location loc, llvm::StringRef name,
201 cir::IntType guardTy,
202 cir::GlobalLinkageKind linkage);
// Looks up the guard variable previously recorded for `globalSymName` in
// staticLocalDeclGuardMap. Only the lookup and the "entry found" test are
// visible in this excerpt; what is produced on a miss is not shown.
// NOTE(review): callers appear to expect a null GlobalOp on a miss -- confirm.
205 cir::GlobalOp getStaticLocalDeclGuardAddress(llvm::StringRef globalSymName) {
206 auto it = staticLocalDeclGuardMap.find(globalSymName);
207 if (it != staticLocalDeclGuardMap.end())
// Records `guard` as the guard variable for the global named `globalSymName`.
// Uses operator[] on the map, so an existing entry under the same name is
// overwritten.
213 void setStaticLocalDeclGuardAddress(llvm::StringRef globalSymName,
214 cir::GlobalOp guard) {
215 staticLocalDeclGuardMap[globalSymName] = guard;
// Returns the guard variable associated with `globalOp`, consulting
// staticLocalDeclGuardMap (keyed by the global's symbol name) and recording
// the guard in that map before finishing.
//
//   builder              - used to create the guard global when needed.
//   globalOp             - the guarded global; the guard copies its linkage,
//                          dso_local flag and TLS model.
//   guardName            - symbol name given to a newly created guard.
//   isLocalVarDecl       - participates in the comdat decision below.
//   useInt8GuardVariable - forces an 8-bit guard type.
219 cir::GlobalOp getOrCreateStaticLocalDeclGuardAddress(
220 CIRBaseBuilderTy &builder, cir::GlobalOp globalOp, StringRef guardName,
221 bool isLocalVarDecl,
bool useInt8GuardVariable) {
223 cir::CIRDataLayout dataLayout(mlirModule);
224 cir::IntType guardTy;
225 clang::CharUnits guardAlignment;
// Guard type selection: 8 bits when requested, a width equal to the AST
// context's signed size type when the ARM guard ABI applies, otherwise
// 64 bits. The trailing `true` argument is presumably the signedness flag
// (TODO confirm against cir::IntType::get). The statements that assign
// guardAlignment are not visible in this excerpt.
228 if (useInt8GuardVariable) {
229 guardTy = cir::IntType::get(&getContext(), 8,
true);
231 }
else if (useARMGuardVarABI()) {
233 const unsigned sizeTypeSize =
234 astCtx->getTypeSize(astCtx->getSignedSizeType());
236 cir::IntType::get(&getContext(), sizeTypeSize,
true);
240 guardTy = cir::IntType::get(&getContext(), 64,
true);
// Both a guard type and a non-zero alignment must have been chosen above.
244 assert(guardTy && guardAlignment.
getQuantity() != 0);
246 llvm::StringRef globalSymName = globalOp.getSymName();
247 cir::GlobalOp guard = getStaticLocalDeclGuardAddress(globalSymName);
// New guard: same linkage as the guarded global, zero initial value, and the
// global's dso_local flag, the chosen alignment and the global's TLS model.
// NOTE(review): the condition guarding creation is not visible here;
// presumably this only runs when the lookup above found nothing.
250 guard = createGuardGlobalOp(builder, globalOp->getLoc(), guardName,
251 guardTy, globalOp.getLinkage());
252 guard.setInitialValueAttr(cir::IntAttr::get(guardTy, 0));
253 guard.setDSOLocal(globalOp.getDsoLocal());
254 guard.setAlignment(guardAlignment.
getAsAlign().value());
255 guard.setTlsModel(globalOp.getTlsModel());
// Comdat: the guard is marked comdat when the global has a comdat and either
// (a) the variable is not a local declaration and the target object format
// is ELF or Wasm, or (b) the global is weak for the linker.
261 bool hasComdat = globalOp.getComdat();
262 const llvm::Triple &triple = astCtx->getTargetInfo().getTriple();
265 if (!isLocalVarDecl && hasComdat &&
266 (triple.isOSBinFormatELF() || triple.isOSBinFormatWasm())) {
268 guard.setComdat(
true);
269 }
else if (hasComdat && globalOp.isWeakForLinker()) {
270 guard.setComdat(
true);
// Remember the guard so later requests for the same global reuse it.
273 setStaticLocalDeclGuardAddress(globalSymName, guard);
// --- Pass state -----------------------------------------------------------
// AST context pointer; assigned through setASTContext.
282 clang::ASTContext *astCtx;
// Module used as the symbol-lookup root and as the insertion target for
// generated globals and functions.
285 mlir::ModuleOp mlirModule;
305 mlir::SymbolTableCollection symbolTables;
// Per-base-name counter, read and post-incremented when generated helper
// functions such as "__cxx_global_var_init" are named.
308 llvm::StringMap<uint32_t> dynamicInitializerNames;
309 llvm::SmallVector<cir::FuncOp> dynamicInitializers;
310 llvm::SmallVector<cir::FuncOp> globalThreadLocalInitializers;
// Thread-local wrapper functions already created, keyed by wrapper name.
311 llvm::StringMap<cir::FuncOp> threadLocalWrappers;
312 llvm::StringMap<cir::FuncOp> threadLocalInitAliases;
// Guard variable for each guarded global, keyed by the global's symbol name.
315 llvm::StringMap<cir::GlobalOp> staticLocalDeclGuardMap;
317 llvm::StringMap<llvm::SmallVector<cir::GlobalOp, 1>> constAggregateGlobals;
// (name, number) pairs; presumably function name and priority for the global
// constructor / destructor lists -- TODO confirm where they are filled.
320 llvm::SmallVector<std::pair<std::string, uint32_t>, 4> globalCtorList;
322 llvm::SmallVector<std::pair<std::string, uint32_t>, 4> globalDtorList;
// Classifies the target's C++ ABI kind (taken from the AST context) for the
// purpose of guard-variable layout. The ABI kinds listed below share one
// switch arm; the value returned for them, and for all other kinds, is not
// visible in this excerpt (presumably true for the listed kinds -- confirm).
326 bool useARMGuardVarABI()
const {
327 switch (astCtx->getCXXABIKind()) {
328 case clang::TargetCXXABI::GenericARM:
329 case clang::TargetCXXABI::iOS:
330 case clang::TargetCXXABI::WatchOS:
331 case clang::TargetCXXABI::GenericAArch64:
332 case clang::TargetCXXABI::WebAssembly:
// Emits the registration of `global`'s destructor with an at-exit runtime
// function and moves the contents of `dtorRegion` into `entryBB`.
//
//   builder    - builder used for every created operation; on return its
//                insertion point is the end of `entryBB`.
//   global     - the global whose destructor is being registered.
//   dtorRegion - region holding the destructor code.
//   tls        - NOTE(review): presumably selects the thread-local at-exit
//                variant below; the statement that tests it is not visible.
//   entryBB    - block that receives the spliced operations.
339 void emitGlobalGuardedDtorRegion(CIRBaseBuilderTy &builder,
340 cir::GlobalOp global,
341 mlir::Region &dtorRegion,
bool tls,
342 mlir::Block &entryBB) {
// Find or declare `__dso_handle` (i8, external linkage, hidden visibility)
// at the start of the module body.
344 builder.setInsertionPointToStart(&mlirModule.getBodyRegion().front());
345 cir::GlobalOp handle = getOrCreateRuntimeVariable(
346 builder,
"__dso_handle", global.getLoc(), builder.getI8Type(),
347 cir::GlobalLinkageKind::ExternalLinkage, cir::VisibilityKind::Hidden);
// Obtain the destructor function; `dtorCall` is filled in by the callee.
353 cir::CallOp dtorCall;
354 cir::FuncOp dtorFunc =
355 getOrCreateDtorFunc(builder, global, dtorRegion, dtorCall);
// Signature of the registration function:
// void (void (*)(void *), void *, <pointer to handle type>).
360 cir::PointerType voidFnPtrTy = builder.
getVoidFnPtrTy({voidPtrTy});
361 cir::PointerType handlePtrTy = builder.
getPointerTo(handle.getSymType());
363 builder.
getVoidFnTy({voidFnPtrTy, voidPtrTy, handlePtrTy});
// Default registration function; the reassignment below picks `_tlv_atexit`
// on Darwin targets and `__cxa_thread_atexit` elsewhere.
365 llvm::StringLiteral nameAtExit =
"__cxa_atexit";
367 nameAtExit = astCtx->getTargetInfo().getTriple().isOSDarwin()
368 ? llvm::StringLiteral(
"_tlv_atexit")
369 : llvm::StringLiteral(
"__cxa_thread_atexit");
371 cir::FuncOp fnAtExit = buildRuntimeFunction(builder, nameAtExit,
372 global.getLoc(), fnAtExitType);
// Build the call right after the destructor call:
//   args[0] = address of the destructor function, bitcast to voidFnPtrTy
//   (next)  = first operand of the destructor call, bitcast to void pointer
//             (the assignment target is not visible; presumably args[1])
//   args[2] = address of the `__dso_handle` global
376 builder.setInsertionPointAfter(dtorCall);
378 auto dtorPtrTy = cir::PointerType::get(dtorFunc.getFunctionType());
379 args[0] = cir::GetGlobalOp::create(builder, dtorCall.getLoc(), dtorPtrTy,
380 dtorFunc.getSymName());
381 args[0] = cir::CastOp::create(builder, dtorCall.getLoc(), voidFnPtrTy,
382 cir::CastKind::bitcast, args[0]);
384 cir::CastOp::create(builder, dtorCall.getLoc(), voidPtrTy,
385 cir::CastKind::bitcast, dtorCall.getArgOperand(0));
386 args[2] = cir::GetGlobalOp::create(builder, handle.getLoc(), handlePtrTy,
387 handle.getSymName());
388 builder.
createCallOp(dtorCall.getLoc(), fnAtExit, args);
// Move operations of the destructor block to the end of `entryBB`, stopping
// before the block's last operation (the range start is not visible here).
390 mlir::Block &dtorBlock = dtorRegion.front();
391 entryBB.getOperations().splice(entryBB.end(), dtorBlock.getOperations(),
393 std::prev(dtorBlock.end()));
396 builder.setInsertionPointToEnd(&entryBB);
// Emits the body of a guarded initialisation for `globalOp`: the constructor
// region's operations are inlined at the builder's current block and the
// destructor region, when present, is registered via
// emitGlobalGuardedDtorRegion. Large parts of this function (including the
// end of the parameter list and the guard acquire/release calls) are not
// visible in this excerpt; comments below only describe what is shown.
402 void emitCXXGuardedInitIf(CIRBaseBuilderTy &builder, cir::GlobalOp globalOp,
403 mlir::Region &ctorRegion, mlir::Region &dtorRegion,
404 cir::ASTVarDeclInterface varDecl,
405 mlir::Value guardPtr, cir::PointerType guardPtrTy,
407 auto loc = globalOp->getLoc();
// Inline the constructor block into the insertion block, leaving out the
// block's final operation.
427 mlir::Block *insertBlock = builder.getInsertionBlock();
428 if (!ctorRegion.empty()) {
429 assert(ctorRegion.hasOneBlock() &&
"Enforced by MaxSizedRegion<1>");
431 mlir::Block &block = ctorRegion.front();
432 insertBlock->getOperations().splice(
433 insertBlock->end(), block.getOperations(), block.begin(),
434 std::prev(block.end()));
437 if (!dtorRegion.empty()) {
438 assert(dtorRegion.hasOneBlock() &&
"Enforced by MaxSizedRegion<1>");
// NOTE(review): `!threadsafe` is passed in the position of the callee's
// `tls` parameter. Guard thread-safety and thread-local storage are
// different properties -- confirm this argument is intended.
440 emitGlobalGuardedDtorRegion(builder, globalOp, dtorRegion, !threadsafe,
443 builder.setInsertionPointToEnd(insertBlock);
444 ctorRegion.getBlocks().clear();
// Initialisation proceeds only when the acquire call returned non-zero.
452 mlir::Value acquireResult = acquireCall.getResult();
455 loc, mlir::cast<cir::IntType>(acquireResult.getType()), 0);
456 auto shouldInit = builder.
createCompare(loc, cir::CmpOpKind::ne,
457 acquireResult, acquireZero);
// The if-op is created with an empty builder callback and then filled in by
// moving the insertion point to the start of its then-region.
462 cir::IfOp::create(builder, loc, shouldInit,
false,
463 [](mlir::OpBuilder &, mlir::Location) {});
464 mlir::OpBuilder::InsertionGuard insertGuard(builder);
465 builder.setInsertionPointToStart(&ifOp.getThenRegion().front());
479 mlir::ValueRange{guardPtr});
// Variables that are not local declarations are reported as not yet
// implemented on this branch.
482 }
else if (!
varDecl.isLocalVarDecl()) {
488 globalOp->emitError(
"NYI: non-threadsafe init for non-local variables");
// Stores the AST context pointer `c` in astCtx. The pointer is kept as-is;
// nothing here takes ownership of it.
503 void setASTContext(clang::ASTContext *
c) { astCtx =
c; }
// Finds the global named `name` starting from the module, or creates it.
//
//   builder    - supplies the insertion point and context for a new global.
//   name, loc, type - symbol name, location and type of the global.
//   linkage    - linkage kind applied via GlobalLinkageKindAttr.
//   visibility - global visibility applied with setGlobalVisibility.
//
// A symbol that exists but is not a cir::GlobalOp yields a null `g` from the
// dyn_cast_or_null below.
// NOTE(review): the condition guarding creation and the return statement are
// not visible in this excerpt; presumably creation happens only when `g` is
// null.
508cir::GlobalOp LoweringPreparePass::getOrCreateRuntimeVariable(
509 mlir::OpBuilder &builder, llvm::StringRef name, mlir::Location loc,
510 mlir::Type type, cir::GlobalLinkageKind linkage,
511 cir::VisibilityKind visibility) {
512 cir::GlobalOp g = dyn_cast_or_null<cir::GlobalOp>(
513 mlir::SymbolTable::lookupNearestSymbolFrom(
514 mlirModule, mlir::StringAttr::get(mlirModule->getContext(), name)));
516 g = cir::GlobalOp::create(builder, loc, name, type);
518 cir::GlobalLinkageKindAttr::get(builder.getContext(), linkage));
// MLIR symbol visibility is set to Private; the requested `visibility` is
// recorded separately as the global's own visibility.
519 mlir::SymbolTable::setSymbolVisibility(
520 g, mlir::SymbolTable::Visibility::Private);
521 g.setGlobalVisibility(visibility);
// Finds the function named `name` starting from the module, or creates a
// function with the given `type` at the builder's insertion point.
//
//   linkage - linkage kind applied via GlobalLinkageKindAttr.
//
// A symbol that exists but is not a FuncOp yields a null `f` from the
// dyn_cast_or_null below.
// NOTE(review): the condition guarding creation and the return statement are
// not visible in this excerpt; presumably creation happens only when `f` is
// null.
526cir::FuncOp LoweringPreparePass::buildRuntimeFunction(
527 mlir::OpBuilder &builder, llvm::StringRef name, mlir::Location loc,
528 cir::FuncType type, cir::GlobalLinkageKind linkage) {
529 cir::FuncOp f = dyn_cast_or_null<FuncOp>(SymbolTable::lookupNearestSymbolFrom(
530 mlirModule, StringAttr::get(mlirModule->getContext(), name)));
532 f = cir::FuncOp::create(builder, loc, name, type);
534 cir::GlobalLinkageKindAttr::get(builder.getContext(), linkage));
// MLIR symbol visibility of the function is set to Private.
535 mlir::SymbolTable::setSymbolVisibility(
536 f, mlir::SymbolTable::Visibility::Private);
546 builder.setInsertionPoint(op);
548 mlir::Value src = op.getSrc();
549 mlir::Value imag = builder.
getNullValue(src.getType(), op.getLoc());
555 cir::CastKind elemToBoolKind) {
557 builder.setInsertionPoint(op);
559 mlir::Value src = op.getSrc();
560 if (!mlir::isa<cir::BoolType>(op.getType()))
567 cir::BoolType boolTy = builder.
getBoolTy();
568 mlir::Value srcRealToBool =
569 builder.
createCast(op.getLoc(), elemToBoolKind, srcReal, boolTy);
570 mlir::Value srcImagToBool =
571 builder.
createCast(op.getLoc(), elemToBoolKind, srcImag, boolTy);
572 return builder.
createLogicalOr(op.getLoc(), srcRealToBool, srcImagToBool);
577 cir::CastKind scalarCastKind) {
579 builder.setInsertionPoint(op);
581 mlir::Value src = op.getSrc();
582 auto dstComplexElemTy =
583 mlir::cast<cir::ComplexType>(op.getType()).getElementType();
588 mlir::Value dstReal = builder.
createCast(op.getLoc(), scalarCastKind, srcReal,
590 mlir::Value dstImag = builder.
createCast(op.getLoc(), scalarCastKind, srcImag,
// Lowers a cast operation that involves complex values. A lambda switches on
// the cast kind to compute the replacement value, which then replaces every
// use of `op`. The per-kind bodies and the handling of other cast kinds are
// not visible in this excerpt.
595void LoweringPreparePass::lowerCastOp(cir::CastOp op) {
596 mlir::MLIRContext &ctx = getContext();
597 mlir::Value loweredValue = [&]() -> mlir::Value {
598 switch (op.getKind()) {
// scalar -> complex
599 case cir::CastKind::float_to_complex:
600 case cir::CastKind::int_to_complex:
// complex -> real scalar
602 case cir::CastKind::float_complex_to_real:
603 case cir::CastKind::int_complex_to_real:
// complex -> bool
605 case cir::CastKind::float_complex_to_bool:
607 case cir::CastKind::int_complex_to_bool:
// complex -> complex
609 case cir::CastKind::float_complex:
611 case cir::CastKind::float_complex_to_int_complex:
613 case cir::CastKind::int_complex:
615 case cir::CastKind::int_complex_to_float_complex:
623 op.replaceAllUsesWith(loweredValue);
630 llvm::StringRef (*libFuncNameGetter)(llvm::APFloat::Semantics),
631 mlir::Location loc, cir::ComplexType ty, mlir::Value lhsReal,
632 mlir::Value lhsImag, mlir::Value rhsReal, mlir::Value rhsImag) {
633 cir::FPTypeInterface elementTy =
634 mlir::cast<cir::FPTypeInterface>(ty.getElementType());
636 llvm::StringRef libFuncName = libFuncNameGetter(
637 llvm::APFloat::SemanticsToEnum(elementTy.getFloatSemantics()));
640 cir::FuncType libFuncTy = cir::FuncType::get(libFuncInputTypes, ty);
646 mlir::OpBuilder::InsertionGuard ipGuard{builder};
647 builder.setInsertionPointToStart(pass.mlirModule.getBody());
648 libFunc = pass.buildRuntimeFunction(builder, libFuncName, loc, libFuncTy);
652 builder.
createCallOp(loc, libFunc, {lhsReal, lhsImag, rhsReal, rhsImag});
653 return call.getResult();
656static llvm::StringRef
659 case llvm::APFloat::S_IEEEhalf:
661 case llvm::APFloat::S_IEEEsingle:
663 case llvm::APFloat::S_IEEEdouble:
665 case llvm::APFloat::S_PPCDoubleDouble:
667 case llvm::APFloat::S_x87DoubleExtended:
669 case llvm::APFloat::S_IEEEquad:
672 llvm_unreachable(
"unsupported floating point type");
678 mlir::Value lhsReal, mlir::Value lhsImag,
679 mlir::Value rhsReal, mlir::Value rhsImag) {
681 mlir::Value &a = lhsReal;
682 mlir::Value &
b = lhsImag;
683 mlir::Value &
c = rhsReal;
684 mlir::Value &d = rhsImag;
686 mlir::Value ac = builder.
createMul(loc, a,
c);
687 mlir::Value bd = builder.
createMul(loc,
b, d);
689 mlir::Value dd = builder.
createMul(loc, d, d);
690 mlir::Value acbd = builder.
createAdd(loc, ac, bd);
691 mlir::Value ccdd = builder.
createAdd(loc, cc, dd);
692 mlir::Value resultReal = builder.
createDiv(loc, acbd, ccdd);
695 mlir::Value ad = builder.
createMul(loc, a, d);
696 mlir::Value bcad = builder.
createSub(loc, bc, ad);
697 mlir::Value resultImag = builder.
createDiv(loc, bcad, ccdd);
703 mlir::Value lhsReal, mlir::Value lhsImag,
704 mlir::Value rhsReal, mlir::Value rhsImag) {
725 mlir::Value &a = lhsReal;
726 mlir::Value &
b = lhsImag;
727 mlir::Value &
c = rhsReal;
728 mlir::Value &d = rhsImag;
730 auto trueBranchBuilder = [&](mlir::OpBuilder &, mlir::Location) {
732 mlir::Value rd = builder.
createMul(loc, r, d);
733 mlir::Value tmp = builder.
createAdd(loc,
c, rd);
735 mlir::Value br = builder.
createMul(loc,
b, r);
736 mlir::Value abr = builder.
createAdd(loc, a, br);
737 mlir::Value e = builder.
createDiv(loc, abr, tmp);
739 mlir::Value ar = builder.
createMul(loc, a, r);
740 mlir::Value bar = builder.
createSub(loc,
b, ar);
741 mlir::Value f = builder.
createDiv(loc, bar, tmp);
747 auto falseBranchBuilder = [&](mlir::OpBuilder &, mlir::Location) {
749 mlir::Value rc = builder.
createMul(loc, r,
c);
750 mlir::Value tmp = builder.
createAdd(loc, d, rc);
752 mlir::Value ar = builder.
createMul(loc, a, r);
753 mlir::Value arb = builder.
createAdd(loc, ar,
b);
754 mlir::Value e = builder.
createDiv(loc, arb, tmp);
756 mlir::Value br = builder.
createMul(loc,
b, r);
757 mlir::Value bra = builder.
createSub(loc, br, a);
758 mlir::Value f = builder.
createDiv(loc, bra, tmp);
764 auto cFabs = cir::FAbsOp::create(builder, loc,
c);
765 auto dFabs = cir::FAbsOp::create(builder, loc, d);
766 cir::CmpOp cmpResult =
767 builder.
createCompare(loc, cir::CmpOpKind::ge, cFabs, dFabs);
768 auto ternary = cir::TernaryOp::create(builder, loc, cmpResult,
769 trueBranchBuilder, falseBranchBuilder);
771 return ternary.getResult();
778 auto getHigherPrecisionFPType = [&context](mlir::Type type) -> mlir::Type {
779 if (mlir::isa<cir::FP16Type>(type))
780 return cir::SingleType::get(&context);
782 if (mlir::isa<cir::SingleType>(type) || mlir::isa<cir::BF16Type>(type))
783 return cir::DoubleType::get(&context);
785 if (mlir::isa<cir::DoubleType>(type))
786 return cir::LongDoubleType::get(&context, type);
791 auto getFloatTypeSemantics =
792 [&cc](mlir::Type type) ->
const llvm::fltSemantics & {
794 if (mlir::isa<cir::FP16Type>(type))
797 if (mlir::isa<cir::BF16Type>(type))
800 if (mlir::isa<cir::SingleType>(type))
803 if (mlir::isa<cir::DoubleType>(type))
806 if (mlir::isa<cir::LongDoubleType>(type)) {
808 llvm_unreachable(
"NYI Float type semantics with OpenMP");
812 if (mlir::isa<cir::FP128Type>(type)) {
814 llvm_unreachable(
"NYI Float type semantics with OpenMP");
818 llvm_unreachable(
"Unsupported float type semantics");
821 const mlir::Type higherElementType = getHigherPrecisionFPType(elementType);
822 const llvm::fltSemantics &elementTypeSemantics =
823 getFloatTypeSemantics(elementType);
824 const llvm::fltSemantics &higherElementTypeSemantics =
825 getFloatTypeSemantics(higherElementType);
834 if (llvm::APFloat::semanticsMaxExponent(elementTypeSemantics) * 2 + 1 <=
835 llvm::APFloat::semanticsMaxExponent(higherElementTypeSemantics)) {
836 return higherElementType;
846 mlir::Location loc, cir::ComplexDivOp op, mlir::Value lhsReal,
847 mlir::Value lhsImag, mlir::Value rhsReal, mlir::Value rhsImag,
849 cir::ComplexType complexTy = op.getType();
850 if (mlir::isa<cir::FPTypeInterface>(complexTy.getElementType())) {
851 cir::ComplexRangeKind range = op.getRange();
852 if (range == cir::ComplexRangeKind::Improved)
856 if (range == cir::ComplexRangeKind::Full)
858 loc, complexTy, lhsReal, lhsImag, rhsReal,
861 if (range == cir::ComplexRangeKind::Promoted) {
862 mlir::Type originalElementType = complexTy.getElementType();
863 mlir::Type higherPrecisionElementType =
865 originalElementType);
867 if (!higherPrecisionElementType)
871 cir::CastKind floatingCastKind = cir::CastKind::floating;
872 lhsReal = builder.
createCast(floatingCastKind, lhsReal,
873 higherPrecisionElementType);
874 lhsImag = builder.
createCast(floatingCastKind, lhsImag,
875 higherPrecisionElementType);
876 rhsReal = builder.
createCast(floatingCastKind, rhsReal,
877 higherPrecisionElementType);
878 rhsImag = builder.
createCast(floatingCastKind, rhsImag,
879 higherPrecisionElementType);
882 builder, loc, lhsReal, lhsImag, rhsReal, rhsImag);
887 mlir::Value finalReal =
888 builder.
createCast(floatingCastKind, resultReal, originalElementType);
889 mlir::Value finalImag =
890 builder.
createCast(floatingCastKind, resultImag, originalElementType);
// Lowers a complex division: new operations are inserted after `op`, and the
// computed value replaces all uses of `op`. The extraction of the real and
// imaginary parts and the name of the helper that produces `loweredResult`
// are not visible in this excerpt; the helper receives the MLIR context and
// the AST context as its trailing arguments.
899void LoweringPreparePass::lowerComplexDivOp(cir::ComplexDivOp op) {
900 cir::CIRBaseBuilderTy builder(getContext());
901 builder.setInsertionPointAfter(op);
902 mlir::Location loc = op.getLoc();
903 mlir::TypedValue<cir::ComplexType> lhs = op.getLhs();
904 mlir::TypedValue<cir::ComplexType> rhs = op.getRhs();
910 mlir::Value loweredResult =
912 rhsImag, getContext(), *astCtx);
913 op.replaceAllUsesWith(loweredResult);
917static llvm::StringRef
920 case llvm::APFloat::S_IEEEhalf:
922 case llvm::APFloat::S_IEEEsingle:
924 case llvm::APFloat::S_IEEEdouble:
926 case llvm::APFloat::S_PPCDoubleDouble:
928 case llvm::APFloat::S_x87DoubleExtended:
930 case llvm::APFloat::S_IEEEquad:
933 llvm_unreachable(
"unsupported floating point type");
939 mlir::Location loc, cir::ComplexMulOp op,
940 mlir::Value lhsReal, mlir::Value lhsImag,
941 mlir::Value rhsReal, mlir::Value rhsImag) {
943 mlir::Value resultRealLhs = builder.
createMul(loc, lhsReal, rhsReal);
944 mlir::Value resultRealRhs = builder.
createMul(loc, lhsImag, rhsImag);
945 mlir::Value resultImagLhs = builder.
createMul(loc, lhsReal, rhsImag);
946 mlir::Value resultImagRhs = builder.
createMul(loc, lhsImag, rhsReal);
947 mlir::Value resultReal = builder.
createSub(loc, resultRealLhs, resultRealRhs);
948 mlir::Value resultImag = builder.
createAdd(loc, resultImagLhs, resultImagRhs);
949 mlir::Value algebraicResult =
952 cir::ComplexType complexTy = op.getType();
953 cir::ComplexRangeKind rangeKind = op.getRange();
954 if (mlir::isa<cir::IntType>(complexTy.getElementType()) ||
955 rangeKind == cir::ComplexRangeKind::Basic ||
956 rangeKind == cir::ComplexRangeKind::Improved ||
957 rangeKind == cir::ComplexRangeKind::Promoted)
958 return algebraicResult;
965 mlir::Value resultRealIsNaN = builder.
createIsNaN(loc, resultReal);
966 mlir::Value resultImagIsNaN = builder.
createIsNaN(loc, resultImag);
967 mlir::Value resultRealAndImagAreNaN =
970 return cir::TernaryOp::create(
971 builder, loc, resultRealAndImagAreNaN,
972 [&](mlir::OpBuilder &, mlir::Location) {
975 lhsReal, lhsImag, rhsReal, rhsImag);
978 [&](mlir::OpBuilder &, mlir::Location) {
// Lowers a complex multiplication: new operations are inserted after `op`,
// lowerComplexMul computes the product from the real/imaginary parts of both
// operands, and the result replaces all uses of `op`. The statements that
// extract lhsReal/lhsImag/rhsReal/rhsImag are not visible in this excerpt.
984void LoweringPreparePass::lowerComplexMulOp(cir::ComplexMulOp op) {
985 cir::CIRBaseBuilderTy builder(getContext());
986 builder.setInsertionPointAfter(op);
987 mlir::Location loc = op.getLoc();
988 mlir::TypedValue<cir::ComplexType> lhs = op.getLhs();
989 mlir::TypedValue<cir::ComplexType> rhs = op.getRhs();
994 mlir::Value loweredResult =
lowerComplexMul(*
this, builder, loc, op, lhsReal,
995 lhsImag, rhsReal, rhsImag);
996 op.replaceAllUsesWith(loweredResult);
// Lowers a unary operation whose result type is complex into operations on
// the real and imaginary parts; new operations are inserted after `op` and
// the rebuilt value replaces all uses of `op`.
1000void LoweringPreparePass::lowerUnaryOp(cir::UnaryOpInterface op) {
// Non-complex results take the branch below; its body is not visible here
// (presumably an early return -- confirm).
1001 if (!mlir::isa<cir::ComplexType>(op.getResult().getType()))
1004 mlir::Location loc = op->getLoc();
1005 CIRBaseBuilderTy builder(getContext());
1006 builder.setInsertionPointAfter(op);
1008 mlir::Value operand = op.getInput();
// Start from the unchanged parts; each case below overrides what it changes.
1012 mlir::Value resultReal = operandReal;
1013 mlir::Value resultImag = operandImag;
1015 llvm::TypeSwitch<mlir::Operation *>(op)
// Increment: only the real part changes.
1017 [&](
auto) { resultReal = builder.
createInc(loc, operandReal); })
// Decrement: only the real part changes.
1019 [&](
auto) { resultReal = builder.
createDec(loc, operandReal); })
// Minus: both parts are negated.
1020 .Case<cir::MinusOp>([&](
auto) {
1021 resultReal = builder.
createMinus(loc, operandReal);
1022 resultImag = builder.
createMinus(loc, operandImag);
// Only the imaginary part is negated; the op type for this case is not
// visible in this excerpt.
1025 [&](
auto) { resultImag = builder.
createMinus(loc, operandImag); })
1026 .
Default([](
auto) { llvm_unreachable(
"unhandled unary complex op"); });
1029 op->replaceAllUsesWith(mlir::ValueRange{result});
// Produces a function that performs the destruction described by
// `dtorRegion`, taking the object address as its single argument, and reports
// the call to it through `dtorCall`.
//
//   builder    - builder used for created operations; its insertion point is
//                restored on exit by the InsertionGuard.
//   dtorRegion - region with the destructor code; its first operation must be
//                a cir::GetGlobalOp (enforced by mlir::cast).
//   dtorCall   - out-parameter; assigned the call to the outlined function in
//                the slow path (the fast-path assignment is not visible here).
//
// One parameter line (presumably the cir::GlobalOp `op` used below) is
// missing from this excerpt.
1033cir::FuncOp LoweringPreparePass::getOrCreateDtorFunc(CIRBaseBuilderTy &builder,
1035 mlir::Region &dtorRegion,
1036 cir::CallOp &dtorCall) {
1037 mlir::OpBuilder::InsertionGuard guard(builder);
1040 cir::VoidType voidTy = builder.
getVoidTy();
1041 auto voidPtrTy = cir::PointerType::get(voidTy);
1044 mlir::Block &dtorBlock = dtorRegion.front();
1048 auto opIt = dtorBlock.getOperations().begin();
1049 cir::GetGlobalOp ggop = mlir::cast<cir::GetGlobalOp>(*opIt);
// Fast path: the block is exactly get_global / call / yield and the call's
// only operand is the get_global result, so the callee is returned directly.
1060 if (dtorBlock.getOperations().size() == 3) {
1061 auto callOp = mlir::dyn_cast<cir::CallOp>(&*(++opIt));
1062 auto yieldOp = mlir::dyn_cast<cir::YieldOp>(&*(++opIt));
1063 if (yieldOp && callOp && callOp.getNumOperands() == 1 &&
1064 callOp.getArgOperand(0) == ggop) {
1066 return getCalledFunction(callOp);
// Slow path: outline the destructor code into a new internal function named
// "__cxx_global_array_dtor", with "." and a counter value appended (the
// condition on the suffix is not visible here).
1073 builder.setInsertionPointAfter(op);
1074 SmallString<256> fnName(
"__cxx_global_array_dtor");
1075 uint32_t cnt = dynamicInitializerNames[fnName]++;
1077 fnName +=
"." + std::to_string(cnt);
1080 auto fnType = cir::FuncType::get({voidPtrTy}, voidTy);
1081 cir::FuncOp dtorFunc =
1082 buildRuntimeFunction(builder, fnName, op.getLoc(), fnType,
1083 cir::GlobalLinkageKind::InternalLinkage);
// The single parameter carries the `llvm.noundef` unit attribute.
1085 SmallVector<mlir::NamedAttribute> paramAttrs;
1086 paramAttrs.push_back(
1087 builder.getNamedAttr(
"llvm.noundef", builder.getUnitAttr()));
1088 SmallVector<mlir::Attribute> argAttrDicts;
1089 argAttrDicts.push_back(
1090 mlir::DictionaryAttr::get(builder.getContext(), paramAttrs));
1091 dtorFunc.setArgAttrsAttr(
1092 mlir::ArrayAttr::get(builder.getContext(), argAttrDicts));
// Move every operation of the destructor block into the new entry block.
1094 mlir::Block *entryBB = dtorFunc.addEntryBlock();
1097 entryBB->getOperations().splice(entryBB->begin(), dtorBlock.getOperations(),
1098 dtorBlock.begin(), dtorBlock.end());
// Put a clone of the get_global back into the (now empty) original block.
1101 cir::GetGlobalOp dtorGGop =
1102 mlir::cast<cir::GetGlobalOp>(entryBB->getOperations().front());
1103 builder.setInsertionPointToStart(&dtorBlock);
1104 builder.clone(*dtorGGop.getOperation());
// Inside the outlined function, uses of the get_global become the argument.
1108 mlir::Value dtorArg = entryBB->getArgument(0);
1109 dtorGGop.replaceAllUsesWith(dtorArg);
// A return is created right before the yield terminating the function's
// last block (removal of that yield is not visible here).
1113 mlir::Block &finalBlock = dtorFunc.getBody().back();
1114 auto yieldOp = cast<cir::YieldOp>(finalBlock.getTerminator());
1115 builder.setInsertionPoint(yieldOp);
1116 cir::ReturnOp::create(builder, yieldOp->getLoc());
// In the original block: call the outlined function on the get_global
// result, then yield.
1121 cir::GetGlobalOp origGGop =
1122 mlir::cast<cir::GetGlobalOp>(dtorBlock.getOperations().front());
1123 builder.setInsertionPointAfter(origGGop);
1124 mlir::Value ggopResult = origGGop.getResult();
1125 dtorCall = builder.
createCallOp(op.getLoc(), dtorFunc, ggopResult);
1128 auto finalYield = cir::YieldOp::create(builder, op.getLoc());
// Drop anything after the new yield and every block after the first one.
1131 dtorBlock.getOperations().erase(std::next(mlir::Block::iterator(finalYield)),
1133 dtorRegion.getBlocks().erase(std::next(dtorRegion.begin()), dtorRegion.end());
// Builds the internal-linkage function that runs the dynamic initialisation
// of global `op`: its constructor region is inlined into the function and its
// destructor region, when present, is registered through
// emitGlobalGuardedDtorRegion. The return type line and the final return
// statement are not visible in this excerpt.
1139LoweringPreparePass::buildCXXGlobalVarDeclInitFunc(cir::GlobalOp op) {
// Name: "__cxx_global_var_init", with "." and a counter value appended (the
// condition on the suffix is not visible here).
1142 SmallString<256> fnName(
"__cxx_global_var_init");
1144 uint32_t cnt = dynamicInitializerNames[fnName]++;
1146 fnName +=
"." + std::to_string(cnt);
// void() function created right after the global.
1149 CIRBaseBuilderTy builder(getContext());
1150 builder.setInsertionPointAfter(op);
1151 cir::VoidType voidTy = builder.
getVoidTy();
1152 auto fnType = cir::FuncType::get({}, voidTy);
1153 FuncOp f = buildRuntimeFunction(builder, fnName, op.getLoc(), fnType,
1154 cir::GlobalLinkageKind::InternalLinkage);
1162 mlir::Block *entryBB = f.addEntryBlock();
1163 builder.setInsertionPointToStart(entryBB);
// A TLS guard is needed when the global has dynamic-TLS references that
// carry a guard name; the body is then emitted inside the guard's if-op.
1167 bool needsTlsGuard = op.getDynTlsRefs() && op.getDynTlsRefs()->getGuardName();
1169 if (needsTlsGuard) {
1170 guardIf = buildGlobalTlsGuardCheck(
1171 builder, op.getLoc(),
1172 getOrCreateStaticLocalDeclGuardAddress(
1173 builder, op, op.getDynTlsRefs()->getGuardName().getValue(),
1175 op.hasInternalLinkage()));
1176 builder.setInsertionPointToEnd(&guardIf.getThenRegion().front());
// Inline the constructor block, leaving out its final operation.
1179 if (!op.getCtorRegion().empty()) {
1180 mlir::Block &block = op.getCtorRegion().front();
1181 mlir::Block *insertBlock = builder.getBlock();
1182 insertBlock->getOperations().splice(insertBlock->end(),
1183 block.getOperations(), block.begin(),
1184 std::prev(block.end()));
// Register the destructor; the `tls` argument is true exactly when the
// global has a TLS model.
1188 mlir::Region &dtorRegion = op.getDtorRegion();
1189 if (!dtorRegion.empty()) {
1192 emitGlobalGuardedDtorRegion(builder, op, dtorRegion,
1193 op.getTlsModel().has_value(),
1194 *builder.getBlock());
// Terminate the guard's then-region.
1198 if (needsTlsGuard) {
1199 builder.setInsertionPointToEnd(&guardIf.getThenRegion().back());
1200 cir::YieldOp::create(builder, op.getLoc());
// The function's return reuses the location of the yield left behind in the
// constructor block, or in the destructor block on the other branch (the
// `else` line is not visible here).
1204 builder.setInsertionPointToEnd(entryBB);
1205 mlir::Operation *yieldOp =
nullptr;
1206 if (!op.getCtorRegion().empty()) {
1207 mlir::Block &block = op.getCtorRegion().front();
1208 yieldOp = &block.getOperations().back();
1210 assert(!dtorRegion.empty());
1211 mlir::Block &block = dtorRegion.front();
1212 yieldOp = &block.getOperations().back();
1215 assert(isa<cir::YieldOp>(*yieldOp));
1216 cir::ReturnOp::create(builder, yieldOp->getLoc());
// Returns the `__cxa_guard_acquire` function, found or created at the start
// of the module body via buildRuntimeFunction with its default linkage
// argument. Its type is (guardPtrTy) -> 32-bit integer; the trailing `true`
// passed to IntType::get is presumably the signedness flag -- TODO confirm.
1221LoweringPreparePass::getGuardAcquireFn(cir::PointerType guardPtrTy) {
1223 CIRBaseBuilderTy builder(getContext());
1224 mlir::OpBuilder::InsertionGuard ipGuard{builder};
1225 builder.setInsertionPointToStart(mlirModule.getBody());
1226 mlir::Location loc = mlirModule.getLoc();
1227 cir::IntType intTy = cir::IntType::get(&getContext(), 32,
true);
1228 auto fnType = cir::FuncType::get({guardPtrTy}, intTy);
1229 return buildRuntimeFunction(builder,
"__cxa_guard_acquire", loc, fnType);
// Returns the `__cxa_guard_release` function, found or created at the start
// of the module body via buildRuntimeFunction with its default linkage
// argument. Its type is (guardPtrTy) -> void.
1233LoweringPreparePass::getGuardReleaseFn(cir::PointerType guardPtrTy) {
1235 CIRBaseBuilderTy builder(getContext());
1236 mlir::OpBuilder::InsertionGuard ipGuard{builder};
1237 builder.setInsertionPointToStart(mlirModule.getBody());
1238 mlir::Location loc = mlirModule.getLoc();
1239 cir::VoidType voidTy = cir::VoidType::get(&getContext());
1240 auto fnType = cir::FuncType::get({guardPtrTy}, voidTy);
1241 return buildRuntimeFunction(builder,
"__cxa_guard_release", loc, fnType);
// Returns the `__tls_init` function with internal linkage, found or created
// at the start of the module body. The definition of `fnType` is not visible
// in this excerpt.
1244cir::FuncOp LoweringPreparePass::getTlsInitFn() {
1246 CIRBaseBuilderTy builder(getContext());
1247 mlir::OpBuilder::InsertionGuard _{builder};
1248 builder.setInsertionPointToStart(mlirModule.getBody());
1249 mlir::Location loc = mlirModule.getLoc();
1251 return buildRuntimeFunction(builder,
"__tls_init", loc, fnType,
1252 cir::GlobalLinkageKind::InternalLinkage);
// Creates a new global named `name` of integer type `guardTy` at the start of
// the module body, applies `linkage`, and sets its MLIR symbol visibility to
// Private. The caller's insertion point is restored on exit by the
// InsertionGuard. Unlike getOrCreateRuntimeVariable, no lookup for an
// existing symbol is visible here. The return statement is not visible in
// this excerpt.
1255cir::GlobalOp LoweringPreparePass::createGuardGlobalOp(
1256 CIRBaseBuilderTy &builder, mlir::Location loc, llvm::StringRef name,
1257 cir::IntType guardTy, cir::GlobalLinkageKind linkage) {
1258 mlir::OpBuilder::InsertionGuard guard(builder);
1259 builder.setInsertionPointToStart(mlirModule.getBody());
1260 cir::GlobalOp g = cir::GlobalOp::create(builder, loc, name, guardTy);
1262 cir::GlobalLinkageKindAttr::get(builder.getContext(), linkage));
1263 mlir::SymbolTable::setSymbolVisibility(
1264 g, mlir::SymbolTable::Visibility::Private);
// Expands a local-init operation for a guarded static variable: obtains the
// guard variable for `globalOp`, loads the guard's first byte, and emits the
// guarded initialisation under an "is zero" check. Many statements are
// missing from this excerpt; comments describe only what is shown.
//
//   globalOp    - the global being initialised; must carry an AST variable
//                 declaration (asserted).
//   localInitOp - the operation holding the ctor/dtor regions.
1268void LoweringPreparePass::handleStaticLocal(cir::GlobalOp globalOp,
1269 cir::LocalInitOp localInitOp) {
1270 CIRBaseBuilderTy builder(getContext());
1272 std::optional<cir::ASTVarDeclInterface> astOption = globalOp.getAst();
1273 assert(astOption.has_value());
1274 cir::ASTVarDeclInterface
varDecl = astOption.value();
// The block terminator is captured here and re-appended further down
// (push_back); the step that detaches it is not visible in this excerpt.
1276 builder.setInsertionPointAfter(localInitOp);
1277 mlir::Block *localInitBlock = builder.getInsertionBlock();
1280 mlir::Operation *ret = localInitBlock->getTerminator();
1284 builder.setInsertionPointAfter(localInitOp);
// Inline namespace-scope variables are reported as not yet implemented.
1288 bool nonTemplateInline =
1294 if (nonTemplateInline) {
1295 globalOp->emitError(
1296 "NYI: guarded initialization for inline namespace-scope variables");
// Thread-safe guards require the ThreadsafeStatics language option and a
// local (or non-template inline) variable; a further conjunct is not
// visible here.
1303 bool threadsafe = astCtx->
getLangOpts().ThreadsafeStatics &&
1304 (
varDecl.isLocalVarDecl() || nonTemplateInline) &&
// Non-thread-safe guards of internal-linkage globals use an 8-bit guard.
1309 bool useInt8GuardVariable = !threadsafe && globalOp.hasInternalLinkage();
1312 cir::GlobalOp guard = getOrCreateStaticLocalDeclGuardAddress(
1313 builder, globalOp, globalOp.getStaticLocalGuard()->getName().getValue(),
1314 varDecl.isLocalVarDecl(), useInt8GuardVariable);
1317 localInitBlock->push_back(ret);
1321 mlir::Value guardPtr = builder.
createGetGlobal(guard, localInitOp.getTls());
1343 unsigned maxInlineWidthInBits =
1346 if (!threadsafe || maxInlineWidthInBits) {
// The guard is accessed through a pointer to a signed 8-bit integer, using
// the guard's recorded alignment.
1348 auto bytePtrTy = cir::PointerType::get(builder.
getSIntNTy(8));
1349 mlir::Value bytePtr = builder.
createBitcast(guardPtr, bytePtrTy);
1351 localInitOp.getLoc(), bytePtr, *guard.getAlignment());
// The load is given acquire ordering with system sync scope (the condition
// under which this applies is not visible here).
1360 auto loadOp = mlir::cast<cir::LoadOp>(guardLoad.getDefiningOp());
1361 loadOp.setMemOrder(cir::MemOrder::Acquire);
1362 loadOp.setSyncScope(cir::SyncScopeKind::System);
// Under the ARM guard ABI (and not the int8 guard) the loaded value is
// masked with `one` before the comparison.
1385 if (useARMGuardVarABI() && !useInt8GuardVariable) {
1387 localInitOp.getLoc(), mlir::cast<cir::IntType>(guardLoad.getType()),
1389 guardLoad = builder.
createAnd(localInitOp.getLoc(), guardLoad, one);
// Initialisation is needed when the (masked) guard value equals zero.
1394 localInitOp.getLoc(), mlir::cast<cir::IntType>(guardLoad.getType()), 0);
1395 auto needsInit = builder.
createCompare(localInitOp.getLoc(),
1396 cir::CmpOpKind::eq, guardLoad, zero);
1400 builder, globalOp.getLoc(), needsInit,
1401 false, [&](mlir::OpBuilder &, mlir::Location) {
1402 emitCXXGuardedInitIf(builder, globalOp, localInitOp.getCtorRegion(),
1403 localInitOp.getDtorRegion(), varDecl, guardPtr,
1404 builder.getPointerTo(guard.getSymType()),
// The remaining case (thread-safe without inline atomics) is reported as
// not yet implemented.
1410 globalOp->emitError(
"NYI: guarded init without inline atomics support");
1415 builder.getInsertionBlock()->push_back(ret);
// Lowers a local-init operation: resolves the global it refers to through the
// pass's symbol-table collection and hands both to handleStaticLocal.
1418void LoweringPreparePass::lowerLocalInitOp(cir::LocalInitOp initOp) {
// Case with neither a constructor nor a destructor region; the body of this
// branch is not visible in this excerpt.
1421 if (initOp.getCtorRegion().empty() && initOp.getDtorRegion().empty()) {
1426 cir::GlobalOp globalOp = initOp.getReferencedGlobal(symbolTables);
1427 assert(globalOp &&
"No global-op found");
1429 handleStaticLocal(globalOp, initOp);
1436 return tls == cir::TLS_Model::GeneralDynamic &&
// File-local helper producing a cir::GlobalLinkageKind from `op`. Two return
// statements pass op.getLinkage() through unchanged; the remaining path maps a
// declaration to LinkOnceODRLinkage and everything else to WeakODRLinkage.
1440static cir::GlobalLinkageKind
1443 return op.getLinkage();
1448 return op.getLinkage();
1452 if (op.isDeclaration())
1453 return cir::GlobalLinkageKind::LinkOnceODRLinkage;
1454 return cir::GlobalLinkageKind::WeakODRLinkage;
// Returns the thread-local wrapper function for `op`, creating it on first
// request. Wrappers are cached in `threadLocalWrappers`, keyed by the wrapper
// name stored in op.getDynTlsRefs(). A new wrapper is a cir::FuncOp created at
// the start of the module body; its function type takes no arguments and
// returns a pointer to op's symbol type.
1458LoweringPreparePass::getOrCreateThreadLocalWrapper(CIRBaseBuilderTy &builder,
1460 mlir::OpBuilder::InsertionGuard insertGuard(builder);
1461 builder.setInsertionPointToStart(&mlirModule.getBodyRegion().front());
1463 mlir::StringAttr wrapperName = op.getDynTlsRefs()->getWrapperName();
// Reuse a wrapper that was already created under this name.
1465 auto existingWrapperIter = threadLocalWrappers.find(wrapperName.getValue());
1466 if (existingWrapperIter != threadLocalWrappers.end())
1467 return existingWrapperIter->second;
1470 auto funcType = cir::FuncType::get({}, builder.
getPointerTo(op.getSymType()));
1472 cir::FuncOp::create(builder, op.getLoc(), wrapperName, funcType);
1474 cir::GlobalLinkageKind linkageKind =
1476 func.setLinkageAttr(
1477 cir::GlobalLinkageKindAttr::get(&getContext(), linkageKind));
// Weak-for-linker wrappers are placed in a comdat.
1482 func.isWeakForLinker())
1483 func.setComdat(
true);
1485 mlir::SymbolTable::setSymbolVisibility(
1486 func, mlir::SymbolTable::Visibility::Private);
// A hidden global yields a hidden wrapper.
1491 op.getGlobalVisibility() == cir::VisibilityKind::Hidden)
1492 func.setGlobalVisibility(cir::VisibilityKind::Hidden);
1495 op->emitError(
"Unhandled thread wrapper attributes for CC and Nounwind");
// Remember the wrapper so later requests for the same name return it.
1497 threadLocalWrappers.insert({wrapperName.getValue(), func});
// Emits the body of the thread-local wrapper belonging to `op`. The wrapper is
// obtained from getOrCreateThreadLocalWrapper, receives an entry block, and
// the builder is positioned at the start of that block. When `isVarDefinition`
// is false, a cir::GetGlobalOp referring to `initAlias` is compared (ne)
// against `nullCheck` and a cir::IfOp is built on the comparison result. The
// wrapper ends with a cir::ReturnOp returning `get`.
1501void LoweringPreparePass::defineGlobalThreadLocalWrapper(cir::GlobalOp op,
1502 cir::FuncOp initAlias,
1503 bool isVarDefinition) {
1504 CIRBaseBuilderTy builder(getContext());
1505 cir::FuncOp wrapper = getOrCreateThreadLocalWrapper(builder, op);
1506 mlir::Block *entryBB = wrapper.addEntryBlock();
1507 builder.setInsertionPointToStart(entryBB);
1511 mlir::Location aliasLoc = initAlias.getLoc();
1512 if (!isVarDefinition) {
// Take the alias as a function pointer so it can be tested against null.
1514 mlir::Value funcLoad = cir::GetGlobalOp::create(
1515 builder, aliasLoc, cir::PointerType::get(initAlias.getFunctionType()),
1516 initAlias.getSymName());
1517 mlir::Value nullCheck =
1519 mlir::Value cmp = cir::CmpOp::create(
1520 builder, aliasLoc, cir::CmpOpKind::ne, funcLoad, nullCheck);
1521 cir::IfOp::create(builder, aliasLoc, cmp,
false,
1522 [&](mlir::OpBuilder &, mlir::Location loc) {
1524 cir::YieldOp::create(builder, aliasLoc);
1533 cir::ReturnOp::create(builder, op.getLoc(), {get});
// Returns the TLS init alias for `op`, creating it on first request. Aliases
// are cached in `threadLocalInitAliases` under the init name stored in
// op.getDynTlsRefs(). A new alias is a cir::FuncOp created at the start of the
// module body. Its linkage is copied from `op` and its aliasee is set to
// `aliasee`'s symbol name; a further statement assigns ExternalWeakLinkage and
// private symbol visibility (presumably the case without an aliasee - confirm).
1537LoweringPreparePass::defineGlobalThreadLocalInitAlias(cir::GlobalOp op,
1538 cir::FuncOp aliasee) {
1539 CIRBaseBuilderTy builder(getContext());
1540 mlir::OpBuilder::InsertionGuard insertGuard(builder);
1541 builder.setInsertionPointToStart(&mlirModule.getBodyRegion().front());
1542 mlir::StringAttr aliasName = op.getDynTlsRefs()->getInitName();
1543 auto existingAliasIter = threadLocalInitAliases.find(aliasName.getValue());
// Reuse an alias that was already created under this name.
1545 if (existingAliasIter != threadLocalInitAliases.end())
1546 return existingAliasIter->second;
1550 cir::FuncOp::create(builder, op.getLoc(), aliasName, funcType);
1551 alias.setLinkage(op.getLinkage());
1554 alias.setAliasee(aliasee.getSymName());
1559 alias.setLinkage(cir::GlobalLinkageKind::ExternalWeakLinkage);
1560 mlir::SymbolTable::setSymbolVisibility(
1561 alias, mlir::SymbolTable::Visibility::Private);
// Remember the alias so later requests for the same name return it.
1564 threadLocalInitAliases.insert({aliasName.getValue(), alias});
// Lowers a cir::GlobalOp.
//  - Globals with a static-local guard are tested for first.
//  - If the ctor or dtor region is non-empty, an initializer function is built
//    with buildCXXGlobalVarDeclInitFunc and both regions are cleared. For
//    general-dynamic TLS globals without a static-local guard an init alias is
//    defined, targeting either that function or getTlsInitFn(); the function
//    is pushed onto `globalThreadLocalInitializers` on one path and onto
//    `dynamicInitializers` on another.
//  - Otherwise, a general-dynamic TLS declaration that has dyn-TLS refs gets
//    an init alias with an empty aliasee.
//  - Finally, every general-dynamic TLS global with dyn-TLS refs gets its
//    thread-local wrapper defined.
1568void LoweringPreparePass::lowerGlobalOp(GlobalOp op) {
1570 if (op.getStaticLocalGuard())
1573 mlir::Region &ctorRegion = op.getCtorRegion();
1574 mlir::Region &dtorRegion = op.getDtorRegion();
1575 cir::FuncOp initAlias;
1577 if (!ctorRegion.empty() || !dtorRegion.empty()) {
1580 cir::FuncOp f = buildCXXGlobalVarDeclInitFunc(op);
// The regions' contents are dropped once the init function exists.
1583 ctorRegion.getBlocks().clear();
1584 dtorRegion.getBlocks().clear();
1587 if (op.getTlsModel() == TLS_Model::GeneralDynamic &&
1588 !op.getStaticLocalGuard().has_value()) {
1600 if (op.getDynTlsRefs()->getGuardName()) {
1602 initAlias = defineGlobalThreadLocalInitAlias(op, f);
1605 initAlias = defineGlobalThreadLocalInitAlias(op, getTlsInitFn());
1609 globalThreadLocalInitializers.push_back(f);
1612 dynamicInitializers.push_back(f);
1614 }
else if (op.getTlsModel() == TLS_Model::GeneralDynamic &&
1615 op.getDynTlsRefs() && op.isDeclaration()) {
1618 initAlias = defineGlobalThreadLocalInitAlias(op, {});
// The last argument tells the wrapper whether `op` is a definition.
1624 if (op.getTlsModel() == TLS_Model::GeneralDynamic && op.getDynTlsRefs())
1625 defineGlobalThreadLocalWrapper(op, initAlias, !op.isDeclaration());
// Lowers a cir::GetGlobalOp. The referenced symbol is looked up through
// `symbolTables` and cast to cir::GlobalOp (mlir::cast, so any other symbol
// kind is a hard failure). The global's TLS model and dyn-TLS refs are tested
// next; for a general-dynamic TLS global with dyn-TLS refs, a call to its
// thread-local wrapper is built in front of the op and all uses of the op are
// redirected to `call`.
1630void LoweringPreparePass::lowerGetGlobalOp(GetGlobalOp op) {
1633 auto globalOp = mlir::cast<cir::GlobalOp>(
1634 symbolTables.lookupNearestSymbolFrom(op, op.getNameAttr()));
1640 if (globalOp.getTlsModel() != TLS_Model::GeneralDynamic ||
1641 !globalOp.getDynTlsRefs())
// When the op sits directly inside the global it refers to, check whether it
// is the very first operation of that global's ctor or dtor region.
1659 mlir::Operation *parentOp = op->getParentOp();
1660 if (parentOp == globalOp) {
1661 mlir::Region *ctorRegion = &globalOp.getCtorRegion();
1662 mlir::Region *dtorRegion = &globalOp.getDtorRegion();
1664 if (!ctorRegion->empty() && &*ctorRegion->op_begin() == op.getOperation())
1666 if (!dtorRegion->empty() && &*dtorRegion->op_begin() == op.getOperation())
1670 CIRBaseBuilderTy builder(getContext());
1671 cir::FuncOp wrapperFunc = getOrCreateThreadLocalWrapper(builder, globalOp);
1673 builder.setInsertionPoint(op);
// Call arguments: wrapper location, callee symbol, wrapper return type, and
// an empty operand list.
1675 wrapperFunc.getLoc(),
1676 mlir::FlatSymbolRefAttr::get(wrapperFunc.getSymNameAttr()),
1677 wrapperFunc.getFunctionType().getReturnType(), {});
1678 op->replaceAllUsesWith(call);
// Expands a cir::CmpThreeWayOp into compare/select chains emitted right after
// the op, then redirects all uses of the op to the chain's result.
//   ordering != Partial:
//     select(eq, eqRes, select(lt, ltRes, gtRes))
//   Partial ordering (uses cmpInfo.getUnordered()):
//     select(lt, ltRes, select(gt, gtRes, select(eq, eqRes, unorderedRes)))
1682void LoweringPreparePass::lowerThreeWayCmpOp(CmpThreeWayOp op) {
1683 CIRBaseBuilderTy builder(getContext());
1684 builder.setInsertionPointAfter(op);
1686 mlir::Location loc = op->getLoc();
1687 cir::CmpThreeWayInfoAttr cmpInfo = op.getInfo();
1696 mlir::Value transformedResult;
1697 if (cmpInfo.getOrdering() != CmpOrdering::Partial) {
1700 builder.
createCompare(loc, CmpOpKind::lt, op.getLhs(), op.getRhs());
1701 mlir::Value selectOnLt = builder.
createSelect(loc, lt, ltRes, gtRes);
1703 builder.
createCompare(loc, CmpOpKind::eq, op.getLhs(), op.getRhs());
1704 transformedResult = builder.
createSelect(loc, eq, eqRes, selectOnLt);
// The unordered result value is read with .value(), i.e. it is expected to be
// present on this path.
1708 loc, op.getType(), cmpInfo.getUnordered().value());
1711 builder.
createCompare(loc, CmpOpKind::eq, op.getLhs(), op.getRhs());
1712 mlir::Value selectOnEq = builder.
createSelect(loc, eq, eqRes, unorderedRes);
1714 builder.
createCompare(loc, CmpOpKind::gt, op.getLhs(), op.getRhs());
1715 mlir::Value selectOnGt = builder.
createSelect(loc, gt, gtRes, selectOnEq);
1717 builder.
createCompare(loc, CmpOpKind::lt, op.getLhs(), op.getRhs());
1718 transformedResult = builder.
createSelect(loc, lt, ltRes, selectOnGt);
1721 op.replaceAllUsesWith(transformedResult);
// File-local template helper returning a SmallVector of attributes: each
// (name, priority) entry of `list` is turned into
// AttributeTy::get(context, name, priority) and appended to `attrs`.
1725template <
typename AttributeTy>
1726static llvm::SmallVector<mlir::Attribute>
1730 for (
const auto &[name, priority] : list)
1731 attrs.push_back(AttributeTy::get(context, name, priority));
// Publishes the collected global constructor/destructor lists on the module.
// Each non-empty list is converted to a vector of attributes and stored as an
// mlir::ArrayAttr under CIRDialect::getGlobalCtorsAttrName() or
// CIRDialect::getGlobalDtorsAttrName(); an empty list sets no attribute.
1735void LoweringPreparePass::buildGlobalCtorDtorList() {
1736 if (!globalCtorList.empty()) {
1737 llvm::SmallVector<mlir::Attribute> globalCtors =
1741 mlirModule->setAttr(cir::CIRDialect::getGlobalCtorsAttrName(),
1742 mlir::ArrayAttr::get(&getContext(), globalCtors));
1745 if (!globalDtorList.empty()) {
1746 llvm::SmallVector<mlir::Attribute> globalDtors =
1749 mlirModule->setAttr(cir::CIRDialect::getGlobalDtorsAttrName(),
1750 mlir::ArrayAttr::get(&getContext(), globalDtors));
// Creates the `__tls_guard` global at the start of the module body: an 8-bit
// signed integer with internal linkage, the general-dynamic TLS model, and an
// initial value of 0. The InsertionGuard puts the caller's insertion point
// back when this function exits.
1755LoweringPreparePass::createGlobalThreadLocalGuard(CIRBaseBuilderTy &builder,
1756 mlir::Location loc) {
1757 mlir::OpBuilder::InsertionGuard guard(builder);
1758 builder.setInsertionPointToStart(mlirModule.getBody());
1761 cir::IntType guardTy = builder.
getSIntNTy(8);
1762 auto g = cir::GlobalOp::create(builder, loc,
"__tls_guard", guardTy);
1763 g.setLinkageAttr(cir::GlobalLinkageKindAttr::get(
1764 builder.getContext(), cir::GlobalLinkageKind::InternalLinkage));
1768 g.setTlsModel(TLS_Model::GeneralDynamic);
1769 g.setInitialValueAttr(cir::IntAttr::get(guardTy, 0));
// Builds the "guard is still zero" test for `guard` and returns the resulting
// cir::IfOp. The guard value is loaded and compared (eq) against `zero`. When
// the guard's symbol type is not an 8-bit signed integer, the guard address is
// first converted to a pointer to that 8-bit type. Inside the if-body a
// constant 1 of the guard's symbol type is passed together with `getGuard` to
// a builder call (presumably a store that marks the guard as set - confirm).
1773cir::IfOp LoweringPreparePass::buildGlobalTlsGuardCheck(
1774 CIRBaseBuilderTy &builder, mlir::Location loc, cir::GlobalOp guard) {
1776 mlir::Value getGuardValue = getGuard;
1781 if (guard.getSymType() != builder.
getSIntNTy(8))
1783 getGuard, cir::PointerType::get(builder.
getSIntNTy(8)));
1785 mlir::Value guardLoad =
1789 builder.
createCompare(loc, cir::CmpOpKind::eq, guardLoad, zero);
1790 return cir::IfOp::create(
1792 false, [&](mlir::OpBuilder &, mlir::Location loc) {
1796 loc, builder.
getConstantInt(loc, guard.getSymType(), 1), getGuard);
// Fills in the body of the TLS init function returned by getTlsInitFn().
// `globalThreadLocalInitializers` is tested for emptiness up front. The
// function receives an entry block containing a guard check (built by
// buildGlobalTlsGuardCheck on a freshly created guard global); the then-region
// of that check processes every recorded initializer and is terminated with a
// cir::YieldOp, and a cir::ReturnOp follows the if.
1800void LoweringPreparePass::buildCXXGlobalTlsFunc() {
1801 if (globalThreadLocalInitializers.empty())
1807 cir::FuncOp tlsInit = getTlsInitFn();
1808 mlir::Location loc = tlsInit.getLoc();
1809 CIRBaseBuilderTy builder(getContext());
1810 mlir::Block *entryBB = tlsInit.addEntryBlock();
1811 builder.setInsertionPointToStart(entryBB);
1813 cir::IfOp ifOperation = buildGlobalTlsGuardCheck(
1814 builder, loc, createGlobalThreadLocalGuard(builder, loc));
// Append to the end of the then-region's first block.
1817 builder.setInsertionPointToEnd(&ifOperation.getThenRegion().front());
1818 for (cir::FuncOp initFunc : globalThreadLocalInitializers)
1820 cir::YieldOp::create(builder, loc);
1822 builder.setInsertionPointAfter(ifOperation);
1823 cir::ReturnOp::create(builder, loc);
// Builds the module-level function that drives the recorded dynamic
// initializers. `dynamicInitializers` is tested for emptiness up front. The
// function name is assembled in `fnName` (the "_GLOBAL__sub_I_" prefix is
// appended on one path, an Itanium mangle context is used on another); the
// function has type void() and external linkage and is created through
// buildRuntimeFunction at the end of the module's last block. Its entry block
// processes every entry of `dynamicInitializers`, the name is appended to
// `globalCtorList` with the default ctor priority, and a cir::ReturnOp
// terminates the body.
1826void LoweringPreparePass::buildCXXGlobalInitFunc() {
1827 if (dynamicInitializers.empty())
1834 SmallString<256> fnName;
1842 llvm::raw_svector_ostream
out(fnName);
1843 std::unique_ptr<clang::MangleContext> mangleCtx(
1845 cast<clang::ItaniumMangleContext>(*mangleCtx)
1848 fnName +=
"_GLOBAL__sub_I_";
1852 CIRBaseBuilderTy builder(getContext());
1853 builder.setInsertionPointToEnd(&mlirModule.getBodyRegion().back());
1854 auto fnType = cir::FuncType::get({}, builder.
getVoidTy());
1856 buildRuntimeFunction(builder, fnName, mlirModule.getLoc(), fnType,
1857 cir::GlobalLinkageKind::ExternalLinkage);
1858 builder.setInsertionPointToStart(f.addEntryBlock());
// NOTE(review): the loop variable `f` shadows the function `f` built above.
1859 for (cir::FuncOp &f : dynamicInitializers)
1863 globalCtorList.emplace_back(fnName,
1864 cir::GlobalCtorAttr::getDefaultPriority());
1866 cir::ReturnOp::create(builder, f.getLoc());
1875 mlir::Operation *op, mlir::Type eltTy,
1877 mlir::Value numElements,
1878 uint64_t arrayLen,
bool isCtor) {
// Expands an array constructor/destructor op (`op`) into an explicit loop over
// the elements. `eltTy` is the element pointer type used for all pointer
// arithmetic below; a non-null `numElements` selects the dynamic-count path,
// and `isCtor` selects the iteration direction.
1879 mlir::Location loc = op->getLoc();
1880 bool isDynamic = numElements !=
nullptr;
1884 const unsigned sizeTypeSize =
1890 mlir::Value begin, end;
1893 end = cir::PtrStrideOp::create(builder, loc, eltTy, begin, numElements);
1895 mlir::Value endOffsetVal =
1897 begin = cir::CastOp::create(builder, loc, eltTy,
1898 cir::CastKind::array_to_ptrdecay, addr);
1899 end = cir::PtrStrideOp::create(builder, loc, eltTy, begin, endOffsetVal);
// Constructors walk begin -> end, destructors walk end -> begin.
1902 mlir::Value start = isCtor ? begin : end;
1903 mlir::Value stop = isCtor ? end : begin;
1909 mlir::Value guardCond;
1912 guardCond = cir::CmpOp::create(builder, loc, cir::CmpOpKind::ne,
1918 cir::CmpOp::create(builder, loc, cir::CmpOpKind::ne, start, stop);
1920 ifOp = cir::IfOp::create(builder, loc, guardCond,
1922 [&](mlir::OpBuilder &, mlir::Location) {});
// Everything emitted from here on lands inside the guard's then-region.
1923 builder.setInsertionPointToStart(&ifOp.getThenRegion().front());
1931 mlir::Block *bodyBlock = &op->getRegion(0).front();
// Clones every operation of `srcBlock` except cir::YieldOp at the current
// insertion point, with the block's first argument remapped to `replacement`.
// NOTE(review): `®ionOp` below looks like a mis-encoded `&regionOp` (the
// clone call two lines later uses `regionOp`) - confirm and repair.
1936 auto cloneRegionBodyInto = [&](mlir::Block *srcBlock,
1937 mlir::Value replacement) {
1938 mlir::IRMapping map;
1939 map.map(srcBlock->getArgument(0), replacement);
1940 for (mlir::Operation ®ionOp : *srcBlock) {
1941 if (!mlir::isa<cir::YieldOp>(®ionOp))
1942 builder.clone(regionOp, map);
// Block to replay on the partial-destruction path, if any: for an ArrayCtor
// the front of its partial-dtor region when that region is non-empty; for an
// ArrayDtor the regular body block when the destructor may throw.
1946 mlir::Block *partialDtorBlock =
nullptr;
1947 if (
auto arrayCtor = mlir::dyn_cast<cir::ArrayCtor>(op)) {
1948 mlir::Region &partialDtor = arrayCtor.getPartialDtor();
1949 if (!partialDtor.empty())
1950 partialDtorBlock = &partialDtor.front();
1951 }
else if (
auto arrayDtor = mlir::dyn_cast<cir::ArrayDtor>(op)) {
1960 if (arrayDtor.getDtorMayThrow())
1961 partialDtorBlock = bodyBlock;
// Emits the main loop: the condition compares the element pointer loaded from
// `tmpAddr` against `stop`; the body either runs the cloned region on the
// current element and then strides by +1, or strides by -1 first and runs the
// cloned region on the previous element.
1964 auto emitCtorDtorLoop = [&]() {
1968 [&](mlir::OpBuilder &
b, mlir::Location loc) {
1969 auto currentElement = cir::LoadOp::create(
b, loc, eltTy, tmpAddr);
1970 auto cmp = cir::CmpOp::create(builder, loc, cir::CmpOpKind::ne,
1971 currentElement, stop);
1975 [&](mlir::OpBuilder &
b, mlir::Location loc) {
1976 auto currentElement = cir::LoadOp::create(
b, loc, eltTy, tmpAddr);
1978 cloneRegionBodyInto(bodyBlock, currentElement);
1979 mlir::Value stride = builder.
getUnsignedInt(loc, 1, sizeTypeSize);
1980 auto nextElement = cir::PtrStrideOp::create(builder, loc, eltTy,
1981 currentElement, stride);
1984 mlir::Value stride = builder.
getSignedInt(loc, -1, sizeTypeSize);
1985 auto prevElement = cir::PtrStrideOp::create(builder, loc, eltTy,
1986 currentElement, stride);
1988 cloneRegionBodyInto(bodyBlock, prevElement);
1991 cir::YieldOp::create(
b, loc);
// With a partial-destruction block, the loop is wrapped in an EH cleanup
// scope whose cleanup walks back from the current element towards `begin`,
// replaying `partialDtorBlock` on each previous element.
1995 if (partialDtorBlock) {
1996 cir::CleanupScopeOp::create(
1997 builder, loc, cir::CleanupKind::EH,
1999 [&](mlir::OpBuilder &
b, mlir::Location loc) {
2001 cir::YieldOp::create(
b, loc);
2004 [&](mlir::OpBuilder &
b, mlir::Location loc) {
2005 auto cur = cir::LoadOp::create(
b, loc, eltTy, tmpAddr);
2007 cir::CmpOp::create(builder, loc, cir::CmpOpKind::ne, cur, begin);
2009 builder, loc, cmp,
false,
2010 [&](mlir::OpBuilder &
b, mlir::Location loc) {
2014 [&](mlir::OpBuilder &
b, mlir::Location loc) {
2015 auto el = cir::LoadOp::create(
b, loc, eltTy, tmpAddr);
2016 auto neq = cir::CmpOp::create(
2017 builder, loc, cir::CmpOpKind::ne, el, begin);
2021 [&](mlir::OpBuilder &
b, mlir::Location loc) {
2022 auto el = cir::LoadOp::create(
b, loc, eltTy, tmpAddr);
2023 mlir::Value negOne =
2025 auto prev = cir::PtrStrideOp::create(builder, loc, eltTy,
2028 cloneRegionBodyInto(partialDtorBlock, prev);
2031 cir::YieldOp::create(builder, loc);
2033 cir::YieldOp::create(
b, loc);
2040 cir::YieldOp::create(builder, loc);
// Lowers a cir::ArrayDtor; new IR is inserted right after the op. The element
// type is the type of the first block argument of the op's region 0. When the
// op carries a dynamic element count, that value is forwarded together with a
// literal 0; otherwise the element count is read from the cir::ArrayType that
// the address operand points to.
2045void LoweringPreparePass::lowerArrayDtor(cir::ArrayDtor op) {
2046 CIRBaseBuilderTy builder(getContext());
2047 builder.setInsertionPointAfter(op.getOperation());
2049 mlir::Type eltTy = op->getRegion(0).getArgument(0).getType();
2051 if (op.getNumElements()) {
2053 op.getNumElements(), 0,
2059 mlir::cast<cir::ArrayType>(op.getAddr().getType().getPointee()).getSize();
// Lowers a cir::ArrayCtor; new IR is inserted right after the op. The element
// type is the type of the first block argument of the op's region 0. When the
// op carries a dynamic element count, that value is forwarded together with a
// literal 0; otherwise the element count is read from the cir::ArrayType that
// the address operand points to.
2065void LoweringPreparePass::lowerArrayCtor(cir::ArrayCtor op) {
2066 cir::CIRBaseBuilderTy builder(getContext());
2067 builder.setInsertionPointAfter(op.getOperation());
2069 mlir::Type eltTy = op->getRegion(0).getArgument(0).getType();
2071 if (op.getNumElements()) {
2073 op.getNumElements(), 0,
2079 mlir::cast<cir::ArrayType>(op.getAddr().getType().getPointee()).getSize();
// Resolves the cir::FuncOp targeted by `callOp`. The callee is read as an
// mlir::SymbolRefAttr via dyn_cast_if_present (so `sym` is null when the
// callee is absent or not a symbol reference) and then looked up through
// `symbolTables` relative to the call.
2085cir::FuncOp LoweringPreparePass::getCalledFunction(cir::CallOp callOp) {
2086 mlir::SymbolRefAttr sym = llvm::dyn_cast_if_present<mlir::SymbolRefAttr>(
2087 callOp.getCallableForCallee());
2090 return symbolTables.lookupNearestSymbolFrom<cir::FuncOp>(callOp, sym);
// Handles calls to trivial copy constructors. The callee is resolved with
// getCalledFunction; when it is a C++ constructor of kind Copy and is marked
// as a trivial member function, operand 0 is taken as the destination and
// operand 1 as the source, and replacement IR is built in front of the call.
2093void LoweringPreparePass::lowerTrivialCopyCall(cir::CallOp op) {
2094 cir::FuncOp funcOp = getCalledFunction(op);
2098 std::optional<cir::CtorKind> ctorKind = funcOp.getCxxConstructorKind();
2099 if (ctorKind && *ctorKind == cir::CtorKind::Copy &&
2100 funcOp.isCxxTrivialMemberFunction()) {
2102 CIRBaseBuilderTy builder(getContext());
2103 mlir::ValueRange operands = op.getOperands();
2104 mlir::Value dest = operands[0];
2105 mlir::Value src = operands[1];
2106 builder.setInsertionPoint(op);
// Returns a module-level global of type `ty` initialized with `constant`,
// reusing an existing one where possible.
//  - `constAggregateGlobals[baseName]` holds the versions already known for
//    this base name; each is compared by symbol type and initial value.
//  - Candidate names are formed from `baseName`, optionally followed by '.'
//    and a version number; a global already present in the module under a
//    candidate name is added to the version list and compared the same way.
//  - Otherwise a new global with private linkage and private symbol visibility
//    is created at the start of the module body, given `constant` as initial
//    value, inserted into the module's symbol table, and recorded as a version.
2112cir::GlobalOp LoweringPreparePass::getOrCreateConstAggregateGlobal(
2113 CIRBaseBuilderTy &builder, mlir::Location loc, llvm::StringRef baseName,
2114 mlir::Type ty, mlir::TypedAttr constant) {
2116 llvm::SmallVector<cir::GlobalOp, 1> &versions =
2117 constAggregateGlobals[baseName];
2120 for (cir::GlobalOp gv : versions) {
2121 if (gv.getSymType() == ty && gv.getInitialValue() == constant)
2129 llvm::SmallString<128>
name(baseName);
2130 size_t baseLen =
name.size();
2131 unsigned version = versions.size();
// Cut the buffer back to the bare base name before appending a new suffix.
2133 name.resize(baseLen);
2135 name.push_back(
'.');
2136 llvm::Twine(version).toVector(name);
2138 auto existingGv = symbolTables.lookupSymbolIn<cir::GlobalOp>(
2139 mlirModule, mlir::StringAttr::get(&getContext(), name));
2142 versions.push_back(existingGv);
2143 if (existingGv.getSymType() == ty &&
2144 existingGv.getInitialValue() == constant)
2150 mlir::OpBuilder::InsertionGuard guard(builder);
2151 builder.setInsertionPointToStart(mlirModule.getBody());
2153 cir::GlobalOp::create(builder, loc, name, ty,
2155 cir::LangAddressSpaceAttr::get(
2156 &getContext(), cir::LangAddressSpace::Default),
2157 cir::GlobalLinkageKind::PrivateLinkage);
2158 mlir::SymbolTable::setSymbolVisibility(
2159 gv, mlir::SymbolTable::Visibility::Private);
2160 gv.setInitialValueAttr(constant);
// Keep the cached symbol table in sync with the newly created global.
2164 symbolTables.getSymbolTable(mlirModule).insert(gv);
2166 versions.push_back(gv);
// Handles a cir::StoreOp whose stored value comes from a cir::ConstantOp of
// array or record type and whose address comes from a cir::AllocaOp. The
// constant is materialized as a global named
// "__const.<enclosing function>.<alloca name>" (see
// getOrCreateConstAggregateGlobal), and a cir::GetGlobalOp for that global is
// created in front of the store. The constant op's use list is checked at the
// end.
2170void LoweringPreparePass::lowerStoreOfConstAggregate(cir::StoreOp op) {
2172 auto constOp = op.getValue().getDefiningOp<cir::ConstantOp>();
2176 mlir::Type ty = constOp.getType();
2177 if (!mlir::isa<cir::ArrayType, cir::RecordType>(ty))
2183 auto alloca = op.getAddr().getDefiningOp<cir::AllocaOp>();
2187 mlir::TypedAttr constant = constOp.getValue();
2198 auto func = op->getParentOfType<cir::FuncOp>();
2201 llvm::StringRef funcName = func.getSymName();
2204 llvm::StringRef varName = alloca.getName();
2207 std::string baseName = (
"__const." + funcName +
"." + varName).str();
2208 CIRBaseBuilderTy builder(getContext());
2212 cir::GlobalOp gv = getOrCreateConstAggregateGlobal(builder, op.getLoc(),
2213 baseName, ty, constant);
2216 builder.setInsertionPoint(op);
2218 auto ptrTy = cir::PointerType::get(ty);
2219 mlir::Value globalPtr =
2220 cir::GetGlobalOp::create(builder, op.getLoc(), ptrTy, gv.getSymName());
2229 if (constOp.use_empty())
// Dispatches one operation to the matching lowering routine, by op kind. The
// first matching branch wins. Besides calling the lower* helpers, two branches
// collect data for later stages:
//  - GlobalOp: globals carrying a CUDAVarRegistrationInfoAttr are appended to
//    `cudaDeviceVars`.
//  - FuncOp: functions with a global ctor (else dtor) priority are appended to
//    `globalCtorList` / `globalDtorList`, and functions carrying a
//    CUDAKernelNameAttr are recorded in `cudaKernelMap` under the kernel name.
2233void LoweringPreparePass::runOnOp(mlir::Operation *op) {
2234 if (
auto arrayCtor = dyn_cast<cir::ArrayCtor>(op)) {
2235 lowerArrayCtor(arrayCtor);
2236 }
else if (
auto arrayDtor = dyn_cast<cir::ArrayDtor>(op)) {
2237 lowerArrayDtor(arrayDtor);
2238 }
else if (
auto cast = mlir::dyn_cast<cir::CastOp>(op)) {
2240 }
else if (
auto complexDiv = mlir::dyn_cast<cir::ComplexDivOp>(op)) {
2241 lowerComplexDivOp(complexDiv);
2242 }
else if (
auto complexMul = mlir::dyn_cast<cir::ComplexMulOp>(op)) {
2243 lowerComplexMulOp(complexMul);
2244 }
else if (
auto glob = mlir::dyn_cast<cir::GlobalOp>(op)) {
2245 lowerGlobalOp(glob);
2246 if (
auto regAttr = glob->getAttrOfType<CUDAVarRegistrationInfoAttr>(
2247 CUDAVarRegistrationInfoAttr::getMnemonic()))
2248 cudaDeviceVars.emplace_back(glob, regAttr);
2249 }
else if (
auto getGlob = mlir::dyn_cast<cir::GetGlobalOp>(op)) {
2250 lowerGetGlobalOp(getGlob);
2251 }
else if (
auto unaryOp = mlir::dyn_cast<cir::UnaryOpInterface>(op)) {
2252 lowerUnaryOp(unaryOp);
2253 }
else if (
auto callOp = dyn_cast<cir::CallOp>(op)) {
2254 lowerTrivialCopyCall(callOp);
2255 }
else if (
auto storeOp = dyn_cast<cir::StoreOp>(op)) {
2256 lowerStoreOfConstAggregate(storeOp);
2257 }
else if (
auto fnOp = dyn_cast<cir::FuncOp>(op)) {
// A function is recorded as a ctor or as a dtor, never both: the dtor
// priority is only consulted when there is no ctor priority.
2258 if (
auto globalCtor = fnOp.getGlobalCtorPriority())
2259 globalCtorList.emplace_back(fnOp.getName(), globalCtor.value());
2260 else if (
auto globalDtor = fnOp.getGlobalDtorPriority())
2261 globalDtorList.emplace_back(fnOp.getName(), globalDtor.value());
// NOTE(review): the dyn_cast result below is used without a null check; if the
// attribute stored under this name could ever be of another type, this would
// dereference a null attribute - consider mlir::cast or a guard.
2263 if (mlir::Attribute attr =
2264 fnOp->getAttr(cir::CUDAKernelNameAttr::getMnemonic())) {
2265 auto kernelNameAttr = dyn_cast<CUDAKernelNameAttr>(attr);
2266 llvm::StringRef kernelName = kernelNameAttr.getKernelName();
2267 cudaKernelMap[kernelName] = fnOp;
2269 }
else if (
auto threeWayCmp = dyn_cast<cir::CmpThreeWayOp>(op)) {
2270 lowerThreeWayCmpOp(threeWayCmp);
2271 }
else if (
auto initOp = dyn_cast<cir::LocalInitOp>(op)) {
2272 lowerLocalInitOp(initOp);
2283 llvm::StringRef name) {
// Concatenates "__", `prefix` and `name` into a std::string.
2284 return (
"__" + prefix + name).str();
// Builds the CUDA/HIP module constructor and the globals it relies on.
//  - Relocatable device code is not implemented (llvm_unreachable).
//  - The emptiness of both `cudaKernelMap` and `cudaDeviceVars` is tested
//    first.
//  - The GPU binary named by the module's CUDA binary-handle attribute is read
//    through the virtual file system; a read failure is reported with
//    emitError on the module.
//  - The binary is embedded as a private, 8-aligned char-array global placed
//    in the ".hip_fatbin" / ".nv_fatbin" section, and referenced from a
//    private wrapper record {magic, version, fatbin pointer, unused} placed in
//    the ".hipFatBinSegment" / ".nvFatBinSegment" section.
//  - A private global with internal linkage holds the GPU binary handle, the
//    runtime registration function is declared, and an internal-linkage module
//    ctor is created and appended to `globalCtorList` with default priority.
//  - Two ctor bodies are visible: one branching on a null handle check (it
//    uses buildHIPModuleDtor), and one straight-line body (it uses
//    buildCUDAModuleDtor and `__cudaRegisterFatBinaryEnd`).
2306void LoweringPreparePass::buildCUDAModuleCtor() {
2309 if (astCtx->
getLangOpts().GPURelocatableDeviceCode)
2310 llvm_unreachable(
"GPU RDC NYI");
2314 if (cudaKernelMap.empty() && cudaDeviceVars.empty())
2319 mlir::Attribute cudaBinaryHandleAttr =
2320 mlirModule->getAttr(CIRDialect::getCUDABinaryHandleAttrName());
2321 if (!cudaBinaryHandleAttr) {
2327 llvm::StringRef cudaGPUBinaryName =
2328 mlir::cast<CUDABinaryHandleAttr>(cudaBinaryHandleAttr)
2332 llvm::vfs::FileSystem &vfs =
2334 llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> gpuBinaryOrErr =
2335 vfs.getBufferForFile(cudaGPUBinaryName);
2336 if (std::error_code ec = gpuBinaryOrErr.getError()) {
2337 mlirModule->emitError(
"cannot open GPU binary file: " + cudaGPUBinaryName +
2338 ": " + ec.message());
2341 std::unique_ptr<llvm::MemoryBuffer> gpuBinary =
2342 std::move(gpuBinaryOrErr.get());
2346 mlir::Location loc = mlirModule->getLoc();
2347 CIRBaseBuilderTy builder(getContext());
2348 builder.setInsertionPointToStart(mlirModule.getBody());
2352 PointerType voidPtrPtrTy = builder.
getPointerTo(voidPtrTy);
2354 IntType charTy = cir::IntType::get(&getContext(), astCtx->
getCharWidth(),
// Section names depend on whether the HIP language option is set.
2360 llvm::StringRef fatbinConstName =
2361 astCtx->
getLangOpts().HIP ?
".hip_fatbin" :
".nv_fatbin";
2363 llvm::StringRef fatbinSectionName =
2364 astCtx->
getLangOpts().HIP ?
".hipFatBinSegment" :
".nvFatBinSegment";
// The fatbin string global is a char array exactly as long as the binary.
2368 ArrayType::get(&getContext(), charTy, gpuBinary->getBuffer().size());
2370 GlobalOp fatbinStr = GlobalOp::create(builder, loc, fatbinStrName, fatbinType,
2372 GlobalLinkageKind::PrivateLinkage);
2373 fatbinStr.setAlignment(8);
2374 fatbinStr.setInitialValueAttr(cir::ConstArrayAttr::get(
2375 fatbinType, StringAttr::get(gpuBinary->getBuffer(), fatbinType)));
2376 fatbinStr.setSection(fatbinConstName);
2377 fatbinStr.setPrivate();
// Wrapper record layout: {int, int, void*, void*}.
2381 auto fatbinWrapperType = RecordType::get(
2382 &getContext(), {intTy, intTy, voidPtrTy, voidPtrTy},
2383 false,
false, RecordType::RecordKind::Struct);
2384 std::string fatbinWrapperName =
2386 GlobalOp fatbinWrapper = GlobalOp::create(
2387 builder, loc, fatbinWrapperName, fatbinWrapperType,
2388 true, {}, GlobalLinkageKind::PrivateLinkage);
2389 fatbinWrapper.setSection(fatbinSectionName);
// First wrapper field: a magic number that differs between CUDA and HIP.
2391 constexpr unsigned cudaFatMagic = 0x466243b1;
2392 constexpr unsigned hipFatMagic = 0x48495046;
2393 unsigned fatMagic =
isHIP ? hipFatMagic : cudaFatMagic;
2395 auto magicInit = IntAttr::get(intTy, fatMagic);
2396 auto versionInit = IntAttr::get(intTy, 1);
2397 auto fatbinStrSymbol =
2398 mlir::FlatSymbolRefAttr::get(fatbinStr.getSymNameAttr());
2399 auto fatbinInit = GlobalViewAttr::get(voidPtrTy, fatbinStrSymbol);
2401 fatbinWrapper.setInitialValueAttr(cir::ConstRecordAttr::get(
2403 mlir::ArrayAttr::get(&getContext(),
2404 {magicInit, versionInit, fatbinInit, unusedInit})));
2407 std::string gpubinHandleName =
2410 GlobalOp gpuBinHandle = GlobalOp::create(
2411 builder, loc, gpubinHandleName, voidPtrPtrTy,
2412 false, {}, cir::GlobalLinkageKind::InternalLinkage);
2414 gpuBinHandle.setPrivate();
// Runtime registration function: takes a void* and returns a void**.
2419 std::string regFuncName =
2421 FuncType regFuncType = FuncType::get({voidPtrTy}, voidPtrPtrTy);
2422 cir::FuncOp regFunc =
2423 buildRuntimeFunction(builder, regFuncName, loc, regFuncType);
2426 cir::FuncOp moduleCtor = buildRuntimeFunction(
2427 builder, moduleCtorName, loc, FuncType::get({}, voidTy),
2428 GlobalLinkageKind::InternalLinkage);
2430 globalCtorList.emplace_back(moduleCtorName,
2431 cir::GlobalCtorAttr::getDefaultPriority());
2432 builder.setInsertionPointToStart(moduleCtor.addEntryBlock());
// Branching ctor body: the entry block jumps to `ifBlock` when `handle` is
// null and to `exitBlock` otherwise.
2440 mlir::Block *entryBlock = builder.getInsertionBlock();
2441 mlir::Region *parent = entryBlock->getParent();
2442 mlir::Block *ifBlock = builder.createBlock(parent);
2443 mlir::Block *exitBlock = builder.createBlock(parent);
2445 mlir::OpBuilder::InsertionGuard guard(builder);
2446 builder.setInsertionPointToEnd(entryBlock);
2447 mlir::Value handle =
2449 auto handlePtrTy = mlir::cast<cir::PointerType>(handle.getType());
2450 mlir::Value nullPtr = builder.
getNullPtr(handlePtrTy, loc);
2451 mlir::Value isNull =
2452 builder.
createCompare(loc, cir::CmpOpKind::eq, handle, nullPtr);
2453 cir::BrCondOp::create(builder, loc, isNull, ifBlock, exitBlock);
// ifBlock: store the result of `gpuBinaryHandleCall` into the handle global,
// then continue in exitBlock.
2457 mlir::OpBuilder::InsertionGuard guard(builder);
2458 builder.setInsertionPointToStart(ifBlock);
2460 mlir::Value fatbinVoidPtr = builder.
createBitcast(wrapper, voidPtrTy);
2461 cir::CallOp gpuBinaryHandleCall =
2463 mlir::Value gpuBinaryHandle = gpuBinaryHandleCall.getResult();
2465 mlir::Value gpuBinaryHandleGlobal = builder.
createGetGlobal(gpuBinHandle);
2466 builder.
createStore(loc, gpuBinaryHandle, gpuBinaryHandleGlobal);
2467 cir::BrOp::create(builder, loc, exitBlock);
// exitBlock: optional global registration, optional `atexit` setup for the
// HIP module dtor, then return.
2472 mlir::OpBuilder::InsertionGuard guard(builder);
2473 builder.setInsertionPointToStart(exitBlock);
2474 mlir::Value gHandle =
2477 if (std::optional<FuncOp> regGlobal = buildCUDARegisterGlobals())
2480 if (std::optional<FuncOp> dtor = buildHIPModuleDtor()) {
2481 cir::CIRBaseBuilderTy globalBuilder(getContext());
2482 globalBuilder.setInsertionPointToStart(mlirModule.getBody());
2483 FuncOp atexit = buildRuntimeFunction(
2484 globalBuilder,
"atexit", loc,
2485 FuncType::get(PointerType::get(dtor->getFunctionType()), intTy));
2486 mlir::Value dtorFunc = GetGlobalOp::create(
2487 builder, loc, PointerType::get(dtor->getFunctionType()),
2488 mlir::FlatSymbolRefAttr::get(dtor->getSymNameAttr()));
2491 cir::ReturnOp::create(builder, loc);
// Straight-line ctor body, used when relocatable device code is off; the
// alternative is not implemented.
2495 if (!astCtx->
getLangOpts().GPURelocatableDeviceCode) {
2503 mlir::Value fatbinVoidPtr = builder.
createBitcast(wrapper, voidPtrTy);
2504 cir::CallOp gpuBinaryHandleCall =
2506 mlir::Value gpuBinaryHandle = gpuBinaryHandleCall.getResult();
2508 mlir::Value gpuBinaryHandleGlobal = builder.
createGetGlobal(gpuBinHandle);
2509 builder.
createStore(loc, gpuBinaryHandle, gpuBinaryHandleGlobal);
2512 if (std::optional<FuncOp> regGlobal = buildCUDARegisterGlobals()) {
2513 builder.
createCallOp(loc, *regGlobal, gpuBinaryHandle);
2522 cir::CIRBaseBuilderTy globalBuilder(getContext());
2523 globalBuilder.setInsertionPointToStart(mlirModule.getBody());
2525 buildRuntimeFunction(globalBuilder,
"__cudaRegisterFatBinaryEnd", loc,
2526 FuncType::get({voidPtrPtrTy}, voidTy));
2530 llvm_unreachable(
"GPU RDC NYI");
// When a module dtor exists, `atexit` is declared at module scope and the
// dtor's address is taken inside the ctor.
2535 if (std::optional<FuncOp> dtor = buildCUDAModuleDtor()) {
2538 cir::CIRBaseBuilderTy globalBuilder(getContext());
2539 globalBuilder.setInsertionPointToStart(mlirModule.getBody());
2540 FuncOp atexit = buildRuntimeFunction(
2541 globalBuilder,
"atexit", loc,
2542 FuncType::get(PointerType::get(dtor->getFunctionType()), intTy));
2543 mlir::Value dtorFunc = GetGlobalOp::create(
2544 builder, loc, PointerType::get(dtor->getFunctionType()),
2545 mlir::FlatSymbolRefAttr::get(dtor->getSymNameAttr()));
2548 cir::ReturnOp::create(builder, loc);
// Builds the CUDA module destructor. The presence of the CUDA binary-handle
// attribute on the module is tested first. The runtime unregister function
// (taking a void** and returning void) is declared at the start of the module
// body, and an internal-linkage void() destructor is created. Its body loads
// the GPU binary handle from the global looked up under `gpubinName` and ends
// with a ReturnOp.
2551std::optional<FuncOp> LoweringPreparePass::buildCUDAModuleDtor() {
2552 if (!mlirModule->getAttr(CIRDialect::getCUDABinaryHandleAttrName()))
2557 VoidType voidTy = VoidType::get(&getContext());
2558 PointerType voidPtrPtrTy = PointerType::get(PointerType::get(voidTy));
2560 mlir::Location loc = mlirModule.getLoc();
2562 cir::CIRBaseBuilderTy builder(getContext());
2563 builder.setInsertionPointToStart(mlirModule.getBody());
2566 std::string unregisterFuncName =
2568 FuncOp unregisterFunc = buildRuntimeFunction(
2569 builder, unregisterFuncName, loc, FuncType::get({voidPtrPtrTy}, voidTy));
2578 buildRuntimeFunction(builder, dtorName, loc, FuncType::get({}, voidTy),
2579 GlobalLinkageKind::InternalLinkage);
2581 builder.setInsertionPointToStart(dtor.addEntryBlock());
// `cast` asserts that a GlobalOp with this name exists in the module.
2587 GlobalOp gpubinGlobal = cast<GlobalOp>(mlirModule.lookupSymbol(gpubinName));
2589 mlir::Value gpubin = builder.
createLoad(loc, gpubinAddress);
2591 ReturnOp::create(builder, loc);
// Builds the HIP module destructor. The presence of the CUDA binary-handle
// attribute on the module is tested first. The runtime unregister function
// (taking a void** and returning void) is declared at the start of the module
// body, and an internal-linkage void() destructor is created with three
// blocks: the entry block branches to `ifBlock` when `handle` is non-null and
// to `exitBlock` otherwise; `ifBlock` falls through to `exitBlock`; and
// `exitBlock` returns.
2608std::optional<FuncOp> LoweringPreparePass::buildHIPModuleDtor() {
2609 if (!mlirModule->getAttr(CIRDialect::getCUDABinaryHandleAttrName()))
2614 VoidType voidTy = VoidType::get(&getContext());
2615 PointerType voidPtrPtrTy = PointerType::get(PointerType::get(voidTy));
2617 mlir::Location loc = mlirModule.getLoc();
2619 cir::CIRBaseBuilderTy builder(getContext());
2620 builder.setInsertionPointToStart(mlirModule.getBody());
2623 std::string unregisterFuncName =
2625 FuncOp unregisterFunc = buildRuntimeFunction(
2626 builder, unregisterFuncName, loc, FuncType::get({voidPtrPtrTy}, voidTy));
2630 buildRuntimeFunction(builder, dtorName, loc, FuncType::get({}, voidTy),
2631 GlobalLinkageKind::InternalLinkage);
// `cast` asserts that a GlobalOp with this name exists in the module.
2634 GlobalOp gpuBinGlobal = cast<GlobalOp>(mlirModule.lookupSymbol(gpubinName));
2636 mlir::Block *entryBlock = dtor.addEntryBlock();
2637 mlir::Block *ifBlock = builder.createBlock(&dtor.getBody());
2638 mlir::Block *exitBlock = builder.createBlock(&dtor.getBody());
// Entry block: null check on the handle.
2640 mlir::OpBuilder::InsertionGuard guard(builder);
2641 builder.setInsertionPointToEnd(entryBlock);
2642 mlir::Value handle =
2644 auto handlePtrTy = mlir::cast<cir::PointerType>(handle.getType());
2645 mlir::Value nullPtr = builder.
getNullPtr(handlePtrTy, loc);
2646 mlir::Value isNotNull =
2647 builder.
createCompare(loc, cir::CmpOpKind::ne, handle, nullPtr);
2648 cir::BrCondOp::create(builder, loc, isNotNull, ifBlock, exitBlock);
2652 mlir::OpBuilder::InsertionGuard ifGuard(builder);
2653 builder.setInsertionPointToStart(ifBlock);
2656 cir::BrOp::create(builder, loc, exitBlock);
2659 mlir::OpBuilder::InsertionGuard exitGuard(builder);
2660 builder.setInsertionPointToStart(exitBlock);
2661 cir::ReturnOp::create(builder, loc);
// Builds the function that registers kernels and device variables with the
// GPU runtime and returns it. The emptiness of both `cudaKernelMap` and
// `cudaDeviceVars` is tested first. The function has internal linkage and type
// void(void**); its entry block is filled by
// buildCUDARegisterGlobalFunctions and buildCUDARegisterVars, in that order,
// and terminated with a ReturnOp.
2667std::optional<FuncOp> LoweringPreparePass::buildCUDARegisterGlobals() {
2668 if (cudaKernelMap.empty() && cudaDeviceVars.empty())
2671 cir::CIRBaseBuilderTy builder(getContext());
2672 builder.setInsertionPointToStart(mlirModule.getBody());
2674 mlir::Location loc = mlirModule.getLoc();
2677 auto voidTy = VoidType::get(&getContext());
2678 auto voidPtrTy = PointerType::get(voidTy);
2679 auto voidPtrPtrTy = PointerType::get(voidPtrTy);
2683 std::string regGlobalFuncName =
2685 auto regGlobalFuncTy = FuncType::get({voidPtrPtrTy}, voidTy);
2686 FuncOp regGlobalFunc =
2687 buildRuntimeFunction(builder, regGlobalFuncName, loc, regGlobalFuncTy,
2688 GlobalLinkageKind::InternalLinkage);
2689 builder.setInsertionPointToStart(regGlobalFunc.addEntryBlock());
2691 buildCUDARegisterGlobalFunctions(builder, regGlobalFunc);
2692 buildCUDARegisterVars(builder, regGlobalFunc);
2694 ReturnOp::create(builder, loc);
2695 return regGlobalFunc;
// Emits, into `regGlobalFunc`, one runtime registration call per kernel in
// `cudaKernelMap`. The fatbin handle is the first argument of `regGlobalFunc`.
// For each kernel a private string global with the kernel name is created at
// module scope, and the runtime function is called with ten operands: the
// fatbin handle, the host function, the device function name twice, the
// integer constant -1, and five null pointers.
2698void LoweringPreparePass::buildCUDARegisterGlobalFunctions(
2699 cir::CIRBaseBuilderTy &builder, FuncOp regGlobalFunc) {
2700 mlir::Location loc = mlirModule.getLoc();
2702 cir::CIRDataLayout dataLayout(mlirModule);
2704 auto voidTy = VoidType::get(&getContext());
2705 auto voidPtrTy = PointerType::get(voidTy);
2706 auto voidPtrPtrTy = PointerType::get(voidPtrTy);
2708 IntType charTy = cir::IntType::get(&getContext(), astCtx->
getCharWidth(),
2712 mlir::Value fatbinHandle = *regGlobalFunc.args_begin();
// Separate builder for module-scope declarations and string globals, so the
// caller's insertion point inside `regGlobalFunc` is left alone.
2714 cir::CIRBaseBuilderTy globalBuilder(getContext());
2715 globalBuilder.setInsertionPointToStart(mlirModule.getBody());
2729 FuncOp cudaRegisterFunction = buildRuntimeFunction(
2731 FuncType::get({voidPtrPtrTy, voidPtrTy, voidPtrTy, voidPtrTy, intTy,
2732 voidPtrTy, voidPtrTy, voidPtrTy, voidPtrTy, voidPtrTy},
// Creates a private global named ".str<str>" whose array type has one more
// element than `str` has characters.
// NOTE(review): in `str + "\0"` the literal is an empty C string as far as
// llvm::Twine is concerned, so no NUL byte is appended by the concatenation;
// presumably the terminator comes from the extra array element - confirm.
2735 auto makeConstantString = [&](llvm::StringRef str) -> GlobalOp {
2736 auto strType = ArrayType::get(&getContext(), charTy, 1 + str.size());
2737 auto tmpString = cir::GlobalOp::create(
2738 globalBuilder, loc, (
".str" + str).str(), strType,
2740 cir::GlobalLinkageKind::PrivateLinkage);
2743 tmpString.setInitialValueAttr(
2744 ConstArrayAttr::get(strType, StringAttr::get(str +
"\0", strType)));
2745 tmpString.setPrivate();
// A single null-pointer constant is shared by all registration calls.
2749 cir::ConstantOp cirNullPtr = builder.
getNullPtr(voidPtrTy, loc);
2751 for (
auto kernelName : cudaKernelMap.keys()) {
2752 FuncOp deviceStub = cudaKernelMap[kernelName];
2753 GlobalOp deviceFuncStr = makeConstantString(kernelName);
2757 mlir::Value hostFunc;
2764 auto funcHandle = cast<GlobalOp>(mlirModule.lookupSymbol(kernelName));
2769 GetGlobalOp::create(
2770 builder, loc, PointerType::get(deviceStub.getFunctionType()),
2771 mlir::FlatSymbolRefAttr::get(deviceStub.getSymNameAttr())),
2775 loc, cudaRegisterFunction,
2776 {fatbinHandle, hostFunc, deviceFunc, deviceFunc,
2777 ConstantOp::create(builder, loc, IntAttr::get(intTy, -1)), cirNullPtr,
2778 cirNullPtr, cirNullPtr, cirNullPtr, cirNullPtr});
// Populates regGlobalFunc (through builder) with the registration sequence for
// the device variables recorded in cudaDeviceVars. For each entry it creates a
// private string global for the device-side name and builds the argument list
// (fatbin handle, host variable, name, extern flag, size, constant flag,
// normalized flag) for the runtime function held in cudaRegisterVar.
//
// builder        - builder used for the constants emitted for each variable.
// regGlobalFunc  - function being filled in; its first argument is read as
//                  the fatbin handle.
//
// NOTE(review): this listing omits several source lines (the embedded line
// numbers skip); voidPtrTy, intTy, sizeTy, deviceName and the call creation
// itself are defined on lines not visible here.
2785void LoweringPreparePass::buildCUDARegisterVars(cir::CIRBaseBuilderTy &builder,
2786 FuncOp regGlobalFunc) {
2787 mlir::Location loc = mlirModule.getLoc();
// Data layout of the module; used below to compute each variable's size.
2789 cir::CIRDataLayout dataLayout(mlirModule);
2792 PointerType voidPtrPtrTy = builder.
getPointerTo(voidPtrTy);
// Character type whose bit width comes from the AST context.
2796 IntType charTy = cir::IntType::get(&getContext(), astCtx->
getCharWidth(),
// Guard on an empty variable list.
// NOTE(review): the guarded statement is not visible; presumably an early
// return.
2799 if (cudaDeviceVars.empty())
// Separate builder positioned at the start of the module body; it is used by
// makeConstantString to create the string globals.
2802 cir::CIRBaseBuilderTy globalBuilder(getContext());
2803 globalBuilder.setInsertionPointToStart(mlirModule.getBody());
2810 cir::VoidType voidTy = builder.
getVoidTy();
// Runtime registration function taking eight parameters:
// (void**, void*, void*, void*, intTy, sizeTy, intTy, intTy).
// NOTE(review): its name and return type are on lines not visible here.
2811 FuncOp cudaRegisterVar = buildRuntimeFunction(
2813 FuncType::get({voidPtrPtrTy, voidPtrTy, voidPtrTy, voidPtrTy, intTy,
2814 sizeTy, intTy, intTy},
// Helper: creates a private global array of charTy with str.size() + 1
// elements, named by prefixing the text with .str, and initializes it from
// the string contents.
2817 auto makeConstantString = [&](llvm::StringRef str) -> GlobalOp {
2818 auto strType = ArrayType::get(&getContext(), charTy, 1 + str.size());
2819 auto tmpString = cir::GlobalOp::create(
2820 globalBuilder, loc, (
".str" + str).str(), strType,
2822 cir::GlobalLinkageKind::PrivateLinkage);
2823 tmpString.setInitialValueAttr(
2824 ConstArrayAttr::get(strType, StringAttr::get(str +
"\0", strType)));
2825 tmpString.setPrivate();
// The first argument of regGlobalFunc is forwarded as the fatbin handle.
2829 mlir::Value fatbinHandle = *regGlobalFunc.args_begin();
// One registration per (global, registration attribute) pair.
2831 for (
auto &[global, regAttr] : cudaDeviceVars) {
// Surface and Texture kinds, and managed variables, are not implemented and
// hit llvm_unreachable. The handling of the Variable case is on a line not
// visible here.
2832 switch (regAttr.getKind()) {
2833 case cir::CUDADeviceVarKind::Variable:
2835 case cir::CUDADeviceVarKind::Surface:
2836 llvm_unreachable(
"Surface registration NYI");
2837 case cir::CUDADeviceVarKind::Texture:
2838 llvm_unreachable(
"Texture registration NYI");
2841 if (regAttr.getIsManaged())
2842 llvm_unreachable(
"Managed variable registration NYI");
// String global holding the device-side name from the registration attribute.
2844 GlobalOp deviceNameStr = makeConstantString(regAttr.getDeviceSideName());
2847 mlir::Value hostVar =
// Integer flags are materialized as constants of type intTy (1 or 0).
2850 auto isExtern = ConstantOp::create(
2851 builder, loc, IntAttr::get(intTy, regAttr.getIsExtern() ? 1 : 0));
// Allocation size of the global's type according to the module data layout,
// emitted as a sizeTy constant.
2852 llvm::TypeSize size = dataLayout.getTypeAllocSize(global.getSymType());
2853 auto varSize = ConstantOp::create(
2854 builder, loc, IntAttr::get(sizeTy, size.getFixedValue()));
2855 auto isConstant = ConstantOp::create(
2856 builder, loc, IntAttr::get(intTy, regAttr.getIsConstant() ? 1 : 0));
// The normalized argument is always the constant 0 here.
2857 auto normalized = ConstantOp::create(builder, loc, IntAttr::get(intTy, 0));
// Argument list: the device name value is passed twice.
2859 {fatbinHandle, hostVar, deviceName, deviceName,
2860 isExtern, varSize, isConstant, normalized});
// Pass entry point. Collects every operation of the kinds listed below from
// the operation this pass runs on, iterates over the collected operations,
// and then invokes the module-level builders (global init function, TLS
// function, CUDA module constructor, global ctor/dtor list).
//
// NOTE(review): this listing omits several source lines (the embedded line
// numbers skip); the loop body and any conditions guarding the builder calls
// are not visible here.
2864void LoweringPreparePass::runOnOperation() {
2865 mlir::Operation *op = getOperation();
// Remember the module when the pass is run directly on a ModuleOp.
2866 if (isa<::mlir::ModuleOp>(op))
2867 mlirModule = cast<::mlir::ModuleOp>(op);
// Operations are gathered first and processed after the walk completes.
2869 llvm::SmallVector<mlir::Operation *> opsToTransform;
2871 op->walk([&](mlir::Operation *op) {
2872 if (mlir::isa<cir::ArrayCtor, cir::ArrayDtor, cir::CastOp,
2873 cir::ComplexMulOp, cir::ComplexDivOp, cir::DynamicCastOp,
2874 cir::FuncOp, cir::CallOp, cir::GetGlobalOp, cir::GlobalOp,
2875 cir::StoreOp, cir::CmpThreeWayOp, cir::IncOp, cir::DecOp,
2876 cir::MinusOp, cir::NotOp, cir::LocalInitOp>(op))
2877 opsToTransform.push_back(op);
// NOTE(review): the per-operation statement is not visible; presumably it
// dispatches each collected operation to runOnOp.
2880 for (mlir::Operation *o : opsToTransform)
// Module-level construction steps, in this order.
2883 buildCXXGlobalInitFunc();
2884 buildCXXGlobalTlsFunc();
2886 buildCUDAModuleCtor();
2888 buildGlobalCtorDtorList();
2892 return std::make_unique<LoweringPreparePass>();
// Factory: constructs a LoweringPreparePass, gives it astCtx through
// setASTContext, and returns it as a std::unique_ptr<Pass>.
// NOTE(review): the declarator line (function name and parameter list) is not
// visible in this listing; astCtx is presumably the function's parameter.
2895std::unique_ptr<Pass>
2897 auto pass = std::make_unique<LoweringPreparePass>();
2898 pass->setASTContext(astCtx);
2899 return std::move(pass);
Defines the clang::ASTContext interface.
static void emitBody(CodeGenFunction &CGF, const Stmt *S, const Stmt *NextLoop, int MaxLevel, int Level=0)
static llvm::FunctionCallee getGuardReleaseFn(CodeGenModule &CGM, llvm::PointerType *GuardPtrTy)
static llvm::FunctionCallee getGuardAcquireFn(CodeGenModule &CGM, llvm::PointerType *GuardPtrTy)
static mlir::Value buildRangeReductionComplexDiv(CIRBaseBuilderTy &builder, mlir::Location loc, mlir::Value lhsReal, mlir::Value lhsImag, mlir::Value rhsReal, mlir::Value rhsImag)
static llvm::StringRef getComplexDivLibCallName(llvm::APFloat::Semantics semantics)
static llvm::SmallVector< mlir::Attribute > prepareCtorDtorAttrList(mlir::MLIRContext *context, llvm::ArrayRef< std::pair< std::string, uint32_t > > list)
static llvm::StringRef getComplexMulLibCallName(llvm::APFloat::Semantics semantics)
static cir::GlobalLinkageKind getThreadLocalWrapperLinkage(GlobalOp op, clang::ASTContext &astCtx)
static mlir::Value buildComplexBinOpLibCall(LoweringPreparePass &pass, CIRBaseBuilderTy &builder, llvm::StringRef(*libFuncNameGetter)(llvm::APFloat::Semantics), mlir::Location loc, cir::ComplexType ty, mlir::Value lhsReal, mlir::Value lhsImag, mlir::Value rhsReal, mlir::Value rhsImag)
static mlir::Value lowerComplexMul(LoweringPreparePass &pass, CIRBaseBuilderTy &builder, mlir::Location loc, cir::ComplexMulOp op, mlir::Value lhsReal, mlir::Value lhsImag, mlir::Value rhsReal, mlir::Value rhsImag)
static std::string addUnderscoredPrefix(llvm::StringRef prefix, llvm::StringRef name)
static SmallString< 128 > getTransformedFileName(mlir::ModuleOp mlirModule)
static mlir::Value lowerComplexToComplexCast(mlir::MLIRContext &ctx, cir::CastOp op, cir::CastKind scalarCastKind)
static void lowerArrayDtorCtorIntoLoop(cir::CIRBaseBuilderTy &builder, clang::ASTContext *astCtx, mlir::Operation *op, mlir::Type eltTy, mlir::Value addr, mlir::Value numElements, uint64_t arrayLen, bool isCtor)
Lower a cir.array.ctor or cir.array.dtor into a do-while loop that iterates over every element.
static bool isThreadWrapperReplaceable(cir::TLS_Model tls, clang::ASTContext &astCtx)
static mlir::Value lowerComplexToScalarCast(mlir::MLIRContext &ctx, cir::CastOp op, cir::CastKind elemToBoolKind)
static mlir::Value buildAlgebraicComplexDiv(CIRBaseBuilderTy &builder, mlir::Location loc, mlir::Value lhsReal, mlir::Value lhsImag, mlir::Value rhsReal, mlir::Value rhsImag)
static llvm::StringRef getCUDAPrefix(clang::ASTContext *astCtx)
static mlir::Type higherPrecisionElementTypeForComplexArithmetic(mlir::MLIRContext &context, clang::ASTContext &cc, CIRBaseBuilderTy &builder, mlir::Type elementType)
static mlir::Value lowerScalarToComplexCast(mlir::MLIRContext &ctx, cir::CastOp op)
static mlir::Value lowerComplexDiv(LoweringPreparePass &pass, CIRBaseBuilderTy &builder, mlir::Location loc, cir::ComplexDivOp op, mlir::Value lhsReal, mlir::Value lhsImag, mlir::Value rhsReal, mlir::Value rhsImag, mlir::MLIRContext &mlirCx, clang::ASTContext &cc)
Defines the clang::Module class, which describes a module in the source code.
static bool compare(const PathDiagnostic &X, const PathDiagnostic &Y)
Defines the SourceManager interface.
Defines various enumerations that describe declaration and type specifiers.
Defines the TargetCXXABI class, which abstracts details of the C++ ABI that we're targeting.
__device__ __2f16 float c
mlir::Value createDiv(mlir::Location loc, mlir::Value lhs, mlir::Value rhs)
mlir::TypedAttr getConstNullPtrAttr(mlir::Type t)
mlir::Value createDec(mlir::Location loc, mlir::Value input, bool nsw=false)
mlir::Value createLogicalOr(mlir::Location loc, mlir::Value lhs, mlir::Value rhs)
mlir::Value createSub(mlir::Location loc, mlir::Value lhs, mlir::Value rhs, OverflowBehavior ob=OverflowBehavior::None)
cir::ConditionOp createCondition(mlir::Value condition)
Create a loop condition.
mlir::Value createInc(mlir::Location loc, mlir::Value input, bool nsw=false)
cir::CopyOp createCopy(mlir::Value dst, mlir::Value src, bool isVolatile=false, bool skipTailPadding=false)
Create a copy with inferred length.
cir::VoidType getVoidTy()
cir::ConstantOp getNullValue(mlir::Type ty, mlir::Location loc)
mlir::Value createCast(mlir::Location loc, cir::CastKind kind, mlir::Value src, mlir::Type newTy)
cir::PointerType getVoidFnPtrTy(mlir::TypeRange argTypes={})
Returns void (*)(T...) as a cir::PointerType.
mlir::Value createAdd(mlir::Location loc, mlir::Value lhs, mlir::Value rhs, OverflowBehavior ob=OverflowBehavior::None)
cir::PointerType getPointerTo(mlir::Type ty)
mlir::Value createComplexImag(mlir::Location loc, mlir::Value operand)
cir::ConstantOp getNullPtr(mlir::Type ty, mlir::Location loc)
cir::IntType getUIntNTy(int n)
cir::DoWhileOp createDoWhile(mlir::Location loc, llvm::function_ref< void(mlir::OpBuilder &, mlir::Location)> condBuilder, llvm::function_ref< void(mlir::OpBuilder &, mlir::Location)> bodyBuilder)
Create a do-while operation.
cir::GetGlobalOp createGetGlobal(mlir::Location loc, cir::GlobalOp global, bool threadLocal=false)
mlir::Value getSignedInt(mlir::Location loc, int64_t val, unsigned numBits)
mlir::Value createAnd(mlir::Location loc, mlir::Value lhs, mlir::Value rhs)
mlir::Value createBitcast(mlir::Value src, mlir::Type newTy)
cir::FuncType getVoidFnTy(mlir::TypeRange argTypes={})
Returns void (T...) as a cir::FuncType.
cir::CmpOp createCompare(mlir::Location loc, cir::CmpOpKind kind, mlir::Value lhs, mlir::Value rhs)
mlir::IntegerAttr getAlignmentAttr(clang::CharUnits alignment)
mlir::Value createSelect(mlir::Location loc, mlir::Value condition, mlir::Value trueValue, mlir::Value falseValue)
mlir::Value createMul(mlir::Location loc, mlir::Value lhs, mlir::Value rhs, OverflowBehavior ob=OverflowBehavior::None)
cir::LoadOp createLoad(mlir::Location loc, mlir::Value ptr, bool isVolatile=false, uint64_t alignment=0)
mlir::Value createMinus(mlir::Location loc, mlir::Value input, bool nsw=false)
cir::ConstantOp getConstantInt(mlir::Location loc, mlir::Type ty, int64_t value)
mlir::Value createComplexCreate(mlir::Location loc, mlir::Value real, mlir::Value imag)
cir::PointerType getVoidPtrTy(clang::LangAS langAS=clang::LangAS::Default)
mlir::Value createIsNaN(mlir::Location loc, mlir::Value operand)
cir::IntType getSIntNTy(int n)
mlir::Value createAlignedLoad(mlir::Location loc, mlir::Value ptr, uint64_t alignment)
cir::CallOp createCallOp(mlir::Location loc, mlir::SymbolRefAttr callee, mlir::Type returnType, mlir::ValueRange operands, llvm::ArrayRef< mlir::NamedAttribute > attrs={}, llvm::ArrayRef< mlir::NamedAttrList > argAttrs={}, llvm::ArrayRef< mlir::NamedAttribute > resAttrs={})
cir::StoreOp createStore(mlir::Location loc, mlir::Value val, mlir::Value dst, bool isVolatile=false, mlir::IntegerAttr align={}, cir::SyncScopeKindAttr scope={}, cir::MemOrderAttr order={})
cir::YieldOp createYield(mlir::Location loc, mlir::ValueRange value={})
Create a yield operation.
mlir::Value createLogicalAnd(mlir::Location loc, mlir::Value lhs, mlir::Value rhs)
mlir::Value createAlloca(mlir::Location loc, cir::PointerType addrType, mlir::Type type, llvm::StringRef name, mlir::IntegerAttr alignment, mlir::Value dynAllocSize)
cir::BoolType getBoolTy()
mlir::Value getUnsignedInt(mlir::Location loc, uint64_t val, unsigned numBits)
mlir::Value createComplexReal(mlir::Location loc, mlir::Value operand)
Holds long-lived AST nodes (such as types and decls) that can be referred to throughout the semantic analysis of a file.
SourceManager & getSourceManager()
MangleContext * createMangleContext(const TargetInfo *T=nullptr)
If T is null pointer, assume the target in ASTContext.
const LangOptions & getLangOpts() const
uint64_t getTypeSize(QualType T) const
Return the size of the specified (complete) type T, in bits.
const TargetInfo & getTargetInfo() const
QualType getSignedSizeType() const
Return the unique signed counterpart of the integer type corresponding to size_t.
Module * getCurrentNamedModule() const
Get module under construction, nullptr if this is not a C++20 module.
uint64_t getCharWidth() const
Return the size of the character type, in bits.
llvm::Align getAsAlign() const
getAsAlign - Returns Quantity as a valid llvm::Align. Beware: llvm::Align assumes power of two 8-bit bytes.
QuantityType getQuantity() const
getQuantity - Get the raw integer representation of this quantity.
static CharUnits One()
One - Construct a CharUnits quantity of one.
static CharUnits fromQuantity(QuantityType Quantity)
fromQuantity - Construct a CharUnits quantity from a raw integer type.
llvm::vfs::FileSystem & getVirtualFileSystem() const
bool isModuleImplementation() const
Is this a module implementation.
FileManager & getFileManager() const
Exposes information about the current target.
const llvm::Triple & getTriple() const
Returns the target triple of the primary target.
unsigned getMaxAtomicInlineWidth() const
Return the maximum width lock-free atomic operation which can be inlined given the supported features of the given target.
const llvm::fltSemantics & getDoubleFormat() const
const llvm::fltSemantics & getHalfFormat() const
const llvm::fltSemantics & getBFloat16Format() const
const llvm::fltSemantics & getLongDoubleFormat() const
const llvm::fltSemantics & getFloatFormat() const
virtual uint64_t getMaxPointerWidth() const
Return the maximum width of pointers on this target.
const llvm::fltSemantics & getFloat128Format() const
const llvm::VersionTuple & getSDKVersion() const
Defines the clang::TargetInfo interface.
static bool isLocalLinkage(GlobalLinkageKind linkage)
static bool isWeakODRLinkage(GlobalLinkageKind linkage)
static bool isLinkOnceLinkage(GlobalLinkageKind linkage)
const internal::VariadicDynCastAllOfMatcher< Decl, VarDecl > varDecl
Matches variable declarations.
bool isHIP(ID Id)
isHIP - Is this a HIP input.
bool isTemplateInstantiation(TemplateSpecializationKind Kind)
Determine whether this template specialization kind refers to an instantiation of an entity (as opposed to a non-template or an explicit specialization).
bool CudaFeatureEnabled(llvm::VersionTuple, CudaFeature)
LLVM_READONLY bool isPreprocessingNumberBody(unsigned char c)
Return true if this is the body character of a C preprocessing number, which is [a-zA-Z0-9_.].
@ CUDA_USES_FATBIN_REGISTER_END
std::unique_ptr< Pass > createLoweringPreparePass()
static bool hipModuleCtor()
static bool guardAbortOnException()
static bool opGlobalAnnotations()
static bool opGlobalCtorPriority()
static bool shouldSplitConstantStore()
static bool shouldUseMemSetToInitialize()
static bool opFuncExtraAttrs()
static bool shouldUseBZeroPlusStoresToInitialize()
static bool fastMathFlags()
static bool astVarDeclInterface()