#include "TargetInfo.h"
#include "llvm/Support/AMDGPUAddrSpace.h"
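  // Budget of 32-bit registers available for passing arguments and returning
  // values directly; the classification code below charges each argument and
  // return value against this limit.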
  static const unsigned MaxNumRegsForArgsRet = 16;
  unsigned numRegsForType(QualType Ty) const;
  bool isHomogeneousAggregateSmallEnough(const Type *Base,
                                         uint64_t Members) const override;
  llvm::Type *coerceKernelArgumentType(llvm::Type *Ty, unsigned FromAS,
                                       unsigned ToAS) const {
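    // Pointer arguments in FromAS are rewritten to equivalent pointers in ToAS.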
    auto *PtrTy = llvm::dyn_cast<llvm::PointerType>(Ty);
    if (PtrTy && PtrTy->getAddressSpace() == FromAS)
      return llvm::PointerType::get(Ty->getContext(), ToAS);
                                  unsigned &NumRegsLeft) const;
bool AMDGPUABIInfo::isHomogeneousAggregateBaseType(QualType Ty) const {
bool AMDGPUABIInfo::isHomogeneousAggregateSmallEnough(
    const Type *Base, uint64_t Members) const {
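  // Count one register per started 32-bit chunk of the base type; the whole
  // aggregate must fit within the argument/return register budget.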
  uint32_t NumRegs = (getContext().getTypeSize(Base) + 31) / 32;
  return Members * NumRegs <= MaxNumRegsForArgsRet;
unsigned AMDGPUABIInfo::numRegsForType(QualType Ty) const {
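  // Vector types are counted in units of 32-bit registers; 16-bit elements
  // are packed two per register.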
    QualType EltTy = VT->getElementType();
    unsigned EltSize = getContext().getTypeSize(EltTy);
    if (EltSize == 16)
      return (VT->getNumElements() + 1) / 2;

    unsigned EltNumRegs = (EltSize + 31) / 32;
    return EltNumRegs * VT->getNumElements();
      NumRegs += numRegsForType(FieldTy);
  return (getContext().getTypeSize(Ty) + 31) / 32;
  unsigned ArgumentIndex = 0;
  unsigned NumRegsLeft = MaxNumRegsForArgsRet;
    if (CC == llvm::CallingConv::AMDGPU_KERNEL) {
      Arg.info = classifyKernelArgumentType(Arg.type);
    } else {
      bool FixedArgument = ArgumentIndex++ < numFixedArguments;
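  // va_arg lowering: values are read directly out of the va_list slot and are
  // never realigned beyond their natural alignment.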
  const bool IsIndirect = false;
  const bool AllowHigherAlign = false;
                          getContext().getTypeInfoInChars(Ty),
      llvm::Type *I32Ty = llvm::Type::getInt32Ty(getVMContext());
    if (numRegsForType(RetTy) <= MaxNumRegsForArgsRet)
  llvm::Type *OrigLTy = CGT.ConvertType(Ty);
  llvm::Type *LTy = OrigLTy;
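  // Under HIP, pointers in the default (generic) address space appearing in a
  // kernel signature are coerced to the device address space.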
  if (getContext().getLangOpts().HIP) {
    LTy = coerceKernelArgumentType(
        OrigLTy, getContext().getTargetAddressSpace(LangAS::Default),
        getContext().getTargetAddressSpace(LangAS::cuda_device));
  if (!getContext().getLangOpts().OpenCL && LTy == OrigLTy &&
        getContext().getTypeAlignInChars(Ty),
        getContext().getTargetAddressSpace(LangAS::opencl_constant),
                                               unsigned &NumRegsLeft) const {
  assert(NumRegsLeft <= MaxNumRegsForArgsRet && "register estimate underflow");
      unsigned NumRegs = (Size + 31) / 32;
      NumRegsLeft -= std::min(NumRegsLeft, NumRegs);
    llvm::Type *I32Ty = llvm::Type::getInt32Ty(getVMContext());
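    // Charge the aggregate against the remaining register budget when it
    // still fits entirely.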
    if (NumRegsLeft > 0) {
      unsigned NumRegs = numRegsForType(Ty);
      if (NumRegsLeft >= NumRegs) {
        NumRegsLeft -= NumRegs;
        getContext().getTypeAlignInChars(Ty),
        getContext().getTargetAddressSpace(LangAS::opencl_private));
  unsigned NumRegs = numRegsForType(Ty);
  NumRegsLeft -= std::min(NumRegs, NumRegsLeft);
  void setFunctionDeclAttributes(const FunctionDecl *FD, llvm::Function *F,
                                 llvm::PointerType *T, QualType QT) const override;
            getABIInfo().getDataLayout().getAllocaAddrSpace());
                                      llvm::AtomicOrdering Ordering,
                                      llvm::LLVMContext &Ctx) const override;
                                 llvm::Instruction &AtomicInst,
                                        llvm::Function *BlockInvokeFunc,
                                        llvm::Type *BlockTy) const override;
                                              llvm::GlobalValue *GV) {
  if (GV->getVisibility() != llvm::GlobalValue::HiddenVisibility)
  return !D->hasAttr<OMPDeclareTargetDeclAttr>() &&
          (isa<FunctionDecl>(D) && D->hasAttr<CUDAGlobalAttr>()) ||
            cast<VarDecl>(D)->getType()->isCUDADeviceBuiltinSurfaceType() ||
            cast<VarDecl>(D)->getType()->isCUDADeviceBuiltinTextureType())));
void AMDGPUTargetCodeGenInfo::setFunctionDeclAttributes(
  const auto *ReqdWGS =
  const bool IsOpenCLKernel =
  const auto *FlatWGS = FD->getAttr<AMDGPUFlatWorkGroupSizeAttr>();
  if (ReqdWGS || FlatWGS) {
  } else if (IsOpenCLKernel || IsHIPKernel) {
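    // No explicit work-group-size attribute: emit a flat work-group size range
    // from 1 up to a default maximum (256 for OpenCL kernels).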
    const unsigned OpenCLDefaultMaxWorkGroupSize = 256;
    const unsigned DefaultMaxWorkGroupSize =
        IsOpenCLKernel ? OpenCLDefaultMaxWorkGroupSize
    std::string AttrVal =
        std::string("1,") + llvm::utostr(DefaultMaxWorkGroupSize);
    F->addFnAttr("amdgpu-flat-work-group-size", AttrVal);
  if (const auto *Attr = FD->getAttr<AMDGPUWavesPerEUAttr>())
  if (const auto *Attr = FD->getAttr<AMDGPUNumSGPRAttr>()) {
    unsigned NumSGPR = Attr->getNumSGPR();
      F->addFnAttr("amdgpu-num-sgpr", llvm::utostr(NumSGPR));
  if (const auto *Attr = FD->getAttr<AMDGPUNumVGPRAttr>()) {
      F->addFnAttr("amdgpu-num-vgpr", llvm::utostr(NumVGPR));
  if (const auto *Attr = FD->getAttr<AMDGPUMaxNumWorkGroupsAttr>()) {
                     ? Attr->getMaxNumWorkGroupsY()
                     ? Attr->getMaxNumWorkGroupsZ()
    llvm::raw_svector_ostream OS(AttrVal);
    OS << X << ',' << Y << ',' << Z;
    F->addFnAttr("amdgpu-max-num-workgroups", AttrVal.str());
void AMDGPUTargetCodeGenInfo::emitTargetGlobals(
  StringRef Name = "__oclc_ABI_version";
  llvm::GlobalVariable *OriginalGV = CGM.getModule().getNamedGlobal(Name);
  if (OriginalGV &&
      !llvm::GlobalVariable::isExternalLinkage(OriginalGV->getLinkage()))
      llvm::CodeObjectVersionKind::COV_None)
  auto *Type = llvm::IntegerType::getIntNTy(CGM.getModule().getContext(), 32);
  llvm::Constant *COV = llvm::ConstantInt::get(
  auto *GV = new llvm::GlobalVariable(
      CGM.getModule(), Type, true, llvm::GlobalValue::WeakODRLinkage, COV, Name,
      nullptr, llvm::GlobalValue::ThreadLocalMode::NotThreadLocal,
  GV->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Local);
  GV->setVisibility(llvm::GlobalValue::VisibilityTypes::HiddenVisibility);
    OriginalGV->replaceAllUsesWith(GV);
    GV->takeName(OriginalGV);
    OriginalGV->eraseFromParent();
void AMDGPUTargetCodeGenInfo::setTargetAttributes(
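  // Globals that require it (see requiresAMDGPUProtectedVisibility above) are
  // given protected visibility and marked dso_local.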
    GV->setVisibility(llvm::GlobalValue::ProtectedVisibility);
    GV->setDSOLocal(true);
  if (GV->isDeclaration())
  llvm::Function *F = dyn_cast<llvm::Function>(GV);
    setFunctionDeclAttributes(FD, F, M);
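  // If IEEE-compliant NaN handling is not requested, inform the backend by
  // setting "amdgpu-ieee"="false" on the function.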
  if (!getABIInfo().getCodeGenOpts().EmitIEEENaNCompliantInsts)
    F->addFnAttr("amdgpu-ieee", "false");
unsigned AMDGPUTargetCodeGenInfo::getOpenCLKernelCallingConv() const {
  return llvm::CallingConv::AMDGPU_KERNEL;
}
llvm::Constant *AMDGPUTargetCodeGenInfo::getNullPointer(
    return llvm::ConstantPointerNull::get(PT);
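  // The remaining case builds a null pointer in a different address space and
  // addrspace-casts it to the requested pointer type.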
  auto NPT = llvm::PointerType::get(
  return llvm::ConstantExpr::getAddrSpaceCast(
      llvm::ConstantPointerNull::get(NPT), PT);
AMDGPUTargetCodeGenInfo::getGlobalVarAddressSpace(CodeGenModule &CGM,
488 "Address space agnostic languages only");
    return DefaultGlobalAS;
  LangAS AddrSpace = D->getType().getAddressSpace();
  if (AddrSpace != LangAS::Default)
  if (D->getType().isConstantStorage(CGM.getContext(), false, false) &&
      D->hasConstantInitialization()) {
  return DefaultGlobalAS;
AMDGPUTargetCodeGenInfo::getLLVMSyncScopeID(const LangOptions &LangOpts,
                                            SyncScope Scope,
                                            llvm::AtomicOrdering Ordering,
                                            llvm::LLVMContext &Ctx) const {
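  // Map source-level sync scopes (HIP, OpenCL, generic) to the corresponding
  // AMDGPU sync scope names.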
  case SyncScope::HIPSingleThread:
  case SyncScope::SingleScope:
    Name = "singlethread";
    break;
  case SyncScope::HIPWavefront:
  case SyncScope::OpenCLSubGroup:
  case SyncScope::WavefrontScope:
    Name = "wavefront";
    break;
  case SyncScope::HIPWorkgroup: