#include "ABIInfoImpl.h"
#include "TargetInfo.h"
#include "llvm/Support/AMDGPUAddrSpace.h"
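  // Upper bound on the number of 32-bit registers used to pass arguments or
  // return values directly before falling back to memory.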
  static const unsigned MaxNumRegsForArgsRet = 16;
  unsigned numRegsForType(QualType Ty) const;
  bool isHomogeneousAggregateSmallEnough(const Type *Base,
                                         uint64_t Members) const override;
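  // Coerce HIP scalar pointer arguments from generic pointers to global ones.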
  llvm::Type *coerceKernelArgumentType(llvm::Type *Ty, unsigned FromAS,
                                       unsigned ToAS) const {
    auto *PtrTy = llvm::dyn_cast<llvm::PointerType>(Ty);
    if (PtrTy && PtrTy->getAddressSpace() == FromAS)
      return llvm::PointerType::get(Ty->getContext(), ToAS);
    return Ty;
  }
  ABIArgInfo classifyArgumentType(QualType Ty, bool Variadic,
                                  unsigned &NumRegsLeft) const;
  llvm::FixedVectorType *
  getOptimalVectorMemoryType(llvm::FixedVectorType *T,
                             const LangOptions &Opt) const override {
    // 96-bit (3 x 32-bit) loads and stores are legal, so keep such vectors.
    if (T->getNumElements() == 3 && getDataLayout().getTypeSizeInBits(T) == 96)
      return T;
    return DefaultABIInfo::getOptimalVectorMemoryType(T, Opt);
  }
bool AMDGPUABIInfo::isHomogeneousAggregateBaseType(QualType Ty) const {
  return true;
}
bool AMDGPUABIInfo::isHomogeneousAggregateSmallEnough(
    const Type *Base, uint64_t Members) const {
  uint32_t NumRegs = (getContext().getTypeSize(Base) + 31) / 32;

  // Homogeneous aggregates may occupy at most 16 registers.
  return Members * NumRegs <= MaxNumRegsForArgsRet;
}
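/// Estimate the number of 32-bit registers \p Ty will occupy when passed in
/// registers.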
unsigned AMDGPUABIInfo::numRegsForType(QualType Ty) const {
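  // For vectors, count registers from the number of elements rather than the
  // in-memory size (which over-counts padded 3-element vectors): e.g. float4
  // uses 4 registers, double2 uses 4, half3 uses 2.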
    QualType EltTy = VT->getElementType();
    unsigned EltSize = getContext().getTypeSize(EltTy);
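    // 16-bit elements are packed two per 32-bit register.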
    if (EltSize == 16)
      return (VT->getNumElements() + 1) / 2;
    unsigned EltNumRegs = (EltSize + 31) / 32;
    return EltNumRegs * VT->getNumElements();
      NumRegs += numRegsForType(FieldTy);
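  // Anything else: round the bit-size up to whole 32-bit registers.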
  return (getContext().getTypeSize(Ty) + 31) / 32;
}
  unsigned ArgumentIndex = 0;
  const unsigned numFixedArguments = FI.getNumRequiredArgs();
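  // Non-kernel arguments share a budget of MaxNumRegsForArgsRet registers;
  // classifyArgumentType decrements NumRegsLeft as arguments are assigned.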
  unsigned NumRegsLeft = MaxNumRegsForArgsRet;
    if (CC == llvm::CallingConv::AMDGPU_KERNEL) {
      Arg.info = classifyKernelArgumentType(Arg.type);
    } else {
      bool FixedArgument = ArgumentIndex++ < numFixedArguments;
      Arg.info = classifyArgumentType(Arg.type, /*Variadic=*/!FixedArgument,
                                      NumRegsLeft);
    }
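  // va_arg lowering: values are never passed indirectly and alignment beyond
  // the slot alignment is not honored.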
  const bool IsIndirect = false;
  const bool AllowHigherAlign = false;
  return emitVoidPtrVAArg(CGF, VAListAddr, Ty, IsIndirect,
                          getContext().getTypeInfoInChars(Ty),
                          CharUnits::fromQuantity(4), AllowHigherAlign, Slot);
}
        llvm::Type *I32Ty = llvm::Type::getInt32Ty(getVMContext());
        return ABIArgInfo::getDirect(llvm::ArrayType::get(I32Ty, 2));
      if (numRegsForType(RetTy) <= MaxNumRegsForArgsRet)
        return ABIArgInfo::getDirect();
  llvm::Type *OrigLTy = CGT.ConvertType(Ty);
  llvm::Type *LTy = OrigLTy;
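  // Under HIP, coerce generic pointers in kernel arguments to the device
  // (global) address space.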
  if (getContext().getLangOpts().HIP) {
    LTy = coerceKernelArgumentType(
        OrigLTy, getContext().getTargetAddressSpace(LangAS::Default),
        getContext().getTargetAddressSpace(LangAS::cuda_device));
  }
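  // Aggregate kernel arguments that were not coerced above are passed byref in
  // the constant address space instead of being copied.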
  if (!getContext().getLangOpts().OpenCL && LTy == OrigLTy &&
      isAggregateTypeForABI(Ty)) {
    return ABIArgInfo::getIndirectAliased(
        getContext().getTypeAlignInChars(Ty),
        getContext().getTargetAddressSpace(LangAS::opencl_constant),
        false /*Realign*/, nullptr /*Padding*/);
  }
ABIArgInfo AMDGPUABIInfo::classifyArgumentType(QualType Ty, bool Variadic,
                                               unsigned &NumRegsLeft) const {
  assert(NumRegsLeft <= MaxNumRegsForArgsRet && "register estimate underflow");
      unsigned NumRegs = (Size + 31) / 32;
      NumRegsLeft -= std::min(NumRegsLeft, NumRegs);
      llvm::Type *I32Ty = llvm::Type::getInt32Ty(getVMContext());
      return ABIArgInfo::getDirect(llvm::ArrayType::get(I32Ty, 2));
    if (NumRegsLeft > 0) {
      unsigned NumRegs = numRegsForType(Ty);
      if (NumRegsLeft >= NumRegs) {
        NumRegsLeft -= NumRegs;
        return ABIArgInfo::getDirect();
      }
    }
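    // Otherwise pass the aggregate indirectly (by reference) in the private
    // address space.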
    return ABIArgInfo::getIndirectAliased(
        getContext().getTypeAlignInChars(Ty),
        getContext().getTargetAddressSpace(LangAS::opencl_private));
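  // For everything else, charge the register estimate against the remaining
  // budget.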
    unsigned NumRegs = numRegsForType(Ty);
    NumRegsLeft -= std::min(NumRegs, NumRegsLeft);
  void setFunctionDeclAttributes(const FunctionDecl *FD, llvm::Function *F,
                                 CodeGenModule &CGM) const;
  llvm::Constant *getNullPointer(const CodeGen::CodeGenModule &CGM,
                                 llvm::PointerType *T,
                                 QualType QT) const override;
        getABIInfo().getDataLayout().getAllocaAddrSpace());
  llvm::SyncScope::ID getLLVMSyncScopeID(const LangOptions &LangOpts,
                                         SyncScope Scope,
                                         llvm::AtomicOrdering Ordering,
                                         llvm::LLVMContext &Ctx) const override;
                               llvm::Instruction &AtomicInst,
  llvm::Value *createEnqueuedBlockKernel(CodeGenFunction &CGF,
                                         llvm::Function *BlockInvokeFunc,
                                         llvm::Type *BlockTy) const override;
static bool requiresAMDGPUProtectedVisibility(const Decl *D,
                                              llvm::GlobalValue *GV) {
  if (GV->getVisibility() != llvm::GlobalValue::HiddenVisibility)
    return false;
  return !D->hasAttr<OMPDeclareTargetDeclAttr>() &&
         (D->hasAttr<OpenCLKernelAttr>() ||
          (isa<FunctionDecl>(D) && D->hasAttr<CUDAGlobalAttr>()) ||
          (isa<VarDecl>(D) &&
           (D->hasAttr<HIPManagedAttr>() || D->hasAttr<CUDASharedAttr>() ||
            cast<VarDecl>(D)->getType()->isCUDADeviceBuiltinSurfaceType() ||
            cast<VarDecl>(D)->getType()->isCUDADeviceBuiltinTextureType())));
}
void AMDGPUTargetCodeGenInfo::setFunctionDeclAttributes(
    const FunctionDecl *FD, llvm::Function *F, CodeGenModule &M) const {
  const auto *ReqdWGS =
      M.getLangOpts().OpenCL ? FD->getAttr<ReqdWorkGroupSizeAttr>() : nullptr;
  const bool IsOpenCLKernel =
      M.getLangOpts().OpenCL && FD->hasAttr<OpenCLKernelAttr>();
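  // Assumed counterpart to IsOpenCLKernel, referenced below: a HIP kernel is a
  // __global__ function compiled in HIP mode.
  const bool IsHIPKernel = M.getLangOpts().HIP && FD->hasAttr<CUDAGlobalAttr>();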
  const auto *FlatWGS = FD->getAttr<AMDGPUFlatWorkGroupSizeAttr>();
  if (ReqdWGS || FlatWGS) {
    M.handleAMDGPUFlatWorkGroupSizeAttr(F, FlatWGS, ReqdWGS);
  } else if (IsOpenCLKernel || IsHIPKernel) {
    // Default bound: 256 for OpenCL kernels, --gpu-max-threads-per-block for
    // HIP kernels.
    const unsigned OpenCLDefaultMaxWorkGroupSize = 256;
    const unsigned DefaultMaxWorkGroupSize =
        IsOpenCLKernel ? OpenCLDefaultMaxWorkGroupSize
                       : M.getLangOpts().GPUMaxThreadsPerBlock;
    std::string AttrVal =
        std::string("1,") + llvm::utostr(DefaultMaxWorkGroupSize);
    // e.g. an OpenCL kernel gets "amdgpu-flat-work-group-size"="1,256".
    F->addFnAttr("amdgpu-flat-work-group-size", AttrVal);
  }
  if (const auto *Attr = FD->getAttr<AMDGPUWavesPerEUAttr>())
    M.handleAMDGPUWavesPerEUAttr(F, Attr);
  if (const auto *Attr = FD->getAttr<AMDGPUNumSGPRAttr>()) {
    unsigned NumSGPR = Attr->getNumSGPR();
    if (NumSGPR != 0)
      F->addFnAttr("amdgpu-num-sgpr", llvm::utostr(NumSGPR));
  }
  if (const auto *Attr = FD->getAttr<AMDGPUNumVGPRAttr>()) {
    uint32_t NumVGPR = Attr->getNumVGPR();
    if (NumVGPR != 0)
      F->addFnAttr("amdgpu-num-vgpr", llvm::utostr(NumVGPR));
  }
  if (const auto *Attr = FD->getAttr<AMDGPUMaxNumWorkGroupsAttr>()) {
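    // The Y and Z dimensions default to 1 when they are not specified.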
    uint32_t Y = Attr->getMaxNumWorkGroupsY()
                     ? Attr->getMaxNumWorkGroupsY()
                           ->EvaluateKnownConstInt(M.getContext()).getExtValue()
                     : 1;
    uint32_t Z = Attr->getMaxNumWorkGroupsZ()
                     ? Attr->getMaxNumWorkGroupsZ()
                           ->EvaluateKnownConstInt(M.getContext()).getExtValue()
                     : 1;