1 //===--- AMDGPU.cpp - Implement AMDGPU target feature support -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements AMDGPU TargetInfo objects.
10 //
11 //===----------------------------------------------------------------------===//
12
13 #include "AMDGPU.h"
14 #include "clang/Basic/Builtins.h"
15 #include "clang/Basic/CodeGenOptions.h"
16 #include "clang/Basic/LangOptions.h"
17 #include "clang/Basic/MacroBuilder.h"
18 #include "clang/Basic/TargetBuiltins.h"
19 #include "llvm/ADT/StringSwitch.h"
20 #include "llvm/IR/DataLayout.h"
21
22 using namespace clang;
23 using namespace clang::targets;
24
25 namespace clang {
26 namespace targets {
27
28 // If you edit the description strings, make sure you update
29 // getPointerWidthV().
30
/// LLVM data layout description used for R600 (non-amdgcn) triples.
///
/// The literal is split by component kind purely for readability; adjacent
/// string literals are concatenated by the compiler into one string. See the
/// "Data Layout" section of the LLVM Language Reference for the syntax.
static const char *const DataLayoutStringR600 =
    "e-p:32:32-i64:64"                              // endianness, pointer, i64
    "-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256" // vector alignments
    "-v256:256-v512:512-v1024:1024-v2048:2048"      // vector alignments (cont.)
    "-n32:64-S32-A5"; // native int widths, stack alignment, alloca addr space
34
/// LLVM data layout description used for amdgcn triples.
///
/// Compared to the R600 string this one carries explicit pointer entries for
/// address spaces 1 through 6 and a trailing "-ni:7" entry. The literal is
/// split by component kind purely for readability; the pieces concatenate
/// into a single string. See the "Data Layout" section of the LLVM Language
/// Reference for the syntax.
static const char *const DataLayoutStringAMDGCN =
    "e-p:64:64"                                              // default pointer
    "-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32" // per-AS pointers
    "-i64:64"                                                // i64 alignment
    "-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256" // vector alignments
    "-v256:256-v512:512-v1024:1024-v2048:2048"      // vector alignments (cont.)
    "-n32:64-S32-A5" // native int widths, stack alignment, alloca addr space
    "-ni:7";         // non-integral pointer address space
40
41 const LangASMap AMDGPUTargetInfo::AMDGPUDefIsGenMap = {
42 Generic, // Default
43 Global, // opencl_global
44 Local, // opencl_local
45 Constant, // opencl_constant
46 Private, // opencl_private
47 Generic, // opencl_generic
48 Global, // cuda_device
49 Constant, // cuda_constant
50 Local, // cuda_shared
51 Generic, // ptr32_sptr
52 Generic, // ptr32_uptr
53 Generic // ptr64
54 };
55
56 const LangASMap AMDGPUTargetInfo::AMDGPUDefIsPrivMap = {
57 Private, // Default
58 Global, // opencl_global
59 Local, // opencl_local
60 Constant, // opencl_constant
61 Private, // opencl_private
62 Generic, // opencl_generic
63 Global, // cuda_device
64 Constant, // cuda_constant
65 Local, // cuda_shared
66 Generic, // ptr32_sptr
67 Generic, // ptr32_uptr
68 Generic // ptr64
69
70 };
71 } // namespace targets
72 } // namespace clang
73
74 const Builtin::Info AMDGPUTargetInfo::BuiltinInfo[] = {
75 #define BUILTIN(ID, TYPE, ATTRS) \
76 {#ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, nullptr},
77 #define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE) \
78 {#ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, FEATURE},
79 #include "clang/Basic/BuiltinsAMDGPU.def"
80 };
81
82 const char *const AMDGPUTargetInfo::GCCRegNames[] = {
83 "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8",
84 "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
85 "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26",
86 "v27", "v28", "v29", "v30", "v31", "v32", "v33", "v34", "v35",
87 "v36", "v37", "v38", "v39", "v40", "v41", "v42", "v43", "v44",
88 "v45", "v46", "v47", "v48", "v49", "v50", "v51", "v52", "v53",
89 "v54", "v55", "v56", "v57", "v58", "v59", "v60", "v61", "v62",
90 "v63", "v64", "v65", "v66", "v67", "v68", "v69", "v70", "v71",
91 "v72", "v73", "v74", "v75", "v76", "v77", "v78", "v79", "v80",
92 "v81", "v82", "v83", "v84", "v85", "v86", "v87", "v88", "v89",
93 "v90", "v91", "v92", "v93", "v94", "v95", "v96", "v97", "v98",
94 "v99", "v100", "v101", "v102", "v103", "v104", "v105", "v106", "v107",
95 "v108", "v109", "v110", "v111", "v112", "v113", "v114", "v115", "v116",
96 "v117", "v118", "v119", "v120", "v121", "v122", "v123", "v124", "v125",
97 "v126", "v127", "v128", "v129", "v130", "v131", "v132", "v133", "v134",
98 "v135", "v136", "v137", "v138", "v139", "v140", "v141", "v142", "v143",
99 "v144", "v145", "v146", "v147", "v148", "v149", "v150", "v151", "v152",
100 "v153", "v154", "v155", "v156", "v157", "v158", "v159", "v160", "v161",
101 "v162", "v163", "v164", "v165", "v166", "v167", "v168", "v169", "v170",
102 "v171", "v172", "v173", "v174", "v175", "v176", "v177", "v178", "v179",
103 "v180", "v181", "v182", "v183", "v184", "v185", "v186", "v187", "v188",
104 "v189", "v190", "v191", "v192", "v193", "v194", "v195", "v196", "v197",
105 "v198", "v199", "v200", "v201", "v202", "v203", "v204", "v205", "v206",
106 "v207", "v208", "v209", "v210", "v211", "v212", "v213", "v214", "v215",
107 "v216", "v217", "v218", "v219", "v220", "v221", "v222", "v223", "v224",
108 "v225", "v226", "v227", "v228", "v229", "v230", "v231", "v232", "v233",
109 "v234", "v235", "v236", "v237", "v238", "v239", "v240", "v241", "v242",
110 "v243", "v244", "v245", "v246", "v247", "v248", "v249", "v250", "v251",
111 "v252", "v253", "v254", "v255", "s0", "s1", "s2", "s3", "s4",
112 "s5", "s6", "s7", "s8", "s9", "s10", "s11", "s12", "s13",
113 "s14", "s15", "s16", "s17", "s18", "s19", "s20", "s21", "s22",
114 "s23", "s24", "s25", "s26", "s27", "s28", "s29", "s30", "s31",
115 "s32", "s33", "s34", "s35", "s36", "s37", "s38", "s39", "s40",
116 "s41", "s42", "s43", "s44", "s45", "s46", "s47", "s48", "s49",
117 "s50", "s51", "s52", "s53", "s54", "s55", "s56", "s57", "s58",
118 "s59", "s60", "s61", "s62", "s63", "s64", "s65", "s66", "s67",
119 "s68", "s69", "s70", "s71", "s72", "s73", "s74", "s75", "s76",
120 "s77", "s78", "s79", "s80", "s81", "s82", "s83", "s84", "s85",
121 "s86", "s87", "s88", "s89", "s90", "s91", "s92", "s93", "s94",
122 "s95", "s96", "s97", "s98", "s99", "s100", "s101", "s102", "s103",
123 "s104", "s105", "s106", "s107", "s108", "s109", "s110", "s111", "s112",
124 "s113", "s114", "s115", "s116", "s117", "s118", "s119", "s120", "s121",
125 "s122", "s123", "s124", "s125", "s126", "s127", "exec", "vcc", "scc",
126 "m0", "flat_scratch", "exec_lo", "exec_hi", "vcc_lo", "vcc_hi",
127 "flat_scratch_lo", "flat_scratch_hi"
128 };
129
getGCCRegNames() const130 ArrayRef<const char *> AMDGPUTargetInfo::getGCCRegNames() const {
131 return llvm::makeArrayRef(GCCRegNames);
132 }
133
initFeatureMap(llvm::StringMap<bool> & Features,DiagnosticsEngine & Diags,StringRef CPU,const std::vector<std::string> & FeatureVec) const134 bool AMDGPUTargetInfo::initFeatureMap(
135 llvm::StringMap<bool> &Features, DiagnosticsEngine &Diags, StringRef CPU,
136 const std::vector<std::string> &FeatureVec) const {
137
138 using namespace llvm::AMDGPU;
139
140 // XXX - What does the member GPU mean if device name string passed here?
141 if (isAMDGCN(getTriple())) {
142 switch (llvm::AMDGPU::parseArchAMDGCN(CPU)) {
143 case GK_GFX1012:
144 case GK_GFX1011:
145 Features["dot1-insts"] = true;
146 Features["dot2-insts"] = true;
147 Features["dot5-insts"] = true;
148 Features["dot6-insts"] = true;
149 LLVM_FALLTHROUGH;
150 case GK_GFX1010:
151 Features["dl-insts"] = true;
152 Features["ci-insts"] = true;
153 Features["flat-address-space"] = true;
154 Features["16-bit-insts"] = true;
155 Features["dpp"] = true;
156 Features["gfx8-insts"] = true;
157 Features["gfx9-insts"] = true;
158 Features["gfx10-insts"] = true;
159 Features["s-memrealtime"] = true;
160 break;
161 case GK_GFX908:
162 Features["dot3-insts"] = true;
163 Features["dot4-insts"] = true;
164 Features["dot5-insts"] = true;
165 Features["dot6-insts"] = true;
166 LLVM_FALLTHROUGH;
167 case GK_GFX906:
168 Features["dl-insts"] = true;
169 Features["dot1-insts"] = true;
170 Features["dot2-insts"] = true;
171 LLVM_FALLTHROUGH;
172 case GK_GFX909:
173 case GK_GFX904:
174 case GK_GFX902:
175 case GK_GFX900:
176 Features["gfx9-insts"] = true;
177 LLVM_FALLTHROUGH;
178 case GK_GFX810:
179 case GK_GFX803:
180 case GK_GFX802:
181 case GK_GFX801:
182 Features["gfx8-insts"] = true;
183 Features["16-bit-insts"] = true;
184 Features["dpp"] = true;
185 Features["s-memrealtime"] = true;
186 LLVM_FALLTHROUGH;
187 case GK_GFX704:
188 case GK_GFX703:
189 case GK_GFX702:
190 case GK_GFX701:
191 case GK_GFX700:
192 Features["ci-insts"] = true;
193 Features["flat-address-space"] = true;
194 LLVM_FALLTHROUGH;
195 case GK_GFX601:
196 case GK_GFX600:
197 break;
198 case GK_NONE:
199 break;
200 default:
201 llvm_unreachable("Unhandled GPU!");
202 }
203 } else {
204 if (CPU.empty())
205 CPU = "r600";
206
207 switch (llvm::AMDGPU::parseArchR600(CPU)) {
208 case GK_CAYMAN:
209 case GK_CYPRESS:
210 case GK_RV770:
211 case GK_RV670:
212 // TODO: Add fp64 when implemented.
213 break;
214 case GK_TURKS:
215 case GK_CAICOS:
216 case GK_BARTS:
217 case GK_SUMO:
218 case GK_REDWOOD:
219 case GK_JUNIPER:
220 case GK_CEDAR:
221 case GK_RV730:
222 case GK_RV710:
223 case GK_RS880:
224 case GK_R630:
225 case GK_R600:
226 break;
227 default:
228 llvm_unreachable("Unhandled GPU!");
229 }
230 }
231
232 return TargetInfo::initFeatureMap(Features, Diags, CPU, FeatureVec);
233 }
234
adjustTargetOptions(const CodeGenOptions & CGOpts,TargetOptions & TargetOpts) const235 void AMDGPUTargetInfo::adjustTargetOptions(const CodeGenOptions &CGOpts,
236 TargetOptions &TargetOpts) const {
237 bool hasFP32Denormals = false;
238 bool hasFP64Denormals = false;
239
240 for (auto &I : TargetOpts.FeaturesAsWritten) {
241 if (I == "+fp32-denormals" || I == "-fp32-denormals")
242 hasFP32Denormals = true;
243 if (I == "+fp64-fp16-denormals" || I == "-fp64-fp16-denormals")
244 hasFP64Denormals = true;
245 }
246 if (!hasFP32Denormals)
247 TargetOpts.Features.push_back(
248 (Twine(hasFastFMAF() && hasFullRateDenormalsF32() && !CGOpts.FlushDenorm
249 ? '+' : '-') + Twine("fp32-denormals"))
250 .str());
251 // Always do not flush fp64 or fp16 denorms.
252 if (!hasFP64Denormals && hasFP64())
253 TargetOpts.Features.push_back("+fp64-fp16-denormals");
254 }
255
fillValidCPUList(SmallVectorImpl<StringRef> & Values) const256 void AMDGPUTargetInfo::fillValidCPUList(
257 SmallVectorImpl<StringRef> &Values) const {
258 if (isAMDGCN(getTriple()))
259 llvm::AMDGPU::fillValidArchListAMDGCN(Values);
260 else
261 llvm::AMDGPU::fillValidArchListR600(Values);
262 }
263
setAddressSpaceMap(bool DefaultIsPrivate)264 void AMDGPUTargetInfo::setAddressSpaceMap(bool DefaultIsPrivate) {
265 AddrSpaceMap = DefaultIsPrivate ? &AMDGPUDefIsPrivMap : &AMDGPUDefIsGenMap;
266 }
267
AMDGPUTargetInfo(const llvm::Triple & Triple,const TargetOptions & Opts)268 AMDGPUTargetInfo::AMDGPUTargetInfo(const llvm::Triple &Triple,
269 const TargetOptions &Opts)
270 : TargetInfo(Triple),
271 GPUKind(isAMDGCN(Triple) ?
272 llvm::AMDGPU::parseArchAMDGCN(Opts.CPU) :
273 llvm::AMDGPU::parseArchR600(Opts.CPU)),
274 GPUFeatures(isAMDGCN(Triple) ?
275 llvm::AMDGPU::getArchAttrAMDGCN(GPUKind) :
276 llvm::AMDGPU::getArchAttrR600(GPUKind)) {
277 resetDataLayout(isAMDGCN(getTriple()) ? DataLayoutStringAMDGCN
278 : DataLayoutStringR600);
279 assert(DataLayout->getAllocaAddrSpace() == Private);
280
281 setAddressSpaceMap(Triple.getOS() == llvm::Triple::Mesa3D ||
282 !isAMDGCN(Triple));
283 UseAddrSpaceMapMangling = true;
284
285 HasLegalHalfType = true;
286 HasFloat16 = true;
287
288 // Set pointer width and alignment for target address space 0.
289 PointerWidth = PointerAlign = DataLayout->getPointerSizeInBits();
290 if (getMaxPointerWidth() == 64) {
291 LongWidth = LongAlign = 64;
292 SizeType = UnsignedLong;
293 PtrDiffType = SignedLong;
294 IntPtrType = SignedLong;
295 }
296
297 MaxAtomicPromoteWidth = MaxAtomicInlineWidth = 64;
298 }
299
adjust(LangOptions & Opts)300 void AMDGPUTargetInfo::adjust(LangOptions &Opts) {
301 TargetInfo::adjust(Opts);
302 // ToDo: There are still a few places using default address space as private
303 // address space in OpenCL, which needs to be cleaned up, then Opts.OpenCL
304 // can be removed from the following line.
305 setAddressSpaceMap(/*DefaultIsPrivate=*/Opts.OpenCL ||
306 !isAMDGCN(getTriple()));
307 }
308
getTargetBuiltins() const309 ArrayRef<Builtin::Info> AMDGPUTargetInfo::getTargetBuiltins() const {
310 return llvm::makeArrayRef(BuiltinInfo, clang::AMDGPU::LastTSBuiltin -
311 Builtin::FirstTSBuiltin);
312 }
313
getTargetDefines(const LangOptions & Opts,MacroBuilder & Builder) const314 void AMDGPUTargetInfo::getTargetDefines(const LangOptions &Opts,
315 MacroBuilder &Builder) const {
316 Builder.defineMacro("__AMD__");
317 Builder.defineMacro("__AMDGPU__");
318
319 if (isAMDGCN(getTriple()))
320 Builder.defineMacro("__AMDGCN__");
321 else
322 Builder.defineMacro("__R600__");
323
324 if (GPUKind != llvm::AMDGPU::GK_NONE) {
325 StringRef CanonName = isAMDGCN(getTriple()) ?
326 getArchNameAMDGCN(GPUKind) : getArchNameR600(GPUKind);
327 Builder.defineMacro(Twine("__") + Twine(CanonName) + Twine("__"));
328 }
329
330 // TODO: __HAS_FMAF__, __HAS_LDEXPF__, __HAS_FP64__ are deprecated and will be
331 // removed in the near future.
332 if (hasFMAF())
333 Builder.defineMacro("__HAS_FMAF__");
334 if (hasFastFMAF())
335 Builder.defineMacro("FP_FAST_FMAF");
336 if (hasLDEXPF())
337 Builder.defineMacro("__HAS_LDEXPF__");
338 if (hasFP64())
339 Builder.defineMacro("__HAS_FP64__");
340 if (hasFastFMA())
341 Builder.defineMacro("FP_FAST_FMA");
342 }
343
setAuxTarget(const TargetInfo * Aux)344 void AMDGPUTargetInfo::setAuxTarget(const TargetInfo *Aux) {
345 assert(HalfFormat == Aux->HalfFormat);
346 assert(FloatFormat == Aux->FloatFormat);
347 assert(DoubleFormat == Aux->DoubleFormat);
348
349 // On x86_64 long double is 80-bit extended precision format, which is
350 // not supported by AMDGPU. 128-bit floating point format is also not
351 // supported by AMDGPU. Therefore keep its own format for these two types.
352 auto SaveLongDoubleFormat = LongDoubleFormat;
353 auto SaveFloat128Format = Float128Format;
354 copyAuxTarget(Aux);
355 LongDoubleFormat = SaveLongDoubleFormat;
356 Float128Format = SaveFloat128Format;
357 }
358